VisionFive2 Linux kernel

StarFive Tech Linux Kernel for VisionFive (JH7110) boards (mirror)

/*
 * mm/rmap.c - physical to virtual reverse mappings
 *
 * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
 * Released under the General Public License (GPL).
 *
 * Simple, low overhead reverse mapping scheme.
 * Please try to keep this thing as modular as possible.
 *
 * Provides methods for unmapping each kind of mapped page:
 * the anon methods track anonymous pages, and
 * the file methods track pages belonging to an inode.
 *
 * Original design by Rik van Riel <riel@conectiva.com.br> 2001
 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
 * Contributions by Hugh Dickins 2003, 2004
 */

/*
 * Lock ordering in mm:
 *
 * inode->i_mutex	(while writing or truncating, not reading or faulting)
 *   mm->mmap_lock
 *     page->flags PG_locked (lock_page)   * (see hugetlbfs below)
 *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
 *         mapping->i_mmap_rwsem
 *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
 *           anon_vma->rwsem
 *             mm->page_table_lock or pte_lock
 *               swap_lock (in swap_duplicate, swap_info_get)
 *                 mmlist_lock (in mmput, drain_mmlist and others)
 *                 mapping->private_lock (in __set_page_dirty_buffers)
 *                   lock_page_memcg move_lock (in __set_page_dirty_buffers)
 *                     i_pages lock (widely used)
 *                       lruvec->lru_lock (in lock_page_lruvec_irq)
 *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
 *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
 *                   sb_lock (within inode_lock in fs/fs-writeback.c)
 *                   i_pages lock (widely used, in set_page_dirty,
 *                             in arch-dependent flush_dcache_mmap_lock,
 *                             within bdi.wb->list_lock in __sync_single_inode)
 *
 * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
 *   ->tasklist_lock
 *     pte map lock
 *
 * * hugetlbfs PageHuge() pages take locks in this order:
 *         mapping->i_mmap_rwsem
 *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
 *             page->flags PG_locked (lock_page)
 */
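
/*
 * For example (an illustrative sketch, not part of the ordering list above):
 * any path that needs both the mmap_lock and a pte lock must respect the
 * nesting documented above and take mmap_lock first:
 *
 *	mmap_read_lock(mm);
 *	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *	...
 *	pte_unmap_unlock(pte, ptl);
 *	mmap_read_unlock(mm);
 *
 * Acquiring them in the opposite order would invert the documented
 * hierarchy and can deadlock against code that follows it.
 */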

#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/backing-dev.h>
#include <linux/page_idle.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>

#include <asm/tlbflush.h>

#include <trace/events/tlb.h>

#include "internal.h"

static struct kmem_cache *anon_vma_cachep;
static struct kmem_cache *anon_vma_chain_cachep;

static inline struct anon_vma *anon_vma_alloc(void)
{
	struct anon_vma *anon_vma;

	anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
	if (anon_vma) {
		atomic_set(&anon_vma->refcount, 1);
		anon_vma->degree = 1;	/* Reference for first vma */
		anon_vma->parent = anon_vma;
		/*
		 * Initialise the anon_vma root to point to itself. If called
		 * from fork, the root will be reset to the parent's anon_vma.
		 */
		anon_vma->root = anon_vma;
	}

	return anon_vma;
}

static inline void anon_vma_free(struct anon_vma *anon_vma)
{
	VM_BUG_ON(atomic_read(&anon_vma->refcount));

	/*
	 * Synchronize against page_lock_anon_vma_read() such that
	 * we can safely hold the lock without the anon_vma getting
	 * freed.
	 *
	 * Relies on the full mb implied by the atomic_dec_and_test() from
	 * put_anon_vma() against the acquire barrier implied by
	 * down_read_trylock() from page_lock_anon_vma_read(). This orders:
	 *
	 * page_lock_anon_vma_read()	VS	put_anon_vma()
	 *   down_read_trylock()		  atomic_dec_and_test()
	 *   LOCK				  MB
	 *   atomic_read()			  rwsem_is_locked()
	 *
	 * LOCK should suffice since the actual taking of the lock must
	 * happen _before_ what follows.
	 */
	might_sleep();
	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
		anon_vma_lock_write(anon_vma);
		anon_vma_unlock_write(anon_vma);
	}

	kmem_cache_free(anon_vma_cachep, anon_vma);
}

static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
{
	return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
}

static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
{
	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
}

static void anon_vma_chain_link(struct vm_area_struct *vma,
				struct anon_vma_chain *avc,
				struct anon_vma *anon_vma)
{
	avc->vma = vma;
	avc->anon_vma = anon_vma;
	list_add(&avc->same_vma, &vma->anon_vma_chain);
	anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
}

/**
 * __anon_vma_prepare - attach an anon_vma to a memory region
 * @vma: the memory region in question
 *
 * This makes sure the memory mapping described by 'vma' has
 * an 'anon_vma' attached to it, so that we can associate the
 * anonymous pages mapped into it with that anon_vma.
 *
 * The common case will be that we already have one, which
 * is handled inline by anon_vma_prepare(). But if not, we
 * either need to find an adjacent mapping whose anon_vma we
 * can reuse (very common when the only reason for splitting
 * a vma has been mprotect()), or we allocate a new one.
 *
 * Anon-vma allocations are very subtle, because we may have
 * optimistically looked up an anon_vma in page_lock_anon_vma_read()
 * and that may actually touch the rwsem even in the newly
 * allocated vma (it depends on RCU to make sure that the
 * anon_vma isn't actually destroyed).
 *
 * As a result, we need to do proper anon_vma locking even
 * for the new allocation. At the same time, we do not want
 * to do any locking for the common case of already having
 * an anon_vma.
 *
 * This must be called with the mmap_lock held for reading.
 */
int __anon_vma_prepare(struct vm_area_struct *vma)
{
	struct mm_struct *mm = vma->vm_mm;
	struct anon_vma *anon_vma, *allocated;
	struct anon_vma_chain *avc;

	might_sleep();

	avc = anon_vma_chain_alloc(GFP_KERNEL);
	if (!avc)
		goto out_enomem;

	anon_vma = find_mergeable_anon_vma(vma);
	allocated = NULL;
	if (!anon_vma) {
		anon_vma = anon_vma_alloc();
		if (unlikely(!anon_vma))
			goto out_enomem_free_avc;
		allocated = anon_vma;
	}

	anon_vma_lock_write(anon_vma);
	/* page_table_lock to protect against threads */
	spin_lock(&mm->page_table_lock);
	if (likely(!vma->anon_vma)) {
		vma->anon_vma = anon_vma;
		anon_vma_chain_link(vma, avc, anon_vma);
		/* vma reference or self-parent link for new root */
		anon_vma->degree++;
		allocated = NULL;
		avc = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	anon_vma_unlock_write(anon_vma);

	if (unlikely(allocated))
		put_anon_vma(allocated);
	if (unlikely(avc))
		anon_vma_chain_free(avc);

	return 0;

 out_enomem_free_avc:
	anon_vma_chain_free(avc);
 out_enomem:
	return -ENOMEM;
}
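
/*
 * Typical usage (an illustrative sketch): callers go through the
 * anon_vma_prepare() wrapper, which only drops into __anon_vma_prepare()
 * when vma->anon_vma is still unset, e.g. on an anonymous page fault:
 *
 *	if (unlikely(anon_vma_prepare(vma)))
 *		return VM_FAULT_OOM;
 *	...
 *	page_add_new_anon_rmap(page, vma, address, false);
 */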

/*
 * This is a useful helper function for locking the anon_vma root as
 * we traverse the vma->anon_vma_chain, looping over anon_vma's that
 * have the same vma.
 *
 * Such anon_vma's should have the same root, so you'd expect to see
 * just a single mutex_lock for the whole traversal.
 */
static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
{
	struct anon_vma *new_root = anon_vma->root;
	if (new_root != root) {
		if (WARN_ON_ONCE(root))
			up_write(&root->rwsem);
		root = new_root;
		down_write(&root->rwsem);
	}
	return root;
}

static inline void unlock_anon_vma_root(struct anon_vma *root)
{
	if (root)
		up_write(&root->rwsem);
}

/*
 * Attach the anon_vmas from src to dst.
 * Returns 0 on success, -ENOMEM on failure.
 *
 * anon_vma_clone() is called by __vma_adjust(), __split_vma(), copy_vma() and
 * anon_vma_fork(). The first three want an exact copy of src, while the last
 * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
 * endless growth of the anon_vma hierarchy. Since dst->anon_vma is set to
 * NULL before the call, we can identify this case by checking
 * (!dst->anon_vma && src->anon_vma).
 *
 * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
 * and reuse an existing anon_vma that has no vmas and only one child anon_vma.
 * This prevents the anon_vma hierarchy from degrading into an endless linear
 * chain when a task forks constantly. On the other hand, an anon_vma with
 * more than one child is not reused even if it has no live vma, so the rmap
 * walker has a good chance of avoiding a scan of the whole hierarchy when it
 * searches for where a page is mapped.
 */
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
	struct anon_vma_chain *avc, *pavc;
	struct anon_vma *root = NULL;

	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma;

		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
		if (unlikely(!avc)) {
			unlock_anon_vma_root(root);
			root = NULL;
			avc = anon_vma_chain_alloc(GFP_KERNEL);
			if (!avc)
				goto enomem_failure;
		}
		anon_vma = pavc->anon_vma;
		root = lock_anon_vma_root(root, anon_vma);
		anon_vma_chain_link(dst, avc, anon_vma);

		/*
		 * Reuse an existing anon_vma if its degree is lower than two,
		 * which means it has no vma and only one anon_vma child.
		 *
		 * Do not choose the parent anon_vma, otherwise the first child
		 * will always reuse it. The root anon_vma is never reused:
		 * it has a self-parent reference and at least one child.
		 */
		if (!dst->anon_vma && src->anon_vma &&
		    anon_vma != src->anon_vma && anon_vma->degree < 2)
			dst->anon_vma = anon_vma;
	}
	if (dst->anon_vma)
		dst->anon_vma->degree++;
	unlock_anon_vma_root(root);
	return 0;

 enomem_failure:
	/*
	 * dst->anon_vma is dropped here; otherwise its degree could be
	 * incorrectly decremented in unlink_anon_vmas().
	 * We can safely do this because callers of anon_vma_clone() don't care
	 * about dst->anon_vma if anon_vma_clone() failed.
	 */
	dst->anon_vma = NULL;
	unlink_anon_vmas(dst);
	return -ENOMEM;
}

/*
 * Attach vma to its own anon_vma, as well as to the anon_vmas that
 * the corresponding VMA in the parent process is attached to.
 * Returns 0 on success, non-zero on failure.
 */
int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
{
	struct anon_vma_chain *avc;
	struct anon_vma *anon_vma;
	int error;

	/* Don't bother if the parent process has no anon_vma here. */
	if (!pvma->anon_vma)
		return 0;

	/* Drop inherited anon_vma, we'll reuse existing or allocate new. */
	vma->anon_vma = NULL;

	/*
	 * First, attach the new VMA to the parent VMA's anon_vmas,
	 * so rmap can find non-COWed pages in child processes.
	 */
	error = anon_vma_clone(vma, pvma);
	if (error)
		return error;

	/* An existing anon_vma has been reused, all done then. */
	if (vma->anon_vma)
		return 0;

	/* Then add our own anon_vma. */
	anon_vma = anon_vma_alloc();
	if (!anon_vma)
		goto out_error;
	avc = anon_vma_chain_alloc(GFP_KERNEL);
	if (!avc)
		goto out_error_free_anon_vma;

	/*
	 * The root anon_vma's rwsem is the lock actually used when we
	 * lock any of the anon_vmas in this anon_vma tree.
	 */
	anon_vma->root = pvma->anon_vma->root;
	anon_vma->parent = pvma->anon_vma;
	/*
	 * With refcounts, an anon_vma can stay around longer than the
	 * process it belongs to. The root anon_vma needs to be pinned until
	 * this anon_vma is freed, because the lock lives in the root.
	 */
	get_anon_vma(anon_vma->root);
	/* Mark this anon_vma as the one where our new (COWed) pages go. */
	vma->anon_vma = anon_vma;
	anon_vma_lock_write(anon_vma);
	anon_vma_chain_link(vma, avc, anon_vma);
	anon_vma->parent->degree++;
	anon_vma_unlock_write(anon_vma);

	return 0;

 out_error_free_anon_vma:
	put_anon_vma(anon_vma);
 out_error:
	unlink_anon_vmas(vma);
	return -ENOMEM;
}
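
/*
 * Sketch of the typical caller: dup_mmap() at fork time invokes
 * anon_vma_fork() for each VMA it copies into the child, roughly:
 *
 *	tmp = vm_area_dup(mpnt);
 *	...
 *	if (anon_vma_fork(tmp, mpnt))
 *		goto fail_nomem_anon_vma_fork;
 */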

void unlink_anon_vmas(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc, *next;
	struct anon_vma *root = NULL;

	/*
	 * Unlink each anon_vma chained to the VMA.  This list is ordered
	 * from newest to oldest, ensuring the root anon_vma gets freed last.
	 */
	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma = avc->anon_vma;

		root = lock_anon_vma_root(root, anon_vma);
		anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);

		/*
		 * Leave empty anon_vmas on the list - we'll need
		 * to free them outside the lock.
		 */
		if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
			anon_vma->parent->degree--;
			continue;
		}

		list_del(&avc->same_vma);
		anon_vma_chain_free(avc);
	}
	if (vma->anon_vma) {
		vma->anon_vma->degree--;

		/*
		 * The vma is still needed after the unlink, and its anon_vma
		 * will be prepared again when a fault is handled.
		 */
		vma->anon_vma = NULL;
	}
	unlock_anon_vma_root(root);

	/*
	 * Iterate the list once more, it now only contains empty and unlinked
	 * anon_vmas, destroy them. We could not do this earlier because
	 * __put_anon_vma() may need to write-acquire the anon_vma->root->rwsem.
	 */
	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma = avc->anon_vma;

		VM_WARN_ON(anon_vma->degree);
		put_anon_vma(anon_vma);

		list_del(&avc->same_vma);
		anon_vma_chain_free(avc);
	}
}
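
/*
 * unlink_anon_vmas() runs when a VMA is being torn down (for example from
 * free_pgtables() during munmap or exit), after which the vma no longer
 * participates in rmap walks.
 */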

static void anon_vma_ctor(void *data)
{
	struct anon_vma *anon_vma = data;

	init_rwsem(&anon_vma->rwsem);
	atomic_set(&anon_vma->refcount, 0);
	anon_vma->rb_root = RB_ROOT_CACHED;
}

void __init anon_vma_init(void)
{
	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
			0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
			anon_vma_ctor);
	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
			SLAB_PANIC|SLAB_ACCOUNT);
}

/*
 * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
 *
 * Since there is no serialization whatsoever against page_remove_rmap(),
 * the best this function can do is return a refcount-increased anon_vma
 * that might have been relevant to this page.
 *
 * The page might have been remapped to a different anon_vma or the anon_vma
 * returned may already be freed (and even reused).
 *
 * In case it was remapped to a different anon_vma, the new anon_vma will be a
 * child of the old anon_vma, and the anon_vma lifetime rules will therefore
 * ensure that any anon_vma obtained from the page will still be valid for as
 * long as we observe page_mapped() [ hence all those page_mapped() tests ].
 *
 * All users of this function must be very careful when walking the anon_vma
 * chain and verify that the page in question is indeed mapped in it
 * [ something equivalent to page_mapped_in_vma() ].
 *
 * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
 * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
 * if there is a mapcount, we can dereference the anon_vma after observing
 * those.
 */
struct anon_vma *page_get_anon_vma(struct page *page)
{
	struct anon_vma *anon_vma = NULL;
	unsigned long anon_mapping;

	rcu_read_lock();
	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		goto out;
	if (!page_mapped(page))
		goto out;

	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
		anon_vma = NULL;
		goto out;
	}

	/*
	 * If this page is still mapped, then its anon_vma cannot have been
	 * freed.  But if it has been unmapped, we have no security against the
	 * anon_vma structure being freed and reused (for another anon_vma:
	 * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
	 * above cannot corrupt).
	 */
	if (!page_mapped(page)) {
		rcu_read_unlock();
		put_anon_vma(anon_vma);
		return NULL;
	}
out:
	rcu_read_unlock();

	return anon_vma;
}
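
/*
 * Typical usage (an illustrative sketch): page migration pins the anon_vma
 * this way before unmapping an anonymous page, and drops it afterwards:
 *
 *	anon_vma = page_get_anon_vma(page);
 *	...
 *	put_anon_vma(anon_vma);
 */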

/*
 * Similar to page_get_anon_vma() except it locks the anon_vma.
 *
 * It's a little more complex as it tries to keep the fast path to a single
 * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
 * reference like with page_get_anon_vma() and then blocking on the mutex.
 */
4fc3f1d66b1ef (Ingo Molnar             2012-12-02 19:56:50 +0000  528) struct anon_vma *page_lock_anon_vma_read(struct page *page)
746b18d421da7 (Peter Zijlstra          2011-05-24 17:12:10 -0700  529) {
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  530) 	struct anon_vma *anon_vma = NULL;
eee0f252c6537 (Hugh Dickins            2011-05-28 13:20:21 -0700  531) 	struct anon_vma *root_anon_vma;
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  532) 	unsigned long anon_mapping;
746b18d421da7 (Peter Zijlstra          2011-05-24 17:12:10 -0700  533) 
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  534) 	rcu_read_lock();
4db0c3c2983cc (Jason Low               2015-04-15 16:14:08 -0700  535) 	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  536) 	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  537) 		goto out;
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  538) 	if (!page_mapped(page))
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  539) 		goto out;
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  540) 
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  541) 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
4db0c3c2983cc (Jason Low               2015-04-15 16:14:08 -0700  542) 	root_anon_vma = READ_ONCE(anon_vma->root);
4fc3f1d66b1ef (Ingo Molnar             2012-12-02 19:56:50 +0000  543) 	if (down_read_trylock(&root_anon_vma->rwsem)) {
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  544) 		/*
eee0f252c6537 (Hugh Dickins            2011-05-28 13:20:21 -0700  545) 		 * If the page is still mapped, then this anon_vma is still
eee0f252c6537 (Hugh Dickins            2011-05-28 13:20:21 -0700  546) 		 * its anon_vma, and holding the mutex ensures that it will
bc658c96037fc (Peter Zijlstra          2011-05-29 10:33:44 +0200  547) 		 * not go away, see anon_vma_free().
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  548) 		 */
eee0f252c6537 (Hugh Dickins            2011-05-28 13:20:21 -0700  549) 		if (!page_mapped(page)) {
4fc3f1d66b1ef (Ingo Molnar             2012-12-02 19:56:50 +0000  550) 			up_read(&root_anon_vma->rwsem);
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  551) 			anon_vma = NULL;
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  552) 		}
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  553) 		goto out;
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  554) 	}
746b18d421da7 (Peter Zijlstra          2011-05-24 17:12:10 -0700  555) 
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  556) 	/* trylock failed, we got to sleep */
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  557) 	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  558) 		anon_vma = NULL;
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  559) 		goto out;
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  560) 	}
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  561) 
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  562) 	if (!page_mapped(page)) {
7f39dda9d86fb (Hugh Dickins            2014-06-04 16:05:33 -0700  563) 		rcu_read_unlock();
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  564) 		put_anon_vma(anon_vma);
7f39dda9d86fb (Hugh Dickins            2014-06-04 16:05:33 -0700  565) 		return NULL;
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  566) 	}
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  567) 
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  568) 	/* we pinned the anon_vma, it's safe to sleep */
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  569) 	rcu_read_unlock();
4fc3f1d66b1ef (Ingo Molnar             2012-12-02 19:56:50 +0000  570) 	anon_vma_lock_read(anon_vma);
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  571) 
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  572) 	if (atomic_dec_and_test(&anon_vma->refcount)) {
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  573) 		/*
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  574) 		 * Oops, we held the last refcount, release the lock
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  575) 		 * and bail -- can't simply use put_anon_vma() because
4fc3f1d66b1ef (Ingo Molnar             2012-12-02 19:56:50 +0000  576) 		 * we'll deadlock on the anon_vma_lock_write() recursion.
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  577) 		 */
4fc3f1d66b1ef (Ingo Molnar             2012-12-02 19:56:50 +0000  578) 		anon_vma_unlock_read(anon_vma);
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  579) 		__put_anon_vma(anon_vma);
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  580) 		anon_vma = NULL;
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  581) 	}
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  582) 
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  583) 	return anon_vma;
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  584) 
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  585) out:
88c22088bf235 (Peter Zijlstra          2011-05-24 17:12:13 -0700  586) 	rcu_read_unlock();
746b18d421da7 (Peter Zijlstra          2011-05-24 17:12:10 -0700  587) 	return anon_vma;
34bbd704051c9 (Oleg Nesterov           2007-02-28 20:13:49 -0800  588) }
34bbd704051c9 (Oleg Nesterov           2007-02-28 20:13:49 -0800  589) 
4fc3f1d66b1ef (Ingo Molnar             2012-12-02 19:56:50 +0000  590) void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
34bbd704051c9 (Oleg Nesterov           2007-02-28 20:13:49 -0800  591) {
4fc3f1d66b1ef (Ingo Molnar             2012-12-02 19:56:50 +0000  592) 	anon_vma_unlock_read(anon_vma);
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  593) }
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  594) 
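/*
 * Illustrative only (not part of the original rmap.c): a minimal sketch of
 * how a caller is expected to pair page_lock_anon_vma_read() with
 * page_unlock_anon_vma_read().  The name example_walk_anon_page() is
 * hypothetical.
 */
static void example_walk_anon_page(struct page *page)
{
	struct anon_vma *anon_vma;

	anon_vma = page_lock_anon_vma_read(page);
	if (!anon_vma)
		return;		/* not anonymous, or no longer mapped */

	/* ... walk the anon_vma's interval tree under the read lock ... */

	page_unlock_anon_vma_read(anon_vma);
}
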
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  595) #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  596) /*
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  597)  * Flush TLB entries for recently unmapped pages from remote CPUs. It is
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  598)  * important that, if a PTE was dirty when it was unmapped, it is flushed
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  599)  * before any IO is initiated on the page, to prevent lost writes. Similarly,
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  600)  * the flush must happen before the page is freed, to prevent data leakage.
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  601)  */
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  602) void try_to_unmap_flush(void)
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  603) {
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  604) 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  605) 
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  606) 	if (!tlb_ubc->flush_required)
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  607) 		return;
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  608) 
e73ad5ff2f76d (Andy Lutomirski         2017-05-22 15:30:03 -0700  609) 	arch_tlbbatch_flush(&tlb_ubc->arch);
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  610) 	tlb_ubc->flush_required = false;
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  611) 	tlb_ubc->writable = false;
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  612) }
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  613) 
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  614) /* Flush iff there are potentially writable TLB entries that can race with IO */
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  615) void try_to_unmap_flush_dirty(void)
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  616) {
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  617) 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  618) 
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  619) 	if (tlb_ubc->writable)
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  620) 		try_to_unmap_flush();
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  621) }
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  622) 
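/*
 * Illustrative only: a shrink_page_list()-style caller is assumed to drain
 * any potentially writable batched TLB entries before it starts IO on a
 * page it has just unmapped, so a racing write through a stale TLB entry
 * cannot be lost.  example_prepare_pageout() is a hypothetical name.
 */
static void example_prepare_pageout(struct page *page)
{
	if (PageDirty(page))
		try_to_unmap_flush_dirty();

	/* ... the page can now be handed to ->writepage() ... */
}
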
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800  623) static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  624) {
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  625) 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  626) 
e73ad5ff2f76d (Andy Lutomirski         2017-05-22 15:30:03 -0700  627) 	arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  628) 	tlb_ubc->flush_required = true;
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  629) 
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  630) 	/*
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  631) 	 * Ensure compiler does not re-order the setting of tlb_flush_batched
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  632) 	 * before the PTE is cleared.
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  633) 	 */
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  634) 	barrier();
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  635) 	mm->tlb_flush_batched = true;
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  636) 
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  637) 	/*
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  638) 	 * If the PTE was dirty then it's best to assume it's writable. The
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  639) 	 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  640) 	 * before the page is queued for IO.
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  641) 	 */
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  642) 	if (writable)
d950c9477d51f (Mel Gorman              2015-09-04 15:47:35 -0700  643) 		tlb_ubc->writable = true;
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  644) }
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  645) 
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  646) /*
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  647)  * Returns true if the TLB flush should be deferred to the end of a batch of
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  648)  * unmap operations to reduce IPIs.
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  649)  */
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  650) static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  651) {
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  652) 	bool should_defer = false;
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  653) 
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  654) 	if (!(flags & TTU_BATCH_FLUSH))
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  655) 		return false;
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  656) 
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  657) 	/* If remote CPUs need to be flushed then defer the flush to batch it */
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  658) 	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  659) 		should_defer = true;
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  660) 	put_cpu();
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  661) 
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  662) 	return should_defer;
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  663) }
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  664) 
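/*
 * Illustrative only: a sketch of how an unmap path is expected to combine
 * should_defer_flush() with set_tlb_ubc_flush_pending() after clearing a
 * pte -- batch the flush when remote CPUs would otherwise need an IPI,
 * otherwise flush the single entry immediately.  example_flush_after_clear()
 * is hypothetical; both helpers are static to this file.
 */
static void example_flush_after_clear(struct vm_area_struct *vma,
				      unsigned long address, pte_t pteval,
				      enum ttu_flags flags)
{
	struct mm_struct *mm = vma->vm_mm;

	if (should_defer_flush(mm, flags))
		set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
	else
		flush_tlb_page(vma, address);
}
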
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  665) /*
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  666)  * Reclaim unmaps pages under the PTL but does not flush the TLB prior to
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  667)  * releasing the PTL if TLB flushes are batched. It's possible for a parallel
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  668)  * operation such as mprotect or munmap to race between reclaim unmapping
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  669)  * the page and flushing the page. If this race occurs, it potentially allows
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  670)  * access to data via a stale TLB entry. Tracking all mm's that have TLB
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  671)  * batching in flight would be expensive during reclaim so instead track
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  672)  * whether TLB batching occurred in the past and if so then do a flush here
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  673)  * if required. This will cost one additional flush per reclaim cycle paid
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  674)  * by the first operation at risk such as mprotect and munmap.
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  675)  *
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  676)  * This must be called under the PTL so that an access to tlb_flush_batched
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  677)  * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  678)  * via the PTL.
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  679)  */
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  680) void flush_tlb_batched_pending(struct mm_struct *mm)
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  681) {
9c1177b62a8c7 (Qian Cai                2020-08-14 17:31:47 -0700  682) 	if (data_race(mm->tlb_flush_batched)) {
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  683) 		flush_tlb_mm(mm);
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  684) 
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  685) 		/*
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  686) 		 * Do not allow the compiler to re-order the clearing of
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  687) 		 * tlb_flush_batched before the tlb is flushed.
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  688) 		 */
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  689) 		barrier();
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  690) 		mm->tlb_flush_batched = false;
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  691) 	}
3ea277194daae (Mel Gorman              2017-08-02 13:31:52 -0700  692) }
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  693) #else
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800  694) static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  695) {
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  696) }
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  697) 
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  698) static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  699) {
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  700) 	return false;
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  701) }
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  702) #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700  703) 
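/*
 * Illustrative only: a sketch of the caller side of
 * flush_tlb_batched_pending().  An mprotect/munmap-style path takes the
 * pte lock and then flushes any TLB entries left behind by a batched
 * reclaim unmap before it modifies the ptes.  example_modify_ptes() and
 * its body are hypothetical.
 */
static void example_modify_ptes(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr)
{
	spinlock_t *ptl;
	pte_t *pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);

	flush_tlb_batched_pending(vma->vm_mm);

	/* ... change or clear ptes here, under the pte lock ... */

	pte_unmap_unlock(pte, ptl);
}
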
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  704) /*
bf89c8c867322 (Huang Shijie            2009-10-01 15:44:04 -0700  705)  * At what user virtual address is page expected in vma?
ab941e0fff394 (Naoya Horiguchi         2010-05-11 14:06:55 -0700  706)  * Caller should check the page is actually part of the vma.
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  707)  */
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  708) unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  709) {
21d0d443cdc16 (Andrea Arcangeli        2010-08-09 17:19:10 -0700  710) 	if (PageAnon(page)) {
4829b906cc063 (Hugh Dickins            2010-10-02 17:46:06 -0700  711) 		struct anon_vma *page__anon_vma = page_anon_vma(page);
4829b906cc063 (Hugh Dickins            2010-10-02 17:46:06 -0700  712) 		/*
4829b906cc063 (Hugh Dickins            2010-10-02 17:46:06 -0700  713) 		 * Note: swapoff's unuse_vma() is more efficient with this
4829b906cc063 (Hugh Dickins            2010-10-02 17:46:06 -0700  714) 		 * check, and needs it to match anon_vma when KSM is active.
4829b906cc063 (Hugh Dickins            2010-10-02 17:46:06 -0700  715) 		 */
4829b906cc063 (Hugh Dickins            2010-10-02 17:46:06 -0700  716) 		if (!vma->anon_vma || !page__anon_vma ||
4829b906cc063 (Hugh Dickins            2010-10-02 17:46:06 -0700  717) 		    vma->anon_vma->root != page__anon_vma->root)
21d0d443cdc16 (Andrea Arcangeli        2010-08-09 17:19:10 -0700  718) 			return -EFAULT;
31657170deaf1 (Jue Wang                2021-06-15 18:24:00 -0700  719) 	} else if (!vma->vm_file) {
31657170deaf1 (Jue Wang                2021-06-15 18:24:00 -0700  720) 		return -EFAULT;
31657170deaf1 (Jue Wang                2021-06-15 18:24:00 -0700  721) 	} else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  722) 		return -EFAULT;
31657170deaf1 (Jue Wang                2021-06-15 18:24:00 -0700  723) 	}
494334e43c16d (Hugh Dickins            2021-06-15 18:23:56 -0700  724) 
494334e43c16d (Hugh Dickins            2021-06-15 18:23:56 -0700  725) 	return vma_address(page, vma);
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  726) }
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  727) 
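/*
 * Illustrative only: page_address_in_vma() returns -EFAULT when the page is
 * not mapped by the given vma, so callers are expected to check for that
 * before using the address.  example_page_in_vma() is a hypothetical name.
 */
static bool example_page_in_vma(struct page *page, struct vm_area_struct *vma)
{
	unsigned long address = page_address_in_vma(page, vma);

	if (address == -EFAULT)
		return false;	/* page is not mapped by this vma */

	/* ... 'address' can now be used to walk vma->vm_mm's page tables ... */
	return true;
}
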
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  728) pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  729) {
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  730) 	pgd_t *pgd;
c2febafc67734 (Kirill A. Shutemov      2017-03-09 17:24:07 +0300  731) 	p4d_t *p4d;
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  732) 	pud_t *pud;
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  733) 	pmd_t *pmd = NULL;
f72e7dcdd2522 (Hugh Dickins            2014-06-23 13:22:05 -0700  734) 	pmd_t pmde;
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  735) 
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  736) 	pgd = pgd_offset(mm, address);
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  737) 	if (!pgd_present(*pgd))
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  738) 		goto out;
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  739) 
c2febafc67734 (Kirill A. Shutemov      2017-03-09 17:24:07 +0300  740) 	p4d = p4d_offset(pgd, address);
c2febafc67734 (Kirill A. Shutemov      2017-03-09 17:24:07 +0300  741) 	if (!p4d_present(*p4d))
c2febafc67734 (Kirill A. Shutemov      2017-03-09 17:24:07 +0300  742) 		goto out;
c2febafc67734 (Kirill A. Shutemov      2017-03-09 17:24:07 +0300  743) 
c2febafc67734 (Kirill A. Shutemov      2017-03-09 17:24:07 +0300  744) 	pud = pud_offset(p4d, address);
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  745) 	if (!pud_present(*pud))
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  746) 		goto out;
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  747) 
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  748) 	pmd = pmd_offset(pud, address);
f72e7dcdd2522 (Hugh Dickins            2014-06-23 13:22:05 -0700  749) 	/*
8809aa2d28d74 (Aneesh Kumar K.V        2015-06-24 16:57:44 -0700  750) 	 * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
f72e7dcdd2522 (Hugh Dickins            2014-06-23 13:22:05 -0700  751)  * without holding the anon_vma lock for write.  So when looking for a
f72e7dcdd2522 (Hugh Dickins            2014-06-23 13:22:05 -0700  752) 	 * genuine pmde (in which to find pte), test present and !THP together.
f72e7dcdd2522 (Hugh Dickins            2014-06-23 13:22:05 -0700  753) 	 */
e37c698270633 (Christian Borntraeger   2014-12-07 21:41:33 +0100  754) 	pmde = *pmd;
e37c698270633 (Christian Borntraeger   2014-12-07 21:41:33 +0100  755) 	barrier();
f72e7dcdd2522 (Hugh Dickins            2014-06-23 13:22:05 -0700  756) 	if (!pmd_present(pmde) || pmd_trans_huge(pmde))
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  757) 		pmd = NULL;
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  758) out:
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  759) 	return pmd;
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  760) }
6219049ae1ce3 (Bob Liu                 2012-12-11 16:00:37 -0800  761) 
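/*
 * Illustrative only: mm_find_pmd() is typically followed by mapping and
 * locking the pte it points to.  example_find_locked_pte() is a
 * hypothetical helper showing that pattern.
 */
static pte_t *example_find_locked_pte(struct mm_struct *mm,
				      unsigned long address,
				      spinlock_t **ptlp)
{
	pmd_t *pmd = mm_find_pmd(mm, address);

	if (!pmd)
		return NULL;	/* no present, pte-mapped pmd here */

	return pte_offset_map_lock(mm, pmd, address, ptlp);
}
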
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  762) struct page_referenced_arg {
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  763) 	int mapcount;
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  764) 	int referenced;
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  765) 	unsigned long vm_flags;
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  766) 	struct mem_cgroup *memcg;
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  767) };
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  768) /*
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  769)  * arg: page_referenced_arg will be passed
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  770)  */
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700  771) static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  772) 			unsigned long address, void *arg)
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  773) {
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  774) 	struct page_referenced_arg *pra = arg;
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  775) 	struct page_vma_mapped_walk pvmw = {
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  776) 		.page = page,
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  777) 		.vma = vma,
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  778) 		.address = address,
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  779) 	};
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  780) 	int referenced = 0;
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  781) 
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  782) 	while (page_vma_mapped_walk(&pvmw)) {
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  783) 		address = pvmw.address;
b20ce5e03b936 (Kirill A. Shutemov      2016-01-15 16:54:37 -0800  784) 
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  785) 		if (vma->vm_flags & VM_LOCKED) {
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  786) 			page_vma_mapped_walk_done(&pvmw);
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  787) 			pra->vm_flags |= VM_LOCKED;
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700  788) 			return false; /* To break the loop */
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  789) 		}
71e3aac0724ff (Andrea Arcangeli        2011-01-13 15:46:52 -0800  790) 
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  791) 		if (pvmw.pte) {
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  792) 			if (ptep_clear_flush_young_notify(vma, address,
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  793) 						pvmw.pte)) {
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  794) 				/*
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  795) 				 * Don't treat a reference through
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  796) 				 * a sequentially read mapping as such.
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  797) 				 * If the page has been used in another mapping,
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  798) 				 * we will catch it; if this other mapping is
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  799) 				 * already gone, the unmap path will have set
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  800) 				 * PG_referenced or activated the page.
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  801) 				 */
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  802) 				if (likely(!(vma->vm_flags & VM_SEQ_READ)))
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  803) 					referenced++;
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  804) 			}
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  805) 		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  806) 			if (pmdp_clear_flush_young_notify(vma, address,
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  807) 						pvmw.pmd))
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  808) 				referenced++;
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  809) 		} else {
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  810) 			/* unexpected pmd-mapped page? */
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  811) 			WARN_ON_ONCE(1);
8749cfea11f3f (Vladimir Davydov        2016-01-15 16:54:45 -0800  812) 		}
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  813) 
8eaedede825a0 (Kirill A. Shutemov      2017-02-24 14:57:48 -0800  814) 		pra->mapcount--;
b20ce5e03b936 (Kirill A. Shutemov      2016-01-15 16:54:37 -0800  815) 	}
b20ce5e03b936 (Kirill A. Shutemov      2016-01-15 16:54:37 -0800  816) 
33c3fc71c8cfa (Vladimir Davydov        2015-09-09 15:35:45 -0700  817) 	if (referenced)
33c3fc71c8cfa (Vladimir Davydov        2015-09-09 15:35:45 -0700  818) 		clear_page_idle(page);
33c3fc71c8cfa (Vladimir Davydov        2015-09-09 15:35:45 -0700  819) 	if (test_and_clear_page_young(page))
33c3fc71c8cfa (Vladimir Davydov        2015-09-09 15:35:45 -0700  820) 		referenced++;
33c3fc71c8cfa (Vladimir Davydov        2015-09-09 15:35:45 -0700  821) 
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  822) 	if (referenced) {
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  823) 		pra->referenced++;
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  824) 		pra->vm_flags |= vma->vm_flags;
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  825) 	}
34bbd704051c9 (Oleg Nesterov           2007-02-28 20:13:49 -0800  826) 
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  827) 	if (!pra->mapcount)
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700  828) 		return false; /* To break the loop */
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  829) 
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700  830) 	return true;
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  831) }
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  832) 
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  833) static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  834) {
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  835) 	struct page_referenced_arg *pra = arg;
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  836) 	struct mem_cgroup *memcg = pra->memcg;
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  837) 
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  838) 	if (!mm_match_cgroup(vma->vm_mm, memcg))
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  839) 		return true;
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  840) 
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  841) 	return false;
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  842) }
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  843) 
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  844) /**
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  845)  * page_referenced - test if the page was referenced
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  846)  * @page: the page to test
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  847)  * @is_locked: caller holds lock on the page
72835c86ca15d (Johannes Weiner         2012-01-12 17:18:32 -0800  848)  * @memcg: target memory cgroup
6fe6b7e35785e (Wu Fengguang            2009-06-16 15:33:05 -0700  849)  * @vm_flags: collect the vm_flags of those vmas which actually referenced the page
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  850)  *
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  851)  * Quick test_and_clear_referenced for all mappings to a page,
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  852)  * returns the number of ptes which referenced the page.
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  853)  */
6fe6b7e35785e (Wu Fengguang            2009-06-16 15:33:05 -0700  854) int page_referenced(struct page *page,
6fe6b7e35785e (Wu Fengguang            2009-06-16 15:33:05 -0700  855) 		    int is_locked,
72835c86ca15d (Johannes Weiner         2012-01-12 17:18:32 -0800  856) 		    struct mem_cgroup *memcg,
6fe6b7e35785e (Wu Fengguang            2009-06-16 15:33:05 -0700  857) 		    unsigned long *vm_flags)
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  858) {
5ad6468801d28 (Hugh Dickins            2009-12-14 17:59:24 -0800  859) 	int we_locked = 0;
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  860) 	struct page_referenced_arg pra = {
b20ce5e03b936 (Kirill A. Shutemov      2016-01-15 16:54:37 -0800  861) 		.mapcount = total_mapcount(page),
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  862) 		.memcg = memcg,
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  863) 	};
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  864) 	struct rmap_walk_control rwc = {
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  865) 		.rmap_one = page_referenced_one,
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  866) 		.arg = (void *)&pra,
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  867) 		.anon_lock = page_lock_anon_vma_read,
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  868) 	};
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  869) 
6fe6b7e35785e (Wu Fengguang            2009-06-16 15:33:05 -0700  870) 	*vm_flags = 0;
059d8442ea77d (Huang Shijie            2019-05-13 17:21:07 -0700  871) 	if (!pra.mapcount)
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  872) 		return 0;
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  873) 
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  874) 	if (!page_rmapping(page))
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  875) 		return 0;
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  876) 
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  877) 	if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  878) 		we_locked = trylock_page(page);
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  879) 		if (!we_locked)
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  880) 			return 1;
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  881) 	}
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  882) 
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  883) 	/*
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  884) 	 * If we are reclaiming on behalf of a cgroup, skip
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  885) 	 * counting on behalf of references from different
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  886) 	 * cgroups
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  887) 	 */
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  888) 	if (memcg) {
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  889) 		rwc.invalid_vma = invalid_page_referenced_vma;
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  890) 	}
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  891) 
c24f386c60b22 (Minchan Kim             2017-05-03 14:54:00 -0700  892) 	rmap_walk(page, &rwc);
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  893) 	*vm_flags = pra.vm_flags;
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  894) 
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  895) 	if (we_locked)
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  896) 		unlock_page(page);
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  897) 
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800  898) 	return pra.referenced;
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  899) }
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700  900) 
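/*
 * Illustrative only: a reclaim-style caller of page_referenced() on an
 * already locked page, with no memcg filtering.  The collected vm_flags can
 * then be inspected (e.g. for VM_EXEC) when deciding what to do with the
 * page.  example_page_was_referenced() is a hypothetical name.
 */
static bool example_page_was_referenced(struct page *page)
{
	unsigned long vm_flags;
	int referenced;

	referenced = page_referenced(page, 1, NULL, &vm_flags);

	/* vm_flags now holds the flags of the vmas that referenced the page */
	return referenced > 0;
}
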
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700  901) static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  902) 			    unsigned long address, void *arg)
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  903) {
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  904) 	struct page_vma_mapped_walk pvmw = {
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  905) 		.page = page,
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  906) 		.vma = vma,
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  907) 		.address = address,
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  908) 		.flags = PVMW_SYNC,
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  909) 	};
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800  910) 	struct mmu_notifier_range range;
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  911) 	int *cleaned = arg;
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  912) 
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400  913) 	/*
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400  914) 	 * We have to assume the worst case, i.e. a pmd, for invalidation. Note
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400  915) 	 * that the page cannot be freed from this function.
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400  916) 	 */
7269f999934b2 (Jérôme Glisse           2019-05-13 17:20:53 -0700  917) 	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
7269f999934b2 (Jérôme Glisse           2019-05-13 17:20:53 -0700  918) 				0, vma, vma->vm_mm, address,
494334e43c16d (Hugh Dickins            2021-06-15 18:23:56 -0700  919) 				vma_address_end(page, vma));
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800  920) 	mmu_notifier_invalidate_range_start(&range);
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400  921) 
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  922) 	while (page_vma_mapped_walk(&pvmw)) {
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  923) 		int ret = 0;
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400  924) 
1f18b296699c8 (YueHaibing              2019-09-23 15:34:22 -0700  925) 		address = pvmw.address;
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  926) 		if (pvmw.pte) {
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  927) 			pte_t entry;
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  928) 			pte_t *pte = pvmw.pte;
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  929) 
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  930) 			if (!pte_dirty(*pte) && !pte_write(*pte))
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  931) 				continue;
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  932) 
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700  933) 			flush_cache_page(vma, address, pte_pfn(*pte));
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700  934) 			entry = ptep_clear_flush(vma, address, pte);
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  935) 			entry = pte_wrprotect(entry);
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  936) 			entry = pte_mkclean(entry);
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700  937) 			set_pte_at(vma->vm_mm, address, pte, entry);
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  938) 			ret = 1;
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  939) 		} else {
396bcc5299c28 (Matthew Wilcox (Oracle) 2020-04-06 20:04:35 -0700  940) #ifdef CONFIG_TRANSPARENT_HUGEPAGE
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  941) 			pmd_t *pmd = pvmw.pmd;
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  942) 			pmd_t entry;
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  943) 
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  944) 			if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  945) 				continue;
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  946) 
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700  947) 			flush_cache_page(vma, address, page_to_pfn(page));
024eee0e83f0d (Aneesh Kumar K.V        2019-05-13 17:19:11 -0700  948) 			entry = pmdp_invalidate(vma, address, pmd);
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  949) 			entry = pmd_wrprotect(entry);
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  950) 			entry = pmd_mkclean(entry);
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700  951) 			set_pmd_at(vma->vm_mm, address, pmd, entry);
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  952) 			ret = 1;
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  953) #else
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  954) 			/* unexpected pmd-mapped page? */
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  955) 			WARN_ON_ONCE(1);
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  956) #endif
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  957) 		}
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  958) 
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800  959) 		/*
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800  960) 		 * No need to call mmu_notifier_invalidate_range() as we are
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800  961) 		 * downgrading page table protection not changing it to point
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800  962) 		 * to a new page.
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800  963) 		 *
ad56b738c5dd2 (Mike Rapoport           2018-03-21 21:22:47 +0200  964) 		 * See Documentation/vm/mmu_notifier.rst
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800  965) 		 */
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800  966) 		if (ret)
f27176cfc363d (Kirill A. Shutemov      2017-02-24 14:57:57 -0800  967) 			(*cleaned)++;
c2fda5fed81ee (Peter Zijlstra          2006-12-22 14:25:52 +0100  968) 	}
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  969) 
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800  970) 	mmu_notifier_invalidate_range_end(&range);
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400  971) 
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700  972) 	return true;
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  973) }
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  974) 
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  975) static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  976) {
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  977) 	if (vma->vm_flags & VM_SHARED)
871beb8c313ab (Fengguang Wu            2014-01-23 15:53:41 -0800  978) 		return false;
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  979) 
871beb8c313ab (Fengguang Wu            2014-01-23 15:53:41 -0800  980) 	return true;
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  981) }
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  982) 
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  983) int page_mkclean(struct page *page)
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  984) {
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  985) 	int cleaned = 0;
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  986) 	struct address_space *mapping;
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  987) 	struct rmap_walk_control rwc = {
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  988) 		.arg = (void *)&cleaned,
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  989) 		.rmap_one = page_mkclean_one,
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  990) 		.invalid_vma = invalid_mkclean_vma,
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  991) 	};
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  992) 
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  993) 	BUG_ON(!PageLocked(page));
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700  994) 
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  995) 	if (!page_mapped(page))
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  996) 		return 0;
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  997) 
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  998) 	mapping = page_mapping(page);
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800  999) 	if (!mapping)
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800 1000) 		return 0;
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800 1001) 
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800 1002) 	rmap_walk(page, &rwc);
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700 1003) 
9853a407b97d8 (Joonsoo Kim             2014-01-21 15:49:55 -0800 1004) 	return cleaned;
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700 1005) }
60b59beafba87 (Jaya Kumar              2007-05-08 00:37:37 -0700 1006) EXPORT_SYMBOL_GPL(page_mkclean);
d08b3851da41d (Peter Zijlstra          2006-09-25 23:30:57 -0700 1007) 
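/*
 * Illustrative only: the usual page_mkclean() pattern -- write-protect and
 * clean every pte mapping a locked pagecache page, and if any pte was
 * dirty, transfer that dirtiness back to the page before writeback.
 * example_clean_for_io() is a hypothetical name.
 */
static void example_clean_for_io(struct page *page)
{
	lock_page(page);
	if (page_mkclean(page))
		set_page_dirty(page);
	unlock_page(page);
}
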
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1008) /**
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1009)  * page_move_anon_rmap - move a page to our anon_vma
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1010)  * @page:	the page to move to our anon_vma
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1011)  * @vma:	the vma the page belongs to
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1012)  *
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1013)  * When a page belongs exclusively to one process after a COW event,
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1014)  * that page can be moved into the anon_vma that belongs to just that
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1015)  * process, so the rmap code will not search the parent or sibling
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1016)  * processes.
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1017)  */
5a49973d7143e (Hugh Dickins            2016-07-14 12:07:38 -0700 1018) void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1019) {
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1020) 	struct anon_vma *anon_vma = vma->anon_vma;
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1021) 
5a49973d7143e (Hugh Dickins            2016-07-14 12:07:38 -0700 1022) 	page = compound_head(page);
5a49973d7143e (Hugh Dickins            2016-07-14 12:07:38 -0700 1023) 
309381feaee56 (Sasha Levin             2014-01-23 15:52:54 -0800 1024) 	VM_BUG_ON_PAGE(!PageLocked(page), page);
81d1b09c6be66 (Sasha Levin             2014-10-09 15:28:10 -0700 1025) 	VM_BUG_ON_VMA(!anon_vma, vma);
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1026) 
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1027) 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
414e2fb8ce5a9 (Vladimir Davydov        2015-06-24 16:56:56 -0700 1028) 	/*
414e2fb8ce5a9 (Vladimir Davydov        2015-06-24 16:56:56 -0700 1029) 	 * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
414e2fb8ce5a9 (Vladimir Davydov        2015-06-24 16:56:56 -0700 1030) 	 * simultaneously, so a concurrent reader (eg page_referenced()'s
414e2fb8ce5a9 (Vladimir Davydov        2015-06-24 16:56:56 -0700 1031) 	 * PageAnon()) will not see one without the other.
414e2fb8ce5a9 (Vladimir Davydov        2015-06-24 16:56:56 -0700 1032) 	 */
414e2fb8ce5a9 (Vladimir Davydov        2015-06-24 16:56:56 -0700 1033) 	WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1034) }
c44b674323f4a (Rik van Riel            2010-03-05 13:42:09 -0800 1035) 
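/*
 * Illustrative only: once a write fault has established that a COW page is
 * exclusively mapped by the faulting process, the fault handler can reuse
 * it and re-point its rmap at this vma's own anon_vma.
 * example_reuse_cow_page() is a hypothetical name; the page must be locked.
 */
static void example_reuse_cow_page(struct page *page, struct vm_area_struct *vma)
{
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	page_move_anon_rmap(page, vma);
}
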
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1036) /**
4e1c19750a899 (Andi Kleen              2010-09-22 12:43:56 +0200 1037)  * __page_set_anon_rmap - set up new anonymous rmap
451b9514a59f3 (Kirill Tkhai            2018-12-28 00:39:31 -0800 1038)  * @page:	Page or Hugepage to add to rmap
4e1c19750a899 (Andi Kleen              2010-09-22 12:43:56 +0200 1039)  * @vma:	VM area to add page to.
4e1c19750a899 (Andi Kleen              2010-09-22 12:43:56 +0200 1040)  * @address:	User virtual address of the mapping	
e8a03feb54ca7 (Rik van Riel            2010-04-14 17:59:28 -0400 1041)  * @exclusive:	the page is exclusively owned by the current process
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1042)  */
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1043) static void __page_set_anon_rmap(struct page *page,
e8a03feb54ca7 (Rik van Riel            2010-04-14 17:59:28 -0400 1044) 	struct vm_area_struct *vma, unsigned long address, int exclusive)
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1045) {
e8a03feb54ca7 (Rik van Riel            2010-04-14 17:59:28 -0400 1046) 	struct anon_vma *anon_vma = vma->anon_vma;
ea90002b0fa7b (Linus Torvalds          2010-04-12 12:44:29 -0700 1047) 
e8a03feb54ca7 (Rik van Riel            2010-04-14 17:59:28 -0400 1048) 	BUG_ON(!anon_vma);
ea90002b0fa7b (Linus Torvalds          2010-04-12 12:44:29 -0700 1049) 
4e1c19750a899 (Andi Kleen              2010-09-22 12:43:56 +0200 1050) 	if (PageAnon(page))
4e1c19750a899 (Andi Kleen              2010-09-22 12:43:56 +0200 1051) 		return;
4e1c19750a899 (Andi Kleen              2010-09-22 12:43:56 +0200 1052) 
ea90002b0fa7b (Linus Torvalds          2010-04-12 12:44:29 -0700 1053) 	/*
e8a03feb54ca7 (Rik van Riel            2010-04-14 17:59:28 -0400 1054) 	 * If the page isn't exclusively mapped into this vma,
e8a03feb54ca7 (Rik van Riel            2010-04-14 17:59:28 -0400 1055) 	 * we must use the _oldest_ possible anon_vma for the
e8a03feb54ca7 (Rik van Riel            2010-04-14 17:59:28 -0400 1056) 	 * page mapping!
ea90002b0fa7b (Linus Torvalds          2010-04-12 12:44:29 -0700 1057) 	 */
4e1c19750a899 (Andi Kleen              2010-09-22 12:43:56 +0200 1058) 	if (!exclusive)
288468c334e98 (Andrea Arcangeli        2010-08-09 17:19:09 -0700 1059) 		anon_vma = anon_vma->root;
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1060) 
16f5e707d6f6f (Alex Shi                2020-12-15 12:33:42 -0800 1061) 	/*
16f5e707d6f6f (Alex Shi                2020-12-15 12:33:42 -0800 1062) 	 * page_idle does a lockless/optimistic rmap scan on page->mapping.
16f5e707d6f6f (Alex Shi                2020-12-15 12:33:42 -0800 1063) 	 * Make sure the compiler doesn't split the stores of anon_vma and
16f5e707d6f6f (Alex Shi                2020-12-15 12:33:42 -0800 1064) 	 * the PAGE_MAPPING_ANON type identifier, otherwise the rmap code
16f5e707d6f6f (Alex Shi                2020-12-15 12:33:42 -0800 1065) 	 * could mistake the mapping for a struct address_space and crash.
16f5e707d6f6f (Alex Shi                2020-12-15 12:33:42 -0800 1066) 	 */
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1067) 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
16f5e707d6f6f (Alex Shi                2020-12-15 12:33:42 -0800 1068) 	WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1069) 	page->index = linear_page_index(vma, address);
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1070) }
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1071) 
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1072) /**
43d8eac44f28d (Randy Dunlap            2008-03-19 17:00:43 -0700 1073)  * __page_check_anon_rmap - sanity check anonymous rmap addition
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1074)  * @page:	the page to add the mapping to
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1075)  * @vma:	the vm area in which the mapping is added
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1076)  * @address:	the user virtual address mapped
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1077)  */
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1078) static void __page_check_anon_rmap(struct page *page,
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1079) 	struct vm_area_struct *vma, unsigned long address)
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1080) {
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1081) 	/*
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1082) 	 * The page's anon-rmap details (mapping and index) are guaranteed to
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1083) 	 * be set up correctly at this point.
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1084) 	 *
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1085) 	 * We have exclusion against page_add_anon_rmap because the caller
90aaca852ca13 (Miaohe Lin              2021-02-25 17:17:59 -0800 1086) 	 * always holds the page locked.
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1087) 	 *
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1088) 	 * We have exclusion against page_add_new_anon_rmap because those pages
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1089) 	 * are initially only visible via the pagetables, and the pte is locked
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1090) 	 * over the call to page_add_new_anon_rmap.
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1091) 	 */
30c46382855e0 (Yang Shi                2019-11-30 17:51:26 -0800 1092) 	VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
30c46382855e0 (Yang Shi                2019-11-30 17:51:26 -0800 1093) 	VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
30c46382855e0 (Yang Shi                2019-11-30 17:51:26 -0800 1094) 		       page);
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1095) }
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1096) 
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1097) /**
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1098)  * page_add_anon_rmap - add pte mapping to an anonymous page
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1099)  * @page:	the page to add the mapping to
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1100)  * @vma:	the vm area in which the mapping is added
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1101)  * @address:	the user virtual address mapped
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1102)  * @compound:	charge the page as compound or small page
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1103)  *
5ad6468801d28 (Hugh Dickins            2009-12-14 17:59:24 -0800 1104)  * The caller needs to hold the pte lock, and the page must be locked in
80e1482260282 (Hugh Dickins            2009-12-14 17:59:29 -0800 1105)  * the anon_vma case: to serialize mapping, index checking after setting,
80e1482260282 (Hugh Dickins            2009-12-14 17:59:29 -0800 1106)  * and to ensure that PageAnon is not being upgraded racily to PageKsm
80e1482260282 (Hugh Dickins            2009-12-14 17:59:29 -0800 1107)  * (but PageKsm is never downgraded to PageAnon).
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1108)  */
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1109) void page_add_anon_rmap(struct page *page,
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1110) 	struct vm_area_struct *vma, unsigned long address, bool compound)
ad8c2ee801ad7 (Rik van Riel            2010-08-09 17:19:48 -0700 1111) {
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1112) 	do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
ad8c2ee801ad7 (Rik van Riel            2010-08-09 17:19:48 -0700 1113) }
ad8c2ee801ad7 (Rik van Riel            2010-08-09 17:19:48 -0700 1114) 
ad8c2ee801ad7 (Rik van Riel            2010-08-09 17:19:48 -0700 1115) /*
ad8c2ee801ad7 (Rik van Riel            2010-08-09 17:19:48 -0700 1116)  * Special version of the above for do_swap_page, which often runs
ad8c2ee801ad7 (Rik van Riel            2010-08-09 17:19:48 -0700 1117)  * into pages that are exclusively owned by the current process.
ad8c2ee801ad7 (Rik van Riel            2010-08-09 17:19:48 -0700 1118)  * Everybody else should continue to use page_add_anon_rmap above.
ad8c2ee801ad7 (Rik van Riel            2010-08-09 17:19:48 -0700 1119)  */
ad8c2ee801ad7 (Rik van Riel            2010-08-09 17:19:48 -0700 1120) void do_page_add_anon_rmap(struct page *page,
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1121) 	struct vm_area_struct *vma, unsigned long address, int flags)
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1122) {
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1123) 	bool compound = flags & RMAP_COMPOUND;
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1124) 	bool first;
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1125) 
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1126) 	if (unlikely(PageKsm(page)))
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1127) 		lock_page_memcg(page);
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1128) 	else
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1129) 		VM_BUG_ON_PAGE(!PageLocked(page), page);
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1130) 
e9b61f19858a5 (Kirill A. Shutemov      2016-01-15 16:54:10 -0800 1131) 	if (compound) {
e9b61f19858a5 (Kirill A. Shutemov      2016-01-15 16:54:10 -0800 1132) 		atomic_t *mapcount;
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1133) 		VM_BUG_ON_PAGE(!PageLocked(page), page);
e9b61f19858a5 (Kirill A. Shutemov      2016-01-15 16:54:10 -0800 1134) 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
e9b61f19858a5 (Kirill A. Shutemov      2016-01-15 16:54:10 -0800 1135) 		mapcount = compound_mapcount_ptr(page);
e9b61f19858a5 (Kirill A. Shutemov      2016-01-15 16:54:10 -0800 1136) 		first = atomic_inc_and_test(mapcount);
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1137) 	} else {
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1138) 		first = atomic_inc_and_test(&page->_mapcount);
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1139) 	}
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1140) 
79134171df238 (Andrea Arcangeli        2011-01-13 15:46:58 -0800 1141) 	if (first) {
6c357848b44b4 (Matthew Wilcox (Oracle) 2020-08-14 17:30:37 -0700 1142) 		int nr = compound ? thp_nr_pages(page) : 1;
bea04b073292b (Jianyu Zhan             2014-06-04 16:09:51 -0700 1143) 		/*
bea04b073292b (Jianyu Zhan             2014-06-04 16:09:51 -0700 1144) 		 * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
bea04b073292b (Jianyu Zhan             2014-06-04 16:09:51 -0700 1145) 		 * these counters are not modified in interrupt context, and
bea04b073292b (Jianyu Zhan             2014-06-04 16:09:51 -0700 1146) 		 * the pte lock (a spinlock) is held, which implies preemption
bea04b073292b (Jianyu Zhan             2014-06-04 16:09:51 -0700 1147) 		 * disabled.
bea04b073292b (Jianyu Zhan             2014-06-04 16:09:51 -0700 1148) 		 */
65c453778aea3 (Kirill A. Shutemov      2016-07-26 15:26:10 -0700 1149) 		if (compound)
69473e5de8738 (Muchun Song             2021-02-24 12:03:23 -0800 1150) 			__mod_lruvec_page_state(page, NR_ANON_THPS, nr);
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1151) 		__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
79134171df238 (Andrea Arcangeli        2011-01-13 15:46:58 -0800 1152) 	}
5ad6468801d28 (Hugh Dickins            2009-12-14 17:59:24 -0800 1153) 
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1154) 	if (unlikely(PageKsm(page))) {
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1155) 		unlock_page_memcg(page);
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1156) 		return;
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1157) 	}
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1158) 
5dbe0af47f8a8 (Hugh Dickins            2011-05-28 13:17:04 -0700 1159) 	/* address might be in next vma when migration races vma_adjust */
5ad6468801d28 (Hugh Dickins            2009-12-14 17:59:24 -0800 1160) 	if (first)
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1161) 		__page_set_anon_rmap(page, vma, address,
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1162) 				flags & RMAP_EXCLUSIVE);
69029cd550284 (KAMEZAWA Hiroyuki       2008-07-25 01:47:14 -0700 1163) 	else
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1164) 		__page_check_anon_rmap(page, vma, address);
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1165) }
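
The helper above is the only rmap-side step a swap-in path needs. As a hedged illustration (not code from this file), a do_swap_page()-style caller could wrap it as below; the example_* name is invented, and the caller is assumed to hold the pte lock with the page locked, as the VM_BUG_ON above requires.

/* Illustrative sketch only -- not part of mm/rmap.c. */
static void example_swapin_add_rmap(struct page *page,
				    struct vm_area_struct *vma,
				    unsigned long address, bool exclusive)
{
	/*
	 * RMAP_EXCLUSIVE hints that this process is the page's only user,
	 * letting __page_set_anon_rmap() use this vma's anon_vma directly.
	 */
	do_page_add_anon_rmap(page, vma, address,
			      exclusive ? RMAP_EXCLUSIVE : 0);
}
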
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1166) 
43d8eac44f28d (Randy Dunlap            2008-03-19 17:00:43 -0700 1167) /**
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1168)  * page_add_new_anon_rmap - add pte mapping to a new anonymous page
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1169)  * @page:	the page to add the mapping to
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1170)  * @vma:	the vm area in which the mapping is added
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1171)  * @address:	the user virtual address mapped
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1172)  * @compound:	charge the page as compound or small page
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1173)  *
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1174)  * Same as page_add_anon_rmap but must only be called on *new* pages.
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1175)  * This means the inc-and-test can be bypassed.
c97a9e10eaee3 (Nicholas Piggin         2007-05-16 22:11:21 -0700 1176)  * Page does not have to be locked.
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1177)  */
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1178) void page_add_new_anon_rmap(struct page *page,
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1179) 	struct vm_area_struct *vma, unsigned long address, bool compound)
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1180) {
6c357848b44b4 (Matthew Wilcox (Oracle) 2020-08-14 17:30:37 -0700 1181) 	int nr = compound ? thp_nr_pages(page) : 1;
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1182) 
81d1b09c6be66 (Sasha Levin             2014-10-09 15:28:10 -0700 1183) 	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
fa9949da59a15 (Hugh Dickins            2016-05-19 17:12:41 -0700 1184) 	__SetPageSwapBacked(page);
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1185) 	if (compound) {
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1186) 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1187) 		/* increment count (starts at -1) */
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1188) 		atomic_set(compound_mapcount_ptr(page), 0);
47e29d32afba1 (John Hubbard            2020-04-01 21:05:33 -0700 1189) 		if (hpage_pincount_available(page))
47e29d32afba1 (John Hubbard            2020-04-01 21:05:33 -0700 1190) 			atomic_set(compound_pincount_ptr(page), 0);
47e29d32afba1 (John Hubbard            2020-04-01 21:05:33 -0700 1191) 
69473e5de8738 (Muchun Song             2021-02-24 12:03:23 -0800 1192) 		__mod_lruvec_page_state(page, NR_ANON_THPS, nr);
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1193) 	} else {
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1194) 		/* Anon THP always mapped first with PMD */
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1195) 		VM_BUG_ON_PAGE(PageTransCompound(page), page);
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1196) 		/* increment count (starts at -1) */
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1197) 		atomic_set(&page->_mapcount, 0);
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1198) 	}
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1199) 	__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
e8a03feb54ca7 (Rik van Riel            2010-04-14 17:59:28 -0400 1200) 	__page_set_anon_rmap(page, vma, address, 1);
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1201) }
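
For orientation, here is a hedged, heavily condensed sketch of the kind of anonymous-fault sequence that calls page_add_new_anon_rmap(). It is illustrative only: it omits error handling, memcg charging and pte-lock management, and it assumes helpers such as alloc_zeroed_user_highpage_movable() and lru_cache_add_inactive_or_unevictable() as found in kernels of this vintage.

/* Condensed, do_anonymous_page()-style sketch (illustrative only). */
static void example_map_new_anon_page(struct vm_area_struct *vma,
				      unsigned long address, pte_t *pte)
{
	struct page *page;
	pte_t entry;

	page = alloc_zeroed_user_highpage_movable(vma, address);
	if (!page)
		return;			/* real code returns VM_FAULT_OOM */

	entry = mk_pte(page, vma->vm_page_prot);
	if (vma->vm_flags & VM_WRITE)
		entry = pte_mkwrite(pte_mkdirty(entry));

	/* Brand new page: no inc-and-test needed, see the comment above. */
	page_add_new_anon_rmap(page, vma, address, false);
	lru_cache_add_inactive_or_unevictable(page, vma);
	set_pte_at(vma->vm_mm, address, pte, entry);
}
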
9617d95e6e9ff (Nicholas Piggin         2006-01-06 00:11:12 -0800 1202) 
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1203) /**
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1204)  * page_add_file_rmap - add pte mapping to a file page
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1205)  * @page: the page to add the mapping to
e8b098fc5747a (Mike Rapoport           2018-04-05 16:24:57 -0700 1206)  * @compound: charge the page as compound or small page
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1207)  *
b8072f099b782 (Hugh Dickins            2005-10-29 18:16:41 -0700 1208)  * The caller needs to hold the pte lock.
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1209)  */
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1210) void page_add_file_rmap(struct page *page, bool compound)
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1211) {
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1212) 	int i, nr = 1;
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1213) 
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1214) 	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
62cccb8c8e7a3 (Johannes Weiner         2016-03-15 14:57:22 -0700 1215) 	lock_page_memcg(page);
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1216) 	if (compound && PageTransHuge(page)) {
a1528e21f8915 (Muchun Song             2021-02-24 12:03:35 -0800 1217) 		int nr_pages = thp_nr_pages(page);
a1528e21f8915 (Muchun Song             2021-02-24 12:03:35 -0800 1218) 
a1528e21f8915 (Muchun Song             2021-02-24 12:03:35 -0800 1219) 		for (i = 0, nr = 0; i < nr_pages; i++) {
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1220) 			if (atomic_inc_and_test(&page[i]._mapcount))
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1221) 				nr++;
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1222) 		}
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1223) 		if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1224) 			goto out;
99cb0dbd47a15 (Song Liu                2019-09-23 15:38:00 -0700 1225) 		if (PageSwapBacked(page))
a1528e21f8915 (Muchun Song             2021-02-24 12:03:35 -0800 1226) 			__mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
a1528e21f8915 (Muchun Song             2021-02-24 12:03:35 -0800 1227) 						nr_pages);
99cb0dbd47a15 (Song Liu                2019-09-23 15:38:00 -0700 1228) 		else
380780e71895a (Muchun Song             2021-02-24 12:03:39 -0800 1229) 			__mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
380780e71895a (Muchun Song             2021-02-24 12:03:39 -0800 1230) 						nr_pages);
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1231) 	} else {
c8efc390c1e0e (Kirill A. Shutemov      2016-08-10 16:27:52 -0700 1232) 		if (PageTransCompound(page) && page_mapping(page)) {
c8efc390c1e0e (Kirill A. Shutemov      2016-08-10 16:27:52 -0700 1233) 			VM_WARN_ON_ONCE(!PageLocked(page));
c8efc390c1e0e (Kirill A. Shutemov      2016-08-10 16:27:52 -0700 1234) 
9a73f61bdb8ac (Kirill A. Shutemov      2016-07-26 15:25:53 -0700 1235) 			SetPageDoubleMap(compound_head(page));
9a73f61bdb8ac (Kirill A. Shutemov      2016-07-26 15:25:53 -0700 1236) 			if (PageMlocked(page))
9a73f61bdb8ac (Kirill A. Shutemov      2016-07-26 15:25:53 -0700 1237) 				clear_page_mlock(compound_head(page));
9a73f61bdb8ac (Kirill A. Shutemov      2016-07-26 15:25:53 -0700 1238) 		}
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1239) 		if (!atomic_inc_and_test(&page->_mapcount))
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1240) 			goto out;
d69b042f3d740 (Balbir Singh            2009-06-17 16:26:34 -0700 1241) 	}
00f3ca2c2d663 (Johannes Weiner         2017-07-06 15:40:52 -0700 1242) 	__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1243) out:
62cccb8c8e7a3 (Johannes Weiner         2016-03-15 14:57:22 -0700 1244) 	unlock_page_memcg(page);
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1245) }
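
As a hedged sketch of a typical caller (not code from this file): file-backed fault paths account the new pte mapping with page_add_file_rmap() while holding the pte lock, right before installing the pte. The helper name below is invented for illustration.

/* Illustrative file-fault style caller (not part of mm/rmap.c). */
static void example_map_file_page(struct vm_area_struct *vma,
				  unsigned long address, pte_t *pte,
				  struct page *page, bool write)
{
	pte_t entry = mk_pte(page, vma->vm_page_prot);

	if (write)
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);

	/* pte lock held by the caller; small (non-compound) mapping. */
	page_add_file_rmap(page, false);
	set_pte_at(vma->vm_mm, address, pte, entry);
}
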
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1246) 
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1247) static void page_remove_file_rmap(struct page *page, bool compound)
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1248) {
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1249) 	int i, nr = 1;
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1250) 
57dea93ac42d3 (Steve Capper            2016-08-10 16:27:55 -0700 1251) 	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1252) 
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1253) 	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1254) 	if (unlikely(PageHuge(page))) {
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1255) 		/* hugetlb pages are always mapped with pmds */
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1256) 		atomic_dec(compound_mapcount_ptr(page));
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1257) 		return;
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1258) 	}
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1259) 
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1260) 	/* page still mapped by someone else? */
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1261) 	if (compound && PageTransHuge(page)) {
a1528e21f8915 (Muchun Song             2021-02-24 12:03:35 -0800 1262) 		int nr_pages = thp_nr_pages(page);
a1528e21f8915 (Muchun Song             2021-02-24 12:03:35 -0800 1263) 
a1528e21f8915 (Muchun Song             2021-02-24 12:03:35 -0800 1264) 		for (i = 0, nr = 0; i < nr_pages; i++) {
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1265) 			if (atomic_add_negative(-1, &page[i]._mapcount))
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1266) 				nr++;
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1267) 		}
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1268) 		if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1269) 			return;
99cb0dbd47a15 (Song Liu                2019-09-23 15:38:00 -0700 1270) 		if (PageSwapBacked(page))
a1528e21f8915 (Muchun Song             2021-02-24 12:03:35 -0800 1271) 			__mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
a1528e21f8915 (Muchun Song             2021-02-24 12:03:35 -0800 1272) 						-nr_pages);
99cb0dbd47a15 (Song Liu                2019-09-23 15:38:00 -0700 1273) 		else
380780e71895a (Muchun Song             2021-02-24 12:03:39 -0800 1274) 			__mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
380780e71895a (Muchun Song             2021-02-24 12:03:39 -0800 1275) 						-nr_pages);
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1276) 	} else {
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1277) 		if (!atomic_add_negative(-1, &page->_mapcount))
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1278) 			return;
dd78fedde4b99 (Kirill A. Shutemov      2016-07-26 15:25:26 -0700 1279) 	}
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1280) 
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1281) 	/*
00f3ca2c2d663 (Johannes Weiner         2017-07-06 15:40:52 -0700 1282) 	 * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1283) 	 * these counters are not modified in interrupt context, and
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1284) 	 * pte lock (a spinlock) is held, which implies preemption is disabled.
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1285) 	 */
00f3ca2c2d663 (Johannes Weiner         2017-07-06 15:40:52 -0700 1286) 	__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1287) 
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1288) 	if (unlikely(PageMlocked(page)))
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1289) 		clear_page_mlock(page);
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1290) }
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1291) 
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1292) static void page_remove_anon_compound_rmap(struct page *page)
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1293) {
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1294) 	int i, nr;
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1295) 
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1296) 	if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1297) 		return;
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1298) 
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1299) 	/* Hugepages are not counted in NR_ANON_MAPPED for now. */
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1300) 	if (unlikely(PageHuge(page)))
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1301) 		return;
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1302) 
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1303) 	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1304) 		return;
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1305) 
69473e5de8738 (Muchun Song             2021-02-24 12:03:23 -0800 1306) 	__mod_lruvec_page_state(page, NR_ANON_THPS, -thp_nr_pages(page));
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1307) 
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1308) 	if (TestClearPageDoubleMap(page)) {
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1309) 		/*
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1310) 		 * Subpages can be mapped with PTEs too. Check how many of
f1fe80d4ae339 (Kirill A. Shutemov      2019-11-30 17:57:15 -0800 1311) 		 * them are still mapped.
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1312) 		 */
5eaf35ab1275c (Matthew Wilcox (Oracle) 2020-10-15 20:05:46 -0700 1313) 		for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1314) 			if (atomic_add_negative(-1, &page[i]._mapcount))
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1315) 				nr++;
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1316) 		}
f1fe80d4ae339 (Kirill A. Shutemov      2019-11-30 17:57:15 -0800 1317) 
f1fe80d4ae339 (Kirill A. Shutemov      2019-11-30 17:57:15 -0800 1318) 		/*
f1fe80d4ae339 (Kirill A. Shutemov      2019-11-30 17:57:15 -0800 1319) 		 * Queue the page for deferred split if at least one small
f1fe80d4ae339 (Kirill A. Shutemov      2019-11-30 17:57:15 -0800 1320) 		 * page of the compound page is unmapped, but at least one
f1fe80d4ae339 (Kirill A. Shutemov      2019-11-30 17:57:15 -0800 1321) 		 * small page is still mapped.
f1fe80d4ae339 (Kirill A. Shutemov      2019-11-30 17:57:15 -0800 1322) 		 */
5eaf35ab1275c (Matthew Wilcox (Oracle) 2020-10-15 20:05:46 -0700 1323) 		if (nr && nr < thp_nr_pages(page))
f1fe80d4ae339 (Kirill A. Shutemov      2019-11-30 17:57:15 -0800 1324) 			deferred_split_huge_page(page);
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1325) 	} else {
5eaf35ab1275c (Matthew Wilcox (Oracle) 2020-10-15 20:05:46 -0700 1326) 		nr = thp_nr_pages(page);
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1327) 	}
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1328) 
e90309c9f7722 (Kirill A. Shutemov      2016-01-15 16:54:33 -0800 1329) 	if (unlikely(PageMlocked(page)))
e90309c9f7722 (Kirill A. Shutemov      2016-01-15 16:54:33 -0800 1330) 		clear_page_mlock(page);
e90309c9f7722 (Kirill A. Shutemov      2016-01-15 16:54:33 -0800 1331) 
f1fe80d4ae339 (Kirill A. Shutemov      2019-11-30 17:57:15 -0800 1332) 	if (nr)
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1333) 		__mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1334) }
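
The loop above relies on the "_mapcount starts at -1" convention: a subpage is still PTE-mapped exactly when its _mapcount is non-negative. A minimal illustration of that invariant, with an invented helper name and no locking shown, might look like this:

/* Illustrative only: count subpages of a THP that remain PTE-mapped. */
static inline int example_count_pte_mapped_subpages(struct page *head)
{
	int i, nr = 0;

	for (i = 0; i < thp_nr_pages(head); i++)
		if (atomic_read(&head[i]._mapcount) >= 0)
			nr++;
	return nr;
}
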
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1335) 
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1336) /**
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1337)  * page_remove_rmap - take down pte mapping from a page
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1338)  * @page:	page to remove mapping from
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1339)  * @compound:	uncharge the page as compound or small page
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1340)  *
b8072f099b782 (Hugh Dickins            2005-10-29 18:16:41 -0700 1341)  * The caller needs to hold the pte lock.
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1342)  */
d281ee6145183 (Kirill A. Shutemov      2016-01-15 16:52:16 -0800 1343) void page_remove_rmap(struct page *page, bool compound)
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1344) {
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1345) 	lock_page_memcg(page);
89c06bd52fb9f (KAMEZAWA Hiroyuki       2012-03-21 16:34:25 -0700 1346) 
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1347) 	if (!PageAnon(page)) {
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1348) 		page_remove_file_rmap(page, compound);
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1349) 		goto out;
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1350) 	}
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1351) 
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1352) 	if (compound) {
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1353) 		page_remove_anon_compound_rmap(page);
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1354) 		goto out;
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1355) 	}
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1356) 
b904dcfed6967 (KOSAKI Motohiro         2009-09-21 17:01:28 -0700 1357) 	/* page still mapped by someone else? */
b904dcfed6967 (KOSAKI Motohiro         2009-09-21 17:01:28 -0700 1358) 	if (!atomic_add_negative(-1, &page->_mapcount))
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1359) 		goto out;
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1360) 
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1361) 	/*
bea04b073292b (Jianyu Zhan             2014-06-04 16:09:51 -0700 1362) 	 * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
bea04b073292b (Jianyu Zhan             2014-06-04 16:09:51 -0700 1363) 	 * these counters are not modified in interrupt context, and
bea04b073292b (Jianyu Zhan             2014-06-04 16:09:51 -0700 1364) 	 * pte lock (a spinlock) is held, which implies preemption is disabled.
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1365) 	 */
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1366) 	__dec_lruvec_page_state(page, NR_ANON_MAPPED);
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1367) 
e6c509f854550 (Hugh Dickins            2012-10-08 16:33:19 -0700 1368) 	if (unlikely(PageMlocked(page)))
e6c509f854550 (Hugh Dickins            2012-10-08 16:33:19 -0700 1369) 		clear_page_mlock(page);
8186eb6a799e4 (Johannes Weiner         2014-10-29 14:50:51 -0700 1370) 
9a982250f773c (Kirill A. Shutemov      2016-01-15 16:54:17 -0800 1371) 	if (PageTransCompound(page))
9a982250f773c (Kirill A. Shutemov      2016-01-15 16:54:17 -0800 1372) 		deferred_split_huge_page(compound_head(page));
9a982250f773c (Kirill A. Shutemov      2016-01-15 16:54:17 -0800 1373) 
b904dcfed6967 (KOSAKI Motohiro         2009-09-21 17:01:28 -0700 1374) 	/*
b904dcfed6967 (KOSAKI Motohiro         2009-09-21 17:01:28 -0700 1375) 	 * It would be tidy to reset the PageAnon mapping here,
b904dcfed6967 (KOSAKI Motohiro         2009-09-21 17:01:28 -0700 1376) 	 * but that might overwrite a racing page_add_anon_rmap
b904dcfed6967 (KOSAKI Motohiro         2009-09-21 17:01:28 -0700 1377) 	 * which increments mapcount after us but sets mapping
2d4894b5d2ae0 (Mel Gorman              2017-11-15 17:37:59 -0800 1378) 	 * before us: so leave the reset to free_unref_page,
b904dcfed6967 (KOSAKI Motohiro         2009-09-21 17:01:28 -0700 1379) 	 * and remember that it's only reliable while mapped.
b904dcfed6967 (KOSAKI Motohiro         2009-09-21 17:01:28 -0700 1380) 	 * Leaving it set also helps swapoff to reinstate ptes
b904dcfed6967 (KOSAKI Motohiro         2009-09-21 17:01:28 -0700 1381) 	 * faster for those pages still in swapcache.
b904dcfed6967 (KOSAKI Motohiro         2009-09-21 17:01:28 -0700 1382) 	 */
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1383) out:
be5d0a74c62d8 (Johannes Weiner         2020-06-03 16:01:57 -0700 1384) 	unlock_page_memcg(page);
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1385) }
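
For orientation, a hedged sketch of the order a zap-style caller follows around page_remove_rmap(): clear the pte, transfer the dirty bit, drop the reverse mapping, then release the reference. It is simplified (no swap entries, no TLB batching) and the helper name is invented.

/* Simplified zap-style sequence (illustrative only). */
static void example_zap_one_pte(struct vm_area_struct *vma,
				unsigned long address, pte_t *pte)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t pteval = *pte;
	struct page *page;

	if (!pte_present(pteval))
		return;			/* swap/none entries handled elsewhere */

	pteval = ptep_get_and_clear(mm, address, pte);
	page = vm_normal_page(vma, address, pteval);
	if (!page)
		return;

	if (pte_dirty(pteval))
		set_page_dirty(page);

	/* pte lock held; small page, so compound == false. */
	page_remove_rmap(page, false);
	put_page(page);
}
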
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1386) 
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1387) /*
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1388)  * @arg: enum ttu_flags value, passed in as the rmap walk's opaque argument
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1389)  */
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700 1390) static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1391) 		     unsigned long address, void *arg)
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1392) {
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1393) 	struct mm_struct *mm = vma->vm_mm;
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1394) 	struct page_vma_mapped_walk pvmw = {
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1395) 		.page = page,
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1396) 		.vma = vma,
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1397) 		.address = address,
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1398) 	};
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1399) 	pte_t pteval;
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1400) 	struct page *subpage;
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1401) 	bool ret = true;
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800 1402) 	struct mmu_notifier_range range;
4708f31885a0d (Palmer Dabbelt          2020-04-06 20:08:00 -0700 1403) 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1404) 
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1405) 	/*
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1406) 	 * When racing against e.g. zap_pte_range() on another cpu,
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1407) 	 * in between its ptep_get_and_clear_full() and page_remove_rmap(),
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1408) 	 * try_to_unmap() may return false when it is about to become true,
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1409) 	 * if page table locking is skipped: use TTU_SYNC to wait for that.
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1410) 	 */
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1411) 	if (flags & TTU_SYNC)
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1412) 		pvmw.flags = PVMW_SYNC;
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1413) 
b87537d9e2feb (Hugh Dickins            2015-11-05 18:49:33 -0800 1414) 	/* munlock has nothing to gain from examining un-locked vmas */
b87537d9e2feb (Hugh Dickins            2015-11-05 18:49:33 -0800 1415) 	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700 1416) 		return true;
b87537d9e2feb (Hugh Dickins            2015-11-05 18:49:33 -0800 1417) 
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1418) 	if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1419) 	    is_zone_device_page(page) && !is_device_private_page(page))
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1420) 		return true;
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1421) 
fec89c109f3a7 (Kirill A. Shutemov      2016-03-17 14:20:10 -0700 1422) 	if (flags & TTU_SPLIT_HUGE_PMD) {
fec89c109f3a7 (Kirill A. Shutemov      2016-03-17 14:20:10 -0700 1423) 		split_huge_pmd_address(vma, address,
b5ff8161e37ce (Naoya Horiguchi         2017-09-08 16:10:49 -0700 1424) 				flags & TTU_SPLIT_FREEZE, page);
fec89c109f3a7 (Kirill A. Shutemov      2016-03-17 14:20:10 -0700 1425) 	}
fec89c109f3a7 (Kirill A. Shutemov      2016-03-17 14:20:10 -0700 1426) 
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400 1427) 	/*
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1428) 	 * For THP, we have to assume the worst case, i.e. pmd invalidation.
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1429) 	 * For hugetlb, it could be much worse if we need to do pud
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1430) 	 * invalidation in the case of pmd sharing.
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1431) 	 *
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1432) 	 * Note that the page cannot be freed in this function, as the caller
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1433) 	 * of try_to_unmap() must hold a reference on the page.
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400 1434) 	 */
494334e43c16d (Hugh Dickins            2021-06-15 18:23:56 -0700 1435) 	range.end = PageKsm(page) ?
494334e43c16d (Hugh Dickins            2021-06-15 18:23:56 -0700 1436) 			address + PAGE_SIZE : vma_address_end(page, vma);
7269f999934b2 (Jérôme Glisse           2019-05-13 17:20:53 -0700 1437) 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
494334e43c16d (Hugh Dickins            2021-06-15 18:23:56 -0700 1438) 				address, range.end);
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1439) 	if (PageHuge(page)) {
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1440) 		/*
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1441) 		 * If sharing is possible, start and end will be adjusted
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1442) 		 * accordingly.
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1443) 		 */
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800 1444) 		adjust_range_if_pmd_sharing_possible(vma, &range.start,
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800 1445) 						     &range.end);
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1446) 	}
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800 1447) 	mmu_notifier_invalidate_range_start(&range);
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400 1448) 
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1449) 	while (page_vma_mapped_walk(&pvmw)) {
616b8371539a6 (Zi Yan                  2017-09-08 16:10:57 -0700 1450) #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
616b8371539a6 (Zi Yan                  2017-09-08 16:10:57 -0700 1451) 		/* PMD-mapped THP migration entry */
616b8371539a6 (Zi Yan                  2017-09-08 16:10:57 -0700 1452) 		if (!pvmw.pte && (flags & TTU_MIGRATION)) {
616b8371539a6 (Zi Yan                  2017-09-08 16:10:57 -0700 1453) 			VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
616b8371539a6 (Zi Yan                  2017-09-08 16:10:57 -0700 1454) 
616b8371539a6 (Zi Yan                  2017-09-08 16:10:57 -0700 1455) 			set_pmd_migration_entry(&pvmw, page);
616b8371539a6 (Zi Yan                  2017-09-08 16:10:57 -0700 1456) 			continue;
616b8371539a6 (Zi Yan                  2017-09-08 16:10:57 -0700 1457) 		}
616b8371539a6 (Zi Yan                  2017-09-08 16:10:57 -0700 1458) #endif
616b8371539a6 (Zi Yan                  2017-09-08 16:10:57 -0700 1459) 
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1460) 		/*
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1461) 		 * If the page is mlock()d, we cannot swap it out.
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1462) 		 * If it's recently referenced (perhaps page_referenced
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1463) 		 * skipped over this mm) then we should reactivate it.
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1464) 		 */
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1465) 		if (!(flags & TTU_IGNORE_MLOCK)) {
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1466) 			if (vma->vm_flags & VM_LOCKED) {
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1467) 				/* PTE-mapped THP are never mlocked */
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1468) 				if (!PageTransCompound(page)) {
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1469) 					/*
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1470) 					 * Holding pte lock, we do *not* need
c1e8d7c6a7a68 (Michel Lespinasse       2020-06-08 21:33:54 -0700 1471) 					 * mmap_lock here
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1472) 					 */
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1473) 					mlock_vma_page(page);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1474) 				}
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700 1475) 				ret = false;
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1476) 				page_vma_mapped_walk_done(&pvmw);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1477) 				break;
9a73f61bdb8ac (Kirill A. Shutemov      2016-07-26 15:25:53 -0700 1478) 			}
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1479) 			if (flags & TTU_MUNLOCK)
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1480) 				continue;
b87537d9e2feb (Hugh Dickins            2015-11-05 18:49:33 -0800 1481) 		}
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1482) 
8346242a7e32c (Kirill A. Shutemov      2017-03-09 16:17:20 -0800 1483) 		/* Unexpected PMD-mapped THP? */
8346242a7e32c (Kirill A. Shutemov      2017-03-09 16:17:20 -0800 1484) 		VM_BUG_ON_PAGE(!pvmw.pte, page);
8346242a7e32c (Kirill A. Shutemov      2017-03-09 16:17:20 -0800 1485) 
8346242a7e32c (Kirill A. Shutemov      2017-03-09 16:17:20 -0800 1486) 		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1487) 		address = pvmw.address;
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1488) 
336bf30eb7658 (Mike Kravetz            2020-11-13 22:52:16 -0800 1489) 		if (PageHuge(page) && !PageAnon(page)) {
c0d0381ade798 (Mike Kravetz            2020-04-01 21:11:05 -0700 1490) 			/*
c0d0381ade798 (Mike Kravetz            2020-04-01 21:11:05 -0700 1491) 			 * To call huge_pmd_unshare, i_mmap_rwsem must be
c0d0381ade798 (Mike Kravetz            2020-04-01 21:11:05 -0700 1492) 			 * held in write mode.  Caller needs to explicitly
c0d0381ade798 (Mike Kravetz            2020-04-01 21:11:05 -0700 1493) 			 * do this outside rmap routines.
c0d0381ade798 (Mike Kravetz            2020-04-01 21:11:05 -0700 1494) 			 */
c0d0381ade798 (Mike Kravetz            2020-04-01 21:11:05 -0700 1495) 			VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
34ae204f18519 (Mike Kravetz            2020-08-11 18:31:38 -0700 1496) 			if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1497) 				/*
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1498) 				 * huge_pmd_unshare unmapped an entire PMD
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1499) 				 * page.  There is no way of knowing exactly
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1500) 				 * which PMDs may be cached for this mm, so
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1501) 				 * we must flush them all.  start/end were
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1502) 				 * already adjusted above to cover this range.
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1503) 				 */
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800 1504) 				flush_cache_range(vma, range.start, range.end);
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800 1505) 				flush_tlb_range(vma, range.start, range.end);
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800 1506) 				mmu_notifier_invalidate_range(mm, range.start,
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800 1507) 							      range.end);
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1508) 
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1509) 				/*
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1510) 				 * The ref count of the PMD page was dropped
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1511) 				 * which is part of the way map counting
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1512) 				 * is done for shared PMDs.  Return 'true'
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1513) 				 * here.  When there is no other sharing,
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1514) 				 * huge_pmd_unshare returns false and we will
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1515) 				 * unmap the actual page and drop map count
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1516) 				 * to zero.
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1517) 				 */
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1518) 				page_vma_mapped_walk_done(&pvmw);
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1519) 				break;
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1520) 			}
017b1660df89f (Mike Kravetz            2018-10-05 15:51:29 -0700 1521) 		}
8346242a7e32c (Kirill A. Shutemov      2017-03-09 16:17:20 -0800 1522) 
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1523) 		if (IS_ENABLED(CONFIG_MIGRATION) &&
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1524) 		    (flags & TTU_MIGRATION) &&
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1525) 		    is_zone_device_page(page)) {
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1526) 			swp_entry_t entry;
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1527) 			pte_t swp_pte;
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1528) 
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1529) 			pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1530) 
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1531) 			/*
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1532) 			 * Store the pfn of the page in a special migration
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1533) 			 * pte. do_swap_page() will wait until the migration
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1534) 			 * pte is removed and then restart fault handling.
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1535) 			 */
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1536) 			entry = make_migration_entry(page, 0);
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1537) 			swp_pte = swp_entry_to_pte(entry);
ad7df764b7e1c (Alistair Popple         2020-09-04 16:36:01 -0700 1538) 
ad7df764b7e1c (Alistair Popple         2020-09-04 16:36:01 -0700 1539) 			/*
ad7df764b7e1c (Alistair Popple         2020-09-04 16:36:01 -0700 1540) 			 * pteval maps a zone device page and is therefore
ad7df764b7e1c (Alistair Popple         2020-09-04 16:36:01 -0700 1541) 			 * a swap pte.
ad7df764b7e1c (Alistair Popple         2020-09-04 16:36:01 -0700 1542) 			 */
ad7df764b7e1c (Alistair Popple         2020-09-04 16:36:01 -0700 1543) 			if (pte_swp_soft_dirty(pteval))
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1544) 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
ad7df764b7e1c (Alistair Popple         2020-09-04 16:36:01 -0700 1545) 			if (pte_swp_uffd_wp(pteval))
f45ec5ff16a75 (Peter Xu                2020-04-06 20:06:01 -0700 1546) 				swp_pte = pte_swp_mkuffd_wp(swp_pte);
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1547) 			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1548) 			/*
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1549) 			 * No need to invalidate here: it will synchronize
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1550) 			 * against the special swap migration pte.
1de13ee59225d (Ralph Campbell          2019-08-13 15:37:11 -0700 1551) 			 *
1de13ee59225d (Ralph Campbell          2019-08-13 15:37:11 -0700 1552) 			 * The assignment to subpage above was computed from a
1de13ee59225d (Ralph Campbell          2019-08-13 15:37:11 -0700 1553) 			 * swap PTE which results in an invalid pointer.
1de13ee59225d (Ralph Campbell          2019-08-13 15:37:11 -0700 1554) 			 * Since only PAGE_SIZE pages can currently be
1de13ee59225d (Ralph Campbell          2019-08-13 15:37:11 -0700 1555) 			 * migrated, just set it to page. This will need to be
1de13ee59225d (Ralph Campbell          2019-08-13 15:37:11 -0700 1556) 			 * changed when hugepage migrations to device private
1de13ee59225d (Ralph Campbell          2019-08-13 15:37:11 -0700 1557) 			 * memory are supported.
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1558) 			 */
1de13ee59225d (Ralph Campbell          2019-08-13 15:37:11 -0700 1559) 			subpage = page;
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1560) 			goto discard;
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1561) 		}
a5430dda8a3a1 (Jérôme Glisse           2017-09-08 16:12:17 -0700 1562) 
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1563) 		/* Nuke the page table entry. */
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1564) 		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1565) 		if (should_defer_flush(mm, flags)) {
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1566) 			/*
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1567) 			 * We clear the PTE but do not flush so potentially
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1568) 			 * a remote CPU could still be writing to the page.
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1569) 			 * If the entry was previously clean then the
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1570) 			 * architecture must guarantee that a clear->dirty
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1571) 			 * transition on a cached TLB entry is written through
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1572) 			 * and traps if the PTE is unmapped.
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1573) 			 */
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1574) 			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1575) 
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1576) 			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1577) 		} else {
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1578) 			pteval = ptep_clear_flush(vma, address, pvmw.pte);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1579) 		}
72b252aed506b (Mel Gorman              2015-09-04 15:47:32 -0700 1580) 
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1581) 		/* Move the dirty bit to the page. Now the pte is gone. */
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1582) 		if (pte_dirty(pteval))
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1583) 			set_page_dirty(page);
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1584) 
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1585) 		/* Update high watermark before we lower rss */
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1586) 		update_hiwater_rss(mm);
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1587) 
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1588) 		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
5fd27b8e7dbca (Punit Agrawal           2017-07-06 15:39:53 -0700 1589) 			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1590) 			if (PageHuge(page)) {
d8c6546b1aea8 (Matthew Wilcox (Oracle) 2019-09-23 15:34:30 -0700 1591) 				hugetlb_count_sub(compound_nr(page), mm);
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1592) 				set_huge_swap_pte_at(mm, address,
5fd27b8e7dbca (Punit Agrawal           2017-07-06 15:39:53 -0700 1593) 						     pvmw.pte, pteval,
5fd27b8e7dbca (Punit Agrawal           2017-07-06 15:39:53 -0700 1594) 						     vma_mmu_pagesize(vma));
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1595) 			} else {
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1596) 				dec_mm_counter(mm, mm_counter(page));
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1597) 				set_pte_at(mm, address, pvmw.pte, pteval);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1598) 			}
365e9c87a982c (Hugh Dickins            2005-10-29 18:16:18 -0700 1599) 
bce73e4842390 (Christian Borntraeger   2018-07-13 16:58:52 -0700 1600) 		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1601) 			/*
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1602) 			 * The guest indicated that the page content is of no
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1603) 			 * interest anymore. Simply discard the pte, vmscan
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1604) 			 * will take care of the rest.
bce73e4842390 (Christian Borntraeger   2018-07-13 16:58:52 -0700 1605) 			 * A future reference will then fault in a new zero
bce73e4842390 (Christian Borntraeger   2018-07-13 16:58:52 -0700 1606) 			 * page. When userfaultfd is active, we must not drop
bce73e4842390 (Christian Borntraeger   2018-07-13 16:58:52 -0700 1607) 			 * this page though, as its main user (postcopy
bce73e4842390 (Christian Borntraeger   2018-07-13 16:58:52 -0700 1608) 			 * migration) will not expect userfaults on already
bce73e4842390 (Christian Borntraeger   2018-07-13 16:58:52 -0700 1609) 			 * copied pages.
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1610) 			 */
eca56ff906bdd (Jerome Marchand         2016-01-14 15:19:26 -0800 1611) 			dec_mm_counter(mm, mm_counter(page));
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1612) 			/* We have to invalidate as we cleared the pte */
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1613) 			mmu_notifier_invalidate_range(mm, address,
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1614) 						      address + PAGE_SIZE);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1615) 		} else if (IS_ENABLED(CONFIG_MIGRATION) &&
b5ff8161e37ce (Naoya Horiguchi         2017-09-08 16:10:49 -0700 1616) 				(flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1617) 			swp_entry_t entry;
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1618) 			pte_t swp_pte;
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1619) 
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1620) 			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1621) 				set_pte_at(mm, address, pvmw.pte, pteval);
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1622) 				ret = false;
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1623) 				page_vma_mapped_walk_done(&pvmw);
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1624) 				break;
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1625) 			}
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1626) 
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1627) 			/*
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1628) 			 * Store the pfn of the page in a special migration
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1629) 			 * pte. do_swap_page() will wait until the migration
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1630) 			 * pte is removed and then restart fault handling.
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1631) 			 */
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1632) 			entry = make_migration_entry(subpage,
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1633) 					pte_write(pteval));
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1634) 			swp_pte = swp_entry_to_pte(entry);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1635) 			if (pte_soft_dirty(pteval))
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1636) 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
f45ec5ff16a75 (Peter Xu                2020-04-06 20:06:01 -0700 1637) 			if (pte_uffd_wp(pteval))
f45ec5ff16a75 (Peter Xu                2020-04-06 20:06:01 -0700 1638) 				swp_pte = pte_swp_mkuffd_wp(swp_pte);
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1639) 			set_pte_at(mm, address, pvmw.pte, swp_pte);
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1640) 			/*
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1641) 			 * No need to invalidate here: it will synchronize
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1642) 			 * against the special swap migration pte.
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1643) 			 */
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1644) 		} else if (PageAnon(page)) {
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1645) 			swp_entry_t entry = { .val = page_private(subpage) };
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1646) 			pte_t swp_pte;
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1647) 			/*
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1648) 			 * Store the swap location in the pte.
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1649) 			 * See handle_pte_fault() ...
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1650) 			 */
eb94a8784427b (Minchan Kim             2017-05-03 14:52:36 -0700 1651) 			if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
eb94a8784427b (Minchan Kim             2017-05-03 14:52:36 -0700 1652) 				WARN_ON_ONCE(1);
83612a948d3bd (Minchan Kim             2017-05-03 14:54:30 -0700 1653) 				ret = false;
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400 1654) 				/* We have to invalidate as we cleared the pte */
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1655) 				mmu_notifier_invalidate_range(mm, address,
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1656) 							address + PAGE_SIZE);
eb94a8784427b (Minchan Kim             2017-05-03 14:52:36 -0700 1657) 				page_vma_mapped_walk_done(&pvmw);
eb94a8784427b (Minchan Kim             2017-05-03 14:52:36 -0700 1658) 				break;
eb94a8784427b (Minchan Kim             2017-05-03 14:52:36 -0700 1659) 			}
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1660) 
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1661) 			/* MADV_FREE page check */
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1662) 			if (!PageSwapBacked(page)) {
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1663) 				if (!PageDirty(page)) {
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1664) 					/* Invalidate as we cleared the pte */
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1665) 					mmu_notifier_invalidate_range(mm,
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1666) 						address, address + PAGE_SIZE);
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1667) 					dec_mm_counter(mm, MM_ANONPAGES);
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1668) 					goto discard;
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1669) 				}
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1670) 
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1671) 				/*
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1672) 				 * If the page was redirtied, it cannot be
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1673) 				 * discarded. Remap the page to page table.
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1674) 				 */
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1675) 				set_pte_at(mm, address, pvmw.pte, pteval);
18863d3a3f593 (Minchan Kim             2017-05-03 14:54:04 -0700 1676) 				SetPageSwapBacked(page);
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700 1677) 				ret = false;
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1678) 				page_vma_mapped_walk_done(&pvmw);
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1679) 				break;
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1680) 			}
854e9ed09dedf (Minchan Kim             2016-01-15 16:54:53 -0800 1681) 
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1682) 			if (swap_duplicate(entry) < 0) {
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1683) 				set_pte_at(mm, address, pvmw.pte, pteval);
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700 1684) 				ret = false;
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1685) 				page_vma_mapped_walk_done(&pvmw);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1686) 				break;
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1687) 			}
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1688) 			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1689) 				set_pte_at(mm, address, pvmw.pte, pteval);
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1690) 				ret = false;
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1691) 				page_vma_mapped_walk_done(&pvmw);
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1692) 				break;
ca827d55ebaa2 (Khalid Aziz             2018-02-21 10:15:44 -0700 1693) 			}
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1694) 			if (list_empty(&mm->mmlist)) {
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1695) 				spin_lock(&mmlist_lock);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1696) 				if (list_empty(&mm->mmlist))
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1697) 					list_add(&mm->mmlist, &init_mm.mmlist);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1698) 				spin_unlock(&mmlist_lock);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1699) 			}
854e9ed09dedf (Minchan Kim             2016-01-15 16:54:53 -0800 1700) 			dec_mm_counter(mm, MM_ANONPAGES);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1701) 			inc_mm_counter(mm, MM_SWAPENTS);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1702) 			swp_pte = swp_entry_to_pte(entry);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1703) 			if (pte_soft_dirty(pteval))
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1704) 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
f45ec5ff16a75 (Peter Xu                2020-04-06 20:06:01 -0700 1705) 			if (pte_uffd_wp(pteval))
f45ec5ff16a75 (Peter Xu                2020-04-06 20:06:01 -0700 1706) 				swp_pte = pte_swp_mkuffd_wp(swp_pte);
785373b4c3871 (Linus Torvalds          2017-08-29 09:11:06 -0700 1707) 			set_pte_at(mm, address, pvmw.pte, swp_pte);
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1708) 			/* Invalidate as we cleared the pte */
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1709) 			mmu_notifier_invalidate_range(mm, address,
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1710) 						      address + PAGE_SIZE);
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1711) 		} else {
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1712) 			/*
906f9cdfc2a08 (Hugh Dickins            2018-11-30 14:10:13 -0800 1713) 			 * This is a locked file-backed page, thus it cannot
906f9cdfc2a08 (Hugh Dickins            2018-11-30 14:10:13 -0800 1714) 			 * be removed from the page cache and replaced by a new
906f9cdfc2a08 (Hugh Dickins            2018-11-30 14:10:13 -0800 1715) 			 * page before mmu_notifier_invalidate_range_end, so no
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1716) 			 * concurrent thread can update its page table to
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1717) 			 * point at a new page while a device is still using
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1718) 			 * this page.
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1719) 			 *
ad56b738c5dd2 (Mike Rapoport           2018-03-21 21:22:47 +0200 1720) 			 * See Documentation/vm/mmu_notifier.rst
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1721) 			 */
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1722) 			dec_mm_counter(mm, mm_counter_file(page));
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1723) 		}
854e9ed09dedf (Minchan Kim             2016-01-15 16:54:53 -0800 1724) discard:
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1725) 		/*
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1726) 		 * No need to call mmu_notifier_invalidate_range(); it has been
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1727) 		 * done above for all cases requiring it to happen under the
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1728) 		 * page table lock before mmu_notifier_invalidate_range_end().
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1729) 		 *
ad56b738c5dd2 (Mike Rapoport           2018-03-21 21:22:47 +0200 1730) 		 * See Documentation/vm/mmu_notifier.rst
0f10851ea475e (Jérôme Glisse           2017-11-15 17:34:07 -0800 1731) 		 */
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1732) 		page_remove_rmap(subpage, PageHuge(page));
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1733) 		put_page(page);
c7ab0d2fdc840 (Kirill A. Shutemov      2017-02-24 14:58:01 -0800 1734) 	}
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400 1735) 
ac46d4f3c4324 (Jérôme Glisse           2018-12-28 00:38:09 -0800 1736) 	mmu_notifier_invalidate_range_end(&range);
369ea8242c0fb (Jérôme Glisse           2017-08-31 17:17:27 -0400 1737) 
caed0f486e582 (KOSAKI Motohiro         2009-12-14 17:59:45 -0800 1738) 	return ret;
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1739) }
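
/*
 * Editor's sketch, not part of mm/rmap.c: the mmu_notifier bracket that the
 * comments in try_to_unmap_one() rely on.  PTEs are only cleared between
 * mmu_notifier_invalidate_range_start() and _end(); when the new PTE (or
 * swap entry) must be visible to secondary MMUs before the page table lock
 * is dropped, mmu_notifier_invalidate_range() is issued as well.  The helper
 * name and the single-page range are illustrative assumptions.
 */
#include <linux/mm.h>
#include <linux/mmu_notifier.h>

static void sketch_clear_one_pte(struct vm_area_struct *vma,
				 unsigned long address)
{
	struct mmu_notifier_range range;

	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma,
				vma->vm_mm, address, address + PAGE_SIZE);
	mmu_notifier_invalidate_range_start(&range);

	/* ... take the pte lock and clear or replace the pte here ... */

	/* tell secondary MMUs before the pte lock is dropped */
	mmu_notifier_invalidate_range(vma->vm_mm, address,
				      address + PAGE_SIZE);

	mmu_notifier_invalidate_range_end(&range);
}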
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1740) 
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1741) static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1742) {
222100eed264b (Anshuman Khandual       2020-04-01 21:07:52 -0700 1743) 	return vma_is_temporary_stack(vma);
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1744) }
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1745) 
b7e188ec98b16 (Miaohe Lin              2021-02-25 17:18:03 -0800 1746) static int page_not_mapped(struct page *page)
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1747) {
b7e188ec98b16 (Miaohe Lin              2021-02-25 17:18:03 -0800 1748) 	return !page_mapped(page);
2a52bcbcc688e (Kirill A. Shutemov      2016-03-17 14:20:04 -0700 1749) }
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1750) 
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1751) /**
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1752)  * try_to_unmap - try to remove all page table mappings to a page
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1753)  * @page: the page to get unmapped
14fa31b89c5ae (Andi Kleen              2009-09-16 11:50:10 +0200 1754)  * @flags: action and flags
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1755)  *
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1756)  * Tries to remove all the page table entries which are mapping this
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1757)  * page, used in the pageout path.  Caller must hold the page lock.
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1758)  *
666e5a406c3ed (Minchan Kim             2017-05-03 14:54:20 -0700 1759)  * If unmap is successful, return true. Otherwise, false.
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1760)  */
666e5a406c3ed (Minchan Kim             2017-05-03 14:54:20 -0700 1761) bool try_to_unmap(struct page *page, enum ttu_flags flags)
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1762) {
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1763) 	struct rmap_walk_control rwc = {
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1764) 		.rmap_one = try_to_unmap_one,
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1765) 		.arg = (void *)flags,
b7e188ec98b16 (Miaohe Lin              2021-02-25 17:18:03 -0800 1766) 		.done = page_not_mapped,
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1767) 		.anon_lock = page_lock_anon_vma_read,
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1768) 	};
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1769) 
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1770) 	/*
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1771) 	 * During exec, a temporary VMA is set up and later moved.
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1772) 	 * The VMA is moved under the anon_vma lock but not the
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1773) 	 * page tables leading to a race where migration cannot
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1774) 	 * find the migration ptes. Rather than increasing the
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1775) 	 * locking requirements of exec(), migration skips
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1776) 	 * temporary VMAs until after exec() completes.
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1777) 	 */
b5ff8161e37ce (Naoya Horiguchi         2017-09-08 16:10:49 -0700 1778) 	if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
b5ff8161e37ce (Naoya Horiguchi         2017-09-08 16:10:49 -0700 1779) 	    && !PageKsm(page) && PageAnon(page))
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1780) 		rwc.invalid_vma = invalid_migration_vma;
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1781) 
2a52bcbcc688e (Kirill A. Shutemov      2016-03-17 14:20:04 -0700 1782) 	if (flags & TTU_RMAP_LOCKED)
33fc80e257473 (Minchan Kim             2017-05-03 14:54:17 -0700 1783) 		rmap_walk_locked(page, &rwc);
2a52bcbcc688e (Kirill A. Shutemov      2016-03-17 14:20:04 -0700 1784) 	else
33fc80e257473 (Minchan Kim             2017-05-03 14:54:17 -0700 1785) 		rmap_walk(page, &rwc);
52629506420ce (Joonsoo Kim             2014-01-21 15:49:50 -0800 1786) 
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1787) 	/*
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1788) 	 * When racing against e.g. zap_pte_range() on another cpu,
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1789) 	 * in between its ptep_get_and_clear_full() and page_remove_rmap(),
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1790) 	 * try_to_unmap() may return false when it is about to become true,
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1791) 	 * if page table locking is skipped: use TTU_SYNC to wait for that.
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1792) 	 */
732ed55823fc3 (Hugh Dickins            2021-06-15 18:23:53 -0700 1793) 	return !page_mapcount(page);
^1da177e4c3f4 (Linus Torvalds          2005-04-16 15:20:36 -0700 1794) }
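
/*
 * Editor's sketch, not part of mm/rmap.c: how a pageout-style caller is
 * expected to use try_to_unmap().  The page must be locked.  TTU_SYNC makes
 * the walk take the page table lock, so a racing zap_pte_range() cannot
 * produce a false negative (see the comment above).  The wrapper name is an
 * illustrative assumption.
 */
static bool sketch_unmap_for_reclaim(struct page *page)
{
	VM_BUG_ON_PAGE(!PageLocked(page), page);

	if (!try_to_unmap(page, TTU_SYNC))
		return false;	/* still mapped somewhere; keep it on the LRU */

	/* every pte now points at a swap entry or is gone */
	return true;
}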
81b4082dc7666 (Nikita Danilov          2005-05-01 08:58:36 -0700 1795) 
b291f000393f5 (Nicholas Piggin         2008-10-18 20:26:44 -0700 1796) /**
b291f000393f5 (Nicholas Piggin         2008-10-18 20:26:44 -0700 1797)  * try_to_munlock - try to munlock a page
b291f000393f5 (Nicholas Piggin         2008-10-18 20:26:44 -0700 1798)  * @page: the page to be munlocked
b291f000393f5 (Nicholas Piggin         2008-10-18 20:26:44 -0700 1799)  *
b291f000393f5 (Nicholas Piggin         2008-10-18 20:26:44 -0700 1800)  * Called from munlock code.  Checks all of the VMAs mapping the page
b291f000393f5 (Nicholas Piggin         2008-10-18 20:26:44 -0700 1801)  * to make sure nobody else has this page mlocked. The page will be
b291f000393f5 (Nicholas Piggin         2008-10-18 20:26:44 -0700 1802)  * returned with PG_mlocked cleared if no other VMAs have it mlocked.
b291f000393f5 (Nicholas Piggin         2008-10-18 20:26:44 -0700 1803)  */
192d7232569ab (Minchan Kim             2017-05-03 14:54:10 -0700 1805) void try_to_munlock(struct page *page)
192d7232569ab (Minchan Kim             2017-05-03 14:54:10 -0700 1806) {
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1807) 	struct rmap_walk_control rwc = {
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1808) 		.rmap_one = try_to_unmap_one,
802a3a92ad7ac (Shaohua Li              2017-05-03 14:52:32 -0700 1809) 		.arg = (void *)TTU_MUNLOCK,
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1810) 		.done = page_not_mapped,
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1811) 		.anon_lock = page_lock_anon_vma_read,
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1813) 	};
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1814) 
309381feaee56 (Sasha Levin             2014-01-23 15:52:54 -0800 1815) 	VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
192d7232569ab (Minchan Kim             2017-05-03 14:54:10 -0700 1816) 	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
b291f000393f5 (Nicholas Piggin         2008-10-18 20:26:44 -0700 1817) 
192d7232569ab (Minchan Kim             2017-05-03 14:54:10 -0700 1818) 	rmap_walk(page, &rwc);
b291f000393f5 (Nicholas Piggin         2008-10-18 20:26:44 -0700 1819) }
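
/*
 * Editor's sketch, not part of mm/rmap.c: the shape of a munlock caller.
 * The page has already been isolated from the LRU and must be locked;
 * after try_to_munlock(), PG_mlocked is still set only if some other
 * VM_LOCKED vma maps the page.  The function name is an illustrative
 * assumption.
 */
static bool sketch_munlock_one_page(struct page *page)
{
	bool still_mlocked;

	lock_page(page);
	try_to_munlock(page);
	still_mlocked = PageMlocked(page);
	unlock_page(page);

	return !still_mlocked;	/* true: safe to put the page back on the LRU */
}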
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1820) 
01d8b20dec5f4 (Peter Zijlstra          2011-03-22 16:32:49 -0700 1821) void __put_anon_vma(struct anon_vma *anon_vma)
76545066c8521 (Rik van Riel            2010-08-09 17:18:41 -0700 1822) {
01d8b20dec5f4 (Peter Zijlstra          2011-03-22 16:32:49 -0700 1823) 	struct anon_vma *root = anon_vma->root;
76545066c8521 (Rik van Riel            2010-08-09 17:18:41 -0700 1824) 
624483f3ea825 (Andrey Ryabinin         2014-06-06 19:09:30 +0400 1825) 	anon_vma_free(anon_vma);
01d8b20dec5f4 (Peter Zijlstra          2011-03-22 16:32:49 -0700 1826) 	if (root != anon_vma && atomic_dec_and_test(&root->refcount))
01d8b20dec5f4 (Peter Zijlstra          2011-03-22 16:32:49 -0700 1827) 		anon_vma_free(root);
76545066c8521 (Rik van Riel            2010-08-09 17:18:41 -0700 1828) }
76545066c8521 (Rik van Riel            2010-08-09 17:18:41 -0700 1829) 
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1830) static struct anon_vma *rmap_walk_anon_lock(struct page *page,
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1831) 					struct rmap_walk_control *rwc)
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1832) {
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1833) 	struct anon_vma *anon_vma;
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1834) 
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1835) 	if (rwc->anon_lock)
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1836) 		return rwc->anon_lock(page);
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1837) 
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1838) 	/*
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1839) 	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1840) 	 * because that depends on page_mapped(); but not all its usages
c1e8d7c6a7a68 (Michel Lespinasse       2020-06-08 21:33:54 -0700 1841) 	 * are holding mmap_lock. Users without mmap_lock are required to
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1842) 	 * take a reference count to prevent the anon_vma from disappearing.
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1843) 	 */
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1844) 	anon_vma = page_anon_vma(page);
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1845) 	if (!anon_vma)
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1846) 		return NULL;
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1847) 
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1848) 	anon_vma_lock_read(anon_vma);
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1849) 	return anon_vma;
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1850) }
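
/*
 * Editor's sketch, not part of mm/rmap.c: the "take a reference count" rule
 * from the comment above.  A user that is not holding mmap_lock pins the
 * anon_vma with page_get_anon_vma() before taking its rwsem and drops the
 * pin afterwards.  The function name is an illustrative assumption.
 */
static void sketch_walk_without_mmap_lock(struct page *page)
{
	struct anon_vma *anon_vma;

	anon_vma = page_get_anon_vma(page);	/* takes a reference */
	if (!anon_vma)
		return;		/* page is no longer anonymous or not mapped */

	anon_vma_lock_read(anon_vma);
	/* ... walk anon_vma->rb_root here ... */
	anon_vma_unlock_read(anon_vma);

	put_anon_vma(anon_vma);	/* may free the anon_vma */
}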
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1851) 
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1852) /*
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1853)  * rmap_walk_anon - do something to an anonymous page using the object-based
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1854)  * rmap method
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1855)  * @page: the page to be handled
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1856)  * @rwc: control variable according to each walk type
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1857)  *
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1858)  * Find all the mappings of a page using the mapping pointer and the vma chains
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1859)  * contained in the anon_vma struct it points to.
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1860)  *
c1e8d7c6a7a68 (Michel Lespinasse       2020-06-08 21:33:54 -0700 1861)  * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1862)  * where the page was found will be held for write.  So, we won't recheck
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1863)  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1864)  * LOCKED.
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1865)  */
1df631ae19819 (Minchan Kim             2017-05-03 14:54:23 -0700 1866) static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1867) 		bool locked)
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1868) {
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1869) 	struct anon_vma *anon_vma;
a8fa41ad2f6f7 (Kirill A. Shutemov      2017-02-24 14:57:54 -0800 1870) 	pgoff_t pgoff_start, pgoff_end;
5beb49305251e (Rik van Riel            2010-03-05 13:42:07 -0800 1871) 	struct anon_vma_chain *avc;
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1872) 
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1873) 	if (locked) {
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1874) 		anon_vma = page_anon_vma(page);
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1875) 		/* did the anon_vma disappear under us? */
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1876) 		VM_BUG_ON_PAGE(!anon_vma, page);
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1877) 	} else {
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1878) 		anon_vma = rmap_walk_anon_lock(page, rwc);
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1879) 	}
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1880) 	if (!anon_vma)
1df631ae19819 (Minchan Kim             2017-05-03 14:54:23 -0700 1881) 		return;
faecd8dd852d4 (Joonsoo Kim             2014-01-21 15:49:46 -0800 1882) 
a8fa41ad2f6f7 (Kirill A. Shutemov      2017-02-24 14:57:54 -0800 1883) 	pgoff_start = page_to_pgoff(page);
6c357848b44b4 (Matthew Wilcox (Oracle) 2020-08-14 17:30:37 -0700 1884) 	pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
a8fa41ad2f6f7 (Kirill A. Shutemov      2017-02-24 14:57:54 -0800 1885) 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
a8fa41ad2f6f7 (Kirill A. Shutemov      2017-02-24 14:57:54 -0800 1886) 			pgoff_start, pgoff_end) {
5beb49305251e (Rik van Riel            2010-03-05 13:42:07 -0800 1887) 		struct vm_area_struct *vma = avc->vma;
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1888) 		unsigned long address = vma_address(page, vma);
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1889) 
494334e43c16d (Hugh Dickins            2021-06-15 18:23:56 -0700 1890) 		VM_BUG_ON_VMA(address == -EFAULT, vma);
ad12695f177c3 (Andrea Arcangeli        2015-11-05 18:49:07 -0800 1891) 		cond_resched();
ad12695f177c3 (Andrea Arcangeli        2015-11-05 18:49:07 -0800 1892) 
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1893) 		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1894) 			continue;
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1895) 
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700 1896) 		if (!rwc->rmap_one(page, vma, address, rwc->arg))
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1897) 			break;
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1898) 		if (rwc->done && rwc->done(page))
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1899) 			break;
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1900) 	}
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1901) 
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1902) 	if (!locked)
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1903) 		anon_vma_unlock_read(anon_vma);
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1904) }
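
/*
 * Editor's sketch, not part of mm/rmap.c: a minimal rmap_walk_control.
 * rmap_one() is called once per (vma, address) that maps the page and
 * returns false to stop the walk early; done() lets the walk finish once
 * the page is no longer interesting.  The callback and wrapper names are
 * illustrative assumptions.
 */
static bool sketch_count_one(struct page *page, struct vm_area_struct *vma,
			     unsigned long address, void *arg)
{
	int *mappings = arg;

	(*mappings)++;
	return true;		/* keep walking */
}

static int sketch_count_mappings(struct page *page)
{
	int mappings = 0;
	struct rmap_walk_control rwc = {
		.rmap_one = sketch_count_one,
		.arg = &mappings,
		.anon_lock = page_lock_anon_vma_read,
	};

	rmap_walk(page, &rwc);	/* the caller must hold the page lock */
	return mappings;
}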
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1905) 
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1906) /*
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1907)  * rmap_walk_file - do something to a file page using the object-based rmap method
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1908)  * @page: the page to be handled
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1909)  * @rwc: control variable according to each walk type
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1910)  *
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1911)  * Find all the mappings of a page using the mapping pointer and the vma chains
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1912)  * contained in the address_space struct it points to.
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1913)  *
c1e8d7c6a7a68 (Michel Lespinasse       2020-06-08 21:33:54 -0700 1914)  * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1915)  * where the page was found will be held for write.  So, we won't recheck
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1916)  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1917)  * LOCKED.
e8351ac9bfa7f (Joonsoo Kim             2014-01-21 15:49:52 -0800 1918)  */
1df631ae19819 (Minchan Kim             2017-05-03 14:54:23 -0700 1919) static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1920) 		bool locked)
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1921) {
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1922) 	struct address_space *mapping = page_mapping(page);
a8fa41ad2f6f7 (Kirill A. Shutemov      2017-02-24 14:57:54 -0800 1923) 	pgoff_t pgoff_start, pgoff_end;
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1924) 	struct vm_area_struct *vma;
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1925) 
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800 1926) 	/*
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800 1927) 	 * The page lock not only makes sure that page->mapping cannot
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800 1928) 	 * suddenly be NULLified by truncation, but also that the
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800 1929) 	 * structure at mapping cannot be freed and reused yet,
c8c06efa8b552 (Davidlohr Bueso         2014-12-12 16:54:24 -0800 1930) 	 * so we can safely take mapping->i_mmap_rwsem.
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800 1931) 	 */
81d1b09c6be66 (Sasha Levin             2014-10-09 15:28:10 -0700 1932) 	VM_BUG_ON_PAGE(!PageLocked(page), page);
9f32624be9435 (Joonsoo Kim             2014-01-21 15:49:53 -0800 1933) 
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1934) 	if (!mapping)
1df631ae19819 (Minchan Kim             2017-05-03 14:54:23 -0700 1935) 		return;
3dec0ba0be6a5 (Davidlohr Bueso         2014-12-12 16:54:27 -0800 1936) 
a8fa41ad2f6f7 (Kirill A. Shutemov      2017-02-24 14:57:54 -0800 1937) 	pgoff_start = page_to_pgoff(page);
6c357848b44b4 (Matthew Wilcox (Oracle) 2020-08-14 17:30:37 -0700 1938) 	pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1939) 	if (!locked)
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1940) 		i_mmap_lock_read(mapping);
a8fa41ad2f6f7 (Kirill A. Shutemov      2017-02-24 14:57:54 -0800 1941) 	vma_interval_tree_foreach(vma, &mapping->i_mmap,
a8fa41ad2f6f7 (Kirill A. Shutemov      2017-02-24 14:57:54 -0800 1942) 			pgoff_start, pgoff_end) {
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1943) 		unsigned long address = vma_address(page, vma);
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1944) 
494334e43c16d (Hugh Dickins            2021-06-15 18:23:56 -0700 1945) 		VM_BUG_ON_VMA(address == -EFAULT, vma);
ad12695f177c3 (Andrea Arcangeli        2015-11-05 18:49:07 -0800 1946) 		cond_resched();
ad12695f177c3 (Andrea Arcangeli        2015-11-05 18:49:07 -0800 1947) 
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1948) 		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1949) 			continue;
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1950) 
e4b82222712ed (Minchan Kim             2017-05-03 14:54:27 -0700 1951) 		if (!rwc->rmap_one(page, vma, address, rwc->arg))
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1952) 			goto done;
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1953) 		if (rwc->done && rwc->done(page))
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1954) 			goto done;
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1955) 	}
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1956) 
0dd1c7bbce8d1 (Joonsoo Kim             2014-01-21 15:49:49 -0800 1957) done:
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1958) 	if (!locked)
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1959) 		i_mmap_unlock_read(mapping);
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1960) }
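
/*
 * Editor's sketch, not part of mm/rmap.c: the file-backed walk above reduced
 * to its locking and interval-tree iteration, without the rwc plumbing.
 * The page must be locked so that page->mapping stays stable (see the
 * comment in rmap_walk_file()).  The function name is an illustrative
 * assumption.
 */
static void sketch_for_each_file_mapping(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	pgoff_t first = page_to_pgoff(page);
	pgoff_t last = first + thp_nr_pages(page) - 1;
	struct vm_area_struct *vma;

	if (!mapping)
		return;

	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, first, last) {
		unsigned long address = vma_address(page, vma);

		/* examine the pte(s) that map @page at @address in @vma */
		(void)address;
	}
	i_mmap_unlock_read(mapping);
}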
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1961) 
1df631ae19819 (Minchan Kim             2017-05-03 14:54:23 -0700 1962) void rmap_walk(struct page *page, struct rmap_walk_control *rwc)
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1963) {
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1964) 	if (unlikely(PageKsm(page)))
1df631ae19819 (Minchan Kim             2017-05-03 14:54:23 -0700 1965) 		rmap_walk_ksm(page, rwc);
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1966) 	else if (PageAnon(page))
1df631ae19819 (Minchan Kim             2017-05-03 14:54:23 -0700 1967) 		rmap_walk_anon(page, rwc, false);
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1968) 	else
1df631ae19819 (Minchan Kim             2017-05-03 14:54:23 -0700 1969) 		rmap_walk_file(page, rwc, false);
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1970) }
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1971) 
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1972) /* Like rmap_walk, but caller holds relevant rmap lock */
1df631ae19819 (Minchan Kim             2017-05-03 14:54:23 -0700 1973) void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1974) {
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1975) 	/* no ksm support for now */
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1976) 	VM_BUG_ON_PAGE(PageKsm(page), page);
b97731992d00f (Kirill A. Shutemov      2016-03-17 14:20:01 -0700 1977) 	if (PageAnon(page))
1df631ae19819 (Minchan Kim             2017-05-03 14:54:23 -0700 1978) 		rmap_walk_anon(page, rwc, true);
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1979) 	else
1df631ae19819 (Minchan Kim             2017-05-03 14:54:23 -0700 1980) 		rmap_walk_file(page, rwc, true);
e9995ef978a7d (Hugh Dickins            2009-12-14 17:59:31 -0800 1981) }
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1982) 
e3390f67a7267 (Naoya Horiguchi         2010-06-15 13:18:13 +0900 1983) #ifdef CONFIG_HUGETLB_PAGE
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1984) /*
451b9514a59f3 (Kirill Tkhai            2018-12-28 00:39:31 -0800 1985)  * The following two functions are for anonymous (privately mapped) hugepages.
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1986)  * Unlike common anonymous pages, anonymous hugepages have no accounting code
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1987)  * and no LRU code, because we handle hugepages differently from common pages.
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1988)  */
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1989) void hugepage_add_anon_rmap(struct page *page,
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1990) 			    struct vm_area_struct *vma, unsigned long address)
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1991) {
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1992) 	struct anon_vma *anon_vma = vma->anon_vma;
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1993) 	int first;
a850ea30374eb (Naoya Horiguchi         2010-09-10 13:23:06 +0900 1994) 
a850ea30374eb (Naoya Horiguchi         2010-09-10 13:23:06 +0900 1995) 	BUG_ON(!PageLocked(page));
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1996) 	BUG_ON(!anon_vma);
5dbe0af47f8a8 (Hugh Dickins            2011-05-28 13:17:04 -0700 1997) 	/* address might be in next vma when migration races vma_adjust */
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 1998) 	first = atomic_inc_and_test(compound_mapcount_ptr(page));
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 1999) 	if (first)
451b9514a59f3 (Kirill Tkhai            2018-12-28 00:39:31 -0800 2000) 		__page_set_anon_rmap(page, vma, address, 0);
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 2001) }
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 2002) 
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 2003) void hugepage_add_new_anon_rmap(struct page *page,
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 2004) 			struct vm_area_struct *vma, unsigned long address)
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 2005) {
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 2006) 	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
53f9263baba69 (Kirill A. Shutemov      2016-01-15 16:53:42 -0800 2007) 	atomic_set(compound_mapcount_ptr(page), 0);
47e29d32afba1 (John Hubbard            2020-04-01 21:05:33 -0700 2008) 	if (hpage_pincount_available(page))
47e29d32afba1 (John Hubbard            2020-04-01 21:05:33 -0700 2009) 		atomic_set(compound_pincount_ptr(page), 0);
47e29d32afba1 (John Hubbard            2020-04-01 21:05:33 -0700 2010) 
451b9514a59f3 (Kirill Tkhai            2018-12-28 00:39:31 -0800 2011) 	__page_set_anon_rmap(page, vma, address, 1);
0fe6e20b9c4c5 (Naoya Horiguchi         2010-05-28 09:29:16 +0900 2012) }
e3390f67a7267 (Naoya Horiguchi         2010-06-15 13:18:13 +0900 2013) #endif /* CONFIG_HUGETLB_PAGE */
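
#ifdef CONFIG_HUGETLB_PAGE
/*
 * Editor's sketch, not part of mm/rmap.c: intended use of the two hugetlb
 * helpers above.  A fault that installs a brand-new private huge page uses
 * hugepage_add_new_anon_rmap(); re-establishing an existing anonymous
 * mapping (for example after migration) uses hugepage_add_anon_rmap().
 * The wrapper name is an illustrative assumption.
 */
static void sketch_map_anon_hugepage(struct page *page,
				     struct vm_area_struct *vma,
				     unsigned long address, bool new_page)
{
	if (new_page)
		hugepage_add_new_anon_rmap(page, vma, address);
	else
		hugepage_add_anon_rmap(page, vma, address);
}
#endif /* CONFIG_HUGETLB_PAGE */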