// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/fs-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * Contains all the functions related to writing back and waiting
 * upon dirty inodes against superblocks, and writing back dirty
 * pages against inodes.  ie: data writeback.  Writeout of the
 * inode itself is not handled here.
 *
 * 10Apr2002	Andrew Morton
 *		Split out of fs/inode.c
 *		Additions for address_space-based writeback
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/tracepoint.h>
#include <linux/device.h>
#include <linux/memcontrol.h>
#include "internal.h"

/*
 * 4MB minimal write chunk size
 */
#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_SHIFT - 10))

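/*
 * Worked example: 4096UL is the chunk size in KiB and PAGE_SHIFT - 10
 * converts KiB to pages.  With 4KiB pages (PAGE_SHIFT == 12),
 * 4096 >> 2 == 1024 pages == 4MiB; with 64KiB pages (PAGE_SHIFT == 16),
 * 4096 >> 6 == 64 pages, still 4MiB.
 */
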
/*
 * Passed into wb_writeback(), essentially a subset of writeback_control
 */
struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
	unsigned int for_sync:1;	/* sync(2) WB_SYNC_ALL writeback */
	unsigned int auto_free:1;	/* free on completion */
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct wb_completion *done;	/* set if the caller waits */
};

/*
 * If an inode is constantly having its pages dirtied, but then the
 * updates stop, the worst-case time between when an inode has its
 * timestamps updated and when they finally get written out can be two
 * dirtytime_expire_intervals.  We set the default to 12 hours (in
 * seconds), which means most of the time inodes will have their
 * timestamps written to disk after 12 hours, but in the worst case a
 * few inodes might not have their timestamps written out for 24 hours.
 */
unsigned int dirtytime_expire_interval = 12 * 60 * 60;

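/*
 * Concrete worst case with the default 12h interval: a timestamp is
 * dirtied just after an expiry sweep at t = 0.  The sweep at t = 12h
 * sees the entry as not yet 12h old and skips it, so the timestamp is
 * only written out by the sweep at t = 24h, two full intervals later.
 */
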
static inline struct inode *wb_inode(struct list_head *head)
{
	return list_entry(head, struct inode, i_io_list);
}
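
/*
 * list_entry() is container_of(): given a pointer to the embedded
 * i_io_list member, it recovers the enclosing struct inode.
 */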

/*
 * Include the creation of the trace points after defining the
 * wb_writeback_work structure and inline functions so that the definition
 * remains local to this file.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage);

static bool wb_io_lists_populated(struct bdi_writeback *wb)
{
	if (wb_has_dirty_io(wb)) {
		return false;
	} else {
		set_bit(WB_has_dirty_io, &wb->state);
		WARN_ON_ONCE(!wb->avg_write_bandwidth);
		atomic_long_add(wb->avg_write_bandwidth,
				&wb->bdi->tot_write_bandwidth);
		return true;
	}
}

static void wb_io_lists_depopulated(struct bdi_writeback *wb)
{
	if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) &&
	    list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) {
		clear_bit(WB_has_dirty_io, &wb->state);
		WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth,
					&wb->bdi->tot_write_bandwidth) < 0);
	}
}
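
/*
 * The two helpers above maintain one invariant: bdi->tot_write_bandwidth
 * is the sum of avg_write_bandwidth over all wbs that currently have
 * dirty IO, adjusted exactly when WB_has_dirty_io flips.  The
 * WARN_ON_ONCE()s catch a wb contributing zero bandwidth or the sum
 * going negative, either of which would mean the accounting is broken.
 */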

/**
 * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
 * @inode: inode to be moved
 * @wb: target bdi_writeback
 * @head: one of @wb->b_{dirty|io|more_io|dirty_time}
 *
 * Move @inode->i_io_list to @head of @wb and set %WB_has_dirty_io.
 * Returns %true if @inode is the first occupant of the !dirty_time IO
 * lists; otherwise, %false.
 */
static bool inode_io_list_move_locked(struct inode *inode,
				      struct bdi_writeback *wb,
				      struct list_head *head)
{
	assert_spin_locked(&wb->list_lock);

	list_move(&inode->i_io_list, head);

	/* dirty_time doesn't count as dirty_io until expiration */
	if (head != &wb->b_dirty_time)
		return wb_io_lists_populated(wb);

	wb_io_lists_depopulated(wb);
	return false;
}

/**
 * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
 * @inode: inode to be removed
 * @wb: bdi_writeback @inode is being removed from
 *
 * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
 * clear %WB_has_dirty_io if all are empty afterwards.
 */
static void inode_io_list_del_locked(struct inode *inode,
				     struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	assert_spin_locked(&inode->i_lock);

	inode->i_state &= ~I_SYNC_QUEUED;
	list_del_init(&inode->i_io_list);
	wb_io_lists_depopulated(wb);
}

static void wb_wakeup(struct bdi_writeback *wb)
{
	spin_lock_bh(&wb->work_lock);
	if (test_bit(WB_registered, &wb->state))
		mod_delayed_work(bdi_wq, &wb->dwork, 0);
	spin_unlock_bh(&wb->work_lock);
}

static void finish_writeback_work(struct bdi_writeback *wb,
				  struct wb_writeback_work *work)
{
	struct wb_completion *done = work->done;

	if (work->auto_free)
		kfree(work);
	if (done) {
		wait_queue_head_t *waitq = done->waitq;

		/* @done can't be accessed after the following dec */
		if (atomic_dec_and_test(&done->cnt))
			wake_up_all(waitq);
	}
}
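
/*
 * Note why @waitq is cached before the dec above: the wb_completion is
 * typically a local in the waiter (see DEFINE_WB_COMPLETION()), and the
 * waiter may return and reuse that storage as soon as ->cnt hits zero,
 * so @done must not be dereferenced after atomic_dec_and_test().
 */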

static void wb_queue_work(struct bdi_writeback *wb,
			  struct wb_writeback_work *work)
{
	trace_writeback_queue(wb, work);

	if (work->done)
		atomic_inc(&work->done->cnt);

	spin_lock_bh(&wb->work_lock);

	if (test_bit(WB_registered, &wb->state)) {
		list_add_tail(&work->list, &wb->work_list);
		mod_delayed_work(bdi_wq, &wb->dwork, 0);
	} else
		finish_writeback_work(wb, work);

	spin_unlock_bh(&wb->work_lock);
}

/**
 * wb_wait_for_completion - wait for completion of bdi_writeback_works
 * @done: target wb_completion
 *
 * Wait for one or more work items with their ->done field set to @done,
 * which should have been initialized with DEFINE_WB_COMPLETION().  This
 * function returns after all such work items are completed.  Work items
 * which are waited upon aren't freed automatically on completion.
 */
void wb_wait_for_completion(struct wb_completion *done)
{
	atomic_dec(&done->cnt);		/* put down the initial count */
	wait_event(*done->waitq, !atomic_read(&done->cnt));
}
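
/*
 * Typical usage, as a sketch (actual callers are the sync paths further
 * down this file): ->cnt starts at one, each queued work adds one, and
 * the waiter drops the initial count before sleeping.
 *
 *	DEFINE_WB_COMPLETION(done, bdi);
 *
 *	work->done = &done;
 *	wb_queue_work(wb, work);
 *	wb_wait_for_completion(&done);
 */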

#ifdef CONFIG_CGROUP_WRITEBACK

/*
 * Parameters for foreign inode detection; see wbc_detach_inode() for how
 * they're used.
 *
 * These parameters are inherently heuristic as the detection target
 * itself is fuzzy.  All we want to do is detach an inode from its
 * current owner if other cgroups are writing to it too much.
 *
 * The current cgroup writeback is built on the assumption that multiple
 * cgroups writing to the same inode concurrently is very rare and a mode
 * of operation which isn't well supported.  As such, the goal is not
 * taking too long when a different cgroup takes over an inode while
 * avoiding too aggressive flip-flops from occasional foreign writes.
 *
 * We record, very roughly, 2s worth of IO time history and if more than
 * half of that is foreign, trigger the switch.  The recording is quantized
 * to 16 slots.  To keep tiny writes from swinging the decision too much,
 * writes smaller than 1/8 of avg size are ignored.
 */
#define WB_FRN_TIME_SHIFT	13	/* 1s = 2^13, up to 8 secs w/ 16bit */
#define WB_FRN_TIME_AVG_SHIFT	3	/* avg = avg * 7/8 + new * 1/8 */
#define WB_FRN_TIME_CUT_DIV	8	/* ignore rounds < avg / 8 */
#define WB_FRN_TIME_PERIOD	(2 * (1 << WB_FRN_TIME_SHIFT))	/* 2s */

#define WB_FRN_HIST_SLOTS	16	/* inode->i_wb_frn_history is 16bit */
#define WB_FRN_HIST_UNIT	(WB_FRN_TIME_PERIOD / WB_FRN_HIST_SLOTS)
					/* each slot's duration is 2s / 16 */
#define WB_FRN_HIST_THR_SLOTS	(WB_FRN_HIST_SLOTS / 2)
					/* if foreign slots >= 8, switch */
#define WB_FRN_HIST_MAX_SLOTS	(WB_FRN_HIST_THR_SLOTS / 2 + 1)
					/* one round can affect up to 5 slots */
#define WB_FRN_MAX_IN_FLIGHT	1024	/* don't queue too many concurrently */
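
/*
 * Worked numbers from the definitions above: time is tracked in units
 * of 2^-13s, so a 16-bit value saturates around 8s.  WB_FRN_TIME_PERIOD
 * is 2 * 2^13 = 2s, split into 16 history slots of 2s / 16 = 125ms each.
 * A switch requires at least 8 of the 16 slots to be foreign, and one
 * writeback round can mark at most 8 / 2 + 1 = 5 slots.
 */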

static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
static struct workqueue_struct *isw_wq;

void __inode_attach_wb(struct inode *inode, struct page *page)
{
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;

	if (inode_cgwb_enabled(inode)) {
		struct cgroup_subsys_state *memcg_css;

		if (page) {
			memcg_css = mem_cgroup_css_from_page(page);
			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
		} else {
			/* must pin memcg_css, see wb_get_create() */
			memcg_css = task_get_css(current, memory_cgrp_id);
			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
			css_put(memcg_css);
		}
	}

	if (!wb)
		wb = &bdi->wb;

	/*
	 * There may be multiple instances of this function racing to
	 * update the same inode.  Use cmpxchg() to tell the winner.
	 */
	if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
		wb_put(wb);
}
EXPORT_SYMBOL_GPL(__inode_attach_wb);
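
/*
 * Callers normally reach this through the inode_attach_wb() wrapper in
 * <linux/backing-dev.h>, which tests inode->i_wb first so the common
 * already-attached case stays cheap; only the first attachment pays for
 * the cgroup lookup above.
 */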

/**
 * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
 * @inode: inode of interest with i_lock held
 *
 * Returns @inode's wb with its list_lock held.  @inode->i_lock must be
 * held on entry and is released on return.  The returned wb is guaranteed
 * to stay @inode's associated wb until its list_lock is released.
 */
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
	__releases(&inode->i_lock)
	__acquires(&wb->list_lock)
{
	while (true) {
		struct bdi_writeback *wb = inode_to_wb(inode);

		/*
		 * inode_to_wb() association is protected by both
		 * @inode->i_lock and @wb->list_lock but list_lock nests
		 * outside i_lock.  Drop i_lock and verify that the
		 * association hasn't changed after acquiring list_lock.
		 */
		wb_get(wb);
		spin_unlock(&inode->i_lock);
		spin_lock(&wb->list_lock);

		/* i_wb may have changed in between, can't use inode_to_wb() */
		if (likely(wb == inode->i_wb)) {
			wb_put(wb);	/* @inode already has ref */
			return wb;
		}

		spin_unlock(&wb->list_lock);
		wb_put(wb);
		cpu_relax();
		spin_lock(&inode->i_lock);
	}
}

/**
 * inode_to_wb_and_lock_list - determine an inode's wb and lock it
 * @inode: inode of interest
 *
 * Same as locked_inode_to_wb_and_lock_list() but @inode->i_lock isn't held
 * on entry.
 */
static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
	__acquires(&wb->list_lock)
{
	spin_lock(&inode->i_lock);
	return locked_inode_to_wb_and_lock_list(inode);
}

struct inode_switch_wbs_context {
	struct inode		*inode;
	struct bdi_writeback	*new_wb;

	struct rcu_head		rcu_head;
	struct work_struct	work;
};

static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
{
	down_write(&bdi->wb_switch_rwsem);
}

static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
{
	up_write(&bdi->wb_switch_rwsem);
}

static void inode_switch_wbs_work_fn(struct work_struct *work)
{
	struct inode_switch_wbs_context *isw =
		container_of(work, struct inode_switch_wbs_context, work);
	struct inode *inode = isw->inode;
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct address_space *mapping = inode->i_mapping;
	struct bdi_writeback *old_wb = inode->i_wb;
	struct bdi_writeback *new_wb = isw->new_wb;
	XA_STATE(xas, &mapping->i_pages, 0);
	struct page *page;
	bool switched = false;

	/*
	 * If @inode switches cgwb membership while sync_inodes_sb() is
	 * being issued, sync_inodes_sb() might miss it.  Synchronize.
	 */
	down_read(&bdi->wb_switch_rwsem);

	/*
	 * By the time control reaches here, RCU grace period has passed
	 * since I_WB_SWITCH assertion and all wb stat update transactions
	 * between unlocked_inode_to_wb_begin/end() are guaranteed to be
	 * synchronizing against the i_pages lock.
	 *
	 * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
	 * gives us exclusion against all wb related operations on @inode
	 * including IO list manipulations and stat updates.
	 */
	if (old_wb < new_wb) {
		spin_lock(&old_wb->list_lock);
		spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&new_wb->list_lock);
		spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
	}
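
	/*
	 * The two list_locks are always taken in ascending pointer order,
	 * giving every switcher the same global ordering so that two
	 * concurrent switches over the same pair of wbs can't deadlock;
	 * spin_lock_nested() tells lockdep the second lock of the same
	 * class is intentional.
	 */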
	spin_lock(&inode->i_lock);
	xa_lock_irq(&mapping->i_pages);

	/*
	 * Once I_FREEING is visible under i_lock, the eviction path owns
	 * the inode and we shouldn't modify ->i_io_list.
	 */
	if (unlikely(inode->i_state & I_FREEING))
		goto skip_switch;

	trace_inode_switch_wbs(inode, old_wb, new_wb);

	/*
	 * Count and transfer stats.  Note that PAGECACHE_TAG_DIRTY points
	 * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
	 * pages actually under writeback.
	 */
	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
		if (PageDirty(page)) {
			dec_wb_stat(old_wb, WB_RECLAIMABLE);
			inc_wb_stat(new_wb, WB_RECLAIMABLE);
		}
	}

	xas_set(&xas, 0);
	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) {
		WARN_ON_ONCE(!PageWriteback(page));
		dec_wb_stat(old_wb, WB_WRITEBACK);
		inc_wb_stat(new_wb, WB_WRITEBACK);
	}

	wb_get(new_wb);

	/*
	 * Transfer to @new_wb's IO list if necessary.  The specific list
	 * @inode was on is ignored and the inode is put on ->b_dirty which
	 * is always correct including from ->b_dirty_time.  The transfer
	 * preserves @inode->dirtied_when ordering.
	 */
	if (!list_empty(&inode->i_io_list)) {
		struct inode *pos;

		inode_io_list_del_locked(inode, old_wb);
		inode->i_wb = new_wb;
		list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
			if (time_after_eq(inode->dirtied_when,
					  pos->dirtied_when))
				break;
		inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
	} else {
		inode->i_wb = new_wb;
	}

	/* ->i_wb_frn updates may race wbc_detach_inode() but that's ok */
	inode->i_wb_frn_winner = 0;
	inode->i_wb_frn_avg_time = 0;
	inode->i_wb_frn_history = 0;
	switched = true;
skip_switch:
	/*
	 * Paired with load_acquire in unlocked_inode_to_wb_begin() and
	 * ensures that the new wb is visible if they see !I_WB_SWITCH.
	 */
	smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);

	xa_unlock_irq(&mapping->i_pages);
	spin_unlock(&inode->i_lock);
	spin_unlock(&new_wb->list_lock);
	spin_unlock(&old_wb->list_lock);

	up_read(&bdi->wb_switch_rwsem);

	if (switched) {
		wb_wakeup(new_wb);
		wb_put(old_wb);
	}
	wb_put(new_wb);

	iput(inode);
	kfree(isw);

	atomic_dec(&isw_nr_in_flight);
}

static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
{
	struct inode_switch_wbs_context *isw = container_of(rcu_head,
				struct inode_switch_wbs_context, rcu_head);

	/* needs to grab bh-unsafe locks, bounce to work item */
	INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
	queue_work(isw_wq, &isw->work);
}

/**
 * inode_switch_wbs - change the wb association of an inode
 * @inode: target inode
 * @new_wb_id: ID of the new wb
 *
 * Switch @inode's wb association to the wb identified by @new_wb_id.  The
 * switching is performed asynchronously and may fail silently.
 */
static void inode_switch_wbs(struct inode *inode, int new_wb_id)
{
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct cgroup_subsys_state *memcg_css;
	struct inode_switch_wbs_context *isw;

	/* no-op if a switch already seems to be in progress */
	if (inode->i_state & I_WB_SWITCH)
		return;

	/* avoid queueing a new switch if too many are already in flight */
	if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
		return;

	isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
	if (!isw)
		return;

	atomic_inc(&isw_nr_in_flight);

	/* find and pin the new wb */
	rcu_read_lock();
	memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
	if (memcg_css && !css_tryget(memcg_css))
		memcg_css = NULL;
	rcu_read_unlock();
	if (!memcg_css)
		goto out_free;

	isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
	css_put(memcg_css);
	if (!isw->new_wb)
		goto out_free;

	/* while holding I_WB_SWITCH, no one else can update the association */
	spin_lock(&inode->i_lock);
	if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
	    inode->i_state & (I_WB_SWITCH | I_FREEING) ||
	    inode_to_wb(inode) == isw->new_wb) {
		spin_unlock(&inode->i_lock);
		goto out_free;
	}
	inode->i_state |= I_WB_SWITCH;
	__iget(inode);
	spin_unlock(&inode->i_lock);

	isw->inode = inode;

	/*
	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
	 * the RCU protected stat update paths to grab the i_pages
	 * lock so that stat transfer can synchronize against them.
	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
	 */
	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
	return;

out_free:
	atomic_dec(&isw_nr_in_flight);
	if (isw->new_wb)
		wb_put(isw->new_wb);
	kfree(isw);
}
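
/*
 * The switch end to end: inode_switch_wbs() pins the inode and the new
 * wb and sets I_WB_SWITCH, then uses call_rcu() so that every
 * unlocked_inode_to_wb_begin() section that started before the flag was
 * visible has drained; inode_switch_wbs_rcu_fn() bounces to a workqueue
 * because the transfer needs bh-unsafe locks; inode_switch_wbs_work_fn()
 * then moves stats and IO-list membership under all three locks and
 * clears I_WB_SWITCH with a releasing store.
 */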
682aa8e1a6a15 (Tejun Heo             2015-05-28 14:50:53 -0400  553) 
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  554) /**
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  555)  * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  556)  * @wbc: writeback_control of interest
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  557)  * @inode: target inode
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  558)  *
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  559)  * @inode is locked and about to be written back under the control of @wbc.
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  560)  * Record @inode's writeback context into @wbc and unlock the i_lock.  On
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  561)  * writeback completion, wbc_detach_inode() should be called.  This is used
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  562)  * to track the cgroup writeback context.
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  563)  */
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  564) void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  565) 				 struct inode *inode)
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  566) {
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  567) 	if (!inode_cgwb_enabled(inode)) {
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  568) 		spin_unlock(&inode->i_lock);
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  569) 		return;
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  570) 	}
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  571) 
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  572) 	wbc->wb = inode_to_wb(inode);
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  573) 	wbc->inode = inode;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  574) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  575) 	wbc->wb_id = wbc->wb->memcg_css->id;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  576) 	wbc->wb_lcand_id = inode->i_wb_frn_winner;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  577) 	wbc->wb_tcand_id = 0;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  578) 	wbc->wb_bytes = 0;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  579) 	wbc->wb_lcand_bytes = 0;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  580) 	wbc->wb_tcand_bytes = 0;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  581) 
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  582) 	wb_get(wbc->wb);
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  583) 	spin_unlock(&inode->i_lock);
e8a7abf5a5bd3 (Tejun Heo             2015-05-28 14:50:57 -0400  584) 
e8a7abf5a5bd3 (Tejun Heo             2015-05-28 14:50:57 -0400  585) 	/*
65de03e251382 (Tejun Heo             2019-11-08 12:18:29 -0800  586) 	 * A dying wb indicates that either the blkcg associated with the
65de03e251382 (Tejun Heo             2019-11-08 12:18:29 -0800  587) 	 * memcg changed or the associated memcg is dying.  In the first
65de03e251382 (Tejun Heo             2019-11-08 12:18:29 -0800  588) 	 * case, a replacement wb should already be available and we should
65de03e251382 (Tejun Heo             2019-11-08 12:18:29 -0800  589) 	 * refresh the wb immediately.  In the second case, trying to
65de03e251382 (Tejun Heo             2019-11-08 12:18:29 -0800  590) 	 * refresh will keep failing.
e8a7abf5a5bd3 (Tejun Heo             2015-05-28 14:50:57 -0400  591) 	 */
65de03e251382 (Tejun Heo             2019-11-08 12:18:29 -0800  592) 	if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css)))
e8a7abf5a5bd3 (Tejun Heo             2015-05-28 14:50:57 -0400  593) 		inode_switch_wbs(inode, wbc->wb_id);
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  594) }
9b0eb69b75bcc (Tejun Heo             2019-06-27 13:39:48 -0700  595) EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode);
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  596) 
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  597) /**
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  598)  * wbc_detach_inode - disassociate wbc from inode and perform foreign detection
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  599)  * @wbc: writeback_control of the just finished writeback
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  600)  *
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  601)  * To be called after a writeback attempt of an inode finishes and undoes
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  602)  * wbc_attach_and_unlock_inode().  Can be called under any context.
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  603)  *
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  604)  * As concurrent write sharing of an inode is expected to be very rare and
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  605)  * memcg only tracks page ownership on a first-use basis, severely confining
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  606)  * the usefulness of such sharing, cgroup writeback tracks ownership
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  607)  * per-inode.  While the support for concurrent write sharing of an inode
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  608)  * is deemed unnecessary, an inode being written to by different cgroups at
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  609)  * different points in time is a lot more common, and, more importantly,
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  610)  * charging only by first use can too readily lead to grossly incorrect
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  611)  * behaviors (a single foreign page can lead to gigabytes of writeback being
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  612)  * incorrectly attributed).
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  613)  *
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  614)  * To resolve this issue, cgroup writeback detects the majority dirtier of
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  615)  * an inode and transfers the ownership to it.  To avoid unnecessary
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  616)  * oscillation, the detection mechanism keeps track of history and gives
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  617)  * out the switch verdict only if the foreign usage pattern is stable over
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  618)  * a certain amount of time and/or writeback attempts.
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  619)  *
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  620)  * On each writeback attempt, @wbc tries to detect the majority writer
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  621)  * using the Boyer-Moore majority vote algorithm.  In addition to the byte
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  622)  * count from the majority voting, it also counts the bytes written for the
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  623)  * current wb and the last round's winner wb (max of last round's current
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  624)  * wb, the winner from two rounds ago, and the last round's majority
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  625)  * candidate).  Keeping track of the historical winner helps the algorithm
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  626)  * to semi-reliably detect the most active writer even when it's not the
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  627)  * absolute majority.
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  628)  *
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  629)  * Once the winner of the round is determined, whether the winner is
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  630)  * foreign or not and how much IO time the round consumed is recorded in
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  631)  * inode->i_wb_frn_history.  If the amount of recorded foreign IO time is
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  632)  * over a certain threshold, the switch verdict is given.
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  633)  */
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  634) void wbc_detach_inode(struct writeback_control *wbc)
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  635) {
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  636) 	struct bdi_writeback *wb = wbc->wb;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  637) 	struct inode *inode = wbc->inode;
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  638) 	unsigned long avg_time, max_bytes, max_time;
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  639) 	u16 history;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  640) 	int max_id;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  641) 
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  642) 	if (!wb)
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  643) 		return;
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  644) 
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  645) 	history = inode->i_wb_frn_history;
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  646) 	avg_time = inode->i_wb_frn_avg_time;
dd73e4b7df958 (Tejun Heo             2015-06-16 18:48:30 -0400  647) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  648) 	/* pick the winner of this round */
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  649) 	if (wbc->wb_bytes >= wbc->wb_lcand_bytes &&
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  650) 	    wbc->wb_bytes >= wbc->wb_tcand_bytes) {
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  651) 		max_id = wbc->wb_id;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  652) 		max_bytes = wbc->wb_bytes;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  653) 	} else if (wbc->wb_lcand_bytes >= wbc->wb_tcand_bytes) {
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  654) 		max_id = wbc->wb_lcand_id;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  655) 		max_bytes = wbc->wb_lcand_bytes;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  656) 	} else {
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  657) 		max_id = wbc->wb_tcand_id;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  658) 		max_bytes = wbc->wb_tcand_bytes;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  659) 	}
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  660) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  661) 	/*
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  662) 	 * Calculate the amount of IO time the winner consumed and fold it
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  663) 	 * into the running average kept per inode.  If the consumed IO
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  664) 	 * time is lower than avg_time / WB_FRN_TIME_CUT_DIV, ignore it for
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  665) 	 * deciding whether to switch or not.  This is to prevent one-off
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  666) 	 * small dirtiers from skewing the verdict.
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  667) 	 */
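	/*
	 * The fold below is an exponential moving average with weight
	 * 1/2^WB_FRN_TIME_AVG_SHIFT: each round adds
	 * max_time >> WB_FRN_TIME_AVG_SHIFT while the same fraction of
	 * the old average decays away.
	 */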
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  668) 	max_time = DIV_ROUND_UP((max_bytes >> PAGE_SHIFT) << WB_FRN_TIME_SHIFT,
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  669) 				wb->avg_write_bandwidth);
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  670) 	if (avg_time)
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  671) 		avg_time += (max_time >> WB_FRN_TIME_AVG_SHIFT) -
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  672) 			    (avg_time >> WB_FRN_TIME_AVG_SHIFT);
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  673) 	else
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  674) 		avg_time = max_time;	/* immediate catch up on first run */
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  675) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  676) 	if (max_time >= avg_time / WB_FRN_TIME_CUT_DIV) {
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  677) 		int slots;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  678) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  679) 		/*
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  680) 		 * The switch verdict is reached if foreign wb's consume
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  681) 		 * more than a certain proportion of IO time in a
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  682) 		 * WB_FRN_TIME_PERIOD.  This is loosely tracked by a 16-slot
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  683) 		 * history mask where each bit represents one sixteenth of
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  684) 		 * the period.  Determine the number of slots to shift into
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  685) 		 * history from @max_time.
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  686) 		 */
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  687) 		slots = min(DIV_ROUND_UP(max_time, WB_FRN_HIST_UNIT),
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  688) 			    (unsigned long)WB_FRN_HIST_MAX_SLOTS);
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  689) 		history <<= slots;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  690) 		if (wbc->wb_id != max_id)
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  691) 			history |= (1U << slots) - 1;
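		/*
		 * For example (illustrative numbers): with slots == 3 and
		 * a foreign winner, history 0b0001 becomes 0b1111, i.e.
		 * three foreign slots shifted in.  A local win would
		 * shift in three zero slots instead.
		 */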
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  692) 
3a8e9ac89e6a5 (Tejun Heo             2019-08-29 15:47:19 -0700  693) 		if (history)
3a8e9ac89e6a5 (Tejun Heo             2019-08-29 15:47:19 -0700  694) 			trace_inode_foreign_history(inode, wbc, history);
3a8e9ac89e6a5 (Tejun Heo             2019-08-29 15:47:19 -0700  695) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  696) 		/*
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  697) 		 * Switch if the current wb isn't the consistent winner.
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  698) 		 * If there are multiple closely competing dirtiers, the
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  699) 		 * inode may switch across them repeatedly over time, which
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  700) 		 * is okay.  The main goal is avoiding keeping an inode on
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  701) 		 * the wrong wb for an extended period of time.
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  702) 		 */
682aa8e1a6a15 (Tejun Heo             2015-05-28 14:50:53 -0400  703) 		if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
682aa8e1a6a15 (Tejun Heo             2015-05-28 14:50:53 -0400  704) 			inode_switch_wbs(inode, max_id);
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  705) 	}
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  706) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  707) 	/*
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  708) 	 * Multiple instances of this function may race to update the
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  709) 	 * following fields, but we don't mind occasional inaccuracies.
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  710) 	 */
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  711) 	inode->i_wb_frn_winner = max_id;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  712) 	inode->i_wb_frn_avg_time = min(avg_time, (unsigned long)U16_MAX);
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  713) 	inode->i_wb_frn_history = history;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  714) 
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  715) 	wb_put(wbc->wb);
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  716) 	wbc->wb = NULL;
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  717) }
9b0eb69b75bcc (Tejun Heo             2019-06-27 13:39:48 -0700  718) EXPORT_SYMBOL_GPL(wbc_detach_inode);
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600  719) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  720) /**
34e51a5e1a6e9 (Tejun Heo             2019-06-27 13:39:49 -0700  721)  * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  722)  * @wbc: writeback_control of the writeback in progress
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  723)  * @page: page being written out
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  724)  * @bytes: number of bytes being written out
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  725)  *
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  726)  * @bytes from @page are about to be written out during the writeback
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  727)  * controlled by @wbc.  Keep the book for foreign inode detection.  See
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  728)  * wbc_detach_inode().
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  729)  */
34e51a5e1a6e9 (Tejun Heo             2019-06-27 13:39:49 -0700  730) void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
34e51a5e1a6e9 (Tejun Heo             2019-06-27 13:39:49 -0700  731) 			      size_t bytes)
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  732) {
6631142229005 (Tejun Heo             2019-06-13 15:30:41 -0700  733) 	struct cgroup_subsys_state *css;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  734) 	int id;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  735) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  736) 	/*
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  737) 	 * pageout() path doesn't attach @wbc to the inode being written
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  738) 	 * out.  This is intentional as we don't want the function to block
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  739) 	 * behind a slow cgroup.  Ultimately, we want pageout() to kick off
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  740) 	 * regular writeback instead of writing things out itself.
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  741) 	 */
27b36d8fa81fa (Tejun Heo             2019-06-27 13:39:50 -0700  742) 	if (!wbc->wb || wbc->no_cgroup_owner)
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  743) 		return;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  744) 
6631142229005 (Tejun Heo             2019-06-13 15:30:41 -0700  745) 	css = mem_cgroup_css_from_page(page);
6631142229005 (Tejun Heo             2019-06-13 15:30:41 -0700  746) 	/* dead cgroups shouldn't contribute to inode ownership arbitration */
6631142229005 (Tejun Heo             2019-06-13 15:30:41 -0700  747) 	if (!(css->flags & CSS_ONLINE))
6631142229005 (Tejun Heo             2019-06-13 15:30:41 -0700  748) 		return;
6631142229005 (Tejun Heo             2019-06-13 15:30:41 -0700  749) 
6631142229005 (Tejun Heo             2019-06-13 15:30:41 -0700  750) 	id = css->id;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  751) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  752) 	if (id == wbc->wb_id) {
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  753) 		wbc->wb_bytes += bytes;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  754) 		return;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  755) 	}
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  756) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  757) 	if (id == wbc->wb_lcand_id)
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  758) 		wbc->wb_lcand_bytes += bytes;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  759) 
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  760) 	/* Boyer-Moore majority vote algorithm */
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  761) 	if (!wbc->wb_tcand_bytes)
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  762) 		wbc->wb_tcand_id = id;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  763) 	if (id == wbc->wb_tcand_id)
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  764) 		wbc->wb_tcand_bytes += bytes;
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  765) 	else
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  766) 		wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  767) }
34e51a5e1a6e9 (Tejun Heo             2019-06-27 13:39:49 -0700  768) EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
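
/*
 * Illustrative trace of the third-candidate bookkeeping above, with
 * made-up byte counts and assuming none of the cgroups involved is the
 * current wb or the last round's winner: pages of 4096 bytes owned by
 * cgroups A, B and A in turn leave A as wb_tcand_id.  The first A page
 * sets the candidate and tcand_bytes to 4096, the B page cancels it
 * back down to 0, and the second A page re-elects A.  Only a writer
 * that out-writes all others combined is guaranteed to survive a round
 * as the candidate, which is the Boyer-Moore majority property.
 */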
2a81490811d02 (Tejun Heo             2015-05-28 14:50:51 -0400  769) 
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  770) /**
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  771)  * inode_congested - test whether an inode is congested
60292bcc1b240 (Tejun Heo             2015-08-18 14:54:54 -0700  772)  * @inode: inode to test for congestion (may be NULL)
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  773)  * @cong_bits: mask of WB_[a]sync_congested bits to test
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  774)  *
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  775)  * Tests whether @inode is congested.  @cong_bits is the mask of congestion
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  776)  * bits to test and the return value is the mask of set bits.
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  777)  *
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  778)  * If cgroup writeback is enabled for @inode, the congestion state is
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  779)  * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  780)  * associated with @inode is congested; otherwise, the root wb's congestion
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  781)  * state is used.
60292bcc1b240 (Tejun Heo             2015-08-18 14:54:54 -0700  782)  *
60292bcc1b240 (Tejun Heo             2015-08-18 14:54:54 -0700  783)  * @inode is allowed to be NULL as this function is often called on
60292bcc1b240 (Tejun Heo             2015-08-18 14:54:54 -0700  784)  * mapping->host, which is NULL for the swapper space.
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  785)  */
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  786) int inode_congested(struct inode *inode, int cong_bits)
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  787) {
5cb8b8241e614 (Tejun Heo             2015-05-28 14:50:54 -0400  788) 	/*
5cb8b8241e614 (Tejun Heo             2015-05-28 14:50:54 -0400  789) 	 * Once set, ->i_wb never becomes NULL while the inode is alive.
5cb8b8241e614 (Tejun Heo             2015-05-28 14:50:54 -0400  790) 	 * Start transaction iff ->i_wb is visible.
5cb8b8241e614 (Tejun Heo             2015-05-28 14:50:54 -0400  791) 	 */
aaa2cacf8184e (Tejun Heo             2015-05-28 14:50:55 -0400  792) 	if (inode && inode_to_wb_is_valid(inode)) {
5cb8b8241e614 (Tejun Heo             2015-05-28 14:50:54 -0400  793) 		struct bdi_writeback *wb;
2e898e4c0a389 (Greg Thelen           2018-04-20 14:55:42 -0700  794) 		struct wb_lock_cookie lock_cookie = {};
2e898e4c0a389 (Greg Thelen           2018-04-20 14:55:42 -0700  795) 		bool congested;
5cb8b8241e614 (Tejun Heo             2015-05-28 14:50:54 -0400  796) 
2e898e4c0a389 (Greg Thelen           2018-04-20 14:55:42 -0700  797) 		wb = unlocked_inode_to_wb_begin(inode, &lock_cookie);
5cb8b8241e614 (Tejun Heo             2015-05-28 14:50:54 -0400  798) 		congested = wb_congested(wb, cong_bits);
2e898e4c0a389 (Greg Thelen           2018-04-20 14:55:42 -0700  799) 		unlocked_inode_to_wb_end(inode, &lock_cookie);
5cb8b8241e614 (Tejun Heo             2015-05-28 14:50:54 -0400  800) 		return congested;
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  801) 	}
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  802) 
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  803) 	return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  804) }
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  805) EXPORT_SYMBOL_GPL(inode_congested);
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400  806) 
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  807) /**
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  808)  * wb_split_bdi_pages - split nr_pages to write according to bandwidth
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  809)  * @wb: target bdi_writeback to split @nr_pages to
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  810)  * @nr_pages: number of pages to write for the whole bdi
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  811)  *
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  812)  * Split @wb's portion of @nr_pages according to @wb's write bandwidth in
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  813)  * relation to the total write bandwidth of all wb's w/ dirty inodes on
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  814)  * @wb->bdi.
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  815)  */
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  816) static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  817) {
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  818) 	unsigned long this_bw = wb->avg_write_bandwidth;
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  819) 	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  820) 
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  821) 	if (nr_pages == LONG_MAX)
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  822) 		return LONG_MAX;
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  823) 
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  824) 	/*
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  825) 	 * This may be called on clean wb's where proportional distribution
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  826) 	 * may not make sense; just use the original @nr_pages in those
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  827) 	 * cases.  In general, we want to err on the side of writing more.
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  828) 	 */
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  829) 	if (!tot_bw || this_bw >= tot_bw)
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  830) 		return nr_pages;
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  831) 	else
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  832) 		return DIV_ROUND_UP_ULL((u64)nr_pages * this_bw, tot_bw);
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  833) }
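
/*
 * For example (illustrative bandwidths): a wb averaging 100 MB/s on a
 * bdi whose dirty wb's total 400 MB/s is asked to write
 * DIV_ROUND_UP(nr_pages * 100, 400) pages, a quarter of the request
 * rounded up.
 */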
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400  834) 
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  835) /**
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  836)  * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  837)  * @bdi: target backing_dev_info
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  838)  * @base_work: wb_writeback_work to issue
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  839)  * @skip_if_busy: skip wb's which already have writeback in progress
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  840)  *
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  841)  * Split and issue @base_work to all wb's (bdi_writeback's) of @bdi which
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  842)  * have dirty inodes.  If @base_work->nr_pages isn't %LONG_MAX, it's
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  843)  * distributed to the busy wbs according to each wb's proportion in the
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  844)  * total active write bandwidth of @bdi.
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  845)  */
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  846) static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  847) 				  struct wb_writeback_work *base_work,
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  848) 				  bool skip_if_busy)
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  849) {
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  850) 	struct bdi_writeback *last_wb = NULL;
b33e18f61bd18 (Tejun Heo             2015-10-27 14:19:39 +0900  851) 	struct bdi_writeback *wb = list_entry(&bdi->wb_list,
b33e18f61bd18 (Tejun Heo             2015-10-27 14:19:39 +0900  852) 					      struct bdi_writeback, bdi_node);
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  853) 
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  854) 	might_sleep();
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  855) restart:
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  856) 	rcu_read_lock();
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  857) 	list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
5b9cce4c7eb06 (Tejun Heo             2019-08-26 09:06:52 -0700  858) 		DEFINE_WB_COMPLETION(fallback_work_done, bdi);
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  859) 		struct wb_writeback_work fallback_work;
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  860) 		struct wb_writeback_work *work;
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  861) 		long nr_pages;
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  862) 
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  863) 		if (last_wb) {
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  864) 			wb_put(last_wb);
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  865) 			last_wb = NULL;
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  866) 		}
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  867) 
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400  868) 		/* SYNC_ALL writes out I_DIRTY_TIME too */
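		/*
		 * IOW, skip a wb with no dirty IO unless this is a
		 * WB_SYNC_ALL pass and the wb still has dirty-time
		 * inodes to expire.
		 */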
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400  869) 		if (!wb_has_dirty_io(wb) &&
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400  870) 		    (base_work->sync_mode == WB_SYNC_NONE ||
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400  871) 		     list_empty(&wb->b_dirty_time)))
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400  872) 			continue;
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400  873) 		if (skip_if_busy && writeback_in_progress(wb))
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  874) 			continue;
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  875) 
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  876) 		nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages);
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  877) 
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  878) 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  879) 		if (work) {
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  880) 			*work = *base_work;
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  881) 			work->nr_pages = nr_pages;
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  882) 			work->auto_free = 1;
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  883) 			wb_queue_work(wb, work);
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  884) 			continue;
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  885) 		}
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  886) 
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  887) 		/* alloc failed, execute synchronously using on-stack fallback */
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  888) 		work = &fallback_work;
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  889) 		*work = *base_work;
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  890) 		work->nr_pages = nr_pages;
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  891) 		work->auto_free = 0;
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  892) 		work->done = &fallback_work_done;
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  893) 
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  894) 		wb_queue_work(wb, work);
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  895) 
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  896) 		/*
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  897) 		 * Pin @wb so that it stays on @bdi->wb_list.  This allows
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  898) 		 * continuing iteration from @wb after dropping and
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  899) 		 * re-grabbing the RCU read lock.
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  900) 		 */
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  901) 		wb_get(wb);
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  902) 		last_wb = wb;
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  903) 
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  904) 		rcu_read_unlock();
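		/*
		 * The fallback work lives on this stack frame; wait for
		 * it to finish before the frame can be reused on the
		 * next loop iteration.
		 */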
5b9cce4c7eb06 (Tejun Heo             2019-08-26 09:06:52 -0700  905) 		wb_wait_for_completion(&fallback_work_done);
8a1270cda7b47 (Tejun Heo             2015-08-18 14:54:53 -0700  906) 		goto restart;
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  907) 	}
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  908) 	rcu_read_unlock();
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  909) 
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  910) 	if (last_wb)
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400  911) 		wb_put(last_wb);
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  912) }
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400  913) 
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  914) /**
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  915)  * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  916)  * @bdi_id: target bdi id
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  917)  * @memcg_id: target memcg css id
b46ec1da5eb7d (Randy Dunlap          2019-10-14 14:12:17 -0700  918)  * @nr: number of pages to write, 0 for best-effort dirty flushing
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  919)  * @reason: reason why some writeback work was initiated
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  920)  * @done: target wb_completion
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  921)  *
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  922)  * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  923)  * with the specified parameters.
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  924)  */
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  925) int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  926) 			   enum wb_reason reason, struct wb_completion *done)
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  927) {
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  928) 	struct backing_dev_info *bdi;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  929) 	struct cgroup_subsys_state *memcg_css;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  930) 	struct bdi_writeback *wb;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  931) 	struct wb_writeback_work *work;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  932) 	int ret;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  933) 
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  934) 	/* lookup bdi and memcg */
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  935) 	bdi = bdi_get_by_id(bdi_id);
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  936) 	if (!bdi)
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  937) 		return -ENOENT;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  938) 
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  939) 	rcu_read_lock();
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  940) 	memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys);
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  941) 	if (memcg_css && !css_tryget(memcg_css))
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  942) 		memcg_css = NULL;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  943) 	rcu_read_unlock();
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  944) 	if (!memcg_css) {
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  945) 		ret = -ENOENT;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  946) 		goto out_bdi_put;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  947) 	}
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  948) 
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  949) 	/*
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  950) 	 * And find the associated wb.  If the wb isn't there already,
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  951) 	 * there's nothing to flush; don't create one.
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  952) 	 */
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  953) 	wb = wb_get_lookup(bdi, memcg_css);
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  954) 	if (!wb) {
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  955) 		ret = -ENOENT;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  956) 		goto out_css_put;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  957) 	}
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  958) 
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  959) 	/*
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  960) 	 * If @nr is zero, the caller is attempting to write out most of
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  961) 	 * the currently dirty pages.  Let's take the current dirty page
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  962) 	 * count and inflate it by 25%, which should be large enough to
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  963) 	 * flush out most dirty pages while avoiding getting livelocked by
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  964) 	 * concurrent dirtiers.
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  965) 	 */
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  966) 	if (!nr) {
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  967) 		unsigned long filepages, headroom, dirty, writeback;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  968) 
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  969) 		mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  970) 				      &writeback);
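		/* dirty * 10 / 8 == dirty * 1.25, i.e. +25% */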
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  971) 		nr = dirty * 10 / 8;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  972) 	}
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  973) 
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  974) 	/* issue the writeback work */
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  975) 	work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  976) 	if (work) {
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  977) 		work->nr_pages = nr;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  978) 		work->sync_mode = WB_SYNC_NONE;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  979) 		work->range_cyclic = 1;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  980) 		work->reason = reason;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  981) 		work->done = done;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  982) 		work->auto_free = 1;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  983) 		wb_queue_work(wb, work);
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  984) 		ret = 0;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  985) 	} else {
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  986) 		ret = -ENOMEM;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  987) 	}
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  988) 
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  989) 	wb_put(wb);
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  990) out_css_put:
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  991) 	css_put(memcg_css);
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  992) out_bdi_put:
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  993) 	bdi_put(bdi);
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  994) 	return ret;
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  995) }
d62241c7a406f (Tejun Heo             2019-08-26 09:06:55 -0700  996) 
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500  997) /**
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500  998)  * cgroup_writeback_umount - flush inode wb switches for umount
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500  999)  *
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1000)  * This function is called when a super_block is about to be destroyed and
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1001)  * flushes in-flight inode wb switches.  An inode wb switch goes through
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1002)  * RCU and then workqueue, so the two need to be flushed in order to ensure
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1003)  * that all previously scheduled switches are finished.  As wb switches are
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1004)  * rare occurrences and rcu_barrier() can take a while, perform
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1005)  * flushing iff wb switches are in flight.
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1006)  */
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1007) void cgroup_writeback_umount(void)
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1008) {
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1009) 	if (atomic_read(&isw_nr_in_flight)) {
ec084de929e41 (Jiufei Xue            2019-05-17 14:31:44 -0700 1010) 		/*
ec084de929e41 (Jiufei Xue            2019-05-17 14:31:44 -0700 1011) 		 * Use rcu_barrier() to wait for all pending callbacks to
ec084de929e41 (Jiufei Xue            2019-05-17 14:31:44 -0700 1012) 		 * ensure that all in-flight wb switches are in the workqueue.
ec084de929e41 (Jiufei Xue            2019-05-17 14:31:44 -0700 1013) 		 */
ec084de929e41 (Jiufei Xue            2019-05-17 14:31:44 -0700 1014) 		rcu_barrier();
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1015) 		flush_workqueue(isw_wq);
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1016) 	}
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1017) }
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1018) 
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1019) static int __init cgroup_writeback_init(void)
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1020) {
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1021) 	isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1022) 	if (!isw_wq)
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1023) 		return -ENOMEM;
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1024) 	return 0;
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1025) }
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1026) fs_initcall(cgroup_writeback_init);
a1a0e23e49037 (Tejun Heo             2016-02-29 18:28:53 -0500 1027) 
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400 1028) #else	/* CONFIG_CGROUP_WRITEBACK */
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400 1029) 
7fc5854f8c6ef (Tejun Heo             2017-12-12 08:38:30 -0800 1030) static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
7fc5854f8c6ef (Tejun Heo             2017-12-12 08:38:30 -0800 1031) static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
7fc5854f8c6ef (Tejun Heo             2017-12-12 08:38:30 -0800 1032) 
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1033) static struct bdi_writeback *
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1034) locked_inode_to_wb_and_lock_list(struct inode *inode)
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1035) 	__releases(&inode->i_lock)
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1036) 	__acquires(&wb->list_lock)
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1037) {
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1038) 	struct bdi_writeback *wb = inode_to_wb(inode);
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1039) 
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1040) 	spin_unlock(&inode->i_lock);
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1041) 	spin_lock(&wb->list_lock);
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1042) 	return wb;
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1043) }
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1044) 
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1045) static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1046) 	__acquires(&wb->list_lock)
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1047) {
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1048) 	struct bdi_writeback *wb = inode_to_wb(inode);
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1049) 
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1050) 	spin_lock(&wb->list_lock);
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1051) 	return wb;
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1052) }
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1053) 
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400 1054) static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400 1055) {
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400 1056) 	return nr_pages;
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400 1057) }
f2b6512160763 (Tejun Heo             2015-05-22 17:13:55 -0400 1058) 
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 1059) static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 1060) 				  struct wb_writeback_work *base_work,
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 1061) 				  bool skip_if_busy)
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 1062) {
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 1063) 	might_sleep();
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 1064) 
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400 1065) 	if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) {
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 1066) 		base_work->auto_free = 0;
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 1067) 		wb_queue_work(&bdi->wb, base_work);
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 1068) 	}
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 1069) }
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 1070) 
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400 1071) #endif	/* CONFIG_CGROUP_WRITEBACK */
703c270887bb5 (Tejun Heo             2015-05-22 17:13:44 -0400 1072) 
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 1073) /*
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 1074)  * Add in the number of potentially dirty inodes, because each inode
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 1075)  * write can dirty pagecache in the underlying blockdev.
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 1076)  */
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 1077) static unsigned long get_nr_dirty_pages(void)
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 1078) {
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 1079) 	return global_node_page_state(NR_FILE_DIRTY) +
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 1080) 		get_nr_dirty_inodes();
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 1081) }
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 1082) 
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 1083) static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason)
b6e51316daede (Jens Axboe            2009-09-16 15:13:54 +0200 1084) {
c00ddad39f512 (Tejun Heo             2015-05-22 17:13:51 -0400 1085) 	if (!wb_has_dirty_io(wb))
c00ddad39f512 (Tejun Heo             2015-05-22 17:13:51 -0400 1086) 		return;
c00ddad39f512 (Tejun Heo             2015-05-22 17:13:51 -0400 1087) 
aac8d41cd438f (Jens Axboe            2017-09-28 11:31:55 -0600 1088) 	/*
aac8d41cd438f (Jens Axboe            2017-09-28 11:31:55 -0600 1089) 	 * All callers of this function want to start writeback of all
aac8d41cd438f (Jens Axboe            2017-09-28 11:31:55 -0600 1090) 	 * dirty pages. Places like vmscan can call this at a very
aac8d41cd438f (Jens Axboe            2017-09-28 11:31:55 -0600 1091) 	 * high frequency, causing pointless allocations of tons of
aac8d41cd438f (Jens Axboe            2017-09-28 11:31:55 -0600 1092) 	 * work items and keeping the flusher threads busy retrieving
aac8d41cd438f (Jens Axboe            2017-09-28 11:31:55 -0600 1093) 	 * that work. Ensure that we only allow one of them pending and
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 1094) 	 * in flight at a time.
aac8d41cd438f (Jens Axboe            2017-09-28 11:31:55 -0600 1095) 	 */
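	/*
	 * The plain test_bit() cheaply short-circuits the common
	 * already-pending case; only when it is clear do we pay for the
	 * atomic read-modify-write of test_and_set_bit().
	 */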
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 1096) 	if (test_bit(WB_start_all, &wb->state) ||
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 1097) 	    test_and_set_bit(WB_start_all, &wb->state))
aac8d41cd438f (Jens Axboe            2017-09-28 11:31:55 -0600 1098) 		return;
aac8d41cd438f (Jens Axboe            2017-09-28 11:31:55 -0600 1099) 
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 1100) 	wb->start_all_reason = reason;
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 1101) 	wb_wakeup(wb);
c5444198ca210 (Christoph Hellwig     2010-06-08 18:15:15 +0200 1102) }
d3ddec7635b6f (Wu Fengguang          2009-09-23 20:33:40 +0800 1103) 
c5444198ca210 (Christoph Hellwig     2010-06-08 18:15:15 +0200 1104) /**
9ecf4866c018a (Tejun Heo             2015-05-22 17:13:54 -0400 1105)  * wb_start_background_writeback - start background writeback
9ecf4866c018a (Tejun Heo             2015-05-22 17:13:54 -0400 1106)  * @wb: bdi_writeback to write from
c5444198ca210 (Christoph Hellwig     2010-06-08 18:15:15 +0200 1107)  *
c5444198ca210 (Christoph Hellwig     2010-06-08 18:15:15 +0200 1108)  * Description:
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1109)  *   This makes sure WB_SYNC_NONE background writeback happens. When
9ecf4866c018a (Tejun Heo             2015-05-22 17:13:54 -0400 1110)  *   this function returns, it is only guaranteed that for the given wb
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1111)  *   some IO is happening if we are over the background dirty threshold.
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1112)  *   The caller need not hold the sb s_umount semaphore.
c5444198ca210 (Christoph Hellwig     2010-06-08 18:15:15 +0200 1113)  */
9ecf4866c018a (Tejun Heo             2015-05-22 17:13:54 -0400 1114) void wb_start_background_writeback(struct bdi_writeback *wb)
c5444198ca210 (Christoph Hellwig     2010-06-08 18:15:15 +0200 1115) {
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1116) 	/*
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1117) 	 * We just wake up the flusher thread. It will perform background
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1118) 	 * writeback as soon as there is no other work to do.
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1119) 	 */
5634cc2aa9aeb (Tejun Heo             2015-08-18 14:54:56 -0700 1120) 	trace_writeback_wake_background(wb);
9ecf4866c018a (Tejun Heo             2015-05-22 17:13:54 -0400 1121) 	wb_wakeup(wb);
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1122) }
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1123) 
a66979abad090 (Dave Chinner          2011-03-22 22:23:41 +1100 1124) /*
a66979abad090 (Dave Chinner          2011-03-22 22:23:41 +1100 1125)  * Remove the inode from the writeback list it is on.
a66979abad090 (Dave Chinner          2011-03-22 22:23:41 +1100 1126)  */
c7f5408493aeb (Dave Chinner          2015-03-04 14:07:22 -0500 1127) void inode_io_list_del(struct inode *inode)
a66979abad090 (Dave Chinner          2011-03-22 22:23:41 +1100 1128) {
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1129) 	struct bdi_writeback *wb;
f758eeabeb96f (Christoph Hellwig     2011-04-21 18:19:44 -0600 1130) 
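	/* lock order: wb->list_lock nests outside inode->i_lock */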
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 1131) 	wb = inode_to_wb_and_lock_list(inode);
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1132) 	spin_lock(&inode->i_lock);
c7f5408493aeb (Dave Chinner          2015-03-04 14:07:22 -0500 1133) 	inode_io_list_del_locked(inode, wb);
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1134) 	spin_unlock(&inode->i_lock);
52ebea749aaed (Tejun Heo             2015-05-22 17:13:37 -0400 1135) 	spin_unlock(&wb->list_lock);
a66979abad090 (Dave Chinner          2011-03-22 22:23:41 +1100 1136) }
4301efa4c7cca (Jan Kara              2020-04-21 10:54:44 +0200 1137) EXPORT_SYMBOL(inode_io_list_del);
a66979abad090 (Dave Chinner          2011-03-22 22:23:41 +1100 1138) 
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1139) /*
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1140)  * mark an inode as under writeback on the sb
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1141)  */
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1142) void sb_mark_inode_writeback(struct inode *inode)
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1143) {
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1144) 	struct super_block *sb = inode->i_sb;
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1145) 	unsigned long flags;
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1146) 
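	/*
	 * The unlocked list_empty() check avoids taking the wblist lock
	 * in the common case; it is repeated under the lock because
	 * another CPU may have added the inode in the meantime.
	 */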
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1147) 	if (list_empty(&inode->i_wb_list)) {
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1148) 		spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
9a46b04f16a03 (Brian Foster          2016-07-26 15:21:53 -0700 1149) 		if (list_empty(&inode->i_wb_list)) {
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1150) 			list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb);
9a46b04f16a03 (Brian Foster          2016-07-26 15:21:53 -0700 1151) 			trace_sb_mark_inode_writeback(inode);
9a46b04f16a03 (Brian Foster          2016-07-26 15:21:53 -0700 1152) 		}
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1153) 		spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1154) 	}
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1155) }
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1156) 
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1157) /*
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1158)  * Clear an inode's under-writeback state on the sb.
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1159)  */
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1160) void sb_clear_inode_writeback(struct inode *inode)
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1161) {
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1162) 	struct super_block *sb = inode->i_sb;
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1163) 	unsigned long flags;
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1164) 
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1165) 	if (!list_empty(&inode->i_wb_list)) {
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1166) 		spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
9a46b04f16a03 (Brian Foster          2016-07-26 15:21:53 -0700 1167) 		if (!list_empty(&inode->i_wb_list)) {
9a46b04f16a03 (Brian Foster          2016-07-26 15:21:53 -0700 1168) 			list_del_init(&inode->i_wb_list);
9a46b04f16a03 (Brian Foster          2016-07-26 15:21:53 -0700 1169) 			trace_sb_clear_inode_writeback(inode);
9a46b04f16a03 (Brian Foster          2016-07-26 15:21:53 -0700 1170) 		}
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1171) 		spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1172) 	}
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1173) }
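/*
 * A minimal sketch (illustrative only, hypothetical names) of the
 * double-checked idiom both helpers above use: the unlocked
 * list_empty() test skips the irq-safe lock in the common case, and
 * the re-test under s_inode_wblist_lock closes the race with a
 * concurrent mark/clear.
 */
static inline void example_add_once(struct list_head *entry,
				    struct list_head *head, spinlock_t *lock)
{
	unsigned long flags;

	if (list_empty(entry)) {		/* cheap, unlocked hint */
		spin_lock_irqsave(lock, flags);
		if (list_empty(entry))		/* recheck under the lock */
			list_add_tail(entry, head);
		spin_unlock_irqrestore(lock, flags);
	}
}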
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 1174) 
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1175) /*
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1176)  * Redirty an inode: set its when-it-was-dirtied timestamp and move it to the
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1177)  * furthest end of its superblock's dirty-inode list.
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1178)  *
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1179)  * Before stamping the inode's ->dirtied_when, we check to see whether it is
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1180)  * already the most-recently-dirtied inode on the b_dirty list.  If that is
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1181)  * the case then the inode must have been redirtied while it was being written
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1182)  * out and we don't reset its dirtied_when.
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1183)  */
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1184) static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1185) {
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1186) 	assert_spin_locked(&inode->i_lock);
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1187) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1188) 	if (!list_empty(&wb->b_dirty)) {
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1189) 		struct inode *tail;
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1190) 
7ccf19a8042e3 (Nicholas Piggin       2010-10-21 11:49:30 +1100 1191) 		tail = wb_inode(wb->b_dirty.next);
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1192) 		if (time_before(inode->dirtied_when, tail->dirtied_when))
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1193) 			inode->dirtied_when = jiffies;
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1194) 	}
c7f5408493aeb (Dave Chinner          2015-03-04 14:07:22 -0500 1195) 	inode_io_list_move_locked(inode, wb, &wb->b_dirty);
5afced3bf2810 (Jan Kara              2020-05-29 15:05:22 +0200 1196) 	inode->i_state &= ~I_SYNC_QUEUED;
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1197) }
6610a0bc8dcc1 (Andrew Morton         2007-10-16 23:30:32 -0700 1198) 
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1199) static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1200) {
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1201) 	spin_lock(&inode->i_lock);
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1202) 	redirty_tail_locked(inode, wb);
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1203) 	spin_unlock(&inode->i_lock);
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1204) }
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1205) 
c986d1e2a460c (Andrew Morton         2007-10-16 23:30:34 -0700 1206) /*
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1207)  * Requeue an inode for re-scanning after the bdi->b_io list is exhausted.
c986d1e2a460c (Andrew Morton         2007-10-16 23:30:34 -0700 1208)  */
f758eeabeb96f (Christoph Hellwig     2011-04-21 18:19:44 -0600 1209) static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
c986d1e2a460c (Andrew Morton         2007-10-16 23:30:34 -0700 1210) {
c7f5408493aeb (Dave Chinner          2015-03-04 14:07:22 -0500 1211) 	inode_io_list_move_locked(inode, wb, &wb->b_more_io);
c986d1e2a460c (Andrew Morton         2007-10-16 23:30:34 -0700 1212) }
c986d1e2a460c (Andrew Morton         2007-10-16 23:30:34 -0700 1213) 
1c0eeaf569859 (Joern Engel           2007-10-16 23:30:44 -0700 1214) static void inode_sync_complete(struct inode *inode)
1c0eeaf569859 (Joern Engel           2007-10-16 23:30:44 -0700 1215) {
365b94ae67d29 (Jan Kara              2012-05-03 14:47:55 +0200 1216) 	inode->i_state &= ~I_SYNC;
4eff96dd5283a (Jan Kara              2012-11-26 16:29:51 -0800 1217) 	/* If inode is clean and unused, put it into LRU now... */
4eff96dd5283a (Jan Kara              2012-11-26 16:29:51 -0800 1218) 	inode_add_lru(inode);
365b94ae67d29 (Jan Kara              2012-05-03 14:47:55 +0200 1219) 	/* Waiters must see I_SYNC cleared before being woken up */
1c0eeaf569859 (Joern Engel           2007-10-16 23:30:44 -0700 1220) 	smp_mb();
1c0eeaf569859 (Joern Engel           2007-10-16 23:30:44 -0700 1221) 	wake_up_bit(&inode->i_state, __I_SYNC);
1c0eeaf569859 (Joern Engel           2007-10-16 23:30:44 -0700 1222) }
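/*
 * Note: the smp_mb()/wake_up_bit() pair above is matched by the waiters
 * below - __inode_wait_for_writeback() re-checks I_SYNC under i_lock
 * after every wakeup, so a waiter either observes I_SYNC already clear
 * or is woken after it has been cleared.
 */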
1c0eeaf569859 (Joern Engel           2007-10-16 23:30:44 -0700 1223) 
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1224) static bool inode_dirtied_after(struct inode *inode, unsigned long t)
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1225) {
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1226) 	bool ret = time_after(inode->dirtied_when, t);
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1227) #ifndef CONFIG_64BIT
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1228) 	/*
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1229) 	 * For inodes being constantly redirtied, dirtied_when can get stuck.
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1230) 	 * It _appears_ to be in the future, but is actually in the distant past.
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1231) 	 * This test is necessary to prevent such wrapped-around relative times
5b0830cb9085f (Jens Axboe            2009-09-23 19:37:09 +0200 1232) 	 * from permanently stopping the whole bdi writeback.
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1233) 	 */
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1234) 	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1235) #endif
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1236) 	return ret;
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1237) }
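/*
 * Illustration of the wraparound case guarded against above (a sketch,
 * not kernel code): jiffies comparisons are signed differences, so two
 * stamps more than LONG_MAX ticks apart compare "backwards" on 32-bit:
 *
 *	unsigned long old = 1;			/* dirtied long ago */
 *	unsigned long now = old + (1UL << 31);	/* ~24.8 days later at HZ=1000 */
 *	time_after(old, now);			/* true: stale stamp looks "future" */
 *
 * The extra time_before_eq(dirtied_when, jiffies) test rejects exactly
 * such wrapped values.
 */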
d2caa3c549c74 (Jeff Layton           2009-04-02 16:56:37 -0700 1238) 
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 1239) #define EXPIRE_DIRTY_ATIME 0x0001
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 1240) 
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1241) /*
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1242)  * Move expired (dirtied before dirtied_before) dirty inodes from
697e6fed9fc62 (Jan Kara              2012-03-09 07:26:22 -0800 1243)  * @delaying_queue to @dispatch_queue.
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1244)  */
e84d0a4f8e39a (Wu Fengguang          2011-04-23 12:27:27 -0600 1245) static int move_expired_inodes(struct list_head *delaying_queue,
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1246) 			       struct list_head *dispatch_queue,
5fcd57505c002 (Jan Kara              2020-05-29 16:24:43 +0200 1247) 			       unsigned long dirtied_before)
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1248) {
5c03449d34deb (Shaohua Li            2009-09-24 14:42:33 +0200 1249) 	LIST_HEAD(tmp);
5c03449d34deb (Shaohua Li            2009-09-24 14:42:33 +0200 1250) 	struct list_head *pos, *node;
cf137307cd982 (Jens Axboe            2009-09-24 15:12:57 +0200 1251) 	struct super_block *sb = NULL;
5c03449d34deb (Shaohua Li            2009-09-24 14:42:33 +0200 1252) 	struct inode *inode;
cf137307cd982 (Jens Axboe            2009-09-24 15:12:57 +0200 1253) 	int do_sb_sort = 0;
e84d0a4f8e39a (Wu Fengguang          2011-04-23 12:27:27 -0600 1254) 	int moved = 0;
5c03449d34deb (Shaohua Li            2009-09-24 14:42:33 +0200 1255) 
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1256) 	while (!list_empty(delaying_queue)) {
7ccf19a8042e3 (Nicholas Piggin       2010-10-21 11:49:30 +1100 1257) 		inode = wb_inode(delaying_queue->prev);
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1258) 		if (inode_dirtied_after(inode, dirtied_before))
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1259) 			break;
c7f5408493aeb (Dave Chinner          2015-03-04 14:07:22 -0500 1260) 		list_move(&inode->i_io_list, &tmp);
a8855990e382f (Jan Kara              2013-07-09 22:36:45 +0800 1261) 		moved++;
5afced3bf2810 (Jan Kara              2020-05-29 15:05:22 +0200 1262) 		spin_lock(&inode->i_lock);
5afced3bf2810 (Jan Kara              2020-05-29 15:05:22 +0200 1263) 		inode->i_state |= I_SYNC_QUEUED;
5afced3bf2810 (Jan Kara              2020-05-29 15:05:22 +0200 1264) 		spin_unlock(&inode->i_lock);
a8855990e382f (Jan Kara              2013-07-09 22:36:45 +0800 1265) 		if (sb_is_blkdev_sb(inode->i_sb))
a8855990e382f (Jan Kara              2013-07-09 22:36:45 +0800 1266) 			continue;
cf137307cd982 (Jens Axboe            2009-09-24 15:12:57 +0200 1267) 		if (sb && sb != inode->i_sb)
cf137307cd982 (Jens Axboe            2009-09-24 15:12:57 +0200 1268) 			do_sb_sort = 1;
cf137307cd982 (Jens Axboe            2009-09-24 15:12:57 +0200 1269) 		sb = inode->i_sb;
5c03449d34deb (Shaohua Li            2009-09-24 14:42:33 +0200 1270) 	}
5c03449d34deb (Shaohua Li            2009-09-24 14:42:33 +0200 1271) 
cf137307cd982 (Jens Axboe            2009-09-24 15:12:57 +0200 1272) 	/* just one sb in list, splice to dispatch_queue and we're done */
cf137307cd982 (Jens Axboe            2009-09-24 15:12:57 +0200 1273) 	if (!do_sb_sort) {
cf137307cd982 (Jens Axboe            2009-09-24 15:12:57 +0200 1274) 		list_splice(&tmp, dispatch_queue);
e84d0a4f8e39a (Wu Fengguang          2011-04-23 12:27:27 -0600 1275) 		goto out;
cf137307cd982 (Jens Axboe            2009-09-24 15:12:57 +0200 1276) 	}
cf137307cd982 (Jens Axboe            2009-09-24 15:12:57 +0200 1277) 
5c03449d34deb (Shaohua Li            2009-09-24 14:42:33 +0200 1278) 	/* Move inodes from one superblock together */
5c03449d34deb (Shaohua Li            2009-09-24 14:42:33 +0200 1279) 	while (!list_empty(&tmp)) {
7ccf19a8042e3 (Nicholas Piggin       2010-10-21 11:49:30 +1100 1280) 		sb = wb_inode(tmp.prev)->i_sb;
5c03449d34deb (Shaohua Li            2009-09-24 14:42:33 +0200 1281) 		list_for_each_prev_safe(pos, node, &tmp) {
7ccf19a8042e3 (Nicholas Piggin       2010-10-21 11:49:30 +1100 1282) 			inode = wb_inode(pos);
5c03449d34deb (Shaohua Li            2009-09-24 14:42:33 +0200 1283) 			if (inode->i_sb == sb)
c7f5408493aeb (Dave Chinner          2015-03-04 14:07:22 -0500 1284) 				list_move(&inode->i_io_list, dispatch_queue);
5c03449d34deb (Shaohua Li            2009-09-24 14:42:33 +0200 1285) 		}
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1286) 	}
e84d0a4f8e39a (Wu Fengguang          2011-04-23 12:27:27 -0600 1287) out:
e84d0a4f8e39a (Wu Fengguang          2011-04-23 12:27:27 -0600 1288) 	return moved;
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1289) }
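/*
 * Keeping each superblock's inodes adjacent on @dispatch_queue lets
 * writeback_sb_inodes() below drain one sb in a single pass instead of
 * bouncing back to its caller to re-pin a different superblock for
 * every inode.
 */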
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1290) 
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1291) /*
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1292)  * Queue all expired dirty inodes for io, eldest first.
4ea879b96d437 (Wu Fengguang          2010-08-11 14:17:42 -0700 1293)  * Before
4ea879b96d437 (Wu Fengguang          2010-08-11 14:17:42 -0700 1294)  *         newly dirtied     b_dirty    b_io    b_more_io
4ea879b96d437 (Wu Fengguang          2010-08-11 14:17:42 -0700 1295)  *         =============>    gf         edc     BA
4ea879b96d437 (Wu Fengguang          2010-08-11 14:17:42 -0700 1296)  * After
4ea879b96d437 (Wu Fengguang          2010-08-11 14:17:42 -0700 1297)  *         newly dirtied     b_dirty    b_io    b_more_io
4ea879b96d437 (Wu Fengguang          2010-08-11 14:17:42 -0700 1298)  *         =============>    g          fBAedc
4ea879b96d437 (Wu Fengguang          2010-08-11 14:17:42 -0700 1299)  *                                           |
4ea879b96d437 (Wu Fengguang          2010-08-11 14:17:42 -0700 1300)  *                                           +--> dequeue for IO
2c1365791048e (Fengguang Wu          2007-10-16 23:30:39 -0700 1301)  */
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1302) static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1303) 		     unsigned long dirtied_before)
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1304) {
e84d0a4f8e39a (Wu Fengguang          2011-04-23 12:27:27 -0600 1305) 	int moved;
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1306) 	unsigned long time_expire_jif = dirtied_before;
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 1307) 
f758eeabeb96f (Christoph Hellwig     2011-04-21 18:19:44 -0600 1308) 	assert_spin_locked(&wb->list_lock);
4ea879b96d437 (Wu Fengguang          2010-08-11 14:17:42 -0700 1309) 	list_splice_init(&wb->b_more_io, &wb->b_io);
5fcd57505c002 (Jan Kara              2020-05-29 16:24:43 +0200 1310) 	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before);
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1311) 	if (!work->for_sync)
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1312) 		time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 1313) 	moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
5fcd57505c002 (Jan Kara              2020-05-29 16:24:43 +0200 1314) 				     time_expire_jif);
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 1315) 	if (moved)
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 1316) 		wb_io_lists_populated(wb);
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1317) 	trace_writeback_queue_io(wb, work, dirtied_before, moved);
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1318) }
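/*
 * A sketch of the intended calling pattern (hypothetical caller,
 * loosely modelled on the flusher loop; illustrative only): b_io is
 * refilled under wb->list_lock, and only once the previous batch has
 * been exhausted.
 */
static void example_refill_batch(struct bdi_writeback *wb,
				 struct wb_writeback_work *work)
{
	spin_lock(&wb->list_lock);
	if (list_empty(&wb->b_io))
		queue_io(wb, work, jiffies);	/* expire all inodes dirtied before now */
	spin_unlock(&wb->list_lock);

	/* ... then write back the inodes now sitting on wb->b_io ... */
}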
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1319) 
a9185b41a4f84 (Christoph Hellwig     2010-03-05 09:21:37 +0100 1320) static int write_inode(struct inode *inode, struct writeback_control *wbc)
08d8e9749e7f0 (Fengguang Wu          2007-10-16 23:30:39 -0700 1321) {
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 1322) 	int ret;
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 1323) 
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 1324) 	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 1325) 		trace_writeback_write_inode_start(inode, wbc);
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 1326) 		ret = inode->i_sb->s_op->write_inode(inode, wbc);
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 1327) 		trace_writeback_write_inode(inode, wbc);
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 1328) 		return ret;
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 1329) 	}
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1330) 	return 0;
08d8e9749e7f0 (Fengguang Wu          2007-10-16 23:30:39 -0700 1331) }
08d8e9749e7f0 (Fengguang Wu          2007-10-16 23:30:39 -0700 1332) 
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1333) /*
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1334)  * Wait for writeback on an inode to complete. Called with i_lock held.
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1335)  * Caller must make sure inode cannot go away when we drop i_lock.
01c031945f275 (Christoph Hellwig     2009-06-08 13:35:40 +0200 1336)  */
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1337) static void __inode_wait_for_writeback(struct inode *inode)
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1338) 	__releases(inode->i_lock)
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1339) 	__acquires(inode->i_lock)
01c031945f275 (Christoph Hellwig     2009-06-08 13:35:40 +0200 1340) {
01c031945f275 (Christoph Hellwig     2009-06-08 13:35:40 +0200 1341) 	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
01c031945f275 (Christoph Hellwig     2009-06-08 13:35:40 +0200 1342) 	wait_queue_head_t *wqh;
01c031945f275 (Christoph Hellwig     2009-06-08 13:35:40 +0200 1343) 
01c031945f275 (Christoph Hellwig     2009-06-08 13:35:40 +0200 1344) 	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 1345) 	while (inode->i_state & I_SYNC) {
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 1346) 		spin_unlock(&inode->i_lock);
743162013d40c (NeilBrown             2014-07-07 15:16:04 +1000 1347) 		__wait_on_bit(wqh, &wq, bit_wait,
743162013d40c (NeilBrown             2014-07-07 15:16:04 +1000 1348) 			      TASK_UNINTERRUPTIBLE);
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 1349) 		spin_lock(&inode->i_lock);
58a9d3d8db06c (Richard Kennedy       2010-05-24 14:32:38 -0700 1350) 	}
01c031945f275 (Christoph Hellwig     2009-06-08 13:35:40 +0200 1351) }
01c031945f275 (Christoph Hellwig     2009-06-08 13:35:40 +0200 1352) 
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1353) /*
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1354)  * Wait for writeback on an inode to complete. Caller must have inode pinned.
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1355)  */
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1356) void inode_wait_for_writeback(struct inode *inode)
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1357) {
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1358) 	spin_lock(&inode->i_lock);
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1359) 	__inode_wait_for_writeback(inode);
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1360) 	spin_unlock(&inode->i_lock);
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1361) }
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1362) 
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1363) /*
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1364)  * Sleep until I_SYNC is cleared. This function must be called with i_lock
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1365)  * held and drops it. It is aimed at callers not holding any inode reference
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1366)  * so once i_lock is dropped, inode can go away.
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1367)  */
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1368) static void inode_sleep_on_writeback(struct inode *inode)
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1369) 	__releases(inode->i_lock)
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1370) {
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1371) 	DEFINE_WAIT(wait);
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1372) 	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1373) 	int sleep;
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1374) 
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1375) 	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1376) 	sleep = inode->i_state & I_SYNC;
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1377) 	spin_unlock(&inode->i_lock);
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1378) 	if (sleep)
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1379) 		schedule();
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1380) 	finish_wait(wqh, &wait);
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1381) }
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1382) 
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1383) /*
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1384)  * Find the proper writeback list for the inode, depending on its current state
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1385)  * and on any change of its state while we were doing writeback.  Here we
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1386)  * handle things such as livelock prevention and fairness of writeback among
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1387)  * inodes. This function can be called only by the flusher thread - no one else
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1388)  * processes all inodes on the writeback lists, and requeueing inodes behind the
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1389)  * flusher thread's back can have unexpected consequences.
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1390)  */
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1391) static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1392) 			  struct writeback_control *wbc)
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1393) {
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1394) 	if (inode->i_state & I_FREEING)
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1395) 		return;
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1396) 
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1397) 	/*
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1398) 	 * Sync livelock prevention. Each inode is tagged and synced in one
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1399) 	 * shot. If still dirty, it will be redirty_tail()'ed below.  Update
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1400) 	 * the dirty time to prevent it from being enqueued and synced again.
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1401) 	 */
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1402) 	if ((inode->i_state & I_DIRTY) &&
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1403) 	    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1404) 		inode->dirtied_when = jiffies;
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1405) 
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1406) 	if (wbc->pages_skipped) {
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1407) 		/*
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1408) 		 * writeback is not making progress due to locked
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1409) 		 * buffers. Skip this inode for now.
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1410) 		 */
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1411) 		redirty_tail_locked(inode, wb);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1412) 		return;
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1413) 	}
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1414) 
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1415) 	if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1416) 		/*
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1417) 		 * We didn't write back all the pages.  nfs_writepages()
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1418) 		 * sometimes bails out without doing anything.
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1419) 		 */
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1420) 		if (wbc->nr_to_write <= 0) {
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1421) 			/* Slice used up. Queue for next turn. */
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1422) 			requeue_io(inode, wb);
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1423) 		} else {
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1424) 			/*
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1425) 			 * Writeback blocked by something other than
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1426) 			 * congestion. Delay the inode for some time to
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1427) 			 * avoid spinning on the CPU (100% iowait)
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1428) 			 * retrying writeback of the dirty page/inode
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1429) 			 * that cannot be performed immediately.
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1430) 			 */
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1431) 			redirty_tail_locked(inode, wb);
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1432) 		}
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1433) 	} else if (inode->i_state & I_DIRTY) {
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1434) 		/*
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1435) 		 * Filesystems can dirty the inode during writeback operations,
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1436) 		 * such as delayed allocation during submission or metadata
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1437) 		 * updates after data IO completion.
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1438) 		 */
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1439) 		redirty_tail_locked(inode, wb);
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 1440) 	} else if (inode->i_state & I_DIRTY_TIME) {
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 1441) 		inode->dirtied_when = jiffies;
c7f5408493aeb (Dave Chinner          2015-03-04 14:07:22 -0500 1442) 		inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
5afced3bf2810 (Jan Kara              2020-05-29 15:05:22 +0200 1443) 		inode->i_state &= ~I_SYNC_QUEUED;
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1444) 	} else {
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1445) 		/* The inode is clean. Remove from writeback lists. */
c7f5408493aeb (Dave Chinner          2015-03-04 14:07:22 -0500 1446) 		inode_io_list_del_locked(inode, wb);
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1447) 	}
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1448) }
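/*
 * Summary of the requeueing decisions above:
 *
 *	pages skipped (locked buffers)	-> b_dirty	(redirty_tail_locked)
 *	dirty pages, slice used up	-> b_more_io	(requeue_io)
 *	dirty pages, otherwise blocked	-> b_dirty	(redirty_tail_locked)
 *	inode itself still I_DIRTY	-> b_dirty	(redirty_tail_locked)
 *	only I_DIRTY_TIME left		-> b_dirty_time
 *	fully clean			-> off the writeback lists
 */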
ccb26b5a65867 (Jan Kara              2012-05-03 14:47:58 +0200 1449) 
01c031945f275 (Christoph Hellwig     2009-06-08 13:35:40 +0200 1450) /*
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1451)  * Write out an inode and its dirty pages (or some of its dirty pages, depending
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1452)  * on @wbc->nr_to_write), and clear the relevant dirty flags from i_state.
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1453)  *
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1454)  * This doesn't remove the inode from the writeback list it is on, except
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1455)  * potentially to move it from b_dirty_time to b_dirty due to timestamp
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1456)  * expiration.  The caller is otherwise responsible for writeback list handling.
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1457)  *
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1458)  * The caller is also responsible for setting the I_SYNC flag beforehand and
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1459)  * calling inode_sync_complete() to clear it afterwards.
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1460)  */
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1461) static int
cd8ed2a45a401 (Yan Hong              2012-10-08 16:33:45 -0700 1462) __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1463) {
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1464) 	struct address_space *mapping = inode->i_mapping;
251d6a471c831 (Wu Fengguang          2010-12-01 17:33:37 -0600 1465) 	long nr_to_write = wbc->nr_to_write;
01c031945f275 (Christoph Hellwig     2009-06-08 13:35:40 +0200 1466) 	unsigned dirty;
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1467) 	int ret;
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1468) 
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1469) 	WARN_ON(!(inode->i_state & I_SYNC));
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1470) 
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 1471) 	trace_writeback_single_inode_start(inode, wbc, nr_to_write);
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 1472) 
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1473) 	ret = do_writepages(mapping, wbc);
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1474) 
26821ed40b423 (Christoph Hellwig     2010-03-05 09:21:21 +0100 1475) 	/*
26821ed40b423 (Christoph Hellwig     2010-03-05 09:21:21 +0100 1476) 	 * Make sure to wait on the data before writing out the metadata.
26821ed40b423 (Christoph Hellwig     2010-03-05 09:21:21 +0100 1477) 	 * This is important for filesystems that modify metadata on data
7747bd4bceb30 (Dave Chinner          2013-07-02 22:38:35 +1000 1478) 	 * I/O completion. We don't do it for sync(2) writeback because it has a
7747bd4bceb30 (Dave Chinner          2013-07-02 22:38:35 +1000 1479) 	 * separate, external IO completion path and ->sync_fs for guaranteeing
7747bd4bceb30 (Dave Chinner          2013-07-02 22:38:35 +1000 1480) 	 * inode metadata is written back correctly.
26821ed40b423 (Christoph Hellwig     2010-03-05 09:21:21 +0100 1481) 	 */
7747bd4bceb30 (Dave Chinner          2013-07-02 22:38:35 +1000 1482) 	if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) {
26821ed40b423 (Christoph Hellwig     2010-03-05 09:21:21 +0100 1483) 		int err = filemap_fdatawait(mapping);
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1484) 		if (ret == 0)
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1485) 			ret = err;
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1486) 	}
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1487) 
5547e8aac6f71 (Dmitry Monakhov       2010-05-07 13:35:44 +0400 1488) 	/*
1e249cb5b7fc0 (Eric Biggers          2021-01-12 11:02:43 -0800 1489) 	 * If the inode has dirty timestamps and we need to write them, call
1e249cb5b7fc0 (Eric Biggers          2021-01-12 11:02:43 -0800 1490) 	 * mark_inode_dirty_sync() to notify the filesystem about it and to
1e249cb5b7fc0 (Eric Biggers          2021-01-12 11:02:43 -0800 1491) 	 * change I_DIRTY_TIME into I_DIRTY_SYNC.
5547e8aac6f71 (Dmitry Monakhov       2010-05-07 13:35:44 +0400 1492) 	 */
5fcd57505c002 (Jan Kara              2020-05-29 16:24:43 +0200 1493) 	if ((inode->i_state & I_DIRTY_TIME) &&
83dc881d678a8 (Eric Biggers          2021-01-12 11:02:50 -0800 1494) 	    (wbc->sync_mode == WB_SYNC_ALL ||
5fcd57505c002 (Jan Kara              2020-05-29 16:24:43 +0200 1495) 	     time_after(jiffies, inode->dirtied_time_when +
5fcd57505c002 (Jan Kara              2020-05-29 16:24:43 +0200 1496) 			dirtytime_expire_interval * HZ))) {
5fcd57505c002 (Jan Kara              2020-05-29 16:24:43 +0200 1497) 		trace_writeback_lazytime(inode);
1e249cb5b7fc0 (Eric Biggers          2021-01-12 11:02:43 -0800 1498) 		mark_inode_dirty_sync(inode);
5fcd57505c002 (Jan Kara              2020-05-29 16:24:43 +0200 1499) 	}
1e249cb5b7fc0 (Eric Biggers          2021-01-12 11:02:43 -0800 1500) 
1e249cb5b7fc0 (Eric Biggers          2021-01-12 11:02:43 -0800 1501) 	/*
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1502) 	 * Get and clear the dirty flags from i_state.  This needs to be done
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1503) 	 * after calling writepages because some filesystems may redirty the
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1504) 	 * inode during writepages due to delalloc.  It also needs to be done
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1505) 	 * after handling timestamp expiration, as that may dirty the inode too.
1e249cb5b7fc0 (Eric Biggers          2021-01-12 11:02:43 -0800 1506) 	 */
1e249cb5b7fc0 (Eric Biggers          2021-01-12 11:02:43 -0800 1507) 	spin_lock(&inode->i_lock);
1e249cb5b7fc0 (Eric Biggers          2021-01-12 11:02:43 -0800 1508) 	dirty = inode->i_state & I_DIRTY;
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 1509) 	inode->i_state &= ~dirty;
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1510) 
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1511) 	/*
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1512) 	 * Paired with smp_mb() in __mark_inode_dirty().  This allows
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1513) 	 * __mark_inode_dirty() to test i_state without grabbing i_lock -
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1514) 	 * either they see the I_DIRTY bits cleared or we see the dirtied
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1515) 	 * inode.
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1516) 	 *
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1517) 	 * I_DIRTY_PAGES is always cleared together above even if @mapping
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1518) 	 * still has dirty pages.  The flag is reinstated after smp_mb() if
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1519) 	 * necessary.  This guarantees that either __mark_inode_dirty()
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1520) 	 * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1521) 	 */
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1522) 	smp_mb();
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1523) 
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1524) 	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1525) 		inode->i_state |= I_DIRTY_PAGES;
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1526) 
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 1527) 	spin_unlock(&inode->i_lock);
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 1528) 
26821ed40b423 (Christoph Hellwig     2010-03-05 09:21:21 +0100 1529) 	/* Don't write the inode if only I_DIRTY_PAGES was set */
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 1530) 	if (dirty & ~I_DIRTY_PAGES) {
a9185b41a4f84 (Christoph Hellwig     2010-03-05 09:21:37 +0100 1531) 		int err = write_inode(inode, wbc);
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1532) 		if (ret == 0)
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1533) 			ret = err;
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1534) 	}
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1535) 	trace_writeback_single_inode(inode, wbc, nr_to_write);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1536) 	return ret;
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1537) }
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1538) 
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1539) /*
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1540)  * Write out an inode's dirty data and metadata on-demand, i.e. separately from
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1541)  * the regular batched writeback done by the flusher threads in
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1542)  * writeback_sb_inodes().  @wbc controls various aspects of the write, such as
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1543)  * whether it is a data-integrity sync (%WB_SYNC_ALL) or not (%WB_SYNC_NONE).
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1544)  *
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1545)  * To prevent the inode from going away, either the caller must have a reference
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1546)  * to the inode, or the inode must have I_WILL_FREE or I_FREEING set.
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1547)  */
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1548) static int writeback_single_inode(struct inode *inode,
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1549) 				  struct writeback_control *wbc)
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1550) {
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1551) 	struct bdi_writeback *wb;
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1552) 	int ret = 0;
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1553) 
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1554) 	spin_lock(&inode->i_lock);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1555) 	if (!atomic_read(&inode->i_count))
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1556) 		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1557) 	else
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1558) 		WARN_ON(inode->i_state & I_WILL_FREE);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1559) 
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1560) 	if (inode->i_state & I_SYNC) {
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1561) 		/*
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1562) 		 * Writeback is already running on the inode.  For WB_SYNC_NONE,
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1563) 		 * that's enough and we can just return.  For WB_SYNC_ALL, we
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1564) 		 * must wait for the existing writeback to complete, then do
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1565) 		 * writeback again if there's anything left.
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1566) 		 */
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1567) 		if (wbc->sync_mode != WB_SYNC_ALL)
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1568) 			goto out;
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1569) 		__inode_wait_for_writeback(inode);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1570) 	}
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1571) 	WARN_ON(inode->i_state & I_SYNC);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1572) 	/*
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1573) 	 * If the inode is already fully clean, then there's nothing to do.
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1574) 	 *
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1575) 	 * For data-integrity syncs we also need to check whether any pages are
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1576) 	 * still under writeback, e.g. due to prior WB_SYNC_NONE writeback.  If
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1577) 	 * there are any such pages, we'll need to wait for them.
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1578) 	 */
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 1579) 	if (!(inode->i_state & I_DIRTY_ALL) &&
f9b0e058cbd04 (Jan Kara              2013-12-14 04:21:26 +0800 1580) 	    (wbc->sync_mode != WB_SYNC_ALL ||
f9b0e058cbd04 (Jan Kara              2013-12-14 04:21:26 +0800 1581) 	     !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1582) 		goto out;
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1583) 	inode->i_state |= I_SYNC;
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600 1584) 	wbc_attach_and_unlock_inode(wbc, inode);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1585) 
cd8ed2a45a401 (Yan Hong              2012-10-08 16:33:45 -0700 1586) 	ret = __writeback_single_inode(inode, wbc);
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1587) 
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600 1588) 	wbc_detach_inode(wbc);
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1589) 
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1590) 	wb = inode_to_wb_and_lock_list(inode);
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 1591) 	spin_lock(&inode->i_lock);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1592) 	/*
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1593) 	 * If the inode is now fully clean, then it can be safely removed from
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1594) 	 * its writeback list (if any).  Otherwise the flusher threads are
da0c4c60d8c7c (Eric Biggers          2021-01-12 11:02:51 -0800 1595) 	 * responsible for the writeback lists.
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1596) 	 */
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 1597) 	if (!(inode->i_state & I_DIRTY_ALL))
c7f5408493aeb (Dave Chinner          2015-03-04 14:07:22 -0500 1598) 		inode_io_list_del_locked(inode, wb);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1599) 	spin_unlock(&wb->list_lock);
1c0eeaf569859 (Joern Engel           2007-10-16 23:30:44 -0700 1600) 	inode_sync_complete(inode);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1601) out:
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1602) 	spin_unlock(&inode->i_lock);
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1603) 	return ret;
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1604) }
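/*
 * A sketch of typical on-demand use (hypothetical helper name, modelled
 * on callers such as write_inode_now(); illustrative only): a
 * data-integrity sync of a single pinned inode.
 */
static int example_sync_one_inode(struct inode *inode)
{
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_ALL,	/* wait on data, then metadata */
		.nr_to_write	= LONG_MAX,	/* no chunking for one inode */
		.range_start	= 0,
		.range_end	= LLONG_MAX,
	};

	return writeback_single_inode(inode, &wbc);
}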
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1605) 
a88a341a73be4 (Tejun Heo             2015-05-22 17:13:28 -0400 1606) static long writeback_chunk_size(struct bdi_writeback *wb,
1a12d8bd7b299 (Wu Fengguang          2010-08-29 13:28:09 -0600 1607) 				 struct wb_writeback_work *work)
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1608) {
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1609) 	long pages;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1610) 
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1611) 	/*
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1612) 	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1613) 	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1614) 	 * here avoids calling into writeback_inodes_wb() more than once.
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1615) 	 *
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1616) 	 * The intended call sequence for WB_SYNC_ALL writeback is:
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1617) 	 *
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1618) 	 *      wb_writeback()
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1619) 	 *          writeback_sb_inodes()       <== called only once
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1620) 	 *              write_cache_pages()     <== called once for each inode
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1621) 	 *                   (quickly) tag currently dirty pages
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1622) 	 *                   (maybe slowly) sync all tagged pages
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1623) 	 */
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1624) 	if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1625) 		pages = LONG_MAX;
1a12d8bd7b299 (Wu Fengguang          2010-08-29 13:28:09 -0600 1626) 	else {
a88a341a73be4 (Tejun Heo             2015-05-22 17:13:28 -0400 1627) 		pages = min(wb->avg_write_bandwidth / 2,
dcc25ae76eb7b (Tejun Heo             2015-05-22 18:23:22 -0400 1628) 			    global_wb_domain.dirty_limit / DIRTY_SCOPE);
1a12d8bd7b299 (Wu Fengguang          2010-08-29 13:28:09 -0600 1629) 		pages = min(pages, work->nr_pages);
1a12d8bd7b299 (Wu Fengguang          2010-08-29 13:28:09 -0600 1630) 		pages = round_down(pages + MIN_WRITEBACK_PAGES,
1a12d8bd7b299 (Wu Fengguang          2010-08-29 13:28:09 -0600 1631) 				   MIN_WRITEBACK_PAGES);
1a12d8bd7b299 (Wu Fengguang          2010-08-29 13:28:09 -0600 1632) 	}
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1633) 
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1634) 	return pages;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1635) }
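/*
 * Worked example for the WB_SYNC_NONE branch (illustrative numbers
 * only): with 4k pages, MIN_WRITEBACK_PAGES is 4096 >> 2 == 1024 pages
 * (4MB).  If avg_write_bandwidth / 2 == 12500 pages while the
 * dirty-limit share and work->nr_pages are both larger, then
 * round_down(12500 + 1024, 1024) == 13312: the chunk is a whole
 * multiple of 4MB and never smaller than 4MB.
 */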
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1636) 
f11c9c5c259cb (Edward Shishkin       2010-03-11 14:09:47 -0800 1637) /*
f11c9c5c259cb (Edward Shishkin       2010-03-11 14:09:47 -0800 1638)  * Write a portion of b_io inodes which belong to @sb.
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1639)  *
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1640)  * Return the number of pages and/or inodes written.
0ba13fd19d39b (Linus Torvalds        2015-09-11 13:26:39 -0700 1641)  *
0ba13fd19d39b (Linus Torvalds        2015-09-11 13:26:39 -0700 1642)  * NOTE! This is called with wb->list_lock held, and will
0ba13fd19d39b (Linus Torvalds        2015-09-11 13:26:39 -0700 1643)  * unlock and relock that for each inode it ends up doing
0ba13fd19d39b (Linus Torvalds        2015-09-11 13:26:39 -0700 1644)  * IO for.
f11c9c5c259cb (Edward Shishkin       2010-03-11 14:09:47 -0800 1645)  */
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1646) static long writeback_sb_inodes(struct super_block *sb,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1647) 				struct bdi_writeback *wb,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1648) 				struct wb_writeback_work *work)
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1649) {
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1650) 	struct writeback_control wbc = {
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1651) 		.sync_mode		= work->sync_mode,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1652) 		.tagged_writepages	= work->tagged_writepages,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1653) 		.for_kupdate		= work->for_kupdate,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1654) 		.for_background		= work->for_background,
7747bd4bceb30 (Dave Chinner          2013-07-02 22:38:35 +1000 1655) 		.for_sync		= work->for_sync,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1656) 		.range_cyclic		= work->range_cyclic,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1657) 		.range_start		= 0,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1658) 		.range_end		= LLONG_MAX,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1659) 	};
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1660) 	unsigned long start_time = jiffies;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1661) 	long write_chunk;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1662) 	long wrote = 0;  /* count both pages and inodes */
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1663) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1664) 	while (!list_empty(&wb->b_io)) {
7ccf19a8042e3 (Nicholas Piggin       2010-10-21 11:49:30 +1100 1665) 		struct inode *inode = wb_inode(wb->b_io.prev);
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1666) 		struct bdi_writeback *tmp_wb;
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1667) 
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1668) 		if (inode->i_sb != sb) {
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1669) 			if (work->sb) {
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1670) 				/*
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1671) 				 * We only want to write back data for this
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1672) 				 * superblock; move all inodes not belonging
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1673) 				 * to it back onto the dirty list.
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1674) 				 */
f758eeabeb96f (Christoph Hellwig     2011-04-21 18:19:44 -0600 1675) 				redirty_tail(inode, wb);
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1676) 				continue;
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1677) 			}
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1678) 
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1679) 			/*
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1680) 			 * The inode belongs to a different superblock.
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1681) 			 * Bounce back to the caller to unpin this and
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1682) 			 * pin the next superblock.
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1683) 			 */
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1684) 			break;
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1685) 		}
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1686) 
9843b76aae802 (Christoph Hellwig     2010-10-24 19:40:46 +0200 1687) 		/*
331cbdeedeb2f (Wanpeng Li            2012-06-09 11:10:55 +0800 1688) 		 * Don't bother with new inodes or inodes being freed: the first
331cbdeedeb2f (Wanpeng Li            2012-06-09 11:10:55 +0800 1689) 		 * kind does not need periodic writeout yet, and for the latter
9843b76aae802 (Christoph Hellwig     2010-10-24 19:40:46 +0200 1690) 		 * kind writeout is handled by the freer.
9843b76aae802 (Christoph Hellwig     2010-10-24 19:40:46 +0200 1691) 		 */
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 1692) 		spin_lock(&inode->i_lock);
9843b76aae802 (Christoph Hellwig     2010-10-24 19:40:46 +0200 1693) 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
b35250c0816c7 (Jan Kara              2020-06-10 17:36:03 +0200 1694) 			redirty_tail_locked(inode, wb);
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 1695) 			spin_unlock(&inode->i_lock);
7ef0d7377cb28 (Nicholas Piggin       2009-03-12 14:31:38 -0700 1696) 			continue;
7ef0d7377cb28 (Nicholas Piggin       2009-03-12 14:31:38 -0700 1697) 		}
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1698) 		if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1699) 			/*
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1700) 			 * If this inode is locked for writeback and we are not
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1701) 			 * doing writeback-for-data-integrity, move it to
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1702) 			 * b_more_io so that writeback can proceed with the
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1703) 			 * other inodes on s_io.
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1704) 			 *
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1705) 			 * We'll have another go at writing back this inode
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1706) 			 * when we've completed a full scan of b_io.
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1707) 			 */
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1708) 			spin_unlock(&inode->i_lock);
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1709) 			requeue_io(inode, wb);
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1710) 			trace_writeback_sb_inodes_requeue(inode);
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1711) 			continue;
cc1676d917f32 (Jan Kara              2012-05-03 14:47:56 +0200 1712) 		}
f0d07b7ffde75 (Jan Kara              2012-05-03 14:47:59 +0200 1713) 		spin_unlock(&wb->list_lock);
f0d07b7ffde75 (Jan Kara              2012-05-03 14:47:59 +0200 1714) 
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1715) 		/*
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1716) 		 * We already requeued the inode if it had I_SYNC set and we
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1717) 		 * are doing WB_SYNC_NONE writeback. So this catches only the
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1718) 		 * WB_SYNC_ALL case.
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1719) 		 */
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1720) 		if (inode->i_state & I_SYNC) {
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1721) 			/* Wait for I_SYNC. This function drops i_lock... */
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1722) 			inode_sleep_on_writeback(inode);
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1723) 			/* Inode may be gone, start again */
ead188f9f930f (Jan Kara              2012-06-08 17:07:36 +0200 1724) 			spin_lock(&wb->list_lock);
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1725) 			continue;
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1726) 		}
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1727) 		inode->i_state |= I_SYNC;
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600 1728) 		wbc_attach_and_unlock_inode(&wbc, inode);
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1729) 
a88a341a73be4 (Tejun Heo             2015-05-22 17:13:28 -0400 1730) 		write_chunk = writeback_chunk_size(wb, work);
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1731) 		wbc.nr_to_write = write_chunk;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1732) 		wbc.pages_skipped = 0;
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 1733) 
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1734) 		/*
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1735) 		 * We use I_SYNC to pin the inode in memory. While it is set,
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1736) 		 * evict_inode() will wait, so the inode cannot be freed.
169ebd90131b2 (Jan Kara              2012-05-03 14:48:03 +0200 1737) 		 */
cd8ed2a45a401 (Yan Hong              2012-10-08 16:33:45 -0700 1738) 		__writeback_single_inode(inode, &wbc);
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 1739) 
b16b1deb553ad (Tejun Heo             2015-06-02 08:39:48 -0600 1740) 		wbc_detach_inode(&wbc);
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1741) 		work->nr_pages -= write_chunk - wbc.nr_to_write;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1742) 		wrote += write_chunk - wbc.nr_to_write;
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1743) 
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1744) 		if (need_resched()) {
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1745) 			/*
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1746) 			 * We're trying to balance between building up a nice
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1747) 			 * long list of IOs to improve our merge rate, and
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1748) 			 * getting those IOs out quickly for anyone throttling
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1749) 			 * in balance_dirty_pages().  cond_resched() doesn't
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1750) 			 * unplug, so get our IOs out the door before we
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1751) 			 * give up the CPU.
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1752) 			 */
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1753) 			blk_flush_plug(current);
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1754) 			cond_resched();
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1755) 		}
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1756) 
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1757) 		/*
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1758) 		 * Requeue @inode if still dirty.  Be careful as @inode may
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1759) 		 * have been switched to another wb in the meantime.
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1760) 		 */
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1761) 		tmp_wb = inode_to_wb_and_lock_list(inode);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1762) 		spin_lock(&inode->i_lock);
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 1763) 		if (!(inode->i_state & I_DIRTY_ALL))
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1764) 			wrote++;
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1765) 		requeue_inode(inode, tmp_wb, &wbc);
4f8ad655dbc82 (Jan Kara              2012-05-03 14:48:00 +0200 1766) 		inode_sync_complete(inode);
0f1b1fd86f6fd (Dave Chinner          2011-03-22 22:23:43 +1100 1767) 		spin_unlock(&inode->i_lock);
590dca3a71875 (Chris Mason           2015-09-18 13:35:08 -0400 1768) 
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1769) 		if (unlikely(tmp_wb != wb)) {
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1770) 			spin_unlock(&tmp_wb->list_lock);
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1771) 			spin_lock(&wb->list_lock);
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1772) 		}
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 1773) 
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1774) 		/*
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1775) 		 * Bail out to wb_writeback() often enough to check the
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1776) 		 * background threshold and other termination conditions.
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1777) 		 */
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1778) 		if (wrote) {
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1779) 			if (time_is_before_jiffies(start_time + HZ / 10UL))
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1780) 				break;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1781) 			if (work->nr_pages <= 0)
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1782) 				break;
8bc3be2751b4f (Fengguang Wu          2008-02-04 22:29:36 -0800 1783) 		}
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 1784) 	}
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1785) 	return wrote;
f11c9c5c259cb (Edward Shishkin       2010-03-11 14:09:47 -0800 1786) }
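
The loop above hands each inode a write_chunk budget and, once it has made progress, bails back to wb_writeback() roughly every HZ/10 jiffies (about 100ms) so the global termination conditions get re-checked. A minimal, runnable userspace sketch of that pacing pattern; the chunk size, process_chunk() and the page counter are hypothetical stand-ins, not kernel APIs:

#include <stdio.h>
#include <time.h>
#include <unistd.h>

static double now_seconds(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec + ts.tv_nsec / 1e9;
}

/* Stand-in for __writeback_single_inode(): "writes" up to budget pages. */
static long process_chunk(long budget)
{
	usleep(20000);		/* simulate ~20ms of I/O per chunk */
	return budget;
}

/* Mirror of the bail-out logic: yield after progress plus ~100ms. */
static long writeback_batch(long *nr_pages)
{
	double start = now_seconds();
	long wrote = 0;

	while (*nr_pages > 0) {
		long chunk = *nr_pages < 1024 ? *nr_pages : 1024;
		long done = process_chunk(chunk);

		wrote += done;
		*nr_pages -= done;
		if (wrote && now_seconds() - start > 0.1)
			break;	/* let the caller re-check termination */
	}
	return wrote;
}

int main(void)
{
	long todo = 20000;

	while (todo > 0)
		printf("batch wrote %ld pages\n", writeback_batch(&todo));
	return 0;
}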
f11c9c5c259cb (Edward Shishkin       2010-03-11 14:09:47 -0800 1787) 
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1788) static long __writeback_inodes_wb(struct bdi_writeback *wb,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1789) 				  struct wb_writeback_work *work)
f11c9c5c259cb (Edward Shishkin       2010-03-11 14:09:47 -0800 1790) {
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1791) 	unsigned long start_time = jiffies;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1792) 	long wrote = 0;
38f2197766312 (Nicholas Piggin       2009-01-06 14:40:25 -0800 1793) 
f11c9c5c259cb (Edward Shishkin       2010-03-11 14:09:47 -0800 1794) 	while (!list_empty(&wb->b_io)) {
7ccf19a8042e3 (Nicholas Piggin       2010-10-21 11:49:30 +1100 1795) 		struct inode *inode = wb_inode(wb->b_io.prev);
f11c9c5c259cb (Edward Shishkin       2010-03-11 14:09:47 -0800 1796) 		struct super_block *sb = inode->i_sb;
9ecc2738ac237 (Jens Axboe            2009-09-24 15:25:11 +0200 1797) 
eb6ef3df4faa5 (Konstantin Khlebnikov 2015-02-19 20:19:35 +0300 1798) 		if (!trylock_super(sb)) {
0e995816f4fb6 (Wu Fengguang          2011-07-29 22:14:35 -0600 1799) 			/*
eb6ef3df4faa5 (Konstantin Khlebnikov 2015-02-19 20:19:35 +0300 1800) 			 * trylock_super() may fail consistently due to
0e995816f4fb6 (Wu Fengguang          2011-07-29 22:14:35 -0600 1801) 			 * s_umount being grabbed by someone else. Don't use
0e995816f4fb6 (Wu Fengguang          2011-07-29 22:14:35 -0600 1802) 			 * requeue_io() to avoid busy retrying the inode/sb.
0e995816f4fb6 (Wu Fengguang          2011-07-29 22:14:35 -0600 1803) 			 */
0e995816f4fb6 (Wu Fengguang          2011-07-29 22:14:35 -0600 1804) 			redirty_tail(inode, wb);
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1805) 			continue;
f11c9c5c259cb (Edward Shishkin       2010-03-11 14:09:47 -0800 1806) 		}
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1807) 		wrote += writeback_sb_inodes(sb, wb, work);
eb6ef3df4faa5 (Konstantin Khlebnikov 2015-02-19 20:19:35 +0300 1808) 		up_read(&sb->s_umount);
f11c9c5c259cb (Edward Shishkin       2010-03-11 14:09:47 -0800 1809) 
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1810) 		/* refer to the same tests at the end of writeback_sb_inodes */
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1811) 		if (wrote) {
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1812) 			if (time_is_before_jiffies(start_time + HZ / 10UL))
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1813) 				break;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1814) 			if (work->nr_pages <= 0)
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1815) 				break;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1816) 		}
f11c9c5c259cb (Edward Shishkin       2010-03-11 14:09:47 -0800 1817) 	}
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1818) 	/* Leave any unwritten inodes on b_io */
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1819) 	return wrote;
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1820) }
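
The trylock_super() call above deliberately refuses to wait on s_umount: if the superblock is being locked out (e.g. by umount), the inode is re-dated with redirty_tail() and the walk moves on. A self-contained userspace analogue of that trylock-and-skip pattern, with a pthread rwlock standing in for s_umount:

#include <pthread.h>
#include <stdio.h>

/* Hypothetical per-superblock lock; stands in for sb->s_umount. */
static pthread_rwlock_t s_umount = PTHREAD_RWLOCK_INITIALIZER;

static int try_writeback_sb(void)
{
	if (pthread_rwlock_tryrdlock(&s_umount) != 0) {
		/*
		 * Held for writing by someone else; skip rather than
		 * block or spin (the kernel re-dates the inode with
		 * redirty_tail() for the same reason).
		 */
		return 0;
	}
	/* ... write back this superblock's inodes ... */
	pthread_rwlock_unlock(&s_umount);
	return 1;
}

int main(void)
{
	printf("written back: %d\n", try_writeback_sb());
	return 0;
}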
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1821) 
7d9f073b8da45 (Wanpeng Li            2013-09-11 14:22:40 -0700 1822) static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
0e175a1835ffc (Curt Wohlgemuth       2011-10-07 21:54:10 -0600 1823) 				enum wb_reason reason)
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1824) {
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1825) 	struct wb_writeback_work work = {
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1826) 		.nr_pages	= nr_pages,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1827) 		.sync_mode	= WB_SYNC_NONE,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1828) 		.range_cyclic	= 1,
0e175a1835ffc (Curt Wohlgemuth       2011-10-07 21:54:10 -0600 1829) 		.reason		= reason,
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1830) 	};
505a666ee3fc6 (Linus Torvalds        2015-09-11 13:37:19 -0700 1831) 	struct blk_plug plug;
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1832) 
505a666ee3fc6 (Linus Torvalds        2015-09-11 13:37:19 -0700 1833) 	blk_start_plug(&plug);
f758eeabeb96f (Christoph Hellwig     2011-04-21 18:19:44 -0600 1834) 	spin_lock(&wb->list_lock);
424b351fe1901 (Wu Fengguang          2010-07-21 20:11:53 -0600 1835) 	if (list_empty(&wb->b_io))
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1836) 		queue_io(wb, &work, jiffies);
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1837) 	__writeback_inodes_wb(wb, &work);
f758eeabeb96f (Christoph Hellwig     2011-04-21 18:19:44 -0600 1838) 	spin_unlock(&wb->list_lock);
505a666ee3fc6 (Linus Torvalds        2015-09-11 13:37:19 -0700 1839) 	blk_finish_plug(&plug);
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1840) 
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1841) 	return nr_pages - work.nr_pages;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1842) }
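
The blk_start_plug()/blk_finish_plug() pair around the list walk is the usual kernel idiom for batching block-layer submissions made by the current task, so they can merge and be dispatched as one burst. A bare sketch of the idiom (kernel context assumed, submission calls elided):

#include <linux/blkdev.h>

static void submit_batched(void)
{
	struct blk_plug plug;

	blk_start_plug(&plug);
	/* bios submitted here queue up on the task's plug list ... */
	blk_finish_plug(&plug);	/* ... and are dispatched together */
}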
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1843) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1844) /*
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1845)  * Explicit flushing or periodic writeback of "old" data.
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1846)  *
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1847)  * Define "old": the first time one of an inode's pages is dirtied, we mark the
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1848)  * dirtying-time in the inode's address_space.  So this periodic writeback code
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1849)  * just walks the superblock inode list, writing back any inodes which are
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1850)  * older than a specific point in time.
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1851)  *
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1852)  * Try to run once per dirty_writeback_interval.  But if a writeback event
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1853)  * takes longer than one dirty_writeback_interval, then leave a
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1854)  * one-second gap.
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1855)  *
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1856)  * dirtied_before takes precedence over nr_to_write.  So we'll only write back
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1857)  * all dirty pages if they are all attached to "old" mappings.
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1858)  */
c4a77a6c7dcff (Jens Axboe            2009-09-16 15:18:25 +0200 1859) static long wb_writeback(struct bdi_writeback *wb,
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 1860) 			 struct wb_writeback_work *work)
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1861) {
e98be2d599207 (Wu Fengguang          2010-08-29 11:22:30 -0600 1862) 	unsigned long wb_start = jiffies;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1863) 	long nr_pages = work->nr_pages;
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1864) 	unsigned long dirtied_before = jiffies;
a5989bdc981ec (Jan Kara              2009-09-16 19:22:48 +0200 1865) 	struct inode *inode;
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1866) 	long progress;
505a666ee3fc6 (Linus Torvalds        2015-09-11 13:37:19 -0700 1867) 	struct blk_plug plug;
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1868) 
505a666ee3fc6 (Linus Torvalds        2015-09-11 13:37:19 -0700 1869) 	blk_start_plug(&plug);
e8dfc30582995 (Wu Fengguang          2011-04-21 12:06:32 -0600 1870) 	spin_lock(&wb->list_lock);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1871) 	for (;;) {
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1872) 		/*
d3ddec7635b6f (Wu Fengguang          2009-09-23 20:33:40 +0800 1873) 		 * Stop writeback when nr_pages has been consumed
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1874) 		 */
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 1875) 		if (work->nr_pages <= 0)
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1876) 			break;
66f3b8e2e103a (Jens Axboe            2009-09-02 09:19:46 +0200 1877) 
aa373cf550994 (Jan Kara              2011-01-13 15:45:47 -0800 1878) 		/*
aa373cf550994 (Jan Kara              2011-01-13 15:45:47 -0800 1879) 		 * Background writeout and kupdate-style writeback may
aa373cf550994 (Jan Kara              2011-01-13 15:45:47 -0800 1880) 		 * run forever. Stop them if there is other work to do
aa373cf550994 (Jan Kara              2011-01-13 15:45:47 -0800 1881) 		 * so that e.g. sync can proceed. They'll be restarted
aa373cf550994 (Jan Kara              2011-01-13 15:45:47 -0800 1882) 		 * after the other work items are all done.
aa373cf550994 (Jan Kara              2011-01-13 15:45:47 -0800 1883) 		 */
aa373cf550994 (Jan Kara              2011-01-13 15:45:47 -0800 1884) 		if ((work->for_background || work->for_kupdate) &&
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 1885) 		    !list_empty(&wb->work_list))
aa373cf550994 (Jan Kara              2011-01-13 15:45:47 -0800 1886) 			break;
aa373cf550994 (Jan Kara              2011-01-13 15:45:47 -0800 1887) 
38f2197766312 (Nicholas Piggin       2009-01-06 14:40:25 -0800 1888) 		/*
d3ddec7635b6f (Wu Fengguang          2009-09-23 20:33:40 +0800 1889) 		 * For background writeout, stop when we are below the
d3ddec7635b6f (Wu Fengguang          2009-09-23 20:33:40 +0800 1890) 		 * background dirty threshold
38f2197766312 (Nicholas Piggin       2009-01-06 14:40:25 -0800 1891) 		 */
aa661bbe1e61c (Tejun Heo             2015-05-22 18:23:31 -0400 1892) 		if (work->for_background && !wb_over_bg_thresh(wb))
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1893) 			break;
38f2197766312 (Nicholas Piggin       2009-01-06 14:40:25 -0800 1894) 
1bc36b6426ae4 (Jan Kara              2011-10-19 11:44:41 +0200 1895) 		/*
1bc36b6426ae4 (Jan Kara              2011-10-19 11:44:41 +0200 1896) 		 * Kupdate and background work items are special, and we want to
1bc36b6426ae4 (Jan Kara              2011-10-19 11:44:41 +0200 1897) 		 * include all inodes that need writing. Livelock avoidance is
1bc36b6426ae4 (Jan Kara              2011-10-19 11:44:41 +0200 1898) 		 * handled by these works yielding to any other work so we are
1bc36b6426ae4 (Jan Kara              2011-10-19 11:44:41 +0200 1899) 		 * safe.
1bc36b6426ae4 (Jan Kara              2011-10-19 11:44:41 +0200 1900) 		 */
ba9aa8399fda4 (Wu Fengguang          2010-07-21 20:32:30 -0600 1901) 		if (work->for_kupdate) {
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1902) 			dirtied_before = jiffies -
ba9aa8399fda4 (Wu Fengguang          2010-07-21 20:32:30 -0600 1903) 				msecs_to_jiffies(dirty_expire_interval * 10);
1bc36b6426ae4 (Jan Kara              2011-10-19 11:44:41 +0200 1904) 		} else if (work->for_background)
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1905) 			dirtied_before = jiffies;
028c2dd184c09 (Dave Chinner          2010-07-07 13:24:07 +1000 1906) 
5634cc2aa9aeb (Tejun Heo             2015-08-18 14:54:56 -0700 1907) 		trace_writeback_start(wb, work);
e8dfc30582995 (Wu Fengguang          2011-04-21 12:06:32 -0600 1908) 		if (list_empty(&wb->b_io))
f9cae926f35e8 (Jan Kara              2020-05-29 16:08:58 +0200 1909) 			queue_io(wb, work, dirtied_before);
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 1910) 		if (work->sb)
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1911) 			progress = writeback_sb_inodes(work->sb, wb, work);
edadfb10ba35d (Christoph Hellwig     2010-06-10 12:07:54 +0200 1912) 		else
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1913) 			progress = __writeback_inodes_wb(wb, work);
5634cc2aa9aeb (Tejun Heo             2015-08-18 14:54:56 -0700 1914) 		trace_writeback_written(wb, work);
028c2dd184c09 (Dave Chinner          2010-07-07 13:24:07 +1000 1915) 
e98be2d599207 (Wu Fengguang          2010-08-29 11:22:30 -0600 1916) 		wb_update_bandwidth(wb, wb_start);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1917) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1918) 		/*
e6fb6da2e1068 (Wu Fengguang          2010-07-22 10:23:44 -0600 1919) 		 * Did we write something? Try for more
e6fb6da2e1068 (Wu Fengguang          2010-07-22 10:23:44 -0600 1920) 		 *
e6fb6da2e1068 (Wu Fengguang          2010-07-22 10:23:44 -0600 1921) 		 * Dirty inodes are moved to b_io for writeback in batches.
e6fb6da2e1068 (Wu Fengguang          2010-07-22 10:23:44 -0600 1922) 		 * The completion of the current batch does not necessarily
e6fb6da2e1068 (Wu Fengguang          2010-07-22 10:23:44 -0600 1923) 		 * mean the overall work is done. So we keep looping as long
e6fb6da2e1068 (Wu Fengguang          2010-07-22 10:23:44 -0600 1924) 		 * as we made some progress on cleaning pages or inodes.
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1925) 		 */
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1926) 		if (progress)
71fd05a887e0f (Jens Axboe            2009-09-23 19:32:26 +0200 1927) 			continue;
71fd05a887e0f (Jens Axboe            2009-09-23 19:32:26 +0200 1928) 		/*
e6fb6da2e1068 (Wu Fengguang          2010-07-22 10:23:44 -0600 1929) 		 * No more inodes for IO, bail
71fd05a887e0f (Jens Axboe            2009-09-23 19:32:26 +0200 1930) 		 */
b7a2441f9966f (Wu Fengguang          2010-07-21 22:19:51 -0600 1931) 		if (list_empty(&wb->b_more_io))
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1932) 			break;
71fd05a887e0f (Jens Axboe            2009-09-23 19:32:26 +0200 1933) 		/*
71fd05a887e0f (Jens Axboe            2009-09-23 19:32:26 +0200 1934) 		 * Nothing written. Wait for some inode to
71fd05a887e0f (Jens Axboe            2009-09-23 19:32:26 +0200 1935) 		 * become available for writeback. Otherwise
71fd05a887e0f (Jens Axboe            2009-09-23 19:32:26 +0200 1936) 		 * we'll just busyloop.
71fd05a887e0f (Jens Axboe            2009-09-23 19:32:26 +0200 1937) 		 */
bace9248188f6 (Tahsin Erdogan        2016-12-12 16:43:20 -0800 1938) 		trace_writeback_wait(wb, work);
bace9248188f6 (Tahsin Erdogan        2016-12-12 16:43:20 -0800 1939) 		inode = wb_inode(wb->b_more_io.prev);
bace9248188f6 (Tahsin Erdogan        2016-12-12 16:43:20 -0800 1940) 		spin_lock(&inode->i_lock);
bace9248188f6 (Tahsin Erdogan        2016-12-12 16:43:20 -0800 1941) 		spin_unlock(&wb->list_lock);
bace9248188f6 (Tahsin Erdogan        2016-12-12 16:43:20 -0800 1942) 		/* This function drops i_lock... */
bace9248188f6 (Tahsin Erdogan        2016-12-12 16:43:20 -0800 1943) 		inode_sleep_on_writeback(inode);
bace9248188f6 (Tahsin Erdogan        2016-12-12 16:43:20 -0800 1944) 		spin_lock(&wb->list_lock);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1945) 	}
e8dfc30582995 (Wu Fengguang          2011-04-21 12:06:32 -0600 1946) 	spin_unlock(&wb->list_lock);
505a666ee3fc6 (Linus Torvalds        2015-09-11 13:37:19 -0700 1947) 	blk_finish_plug(&plug);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1948) 
d46db3d58233b (Wu Fengguang          2011-05-04 19:54:37 -0600 1949) 	return nr_pages - work->nr_pages;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1950) }
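
For the for_kupdate branch above, dirty_expire_interval is stored in hundredths of a second (it is the vm.dirty_expire_centisecs sysctl), so the `* 10` converts it to milliseconds before msecs_to_jiffies(). A runnable check of the arithmetic with the default value of 3000:

#include <stdio.h>

int main(void)
{
	unsigned int dirty_expire_interval = 3000;	/* centisecs (default) */
	unsigned int msecs = dirty_expire_interval * 10;

	/* Inodes dirtied longer ago than this are "old" for kupdate. */
	printf("expire threshold: %u ms (%u s)\n", msecs, msecs / 1000);
	return 0;
}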
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1951) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1952) /*
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 1953)  * Return the next wb_writeback_work struct that hasn't been processed yet.
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1954)  */
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 1955) static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1956) {
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 1957) 	struct wb_writeback_work *work = NULL;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1958) 
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 1959) 	spin_lock_bh(&wb->work_lock);
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 1960) 	if (!list_empty(&wb->work_list)) {
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 1961) 		work = list_entry(wb->work_list.next,
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 1962) 				  struct wb_writeback_work, list);
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 1963) 		list_del_init(&work->list);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1964) 	}
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 1965) 	spin_unlock_bh(&wb->work_lock);
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 1966) 	return work;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1967) }
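
list_entry() used above is container_of() under the hood: it recovers the enclosing wb_writeback_work from its embedded list head. A self-contained userspace rendition of the same pop-under-lock pattern, with a pthread mutex standing in for wb->work_lock and a hypothetical work_item type:

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_item {
	int id;
	struct list_head list;
};

static pthread_mutex_t work_lock = PTHREAD_MUTEX_INITIALIZER;
static struct list_head work_list = { &work_list, &work_list };

static void list_del_init(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
	e->next = e->prev = e;
}

/* Pop the oldest item, or NULL if the list is empty. */
static struct work_item *get_next_work(void)
{
	struct work_item *w = NULL;

	pthread_mutex_lock(&work_lock);
	if (work_list.next != &work_list) {
		w = container_of(work_list.next, struct work_item, list);
		list_del_init(&w->list);
	}
	pthread_mutex_unlock(&work_lock);
	return w;
}

int main(void)
{
	struct work_item w = { 7, { &work_list, &work_list } };
	struct work_item *got;

	work_list.next = work_list.prev = &w.list;	/* enqueue w */
	got = get_next_work();
	printf("popped id=%d\n", got ? got->id : -1);
	return 0;
}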
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1968) 
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1969) static long wb_check_background_flush(struct bdi_writeback *wb)
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1970) {
aa661bbe1e61c (Tejun Heo             2015-05-22 18:23:31 -0400 1971) 	if (wb_over_bg_thresh(wb)) {
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1972) 
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1973) 		struct wb_writeback_work work = {
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1974) 			.nr_pages	= LONG_MAX,
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1975) 			.sync_mode	= WB_SYNC_NONE,
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1976) 			.for_background	= 1,
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1977) 			.range_cyclic	= 1,
0e175a1835ffc (Curt Wohlgemuth       2011-10-07 21:54:10 -0600 1978) 			.reason		= WB_REASON_BACKGROUND,
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1979) 		};
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1980) 
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1981) 		return wb_writeback(wb, &work);
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1982) 	}
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1983) 
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1984) 	return 0;
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1985) }
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 1986) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1987) static long wb_check_old_data_flush(struct bdi_writeback *wb)
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1988) {
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1989) 	unsigned long expired;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1990) 	long nr_pages;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1991) 
69b62d01ec44f (Jens Axboe            2010-05-17 12:51:03 +0200 1992) 	/*
69b62d01ec44f (Jens Axboe            2010-05-17 12:51:03 +0200 1993) 	 * When set to zero, disable periodic writeback
69b62d01ec44f (Jens Axboe            2010-05-17 12:51:03 +0200 1994) 	 */
69b62d01ec44f (Jens Axboe            2010-05-17 12:51:03 +0200 1995) 	if (!dirty_writeback_interval)
69b62d01ec44f (Jens Axboe            2010-05-17 12:51:03 +0200 1996) 		return 0;
69b62d01ec44f (Jens Axboe            2010-05-17 12:51:03 +0200 1997) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1998) 	expired = wb->last_old_flush +
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 1999) 			msecs_to_jiffies(dirty_writeback_interval * 10);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2000) 	if (time_before(jiffies, expired))
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2001) 		return 0;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2002) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2003) 	wb->last_old_flush = jiffies;
cdf01dd5443d0 (Linus Torvalds        2010-10-30 08:55:52 -0700 2004) 	nr_pages = get_nr_dirty_pages();
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2005) 
c4a77a6c7dcff (Jens Axboe            2009-09-16 15:18:25 +0200 2006) 	if (nr_pages) {
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 2007) 		struct wb_writeback_work work = {
c4a77a6c7dcff (Jens Axboe            2009-09-16 15:18:25 +0200 2008) 			.nr_pages	= nr_pages,
c4a77a6c7dcff (Jens Axboe            2009-09-16 15:18:25 +0200 2009) 			.sync_mode	= WB_SYNC_NONE,
c4a77a6c7dcff (Jens Axboe            2009-09-16 15:18:25 +0200 2010) 			.for_kupdate	= 1,
c4a77a6c7dcff (Jens Axboe            2009-09-16 15:18:25 +0200 2011) 			.range_cyclic	= 1,
0e175a1835ffc (Curt Wohlgemuth       2011-10-07 21:54:10 -0600 2012) 			.reason		= WB_REASON_PERIODIC,
c4a77a6c7dcff (Jens Axboe            2009-09-16 15:18:25 +0200 2013) 		};
c4a77a6c7dcff (Jens Axboe            2009-09-16 15:18:25 +0200 2014) 
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 2015) 		return wb_writeback(wb, &work);
c4a77a6c7dcff (Jens Axboe            2009-09-16 15:18:25 +0200 2016) 	}
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2017) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2018) 	return 0;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2019) }
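
Two details worth noting in the function above: dirty_writeback_interval is likewise in centiseconds (vm.dirty_writeback_centisecs, default 500, i.e. a 5-second cadence), and time_before() compares jiffies wraparound-safely by checking the sign of the unsigned difference. A runnable demonstration of the wraparound trick, with the macro body mirroring include/linux/jiffies.h:

#include <stdio.h>

/* Mirrors include/linux/jiffies.h: true if a is before b, wrap-safe. */
#define time_before(a, b)	((long)((a) - (b)) < 0)

int main(void)
{
	unsigned long jiffies = (unsigned long)-5;	/* about to wrap */
	unsigned long expired = jiffies + 10;		/* deadline past wrap */

	/* A naive "<" is fooled by the wraparound; the signed diff is not. */
	printf("naive: %d, time_before: %d\n",
	       jiffies < expired, time_before(jiffies, expired));
	return 0;
}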
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2020) 
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2021) static long wb_check_start_all(struct bdi_writeback *wb)
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2022) {
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2023) 	long nr_pages;
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2024) 
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2025) 	if (!test_bit(WB_start_all, &wb->state))
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2026) 		return 0;
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2027) 
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2028) 	nr_pages = get_nr_dirty_pages();
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2029) 	if (nr_pages) {
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2030) 		struct wb_writeback_work work = {
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2031) 			.nr_pages	= wb_split_bdi_pages(wb, nr_pages),
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2032) 			.sync_mode	= WB_SYNC_NONE,
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2033) 			.range_cyclic	= 1,
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2034) 			.reason		= wb->start_all_reason,
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2035) 		};
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2036) 
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2037) 		nr_pages = wb_writeback(wb, &work);
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2038) 	}
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2039) 
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2040) 	clear_bit(WB_start_all, &wb->state);
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2041) 	return nr_pages;
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2042) }
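
WB_start_all behaves as a collapsed one-shot request: any number of wakeup callers set the bit, the worker services it once, and the bit is cleared only after the flush, since a request arriving mid-flush is satisfied by that same flush. A userspace analogue with C11 atomics; the flag name and the fake page count are illustrative only:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool start_all;	/* stands in for WB_start_all */

static void request_flush(void)
{
	/* Any number of requesters collapse into one pending bit. */
	atomic_store(&start_all, true);
}

static long check_start_all(void)
{
	if (!atomic_load(&start_all))
		return 0;
	/* ... flush everything here ... */
	/*
	 * Clear only after servicing: a request that arrives while we
	 * are flushing is satisfied by that same flush.
	 */
	atomic_store(&start_all, false);
	return 1024;	/* pretend page count */
}

int main(void)
{
	request_flush();
	request_flush();	/* merges with the first request */
	printf("wrote %ld, then %ld\n", check_start_all(), check_start_all());
	return 0;
}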
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2043) 
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2044) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2045) /*
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2046)  * Retrieve work items and do the writeback they describe
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2047)  */
25d130ba22362 (Wanpeng Li            2013-07-08 16:00:14 -0700 2048) static long wb_do_writeback(struct bdi_writeback *wb)
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2049) {
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 2050) 	struct wb_writeback_work *work;
c4a77a6c7dcff (Jens Axboe            2009-09-16 15:18:25 +0200 2051) 	long wrote = 0;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2052) 
4452226ea276e (Tejun Heo             2015-05-22 17:13:26 -0400 2053) 	set_bit(WB_writeback_running, &wb->state);
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 2054) 	while ((work = get_next_work_item(wb)) != NULL) {
5634cc2aa9aeb (Tejun Heo             2015-08-18 14:54:56 -0700 2055) 		trace_writeback_exec(wb, work);
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 2056) 		wrote += wb_writeback(wb, work);
4a3a485b1ed0e (Tahsin Erdogan        2017-03-10 12:09:49 -0800 2057) 		finish_writeback_work(wb, work);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2058) 	}
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2059) 
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2060) 	/*
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2061) 	 * Check for a flush-everything request
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2062) 	 */
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2063) 	wrote += wb_check_start_all(wb);
85009b4f5f039 (Jens Axboe            2017-09-30 02:09:06 -0600 2064) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2065) 	/*
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2066) 	 * Check for periodic writeback, kupdated() style
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2067) 	 */
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2068) 	wrote += wb_check_old_data_flush(wb);
6585027a5e8cb (Jan Kara              2011-01-13 15:45:44 -0800 2069) 	wrote += wb_check_background_flush(wb);
4452226ea276e (Tejun Heo             2015-05-22 17:13:26 -0400 2070) 	clear_bit(WB_writeback_running, &wb->state);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2071) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2072) 	return wrote;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2073) }
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2074) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2075) /*
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2076)  * Handle writeback of dirty data for the device backed by this bdi. Also
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2077)  * reschedules periodically and does kupdated-style flushing.
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2078)  */
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 2079) void wb_workfn(struct work_struct *work)
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2080) {
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2081) 	struct bdi_writeback *wb = container_of(to_delayed_work(work),
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2082) 						struct bdi_writeback, dwork);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2083) 	long pages_written;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2084) 
68f23b89067fd (Theodore Ts'o         2020-01-30 22:11:04 -0800 2085) 	set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
766f9164193f6 (Peter Zijlstra        2010-10-26 14:22:45 -0700 2086) 	current->flags |= PF_SWAPWRITE;
455b2864686d3 (Dave Chinner          2010-07-07 13:24:06 +1000 2087) 
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2088) 	if (likely(!current_is_workqueue_rescuer() ||
4452226ea276e (Tejun Heo             2015-05-22 17:13:26 -0400 2089) 		   !test_bit(WB_registered, &wb->state))) {
6467716a37673 (Artem Bityutskiy      2010-07-25 14:29:22 +0300 2090) 		/*
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 2091) 		 * The normal path.  Keep writing back @wb until its
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2092) 		 * work_list is empty.  Note that this path is also taken
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 2093) 		 * if @wb is shutting down even when we're running off the
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2094) 		 * rescuer, as work_list needs to be drained.
6467716a37673 (Artem Bityutskiy      2010-07-25 14:29:22 +0300 2095) 		 */
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2096) 		do {
25d130ba22362 (Wanpeng Li            2013-07-08 16:00:14 -0700 2097) 			pages_written = wb_do_writeback(wb);
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2098) 			trace_writeback_pages_written(pages_written);
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 2099) 		} while (!list_empty(&wb->work_list));
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2100) 	} else {
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2101) 		/*
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2102) 		 * bdi_wq can't get enough workers and we're running off
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2103) 		 * the emergency worker.  Don't hog it.  Hopefully, 1024 is
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2104) 		 * enough for efficient IO.
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2105) 		 */
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 2106) 		pages_written = writeback_inodes_wb(wb, 1024,
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2107) 						    WB_REASON_FORKER_THREAD);
455b2864686d3 (Dave Chinner          2010-07-07 13:24:06 +1000 2108) 		trace_writeback_pages_written(pages_written);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2109) 	}
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2110) 
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 2111) 	if (!list_empty(&wb->work_list))
b8b784958eccb (Jan Kara              2018-05-03 18:26:26 +0200 2112) 		wb_wakeup(wb);
6ca738d60c563 (Derek Basehore        2014-04-03 14:46:22 -0700 2113) 	else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
f0054bb1e1f3b (Tejun Heo             2015-05-22 17:13:30 -0400 2114) 		wb_wakeup_delayed(wb);
455b2864686d3 (Dave Chinner          2010-07-07 13:24:06 +1000 2115) 
839a8e8660b67 (Tejun Heo             2013-04-01 19:08:06 -0700 2116) 	current->flags &= ~PF_SWAPWRITE;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2117) }
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2118) 
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2119) /*
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2120)  * Start writeback of `nr_pages' pages on this bdi. If `nr_pages' is zero,
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2121)  * write back the whole world.
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2122)  */
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2123) static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 2124) 					 enum wb_reason reason)
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2125) {
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2126) 	struct bdi_writeback *wb;
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2127) 
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2128) 	if (!bdi_has_dirty_io(bdi))
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2129) 		return;
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2130) 
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2131) 	list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 2132) 		wb_start_writeback(wb, reason);
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2133) }
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2134) 
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2135) void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2136) 				enum wb_reason reason)
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2137) {
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2138) 	rcu_read_lock();
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 2139) 	__wakeup_flusher_threads_bdi(bdi, reason);
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2140) 	rcu_read_unlock();
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2141) }
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2142) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2143) /*
9ba4b2dfafaa7 (Jens Axboe            2017-09-20 08:58:25 -0600 2144)  * Wake up the flusher threads to start writeback of all currently dirty pages
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2145)  */
9ba4b2dfafaa7 (Jens Axboe            2017-09-20 08:58:25 -0600 2146) void wakeup_flusher_threads(enum wb_reason reason)
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2147) {
b8c2f3474f107 (Christoph Hellwig     2010-06-08 18:15:07 +0200 2148) 	struct backing_dev_info *bdi;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2149) 
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2150) 	/*
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2151) 	 * If we are expecting writeback progress we must submit plugged IO.
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2152) 	 */
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2153) 	if (blk_needs_flush_plug(current))
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2154) 		blk_schedule_flush_plug(current);
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2155) 
b8c2f3474f107 (Christoph Hellwig     2010-06-08 18:15:07 +0200 2156) 	rcu_read_lock();
595043e5f9ef1 (Jens Axboe            2017-09-28 11:26:59 -0600 2157) 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
e8e8a0c6c9bfc (Jens Axboe            2017-09-28 11:31:22 -0600 2158) 		__wakeup_flusher_threads_bdi(bdi, reason);
cfc4ba5365449 (Jens Axboe            2009-09-14 13:12:40 +0200 2159) 	rcu_read_unlock();
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2160) }
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2161) 
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2162) /*
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2163)  * Wake up bdi's periodically to make sure dirtytime inodes get
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2164)  * written back periodically.  We deliberately do *not* check the
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2165)  * b_dirtytime list in wb_has_dirty_io(), since this would cause the
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2166)  * kernel to be constantly waking up once there are any dirtytime
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2167)  * inodes on the system.  So instead we define a separate delayed work
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2168)  * function which gets called much more rarely.  (By default, only
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2169)  * once every 12 hours.)
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2170)  *
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2171)  * If there is any other write activity going on in the file system,
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2172)  * this function won't be necessary.  But if the only thing that has
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2173)  * happened on the file system is a dirtytime inode caused by an atime
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2174)  * update, we need this infrastructure below to make sure that inode
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2175)  * eventually gets pushed out to disk.
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2176)  */
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2177) static void wakeup_dirtytime_writeback(struct work_struct *w);
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2178) static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2179) 
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2180) static void wakeup_dirtytime_writeback(struct work_struct *w)
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2181) {
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2182) 	struct backing_dev_info *bdi;
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2183) 
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2184) 	rcu_read_lock();
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2185) 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
001fe6f617b1a (Tejun Heo             2015-05-22 17:13:56 -0400 2186) 		struct bdi_writeback *wb;
001fe6f617b1a (Tejun Heo             2015-05-22 17:13:56 -0400 2187) 
b817525a4a80c (Tejun Heo             2015-10-02 14:47:05 -0400 2188) 		list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
6fdf860f15d4a (Tejun Heo             2015-09-29 12:47:51 -0400 2189) 			if (!list_empty(&wb->b_dirty_time))
6fdf860f15d4a (Tejun Heo             2015-09-29 12:47:51 -0400 2190) 				wb_wakeup(wb);
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2191) 	}
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2192) 	rcu_read_unlock();
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2193) 	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2194) }
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2195) 
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2196) static int __init start_dirtytime_writeback(void)
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2197) {
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2198) 	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2199) 	return 0;
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2200) }
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2201) __initcall(start_dirtytime_writeback);
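
The handler above re-arms itself on every run, producing a deliberately low-frequency tick (dirtytime_expire_interval defaults to the 12 hours mentioned in the comment). A hedged userspace analogue of a self-rearming periodic worker, with the interval shortened so it can be observed:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static unsigned int expire_interval = 2;	/* seconds; kernel default is 12h */

static void *dirtytime_tick(void *arg)
{
	(void)arg;
	for (;;) {
		sleep(expire_interval);	/* ~ schedule_delayed_work() re-arm */
		puts("wake wbs whose b_dirty_time is non-empty");
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, dirtytime_tick, NULL);
	sleep(5);	/* observe a couple of ticks, then exit */
	return 0;
}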
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2202) 
1efff914afac8 (Theodore Ts'o         2015-03-17 12:23:32 -0400 2203) int dirtytime_interval_handler(struct ctl_table *table, int write,
9ca48e20ec5cb (Tobias Klauser        2020-09-18 21:20:39 -0700 2204) 			       void *buffer, size_t *lenp, loff_t *ppos)
1efff914afac8 (Theodore Ts'o         2015-03-17 12:23:32 -0400 2205) {
1efff914afac8 (Theodore Ts'o         2015-03-17 12:23:32 -0400 2206) 	int ret;
1efff914afac8 (Theodore Ts'o         2015-03-17 12:23:32 -0400 2207) 
1efff914afac8 (Theodore Ts'o         2015-03-17 12:23:32 -0400 2208) 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
1efff914afac8 (Theodore Ts'o         2015-03-17 12:23:32 -0400 2209) 	if (ret == 0 && write)
1efff914afac8 (Theodore Ts'o         2015-03-17 12:23:32 -0400 2210) 		mod_delayed_work(system_wq, &dirtytime_work, 0);
1efff914afac8 (Theodore Ts'o         2015-03-17 12:23:32 -0400 2211) 	return ret;
1efff914afac8 (Theodore Ts'o         2015-03-17 12:23:32 -0400 2212) }
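
A usage note, assuming the usual sysctl wiring for this handler (the knob is exposed as vm.dirtytime_expire_seconds): writing a new value both updates the interval and, through mod_delayed_work(system_wq, &dirtytime_work, 0), fires the delayed work immediately, so the next dirtytime sweep happens right away instead of waiting out the previously scheduled delay.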
1efff914afac8 (Theodore Ts'o         2015-03-17 12:23:32 -0400 2213) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2214) /**
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2215)  * __mark_inode_dirty -	internal function to mark an inode dirty
0117d4272b1ac (Mauro Carvalho Chehab 2017-05-12 07:45:42 -0300 2216)  *
0117d4272b1ac (Mauro Carvalho Chehab 2017-05-12 07:45:42 -0300 2217)  * @inode: inode to mark
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2218)  * @flags: what kind of dirty, e.g. I_DIRTY_SYNC.  This can be a combination of
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2219)  *	   multiple I_DIRTY_* flags, except that I_DIRTY_TIME can't be combined
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2220)  *	   with I_DIRTY_PAGES.
0117d4272b1ac (Mauro Carvalho Chehab 2017-05-12 07:45:42 -0300 2221)  *
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2222)  * Mark an inode as dirty.  We notify the filesystem, then update the inode's
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2223)  * dirty flags.  Then, if needed we add the inode to the appropriate dirty list.
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2224)  *
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2225)  * Most callers should use mark_inode_dirty() or mark_inode_dirty_sync()
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2226)  * instead of calling this directly.
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2227)  *
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2228)  * CAREFUL!  We only add the inode to the dirty list if it is hashed or if it
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2229)  * refers to a blockdev.  Unhashed inodes will never be added to the dirty list
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2230)  * even if they are later hashed, as they will have been marked dirty already.
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2231)  *
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2232)  * In short, ensure you hash any inodes _before_ you start marking them dirty.
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2233)  *
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2234)  * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2235)  * the block-special inode (/dev/hda1) itself.  And the ->dirtied_when field of
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2236)  * the kernel-internal blockdev inode represents the dirtying time of the
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2237)  * blockdev's pages.  This is why for I_DIRTY_PAGES we always use
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2238)  * page->mapping->host, so the page-dirtying time is recorded in the internal
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2239)  * blockdev inode.
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2240)  */
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2241) void __mark_inode_dirty(struct inode *inode, int flags)
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2242) {
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2243) 	struct super_block *sb = inode->i_sb;
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2244) 	int dirtytime = 0;
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 2245) 
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 2246) 	trace_writeback_mark_inode_dirty(inode, flags);
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2247) 
e2728c5621fd9 (Eric Biggers          2021-01-12 11:02:47 -0800 2248) 	if (flags & I_DIRTY_INODE) {
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2249) 		/*
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2250) 		 * Notify the filesystem about the inode being dirtied, so that
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2251) 		 * (if needed) it can update on-disk fields and journal the
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2252) 		 * inode.  This is only needed when the inode itself is being
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2253) 		 * dirtied now.  I.e. it's only needed for I_DIRTY_INODE, not
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2254) 		 * for just I_DIRTY_PAGES or I_DIRTY_TIME.
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2255) 		 */
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 2256) 		trace_writeback_dirty_inode_start(inode, flags);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2257) 		if (sb->s_op->dirty_inode)
a38ed483a7267 (Eric Biggers          2021-01-12 11:02:48 -0800 2258) 			sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
9fb0a7da0c528 (Tejun Heo             2013-01-11 13:06:37 -0800 2259) 		trace_writeback_dirty_inode(inode, flags);
e2728c5621fd9 (Eric Biggers          2021-01-12 11:02:47 -0800 2260) 
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2261) 		/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 2262) 		flags &= ~I_DIRTY_TIME;
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2263) 	} else {
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2264) 		/*
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2265) 		 * Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing.
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2266) 		 * (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2267) 		 * in one call to __mark_inode_dirty().)
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2268) 		 */
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2269) 		dirtytime = flags & I_DIRTY_TIME;
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2270) 		WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME);
e2728c5621fd9 (Eric Biggers          2021-01-12 11:02:47 -0800 2271) 	}
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2272) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2273) 	/*
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 2274) 	 * Paired with smp_mb() in __writeback_single_inode() for the
9c6ac78eb3521 (Tejun Heo             2014-10-24 15:38:21 -0400 2275) 	 * following lockless i_state test.  See there for details.
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2276) 	 */
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2277) 	smp_mb();
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2278) 
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 2279) 	if (((inode->i_state & flags) == flags) ||
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 2280) 	    (dirtytime && (inode->i_state & I_DIRTY_INODE)))
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2281) 		return;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2282) 
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 2283) 	spin_lock(&inode->i_lock);
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 2284) 	if (dirtytime && (inode->i_state & I_DIRTY_INODE))
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 2285) 		goto out_unlock_inode;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2286) 	if ((inode->i_state & flags) != flags) {
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2287) 		const int was_dirty = inode->i_state & I_DIRTY;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2288) 
52ebea749aaed (Tejun Heo             2015-05-22 17:13:37 -0400 2289) 		inode_attach_wb(inode, NULL);
52ebea749aaed (Tejun Heo             2015-05-22 17:13:37 -0400 2290) 
35d14f278e530 (Eric Biggers          2021-01-12 11:02:49 -0800 2291) 		/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 2292) 		if (flags & I_DIRTY_INODE)
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 2293) 			inode->i_state &= ~I_DIRTY_TIME;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2294) 		inode->i_state |= flags;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2295) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2296) 		/*
5afced3bf2810 (Jan Kara              2020-05-29 15:05:22 +0200 2297) 		 * If the inode is queued for writeback by flush worker, just
5afced3bf2810 (Jan Kara              2020-05-29 15:05:22 +0200 2298) 		 * update its dirty state. Once the flush worker is done with
5afced3bf2810 (Jan Kara              2020-05-29 15:05:22 +0200 2299) 		 * the inode it will place it on the appropriate superblock
5afced3bf2810 (Jan Kara              2020-05-29 15:05:22 +0200 2300) 		 * list, based upon its state.
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2301) 		 */
5afced3bf2810 (Jan Kara              2020-05-29 15:05:22 +0200 2302) 		if (inode->i_state & I_SYNC_QUEUED)
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 2303) 			goto out_unlock_inode;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2304) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2305) 		/*
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2306) 		 * Only add valid (hashed) inodes to the superblock's
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2307) 		 * dirty list.  Add blockdev inodes as well.
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2308) 		 */
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2309) 		if (!S_ISBLK(inode->i_mode)) {
1d3382cbf0298 (Al Viro               2010-10-23 15:19:20 -0400 2310) 			if (inode_unhashed(inode))
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 2311) 				goto out_unlock_inode;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2312) 		}
a4ffdde6e56fd (Al Viro               2010-06-02 17:38:30 -0400 2313) 		if (inode->i_state & I_FREEING)
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 2314) 			goto out_unlock_inode;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2315) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2316) 		/*
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2317) 		 * If the inode was already on b_dirty/b_io/b_more_io, don't
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2318) 		 * reposition it (that would break b_dirty time-ordering).
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2319) 		 */
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2320) 		if (!was_dirty) {
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 2321) 			struct bdi_writeback *wb;
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 2322) 			struct list_head *dirty_list;
a66979abad090 (Dave Chinner          2011-03-22 22:23:41 +1100 2323) 			bool wakeup_bdi = false;
253c34e9b10c3 (Artem Bityutskiy      2010-07-25 14:29:21 +0300 2324) 
87e1d789bf55b (Tejun Heo             2015-05-28 14:50:52 -0400 2325) 			wb = locked_inode_to_wb_and_lock_list(inode);
253c34e9b10c3 (Artem Bityutskiy      2010-07-25 14:29:21 +0300 2326) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2327) 			inode->dirtied_when = jiffies;
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2328) 			if (dirtytime)
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2329) 				inode->dirtied_time_when = jiffies;
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 2330) 
0e11f6443f522 (Christoph Hellwig     2018-02-21 07:54:49 -0800 2331) 			if (inode->i_state & I_DIRTY)
0747259d13feb (Tejun Heo             2015-05-22 17:14:02 -0400 2332) 				dirty_list = &wb->b_dirty;
a2f4870697a5b (Theodore Ts'o         2015-03-17 12:23:19 -0400 2333) 			else
0747259d13feb (Tejun Heo             2015-05-22 17:14:02 -0400 2334) 				dirty_list = &wb->b_dirty_time;
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 2335) 
c7f5408493aeb (Dave Chinner          2015-03-04 14:07:22 -0500 2336) 			wakeup_bdi = inode_io_list_move_locked(inode, wb,
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 2337) 							       dirty_list);
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 2338) 
0747259d13feb (Tejun Heo             2015-05-22 17:14:02 -0400 2339) 			spin_unlock(&wb->list_lock);
0ae45f63d4ef8 (Theodore Ts'o         2015-02-02 00:37:00 -0500 2340) 			trace_writeback_dirty_inode_enqueue(inode);
a66979abad090 (Dave Chinner          2011-03-22 22:23:41 +1100 2341) 
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 2342) 			/*
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 2343) 			 * If this is the first dirty inode for this bdi,
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 2344) 			 * we have to wake up the corresponding writeback worker
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 2345) 			 * to make sure background writeback happens
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 2346) 			 * later.
d6c10f1fc8626 (Tejun Heo             2015-05-22 17:13:45 -0400 2347) 			 */
f56753ac2a908 (Christoph Hellwig     2020-09-24 08:51:40 +0200 2348) 			if (wakeup_bdi &&
f56753ac2a908 (Christoph Hellwig     2020-09-24 08:51:40 +0200 2349) 			    (wb->bdi->capabilities & BDI_CAP_WRITEBACK))
0747259d13feb (Tejun Heo             2015-05-22 17:14:02 -0400 2350) 				wb_wakeup_delayed(wb);
a66979abad090 (Dave Chinner          2011-03-22 22:23:41 +1100 2351) 			return;
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2352) 		}
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2353) 	}
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 2354) out_unlock_inode:
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 2355) 	spin_unlock(&inode->i_lock);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2356) }
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2357) EXPORT_SYMBOL(__mark_inode_dirty);
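
For orientation: most code does not call __mark_inode_dirty() directly but goes
through the thin wrappers in include/linux/fs.h. A minimal sketch of those
wrappers as they appear in kernels of this vintage (paraphrased, not copied
from this tree):

static inline void mark_inode_dirty(struct inode *inode)
{
	/* dirty both the inode itself and its pages */
	__mark_inode_dirty(inode, I_DIRTY);
}

static inline void mark_inode_dirty_sync(struct inode *inode)
{
	/* only the inode needs writeout for data integrity */
	__mark_inode_dirty(inode, I_DIRTY_SYNC);
}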
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2358) 
e97fedb9ef986 (Dave Chinner          2015-03-04 13:40:00 -0500 2359) /*
e97fedb9ef986 (Dave Chinner          2015-03-04 13:40:00 -0500 2360)  * The @s_sync_lock is used to serialise concurrent sync operations
e97fedb9ef986 (Dave Chinner          2015-03-04 13:40:00 -0500 2361)  * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
e97fedb9ef986 (Dave Chinner          2015-03-04 13:40:00 -0500 2362)  * Concurrent callers will block on the s_sync_lock rather than doing contending
e97fedb9ef986 (Dave Chinner          2015-03-04 13:40:00 -0500 2363)  * walks. The queueing maintains sync(2) required behaviour as all the IO that
e97fedb9ef986 (Dave Chinner          2015-03-04 13:40:00 -0500 2364)  * has been issued up to the time this function is entered is guaranteed to be
e97fedb9ef986 (Dave Chinner          2015-03-04 13:40:00 -0500 2365)  * completed by the time we have gained the lock and waited for all IO that is
e97fedb9ef986 (Dave Chinner          2015-03-04 13:40:00 -0500 2366)  * in progress regardless of the order callers are granted the lock.
e97fedb9ef986 (Dave Chinner          2015-03-04 13:40:00 -0500 2367)  */
b6e51316daede (Jens Axboe            2009-09-16 15:13:54 +0200 2368) static void wait_sb_inodes(struct super_block *sb)
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2369) {
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2370) 	LIST_HEAD(sync_list);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2371) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2372) 	/*
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2373) 	 * We need to be protected against the filesystem going from
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2374) 	 * r/o to r/w or vice versa.
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2375) 	 */
b6e51316daede (Jens Axboe            2009-09-16 15:13:54 +0200 2376) 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2377) 
e97fedb9ef986 (Dave Chinner          2015-03-04 13:40:00 -0500 2378) 	mutex_lock(&sb->s_sync_lock);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2379) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2380) 	/*
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2381) 	 * Splice the writeback list onto a temporary list to avoid waiting on
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2382) 	 * inodes that have started writeback after this point.
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2383) 	 *
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2384) 	 * Use rcu_read_lock() to keep the inodes around until we have a
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2385) 	 * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2386) 	 * the local list because inodes can be dropped from either by writeback
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2387) 	 * completion.
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2388) 	 */
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2389) 	rcu_read_lock();
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2390) 	spin_lock_irq(&sb->s_inode_wblist_lock);
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2391) 	list_splice_init(&sb->s_inodes_wb, &sync_list);
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2392) 
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2393) 	/*
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2394) 	 * Data integrity sync. Must wait for all pages under writeback, because
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2395) 	 * there may have been pages dirtied before our sync call whose
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2396) 	 * writeout started before we could write them out.  In that case the
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2397) 	 * inode may not be on the dirty list, but we still have to wait for
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2398) 	 * that writeout.
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2399) 	 */
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2400) 	while (!list_empty(&sync_list)) {
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2401) 		struct inode *inode = list_first_entry(&sync_list, struct inode,
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2402) 						       i_wb_list);
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 2403) 		struct address_space *mapping = inode->i_mapping;
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2404) 
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2405) 		/*
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2406) 		 * Move each inode back to the wb list before we drop the lock
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2407) 		 * to preserve consistency between i_wb_list and the mapping
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2408) 		 * writeback tag. Writeback completion is responsible for removing
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2409) 		 * the inode from either list once the writeback tag is cleared.
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2410) 		 */
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2411) 		list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb);
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2412) 
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2413) 		/*
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2414) 		 * The mapping can appear untagged while still on-list since we
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2415) 		 * do not have the mapping lock. Skip it here; writeback completion
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2416) 		 * will remove it.
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2417) 		 */
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2418) 		if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2419) 			continue;
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2420) 
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2421) 		spin_unlock_irq(&sb->s_inode_wblist_lock);
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2422) 
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 2423) 		spin_lock(&inode->i_lock);
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2424) 		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 2425) 			spin_unlock(&inode->i_lock);
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2426) 
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2427) 			spin_lock_irq(&sb->s_inode_wblist_lock);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2428) 			continue;
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 2429) 		}
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2430) 		__iget(inode);
250df6ed274d7 (Dave Chinner          2011-03-22 22:23:36 +1100 2431) 		spin_unlock(&inode->i_lock);
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2432) 		rcu_read_unlock();
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2433) 
aa750fd71c242 (Junichi Nomura        2015-11-05 18:47:23 -0800 2434) 		/*
aa750fd71c242 (Junichi Nomura        2015-11-05 18:47:23 -0800 2435) 		 * We keep the error status of individual mapping so that
aa750fd71c242 (Junichi Nomura        2015-11-05 18:47:23 -0800 2436) 		 * applications can catch the writeback error using fsync(2).
aa750fd71c242 (Junichi Nomura        2015-11-05 18:47:23 -0800 2437) 		 * See filemap_fdatawait_keep_errors() for details.
aa750fd71c242 (Junichi Nomura        2015-11-05 18:47:23 -0800 2438) 		 */
aa750fd71c242 (Junichi Nomura        2015-11-05 18:47:23 -0800 2439) 		filemap_fdatawait_keep_errors(mapping);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2440) 
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2441) 		cond_resched();
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2442) 
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2443) 		iput(inode);
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2444) 
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2445) 		rcu_read_lock();
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2446) 		spin_lock_irq(&sb->s_inode_wblist_lock);
03ba3782e8dcc (Jens Axboe            2009-09-09 09:08:54 +0200 2447) 	}
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2448) 	spin_unlock_irq(&sb->s_inode_wblist_lock);
6c60d2b5746cf (Dave Chinner          2016-07-26 15:21:50 -0700 2449) 	rcu_read_unlock();
e97fedb9ef986 (Dave Chinner          2015-03-04 13:40:00 -0500 2450) 	mutex_unlock(&sb->s_sync_lock);
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2451) }
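
The function above is also a compact illustration of the splice-and-walk
idiom: steal the shared list under the lock, then walk the private copy while
dropping the lock around each item. A hypothetical, self-contained kernel-style
sketch of the same idiom (my_ctx, my_item and process() are made-up names for
illustration, not kernel APIs):

#include <linux/list.h>
#include <linux/spinlock.h>

struct my_item { struct list_head node; };
struct my_ctx { spinlock_t lock; struct list_head pending; };

static void process(struct my_item *item);	/* made-up worker, may sleep */

static void drain_pending(struct my_ctx *ctx)
{
	LIST_HEAD(private);

	spin_lock(&ctx->lock);
	/* Steal everything queued so far; new arrivals go to ctx->pending. */
	list_splice_init(&ctx->pending, &private);
	while (!list_empty(&private)) {
		struct my_item *item = list_first_entry(&private,
							struct my_item, node);

		/* Re-queue before unlocking, mirroring wait_sb_inodes(). */
		list_move_tail(&item->node, &ctx->pending);
		spin_unlock(&ctx->lock);

		process(item);		/* completion path unlinks the item */

		spin_lock(&ctx->lock);
	}
	spin_unlock(&ctx->lock);
}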
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2452) 
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2453) static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2454) 				     enum wb_reason reason, bool skip_if_busy)
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2455) {
5b9cce4c7eb06 (Tejun Heo             2019-08-26 09:06:52 -0700 2456) 	struct backing_dev_info *bdi = sb->s_bdi;
5b9cce4c7eb06 (Tejun Heo             2019-08-26 09:06:52 -0700 2457) 	DEFINE_WB_COMPLETION(done, bdi);
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 2458) 	struct wb_writeback_work work = {
6e6938b6d3130 (Wu Fengguang          2010-06-06 10:38:15 -0600 2459) 		.sb			= sb,
6e6938b6d3130 (Wu Fengguang          2010-06-06 10:38:15 -0600 2460) 		.sync_mode		= WB_SYNC_NONE,
6e6938b6d3130 (Wu Fengguang          2010-06-06 10:38:15 -0600 2461) 		.tagged_writepages	= 1,
6e6938b6d3130 (Wu Fengguang          2010-06-06 10:38:15 -0600 2462) 		.done			= &done,
6e6938b6d3130 (Wu Fengguang          2010-06-06 10:38:15 -0600 2463) 		.nr_pages		= nr,
0e175a1835ffc (Curt Wohlgemuth       2011-10-07 21:54:10 -0600 2464) 		.reason			= reason,
3c4d716538f3e (Christoph Hellwig     2010-06-08 18:14:43 +0200 2465) 	};
d8a8559cd7a9c (Jens Axboe            2009-09-02 12:34:32 +0200 2466) 
e79729123f639 (Tejun Heo             2015-05-22 17:13:48 -0400 2467) 	if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
6eedc70150d55 (Jan Kara              2012-07-03 16:45:27 +0200 2468) 		return;
cf37e972478ec (Christoph Hellwig     2010-06-08 18:14:51 +0200 2469) 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2470) 
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 2471) 	bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy);
5b9cce4c7eb06 (Tejun Heo             2019-08-26 09:06:52 -0700 2472) 	wb_wait_for_completion(&done);
e913fc825dc68 (Jens Axboe            2010-05-17 12:55:07 +0200 2473) }
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2474) 
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2475) /**
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2476)  * writeback_inodes_sb_nr -	writeback dirty inodes from given super_block
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2477)  * @sb: the superblock
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2478)  * @nr: the number of pages to write
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2479)  * @reason: reason why some writeback work was initiated
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2480)  *
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2481)  * Start writeback on some inodes on this super_block. No guarantees are made
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2482)  * on how many (if any) will be written, and this function does not wait
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2483)  * for IO completion of submitted IO.
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2484)  */
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2485) void writeback_inodes_sb_nr(struct super_block *sb,
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2486) 			    unsigned long nr,
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2487) 			    enum wb_reason reason)
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2488) {
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2489) 	__writeback_inodes_sb_nr(sb, nr, reason, false);
f30a7d0cc8d90 (Tejun Heo             2015-05-22 17:14:00 -0400 2490) }
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2491) EXPORT_SYMBOL(writeback_inodes_sb_nr);
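
A hedged usage sketch (exact call sites vary by tree): a filesystem wanting to
free delalloc space can push a bounded amount of dirty data, taking s_umount
first as the WARN_ON in __writeback_inodes_sb_nr() requires:

	if (down_read_trylock(&sb->s_umount)) {
		/* push up to 1024 pages; does not wait for the IO */
		writeback_inodes_sb_nr(sb, 1024, WB_REASON_FS_FREE_SPACE);
		up_read(&sb->s_umount);
	}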
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2492) 
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2493) /**
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2494)  * writeback_inodes_sb	-	writeback dirty inodes from given super_block
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2495)  * @sb: the superblock
786228ab3095f (Marcos Paulo de Souza 2011-11-23 20:56:45 +0800 2496)  * @reason: reason why some writeback work was initiated
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2497)  *
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2498)  * Start writeback on some inodes on this super_block. No guarantees are made
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2499)  * on how many (if any) will be written, and this function does not wait
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2500)  * for IO completion of submitted IO.
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2501)  */
0e175a1835ffc (Curt Wohlgemuth       2011-10-07 21:54:10 -0600 2502) void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2503) {
0e175a1835ffc (Curt Wohlgemuth       2011-10-07 21:54:10 -0600 2504) 	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2505) }
0e3c9a2284f54 (Jens Axboe            2010-06-01 11:08:43 +0200 2506) EXPORT_SYMBOL(writeback_inodes_sb);
e913fc825dc68 (Jens Axboe            2010-05-17 12:55:07 +0200 2507) 
17bd55d037a02 (Eric Sandeen          2009-12-23 07:57:07 -0500 2508) /**
8264c3214f28b (Rakesh Pandit         2017-10-09 13:34:41 +0300 2509)  * try_to_writeback_inodes_sb - try to start writeback if none underway
17bd55d037a02 (Eric Sandeen          2009-12-23 07:57:07 -0500 2510)  * @sb: the superblock
8264c3214f28b (Rakesh Pandit         2017-10-09 13:34:41 +0300 2511)  * @reason: reason why some writeback work was initiated
17bd55d037a02 (Eric Sandeen          2009-12-23 07:57:07 -0500 2512)  *
8264c3214f28b (Rakesh Pandit         2017-10-09 13:34:41 +0300 2513)  * Invoke __writeback_inodes_sb_nr if no writeback is currently underway.
17bd55d037a02 (Eric Sandeen          2009-12-23 07:57:07 -0500 2514)  */
8264c3214f28b (Rakesh Pandit         2017-10-09 13:34:41 +0300 2515) void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
17bd55d037a02 (Eric Sandeen          2009-12-23 07:57:07 -0500 2516) {
10ee27a06cc8e (Miao Xie              2013-01-10 13:47:57 +0800 2517) 	if (!down_read_trylock(&sb->s_umount))
8264c3214f28b (Rakesh Pandit         2017-10-09 13:34:41 +0300 2518) 		return;
10ee27a06cc8e (Miao Xie              2013-01-10 13:47:57 +0800 2519) 
8264c3214f28b (Rakesh Pandit         2017-10-09 13:34:41 +0300 2520) 	__writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true);
10ee27a06cc8e (Miao Xie              2013-01-10 13:47:57 +0800 2521) 	up_read(&sb->s_umount);
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2522) }
10ee27a06cc8e (Miao Xie              2013-01-10 13:47:57 +0800 2523) EXPORT_SYMBOL(try_to_writeback_inodes_sb);
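
Compared with the open-coded trylock in the sketch above, this helper takes
s_umount itself, so an opportunistic caller reduces to a single line (the
reason value is picked for illustration):

	try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);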
3259f8bed2f0f (Chris Mason           2010-10-29 11:16:17 -0400 2524) 
d8a8559cd7a9c (Jens Axboe            2009-09-02 12:34:32 +0200 2525) /**
d8a8559cd7a9c (Jens Axboe            2009-09-02 12:34:32 +0200 2526)  * sync_inodes_sb	-	sync sb inode pages
0dc83bd30b0bf (Jan Kara              2014-02-21 11:19:04 +0100 2527)  * @sb: the superblock
d8a8559cd7a9c (Jens Axboe            2009-09-02 12:34:32 +0200 2528)  *
d8a8559cd7a9c (Jens Axboe            2009-09-02 12:34:32 +0200 2529)  * This function writes and waits on any dirty inode belonging to this
0dc83bd30b0bf (Jan Kara              2014-02-21 11:19:04 +0100 2530)  * super_block.
d8a8559cd7a9c (Jens Axboe            2009-09-02 12:34:32 +0200 2531)  */
0dc83bd30b0bf (Jan Kara              2014-02-21 11:19:04 +0100 2532) void sync_inodes_sb(struct super_block *sb)
d8a8559cd7a9c (Jens Axboe            2009-09-02 12:34:32 +0200 2533) {
5b9cce4c7eb06 (Tejun Heo             2019-08-26 09:06:52 -0700 2534) 	struct backing_dev_info *bdi = sb->s_bdi;
5b9cce4c7eb06 (Tejun Heo             2019-08-26 09:06:52 -0700 2535) 	DEFINE_WB_COMPLETION(done, bdi);
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 2536) 	struct wb_writeback_work work = {
3c4d716538f3e (Christoph Hellwig     2010-06-08 18:14:43 +0200 2537) 		.sb		= sb,
3c4d716538f3e (Christoph Hellwig     2010-06-08 18:14:43 +0200 2538) 		.sync_mode	= WB_SYNC_ALL,
3c4d716538f3e (Christoph Hellwig     2010-06-08 18:14:43 +0200 2539) 		.nr_pages	= LONG_MAX,
3c4d716538f3e (Christoph Hellwig     2010-06-08 18:14:43 +0200 2540) 		.range_cyclic	= 0,
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 2541) 		.done		= &done,
0e175a1835ffc (Curt Wohlgemuth       2011-10-07 21:54:10 -0600 2542) 		.reason		= WB_REASON_SYNC,
7747bd4bceb30 (Dave Chinner          2013-07-02 22:38:35 +1000 2543) 		.for_sync	= 1,
3c4d716538f3e (Christoph Hellwig     2010-06-08 18:14:43 +0200 2544) 	};
3c4d716538f3e (Christoph Hellwig     2010-06-08 18:14:43 +0200 2545) 
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400 2546) 	/*
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400 2547) 	 * Can't skip on !bdi_has_dirty_io() because we should wait for !dirty
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400 2548) 	 * inodes under writeback, and I_DIRTY_TIME inodes ignored by
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400 2549) 	 * bdi_has_dirty_io() need to be written out too.
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400 2550) 	 */
006a0973ed020 (Tejun Heo             2015-08-25 14:11:52 -0400 2551) 	if (bdi == &noop_backing_dev_info)
6eedc70150d55 (Jan Kara              2012-07-03 16:45:27 +0200 2552) 		return;
cf37e972478ec (Christoph Hellwig     2010-06-08 18:14:51 +0200 2553) 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
cf37e972478ec (Christoph Hellwig     2010-06-08 18:14:51 +0200 2554) 
7fc5854f8c6ef (Tejun Heo             2017-12-12 08:38:30 -0800 2555) 	/* protect against inode wb switch, see inode_switch_wbs_work_fn() */
7fc5854f8c6ef (Tejun Heo             2017-12-12 08:38:30 -0800 2556) 	bdi_down_write_wb_switch_rwsem(bdi);
db125360409fc (Tejun Heo             2015-05-22 17:14:01 -0400 2557) 	bdi_split_work_to_wbs(bdi, &work, false);
5b9cce4c7eb06 (Tejun Heo             2019-08-26 09:06:52 -0700 2558) 	wb_wait_for_completion(&done);
7fc5854f8c6ef (Tejun Heo             2017-12-12 08:38:30 -0800 2559) 	bdi_up_write_wb_switch_rwsem(bdi);
83ba7b071f30f (Christoph Hellwig     2010-07-06 08:59:53 +0200 2560) 
b6e51316daede (Jens Axboe            2009-09-16 15:13:54 +0200 2561) 	wait_sb_inodes(sb);
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2562) }
d8a8559cd7a9c (Jens Axboe            2009-09-02 12:34:32 +0200 2563) EXPORT_SYMBOL(sync_inodes_sb);
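
The canonical caller is the sync(2) path in fs/sync.c, which walks every
superblock and, roughly (a paraphrased sketch, not necessarily this tree's
exact code), does:

static void sync_inodes_one_sb(struct super_block *sb, void *arg)
{
	/* read-only filesystems have no dirty inodes to write */
	if (!sb_rdonly(sb))
		sync_inodes_sb(sb);
}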
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2564) 
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2565) /**
7f04c26d715a2 (Andrea Arcangeli      2005-10-30 15:03:05 -0800 2566)  * write_inode_now	-	write an inode to disk
7f04c26d715a2 (Andrea Arcangeli      2005-10-30 15:03:05 -0800 2567)  * @inode: inode to write to disk
7f04c26d715a2 (Andrea Arcangeli      2005-10-30 15:03:05 -0800 2568)  * @sync: whether the write should be synchronous or not
7f04c26d715a2 (Andrea Arcangeli      2005-10-30 15:03:05 -0800 2569)  *
7f04c26d715a2 (Andrea Arcangeli      2005-10-30 15:03:05 -0800 2570)  * This function commits an inode to disk immediately if it is dirty. This is
7f04c26d715a2 (Andrea Arcangeli      2005-10-30 15:03:05 -0800 2571)  * primarily needed by knfsd.
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2572)  *
7f04c26d715a2 (Andrea Arcangeli      2005-10-30 15:03:05 -0800 2573)  * The caller must either have a ref on the inode or must have set I_WILL_FREE.
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2574)  */
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2575) int write_inode_now(struct inode *inode, int sync)
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2576) {
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2577) 	struct writeback_control wbc = {
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2578) 		.nr_to_write = LONG_MAX,
18914b1884ebd (Mike Galbraith        2008-02-08 04:20:23 -0800 2579) 		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
111ebb6e6f7bd (OGAWA Hirofumi        2006-06-23 02:03:26 -0700 2580) 		.range_start = 0,
111ebb6e6f7bd (OGAWA Hirofumi        2006-06-23 02:03:26 -0700 2581) 		.range_end = LLONG_MAX,
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2582) 	};
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2583) 
f56753ac2a908 (Christoph Hellwig     2020-09-24 08:51:40 +0200 2584) 	if (!mapping_can_writeback(inode->i_mapping))
49364ce253441 (Andrew Morton         2005-11-07 00:59:15 -0800 2585) 		wbc.nr_to_write = 0;
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2586) 
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2587) 	might_sleep();
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 2588) 	return writeback_single_inode(inode, &wbc);
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2589) }
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2590) EXPORT_SYMBOL(write_inode_now);
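
A hedged usage sketch: a caller that holds a reference and must have the inode
durably on disk before proceeding would use the synchronous mode:

	int err;

	/* sync == 1: write and wait on both the data pages and the inode */
	err = write_inode_now(inode, 1);
	if (err)
		pr_warn("inode %lu: writeback failed: %d\n",
			inode->i_ino, err);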
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2591) 
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2592) /**
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2593)  * sync_inode - write an inode and its pages to disk.
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2594)  * @inode: the inode to sync
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2595)  * @wbc: controls the writeback mode
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2596)  *
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2597)  * sync_inode() will write an inode and its pages to disk.  It will also
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2598)  * correctly update the inode on its superblock's dirty inode lists and will
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2599)  * update inode->i_state.
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2600)  *
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2601)  * The caller must have a ref on the inode.
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2602)  */
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2603) int sync_inode(struct inode *inode, struct writeback_control *wbc)
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2604) {
aaf2559332ba2 (Tejun Heo             2016-03-18 13:52:04 -0400 2605) 	return writeback_single_inode(inode, wbc);
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2606) }
^1da177e4c3f4 (Linus Torvalds        2005-04-16 15:20:36 -0700 2607) EXPORT_SYMBOL(sync_inode);
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2608) 
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2609) /**
c691b9d983d70 (Andrew Morton         2011-01-13 15:45:48 -0800 2610)  * sync_inode_metadata - write an inode to disk
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2611)  * @inode: the inode to sync
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2612)  * @wait: wait for I/O to complete.
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2613)  *
c691b9d983d70 (Andrew Morton         2011-01-13 15:45:48 -0800 2614)  * Write an inode to disk and adjust its dirty state after completion.
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2615)  *
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2616)  * Note: only writes the actual inode, no associated data or other metadata.
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2617)  */
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2618) int sync_inode_metadata(struct inode *inode, int wait)
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2619) {
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2620) 	struct writeback_control wbc = {
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2621) 		.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2622) 		.nr_to_write = 0, /* metadata-only */
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2623) 	};
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2624) 
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2625) 	return sync_inode(inode, &wbc);
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2626) }
c37650161a53c (Christoph Hellwig     2010-10-06 10:48:20 +0200 2627) EXPORT_SYMBOL(sync_inode_metadata);
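
As a usage sketch (hedged; my_fsync is a made-up name), a simple fsync
implementation flushes the data range first and then the inode itself, along
the lines of the generic helper in fs/libfs.c:

static int my_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	int err;

	/* write and wait on the dirty data pages in [start, end] */
	err = file_write_and_wait_range(file, start, end);
	if (err)
		return err;

	/* then write the inode itself, waiting for completion */
	return sync_inode_metadata(inode, 1);
}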