457c899653991 (Thomas Gleixner 2019-05-19 13:08:55 +0100 1) // SPDX-License-Identifier: GPL-2.0-only
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2) /*
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 3) * fs/fs-writeback.c
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 4) *
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 5) * Copyright (C) 2002, Linus Torvalds.
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 6) *
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 7) * Contains all the functions related to writing back and waiting
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 8) * upon dirty inodes against superblocks, and writing back dirty
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 9) * pages against inodes. ie: data writeback. Writeout of the
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 10) * inode itself is not handled here.
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 11) *
e1f8e87449147 (Francois Cami 2008-10-15 22:01:59 -0700 12) * 10Apr2002 Andrew Morton
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 13) * Split out of fs/inode.c
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 14) * Additions for address_space-based writeback
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 15) */
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 16)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 17) #include <linux/kernel.h>
630d9c47274aa (Paul Gortmaker 2011-11-16 23:57:37 -0500 18) #include <linux/export.h>
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 19) #include <linux/spinlock.h>
5a0e3ad6af866 (Tejun Heo 2010-03-24 17:04:11 +0900 20) #include <linux/slab.h>
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 21) #include <linux/sched.h>
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 22) #include <linux/fs.h>
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 23) #include <linux/mm.h>
bc31b86a5923f (Wu Fengguang 2012-01-07 20:41:55 -0600 24) #include <linux/pagemap.h>
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 25) #include <linux/kthread.h>
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 26) #include <linux/writeback.h>
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 27) #include <linux/blkdev.h>
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 28) #include <linux/backing-dev.h>
455b2864686d3 (Dave Chinner 2010-07-07 13:24:06 +1000 29) #include <linux/tracepoint.h>
719ea2fbb553a (Al Viro 2013-09-29 11:24:49 -0400 30) #include <linux/device.h>
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 31) #include <linux/memcontrol.h>
07f3f05c1e305 (David Howells 2006-09-30 20:52:18 +0200 32) #include "internal.h"
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 33)
bc31b86a5923f (Wu Fengguang 2012-01-07 20:41:55 -0600 34) /*
bc31b86a5923f (Wu Fengguang 2012-01-07 20:41:55 -0600 35) * 4MB minimal write chunk size
bc31b86a5923f (Wu Fengguang 2012-01-07 20:41:55 -0600 36) */
09cbfeaf1a5a6 (Kirill A. Shutemov 2016-04-01 15:29:47 +0300 37) #define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
bc31b86a5923f (Wu Fengguang 2012-01-07 20:41:55 -0600 38)
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 39) /*
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 40) * Passed into wb_writeback(), essentially a subset of writeback_control
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 41) */
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 42) struct wb_writeback_work {
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 43) long nr_pages;
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 44) struct super_block *sb;
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 45) enum writeback_sync_modes sync_mode;
6e6938b6d3130 (Wu Fengguang 2010-06-06 10:38:15 -0600 46) unsigned int tagged_writepages:1;
52957fe1c709d (H Hartley Sweeten 2010-04-01 20:36:30 -0500 47) unsigned int for_kupdate:1;
52957fe1c709d (H Hartley Sweeten 2010-04-01 20:36:30 -0500 48) unsigned int range_cyclic:1;
52957fe1c709d (H Hartley Sweeten 2010-04-01 20:36:30 -0500 49) unsigned int for_background:1;
7747bd4bceb30 (Dave Chinner 2013-07-02 22:38:35 +1000 50) unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
ac7b19a34f332 (Tejun Heo 2015-05-22 17:13:57 -0400 51) unsigned int auto_free:1; /* free on completion */
0e175a1835ffc (Curt Wohlgemuth 2011-10-07 21:54:10 -0600 52) enum wb_reason reason; /* why was writeback initiated? */
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 53)
8010c3b6349b4 (Jens Axboe 2009-09-15 20:04:57 +0200 54) struct list_head list; /* pending work list */
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 55) struct wb_completion *done; /* set if the caller waits */
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 56) };
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 57)
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 58) /*
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 59) * If an inode is constantly having its pages dirtied, but then the
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 60) * updates stop dirtytime_expire_interval seconds in the past, it's
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 61) * possible for the worst case time between when an inode has its
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 62) * timestamps updated and when they finally get written out to be two
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 63) * dirtytime_expire_intervals. We set the default to 12 hours (in
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 64) * seconds), which means most of the time inodes will have their
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 65) * timestamps written to disk after 12 hours, but in the worst case a
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 66) * few inodes might not their timestamps updated for 24 hours.
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 67) */
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 68) unsigned int dirtytime_expire_interval = 12 * 60 * 60;
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 69)
7ccf19a8042e3 (Nicholas Piggin 2010-10-21 11:49:30 +1100 70) static inline struct inode *wb_inode(struct list_head *head)
7ccf19a8042e3 (Nicholas Piggin 2010-10-21 11:49:30 +1100 71) {
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 72) return list_entry(head, struct inode, i_io_list);
7ccf19a8042e3 (Nicholas Piggin 2010-10-21 11:49:30 +1100 73) }
7ccf19a8042e3 (Nicholas Piggin 2010-10-21 11:49:30 +1100 74)
15eb77a07c714 (Wu Fengguang 2012-01-17 11:18:56 -0600 75) /*
15eb77a07c714 (Wu Fengguang 2012-01-17 11:18:56 -0600 76) * Include the creation of the trace points after defining the
15eb77a07c714 (Wu Fengguang 2012-01-17 11:18:56 -0600 77) * wb_writeback_work structure and inline functions so that the definition
15eb77a07c714 (Wu Fengguang 2012-01-17 11:18:56 -0600 78) * remains local to this file.
15eb77a07c714 (Wu Fengguang 2012-01-17 11:18:56 -0600 79) */
15eb77a07c714 (Wu Fengguang 2012-01-17 11:18:56 -0600 80) #define CREATE_TRACE_POINTS
15eb77a07c714 (Wu Fengguang 2012-01-17 11:18:56 -0600 81) #include <trace/events/writeback.h>
15eb77a07c714 (Wu Fengguang 2012-01-17 11:18:56 -0600 82)
774016b2d4550 (Steven Whitehouse 2014-02-06 15:47:47 +0000 83) EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage);
774016b2d4550 (Steven Whitehouse 2014-02-06 15:47:47 +0000 84)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 85) static bool wb_io_lists_populated(struct bdi_writeback *wb)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 86) {
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 87) if (wb_has_dirty_io(wb)) {
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 88) return false;
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 89) } else {
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 90) set_bit(WB_has_dirty_io, &wb->state);
95a46c65e3c09 (Tejun Heo 2015-05-22 17:13:47 -0400 91) WARN_ON_ONCE(!wb->avg_write_bandwidth);
766a9d6e60578 (Tejun Heo 2015-05-22 17:13:46 -0400 92) atomic_long_add(wb->avg_write_bandwidth,
766a9d6e60578 (Tejun Heo 2015-05-22 17:13:46 -0400 93) &wb->bdi->tot_write_bandwidth);
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 94) return true;
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 95) }
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 96) }
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 97)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 98) static void wb_io_lists_depopulated(struct bdi_writeback *wb)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 99) {
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 100) if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) &&
766a9d6e60578 (Tejun Heo 2015-05-22 17:13:46 -0400 101) list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) {
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 102) clear_bit(WB_has_dirty_io, &wb->state);
95a46c65e3c09 (Tejun Heo 2015-05-22 17:13:47 -0400 103) WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth,
95a46c65e3c09 (Tejun Heo 2015-05-22 17:13:47 -0400 104) &wb->bdi->tot_write_bandwidth) < 0);
766a9d6e60578 (Tejun Heo 2015-05-22 17:13:46 -0400 105) }
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 106) }
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 107)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 108) /**
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 109) * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 110) * @inode: inode to be moved
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 111) * @wb: target bdi_writeback
bbbc3c1cfaf69 (Wang Long 2017-12-05 07:23:19 -0500 112) * @head: one of @wb->b_{dirty|io|more_io|dirty_time}
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 113) *
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 114) * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io.
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 115) * Returns %true if @inode is the first occupant of the !dirty_time IO
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 116) * lists; otherwise, %false.
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 117) */
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 118) static bool inode_io_list_move_locked(struct inode *inode,
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 119) struct bdi_writeback *wb,
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 120) struct list_head *head)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 121) {
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 122) assert_spin_locked(&wb->list_lock);
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 123)
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 124) list_move(&inode->i_io_list, head);
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 125)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 126) /* dirty_time doesn't count as dirty_io until expiration */
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 127) if (head != &wb->b_dirty_time)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 128) return wb_io_lists_populated(wb);
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 129)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 130) wb_io_lists_depopulated(wb);
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 131) return false;
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 132) }
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 133)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 134) /**
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 135) * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 136) * @inode: inode to be removed
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 137) * @wb: bdi_writeback @inode is being removed from
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 138) *
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 139) * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 140) * clear %WB_has_dirty_io if all are empty afterwards.
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 141) */
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 142) static void inode_io_list_del_locked(struct inode *inode,
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 143) struct bdi_writeback *wb)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 144) {
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 145) assert_spin_locked(&wb->list_lock);
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 146) assert_spin_locked(&inode->i_lock);
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 147)
5afced3bf2810 (Jan Kara 2020-05-29 15:05:22 +0200 148) inode->i_state &= ~I_SYNC_QUEUED;
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 149) list_del_init(&inode->i_io_list);
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 150) wb_io_lists_depopulated(wb);
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 151) }
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 152)
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 153) static void wb_wakeup(struct bdi_writeback *wb)
5acda9d12dcf1 (Jan Kara 2014-04-03 14:46:23 -0700 154) {
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 155) spin_lock_bh(&wb->work_lock);
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 156) if (test_bit(WB_registered, &wb->state))
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 157) mod_delayed_work(bdi_wq, &wb->dwork, 0);
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 158) spin_unlock_bh(&wb->work_lock);
5acda9d12dcf1 (Jan Kara 2014-04-03 14:46:23 -0700 159) }
5acda9d12dcf1 (Jan Kara 2014-04-03 14:46:23 -0700 160)
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 161) static void finish_writeback_work(struct bdi_writeback *wb,
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 162) struct wb_writeback_work *work)
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 163) {
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 164) struct wb_completion *done = work->done;
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 165)
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 166) if (work->auto_free)
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 167) kfree(work);
8e00c4e9dd852 (Tejun Heo 2019-10-06 17:58:09 -0700 168) if (done) {
8e00c4e9dd852 (Tejun Heo 2019-10-06 17:58:09 -0700 169) wait_queue_head_t *waitq = done->waitq;
8e00c4e9dd852 (Tejun Heo 2019-10-06 17:58:09 -0700 170)
8e00c4e9dd852 (Tejun Heo 2019-10-06 17:58:09 -0700 171) /* @done can't be accessed after the following dec */
8e00c4e9dd852 (Tejun Heo 2019-10-06 17:58:09 -0700 172) if (atomic_dec_and_test(&done->cnt))
8e00c4e9dd852 (Tejun Heo 2019-10-06 17:58:09 -0700 173) wake_up_all(waitq);
8e00c4e9dd852 (Tejun Heo 2019-10-06 17:58:09 -0700 174) }
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 175) }
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 176)
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 177) static void wb_queue_work(struct bdi_writeback *wb,
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 178) struct wb_writeback_work *work)
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 179) {
5634cc2aa9aeb (Tejun Heo 2015-08-18 14:54:56 -0700 180) trace_writeback_queue(wb, work);
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 181)
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 182) if (work->done)
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 183) atomic_inc(&work->done->cnt);
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 184)
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 185) spin_lock_bh(&wb->work_lock);
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 186)
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 187) if (test_bit(WB_registered, &wb->state)) {
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 188) list_add_tail(&work->list, &wb->work_list);
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 189) mod_delayed_work(bdi_wq, &wb->dwork, 0);
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 190) } else
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 191) finish_writeback_work(wb, work);
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 192)
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 193) spin_unlock_bh(&wb->work_lock);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 194) }
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 195)
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 196) /**
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 197) * wb_wait_for_completion - wait for completion of bdi_writeback_works
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 198) * @done: target wb_completion
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 199) *
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 200) * Wait for one or more work items issued to @bdi with their ->done field
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 201) * set to @done, which should have been initialized with
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 202) * DEFINE_WB_COMPLETION(). This function returns after all such work items
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 203) * are completed. Work items which are waited upon aren't freed
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 204) * automatically on completion.
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 205) */
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 206) void wb_wait_for_completion(struct wb_completion *done)
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 207) {
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 208) atomic_dec(&done->cnt); /* put down the initial count */
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 209) wait_event(*done->waitq, !atomic_read(&done->cnt));
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 210) }
cc395d7f1f7b9 (Tejun Heo 2015-05-22 17:13:58 -0400 211)
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 212) #ifdef CONFIG_CGROUP_WRITEBACK
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 213)
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 214) /*
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 215) * Parameters for foreign inode detection, see wbc_detach_inode() to see
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 216) * how they're used.
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 217) *
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 218) * These paramters are inherently heuristical as the detection target
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 219) * itself is fuzzy. All we want to do is detaching an inode from the
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 220) * current owner if it's being written to by some other cgroups too much.
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 221) *
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 222) * The current cgroup writeback is built on the assumption that multiple
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 223) * cgroups writing to the same inode concurrently is very rare and a mode
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 224) * of operation which isn't well supported. As such, the goal is not
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 225) * taking too long when a different cgroup takes over an inode while
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 226) * avoiding too aggressive flip-flops from occasional foreign writes.
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 227) *
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 228) * We record, very roughly, 2s worth of IO time history and if more than
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 229) * half of that is foreign, trigger the switch. The recording is quantized
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 230) * to 16 slots. To avoid tiny writes from swinging the decision too much,
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 231) * writes smaller than 1/8 of avg size are ignored.
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 232) */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 233) #define WB_FRN_TIME_SHIFT 13 /* 1s = 2^13, upto 8 secs w/ 16bit */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 234) #define WB_FRN_TIME_AVG_SHIFT 3 /* avg = avg * 7/8 + new * 1/8 */
55a694dffb7fd (Tejun Heo 2019-08-15 12:25:28 -0700 235) #define WB_FRN_TIME_CUT_DIV 8 /* ignore rounds < avg / 8 */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 236) #define WB_FRN_TIME_PERIOD (2 * (1 << WB_FRN_TIME_SHIFT)) /* 2s */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 237)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 238) #define WB_FRN_HIST_SLOTS 16 /* inode->i_wb_frn_history is 16bit */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 239) #define WB_FRN_HIST_UNIT (WB_FRN_TIME_PERIOD / WB_FRN_HIST_SLOTS)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 240) /* each slot's duration is 2s / 16 */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 241) #define WB_FRN_HIST_THR_SLOTS (WB_FRN_HIST_SLOTS / 2)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 242) /* if foreign slots >= 8, switch */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 243) #define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 244) /* one round can affect upto 5 slots */
6444f47eb8678 (Tejun Heo 2019-08-02 12:08:13 -0700 245) #define WB_FRN_MAX_IN_FLIGHT 1024 /* don't queue too many concurrently */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 246)
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 247) static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 248) static struct workqueue_struct *isw_wq;
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 249)
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 250) void __inode_attach_wb(struct inode *inode, struct page *page)
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 251) {
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 252) struct backing_dev_info *bdi = inode_to_bdi(inode);
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 253) struct bdi_writeback *wb = NULL;
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 254)
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 255) if (inode_cgwb_enabled(inode)) {
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 256) struct cgroup_subsys_state *memcg_css;
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 257)
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 258) if (page) {
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 259) memcg_css = mem_cgroup_css_from_page(page);
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 260) wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 261) } else {
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 262) /* must pin memcg_css, see wb_get_create() */
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 263) memcg_css = task_get_css(current, memory_cgrp_id);
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 264) wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 265) css_put(memcg_css);
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 266) }
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 267) }
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 268)
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 269) if (!wb)
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 270) wb = &bdi->wb;
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 271)
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 272) /*
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 273) * There may be multiple instances of this function racing to
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 274) * update the same inode. Use cmpxchg() to tell the winner.
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 275) */
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 276) if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 277) wb_put(wb);
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 278) }
9b0eb69b75bcc (Tejun Heo 2019-06-27 13:39:48 -0700 279) EXPORT_SYMBOL_GPL(__inode_attach_wb);
21c6321fbb3a3 (Tejun Heo 2015-05-28 14:50:49 -0400 280)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 281) /**
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 282) * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 283) * @inode: inode of interest with i_lock held
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 284) *
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 285) * Returns @inode's wb with its list_lock held. @inode->i_lock must be
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 286) * held on entry and is released on return. The returned wb is guaranteed
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 287) * to stay @inode's associated wb until its list_lock is released.
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 288) */
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 289) static struct bdi_writeback *
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 290) locked_inode_to_wb_and_lock_list(struct inode *inode)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 291) __releases(&inode->i_lock)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 292) __acquires(&wb->list_lock)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 293) {
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 294) while (true) {
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 295) struct bdi_writeback *wb = inode_to_wb(inode);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 296)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 297) /*
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 298) * inode_to_wb() association is protected by both
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 299) * @inode->i_lock and @wb->list_lock but list_lock nests
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 300) * outside i_lock. Drop i_lock and verify that the
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 301) * association hasn't changed after acquiring list_lock.
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 302) */
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 303) wb_get(wb);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 304) spin_unlock(&inode->i_lock);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 305) spin_lock(&wb->list_lock);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 306)
aaa2cacf8184e (Tejun Heo 2015-05-28 14:50:55 -0400 307) /* i_wb may have changed inbetween, can't use inode_to_wb() */
614a4e3773148 (Tejun Heo 2016-03-18 13:50:03 -0400 308) if (likely(wb == inode->i_wb)) {
614a4e3773148 (Tejun Heo 2016-03-18 13:50:03 -0400 309) wb_put(wb); /* @inode already has ref */
614a4e3773148 (Tejun Heo 2016-03-18 13:50:03 -0400 310) return wb;
614a4e3773148 (Tejun Heo 2016-03-18 13:50:03 -0400 311) }
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 312)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 313) spin_unlock(&wb->list_lock);
614a4e3773148 (Tejun Heo 2016-03-18 13:50:03 -0400 314) wb_put(wb);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 315) cpu_relax();
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 316) spin_lock(&inode->i_lock);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 317) }
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 318) }
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 319)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 320) /**
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 321) * inode_to_wb_and_lock_list - determine an inode's wb and lock it
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 322) * @inode: inode of interest
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 323) *
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 324) * Same as locked_inode_to_wb_and_lock_list() but @inode->i_lock isn't held
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 325) * on entry.
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 326) */
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 327) static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 328) __acquires(&wb->list_lock)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 329) {
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 330) spin_lock(&inode->i_lock);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 331) return locked_inode_to_wb_and_lock_list(inode);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 332) }
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 333)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 334) struct inode_switch_wbs_context {
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 335) struct inode *inode;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 336) struct bdi_writeback *new_wb;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 337)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 338) struct rcu_head rcu_head;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 339) struct work_struct work;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 340) };
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 341)
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 342) static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 343) {
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 344) down_write(&bdi->wb_switch_rwsem);
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 345) }
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 346)
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 347) static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 348) {
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 349) up_write(&bdi->wb_switch_rwsem);
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 350) }
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 351)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 352) static void inode_switch_wbs_work_fn(struct work_struct *work)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 353) {
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 354) struct inode_switch_wbs_context *isw =
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 355) container_of(work, struct inode_switch_wbs_context, work);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 356) struct inode *inode = isw->inode;
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 357) struct backing_dev_info *bdi = inode_to_bdi(inode);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 358) struct address_space *mapping = inode->i_mapping;
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 359) struct bdi_writeback *old_wb = inode->i_wb;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 360) struct bdi_writeback *new_wb = isw->new_wb;
04edf02cdd37a (Matthew Wilcox 2017-12-04 10:46:23 -0500 361) XA_STATE(xas, &mapping->i_pages, 0);
04edf02cdd37a (Matthew Wilcox 2017-12-04 10:46:23 -0500 362) struct page *page;
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 363) bool switched = false;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 364)
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 365) /*
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 366) * If @inode switches cgwb membership while sync_inodes_sb() is
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 367) * being issued, sync_inodes_sb() might miss it. Synchronize.
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 368) */
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 369) down_read(&bdi->wb_switch_rwsem);
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 370)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 371) /*
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 372) * By the time control reaches here, RCU grace period has passed
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 373) * since I_WB_SWITCH assertion and all wb stat update transactions
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 374) * between unlocked_inode_to_wb_begin/end() are guaranteed to be
b93b016313b3b (Matthew Wilcox 2018-04-10 16:36:56 -0700 375) * synchronizing against the i_pages lock.
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 376) *
b93b016313b3b (Matthew Wilcox 2018-04-10 16:36:56 -0700 377) * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 378) * gives us exclusion against all wb related operations on @inode
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 379) * including IO list manipulations and stat updates.
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 380) */
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 381) if (old_wb < new_wb) {
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 382) spin_lock(&old_wb->list_lock);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 383) spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 384) } else {
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 385) spin_lock(&new_wb->list_lock);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 386) spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 387) }
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 388) spin_lock(&inode->i_lock);
b93b016313b3b (Matthew Wilcox 2018-04-10 16:36:56 -0700 389) xa_lock_irq(&mapping->i_pages);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 390)
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 391) /*
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 392) * Once I_FREEING is visible under i_lock, the eviction path owns
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 393) * the inode and we shouldn't modify ->i_io_list.
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 394) */
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 395) if (unlikely(inode->i_state & I_FREEING))
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 396) goto skip_switch;
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 397)
3a8e9ac89e6a5 (Tejun Heo 2019-08-29 15:47:19 -0700 398) trace_inode_switch_wbs(inode, old_wb, new_wb);
3a8e9ac89e6a5 (Tejun Heo 2019-08-29 15:47:19 -0700 399)
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 400) /*
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 401) * Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 402) * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
b93b016313b3b (Matthew Wilcox 2018-04-10 16:36:56 -0700 403) * pages actually under writeback.
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 404) */
04edf02cdd37a (Matthew Wilcox 2017-12-04 10:46:23 -0500 405) xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
04edf02cdd37a (Matthew Wilcox 2017-12-04 10:46:23 -0500 406) if (PageDirty(page)) {
3e8f399da490e (Nikolay Borisov 2017-07-12 14:37:51 -0700 407) dec_wb_stat(old_wb, WB_RECLAIMABLE);
3e8f399da490e (Nikolay Borisov 2017-07-12 14:37:51 -0700 408) inc_wb_stat(new_wb, WB_RECLAIMABLE);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 409) }
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 410) }
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 411)
04edf02cdd37a (Matthew Wilcox 2017-12-04 10:46:23 -0500 412) xas_set(&xas, 0);
04edf02cdd37a (Matthew Wilcox 2017-12-04 10:46:23 -0500 413) xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) {
04edf02cdd37a (Matthew Wilcox 2017-12-04 10:46:23 -0500 414) WARN_ON_ONCE(!PageWriteback(page));
04edf02cdd37a (Matthew Wilcox 2017-12-04 10:46:23 -0500 415) dec_wb_stat(old_wb, WB_WRITEBACK);
04edf02cdd37a (Matthew Wilcox 2017-12-04 10:46:23 -0500 416) inc_wb_stat(new_wb, WB_WRITEBACK);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 417) }
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 418)
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 419) wb_get(new_wb);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 420)
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 421) /*
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 422) * Transfer to @new_wb's IO list if necessary. The specific list
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 423) * @inode was on is ignored and the inode is put on ->b_dirty which
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 424) * is always correct including from ->b_dirty_time. The transfer
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 425) * preserves @inode->dirtied_when ordering.
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 426) */
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 427) if (!list_empty(&inode->i_io_list)) {
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 428) struct inode *pos;
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 429)
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 430) inode_io_list_del_locked(inode, old_wb);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 431) inode->i_wb = new_wb;
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 432) list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 433) if (time_after_eq(inode->dirtied_when,
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 434) pos->dirtied_when))
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 435) break;
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 436) inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 437) } else {
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 438) inode->i_wb = new_wb;
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 439) }
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 440)
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 441) /* ->i_wb_frn updates may race wbc_detach_inode() but doesn't matter */
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 442) inode->i_wb_frn_winner = 0;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 443) inode->i_wb_frn_avg_time = 0;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 444) inode->i_wb_frn_history = 0;
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 445) switched = true;
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 446) skip_switch:
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 447) /*
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 448) * Paired with load_acquire in unlocked_inode_to_wb_begin() and
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 449) * ensures that the new wb is visible if they see !I_WB_SWITCH.
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 450) */
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 451) smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 452)
b93b016313b3b (Matthew Wilcox 2018-04-10 16:36:56 -0700 453) xa_unlock_irq(&mapping->i_pages);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 454) spin_unlock(&inode->i_lock);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 455) spin_unlock(&new_wb->list_lock);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 456) spin_unlock(&old_wb->list_lock);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 457)
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 458) up_read(&bdi->wb_switch_rwsem);
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 459)
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 460) if (switched) {
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 461) wb_wakeup(new_wb);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 462) wb_put(old_wb);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 463) }
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 464) wb_put(new_wb);
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 465)
d10c809552659 (Tejun Heo 2015-05-28 14:50:56 -0400 466) iput(inode);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 467) kfree(isw);
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 468)
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 469) atomic_dec(&isw_nr_in_flight);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 470) }
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 471)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 472) static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 473) {
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 474) struct inode_switch_wbs_context *isw = container_of(rcu_head,
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 475) struct inode_switch_wbs_context, rcu_head);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 476)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 477) /* needs to grab bh-unsafe locks, bounce to work item */
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 478) INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 479) queue_work(isw_wq, &isw->work);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 480) }
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 481)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 482) /**
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 483) * inode_switch_wbs - change the wb association of an inode
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 484) * @inode: target inode
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 485) * @new_wb_id: ID of the new wb
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 486) *
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 487) * Switch @inode's wb association to the wb identified by @new_wb_id. The
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 488) * switching is performed asynchronously and may fail silently.
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 489) */
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 490) static void inode_switch_wbs(struct inode *inode, int new_wb_id)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 491) {
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 492) struct backing_dev_info *bdi = inode_to_bdi(inode);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 493) struct cgroup_subsys_state *memcg_css;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 494) struct inode_switch_wbs_context *isw;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 495)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 496) /* noop if seems to be already in progress */
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 497) if (inode->i_state & I_WB_SWITCH)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 498) return;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 499)
6444f47eb8678 (Tejun Heo 2019-08-02 12:08:13 -0700 500) /* avoid queueing a new switch if too many are already in flight */
6444f47eb8678 (Tejun Heo 2019-08-02 12:08:13 -0700 501) if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 502) return;
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 503)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 504) isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 505) if (!isw)
6444f47eb8678 (Tejun Heo 2019-08-02 12:08:13 -0700 506) return;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 507)
3921b835fbaec (Roman Gushchin 2021-06-28 19:35:47 -0700 508) atomic_inc(&isw_nr_in_flight);
3921b835fbaec (Roman Gushchin 2021-06-28 19:35:47 -0700 509)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 510) /* find and pin the new wb */
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 511) rcu_read_lock();
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 512) memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
cae2f265c5a94 (Muchun Song 2021-04-02 17:11:45 +0800 513) if (memcg_css && !css_tryget(memcg_css))
cae2f265c5a94 (Muchun Song 2021-04-02 17:11:45 +0800 514) memcg_css = NULL;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 515) rcu_read_unlock();
cae2f265c5a94 (Muchun Song 2021-04-02 17:11:45 +0800 516) if (!memcg_css)
cae2f265c5a94 (Muchun Song 2021-04-02 17:11:45 +0800 517) goto out_free;
cae2f265c5a94 (Muchun Song 2021-04-02 17:11:45 +0800 518)
cae2f265c5a94 (Muchun Song 2021-04-02 17:11:45 +0800 519) isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
cae2f265c5a94 (Muchun Song 2021-04-02 17:11:45 +0800 520) css_put(memcg_css);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 521) if (!isw->new_wb)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 522) goto out_free;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 523)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 524) /* while holding I_WB_SWITCH, no one else can update the association */
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 525) spin_lock(&inode->i_lock);
1751e8a6cb935 (Linus Torvalds 2017-11-27 13:05:09 -0800 526) if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 527) inode->i_state & (I_WB_SWITCH | I_FREEING) ||
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 528) inode_to_wb(inode) == isw->new_wb) {
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 529) spin_unlock(&inode->i_lock);
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 530) goto out_free;
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 531) }
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 532) inode->i_state |= I_WB_SWITCH;
7452495555609 (Tahsin Erdogan 2016-06-16 05:15:33 -0700 533) __iget(inode);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 534) spin_unlock(&inode->i_lock);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 535)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 536) isw->inode = inode;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 537)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 538) /*
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 539) * In addition to synchronizing among switchers, I_WB_SWITCH tells
b93b016313b3b (Matthew Wilcox 2018-04-10 16:36:56 -0700 540) * the RCU protected stat update paths to grab the i_page
b93b016313b3b (Matthew Wilcox 2018-04-10 16:36:56 -0700 541) * lock so that stat transfer can synchronize against them.
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 542) * Let's continue after I_WB_SWITCH is guaranteed to be visible.
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 543) */
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 544) call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
6444f47eb8678 (Tejun Heo 2019-08-02 12:08:13 -0700 545) return;
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 546)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 547) out_free:
3921b835fbaec (Roman Gushchin 2021-06-28 19:35:47 -0700 548) atomic_dec(&isw_nr_in_flight);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 549) if (isw->new_wb)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 550) wb_put(isw->new_wb);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 551) kfree(isw);
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 552) }
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 553)
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 554) /**
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 555) * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 556) * @wbc: writeback_control of interest
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 557) * @inode: target inode
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 558) *
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 559) * @inode is locked and about to be written back under the control of @wbc.
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 560) * Record @inode's writeback context into @wbc and unlock the i_lock. On
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 561) * writeback completion, wbc_detach_inode() should be called. This is used
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 562) * to track the cgroup writeback context.
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 563) */
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 564) void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 565) struct inode *inode)
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 566) {
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 567) if (!inode_cgwb_enabled(inode)) {
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 568) spin_unlock(&inode->i_lock);
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 569) return;
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 570) }
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 571)
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 572) wbc->wb = inode_to_wb(inode);
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 573) wbc->inode = inode;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 574)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 575) wbc->wb_id = wbc->wb->memcg_css->id;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 576) wbc->wb_lcand_id = inode->i_wb_frn_winner;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 577) wbc->wb_tcand_id = 0;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 578) wbc->wb_bytes = 0;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 579) wbc->wb_lcand_bytes = 0;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 580) wbc->wb_tcand_bytes = 0;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 581)
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 582) wb_get(wbc->wb);
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 583) spin_unlock(&inode->i_lock);
e8a7abf5a5bd3 (Tejun Heo 2015-05-28 14:50:57 -0400 584)
e8a7abf5a5bd3 (Tejun Heo 2015-05-28 14:50:57 -0400 585) /*
65de03e251382 (Tejun Heo 2019-11-08 12:18:29 -0800 586) * A dying wb indicates that either the blkcg associated with the
65de03e251382 (Tejun Heo 2019-11-08 12:18:29 -0800 587) * memcg changed or the associated memcg is dying. In the first
65de03e251382 (Tejun Heo 2019-11-08 12:18:29 -0800 588) * case, a replacement wb should already be available and we should
65de03e251382 (Tejun Heo 2019-11-08 12:18:29 -0800 589) * refresh the wb immediately. In the second case, trying to
65de03e251382 (Tejun Heo 2019-11-08 12:18:29 -0800 590) * refresh will keep failing.
e8a7abf5a5bd3 (Tejun Heo 2015-05-28 14:50:57 -0400 591) */
65de03e251382 (Tejun Heo 2019-11-08 12:18:29 -0800 592) if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css)))
e8a7abf5a5bd3 (Tejun Heo 2015-05-28 14:50:57 -0400 593) inode_switch_wbs(inode, wbc->wb_id);
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 594) }
9b0eb69b75bcc (Tejun Heo 2019-06-27 13:39:48 -0700 595) EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode);
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 596)
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 597) /**
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 598) * wbc_detach_inode - disassociate wbc from inode and perform foreign detection
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 599) * @wbc: writeback_control of the just finished writeback
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 600) *
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 601) * To be called after a writeback attempt of an inode finishes and undoes
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 602) * wbc_attach_and_unlock_inode(). Can be called under any context.
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 603) *
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 604) * As concurrent write sharing of an inode is expected to be very rare and
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 605) * memcg only tracks page ownership on first-use basis severely confining
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 606) * the usefulness of such sharing, cgroup writeback tracks ownership
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 607) * per-inode. While the support for concurrent write sharing of an inode
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 608) * is deemed unnecessary, an inode being written to by different cgroups at
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 609) * different points in time is a lot more common, and, more importantly,
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 610) * charging only by first-use can too readily lead to grossly incorrect
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 611) * behaviors (single foreign page can lead to gigabytes of writeback to be
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 612) * incorrectly attributed).
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 613) *
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 614) * To resolve this issue, cgroup writeback detects the majority dirtier of
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 615) * an inode and transfers the ownership to it. To avoid unnnecessary
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 616) * oscillation, the detection mechanism keeps track of history and gives
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 617) * out the switch verdict only if the foreign usage pattern is stable over
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 618) * a certain amount of time and/or writeback attempts.
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 619) *
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 620) * On each writeback attempt, @wbc tries to detect the majority writer
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 621) * using Boyer-Moore majority vote algorithm. In addition to the byte
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 622) * count from the majority voting, it also counts the bytes written for the
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 623) * current wb and the last round's winner wb (max of last round's current
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 624) * wb, the winner from two rounds ago, and the last round's majority
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 625) * candidate). Keeping track of the historical winner helps the algorithm
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 626) * to semi-reliably detect the most active writer even when it's not the
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 627) * absolute majority.
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 628) *
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 629) * Once the winner of the round is determined, whether the winner is
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 630) * foreign or not and how much IO time the round consumed is recorded in
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 631) * inode->i_wb_frn_history. If the amount of recorded foreign IO time is
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 632) * over a certain threshold, the switch verdict is given.
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 633) */
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 634) void wbc_detach_inode(struct writeback_control *wbc)
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 635) {
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 636) struct bdi_writeback *wb = wbc->wb;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 637) struct inode *inode = wbc->inode;
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 638) unsigned long avg_time, max_bytes, max_time;
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 639) u16 history;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 640) int max_id;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 641)
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 642) if (!wb)
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 643) return;
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 644)
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 645) history = inode->i_wb_frn_history;
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 646) avg_time = inode->i_wb_frn_avg_time;
dd73e4b7df958 (Tejun Heo 2015-06-16 18:48:30 -0400 647)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 648) /* pick the winner of this round */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 649) if (wbc->wb_bytes >= wbc->wb_lcand_bytes &&
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 650) wbc->wb_bytes >= wbc->wb_tcand_bytes) {
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 651) max_id = wbc->wb_id;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 652) max_bytes = wbc->wb_bytes;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 653) } else if (wbc->wb_lcand_bytes >= wbc->wb_tcand_bytes) {
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 654) max_id = wbc->wb_lcand_id;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 655) max_bytes = wbc->wb_lcand_bytes;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 656) } else {
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 657) max_id = wbc->wb_tcand_id;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 658) max_bytes = wbc->wb_tcand_bytes;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 659) }
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 660)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 661) /*
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 662) * Calculate the amount of IO time the winner consumed and fold it
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 663) * into the running average kept per inode. If the consumed IO
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 664) * time is lower than avag / WB_FRN_TIME_CUT_DIV, ignore it for
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 665) * deciding whether to switch or not. This is to prevent one-off
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 666) * small dirtiers from skewing the verdict.
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 667) */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 668) max_time = DIV_ROUND_UP((max_bytes >> PAGE_SHIFT) << WB_FRN_TIME_SHIFT,
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 669) wb->avg_write_bandwidth);
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 670) if (avg_time)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 671) avg_time += (max_time >> WB_FRN_TIME_AVG_SHIFT) -
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 672) (avg_time >> WB_FRN_TIME_AVG_SHIFT);
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 673) else
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 674) avg_time = max_time; /* immediate catch up on first run */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 675)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 676) if (max_time >= avg_time / WB_FRN_TIME_CUT_DIV) {
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 677) int slots;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 678)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 679) /*
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 680) * The switch verdict is reached if foreign wb's consume
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 681) * more than a certain proportion of IO time in a
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 682) * WB_FRN_TIME_PERIOD. This is loosely tracked by 16 slot
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 683) * history mask where each bit represents one sixteenth of
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 684) * the period. Determine the number of slots to shift into
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 685) * history from @max_time.
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 686) */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 687) slots = min(DIV_ROUND_UP(max_time, WB_FRN_HIST_UNIT),
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 688) (unsigned long)WB_FRN_HIST_MAX_SLOTS);
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 689) history <<= slots;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 690) if (wbc->wb_id != max_id)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 691) history |= (1U << slots) - 1;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 692)
3a8e9ac89e6a5 (Tejun Heo 2019-08-29 15:47:19 -0700 693) if (history)
3a8e9ac89e6a5 (Tejun Heo 2019-08-29 15:47:19 -0700 694) trace_inode_foreign_history(inode, wbc, history);
3a8e9ac89e6a5 (Tejun Heo 2019-08-29 15:47:19 -0700 695)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 696) /*
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 697) * Switch if the current wb isn't the consistent winner.
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 698) * If there are multiple closely competing dirtiers, the
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 699) * inode may switch across them repeatedly over time, which
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 700) * is okay. The main goal is avoiding keeping an inode on
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 701) * the wrong wb for an extended period of time.
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 702) */
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 703) if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
682aa8e1a6a15 (Tejun Heo 2015-05-28 14:50:53 -0400 704) inode_switch_wbs(inode, max_id);
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 705) }
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 706)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 707) /*
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 708) * Multiple instances of this function may race to update the
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 709) * following fields but we don't mind occassional inaccuracies.
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 710) */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 711) inode->i_wb_frn_winner = max_id;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 712) inode->i_wb_frn_avg_time = min(avg_time, (unsigned long)U16_MAX);
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 713) inode->i_wb_frn_history = history;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 714)
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 715) wb_put(wbc->wb);
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 716) wbc->wb = NULL;
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 717) }
9b0eb69b75bcc (Tejun Heo 2019-06-27 13:39:48 -0700 718) EXPORT_SYMBOL_GPL(wbc_detach_inode);
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 719)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 720) /**
34e51a5e1a6e9 (Tejun Heo 2019-06-27 13:39:49 -0700 721) * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 722) * @wbc: writeback_control of the writeback in progress
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 723) * @page: page being written out
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 724) * @bytes: number of bytes being written out
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 725) *
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 726) * @bytes from @page are about to written out during the writeback
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 727) * controlled by @wbc. Keep the book for foreign inode detection. See
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 728) * wbc_detach_inode().
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 729) */
34e51a5e1a6e9 (Tejun Heo 2019-06-27 13:39:49 -0700 730) void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
34e51a5e1a6e9 (Tejun Heo 2019-06-27 13:39:49 -0700 731) size_t bytes)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 732) {
6631142229005 (Tejun Heo 2019-06-13 15:30:41 -0700 733) struct cgroup_subsys_state *css;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 734) int id;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 735)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 736) /*
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 737) * pageout() path doesn't attach @wbc to the inode being written
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 738) * out. This is intentional as we don't want the function to block
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 739) * behind a slow cgroup. Ultimately, we want pageout() to kick off
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 740) * regular writeback instead of writing things out itself.
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 741) */
27b36d8fa81fa (Tejun Heo 2019-06-27 13:39:50 -0700 742) if (!wbc->wb || wbc->no_cgroup_owner)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 743) return;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 744)
6631142229005 (Tejun Heo 2019-06-13 15:30:41 -0700 745) css = mem_cgroup_css_from_page(page);
6631142229005 (Tejun Heo 2019-06-13 15:30:41 -0700 746) /* dead cgroups shouldn't contribute to inode ownership arbitration */
6631142229005 (Tejun Heo 2019-06-13 15:30:41 -0700 747) if (!(css->flags & CSS_ONLINE))
6631142229005 (Tejun Heo 2019-06-13 15:30:41 -0700 748) return;
6631142229005 (Tejun Heo 2019-06-13 15:30:41 -0700 749)
6631142229005 (Tejun Heo 2019-06-13 15:30:41 -0700 750) id = css->id;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 751)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 752) if (id == wbc->wb_id) {
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 753) wbc->wb_bytes += bytes;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 754) return;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 755) }
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 756)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 757) if (id == wbc->wb_lcand_id)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 758) wbc->wb_lcand_bytes += bytes;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 759)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 760) /* Boyer-Moore majority vote algorithm */
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 761) if (!wbc->wb_tcand_bytes)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 762) wbc->wb_tcand_id = id;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 763) if (id == wbc->wb_tcand_id)
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 764) wbc->wb_tcand_bytes += bytes;
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 765) else
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 766) wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 767) }
34e51a5e1a6e9 (Tejun Heo 2019-06-27 13:39:49 -0700 768) EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
2a81490811d02 (Tejun Heo 2015-05-28 14:50:51 -0400 769)
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 770) /**
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 771) * inode_congested - test whether an inode is congested
60292bcc1b240 (Tejun Heo 2015-08-18 14:54:54 -0700 772) * @inode: inode to test for congestion (may be NULL)
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 773) * @cong_bits: mask of WB_[a]sync_congested bits to test
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 774) *
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 775) * Tests whether @inode is congested. @cong_bits is the mask of congestion
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 776) * bits to test and the return value is the mask of set bits.
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 777) *
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 778) * If cgroup writeback is enabled for @inode, the congestion state is
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 779) * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 780) * associated with @inode is congested; otherwise, the root wb's congestion
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 781) * state is used.
60292bcc1b240 (Tejun Heo 2015-08-18 14:54:54 -0700 782) *
60292bcc1b240 (Tejun Heo 2015-08-18 14:54:54 -0700 783) * @inode is allowed to be NULL as this function is often called on
60292bcc1b240 (Tejun Heo 2015-08-18 14:54:54 -0700 784) * mapping->host which is NULL for the swapper space.
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 785) */
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 786) int inode_congested(struct inode *inode, int cong_bits)
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 787) {
5cb8b8241e614 (Tejun Heo 2015-05-28 14:50:54 -0400 788) /*
5cb8b8241e614 (Tejun Heo 2015-05-28 14:50:54 -0400 789) * Once set, ->i_wb never becomes NULL while the inode is alive.
5cb8b8241e614 (Tejun Heo 2015-05-28 14:50:54 -0400 790) * Start transaction iff ->i_wb is visible.
5cb8b8241e614 (Tejun Heo 2015-05-28 14:50:54 -0400 791) */
aaa2cacf8184e (Tejun Heo 2015-05-28 14:50:55 -0400 792) if (inode && inode_to_wb_is_valid(inode)) {
5cb8b8241e614 (Tejun Heo 2015-05-28 14:50:54 -0400 793) struct bdi_writeback *wb;
2e898e4c0a389 (Greg Thelen 2018-04-20 14:55:42 -0700 794) struct wb_lock_cookie lock_cookie = {};
2e898e4c0a389 (Greg Thelen 2018-04-20 14:55:42 -0700 795) bool congested;
5cb8b8241e614 (Tejun Heo 2015-05-28 14:50:54 -0400 796)
2e898e4c0a389 (Greg Thelen 2018-04-20 14:55:42 -0700 797) wb = unlocked_inode_to_wb_begin(inode, &lock_cookie);
5cb8b8241e614 (Tejun Heo 2015-05-28 14:50:54 -0400 798) congested = wb_congested(wb, cong_bits);
2e898e4c0a389 (Greg Thelen 2018-04-20 14:55:42 -0700 799) unlocked_inode_to_wb_end(inode, &lock_cookie);
5cb8b8241e614 (Tejun Heo 2015-05-28 14:50:54 -0400 800) return congested;
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 801) }
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 802)
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 803) return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 804) }
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 805) EXPORT_SYMBOL_GPL(inode_congested);
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 806)
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 807) /**
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 808) * wb_split_bdi_pages - split nr_pages to write according to bandwidth
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 809) * @wb: target bdi_writeback to split @nr_pages to
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 810) * @nr_pages: number of pages to write for the whole bdi
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 811) *
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 812) * Split @wb's portion of @nr_pages according to @wb's write bandwidth in
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 813) * relation to the total write bandwidth of all wb's w/ dirty inodes on
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 814) * @wb->bdi.
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 815) */
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 816) static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 817) {
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 818) unsigned long this_bw = wb->avg_write_bandwidth;
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 819) unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 820)
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 821) if (nr_pages == LONG_MAX)
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 822) return LONG_MAX;
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 823)
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 824) /*
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 825) * This may be called on clean wb's and proportional distribution
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 826) * may not make sense, just use the original @nr_pages in those
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 827) * cases. In general, we wanna err on the side of writing more.
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 828) */
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 829) if (!tot_bw || this_bw >= tot_bw)
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 830) return nr_pages;
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 831) else
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 832) return DIV_ROUND_UP_ULL((u64)nr_pages * this_bw, tot_bw);
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 833) }
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 834)
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 835) /**
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 836) * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 837) * @bdi: target backing_dev_info
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 838) * @base_work: wb_writeback_work to issue
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 839) * @skip_if_busy: skip wb's which already have writeback in progress
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 840) *
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 841) * Split and issue @base_work to all wb's (bdi_writeback's) of @bdi which
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 842) * have dirty inodes. If @base_work->nr_page isn't %LONG_MAX, it's
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 843) * distributed to the busy wbs according to each wb's proportion in the
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 844) * total active write bandwidth of @bdi.
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 845) */
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 846) static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 847) struct wb_writeback_work *base_work,
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 848) bool skip_if_busy)
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 849) {
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 850) struct bdi_writeback *last_wb = NULL;
b33e18f61bd18 (Tejun Heo 2015-10-27 14:19:39 +0900 851) struct bdi_writeback *wb = list_entry(&bdi->wb_list,
b33e18f61bd18 (Tejun Heo 2015-10-27 14:19:39 +0900 852) struct bdi_writeback, bdi_node);
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 853)
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 854) might_sleep();
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 855) restart:
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 856) rcu_read_lock();
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 857) list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 858) DEFINE_WB_COMPLETION(fallback_work_done, bdi);
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 859) struct wb_writeback_work fallback_work;
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 860) struct wb_writeback_work *work;
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 861) long nr_pages;
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 862)
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 863) if (last_wb) {
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 864) wb_put(last_wb);
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 865) last_wb = NULL;
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 866) }
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 867)
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 868) /* SYNC_ALL writes out I_DIRTY_TIME too */
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 869) if (!wb_has_dirty_io(wb) &&
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 870) (base_work->sync_mode == WB_SYNC_NONE ||
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 871) list_empty(&wb->b_dirty_time)))
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 872) continue;
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 873) if (skip_if_busy && writeback_in_progress(wb))
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 874) continue;
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 875)
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 876) nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages);
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 877)
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 878) work = kmalloc(sizeof(*work), GFP_ATOMIC);
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 879) if (work) {
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 880) *work = *base_work;
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 881) work->nr_pages = nr_pages;
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 882) work->auto_free = 1;
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 883) wb_queue_work(wb, work);
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 884) continue;
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 885) }
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 886)
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 887) /* alloc failed, execute synchronously using on-stack fallback */
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 888) work = &fallback_work;
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 889) *work = *base_work;
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 890) work->nr_pages = nr_pages;
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 891) work->auto_free = 0;
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 892) work->done = &fallback_work_done;
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 893)
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 894) wb_queue_work(wb, work);
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 895)
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 896) /*
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 897) * Pin @wb so that it stays on @bdi->wb_list. This allows
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 898) * continuing iteration from @wb after dropping and
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 899) * regrabbing rcu read lock.
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 900) */
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 901) wb_get(wb);
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 902) last_wb = wb;
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 903)
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 904) rcu_read_unlock();
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 905) wb_wait_for_completion(&fallback_work_done);
8a1270cda7b47 (Tejun Heo 2015-08-18 14:54:53 -0700 906) goto restart;
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 907) }
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 908) rcu_read_unlock();
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 909)
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 910) if (last_wb)
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 911) wb_put(last_wb);
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 912) }
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 913)
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 914) /**
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 915) * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 916) * @bdi_id: target bdi id
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 917) * @memcg_id: target memcg css id
b46ec1da5eb7d (Randy Dunlap 2019-10-14 14:12:17 -0700 918) * @nr: number of pages to write, 0 for best-effort dirty flushing
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 919) * @reason: reason why some writeback work initiated
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 920) * @done: target wb_completion
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 921) *
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 922) * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 923) * with the specified parameters.
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 924) */
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 925) int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 926) enum wb_reason reason, struct wb_completion *done)
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 927) {
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 928) struct backing_dev_info *bdi;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 929) struct cgroup_subsys_state *memcg_css;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 930) struct bdi_writeback *wb;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 931) struct wb_writeback_work *work;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 932) int ret;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 933)
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 934) /* lookup bdi and memcg */
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 935) bdi = bdi_get_by_id(bdi_id);
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 936) if (!bdi)
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 937) return -ENOENT;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 938)
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 939) rcu_read_lock();
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 940) memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys);
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 941) if (memcg_css && !css_tryget(memcg_css))
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 942) memcg_css = NULL;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 943) rcu_read_unlock();
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 944) if (!memcg_css) {
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 945) ret = -ENOENT;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 946) goto out_bdi_put;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 947) }
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 948)
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 949) /*
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 950) * And find the associated wb. If the wb isn't there already
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 951) * there's nothing to flush, don't create one.
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 952) */
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 953) wb = wb_get_lookup(bdi, memcg_css);
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 954) if (!wb) {
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 955) ret = -ENOENT;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 956) goto out_css_put;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 957) }
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 958)
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 959) /*
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 960) * If @nr is zero, the caller is attempting to write out most of
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 961) * the currently dirty pages. Let's take the current dirty page
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 962) * count and inflate it by 25% which should be large enough to
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 963) * flush out most dirty pages while avoiding getting livelocked by
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 964) * concurrent dirtiers.
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 965) */
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 966) if (!nr) {
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 967) unsigned long filepages, headroom, dirty, writeback;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 968)
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 969) mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 970) &writeback);
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 971) nr = dirty * 10 / 8;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 972) }
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 973)
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 974) /* issue the writeback work */
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 975) work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 976) if (work) {
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 977) work->nr_pages = nr;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 978) work->sync_mode = WB_SYNC_NONE;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 979) work->range_cyclic = 1;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 980) work->reason = reason;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 981) work->done = done;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 982) work->auto_free = 1;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 983) wb_queue_work(wb, work);
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 984) ret = 0;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 985) } else {
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 986) ret = -ENOMEM;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 987) }
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 988)
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 989) wb_put(wb);
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 990) out_css_put:
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 991) css_put(memcg_css);
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 992) out_bdi_put:
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 993) bdi_put(bdi);
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 994) return ret;
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 995) }
d62241c7a406f (Tejun Heo 2019-08-26 09:06:55 -0700 996)
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 997) /**
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 998) * cgroup_writeback_umount - flush inode wb switches for umount
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 999) *
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1000) * This function is called when a super_block is about to be destroyed and
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1001) * flushes in-flight inode wb switches. An inode wb switch goes through
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1002) * RCU and then workqueue, so the two need to be flushed in order to ensure
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1003) * that all previously scheduled switches are finished. As wb switches are
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1004) * rare occurrences and synchronize_rcu() can take a while, perform
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1005) * flushing iff wb switches are in flight.
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1006) */
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1007) void cgroup_writeback_umount(void)
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1008) {
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1009) if (atomic_read(&isw_nr_in_flight)) {
ec084de929e41 (Jiufei Xue 2019-05-17 14:31:44 -0700 1010) /*
ec084de929e41 (Jiufei Xue 2019-05-17 14:31:44 -0700 1011) * Use rcu_barrier() to wait for all pending callbacks to
ec084de929e41 (Jiufei Xue 2019-05-17 14:31:44 -0700 1012) * ensure that all in-flight wb switches are in the workqueue.
ec084de929e41 (Jiufei Xue 2019-05-17 14:31:44 -0700 1013) */
ec084de929e41 (Jiufei Xue 2019-05-17 14:31:44 -0700 1014) rcu_barrier();
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1015) flush_workqueue(isw_wq);
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1016) }
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1017) }
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1018)
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1019) static int __init cgroup_writeback_init(void)
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1020) {
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1021) isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1022) if (!isw_wq)
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1023) return -ENOMEM;
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1024) return 0;
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1025) }
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1026) fs_initcall(cgroup_writeback_init);
a1a0e23e49037 (Tejun Heo 2016-02-29 18:28:53 -0500 1027)
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 1028) #else /* CONFIG_CGROUP_WRITEBACK */
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 1029)
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 1030) static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 1031) static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 1032)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1033) static struct bdi_writeback *
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1034) locked_inode_to_wb_and_lock_list(struct inode *inode)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1035) __releases(&inode->i_lock)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1036) __acquires(&wb->list_lock)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1037) {
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1038) struct bdi_writeback *wb = inode_to_wb(inode);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1039)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1040) spin_unlock(&inode->i_lock);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1041) spin_lock(&wb->list_lock);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1042) return wb;
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1043) }
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1044)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1045) static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1046) __acquires(&wb->list_lock)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1047) {
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1048) struct bdi_writeback *wb = inode_to_wb(inode);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1049)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1050) spin_lock(&wb->list_lock);
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1051) return wb;
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1052) }
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1053)
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 1054) static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 1055) {
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 1056) return nr_pages;
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 1057) }
f2b6512160763 (Tejun Heo 2015-05-22 17:13:55 -0400 1058)
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 1059) static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 1060) struct wb_writeback_work *base_work,
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 1061) bool skip_if_busy)
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 1062) {
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 1063) might_sleep();
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 1064)
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 1065) if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) {
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 1066) base_work->auto_free = 0;
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 1067) wb_queue_work(&bdi->wb, base_work);
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 1068) }
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 1069) }
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 1070)
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 1071) #endif /* CONFIG_CGROUP_WRITEBACK */
703c270887bb5 (Tejun Heo 2015-05-22 17:13:44 -0400 1072)
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 1073) /*
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 1074) * Add in the number of potentially dirty inodes, because each inode
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 1075) * write can dirty pagecache in the underlying blockdev.
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 1076) */
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 1077) static unsigned long get_nr_dirty_pages(void)
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 1078) {
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 1079) return global_node_page_state(NR_FILE_DIRTY) +
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 1080) get_nr_dirty_inodes();
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 1081) }
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 1082)
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 1083) static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason)
b6e51316daede (Jens Axboe 2009-09-16 15:13:54 +0200 1084) {
c00ddad39f512 (Tejun Heo 2015-05-22 17:13:51 -0400 1085) if (!wb_has_dirty_io(wb))
c00ddad39f512 (Tejun Heo 2015-05-22 17:13:51 -0400 1086) return;
c00ddad39f512 (Tejun Heo 2015-05-22 17:13:51 -0400 1087)
aac8d41cd438f (Jens Axboe 2017-09-28 11:31:55 -0600 1088) /*
aac8d41cd438f (Jens Axboe 2017-09-28 11:31:55 -0600 1089) * All callers of this function want to start writeback of all
aac8d41cd438f (Jens Axboe 2017-09-28 11:31:55 -0600 1090) * dirty pages. Places like vmscan can call this at a very
aac8d41cd438f (Jens Axboe 2017-09-28 11:31:55 -0600 1091) * high frequency, causing pointless allocations of tons of
aac8d41cd438f (Jens Axboe 2017-09-28 11:31:55 -0600 1092) * work items and keeping the flusher threads busy retrieving
aac8d41cd438f (Jens Axboe 2017-09-28 11:31:55 -0600 1093) * that work. Ensure that we only allow one of them pending and
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 1094) * inflight at the time.
aac8d41cd438f (Jens Axboe 2017-09-28 11:31:55 -0600 1095) */
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 1096) if (test_bit(WB_start_all, &wb->state) ||
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 1097) test_and_set_bit(WB_start_all, &wb->state))
aac8d41cd438f (Jens Axboe 2017-09-28 11:31:55 -0600 1098) return;
aac8d41cd438f (Jens Axboe 2017-09-28 11:31:55 -0600 1099)
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 1100) wb->start_all_reason = reason;
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 1101) wb_wakeup(wb);
c5444198ca210 (Christoph Hellwig 2010-06-08 18:15:15 +0200 1102) }
d3ddec7635b6f (Wu Fengguang 2009-09-23 20:33:40 +0800 1103)
c5444198ca210 (Christoph Hellwig 2010-06-08 18:15:15 +0200 1104) /**
9ecf4866c018a (Tejun Heo 2015-05-22 17:13:54 -0400 1105) * wb_start_background_writeback - start background writeback
9ecf4866c018a (Tejun Heo 2015-05-22 17:13:54 -0400 1106) * @wb: bdi_writback to write from
c5444198ca210 (Christoph Hellwig 2010-06-08 18:15:15 +0200 1107) *
c5444198ca210 (Christoph Hellwig 2010-06-08 18:15:15 +0200 1108) * Description:
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1109) * This makes sure WB_SYNC_NONE background writeback happens. When
9ecf4866c018a (Tejun Heo 2015-05-22 17:13:54 -0400 1110) * this function returns, it is only guaranteed that for given wb
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1111) * some IO is happening if we are over background dirty threshold.
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1112) * Caller need not hold sb s_umount semaphore.
c5444198ca210 (Christoph Hellwig 2010-06-08 18:15:15 +0200 1113) */
9ecf4866c018a (Tejun Heo 2015-05-22 17:13:54 -0400 1114) void wb_start_background_writeback(struct bdi_writeback *wb)
c5444198ca210 (Christoph Hellwig 2010-06-08 18:15:15 +0200 1115) {
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1116) /*
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1117) * We just wake up the flusher thread. It will perform background
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1118) * writeback as soon as there is no other work to do.
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1119) */
5634cc2aa9aeb (Tejun Heo 2015-08-18 14:54:56 -0700 1120) trace_writeback_wake_background(wb);
9ecf4866c018a (Tejun Heo 2015-05-22 17:13:54 -0400 1121) wb_wakeup(wb);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1122) }
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1123)
a66979abad090 (Dave Chinner 2011-03-22 22:23:41 +1100 1124) /*
a66979abad090 (Dave Chinner 2011-03-22 22:23:41 +1100 1125) * Remove the inode from the writeback list it is on.
a66979abad090 (Dave Chinner 2011-03-22 22:23:41 +1100 1126) */
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 1127) void inode_io_list_del(struct inode *inode)
a66979abad090 (Dave Chinner 2011-03-22 22:23:41 +1100 1128) {
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1129) struct bdi_writeback *wb;
f758eeabeb96f (Christoph Hellwig 2011-04-21 18:19:44 -0600 1130)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 1131) wb = inode_to_wb_and_lock_list(inode);
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1132) spin_lock(&inode->i_lock);
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 1133) inode_io_list_del_locked(inode, wb);
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1134) spin_unlock(&inode->i_lock);
52ebea749aaed (Tejun Heo 2015-05-22 17:13:37 -0400 1135) spin_unlock(&wb->list_lock);
a66979abad090 (Dave Chinner 2011-03-22 22:23:41 +1100 1136) }
4301efa4c7cca (Jan Kara 2020-04-21 10:54:44 +0200 1137) EXPORT_SYMBOL(inode_io_list_del);
a66979abad090 (Dave Chinner 2011-03-22 22:23:41 +1100 1138)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1139) /*
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1140) * mark an inode as under writeback on the sb
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1141) */
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1142) void sb_mark_inode_writeback(struct inode *inode)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1143) {
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1144) struct super_block *sb = inode->i_sb;
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1145) unsigned long flags;
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1146)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1147) if (list_empty(&inode->i_wb_list)) {
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1148) spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
9a46b04f16a03 (Brian Foster 2016-07-26 15:21:53 -0700 1149) if (list_empty(&inode->i_wb_list)) {
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1150) list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb);
9a46b04f16a03 (Brian Foster 2016-07-26 15:21:53 -0700 1151) trace_sb_mark_inode_writeback(inode);
9a46b04f16a03 (Brian Foster 2016-07-26 15:21:53 -0700 1152) }
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1153) spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1154) }
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1155) }
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1156)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1157) /*
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1158) * clear an inode as under writeback on the sb
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1159) */
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1160) void sb_clear_inode_writeback(struct inode *inode)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1161) {
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1162) struct super_block *sb = inode->i_sb;
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1163) unsigned long flags;
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1164)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1165) if (!list_empty(&inode->i_wb_list)) {
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1166) spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
9a46b04f16a03 (Brian Foster 2016-07-26 15:21:53 -0700 1167) if (!list_empty(&inode->i_wb_list)) {
9a46b04f16a03 (Brian Foster 2016-07-26 15:21:53 -0700 1168) list_del_init(&inode->i_wb_list);
9a46b04f16a03 (Brian Foster 2016-07-26 15:21:53 -0700 1169) trace_sb_clear_inode_writeback(inode);
9a46b04f16a03 (Brian Foster 2016-07-26 15:21:53 -0700 1170) }
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1171) spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1172) }
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1173) }
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 1174)
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1175) /*
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1176) * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1177) * furthest end of its superblock's dirty-inode list.
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1178) *
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1179) * Before stamping the inode's ->dirtied_when, we check to see whether it is
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1180) * already the most-recently-dirtied inode on the b_dirty list. If that is
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1181) * the case then the inode must have been redirtied while it was being written
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1182) * out and we don't reset its dirtied_when.
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1183) */
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1184) static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1185) {
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1186) assert_spin_locked(&inode->i_lock);
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1187)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1188) if (!list_empty(&wb->b_dirty)) {
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1189) struct inode *tail;
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1190)
7ccf19a8042e3 (Nicholas Piggin 2010-10-21 11:49:30 +1100 1191) tail = wb_inode(wb->b_dirty.next);
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1192) if (time_before(inode->dirtied_when, tail->dirtied_when))
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1193) inode->dirtied_when = jiffies;
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1194) }
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 1195) inode_io_list_move_locked(inode, wb, &wb->b_dirty);
5afced3bf2810 (Jan Kara 2020-05-29 15:05:22 +0200 1196) inode->i_state &= ~I_SYNC_QUEUED;
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1197) }
6610a0bc8dcc1 (Andrew Morton 2007-10-16 23:30:32 -0700 1198)
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1199) static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1200) {
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1201) spin_lock(&inode->i_lock);
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1202) redirty_tail_locked(inode, wb);
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1203) spin_unlock(&inode->i_lock);
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1204) }
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1205)
c986d1e2a460c (Andrew Morton 2007-10-16 23:30:34 -0700 1206) /*
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1207) * requeue inode for re-scanning after bdi->b_io list is exhausted.
c986d1e2a460c (Andrew Morton 2007-10-16 23:30:34 -0700 1208) */
f758eeabeb96f (Christoph Hellwig 2011-04-21 18:19:44 -0600 1209) static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
c986d1e2a460c (Andrew Morton 2007-10-16 23:30:34 -0700 1210) {
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 1211) inode_io_list_move_locked(inode, wb, &wb->b_more_io);
c986d1e2a460c (Andrew Morton 2007-10-16 23:30:34 -0700 1212) }
c986d1e2a460c (Andrew Morton 2007-10-16 23:30:34 -0700 1213)
1c0eeaf569859 (Joern Engel 2007-10-16 23:30:44 -0700 1214) static void inode_sync_complete(struct inode *inode)
1c0eeaf569859 (Joern Engel 2007-10-16 23:30:44 -0700 1215) {
365b94ae67d29 (Jan Kara 2012-05-03 14:47:55 +0200 1216) inode->i_state &= ~I_SYNC;
4eff96dd5283a (Jan Kara 2012-11-26 16:29:51 -0800 1217) /* If inode is clean an unused, put it into LRU now... */
4eff96dd5283a (Jan Kara 2012-11-26 16:29:51 -0800 1218) inode_add_lru(inode);
365b94ae67d29 (Jan Kara 2012-05-03 14:47:55 +0200 1219) /* Waiters must see I_SYNC cleared before being woken up */
1c0eeaf569859 (Joern Engel 2007-10-16 23:30:44 -0700 1220) smp_mb();
1c0eeaf569859 (Joern Engel 2007-10-16 23:30:44 -0700 1221) wake_up_bit(&inode->i_state, __I_SYNC);
1c0eeaf569859 (Joern Engel 2007-10-16 23:30:44 -0700 1222) }
1c0eeaf569859 (Joern Engel 2007-10-16 23:30:44 -0700 1223)
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1224) static bool inode_dirtied_after(struct inode *inode, unsigned long t)
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1225) {
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1226) bool ret = time_after(inode->dirtied_when, t);
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1227) #ifndef CONFIG_64BIT
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1228) /*
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1229) * For inodes being constantly redirtied, dirtied_when can get stuck.
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1230) * It _appears_ to be in the future, but is actually in distant past.
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1231) * This test is necessary to prevent such wrapped-around relative times
5b0830cb9085f (Jens Axboe 2009-09-23 19:37:09 +0200 1232) * from permanently stopping the whole bdi writeback.
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1233) */
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1234) ret = ret && time_before_eq(inode->dirtied_when, jiffies);
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1235) #endif
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1236) return ret;
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1237) }
d2caa3c549c74 (Jeff Layton 2009-04-02 16:56:37 -0700 1238)
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 1239) #define EXPIRE_DIRTY_ATIME 0x0001
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 1240)
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1241) /*
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1242) * Move expired (dirtied before dirtied_before) dirty inodes from
697e6fed9fc62 (Jan Kara 2012-03-09 07:26:22 -0800 1243) * @delaying_queue to @dispatch_queue.
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1244) */
e84d0a4f8e39a (Wu Fengguang 2011-04-23 12:27:27 -0600 1245) static int move_expired_inodes(struct list_head *delaying_queue,
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1246) struct list_head *dispatch_queue,
5fcd57505c002 (Jan Kara 2020-05-29 16:24:43 +0200 1247) unsigned long dirtied_before)
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1248) {
5c03449d34deb (Shaohua Li 2009-09-24 14:42:33 +0200 1249) LIST_HEAD(tmp);
5c03449d34deb (Shaohua Li 2009-09-24 14:42:33 +0200 1250) struct list_head *pos, *node;
cf137307cd982 (Jens Axboe 2009-09-24 15:12:57 +0200 1251) struct super_block *sb = NULL;
5c03449d34deb (Shaohua Li 2009-09-24 14:42:33 +0200 1252) struct inode *inode;
cf137307cd982 (Jens Axboe 2009-09-24 15:12:57 +0200 1253) int do_sb_sort = 0;
e84d0a4f8e39a (Wu Fengguang 2011-04-23 12:27:27 -0600 1254) int moved = 0;
5c03449d34deb (Shaohua Li 2009-09-24 14:42:33 +0200 1255)
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1256) while (!list_empty(delaying_queue)) {
7ccf19a8042e3 (Nicholas Piggin 2010-10-21 11:49:30 +1100 1257) inode = wb_inode(delaying_queue->prev);
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1258) if (inode_dirtied_after(inode, dirtied_before))
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1259) break;
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 1260) list_move(&inode->i_io_list, &tmp);
a8855990e382f (Jan Kara 2013-07-09 22:36:45 +0800 1261) moved++;
5afced3bf2810 (Jan Kara 2020-05-29 15:05:22 +0200 1262) spin_lock(&inode->i_lock);
5afced3bf2810 (Jan Kara 2020-05-29 15:05:22 +0200 1263) inode->i_state |= I_SYNC_QUEUED;
5afced3bf2810 (Jan Kara 2020-05-29 15:05:22 +0200 1264) spin_unlock(&inode->i_lock);
a8855990e382f (Jan Kara 2013-07-09 22:36:45 +0800 1265) if (sb_is_blkdev_sb(inode->i_sb))
a8855990e382f (Jan Kara 2013-07-09 22:36:45 +0800 1266) continue;
cf137307cd982 (Jens Axboe 2009-09-24 15:12:57 +0200 1267) if (sb && sb != inode->i_sb)
cf137307cd982 (Jens Axboe 2009-09-24 15:12:57 +0200 1268) do_sb_sort = 1;
cf137307cd982 (Jens Axboe 2009-09-24 15:12:57 +0200 1269) sb = inode->i_sb;
5c03449d34deb (Shaohua Li 2009-09-24 14:42:33 +0200 1270) }
5c03449d34deb (Shaohua Li 2009-09-24 14:42:33 +0200 1271)
cf137307cd982 (Jens Axboe 2009-09-24 15:12:57 +0200 1272) /* just one sb in list, splice to dispatch_queue and we're done */
cf137307cd982 (Jens Axboe 2009-09-24 15:12:57 +0200 1273) if (!do_sb_sort) {
cf137307cd982 (Jens Axboe 2009-09-24 15:12:57 +0200 1274) list_splice(&tmp, dispatch_queue);
e84d0a4f8e39a (Wu Fengguang 2011-04-23 12:27:27 -0600 1275) goto out;
cf137307cd982 (Jens Axboe 2009-09-24 15:12:57 +0200 1276) }
cf137307cd982 (Jens Axboe 2009-09-24 15:12:57 +0200 1277)
5c03449d34deb (Shaohua Li 2009-09-24 14:42:33 +0200 1278) /* Move inodes from one superblock together */
5c03449d34deb (Shaohua Li 2009-09-24 14:42:33 +0200 1279) while (!list_empty(&tmp)) {
7ccf19a8042e3 (Nicholas Piggin 2010-10-21 11:49:30 +1100 1280) sb = wb_inode(tmp.prev)->i_sb;
5c03449d34deb (Shaohua Li 2009-09-24 14:42:33 +0200 1281) list_for_each_prev_safe(pos, node, &tmp) {
7ccf19a8042e3 (Nicholas Piggin 2010-10-21 11:49:30 +1100 1282) inode = wb_inode(pos);
5c03449d34deb (Shaohua Li 2009-09-24 14:42:33 +0200 1283) if (inode->i_sb == sb)
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 1284) list_move(&inode->i_io_list, dispatch_queue);
5c03449d34deb (Shaohua Li 2009-09-24 14:42:33 +0200 1285) }
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1286) }
e84d0a4f8e39a (Wu Fengguang 2011-04-23 12:27:27 -0600 1287) out:
e84d0a4f8e39a (Wu Fengguang 2011-04-23 12:27:27 -0600 1288) return moved;
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1289) }
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1290)
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1291) /*
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1292) * Queue all expired dirty inodes for io, eldest first.
4ea879b96d437 (Wu Fengguang 2010-08-11 14:17:42 -0700 1293) * Before
4ea879b96d437 (Wu Fengguang 2010-08-11 14:17:42 -0700 1294) * newly dirtied b_dirty b_io b_more_io
4ea879b96d437 (Wu Fengguang 2010-08-11 14:17:42 -0700 1295) * =============> gf edc BA
4ea879b96d437 (Wu Fengguang 2010-08-11 14:17:42 -0700 1296) * After
4ea879b96d437 (Wu Fengguang 2010-08-11 14:17:42 -0700 1297) * newly dirtied b_dirty b_io b_more_io
4ea879b96d437 (Wu Fengguang 2010-08-11 14:17:42 -0700 1298) * =============> g fBAedc
4ea879b96d437 (Wu Fengguang 2010-08-11 14:17:42 -0700 1299) * |
4ea879b96d437 (Wu Fengguang 2010-08-11 14:17:42 -0700 1300) * +--> dequeue for IO
2c1365791048e (Fengguang Wu 2007-10-16 23:30:39 -0700 1301) */
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1302) static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1303) unsigned long dirtied_before)
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1304) {
e84d0a4f8e39a (Wu Fengguang 2011-04-23 12:27:27 -0600 1305) int moved;
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1306) unsigned long time_expire_jif = dirtied_before;
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 1307)
f758eeabeb96f (Christoph Hellwig 2011-04-21 18:19:44 -0600 1308) assert_spin_locked(&wb->list_lock);
4ea879b96d437 (Wu Fengguang 2010-08-11 14:17:42 -0700 1309) list_splice_init(&wb->b_more_io, &wb->b_io);
5fcd57505c002 (Jan Kara 2020-05-29 16:24:43 +0200 1310) moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before);
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1311) if (!work->for_sync)
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1312) time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 1313) moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
5fcd57505c002 (Jan Kara 2020-05-29 16:24:43 +0200 1314) time_expire_jif);
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 1315) if (moved)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 1316) wb_io_lists_populated(wb);
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1317) trace_writeback_queue_io(wb, work, dirtied_before, moved);
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1318) }
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1319)
a9185b41a4f84 (Christoph Hellwig 2010-03-05 09:21:37 +0100 1320) static int write_inode(struct inode *inode, struct writeback_control *wbc)
08d8e9749e7f0 (Fengguang Wu 2007-10-16 23:30:39 -0700 1321) {
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 1322) int ret;
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 1323)
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 1324) if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 1325) trace_writeback_write_inode_start(inode, wbc);
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 1326) ret = inode->i_sb->s_op->write_inode(inode, wbc);
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 1327) trace_writeback_write_inode(inode, wbc);
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 1328) return ret;
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 1329) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1330) return 0;
08d8e9749e7f0 (Fengguang Wu 2007-10-16 23:30:39 -0700 1331) }
08d8e9749e7f0 (Fengguang Wu 2007-10-16 23:30:39 -0700 1332)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1333) /*
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1334) * Wait for writeback on an inode to complete. Called with i_lock held.
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1335) * Caller must make sure inode cannot go away when we drop i_lock.
01c031945f275 (Christoph Hellwig 2009-06-08 13:35:40 +0200 1336) */
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1337) static void __inode_wait_for_writeback(struct inode *inode)
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1338) __releases(inode->i_lock)
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1339) __acquires(inode->i_lock)
01c031945f275 (Christoph Hellwig 2009-06-08 13:35:40 +0200 1340) {
01c031945f275 (Christoph Hellwig 2009-06-08 13:35:40 +0200 1341) DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
01c031945f275 (Christoph Hellwig 2009-06-08 13:35:40 +0200 1342) wait_queue_head_t *wqh;
01c031945f275 (Christoph Hellwig 2009-06-08 13:35:40 +0200 1343)
01c031945f275 (Christoph Hellwig 2009-06-08 13:35:40 +0200 1344) wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 1345) while (inode->i_state & I_SYNC) {
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 1346) spin_unlock(&inode->i_lock);
743162013d40c (NeilBrown 2014-07-07 15:16:04 +1000 1347) __wait_on_bit(wqh, &wq, bit_wait,
743162013d40c (NeilBrown 2014-07-07 15:16:04 +1000 1348) TASK_UNINTERRUPTIBLE);
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 1349) spin_lock(&inode->i_lock);
58a9d3d8db06c (Richard Kennedy 2010-05-24 14:32:38 -0700 1350) }
01c031945f275 (Christoph Hellwig 2009-06-08 13:35:40 +0200 1351) }
01c031945f275 (Christoph Hellwig 2009-06-08 13:35:40 +0200 1352)
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1353) /*
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1354) * Wait for writeback on an inode to complete. Caller must have inode pinned.
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1355) */
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1356) void inode_wait_for_writeback(struct inode *inode)
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1357) {
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1358) spin_lock(&inode->i_lock);
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1359) __inode_wait_for_writeback(inode);
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1360) spin_unlock(&inode->i_lock);
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1361) }
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1362)
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1363) /*
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1364) * Sleep until I_SYNC is cleared. This function must be called with i_lock
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1365) * held and drops it. It is aimed for callers not holding any inode reference
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1366) * so once i_lock is dropped, inode can go away.
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1367) */
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1368) static void inode_sleep_on_writeback(struct inode *inode)
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1369) __releases(inode->i_lock)
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1370) {
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1371) DEFINE_WAIT(wait);
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1372) wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1373) int sleep;
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1374)
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1375) prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1376) sleep = inode->i_state & I_SYNC;
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1377) spin_unlock(&inode->i_lock);
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1378) if (sleep)
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1379) schedule();
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1380) finish_wait(wqh, &wait);
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1381) }
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1382)
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1383) /*
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1384) * Find proper writeback list for the inode depending on its current state and
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1385) * possibly also change of its state while we were doing writeback. Here we
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1386) * handle things such as livelock prevention or fairness of writeback among
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1387) * inodes. This function can be called only by flusher thread - noone else
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1388) * processes all inodes in writeback lists and requeueing inodes behind flusher
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1389) * thread's back can have unexpected consequences.
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1390) */
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1391) static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1392) struct writeback_control *wbc)
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1393) {
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1394) if (inode->i_state & I_FREEING)
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1395) return;
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1396)
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1397) /*
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1398) * Sync livelock prevention. Each inode is tagged and synced in one
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1399) * shot. If still dirty, it will be redirty_tail()'ed below. Update
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1400) * the dirty time to prevent enqueue and sync it again.
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1401) */
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1402) if ((inode->i_state & I_DIRTY) &&
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1403) (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1404) inode->dirtied_when = jiffies;
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1405)
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1406) if (wbc->pages_skipped) {
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1407) /*
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1408) * writeback is not making progress due to locked
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1409) * buffers. Skip this inode for now.
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1410) */
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1411) redirty_tail_locked(inode, wb);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1412) return;
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1413) }
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1414)
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1415) if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1416) /*
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1417) * We didn't write back all the pages. nfs_writepages()
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1418) * sometimes bales out without doing anything.
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1419) */
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1420) if (wbc->nr_to_write <= 0) {
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1421) /* Slice used up. Queue for next turn. */
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1422) requeue_io(inode, wb);
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1423) } else {
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1424) /*
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1425) * Writeback blocked by something other than
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1426) * congestion. Delay the inode for some time to
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1427) * avoid spinning on the CPU (100% iowait)
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1428) * retrying writeback of the dirty page/inode
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1429) * that cannot be performed immediately.
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1430) */
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1431) redirty_tail_locked(inode, wb);
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1432) }
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1433) } else if (inode->i_state & I_DIRTY) {
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1434) /*
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1435) * Filesystems can dirty the inode during writeback operations,
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1436) * such as delayed allocation during submission or metadata
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1437) * updates after data IO completion.
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1438) */
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1439) redirty_tail_locked(inode, wb);
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 1440) } else if (inode->i_state & I_DIRTY_TIME) {
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 1441) inode->dirtied_when = jiffies;
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 1442) inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
5afced3bf2810 (Jan Kara 2020-05-29 15:05:22 +0200 1443) inode->i_state &= ~I_SYNC_QUEUED;
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1444) } else {
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1445) /* The inode is clean. Remove from writeback lists. */
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 1446) inode_io_list_del_locked(inode, wb);
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1447) }
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1448) }
ccb26b5a65867 (Jan Kara 2012-05-03 14:47:58 +0200 1449)
01c031945f275 (Christoph Hellwig 2009-06-08 13:35:40 +0200 1450) /*
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1451) * Write out an inode and its dirty pages (or some of its dirty pages, depending
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1452) * on @wbc->nr_to_write), and clear the relevant dirty flags from i_state.
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1453) *
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1454) * This doesn't remove the inode from the writeback list it is on, except
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1455) * potentially to move it from b_dirty_time to b_dirty due to timestamp
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1456) * expiration. The caller is otherwise responsible for writeback list handling.
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1457) *
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1458) * The caller is also responsible for setting the I_SYNC flag beforehand and
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1459) * calling inode_sync_complete() to clear it afterwards.
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1460) */
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1461) static int
cd8ed2a45a401 (Yan Hong 2012-10-08 16:33:45 -0700 1462) __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1463) {
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1464) struct address_space *mapping = inode->i_mapping;
251d6a471c831 (Wu Fengguang 2010-12-01 17:33:37 -0600 1465) long nr_to_write = wbc->nr_to_write;
01c031945f275 (Christoph Hellwig 2009-06-08 13:35:40 +0200 1466) unsigned dirty;
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1467) int ret;
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1468)
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1469) WARN_ON(!(inode->i_state & I_SYNC));
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1470)
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 1471) trace_writeback_single_inode_start(inode, wbc, nr_to_write);
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 1472)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1473) ret = do_writepages(mapping, wbc);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1474)
26821ed40b423 (Christoph Hellwig 2010-03-05 09:21:21 +0100 1475) /*
26821ed40b423 (Christoph Hellwig 2010-03-05 09:21:21 +0100 1476) * Make sure to wait on the data before writing out the metadata.
26821ed40b423 (Christoph Hellwig 2010-03-05 09:21:21 +0100 1477) * This is important for filesystems that modify metadata on data
7747bd4bceb30 (Dave Chinner 2013-07-02 22:38:35 +1000 1478) * I/O completion. We don't do it for sync(2) writeback because it has a
7747bd4bceb30 (Dave Chinner 2013-07-02 22:38:35 +1000 1479) * separate, external IO completion path and ->sync_fs for guaranteeing
7747bd4bceb30 (Dave Chinner 2013-07-02 22:38:35 +1000 1480) * inode metadata is written back correctly.
26821ed40b423 (Christoph Hellwig 2010-03-05 09:21:21 +0100 1481) */
7747bd4bceb30 (Dave Chinner 2013-07-02 22:38:35 +1000 1482) if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) {
26821ed40b423 (Christoph Hellwig 2010-03-05 09:21:21 +0100 1483) int err = filemap_fdatawait(mapping);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1484) if (ret == 0)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1485) ret = err;
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1486) }
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1487)
5547e8aac6f71 (Dmitry Monakhov 2010-05-07 13:35:44 +0400 1488) /*
1e249cb5b7fc0 (Eric Biggers 2021-01-12 11:02:43 -0800 1489) * If the inode has dirty timestamps and we need to write them, call
1e249cb5b7fc0 (Eric Biggers 2021-01-12 11:02:43 -0800 1490) * mark_inode_dirty_sync() to notify the filesystem about it and to
1e249cb5b7fc0 (Eric Biggers 2021-01-12 11:02:43 -0800 1491) * change I_DIRTY_TIME into I_DIRTY_SYNC.
5547e8aac6f71 (Dmitry Monakhov 2010-05-07 13:35:44 +0400 1492) */
5fcd57505c002 (Jan Kara 2020-05-29 16:24:43 +0200 1493) if ((inode->i_state & I_DIRTY_TIME) &&
83dc881d678a8 (Eric Biggers 2021-01-12 11:02:50 -0800 1494) (wbc->sync_mode == WB_SYNC_ALL ||
5fcd57505c002 (Jan Kara 2020-05-29 16:24:43 +0200 1495) time_after(jiffies, inode->dirtied_time_when +
5fcd57505c002 (Jan Kara 2020-05-29 16:24:43 +0200 1496) dirtytime_expire_interval * HZ))) {
5fcd57505c002 (Jan Kara 2020-05-29 16:24:43 +0200 1497) trace_writeback_lazytime(inode);
1e249cb5b7fc0 (Eric Biggers 2021-01-12 11:02:43 -0800 1498) mark_inode_dirty_sync(inode);
5fcd57505c002 (Jan Kara 2020-05-29 16:24:43 +0200 1499) }
1e249cb5b7fc0 (Eric Biggers 2021-01-12 11:02:43 -0800 1500)
1e249cb5b7fc0 (Eric Biggers 2021-01-12 11:02:43 -0800 1501) /*
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1502) * Get and clear the dirty flags from i_state. This needs to be done
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1503) * after calling writepages because some filesystems may redirty the
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1504) * inode during writepages due to delalloc. It also needs to be done
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1505) * after handling timestamp expiration, as that may dirty the inode too.
1e249cb5b7fc0 (Eric Biggers 2021-01-12 11:02:43 -0800 1506) */
1e249cb5b7fc0 (Eric Biggers 2021-01-12 11:02:43 -0800 1507) spin_lock(&inode->i_lock);
1e249cb5b7fc0 (Eric Biggers 2021-01-12 11:02:43 -0800 1508) dirty = inode->i_state & I_DIRTY;
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 1509) inode->i_state &= ~dirty;
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1510)
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1511) /*
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1512) * Paired with smp_mb() in __mark_inode_dirty(). This allows
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1513) * __mark_inode_dirty() to test i_state without grabbing i_lock -
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1514) * either they see the I_DIRTY bits cleared or we see the dirtied
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1515) * inode.
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1516) *
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1517) * I_DIRTY_PAGES is always cleared together above even if @mapping
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1518) * still has dirty pages. The flag is reinstated after smp_mb() if
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1519) * necessary. This guarantees that either __mark_inode_dirty()
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1520) * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1521) */
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1522) smp_mb();
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1523)
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1524) if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1525) inode->i_state |= I_DIRTY_PAGES;
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1526)
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 1527) spin_unlock(&inode->i_lock);
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 1528)
26821ed40b423 (Christoph Hellwig 2010-03-05 09:21:21 +0100 1529) /* Don't write the inode if only I_DIRTY_PAGES was set */
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 1530) if (dirty & ~I_DIRTY_PAGES) {
a9185b41a4f84 (Christoph Hellwig 2010-03-05 09:21:37 +0100 1531) int err = write_inode(inode, wbc);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1532) if (ret == 0)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1533) ret = err;
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1534) }
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1535) trace_writeback_single_inode(inode, wbc, nr_to_write);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1536) return ret;
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1537) }
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1538)
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1539) /*
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1540) * Write out an inode's dirty data and metadata on-demand, i.e. separately from
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1541) * the regular batched writeback done by the flusher threads in
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1542) * writeback_sb_inodes(). @wbc controls various aspects of the write, such as
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1543) * whether it is a data-integrity sync (%WB_SYNC_ALL) or not (%WB_SYNC_NONE).
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1544) *
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1545) * To prevent the inode from going away, either the caller must have a reference
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1546) * to the inode, or the inode must have I_WILL_FREE or I_FREEING set.
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1547) */
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1548) static int writeback_single_inode(struct inode *inode,
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1549) struct writeback_control *wbc)
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1550) {
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1551) struct bdi_writeback *wb;
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1552) int ret = 0;
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1553)
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1554) spin_lock(&inode->i_lock);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1555) if (!atomic_read(&inode->i_count))
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1556) WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1557) else
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1558) WARN_ON(inode->i_state & I_WILL_FREE);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1559)
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1560) if (inode->i_state & I_SYNC) {
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1561) /*
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1562) * Writeback is already running on the inode. For WB_SYNC_NONE,
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1563) * that's enough and we can just return. For WB_SYNC_ALL, we
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1564) * must wait for the existing writeback to complete, then do
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1565) * writeback again if there's anything left.
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1566) */
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1567) if (wbc->sync_mode != WB_SYNC_ALL)
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1568) goto out;
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1569) __inode_wait_for_writeback(inode);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1570) }
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1571) WARN_ON(inode->i_state & I_SYNC);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1572) /*
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1573) * If the inode is already fully clean, then there's nothing to do.
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1574) *
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1575) * For data-integrity syncs we also need to check whether any pages are
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1576) * still under writeback, e.g. due to prior WB_SYNC_NONE writeback. If
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1577) * there are any such pages, we'll need to wait for them.
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1578) */
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 1579) if (!(inode->i_state & I_DIRTY_ALL) &&
f9b0e058cbd04 (Jan Kara 2013-12-14 04:21:26 +0800 1580) (wbc->sync_mode != WB_SYNC_ALL ||
f9b0e058cbd04 (Jan Kara 2013-12-14 04:21:26 +0800 1581) !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1582) goto out;
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1583) inode->i_state |= I_SYNC;
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 1584) wbc_attach_and_unlock_inode(wbc, inode);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1585)
cd8ed2a45a401 (Yan Hong 2012-10-08 16:33:45 -0700 1586) ret = __writeback_single_inode(inode, wbc);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1587)
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 1588) wbc_detach_inode(wbc);
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1589)
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1590) wb = inode_to_wb_and_lock_list(inode);
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 1591) spin_lock(&inode->i_lock);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1592) /*
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1593) * If the inode is now fully clean, then it can be safely removed from
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1594) * its writeback list (if any). Otherwise the flusher threads are
da0c4c60d8c7c (Eric Biggers 2021-01-12 11:02:51 -0800 1595) * responsible for the writeback lists.
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1596) */
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 1597) if (!(inode->i_state & I_DIRTY_ALL))
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 1598) inode_io_list_del_locked(inode, wb);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1599) spin_unlock(&wb->list_lock);
1c0eeaf569859 (Joern Engel 2007-10-16 23:30:44 -0700 1600) inode_sync_complete(inode);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1601) out:
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1602) spin_unlock(&inode->i_lock);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1603) return ret;
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1604) }
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1605)
a88a341a73be4 (Tejun Heo 2015-05-22 17:13:28 -0400 1606) static long writeback_chunk_size(struct bdi_writeback *wb,
1a12d8bd7b299 (Wu Fengguang 2010-08-29 13:28:09 -0600 1607) struct wb_writeback_work *work)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1608) {
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1609) long pages;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1610)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1611) /*
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1612) * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1613) * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1614) * here avoids calling into writeback_inodes_wb() more than once.
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1615) *
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1616) * The intended call sequence for WB_SYNC_ALL writeback is:
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1617) *
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1618) * wb_writeback()
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1619) * writeback_sb_inodes() <== called only once
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1620) * write_cache_pages() <== called once for each inode
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1621) * (quickly) tag currently dirty pages
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1622) * (maybe slowly) sync all tagged pages
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1623) */
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1624) if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1625) pages = LONG_MAX;
1a12d8bd7b299 (Wu Fengguang 2010-08-29 13:28:09 -0600 1626) else {
a88a341a73be4 (Tejun Heo 2015-05-22 17:13:28 -0400 1627) pages = min(wb->avg_write_bandwidth / 2,
dcc25ae76eb7b (Tejun Heo 2015-05-22 18:23:22 -0400 1628) global_wb_domain.dirty_limit / DIRTY_SCOPE);
1a12d8bd7b299 (Wu Fengguang 2010-08-29 13:28:09 -0600 1629) pages = min(pages, work->nr_pages);
1a12d8bd7b299 (Wu Fengguang 2010-08-29 13:28:09 -0600 1630) pages = round_down(pages + MIN_WRITEBACK_PAGES,
1a12d8bd7b299 (Wu Fengguang 2010-08-29 13:28:09 -0600 1631) MIN_WRITEBACK_PAGES);
1a12d8bd7b299 (Wu Fengguang 2010-08-29 13:28:09 -0600 1632) }
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1633)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1634) return pages;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1635) }
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1636)
f11c9c5c259cb (Edward Shishkin 2010-03-11 14:09:47 -0800 1637) /*
f11c9c5c259cb (Edward Shishkin 2010-03-11 14:09:47 -0800 1638) * Write a portion of b_io inodes which belong to @sb.
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1639) *
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1640) * Return the number of pages and/or inodes written.
0ba13fd19d39b (Linus Torvalds 2015-09-11 13:26:39 -0700 1641) *
0ba13fd19d39b (Linus Torvalds 2015-09-11 13:26:39 -0700 1642) * NOTE! This is called with wb->list_lock held, and will
0ba13fd19d39b (Linus Torvalds 2015-09-11 13:26:39 -0700 1643) * unlock and relock that for each inode it ends up doing
0ba13fd19d39b (Linus Torvalds 2015-09-11 13:26:39 -0700 1644) * IO for.
f11c9c5c259cb (Edward Shishkin 2010-03-11 14:09:47 -0800 1645) */
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1646) static long writeback_sb_inodes(struct super_block *sb,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1647) struct bdi_writeback *wb,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1648) struct wb_writeback_work *work)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1649) {
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1650) struct writeback_control wbc = {
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1651) .sync_mode = work->sync_mode,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1652) .tagged_writepages = work->tagged_writepages,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1653) .for_kupdate = work->for_kupdate,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1654) .for_background = work->for_background,
7747bd4bceb30 (Dave Chinner 2013-07-02 22:38:35 +1000 1655) .for_sync = work->for_sync,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1656) .range_cyclic = work->range_cyclic,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1657) .range_start = 0,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1658) .range_end = LLONG_MAX,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1659) };
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1660) unsigned long start_time = jiffies;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1661) long write_chunk;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1662) long wrote = 0; /* count both pages and inodes */
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1663)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1664) while (!list_empty(&wb->b_io)) {
7ccf19a8042e3 (Nicholas Piggin 2010-10-21 11:49:30 +1100 1665) struct inode *inode = wb_inode(wb->b_io.prev);
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1666) struct bdi_writeback *tmp_wb;
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1667)
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1668) if (inode->i_sb != sb) {
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1669) if (work->sb) {
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1670) /*
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1671) * We only want to write back data for this
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1672) * superblock, move all inodes not belonging
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1673) * to it back onto the dirty list.
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1674) */
f758eeabeb96f (Christoph Hellwig 2011-04-21 18:19:44 -0600 1675) redirty_tail(inode, wb);
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1676) continue;
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1677) }
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1678)
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1679) /*
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1680) * The inode belongs to a different superblock.
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1681) * Bounce back to the caller to unpin this and
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1682) * pin the next superblock.
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1683) */
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1684) break;
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1685) }
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1686)
9843b76aae802 (Christoph Hellwig 2010-10-24 19:40:46 +0200 1687) /*
331cbdeedeb2f (Wanpeng Li 2012-06-09 11:10:55 +0800 1688) * Don't bother with new inodes or inodes being freed, first
331cbdeedeb2f (Wanpeng Li 2012-06-09 11:10:55 +0800 1689) * kind does not need periodic writeout yet, and for the latter
9843b76aae802 (Christoph Hellwig 2010-10-24 19:40:46 +0200 1690) * kind writeout is handled by the freer.
9843b76aae802 (Christoph Hellwig 2010-10-24 19:40:46 +0200 1691) */
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 1692) spin_lock(&inode->i_lock);
9843b76aae802 (Christoph Hellwig 2010-10-24 19:40:46 +0200 1693) if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
b35250c0816c7 (Jan Kara 2020-06-10 17:36:03 +0200 1694) redirty_tail_locked(inode, wb);
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 1695) spin_unlock(&inode->i_lock);
7ef0d7377cb28 (Nicholas Piggin 2009-03-12 14:31:38 -0700 1696) continue;
7ef0d7377cb28 (Nicholas Piggin 2009-03-12 14:31:38 -0700 1697) }
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1698) if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1699) /*
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1700) * If this inode is locked for writeback and we are not
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1701) * doing writeback-for-data-integrity, move it to
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1702) * b_more_io so that writeback can proceed with the
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1703) * other inodes on s_io.
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1704) *
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1705) * We'll have another go at writing back this inode
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1706) * when we completed a full scan of b_io.
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1707) */
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1708) spin_unlock(&inode->i_lock);
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1709) requeue_io(inode, wb);
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1710) trace_writeback_sb_inodes_requeue(inode);
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1711) continue;
cc1676d917f32 (Jan Kara 2012-05-03 14:47:56 +0200 1712) }
f0d07b7ffde75 (Jan Kara 2012-05-03 14:47:59 +0200 1713) spin_unlock(&wb->list_lock);
f0d07b7ffde75 (Jan Kara 2012-05-03 14:47:59 +0200 1714)
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1715) /*
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1716) * We already requeued the inode if it had I_SYNC set and we
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1717) * are doing WB_SYNC_NONE writeback. So this catches only the
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1718) * WB_SYNC_ALL case.
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1719) */
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1720) if (inode->i_state & I_SYNC) {
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1721) /* Wait for I_SYNC. This function drops i_lock... */
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1722) inode_sleep_on_writeback(inode);
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1723) /* Inode may be gone, start again */
ead188f9f930f (Jan Kara 2012-06-08 17:07:36 +0200 1724) spin_lock(&wb->list_lock);
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1725) continue;
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1726) }
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1727) inode->i_state |= I_SYNC;
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 1728) wbc_attach_and_unlock_inode(&wbc, inode);
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1729)
a88a341a73be4 (Tejun Heo 2015-05-22 17:13:28 -0400 1730) write_chunk = writeback_chunk_size(wb, work);
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1731) wbc.nr_to_write = write_chunk;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1732) wbc.pages_skipped = 0;
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 1733)
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1734) /*
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1735) * We use I_SYNC to pin the inode in memory. While it is set
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1736) * evict_inode() will wait so the inode cannot be freed.
169ebd90131b2 (Jan Kara 2012-05-03 14:48:03 +0200 1737) */
cd8ed2a45a401 (Yan Hong 2012-10-08 16:33:45 -0700 1738) __writeback_single_inode(inode, &wbc);
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 1739)
b16b1deb553ad (Tejun Heo 2015-06-02 08:39:48 -0600 1740) wbc_detach_inode(&wbc);
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1741) work->nr_pages -= write_chunk - wbc.nr_to_write;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1742) wrote += write_chunk - wbc.nr_to_write;
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1743)
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1744) if (need_resched()) {
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1745) /*
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1746) * We're trying to balance between building up a nice
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1747) * long list of IOs to improve our merge rate, and
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1748) * getting those IOs out quickly for anyone throttling
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1749) * in balance_dirty_pages(). cond_resched() doesn't
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1750) * unplug, so get our IOs out the door before we
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1751) * give up the CPU.
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1752) */
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1753) blk_flush_plug(current);
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1754) cond_resched();
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1755) }
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1756)
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1757) /*
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1758) * Requeue @inode if still dirty. Be careful as @inode may
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1759) * have been switched to another wb in the meantime.
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1760) */
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1761) tmp_wb = inode_to_wb_and_lock_list(inode);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1762) spin_lock(&inode->i_lock);
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 1763) if (!(inode->i_state & I_DIRTY_ALL))
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1764) wrote++;
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1765) requeue_inode(inode, tmp_wb, &wbc);
4f8ad655dbc82 (Jan Kara 2012-05-03 14:48:00 +0200 1766) inode_sync_complete(inode);
0f1b1fd86f6fd (Dave Chinner 2011-03-22 22:23:43 +1100 1767) spin_unlock(&inode->i_lock);
590dca3a71875 (Chris Mason 2015-09-18 13:35:08 -0400 1768)
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1769) if (unlikely(tmp_wb != wb)) {
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1770) spin_unlock(&tmp_wb->list_lock);
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1771) spin_lock(&wb->list_lock);
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1772) }
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 1773)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1774) /*
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1775) * bail out to wb_writeback() often enough to check
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1776) * background threshold and other termination conditions.
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1777) */
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1778) if (wrote) {
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1779) if (time_is_before_jiffies(start_time + HZ / 10UL))
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1780) break;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1781) if (work->nr_pages <= 0)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1782) break;
8bc3be2751b4f (Fengguang Wu 2008-02-04 22:29:36 -0800 1783) }
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 1784) }
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1785) return wrote;
f11c9c5c259cb (Edward Shishkin 2010-03-11 14:09:47 -0800 1786) }
f11c9c5c259cb (Edward Shishkin 2010-03-11 14:09:47 -0800 1787)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1788) static long __writeback_inodes_wb(struct bdi_writeback *wb,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1789) struct wb_writeback_work *work)
f11c9c5c259cb (Edward Shishkin 2010-03-11 14:09:47 -0800 1790) {
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1791) unsigned long start_time = jiffies;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1792) long wrote = 0;
38f2197766312 (Nicholas Piggin 2009-01-06 14:40:25 -0800 1793)
f11c9c5c259cb (Edward Shishkin 2010-03-11 14:09:47 -0800 1794) while (!list_empty(&wb->b_io)) {
7ccf19a8042e3 (Nicholas Piggin 2010-10-21 11:49:30 +1100 1795) struct inode *inode = wb_inode(wb->b_io.prev);
f11c9c5c259cb (Edward Shishkin 2010-03-11 14:09:47 -0800 1796) struct super_block *sb = inode->i_sb;
9ecc2738ac237 (Jens Axboe 2009-09-24 15:25:11 +0200 1797)
eb6ef3df4faa5 (Konstantin Khlebnikov 2015-02-19 20:19:35 +0300 1798) if (!trylock_super(sb)) {
0e995816f4fb6 (Wu Fengguang 2011-07-29 22:14:35 -0600 1799) /*
eb6ef3df4faa5 (Konstantin Khlebnikov 2015-02-19 20:19:35 +0300 1800) * trylock_super() may fail consistently due to
0e995816f4fb6 (Wu Fengguang 2011-07-29 22:14:35 -0600 1801) * s_umount being grabbed by someone else. Don't use
0e995816f4fb6 (Wu Fengguang 2011-07-29 22:14:35 -0600 1802) * requeue_io() to avoid busy retrying the inode/sb.
0e995816f4fb6 (Wu Fengguang 2011-07-29 22:14:35 -0600 1803) */
0e995816f4fb6 (Wu Fengguang 2011-07-29 22:14:35 -0600 1804) redirty_tail(inode, wb);
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1805) continue;
f11c9c5c259cb (Edward Shishkin 2010-03-11 14:09:47 -0800 1806) }
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1807) wrote += writeback_sb_inodes(sb, wb, work);
eb6ef3df4faa5 (Konstantin Khlebnikov 2015-02-19 20:19:35 +0300 1808) up_read(&sb->s_umount);
f11c9c5c259cb (Edward Shishkin 2010-03-11 14:09:47 -0800 1809)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1810) /* refer to the same tests at the end of writeback_sb_inodes */
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1811) if (wrote) {
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1812) if (time_is_before_jiffies(start_time + HZ / 10UL))
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1813) break;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1814) if (work->nr_pages <= 0)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1815) break;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1816) }
f11c9c5c259cb (Edward Shishkin 2010-03-11 14:09:47 -0800 1817) }
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1818) /* Leave any unwritten inodes on b_io */
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1819) return wrote;
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1820) }
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1821)
7d9f073b8da45 (Wanpeng Li 2013-09-11 14:22:40 -0700 1822) static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
0e175a1835ffc (Curt Wohlgemuth 2011-10-07 21:54:10 -0600 1823) enum wb_reason reason)
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1824) {
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1825) struct wb_writeback_work work = {
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1826) .nr_pages = nr_pages,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1827) .sync_mode = WB_SYNC_NONE,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1828) .range_cyclic = 1,
0e175a1835ffc (Curt Wohlgemuth 2011-10-07 21:54:10 -0600 1829) .reason = reason,
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1830) };
505a666ee3fc6 (Linus Torvalds 2015-09-11 13:37:19 -0700 1831) struct blk_plug plug;
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1832)
505a666ee3fc6 (Linus Torvalds 2015-09-11 13:37:19 -0700 1833) blk_start_plug(&plug);
f758eeabeb96f (Christoph Hellwig 2011-04-21 18:19:44 -0600 1834) spin_lock(&wb->list_lock);
424b351fe1901 (Wu Fengguang 2010-07-21 20:11:53 -0600 1835) if (list_empty(&wb->b_io))
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1836) queue_io(wb, &work, jiffies);
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1837) __writeback_inodes_wb(wb, &work);
f758eeabeb96f (Christoph Hellwig 2011-04-21 18:19:44 -0600 1838) spin_unlock(&wb->list_lock);
505a666ee3fc6 (Linus Torvalds 2015-09-11 13:37:19 -0700 1839) blk_finish_plug(&plug);
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1840)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1841) return nr_pages - work.nr_pages;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1842) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1843)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1844) /*
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1845) * Explicit flushing or periodic writeback of "old" data.
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1846) *
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1847) * Define "old": the first time one of an inode's pages is dirtied, we mark the
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1848) * dirtying-time in the inode's address_space. So this periodic writeback code
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1849) * just walks the superblock inode list, writing back any inodes which are
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1850) * older than a specific point in time.
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1851) *
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1852) * Try to run once per dirty_writeback_interval. But if a writeback event
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1853) * takes longer than a dirty_writeback_interval interval, then leave a
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1854) * one-second gap.
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1855) *
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1856) * dirtied_before takes precedence over nr_to_write. So we'll only write back
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1857) * all dirty pages if they are all attached to "old" mappings.
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1858) */
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 1859) static long wb_writeback(struct bdi_writeback *wb,
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 1860) struct wb_writeback_work *work)
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1861) {
e98be2d599207 (Wu Fengguang 2010-08-29 11:22:30 -0600 1862) unsigned long wb_start = jiffies;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1863) long nr_pages = work->nr_pages;
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1864) unsigned long dirtied_before = jiffies;
a5989bdc981ec (Jan Kara 2009-09-16 19:22:48 +0200 1865) struct inode *inode;
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1866) long progress;
505a666ee3fc6 (Linus Torvalds 2015-09-11 13:37:19 -0700 1867) struct blk_plug plug;
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1868)
505a666ee3fc6 (Linus Torvalds 2015-09-11 13:37:19 -0700 1869) blk_start_plug(&plug);
e8dfc30582995 (Wu Fengguang 2011-04-21 12:06:32 -0600 1870) spin_lock(&wb->list_lock);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1871) for (;;) {
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1872) /*
d3ddec7635b6f (Wu Fengguang 2009-09-23 20:33:40 +0800 1873) * Stop writeback when nr_pages has been consumed
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1874) */
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 1875) if (work->nr_pages <= 0)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1876) break;
66f3b8e2e103a (Jens Axboe 2009-09-02 09:19:46 +0200 1877)
aa373cf550994 (Jan Kara 2011-01-13 15:45:47 -0800 1878) /*
aa373cf550994 (Jan Kara 2011-01-13 15:45:47 -0800 1879) * Background writeout and kupdate-style writeback may
aa373cf550994 (Jan Kara 2011-01-13 15:45:47 -0800 1880) * run forever. Stop them if there is other work to do
aa373cf550994 (Jan Kara 2011-01-13 15:45:47 -0800 1881) * so that e.g. sync can proceed. They'll be restarted
aa373cf550994 (Jan Kara 2011-01-13 15:45:47 -0800 1882) * after the other works are all done.
aa373cf550994 (Jan Kara 2011-01-13 15:45:47 -0800 1883) */
aa373cf550994 (Jan Kara 2011-01-13 15:45:47 -0800 1884) if ((work->for_background || work->for_kupdate) &&
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 1885) !list_empty(&wb->work_list))
aa373cf550994 (Jan Kara 2011-01-13 15:45:47 -0800 1886) break;
aa373cf550994 (Jan Kara 2011-01-13 15:45:47 -0800 1887)
38f2197766312 (Nicholas Piggin 2009-01-06 14:40:25 -0800 1888) /*
d3ddec7635b6f (Wu Fengguang 2009-09-23 20:33:40 +0800 1889) * For background writeout, stop when we are below the
d3ddec7635b6f (Wu Fengguang 2009-09-23 20:33:40 +0800 1890) * background dirty threshold
38f2197766312 (Nicholas Piggin 2009-01-06 14:40:25 -0800 1891) */
aa661bbe1e61c (Tejun Heo 2015-05-22 18:23:31 -0400 1892) if (work->for_background && !wb_over_bg_thresh(wb))
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1893) break;
38f2197766312 (Nicholas Piggin 2009-01-06 14:40:25 -0800 1894)
1bc36b6426ae4 (Jan Kara 2011-10-19 11:44:41 +0200 1895) /*
1bc36b6426ae4 (Jan Kara 2011-10-19 11:44:41 +0200 1896) * Kupdate and background works are special and we want to
1bc36b6426ae4 (Jan Kara 2011-10-19 11:44:41 +0200 1897) * include all inodes that need writing. Livelock avoidance is
1bc36b6426ae4 (Jan Kara 2011-10-19 11:44:41 +0200 1898) * handled by these works yielding to any other work so we are
1bc36b6426ae4 (Jan Kara 2011-10-19 11:44:41 +0200 1899) * safe.
1bc36b6426ae4 (Jan Kara 2011-10-19 11:44:41 +0200 1900) */
ba9aa8399fda4 (Wu Fengguang 2010-07-21 20:32:30 -0600 1901) if (work->for_kupdate) {
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1902) dirtied_before = jiffies -
ba9aa8399fda4 (Wu Fengguang 2010-07-21 20:32:30 -0600 1903) msecs_to_jiffies(dirty_expire_interval * 10);
1bc36b6426ae4 (Jan Kara 2011-10-19 11:44:41 +0200 1904) } else if (work->for_background)
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1905) dirtied_before = jiffies;
028c2dd184c09 (Dave Chinner 2010-07-07 13:24:07 +1000 1906)
5634cc2aa9aeb (Tejun Heo 2015-08-18 14:54:56 -0700 1907) trace_writeback_start(wb, work);
e8dfc30582995 (Wu Fengguang 2011-04-21 12:06:32 -0600 1908) if (list_empty(&wb->b_io))
f9cae926f35e8 (Jan Kara 2020-05-29 16:08:58 +0200 1909) queue_io(wb, work, dirtied_before);
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 1910) if (work->sb)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1911) progress = writeback_sb_inodes(work->sb, wb, work);
edadfb10ba35d (Christoph Hellwig 2010-06-10 12:07:54 +0200 1912) else
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1913) progress = __writeback_inodes_wb(wb, work);
5634cc2aa9aeb (Tejun Heo 2015-08-18 14:54:56 -0700 1914) trace_writeback_written(wb, work);
028c2dd184c09 (Dave Chinner 2010-07-07 13:24:07 +1000 1915)
e98be2d599207 (Wu Fengguang 2010-08-29 11:22:30 -0600 1916) wb_update_bandwidth(wb, wb_start);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1917)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1918) /*
e6fb6da2e1068 (Wu Fengguang 2010-07-22 10:23:44 -0600 1919) * Did we write something? Try for more
e6fb6da2e1068 (Wu Fengguang 2010-07-22 10:23:44 -0600 1920) *
e6fb6da2e1068 (Wu Fengguang 2010-07-22 10:23:44 -0600 1921) * Dirty inodes are moved to b_io for writeback in batches.
e6fb6da2e1068 (Wu Fengguang 2010-07-22 10:23:44 -0600 1922) * The completion of the current batch does not necessarily
e6fb6da2e1068 (Wu Fengguang 2010-07-22 10:23:44 -0600 1923) * mean the overall work is done. So we keep looping as long
e6fb6da2e1068 (Wu Fengguang 2010-07-22 10:23:44 -0600 1924) * as made some progress on cleaning pages or inodes.
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1925) */
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1926) if (progress)
71fd05a887e0f (Jens Axboe 2009-09-23 19:32:26 +0200 1927) continue;
71fd05a887e0f (Jens Axboe 2009-09-23 19:32:26 +0200 1928) /*
e6fb6da2e1068 (Wu Fengguang 2010-07-22 10:23:44 -0600 1929) * No more inodes for IO, bail
71fd05a887e0f (Jens Axboe 2009-09-23 19:32:26 +0200 1930) */
b7a2441f9966f (Wu Fengguang 2010-07-21 22:19:51 -0600 1931) if (list_empty(&wb->b_more_io))
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1932) break;
71fd05a887e0f (Jens Axboe 2009-09-23 19:32:26 +0200 1933) /*
71fd05a887e0f (Jens Axboe 2009-09-23 19:32:26 +0200 1934) * Nothing written. Wait for some inode to
71fd05a887e0f (Jens Axboe 2009-09-23 19:32:26 +0200 1935) * become available for writeback. Otherwise
71fd05a887e0f (Jens Axboe 2009-09-23 19:32:26 +0200 1936) * we'll just busyloop.
71fd05a887e0f (Jens Axboe 2009-09-23 19:32:26 +0200 1937) */
bace9248188f6 (Tahsin Erdogan 2016-12-12 16:43:20 -0800 1938) trace_writeback_wait(wb, work);
bace9248188f6 (Tahsin Erdogan 2016-12-12 16:43:20 -0800 1939) inode = wb_inode(wb->b_more_io.prev);
bace9248188f6 (Tahsin Erdogan 2016-12-12 16:43:20 -0800 1940) spin_lock(&inode->i_lock);
bace9248188f6 (Tahsin Erdogan 2016-12-12 16:43:20 -0800 1941) spin_unlock(&wb->list_lock);
bace9248188f6 (Tahsin Erdogan 2016-12-12 16:43:20 -0800 1942) /* This function drops i_lock... */
bace9248188f6 (Tahsin Erdogan 2016-12-12 16:43:20 -0800 1943) inode_sleep_on_writeback(inode);
bace9248188f6 (Tahsin Erdogan 2016-12-12 16:43:20 -0800 1944) spin_lock(&wb->list_lock);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1945) }
e8dfc30582995 (Wu Fengguang 2011-04-21 12:06:32 -0600 1946) spin_unlock(&wb->list_lock);
505a666ee3fc6 (Linus Torvalds 2015-09-11 13:37:19 -0700 1947) blk_finish_plug(&plug);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1948)
d46db3d58233b (Wu Fengguang 2011-05-04 19:54:37 -0600 1949) return nr_pages - work->nr_pages;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1950) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1951)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1952) /*
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 1953) * Return the next wb_writeback_work struct that hasn't been processed yet.
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1954) */
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 1955) static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1956) {
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 1957) struct wb_writeback_work *work = NULL;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1958)
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 1959) spin_lock_bh(&wb->work_lock);
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 1960) if (!list_empty(&wb->work_list)) {
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 1961) work = list_entry(wb->work_list.next,
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 1962) struct wb_writeback_work, list);
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 1963) list_del_init(&work->list);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1964) }
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 1965) spin_unlock_bh(&wb->work_lock);
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 1966) return work;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1967) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1968)
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1969) static long wb_check_background_flush(struct bdi_writeback *wb)
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1970) {
aa661bbe1e61c (Tejun Heo 2015-05-22 18:23:31 -0400 1971) if (wb_over_bg_thresh(wb)) {
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1972)
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1973) struct wb_writeback_work work = {
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1974) .nr_pages = LONG_MAX,
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1975) .sync_mode = WB_SYNC_NONE,
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1976) .for_background = 1,
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1977) .range_cyclic = 1,
0e175a1835ffc (Curt Wohlgemuth 2011-10-07 21:54:10 -0600 1978) .reason = WB_REASON_BACKGROUND,
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1979) };
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1980)
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1981) return wb_writeback(wb, &work);
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1982) }
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1983)
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1984) return 0;
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1985) }
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 1986)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1987) static long wb_check_old_data_flush(struct bdi_writeback *wb)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1988) {
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1989) unsigned long expired;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1990) long nr_pages;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1991)
69b62d01ec44f (Jens Axboe 2010-05-17 12:51:03 +0200 1992) /*
69b62d01ec44f (Jens Axboe 2010-05-17 12:51:03 +0200 1993) * When set to zero, disable periodic writeback
69b62d01ec44f (Jens Axboe 2010-05-17 12:51:03 +0200 1994) */
69b62d01ec44f (Jens Axboe 2010-05-17 12:51:03 +0200 1995) if (!dirty_writeback_interval)
69b62d01ec44f (Jens Axboe 2010-05-17 12:51:03 +0200 1996) return 0;
69b62d01ec44f (Jens Axboe 2010-05-17 12:51:03 +0200 1997)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1998) expired = wb->last_old_flush +
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 1999) msecs_to_jiffies(dirty_writeback_interval * 10);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2000) if (time_before(jiffies, expired))
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2001) return 0;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2002)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2003) wb->last_old_flush = jiffies;
cdf01dd5443d0 (Linus Torvalds 2010-10-30 08:55:52 -0700 2004) nr_pages = get_nr_dirty_pages();
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2005)
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 2006) if (nr_pages) {
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 2007) struct wb_writeback_work work = {
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 2008) .nr_pages = nr_pages,
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 2009) .sync_mode = WB_SYNC_NONE,
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 2010) .for_kupdate = 1,
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 2011) .range_cyclic = 1,
0e175a1835ffc (Curt Wohlgemuth 2011-10-07 21:54:10 -0600 2012) .reason = WB_REASON_PERIODIC,
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 2013) };
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 2014)
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 2015) return wb_writeback(wb, &work);
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 2016) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2017)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2018) return 0;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2019) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2020)
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2021) static long wb_check_start_all(struct bdi_writeback *wb)
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2022) {
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2023) long nr_pages;
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2024)
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2025) if (!test_bit(WB_start_all, &wb->state))
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2026) return 0;
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2027)
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2028) nr_pages = get_nr_dirty_pages();
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2029) if (nr_pages) {
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2030) struct wb_writeback_work work = {
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2031) .nr_pages = wb_split_bdi_pages(wb, nr_pages),
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2032) .sync_mode = WB_SYNC_NONE,
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2033) .range_cyclic = 1,
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2034) .reason = wb->start_all_reason,
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2035) };
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2036)
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2037) nr_pages = wb_writeback(wb, &work);
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2038) }
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2039)
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2040) clear_bit(WB_start_all, &wb->state);
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2041) return nr_pages;
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2042) }
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2043)
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2044)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2045) /*
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2046) * Retrieve work items and do the writeback they describe
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2047) */
25d130ba22362 (Wanpeng Li 2013-07-08 16:00:14 -0700 2048) static long wb_do_writeback(struct bdi_writeback *wb)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2049) {
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 2050) struct wb_writeback_work *work;
c4a77a6c7dcff (Jens Axboe 2009-09-16 15:18:25 +0200 2051) long wrote = 0;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2052)
4452226ea276e (Tejun Heo 2015-05-22 17:13:26 -0400 2053) set_bit(WB_writeback_running, &wb->state);
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 2054) while ((work = get_next_work_item(wb)) != NULL) {
5634cc2aa9aeb (Tejun Heo 2015-08-18 14:54:56 -0700 2055) trace_writeback_exec(wb, work);
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 2056) wrote += wb_writeback(wb, work);
4a3a485b1ed0e (Tahsin Erdogan 2017-03-10 12:09:49 -0800 2057) finish_writeback_work(wb, work);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2058) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2059)
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2060) /*
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2061) * Check for a flush-everything request
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2062) */
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2063) wrote += wb_check_start_all(wb);
85009b4f5f039 (Jens Axboe 2017-09-30 02:09:06 -0600 2064)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2065) /*
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2066) * Check for periodic writeback, kupdated() style
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2067) */
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2068) wrote += wb_check_old_data_flush(wb);
6585027a5e8cb (Jan Kara 2011-01-13 15:45:44 -0800 2069) wrote += wb_check_background_flush(wb);
4452226ea276e (Tejun Heo 2015-05-22 17:13:26 -0400 2070) clear_bit(WB_writeback_running, &wb->state);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2071)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2072) return wrote;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2073) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2074)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2075) /*
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2076) * Handle writeback of dirty data for the device backed by this bdi. Also
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2077) * reschedules periodically and does kupdated style flushing.
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2078) */
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 2079) void wb_workfn(struct work_struct *work)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2080) {
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2081) struct bdi_writeback *wb = container_of(to_delayed_work(work),
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2082) struct bdi_writeback, dwork);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2083) long pages_written;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2084)
68f23b89067fd (Theodore Ts'o 2020-01-30 22:11:04 -0800 2085) set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
766f9164193f6 (Peter Zijlstra 2010-10-26 14:22:45 -0700 2086) current->flags |= PF_SWAPWRITE;
455b2864686d3 (Dave Chinner 2010-07-07 13:24:06 +1000 2087)
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2088) if (likely(!current_is_workqueue_rescuer() ||
4452226ea276e (Tejun Heo 2015-05-22 17:13:26 -0400 2089) !test_bit(WB_registered, &wb->state))) {
6467716a37673 (Artem Bityutskiy 2010-07-25 14:29:22 +0300 2090) /*
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 2091) * The normal path. Keep writing back @wb until its
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2092) * work_list is empty. Note that this path is also taken
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 2093) * if @wb is shutting down even when we're running off the
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2094) * rescuer as work_list needs to be drained.
6467716a37673 (Artem Bityutskiy 2010-07-25 14:29:22 +0300 2095) */
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2096) do {
25d130ba22362 (Wanpeng Li 2013-07-08 16:00:14 -0700 2097) pages_written = wb_do_writeback(wb);
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2098) trace_writeback_pages_written(pages_written);
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 2099) } while (!list_empty(&wb->work_list));
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2100) } else {
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2101) /*
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2102) * bdi_wq can't get enough workers and we're running off
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2103) * the emergency worker. Don't hog it. Hopefully, 1024 is
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2104) * enough for efficient IO.
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2105) */
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 2106) pages_written = writeback_inodes_wb(wb, 1024,
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2107) WB_REASON_FORKER_THREAD);
455b2864686d3 (Dave Chinner 2010-07-07 13:24:06 +1000 2108) trace_writeback_pages_written(pages_written);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2109) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2110)
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 2111) if (!list_empty(&wb->work_list))
b8b784958eccb (Jan Kara 2018-05-03 18:26:26 +0200 2112) wb_wakeup(wb);
6ca738d60c563 (Derek Basehore 2014-04-03 14:46:22 -0700 2113) else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
f0054bb1e1f3b (Tejun Heo 2015-05-22 17:13:30 -0400 2114) wb_wakeup_delayed(wb);
455b2864686d3 (Dave Chinner 2010-07-07 13:24:06 +1000 2115)
839a8e8660b67 (Tejun Heo 2013-04-01 19:08:06 -0700 2116) current->flags &= ~PF_SWAPWRITE;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2117) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2118)
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2119) /*
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2120) * Start writeback of `nr_pages' pages on this bdi. If `nr_pages' is zero,
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2121) * write back the whole world.
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2122) */
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2123) static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 2124) enum wb_reason reason)
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2125) {
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2126) struct bdi_writeback *wb;
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2127)
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2128) if (!bdi_has_dirty_io(bdi))
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2129) return;
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2130)
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2131) list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 2132) wb_start_writeback(wb, reason);
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2133) }
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2134)
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2135) void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2136) enum wb_reason reason)
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2137) {
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2138) rcu_read_lock();
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 2139) __wakeup_flusher_threads_bdi(bdi, reason);
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2140) rcu_read_unlock();
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2141) }
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2142)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2143) /*
9ba4b2dfafaa7 (Jens Axboe 2017-09-20 08:58:25 -0600 2144) * Wakeup the flusher threads to start writeback of all currently dirty pages
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2145) */
9ba4b2dfafaa7 (Jens Axboe 2017-09-20 08:58:25 -0600 2146) void wakeup_flusher_threads(enum wb_reason reason)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2147) {
b8c2f3474f107 (Christoph Hellwig 2010-06-08 18:15:07 +0200 2148) struct backing_dev_info *bdi;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2149)
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2150) /*
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2151) * If we are expecting writeback progress we must submit plugged IO.
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2152) */
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2153) if (blk_needs_flush_plug(current))
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2154) blk_schedule_flush_plug(current);
51350ea0d7f35 (Konstantin Khlebnikov 2016-08-04 21:36:05 +0300 2155)
b8c2f3474f107 (Christoph Hellwig 2010-06-08 18:15:07 +0200 2156) rcu_read_lock();
595043e5f9ef1 (Jens Axboe 2017-09-28 11:26:59 -0600 2157) list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
e8e8a0c6c9bfc (Jens Axboe 2017-09-28 11:31:22 -0600 2158) __wakeup_flusher_threads_bdi(bdi, reason);
cfc4ba5365449 (Jens Axboe 2009-09-14 13:12:40 +0200 2159) rcu_read_unlock();
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2160) }
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2161)
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2162) /*
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2163) * Wake up bdi's periodically to make sure dirtytime inodes gets
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2164) * written back periodically. We deliberately do *not* check the
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2165) * b_dirtytime list in wb_has_dirty_io(), since this would cause the
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2166) * kernel to be constantly waking up once there are any dirtytime
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2167) * inodes on the system. So instead we define a separate delayed work
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2168) * function which gets called much more rarely. (By default, only
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2169) * once every 12 hours.)
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2170) *
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2171) * If there is any other write activity going on in the file system,
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2172) * this function won't be necessary. But if the only thing that has
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2173) * happened on the file system is a dirtytime inode caused by an atime
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2174) * update, we need this infrastructure below to make sure that inode
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2175) * eventually gets pushed out to disk.
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2176) */
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2177) static void wakeup_dirtytime_writeback(struct work_struct *w);
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2178) static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2179)
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2180) static void wakeup_dirtytime_writeback(struct work_struct *w)
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2181) {
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2182) struct backing_dev_info *bdi;
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2183)
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2184) rcu_read_lock();
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2185) list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
001fe6f617b1a (Tejun Heo 2015-05-22 17:13:56 -0400 2186) struct bdi_writeback *wb;
001fe6f617b1a (Tejun Heo 2015-05-22 17:13:56 -0400 2187)
b817525a4a80c (Tejun Heo 2015-10-02 14:47:05 -0400 2188) list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
6fdf860f15d4a (Tejun Heo 2015-09-29 12:47:51 -0400 2189) if (!list_empty(&wb->b_dirty_time))
6fdf860f15d4a (Tejun Heo 2015-09-29 12:47:51 -0400 2190) wb_wakeup(wb);
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2191) }
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2192) rcu_read_unlock();
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2193) schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2194) }
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2195)
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2196) static int __init start_dirtytime_writeback(void)
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2197) {
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2198) schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2199) return 0;
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2200) }
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2201) __initcall(start_dirtytime_writeback);
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2202)
1efff914afac8 (Theodore Ts'o 2015-03-17 12:23:32 -0400 2203) int dirtytime_interval_handler(struct ctl_table *table, int write,
9ca48e20ec5cb (Tobias Klauser 2020-09-18 21:20:39 -0700 2204) void *buffer, size_t *lenp, loff_t *ppos)
1efff914afac8 (Theodore Ts'o 2015-03-17 12:23:32 -0400 2205) {
1efff914afac8 (Theodore Ts'o 2015-03-17 12:23:32 -0400 2206) int ret;
1efff914afac8 (Theodore Ts'o 2015-03-17 12:23:32 -0400 2207)
1efff914afac8 (Theodore Ts'o 2015-03-17 12:23:32 -0400 2208) ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
1efff914afac8 (Theodore Ts'o 2015-03-17 12:23:32 -0400 2209) if (ret == 0 && write)
1efff914afac8 (Theodore Ts'o 2015-03-17 12:23:32 -0400 2210) mod_delayed_work(system_wq, &dirtytime_work, 0);
1efff914afac8 (Theodore Ts'o 2015-03-17 12:23:32 -0400 2211) return ret;
1efff914afac8 (Theodore Ts'o 2015-03-17 12:23:32 -0400 2212) }
1efff914afac8 (Theodore Ts'o 2015-03-17 12:23:32 -0400 2213)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2214) /**
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2215) * __mark_inode_dirty - internal function to mark an inode dirty
0117d4272b1ac (Mauro Carvalho Chehab 2017-05-12 07:45:42 -0300 2216) *
0117d4272b1ac (Mauro Carvalho Chehab 2017-05-12 07:45:42 -0300 2217) * @inode: inode to mark
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2218) * @flags: what kind of dirty, e.g. I_DIRTY_SYNC. This can be a combination of
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2219) * multiple I_DIRTY_* flags, except that I_DIRTY_TIME can't be combined
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2220) * with I_DIRTY_PAGES.
0117d4272b1ac (Mauro Carvalho Chehab 2017-05-12 07:45:42 -0300 2221) *
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2222) * Mark an inode as dirty. We notify the filesystem, then update the inode's
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2223) * dirty flags. Then, if needed we add the inode to the appropriate dirty list.
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2224) *
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2225) * Most callers should use mark_inode_dirty() or mark_inode_dirty_sync()
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2226) * instead of calling this directly.
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2227) *
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2228) * CAREFUL! We only add the inode to the dirty list if it is hashed or if it
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2229) * refers to a blockdev. Unhashed inodes will never be added to the dirty list
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2230) * even if they are later hashed, as they will have been marked dirty already.
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2231) *
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2232) * In short, ensure you hash any inodes _before_ you start marking them dirty.
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2233) *
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2234) * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2235) * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2236) * the kernel-internal blockdev inode represents the dirtying time of the
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2237) * blockdev's pages. This is why for I_DIRTY_PAGES we always use
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2238) * page->mapping->host, so the page-dirtying time is recorded in the internal
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2239) * blockdev inode.
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2240) */
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2241) void __mark_inode_dirty(struct inode *inode, int flags)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2242) {
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2243) struct super_block *sb = inode->i_sb;
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2244) int dirtytime = 0;
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 2245)
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 2246) trace_writeback_mark_inode_dirty(inode, flags);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2247)
e2728c5621fd9 (Eric Biggers 2021-01-12 11:02:47 -0800 2248) if (flags & I_DIRTY_INODE) {
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2249) /*
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2250) * Notify the filesystem about the inode being dirtied, so that
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2251) * (if needed) it can update on-disk fields and journal the
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2252) * inode. This is only needed when the inode itself is being
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2253) * dirtied now. I.e. it's only needed for I_DIRTY_INODE, not
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2254) * for just I_DIRTY_PAGES or I_DIRTY_TIME.
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2255) */
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 2256) trace_writeback_dirty_inode_start(inode, flags);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2257) if (sb->s_op->dirty_inode)
a38ed483a7267 (Eric Biggers 2021-01-12 11:02:48 -0800 2258) sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
9fb0a7da0c528 (Tejun Heo 2013-01-11 13:06:37 -0800 2259) trace_writeback_dirty_inode(inode, flags);
e2728c5621fd9 (Eric Biggers 2021-01-12 11:02:47 -0800 2260)
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2261) /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 2262) flags &= ~I_DIRTY_TIME;
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2263) } else {
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2264) /*
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2265) * Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing.
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2266) * (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2267) * in one call to __mark_inode_dirty().)
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2268) */
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2269) dirtytime = flags & I_DIRTY_TIME;
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2270) WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME);
e2728c5621fd9 (Eric Biggers 2021-01-12 11:02:47 -0800 2271) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2272)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2273) /*
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 2274) * Paired with smp_mb() in __writeback_single_inode() for the
9c6ac78eb3521 (Tejun Heo 2014-10-24 15:38:21 -0400 2275) * following lockless i_state test. See there for details.
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2276) */
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2277) smp_mb();
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2278)
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 2279) if (((inode->i_state & flags) == flags) ||
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 2280) (dirtytime && (inode->i_state & I_DIRTY_INODE)))
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2281) return;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2282)
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 2283) spin_lock(&inode->i_lock);
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 2284) if (dirtytime && (inode->i_state & I_DIRTY_INODE))
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 2285) goto out_unlock_inode;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2286) if ((inode->i_state & flags) != flags) {
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2287) const int was_dirty = inode->i_state & I_DIRTY;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2288)
52ebea749aaed (Tejun Heo 2015-05-22 17:13:37 -0400 2289) inode_attach_wb(inode, NULL);
52ebea749aaed (Tejun Heo 2015-05-22 17:13:37 -0400 2290)
35d14f278e530 (Eric Biggers 2021-01-12 11:02:49 -0800 2291) /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 2292) if (flags & I_DIRTY_INODE)
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 2293) inode->i_state &= ~I_DIRTY_TIME;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2294) inode->i_state |= flags;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2295)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2296) /*
5afced3bf2810 (Jan Kara 2020-05-29 15:05:22 +0200 2297) * If the inode is queued for writeback by flush worker, just
5afced3bf2810 (Jan Kara 2020-05-29 15:05:22 +0200 2298) * update its dirty state. Once the flush worker is done with
5afced3bf2810 (Jan Kara 2020-05-29 15:05:22 +0200 2299) * the inode it will place it on the appropriate superblock
5afced3bf2810 (Jan Kara 2020-05-29 15:05:22 +0200 2300) * list, based upon its state.
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2301) */
5afced3bf2810 (Jan Kara 2020-05-29 15:05:22 +0200 2302) if (inode->i_state & I_SYNC_QUEUED)
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 2303) goto out_unlock_inode;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2304)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2305) /*
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2306) * Only add valid (hashed) inodes to the superblock's
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2307) * dirty list. Add blockdev inodes as well.
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2308) */
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2309) if (!S_ISBLK(inode->i_mode)) {
1d3382cbf0298 (Al Viro 2010-10-23 15:19:20 -0400 2310) if (inode_unhashed(inode))
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 2311) goto out_unlock_inode;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2312) }
a4ffdde6e56fd (Al Viro 2010-06-02 17:38:30 -0400 2313) if (inode->i_state & I_FREEING)
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 2314) goto out_unlock_inode;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2315)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2316) /*
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2317) * If the inode was already on b_dirty/b_io/b_more_io, don't
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2318) * reposition it (that would break b_dirty time-ordering).
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2319) */
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2320) if (!was_dirty) {
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 2321) struct bdi_writeback *wb;
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 2322) struct list_head *dirty_list;
a66979abad090 (Dave Chinner 2011-03-22 22:23:41 +1100 2323) bool wakeup_bdi = false;
253c34e9b10c3 (Artem Bityutskiy 2010-07-25 14:29:21 +0300 2324)
87e1d789bf55b (Tejun Heo 2015-05-28 14:50:52 -0400 2325) wb = locked_inode_to_wb_and_lock_list(inode);
253c34e9b10c3 (Artem Bityutskiy 2010-07-25 14:29:21 +0300 2326)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2327) inode->dirtied_when = jiffies;
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2328) if (dirtytime)
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2329) inode->dirtied_time_when = jiffies;
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 2330)
0e11f6443f522 (Christoph Hellwig 2018-02-21 07:54:49 -0800 2331) if (inode->i_state & I_DIRTY)
0747259d13feb (Tejun Heo 2015-05-22 17:14:02 -0400 2332) dirty_list = &wb->b_dirty;
a2f4870697a5b (Theodore Ts'o 2015-03-17 12:23:19 -0400 2333) else
0747259d13feb (Tejun Heo 2015-05-22 17:14:02 -0400 2334) dirty_list = &wb->b_dirty_time;
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 2335)
c7f5408493aeb (Dave Chinner 2015-03-04 14:07:22 -0500 2336) wakeup_bdi = inode_io_list_move_locked(inode, wb,
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 2337) dirty_list);
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 2338)
0747259d13feb (Tejun Heo 2015-05-22 17:14:02 -0400 2339) spin_unlock(&wb->list_lock);
0ae45f63d4ef8 (Theodore Ts'o 2015-02-02 00:37:00 -0500 2340) trace_writeback_dirty_inode_enqueue(inode);
a66979abad090 (Dave Chinner 2011-03-22 22:23:41 +1100 2341)
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 2342) /*
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 2343) * If this is the first dirty inode for this bdi,
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 2344) * we have to wake-up the corresponding bdi thread
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 2345) * to make sure background write-back happens
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 2346) * later.
d6c10f1fc8626 (Tejun Heo 2015-05-22 17:13:45 -0400 2347) */
f56753ac2a908 (Christoph Hellwig 2020-09-24 08:51:40 +0200 2348) if (wakeup_bdi &&
f56753ac2a908 (Christoph Hellwig 2020-09-24 08:51:40 +0200 2349) (wb->bdi->capabilities & BDI_CAP_WRITEBACK))
0747259d13feb (Tejun Heo 2015-05-22 17:14:02 -0400 2350) wb_wakeup_delayed(wb);
a66979abad090 (Dave Chinner 2011-03-22 22:23:41 +1100 2351) return;
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2352) }
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2353) }
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 2354) out_unlock_inode:
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 2355) spin_unlock(&inode->i_lock);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2356) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2357) EXPORT_SYMBOL(__mark_inode_dirty);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2358)
e97fedb9ef986 (Dave Chinner 2015-03-04 13:40:00 -0500 2359) /*
e97fedb9ef986 (Dave Chinner 2015-03-04 13:40:00 -0500 2360) * The @s_sync_lock is used to serialise concurrent sync operations
e97fedb9ef986 (Dave Chinner 2015-03-04 13:40:00 -0500 2361) * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
e97fedb9ef986 (Dave Chinner 2015-03-04 13:40:00 -0500 2362) * Concurrent callers will block on the s_sync_lock rather than doing contending
e97fedb9ef986 (Dave Chinner 2015-03-04 13:40:00 -0500 2363) * walks. The queueing maintains sync(2) required behaviour as all the IO that
e97fedb9ef986 (Dave Chinner 2015-03-04 13:40:00 -0500 2364) * has been issued up to the time this function is enter is guaranteed to be
e97fedb9ef986 (Dave Chinner 2015-03-04 13:40:00 -0500 2365) * completed by the time we have gained the lock and waited for all IO that is
e97fedb9ef986 (Dave Chinner 2015-03-04 13:40:00 -0500 2366) * in progress regardless of the order callers are granted the lock.
e97fedb9ef986 (Dave Chinner 2015-03-04 13:40:00 -0500 2367) */
b6e51316daede (Jens Axboe 2009-09-16 15:13:54 +0200 2368) static void wait_sb_inodes(struct super_block *sb)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2369) {
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2370) LIST_HEAD(sync_list);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2371)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2372) /*
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2373) * We need to be protected against the filesystem going from
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2374) * r/o to r/w or vice versa.
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2375) */
b6e51316daede (Jens Axboe 2009-09-16 15:13:54 +0200 2376) WARN_ON(!rwsem_is_locked(&sb->s_umount));
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2377)
e97fedb9ef986 (Dave Chinner 2015-03-04 13:40:00 -0500 2378) mutex_lock(&sb->s_sync_lock);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2379)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2380) /*
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2381) * Splice the writeback list onto a temporary list to avoid waiting on
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2382) * inodes that have started writeback after this point.
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2383) *
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2384) * Use rcu_read_lock() to keep the inodes around until we have a
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2385) * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2386) * the local list because inodes can be dropped from either by writeback
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2387) * completion.
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2388) */
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2389) rcu_read_lock();
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2390) spin_lock_irq(&sb->s_inode_wblist_lock);
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2391) list_splice_init(&sb->s_inodes_wb, &sync_list);
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2392)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2393) /*
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2394) * Data integrity sync. Must wait for all pages under writeback, because
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2395) * there may have been pages dirtied before our sync call, but which had
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2396) * writeout started before we write it out. In which case, the inode
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2397) * may not be on the dirty list, but we still have to wait for that
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2398) * writeout.
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2399) */
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2400) while (!list_empty(&sync_list)) {
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2401) struct inode *inode = list_first_entry(&sync_list, struct inode,
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2402) i_wb_list);
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 2403) struct address_space *mapping = inode->i_mapping;
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2404)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2405) /*
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2406) * Move each inode back to the wb list before we drop the lock
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2407) * to preserve consistency between i_wb_list and the mapping
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2408) * writeback tag. Writeback completion is responsible to remove
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2409) * the inode from either list once the writeback tag is cleared.
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2410) */
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2411) list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb);
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2412)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2413) /*
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2414) * The mapping can appear untagged while still on-list since we
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2415) * do not have the mapping lock. Skip it here, wb completion
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2416) * will remove it.
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2417) */
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2418) if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2419) continue;
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2420)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2421) spin_unlock_irq(&sb->s_inode_wblist_lock);
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2422)
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 2423) spin_lock(&inode->i_lock);
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2424) if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 2425) spin_unlock(&inode->i_lock);
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2426)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2427) spin_lock_irq(&sb->s_inode_wblist_lock);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2428) continue;
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 2429) }
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2430) __iget(inode);
250df6ed274d7 (Dave Chinner 2011-03-22 22:23:36 +1100 2431) spin_unlock(&inode->i_lock);
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2432) rcu_read_unlock();
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2433)
aa750fd71c242 (Junichi Nomura 2015-11-05 18:47:23 -0800 2434) /*
aa750fd71c242 (Junichi Nomura 2015-11-05 18:47:23 -0800 2435) * We keep the error status of individual mapping so that
aa750fd71c242 (Junichi Nomura 2015-11-05 18:47:23 -0800 2436) * applications can catch the writeback error using fsync(2).
aa750fd71c242 (Junichi Nomura 2015-11-05 18:47:23 -0800 2437) * See filemap_fdatawait_keep_errors() for details.
aa750fd71c242 (Junichi Nomura 2015-11-05 18:47:23 -0800 2438) */
aa750fd71c242 (Junichi Nomura 2015-11-05 18:47:23 -0800 2439) filemap_fdatawait_keep_errors(mapping);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2440)
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2441) cond_resched();
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2442)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2443) iput(inode);
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2444)
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2445) rcu_read_lock();
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2446) spin_lock_irq(&sb->s_inode_wblist_lock);
03ba3782e8dcc (Jens Axboe 2009-09-09 09:08:54 +0200 2447) }
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2448) spin_unlock_irq(&sb->s_inode_wblist_lock);
6c60d2b5746cf (Dave Chinner 2016-07-26 15:21:50 -0700 2449) rcu_read_unlock();
e97fedb9ef986 (Dave Chinner 2015-03-04 13:40:00 -0500 2450) mutex_unlock(&sb->s_sync_lock);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2451) }
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2452)
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2453) static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2454) enum wb_reason reason, bool skip_if_busy)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2455) {
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 2456) struct backing_dev_info *bdi = sb->s_bdi;
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 2457) DEFINE_WB_COMPLETION(done, bdi);
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 2458) struct wb_writeback_work work = {
6e6938b6d3130 (Wu Fengguang 2010-06-06 10:38:15 -0600 2459) .sb = sb,
6e6938b6d3130 (Wu Fengguang 2010-06-06 10:38:15 -0600 2460) .sync_mode = WB_SYNC_NONE,
6e6938b6d3130 (Wu Fengguang 2010-06-06 10:38:15 -0600 2461) .tagged_writepages = 1,
6e6938b6d3130 (Wu Fengguang 2010-06-06 10:38:15 -0600 2462) .done = &done,
6e6938b6d3130 (Wu Fengguang 2010-06-06 10:38:15 -0600 2463) .nr_pages = nr,
0e175a1835ffc (Curt Wohlgemuth 2011-10-07 21:54:10 -0600 2464) .reason = reason,
3c4d716538f3e (Christoph Hellwig 2010-06-08 18:14:43 +0200 2465) };
d8a8559cd7a9c (Jens Axboe 2009-09-02 12:34:32 +0200 2466)
e79729123f639 (Tejun Heo 2015-05-22 17:13:48 -0400 2467) if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
6eedc70150d55 (Jan Kara 2012-07-03 16:45:27 +0200 2468) return;
cf37e972478ec (Christoph Hellwig 2010-06-08 18:14:51 +0200 2469) WARN_ON(!rwsem_is_locked(&sb->s_umount));
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2470)
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 2471) bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy);
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 2472) wb_wait_for_completion(&done);
e913fc825dc68 (Jens Axboe 2010-05-17 12:55:07 +0200 2473) }
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2474)
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2475) /**
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2476) * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2477) * @sb: the superblock
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2478) * @nr: the number of pages to write
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2479) * @reason: reason why some writeback work initiated
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2480) *
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2481) * Start writeback on some inodes on this super_block. No guarantees are made
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2482) * on how many (if any) will be written, and this function does not wait
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2483) * for IO completion of submitted IO.
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2484) */
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2485) void writeback_inodes_sb_nr(struct super_block *sb,
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2486) unsigned long nr,
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2487) enum wb_reason reason)
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2488) {
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2489) __writeback_inodes_sb_nr(sb, nr, reason, false);
f30a7d0cc8d90 (Tejun Heo 2015-05-22 17:14:00 -0400 2490) }
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2491) EXPORT_SYMBOL(writeback_inodes_sb_nr);
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2492)
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2493) /**
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2494) * writeback_inodes_sb - writeback dirty inodes from given super_block
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2495) * @sb: the superblock
786228ab3095f (Marcos Paulo de Souza 2011-11-23 20:56:45 +0800 2496) * @reason: reason why some writeback work was initiated
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2497) *
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2498) * Start writeback on some inodes on this super_block. No guarantees are made
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2499) * on how many (if any) will be written, and this function does not wait
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2500) * for IO completion of submitted IO.
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2501) */
0e175a1835ffc (Curt Wohlgemuth 2011-10-07 21:54:10 -0600 2502) void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2503) {
0e175a1835ffc (Curt Wohlgemuth 2011-10-07 21:54:10 -0600 2504) return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2505) }
0e3c9a2284f54 (Jens Axboe 2010-06-01 11:08:43 +0200 2506) EXPORT_SYMBOL(writeback_inodes_sb);
e913fc825dc68 (Jens Axboe 2010-05-17 12:55:07 +0200 2507)
17bd55d037a02 (Eric Sandeen 2009-12-23 07:57:07 -0500 2508) /**
8264c3214f28b (Rakesh Pandit 2017-10-09 13:34:41 +0300 2509) * try_to_writeback_inodes_sb - try to start writeback if none underway
17bd55d037a02 (Eric Sandeen 2009-12-23 07:57:07 -0500 2510) * @sb: the superblock
8264c3214f28b (Rakesh Pandit 2017-10-09 13:34:41 +0300 2511) * @reason: reason why some writeback work was initiated
17bd55d037a02 (Eric Sandeen 2009-12-23 07:57:07 -0500 2512) *
8264c3214f28b (Rakesh Pandit 2017-10-09 13:34:41 +0300 2513) * Invoke __writeback_inodes_sb_nr if no writeback is currently underway.
17bd55d037a02 (Eric Sandeen 2009-12-23 07:57:07 -0500 2514) */
8264c3214f28b (Rakesh Pandit 2017-10-09 13:34:41 +0300 2515) void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
17bd55d037a02 (Eric Sandeen 2009-12-23 07:57:07 -0500 2516) {
10ee27a06cc8e (Miao Xie 2013-01-10 13:47:57 +0800 2517) if (!down_read_trylock(&sb->s_umount))
8264c3214f28b (Rakesh Pandit 2017-10-09 13:34:41 +0300 2518) return;
10ee27a06cc8e (Miao Xie 2013-01-10 13:47:57 +0800 2519)
8264c3214f28b (Rakesh Pandit 2017-10-09 13:34:41 +0300 2520) __writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true);
10ee27a06cc8e (Miao Xie 2013-01-10 13:47:57 +0800 2521) up_read(&sb->s_umount);
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2522) }
10ee27a06cc8e (Miao Xie 2013-01-10 13:47:57 +0800 2523) EXPORT_SYMBOL(try_to_writeback_inodes_sb);
3259f8bed2f0f (Chris Mason 2010-10-29 11:16:17 -0400 2524)
d8a8559cd7a9c (Jens Axboe 2009-09-02 12:34:32 +0200 2525) /**
d8a8559cd7a9c (Jens Axboe 2009-09-02 12:34:32 +0200 2526) * sync_inodes_sb - sync sb inode pages
0dc83bd30b0bf (Jan Kara 2014-02-21 11:19:04 +0100 2527) * @sb: the superblock
d8a8559cd7a9c (Jens Axboe 2009-09-02 12:34:32 +0200 2528) *
d8a8559cd7a9c (Jens Axboe 2009-09-02 12:34:32 +0200 2529) * This function writes and waits on any dirty inode belonging to this
0dc83bd30b0bf (Jan Kara 2014-02-21 11:19:04 +0100 2530) * super_block.
d8a8559cd7a9c (Jens Axboe 2009-09-02 12:34:32 +0200 2531) */
0dc83bd30b0bf (Jan Kara 2014-02-21 11:19:04 +0100 2532) void sync_inodes_sb(struct super_block *sb)
d8a8559cd7a9c (Jens Axboe 2009-09-02 12:34:32 +0200 2533) {
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 2534) struct backing_dev_info *bdi = sb->s_bdi;
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 2535) DEFINE_WB_COMPLETION(done, bdi);
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 2536) struct wb_writeback_work work = {
3c4d716538f3e (Christoph Hellwig 2010-06-08 18:14:43 +0200 2537) .sb = sb,
3c4d716538f3e (Christoph Hellwig 2010-06-08 18:14:43 +0200 2538) .sync_mode = WB_SYNC_ALL,
3c4d716538f3e (Christoph Hellwig 2010-06-08 18:14:43 +0200 2539) .nr_pages = LONG_MAX,
3c4d716538f3e (Christoph Hellwig 2010-06-08 18:14:43 +0200 2540) .range_cyclic = 0,
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 2541) .done = &done,
0e175a1835ffc (Curt Wohlgemuth 2011-10-07 21:54:10 -0600 2542) .reason = WB_REASON_SYNC,
7747bd4bceb30 (Dave Chinner 2013-07-02 22:38:35 +1000 2543) .for_sync = 1,
3c4d716538f3e (Christoph Hellwig 2010-06-08 18:14:43 +0200 2544) };
3c4d716538f3e (Christoph Hellwig 2010-06-08 18:14:43 +0200 2545)
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 2546) /*
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 2547) * Can't skip on !bdi_has_dirty() because we should wait for !dirty
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 2548) * inodes under writeback and I_DIRTY_TIME inodes ignored by
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 2549) * bdi_has_dirty() need to be written out too.
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 2550) */
006a0973ed020 (Tejun Heo 2015-08-25 14:11:52 -0400 2551) if (bdi == &noop_backing_dev_info)
6eedc70150d55 (Jan Kara 2012-07-03 16:45:27 +0200 2552) return;
cf37e972478ec (Christoph Hellwig 2010-06-08 18:14:51 +0200 2553) WARN_ON(!rwsem_is_locked(&sb->s_umount));
cf37e972478ec (Christoph Hellwig 2010-06-08 18:14:51 +0200 2554)
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 2555) /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 2556) bdi_down_write_wb_switch_rwsem(bdi);
db125360409fc (Tejun Heo 2015-05-22 17:14:01 -0400 2557) bdi_split_work_to_wbs(bdi, &work, false);
5b9cce4c7eb06 (Tejun Heo 2019-08-26 09:06:52 -0700 2558) wb_wait_for_completion(&done);
7fc5854f8c6ef (Tejun Heo 2017-12-12 08:38:30 -0800 2559) bdi_up_write_wb_switch_rwsem(bdi);
83ba7b071f30f (Christoph Hellwig 2010-07-06 08:59:53 +0200 2560)
b6e51316daede (Jens Axboe 2009-09-16 15:13:54 +0200 2561) wait_sb_inodes(sb);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2562) }
d8a8559cd7a9c (Jens Axboe 2009-09-02 12:34:32 +0200 2563) EXPORT_SYMBOL(sync_inodes_sb);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2564)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2565) /**
7f04c26d715a2 (Andrea Arcangeli 2005-10-30 15:03:05 -0800 2566) * write_inode_now - write an inode to disk
7f04c26d715a2 (Andrea Arcangeli 2005-10-30 15:03:05 -0800 2567) * @inode: inode to write to disk
7f04c26d715a2 (Andrea Arcangeli 2005-10-30 15:03:05 -0800 2568) * @sync: whether the write should be synchronous or not
7f04c26d715a2 (Andrea Arcangeli 2005-10-30 15:03:05 -0800 2569) *
7f04c26d715a2 (Andrea Arcangeli 2005-10-30 15:03:05 -0800 2570) * This function commits an inode to disk immediately if it is dirty. This is
7f04c26d715a2 (Andrea Arcangeli 2005-10-30 15:03:05 -0800 2571) * primarily needed by knfsd.
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2572) *
7f04c26d715a2 (Andrea Arcangeli 2005-10-30 15:03:05 -0800 2573) * The caller must either have a ref on the inode or must have set I_WILL_FREE.
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2574) */
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2575) int write_inode_now(struct inode *inode, int sync)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2576) {
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2577) struct writeback_control wbc = {
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2578) .nr_to_write = LONG_MAX,
18914b1884ebd (Mike Galbraith 2008-02-08 04:20:23 -0800 2579) .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
111ebb6e6f7bd (OGAWA Hirofumi 2006-06-23 02:03:26 -0700 2580) .range_start = 0,
111ebb6e6f7bd (OGAWA Hirofumi 2006-06-23 02:03:26 -0700 2581) .range_end = LLONG_MAX,
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2582) };
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2583)
f56753ac2a908 (Christoph Hellwig 2020-09-24 08:51:40 +0200 2584) if (!mapping_can_writeback(inode->i_mapping))
49364ce253441 (Andrew Morton 2005-11-07 00:59:15 -0800 2585) wbc.nr_to_write = 0;
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2586)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2587) might_sleep();
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 2588) return writeback_single_inode(inode, &wbc);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2589) }
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2590) EXPORT_SYMBOL(write_inode_now);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2591)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2592) /**
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2593) * sync_inode - write an inode and its pages to disk.
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2594) * @inode: the inode to sync
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2595) * @wbc: controls the writeback mode
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2596) *
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2597) * sync_inode() will write an inode and its pages to disk. It will also
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2598) * correctly update the inode on its superblock's dirty inode lists and will
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2599) * update inode->i_state.
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2600) *
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2601) * The caller must have a ref on the inode.
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2602) */
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2603) int sync_inode(struct inode *inode, struct writeback_control *wbc)
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2604) {
aaf2559332ba2 (Tejun Heo 2016-03-18 13:52:04 -0400 2605) return writeback_single_inode(inode, wbc);
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2606) }
^1da177e4c3f4 (Linus Torvalds 2005-04-16 15:20:36 -0700 2607) EXPORT_SYMBOL(sync_inode);
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2608)
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2609) /**
c691b9d983d70 (Andrew Morton 2011-01-13 15:45:48 -0800 2610) * sync_inode_metadata - write an inode to disk
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2611) * @inode: the inode to sync
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2612) * @wait: wait for I/O to complete.
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2613) *
c691b9d983d70 (Andrew Morton 2011-01-13 15:45:48 -0800 2614) * Write an inode to disk and adjust its dirty state after completion.
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2615) *
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2616) * Note: only writes the actual inode, no associated data or other metadata.
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2617) */
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2618) int sync_inode_metadata(struct inode *inode, int wait)
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2619) {
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2620) struct writeback_control wbc = {
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2621) .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2622) .nr_to_write = 0, /* metadata-only */
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2623) };
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2624)
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2625) return sync_inode(inode, &wbc);
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2626) }
c37650161a53c (Christoph Hellwig 2010-10-06 10:48:20 +0200 2627) EXPORT_SYMBOL(sync_inode_metadata);