From 5e6c814b3ab74e9abbdd836a224c5ceb7246b44e Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 9 Apr 2016 22:41:37 +0000
Subject: [PATCH 110/126] fs: ext4: fsync: optimize double-fsync() a bunch

There are cases where ext4 is a bit too conservative about sending barriers
down to the disk: sometimes the transaction in progress is not the one that
needs the barrier (in other words, the fsync is for a file whose IO happened
some time ago, and all of its data has already been sent to the disk).

For that case, a better-performing tradeoff can be made on SSD devices, which
are able to flush their DRAM caches quickly on a power-fail event: the barrier
is still sent to the disk, but we do not wait for it to complete. Any
subsequent IO will correctly block on the barrier.
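
To make this concrete, here is a minimal userspace sketch (illustrative
only, not part of this patch; the file name is an arbitrary example) of
the back-to-back fsync() pattern whose second flush this change makes
cheap on non-rotational devices:

  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
          int fd = open("testfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }
          if (write(fd, "data", 4) != 4)
                  perror("write");

          /* first fsync: commits the data and the journal transaction */
          fsync(fd);
          /*
           * second fsync: no dirty data is left, so the remaining cost
           * is mostly the cache-flush barrier; with this patch, on a
           * non-rotational device that barrier is issued without waiting.
           */
          fsync(fd);
          close(fd);
          return 0;
  }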
---
 block/bio.c            | 19 +++++++++++++++++++
 block/blk-flush.c      | 40 ++++++++++++++++++++++++++++++++++++++++
 fs/ext4/fsync.c        |  6 +++++-
 include/linux/bio.h    |  1 +
 include/linux/blkdev.h |  5 +++++
 5 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/block/bio.c b/block/bio.c
index 101c2a9b5481..8f9f6e9febe0 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -956,6 +956,25 @@ int submit_bio_wait(struct bio *bio)
 }
 EXPORT_SYMBOL(submit_bio_wait);
 
+static void submit_bio_nowait_endio(struct bio *bio)
+{
+	bio_put(bio);
+}
+
+/**
+ * submit_bio_nowait - submit a bio for fire-and-forget I/O
+ * @bio: The &struct bio which describes the I/O
+ *
+ * Simple wrapper around submit_bio() that takes care of bio_put() on completion
+ */
+void submit_bio_nowait(struct bio *bio)
+{
+	bio->bi_end_io = submit_bio_nowait_endio;
+	bio->bi_opf |= REQ_SYNC;
+	submit_bio(bio);
+}
+EXPORT_SYMBOL(submit_bio_nowait);
+
 /**
  * bio_advance - increment/complete a bio by some number of bytes
  * @bio:	bio to advance
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 4938bec8cfef..020f2c75343a 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -543,6 +543,46 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
 
+/**
+ * blkdev_issue_flush_nowait - queue a flush without waiting for completion
+ * @bdev:	blockdev to issue flush for
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ *    Issue a flush for the block device in question, but do not wait
+ *    for it to complete. The flush is fire-and-forget: the completion
+ *    handler just frees the bio, so any error is not reported back to
+ *    the caller.
+ */
+void blkdev_issue_flush_nowait(struct block_device *bdev, gfp_t gfp_mask)
+{
+	struct request_queue *q;
+	struct bio *bio;
+
+	if (bdev->bd_disk == NULL)
+		return;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return;
+
+	/*
+	 * some block devices may not have their queue correctly set up here
+	 * (e.g. loop device without a backing file) and so issuing a flush
+	 * here will panic. Ensure there is a request function before issuing
+	 * the flush.
+	 */
+	if (!q->make_request_fn)
+		return;
+
+	bio = bio_alloc(gfp_mask, 0);
+	bio_set_dev(bio, bdev);
+	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+
+	submit_bio_nowait(bio);
+}
+EXPORT_SYMBOL(blkdev_issue_flush_nowait);
+
 struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
 		int node, int cmd_size)
 {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 26a7fe5c4fd3..2708b292ddca 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -154,7 +154,11 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	ret = jbd2_complete_transaction(journal, commit_tid);
 	if (needs_barrier) {
 	issue_flush:
-		err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+		err = 0;
+		if (!blk_queue_nonrot(bdev_get_queue(inode->i_sb->s_bdev)))
+			err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+		else
+			blkdev_issue_flush_nowait(inode->i_sb->s_bdev, GFP_KERNEL);
 		if (!ret)
 			ret = err;
 	}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 275c91c99516..4ddfa9e10301 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -448,6 +448,7 @@ struct request_queue;
 extern int bio_phys_segments(struct request_queue *, struct bio *);
 
 extern int submit_bio_wait(struct bio *bio);
+extern void submit_bio_nowait(struct bio *bio);
 extern void bio_advance(struct bio *, unsigned);
 
 extern void bio_init(struct bio *bio, struct bio_vec *table,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8da66379f7ea..c48565cc84fb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1336,6 +1336,7 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
 }
 
 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
+extern void blkdev_issue_flush_nowait(struct block_device *, gfp_t);
 extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, struct page *page);
 
@@ -2001,6 +2002,10 @@ static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 	return 0;
 }
 
+static inline void blkdev_issue_flush_nowait(struct block_device *bdev, gfp_t gfp_mask)
+{
+}
+
 #endif /* CONFIG_BLOCK */
 
 #endif
-- 
2.15.0