1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
From 3e3da899949f18869c6958a30a861d89f2d4b73c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 11 Jul 2012 09:09:36 -0400
Subject: [PATCH 24/73] cifs: when CONFIG_HIGHMEM is set, serialize the
read/write kmaps
commit 3cf003c08be785af4bee9ac05891a15bcbff856a upstream.
Jian found that when he ran fsx on a 32 bit arch with a large wsize the
process and one of the bdi writeback kthreads would sometimes deadlock
with a stack trace like this:
crash> bt
PID: 2789 TASK: f02edaa0 CPU: 3 COMMAND: "fsx"
#0 [eed63cbc] schedule at c083c5b3
#1 [eed63d80] kmap_high at c0500ec8
#2 [eed63db0] cifs_async_writev at f7fabcd7 [cifs]
#3 [eed63df0] cifs_writepages at f7fb7f5c [cifs]
#4 [eed63e50] do_writepages at c04f3e32
#5 [eed63e54] __filemap_fdatawrite_range at c04e152a
#6 [eed63ea4] filemap_fdatawrite at c04e1b3e
#7 [eed63eb4] cifs_file_aio_write at f7fa111a [cifs]
#8 [eed63ecc] do_sync_write at c052d202
#9 [eed63f74] vfs_write at c052d4ee
#10 [eed63f94] sys_write at c052df4c
#11 [eed63fb0] ia32_sysenter_target at c0409a98
EAX: 00000004 EBX: 00000003 ECX: abd73b73 EDX: 012a65c6
DS: 007b ESI: 012a65c6 ES: 007b EDI: 00000000
SS: 007b ESP: bf8db178 EBP: bf8db1f8 GS: 0033
CS: 0073 EIP: 40000424 ERR: 00000004 EFLAGS: 00000246
Each task would kmap part of its address array before getting stuck, but
not enough to actually issue the write.
This patch fixes this by serializing the marshal_iov operations for
async reads and writes. The idea here is to ensure that cifs
aggressively tries to populate a request before attempting to fulfill
another one. As soon as all of the pages are kmapped for a request, then
we can unlock and allow another one to proceed.
There's no need to do this serialization on non-CONFIG_HIGHMEM arches
however, so optimize all of this out when CONFIG_HIGHMEM isn't set.
Reported-by: Jian Li <jiali@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
[bwh: Backported to 3.2: adjust context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
fs/cifs/cifssmb.c | 30 ++++++++++++++++++++++++++++++
1 files changed, 30 insertions(+), 0 deletions(-)
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6aa7457..c858a29 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -89,6 +89,32 @@ static struct {
/* Forward declarations */
static void cifs_readv_complete(struct work_struct *work);
+#ifdef CONFIG_HIGHMEM
+/*
+ * On arches that have high memory, kmap address space is limited. By
+ * serializing the kmap operations on those arches, we ensure that we don't
+ * end up with a bunch of threads in writeback with partially mapped page
+ * arrays, stuck waiting for kmap to come back. That situation prevents
+ * progress and can deadlock.
+ */
+static DEFINE_MUTEX(cifs_kmap_mutex);
+
+static inline void
+cifs_kmap_lock(void)
+{
+ mutex_lock(&cifs_kmap_mutex);
+}
+
+static inline void
+cifs_kmap_unlock(void)
+{
+ mutex_unlock(&cifs_kmap_mutex);
+}
+#else /* !CONFIG_HIGHMEM */
+#define cifs_kmap_lock() do { ; } while(0)
+#define cifs_kmap_unlock() do { ; } while(0)
+#endif /* CONFIG_HIGHMEM */
+
/* Mark as invalid, all open files on tree connections since they
were closed when session to server was lost */
static void mark_open_files_invalid(struct cifs_tcon *pTcon)
@@ -1540,6 +1566,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
+ cifs_kmap_lock();
list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
if (remaining >= PAGE_CACHE_SIZE) {
/* enough data to fill the page */
@@ -1589,6 +1616,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
page_cache_release(page);
}
}
+ cifs_kmap_unlock();
/* issue the read if we have any iovecs left to fill */
if (rdata->nr_iov > 1) {
@@ -2171,6 +2199,7 @@ cifs_async_writev(struct cifs_writedata *wdata)
iov[0].iov_base = smb;
/* marshal up the pages into iov array */
+ cifs_kmap_lock();
wdata->bytes = 0;
for (i = 0; i < wdata->nr_pages; i++) {
iov[i + 1].iov_len = min(inode->i_size -
@@ -2179,6 +2208,7 @@ cifs_async_writev(struct cifs_writedata *wdata)
iov[i + 1].iov_base = kmap(wdata->pages[i]);
wdata->bytes += iov[i + 1].iov_len;
}
+ cifs_kmap_unlock();
cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
--
1.7.7.6
|