summaryrefslogtreecommitdiffstats
path: root/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch
diff options
context:
space:
mode:
Diffstat (limited to 'recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch')
-rw-r--r--recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch211
1 files changed, 211 insertions, 0 deletions
diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch
new file mode 100644
index 00000000..0dabe3b9
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch
@@ -0,0 +1,211 @@
1From b4c39a3690fd0d723f50eba441fe567e8fee68f1 Mon Sep 17 00:00:00 2001
2From: Boaz Harrosh <bharrosh@panasas.com>
3Date: Fri, 8 Jun 2012 01:19:07 +0300
4Subject: [PATCH 068/109] ore: Fix NFS crash by supporting any unaligned RAID
5 IO
6
7commit 9ff19309a9623f2963ac5a136782ea4d8b5d67fb upstream.
8
9In RAID_5/6 we used to not permit an IO whose end
10byte is not stripe_size aligned and that spans more than one stripe,
11i.e. the caller must check whether, after submission, the actual
12transferred byte count is shorter, and would need to resubmit
13a new IO with the remainder.
14
15Exofs supports this, and NFS was supposed to support this
16as well with its short write mechanism. But late testing has
17exposed a CRASH when this is used with non-RPC layout-drivers.
18
19The change at NFS is deep and risky; in its place the fix
20at ORE to lift the limitation is actually clean and simple.
21So here it is below.
22
23The principle here is that in the case of unaligned IO on
24both ends, beginning and end, we will send two read requests:
25one like the old code, before the calculation of the first stripe,
26and also one at a new site, before the calculation of the last stripe.
27If any "boundary" is aligned or the complete IO is within a single
28stripe, we do a single read like before.
29
30The code is clean and simple by splitting the old _read_4_write
31into 3 even parts:
321. _read_4_write_first_stripe
332. _read_4_write_last_stripe
343. _read_4_write_execute
35
36And calling 1+3 at the same place as before, 2+3 before the last
37stripe, and in the case of all in a single stripe then 1+2+3
38is performed additively.
39
40Why did I not think of it before? Well, I had a stroke of
41genius because I have stared at this code for 2 years, and did
42not find this simple solution until today. Not that I did not try.
43
44This solution is much better for NFS than the previous supposed
45solution because the short write was dealt with out-of-band after
46IO_done, which would cause a seeky IO pattern, whereas here
47we execute in order. In both solutions we do 2 separate reads, only
48here we do it within a single IO request. (And actually combine two
49writes into a single submission.)
50
51NFS/exofs code need not change since the ORE API communicates the new
52shorter length on return, what will happen is that this case would not
53occur anymore.
54
55hurray!!
56
57[Stable: this is an NFS bug since the 3.2 kernel; should apply cleanly]
58Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
59Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
60---
61 fs/exofs/ore_raid.c | 67 +++++++++++++++++++++++++++-----------------------
62 1 files changed, 36 insertions(+), 31 deletions(-)
63
64diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
65index d222c77..fff2070 100644
66--- a/fs/exofs/ore_raid.c
67+++ b/fs/exofs/ore_raid.c
68@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
69 * ios->sp2d[p][*], xor is calculated the same way. These pages are
70 * allocated/freed and don't go through cache
71 */
72-static int _read_4_write(struct ore_io_state *ios)
73+static int _read_4_write_first_stripe(struct ore_io_state *ios)
74 {
75- struct ore_io_state *ios_read;
76 struct ore_striping_info read_si;
77 struct __stripe_pages_2d *sp2d = ios->sp2d;
78 u64 offset = ios->si.first_stripe_start;
79- u64 last_stripe_end;
80- unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
81- unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
82- int ret;
83+ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
84
85 if (offset == ios->offset) /* Go to start collect $200 */
86 goto read_last_stripe;
87@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
88 min_p = _sp2d_min_pg(sp2d);
89 max_p = _sp2d_max_pg(sp2d);
90
91+ ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
92+ offset, ios->offset, min_p, max_p);
93+
94 for (c = 0; ; c++) {
95 ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
96 read_si.obj_offset += min_p * PAGE_SIZE;
97@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
98 }
99
100 read_last_stripe:
101+ return 0;
102+}
103+
104+static int _read_4_write_last_stripe(struct ore_io_state *ios)
105+{
106+ struct ore_striping_info read_si;
107+ struct __stripe_pages_2d *sp2d = ios->sp2d;
108+ u64 offset;
109+ u64 last_stripe_end;
110+ unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
111+ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
112+
113 offset = ios->offset + ios->length;
114 if (offset % PAGE_SIZE)
115 _add_to_r4w_last_page(ios, &offset);
116@@ -527,15 +538,15 @@ read_last_stripe:
117 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
118 ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
119
120- BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
121- /* unaligned IO must be within a single stripe */
122-
123 if (min_p == sp2d->pages_in_unit) {
124 /* Didn't do it yet */
125 min_p = _sp2d_min_pg(sp2d);
126 max_p = _sp2d_max_pg(sp2d);
127 }
128
129+ ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
130+ offset, last_stripe_end, min_p, max_p);
131+
132 while (offset < last_stripe_end) {
133 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
134
135@@ -568,6 +579,15 @@ read_last_stripe:
136 }
137
138 read_it:
139+ return 0;
140+}
141+
142+static int _read_4_write_execute(struct ore_io_state *ios)
143+{
144+ struct ore_io_state *ios_read;
145+ unsigned i;
146+ int ret;
147+
148 ios_read = ios->ios_read_4_write;
149 if (!ios_read)
150 return 0;
151@@ -591,6 +611,8 @@ read_it:
152 }
153
154 _mark_read4write_pages_uptodate(ios_read, ret);
155+ ore_put_io_state(ios_read);
156+ ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
157 return 0;
158 }
159
160@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
161 /* If first stripe, Read in all read4write pages
162 * (if needed) before we calculate the first parity.
163 */
164- _read_4_write(ios);
165+ _read_4_write_first_stripe(ios);
166 }
167+ if (!cur_len) /* If last stripe r4w pages of last stripe */
168+ _read_4_write_last_stripe(ios);
169+ _read_4_write_execute(ios);
170
171 for (i = 0; i < num_pages; i++) {
172 pages[i] = _raid_page_alloc();
173@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
174
175 int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
176 {
177- struct ore_layout *layout = ios->layout;
178-
179 if (ios->parity_pages) {
180+ struct ore_layout *layout = ios->layout;
181 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
182- unsigned stripe_size = ios->si.bytes_in_stripe;
183- u64 last_stripe, first_stripe;
184
185 if (_sp2d_alloc(pages_in_unit, layout->group_width,
186 layout->parity, &ios->sp2d)) {
187 return -ENOMEM;
188 }
189-
190- /* Round io down to last full strip */
191- first_stripe = div_u64(ios->offset, stripe_size);
192- last_stripe = div_u64(ios->offset + ios->length, stripe_size);
193-
194- /* If an IO spans more then a single stripe it must end at
195- * a stripe boundary. The reminder at the end is pushed into the
196- * next IO.
197- */
198- if (last_stripe != first_stripe) {
199- ios->length = last_stripe * stripe_size - ios->offset;
200-
201- BUG_ON(!ios->length);
202- ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
203- PAGE_SIZE;
204- ios->si.length = ios->length; /*make it consistent */
205- }
206 }
207 return 0;
208 }
209--
2101.7.7.6
211