1 files changed, 211 insertions, 0 deletions
diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch
new file mode 100644
index 00000000..0dabe3b9
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch
@@ -0,0 +1,211 @@
+From b4c39a3690fd0d723f50eba441fe567e8fee68f1 Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Fri, 8 Jun 2012 01:19:07 +0300
+Subject: [PATCH 068/109] ore: Fix NFS crash by supporting any unaligned RAID
+ IO
+commit 9ff19309a9623f2963ac5a136782ea4d8b5d67fb upstream.
+In RAID_5/6 we used to not permit an IO whose end
+byte is not stripe_size aligned and which spans more than one stripe,
+i.e. the caller must check if after submission the actual
+transferred byte count is shorter, and would need to resubmit
+a new IO with the remainder.
+Exofs supports this, and NFS was supposed to support this
+as well with its short write mechanism. But late testing has
+exposed a CRASH when this is used with non-RPC layout-drivers.
+The change at NFS is deep and risky; in its place the fix
+at ORE to lift the limitation is actually clean and simple.
+So here it is below.
+The principle here is that in the case of unaligned IO on
+both ends, beginning and end, we will send two read requests:
+one like the old code, before the calculation of the first stripe,
+and also one at a new site, before the calculation of the last stripe.
+If any "boundary" is aligned or the complete IO is within a single
+stripe, we do a single read like before.
+The code is clean and simple by splitting the old _read_4_write
+into 3 even parts:
+1. _read_4_write_first_stripe
+2. _read_4_write_last_stripe
+3. _read_4_write_execute
+And calling 1+3 at the same place as before, 2+3 before the last
+stripe, and in the case of all in a single stripe then 1+2+3
+is performed additively.
+Why did I not think of it before? Well, I had a stroke of
+genius because I have stared at this code for 2 years, and did
+not find this simple solution until today. Not that I did not try.
+This solution is much better for NFS than the previous supposed
+solution because the short write was dealt with out-of-band after
+IO_done, which would cause a seeky IO pattern, whereas here
+we execute in order. In both solutions we do 2 separate reads, only
+here we do it within a single IO request. (And actually combine two
+writes into a single submission)
+NFS/exofs code need not change since the ORE API communicates the new
+shorter length on return, what will happen is that this case would not
+occur anymore.
+hurray!!
+[Stable: this is an NFS bug since the 3.2 kernel; should apply cleanly]
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ fs/exofs/ore_raid.c |   67 +++++++++++++++++++++++++++-----------------------
+ 1 files changed, 36 insertions(+), 31 deletions(-)
+diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
+index d222c77..fff2070 100644
+--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
+@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
+  * ios->sp2d[p][*], xor is calculated the same way. These pages are
+  * allocated/freed and don't go through cache
+  */
+-static int _read_4_write(struct ore_io_state *ios)
+static int _read_4_write_first_stripe(struct ore_io_state *ios)
+ {
+-       struct ore_io_state *ios_read;
+        struct ore_striping_info read_si;
+        struct __stripe_pages_2d *sp2d = ios->sp2d;
+        u64 offset = ios->si.first_stripe_start;
+-       u64 last_stripe_end;
+-       unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
+-       unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
+-       int ret;
+       unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
+ 
+        if (offset == ios->offset) /* Go to start collect $200 */
+                goto read_last_stripe;
+@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
+        min_p = _sp2d_min_pg(sp2d);
+        max_p = _sp2d_max_pg(sp2d);
+ 
+       ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
+                  offset, ios->offset, min_p, max_p);
+
+        for (c = 0; ; c++) {
+                ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
+                read_si.obj_offset += min_p * PAGE_SIZE;
+@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
+        }
+ 
+ read_last_stripe:
+       return 0;
+}
+
+static int _read_4_write_last_stripe(struct ore_io_state *ios)
+{
+       struct ore_striping_info read_si;
+       struct __stripe_pages_2d *sp2d = ios->sp2d;
+       u64 offset;
+       u64 last_stripe_end;
+       unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
+       unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
+
+        offset = ios->offset + ios->length;
+        if (offset % PAGE_SIZE)
+                _add_to_r4w_last_page(ios, &offset);
+@@ -527,15 +538,15 @@ read_last_stripe:
+        c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
+                       ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
+ 
+-       BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
+-       /* unaligned IO must be within a single stripe */
+-
+        if (min_p == sp2d->pages_in_unit) {
+                /* Didn't do it yet */
+                min_p = _sp2d_min_pg(sp2d);
+                max_p = _sp2d_max_pg(sp2d);
+        }
+ 
+       ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
+                  offset, last_stripe_end, min_p, max_p);
+
+        while (offset < last_stripe_end) {
+                struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
+ 
+@@ -568,6 +579,15 @@ read_last_stripe:
+        }
+ 
+ read_it:
+       return 0;
+}
+
+static int _read_4_write_execute(struct ore_io_state *ios)
+{
+       struct ore_io_state *ios_read;
+       unsigned i;
+       int ret;
+
+        ios_read = ios->ios_read_4_write;
+        if (!ios_read)
+                return 0;
+@@ -591,6 +611,8 @@ read_it:
+        }
+ 
+        _mark_read4write_pages_uptodate(ios_read, ret);
+       ore_put_io_state(ios_read);
+       ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
+        return 0;
+ }
+ 
+@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
+                        /* If first stripe, Read in all read4write pages
+                         * (if needed) before we calculate the first parity.
+                         */
+-                       _read_4_write(ios);
+                       _read_4_write_first_stripe(ios);
+                }
+               if (!cur_len) /* If last stripe r4w pages of last stripe */
+                       _read_4_write_last_stripe(ios);
+               _read_4_write_execute(ios);
+ 
+                for (i = 0; i < num_pages; i++) {
+                        pages[i] = _raid_page_alloc();
+@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
+ 
+ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
+ {
+-       struct ore_layout *layout = ios->layout;
+-
+        if (ios->parity_pages) {
+               struct ore_layout *layout = ios->layout;
+                unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
+-               unsigned stripe_size = ios->si.bytes_in_stripe;
+-               u64 last_stripe, first_stripe;
+ 
+                if (_sp2d_alloc(pages_in_unit, layout->group_width,
+                                layout->parity, &ios->sp2d)) {
+                        return -ENOMEM;
+                }
+-
+-               /* Round io down to last full strip */
+-               first_stripe = div_u64(ios->offset, stripe_size);
+-               last_stripe = div_u64(ios->offset + ios->length, stripe_size);
+-
+-               /* If an IO spans more then a single stripe it must end at
+-                * a stripe boundary. The reminder at the end is pushed into the
+-                * next IO.
+-                */
+-               if (last_stripe != first_stripe) {
+-                       ios->length = last_stripe * stripe_size - ios->offset;
+-
+-                       BUG_ON(!ios->length);
+-                       ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
+-                                       PAGE_SIZE;
+-                       ios->si.length = ios->length; /*make it consistent */
+-               }
+        }
+        return 0;
+ }
+-- 
+1.7.7.6

diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch new file mode 100644 index 00000000..0dabe3b9 --- /dev/null +++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0068-ore-Fix-NFS-crash-by-supporting-any-unaligned-RAID-I.patch
@@ -0,0 +1,211 @@
	1	From b4c39a3690fd0d723f50eba441fe567e8fee68f1 Mon Sep 17 00:00:00 2001
	2	From: Boaz Harrosh <bharrosh@panasas.com>
	3	Date: Fri, 8 Jun 2012 01:19:07 +0300
	4	Subject: [PATCH 068/109] ore: Fix NFS crash by supporting any unaligned RAID
	5	IO
	6
	7	commit 9ff19309a9623f2963ac5a136782ea4d8b5d67fb upstream.
	8
	9	In RAID_5/6 we used to not permit an IO whose end
	10	byte is not stripe_size aligned and which spans more than one stripe,
	11	i.e. the caller must check if after submission the actual
	12	transferred byte count is shorter, and would need to resubmit
	13	a new IO with the remainder.
	14
	15	Exofs supports this, and NFS was supposed to support this
	16	as well with its short write mechanism. But late testing has
	17	exposed a CRASH when this is used with non-RPC layout-drivers.
	18
	19	The change at NFS is deep and risky; in its place the fix
	20	at ORE to lift the limitation is actually clean and simple.
	21	So here it is below.
	22
	23	The principle here is that in the case of unaligned IO on
	24	both ends, beginning and end, we will send two read requests:
	25	one like the old code, before the calculation of the first stripe,
	26	and also one at a new site, before the calculation of the last stripe.
	27	If any "boundary" is aligned or the complete IO is within a single
	28	stripe, we do a single read like before.
	29
	30	The code is clean and simple by splitting the old _read_4_write
	31	into 3 even parts:
	32	1._read_4_write_first_stripe
	33	2. _read_4_write_last_stripe
	34	3. _read_4_write_execute
	35
	36	And calling 1+3 at the same place as before, 2+3 before the last
	37	stripe, and in the case of all in a single stripe then 1+2+3
	38	is performed additively.
	39
	40	Why did I not think of it before? Well, I had a stroke of
	41	genius because I have stared at this code for 2 years, and did
	42	not find this simple solution until today. Not that I did not try.
	43
	44	This solution is much better for NFS than the previous supposed
	45	solution because the short write was dealt with out-of-band after
	46	IO_done, which would cause a seeky IO pattern, whereas here
	47	we execute in order. In both solutions we do 2 separate reads, only
	48	here we do it within a single IO request. (And actually combine two
	49	writes into a single submission)
	50
	51	NFS/exofs code need not change since the ORE API communicates the new
	52	shorter length on return, what will happen is that this case would not
	53	occur anymore.
	54
	55	hurray!!
	56
	57	[Stable: this is an NFS bug since the 3.2 kernel; should apply cleanly]
	58	Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
	59	Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
	60	---
	61	fs/exofs/ore_raid.c | 67 +++++++++++++++++++++++++++-----------------------
	62	1 files changed, 36 insertions(+), 31 deletions(-)
	63
	64	diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
	65	index d222c77..fff2070 100644
	66	--- a/fs/exofs/ore_raid.c
	67	+++ b/fs/exofs/ore_raid.c
	68	@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
	69	* ios->sp2d[p][*], xor is calculated the same way. These pages are
	70	* allocated/freed and don't go through cache
	71	*/
	72	-static int _read_4_write(struct ore_io_state *ios)
	73	+static int _read_4_write_first_stripe(struct ore_io_state *ios)
	74	{
	75	- struct ore_io_state *ios_read;
	76	struct ore_striping_info read_si;
	77	struct __stripe_pages_2d *sp2d = ios->sp2d;
	78	u64 offset = ios->si.first_stripe_start;
	79	- u64 last_stripe_end;
	80	- unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
	81	- unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
	82	- int ret;
	83	+ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
	84
	85	if (offset == ios->offset) /* Go to start collect $200 */
	86	goto read_last_stripe;
	87	@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
	88	min_p = _sp2d_min_pg(sp2d);
	89	max_p = _sp2d_max_pg(sp2d);
	90
	91	+ ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
	92	+ offset, ios->offset, min_p, max_p);
	93	+
	94	for (c = 0; ; c++) {
	95	ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
	96	read_si.obj_offset += min_p * PAGE_SIZE;
	97	@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
	98	}
	99
	100	read_last_stripe:
	101	+ return 0;
	102	+}
	103	+
	104	+static int _read_4_write_last_stripe(struct ore_io_state *ios)
	105	+{
	106	+ struct ore_striping_info read_si;
	107	+ struct __stripe_pages_2d *sp2d = ios->sp2d;
	108	+ u64 offset;
	109	+ u64 last_stripe_end;
	110	+ unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
	111	+ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
	112	+
	113	offset = ios->offset + ios->length;
	114	if (offset % PAGE_SIZE)
	115	_add_to_r4w_last_page(ios, &offset);
	116	@@ -527,15 +538,15 @@ read_last_stripe:
	117	c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
	118	ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
	119
	120	- BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
	121	- /* unaligned IO must be within a single stripe */
	122	-
	123	if (min_p == sp2d->pages_in_unit) {
	124	/* Didn't do it yet */
	125	min_p = _sp2d_min_pg(sp2d);
	126	max_p = _sp2d_max_pg(sp2d);
	127	}
	128
	129	+ ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
	130	+ offset, last_stripe_end, min_p, max_p);
	131	+
	132	while (offset < last_stripe_end) {
	133	struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
	134
	135	@@ -568,6 +579,15 @@ read_last_stripe:
	136	}
	137
	138	read_it:
	139	+ return 0;
	140	+}
	141	+
	142	+static int _read_4_write_execute(struct ore_io_state *ios)
	143	+{
	144	+ struct ore_io_state *ios_read;
	145	+ unsigned i;
	146	+ int ret;
	147	+
	148	ios_read = ios->ios_read_4_write;
	149	if (!ios_read)
	150	return 0;
	151	@@ -591,6 +611,8 @@ read_it:
	152	}
	153
	154	_mark_read4write_pages_uptodate(ios_read, ret);
	155	+ ore_put_io_state(ios_read);
	156	+ ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
	157	return 0;
	158	}
	159
	160	@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
	161	/* If first stripe, Read in all read4write pages
	162	* (if needed) before we calculate the first parity.
	163	*/
	164	- _read_4_write(ios);
	165	+ _read_4_write_first_stripe(ios);
	166	}
	167	+ if (!cur_len) /* If last stripe r4w pages of last stripe */
	168	+ _read_4_write_last_stripe(ios);
	169	+ _read_4_write_execute(ios);
	170
	171	for (i = 0; i < num_pages; i++) {
	172	pages[i] = _raid_page_alloc();
	173	@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
	174
	175	int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
	176	{
	177	- struct ore_layout *layout = ios->layout;
	178	-
	179	if (ios->parity_pages) {
	180	+ struct ore_layout *layout = ios->layout;
	181	unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
	182	- unsigned stripe_size = ios->si.bytes_in_stripe;
	183	- u64 last_stripe, first_stripe;
	184
	185	if (_sp2d_alloc(pages_in_unit, layout->group_width,
	186	layout->parity, &ios->sp2d)) {
	187	return -ENOMEM;
	188	}
	189	-
	190	- /* Round io down to last full strip */
	191	- first_stripe = div_u64(ios->offset, stripe_size);
	192	- last_stripe = div_u64(ios->offset + ios->length, stripe_size);
	193	-
	194	- /* If an IO spans more then a single stripe it must end at
	195	- * a stripe boundary. The reminder at the end is pushed into the
	196	- * next IO.
	197	- */
	198	- if (last_stripe != first_stripe) {
	199	- ios->length = last_stripe * stripe_size - ios->offset;
	200	-
	201	- BUG_ON(!ios->length);
	202	- ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
	203	- PAGE_SIZE;
	204	- ios->si.length = ios->length; /*make it consistent */
	205	- }
	206	}
	207	return 0;
	208	}
	209	--
	210	1.7.7.6
	211