1 files changed, 146 insertions, 0 deletions
diff --git a/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff b/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
new file mode 100644
index 0000000000..325ca66f0c
--- /dev/null
+++ b/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
@@ -0,0 +1,146 @@
+--- /tmp/image-format-conversions.h     2009-02-03 10:18:04.000000000 +0100
+++ git/src/image-format-conversions.h  2009-02-03 10:19:18.000000000 +0100
+@@ -30,6 +30,8 @@
+ /* Basic C implementation of YV12/I420 to UYVY conversion */
+ void uv12_to_uyvy(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
+ 
+/* NEON implementation of YV12/I420 to UYVY conversion */
+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
+ 
+ #endif /* __IMAGE_FORMAT_CONVERSIONS_H__ */
+ 
+--- /tmp/image-format-conversions.c     2009-02-03 10:18:04.000000000 +0100
+++ git/src/image-format-conversions.c  2009-02-03 10:16:47.000000000 +0100
+@@ -2,6 +2,7 @@
+  * Copyright 2008 Kalle Vahlman, <zuh@iki.fi>
+  *                Ilpo Ruotsalainen, <lonewolf@iki.fi>
+  *                Tuomas Kulve, <tuomas.kulve@movial.com>
+ *                Ian Rickards, <ian.rickards@arm.com>
+  *                
+  *
+  * Permission to use, copy, modify, distribute and sell this software and its
+@@ -89,3 +90,104 @@
+        }
+ }
+ 
+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest)
+{
+    int x, y;
+    uint8_t *dest_even = dest;
+    uint8_t *dest_odd = dest + w * 2;    /* output rows are w*2 bytes apart (2 bytes per pixel) */
+    uint8_t *y_p_even = y_p;
+    uint8_t *y_p_odd = y_p + y_pitch;    /* second luma row of the current pair */
+    /* Pack planar Y/U/V into U Y V Y byte order, two rows per pass; each U/V sample feeds a 2x2 pixel block. */
+    /* Widths below 16 cannot fill one 16-pixel NEON block, so they use the scalar loop. */
+    if (w<16)
+    {
+        for (y=0; y<h; y+=2)
+        {
+            for (x=0; x<w; x+=2)
+            {
+                /* Output two 2x1 macroblocks to form a 2x2 block from input */
+                uint8_t u_val = *u_p++;
+                uint8_t v_val = *v_p++;
+
+                /* Even row, first pixel */
+                *dest_even++ = u_val;
+                *dest_even++ = *y_p_even++;
+
+                /* Even row, second pixel */
+                *dest_even++ = v_val;
+                *dest_even++ = *y_p_even++;
+
+                /* Odd row, first pixel */
+                *dest_odd++ = u_val;
+                *dest_odd++ = *y_p_odd++;
+
+                /* Odd row, second pixel */
+                *dest_odd++ = v_val;
+                *dest_odd++ = *y_p_odd++;
+            }
+
+            dest_even += w * 2;    /* step over the odd row written through dest_odd */
+            dest_odd += w * 2;     /* step over the next even row */
+
+            u_p += ((uv_pitch << 1) - w) >> 1;    /* uv_pitch - w/2 for even w: start of next chroma row */
+            v_p += ((uv_pitch << 1) - w) >> 1;
+
+            y_p_even += (y_pitch - w) + y_pitch;    /* skip the rest of this row, then one whole row */
+            y_p_odd += (y_pitch - w) + y_pitch;
+        }
+    }
+    else
+    {
+        for (y=0; y<h; y+=2)
+        {
+            x=w;    /* pixels left to convert in this row pair */
+            do {
+                // avoid using d8-d15 (q4-q7) aapcs callee-save registers
+                asm volatile (
+                        "1:\n\t"
+                        "vld1.u8   {d0}, [%[u_p]]!\n\t"    /* d0 = 8 U bytes, pointer post-incremented */
+                        "sub       %[x],%[x],#16\n\t"
+                        "cmp       %[x],#16\n\t"    /* result is consumed by bhs at the end of the block */
+                        "vld1.u8   {d1}, [%[v_p]]!\n\t"    /* d1 = 8 V bytes */
+                        "vld1.u8   {q1}, [%[y_p_even]]!\n\t"    /* q1 = 16 even-row Y bytes */
+                        "vzip.u8   d0, d1\n\t"    /* q0 = U0 V0 U1 V1 ... */
+                        "vld1.u8   {q2}, [%[y_p_odd]]!\n\t"    /* q2 = 16 odd-row Y bytes */
+                // use 2-element struct stores to zip up u&v with y
+                        "vst2.u8   {q0,q1}, [%[dest_even]]!\n\t"
+                        "vmov.u8   q1, q2\n\t"    /* odd row reuses the chroma already in q0 */
+                        "vst2.u8   {q0,q1}, [%[dest_odd]]!\n\t"
+                        "bhs       1b\n\t"    /* repeat while x >= 16 (unsigned compare) */
+                        : [u_p] "+r" (u_p), [v_p] "+r" (v_p), [y_p_even] "+r" (y_p_even), [y_p_odd] "+r" (y_p_odd),
+                          [dest_even] "+r" (dest_even), [dest_odd] "+r" (dest_odd),
+                          [x] "+r" (x)
+                        :
+                        : "cc", "memory", "d0","d1","d2","d3","d4","d5"
+                        );
+                if (x!=0)
+                {
+                    // overlap final 16-pixel block to process requested width exactly
+                    x = 16-x;    /* distance to step back so the last block ends at w */
+                    u_p -= x/2;    /* chroma pointers advance half as fast as luma */
+                    v_p -= x/2;
+                    y_p_even -= x;
+                    y_p_odd -= x;
+                    dest_even -= x*2;    /* 2 output bytes per pixel */
+                    dest_odd -= x*2;
+                    x = 16;
+                    // do another 16-pixel block
+                }
+            }
+            while (x!=0);
+
+            dest_even += w * 2;    /* step over the odd row written through dest_odd */
+            dest_odd += w * 2;     /* step over the next even row */
+
+            u_p += ((uv_pitch << 1) - w) >> 1;    /* uv_pitch - w/2 for even w: start of next chroma row */
+            v_p += ((uv_pitch << 1) - w) >> 1;
+
+            y_p_even += (y_pitch - w) + y_pitch;    /* skip the rest of this row, then one whole row */
+            y_p_odd += (y_pitch - w) + y_pitch;
+        }
+    }
+}
+
+--- /tmp/omapfb-xv-generic.c    2009-02-03 10:52:18.000000000 +0100
+++ git/src/omapfb-xv-generic.c 2009-02-03 10:52:24.000000000 +0100
+@@ -240,7 +240,7 @@
+                        uint8_t *yb = buf;
+                        uint8_t *ub = yb + (src_y_pitch * src_h);
+                        uint8_t *vb = ub + (src_uv_pitch * (src_h / 2));
+-                       uv12_to_uyvy(src_w & ~15,
+                       uv12_to_uyvy_neon(src_w & ~15,
+                                     src_h & ~15,
+                                     src_y_pitch,
+                                     src_uv_pitch,
+@@ -256,7 +256,7 @@
+                        uint8_t *yb = buf;
+                        uint8_t *vb = yb + (src_y_pitch * src_h);
+                        uint8_t *ub = vb + (src_uv_pitch * (src_h / 2));
+-                       uv12_to_uyvy(src_w & ~15,
+                       uv12_to_uyvy_neon(src_w & ~15,
+                                     src_h & ~15,
+                                     src_y_pitch,
+                                     src_uv_pitch,

diff --git a/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff b/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff new file mode 100644 index 0000000000..325ca66f0c --- /dev/null +++ b/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
@@ -0,0 +1,146 @@
	1	--- /tmp/image-format-conversions.h 2009-02-03 10:18:04.000000000 +0100
	2	+++ git/src/image-format-conversions.h 2009-02-03 10:19:18.000000000 +0100
	3	@@ -30,6 +30,8 @@
	4	/* Basic C implementation of YV12/I420 to UYVY conversion */
	5	void uv12_to_uyvy(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
	6
	7	+/* NEON implementation of YV12/I420 to UYVY conversion */
	8	+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
	9
	10	#endif /* __IMAGE_FORMAT_CONVERSIONS_H__ */
	11
	12	--- /tmp/image-format-conversions.c 2009-02-03 10:18:04.000000000 +0100
	13	+++ git/src/image-format-conversions.c 2009-02-03 10:16:47.000000000 +0100
	14	@@ -2,6 +2,7 @@
	15	* Copyright 2008 Kalle Vahlman, <zuh@iki.fi>
	16	* Ilpo Ruotsalainen, <lonewolf@iki.fi>
	17	* Tuomas Kulve, <tuomas.kulve@movial.com>
	18	+ * Ian Rickards, <ian.rickards@arm.com>
	19	*
	20	*
	21	* Permission to use, copy, modify, distribute and sell this software and its
	22	@@ -89,3 +90,104 @@
	23	}
	24	}
	25
	26	+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest)
	27	+{
	28	+ int x, y;
	29	+ uint8_t *dest_even = dest;
	30	+ uint8_t *dest_odd = dest + w * 2;
	31	+ uint8_t *y_p_even = y_p;
	32	+ uint8_t *y_p_odd = y_p + y_pitch;
	33	+
	34	+ /*ErrorF("in uv12_to_uyvy, w: %d, pitch: %d\n", w, pitch);*/
	35	+ if (w<16)
	36	+ {
	37	+ for (y=0; y<h; y+=2)
	38	+ {
	39	+ for (x=0; x<w; x+=2)
	40	+ {
	41	+ /* Output two 2x1 macroblocks to form a 2x2 block from input */
	42	+ uint8_t u_val = *u_p++;
	43	+ uint8_t v_val = *v_p++;
	44	+
	45	+ /* Even row, first pixel */
	46	+ *dest_even++ = u_val;
	47	+ *dest_even++ = *y_p_even++;
	48	+
	49	+ /* Even row, second pixel */
	50	+ *dest_even++ = v_val;
	51	+ *dest_even++ = *y_p_even++;
	52	+
	53	+ /* Odd row, first pixel */
	54	+ *dest_odd++ = u_val;
	55	+ *dest_odd++ = *y_p_odd++;
	56	+
	57	+ /* Odd row, second pixel */
	58	+ *dest_odd++ = v_val;
	59	+ *dest_odd++ = *y_p_odd++;
	60	+ }
	61	+
	62	+ dest_even += w * 2;
	63	+ dest_odd += w * 2;
	64	+
	65	+ u_p += ((uv_pitch << 1) - w) >> 1;
	66	+ v_p += ((uv_pitch << 1) - w) >> 1;
	67	+
	68	+ y_p_even += (y_pitch - w) + y_pitch;
	69	+ y_p_odd += (y_pitch - w) + y_pitch;
	70	+ }
	71	+ }
	72	+ else
	73	+ {
	74	+ for (y=0; y<h; y+=2)
	75	+ {
	76	+ x=w;
	77	+ do {
	78	+ // avoid using d8-d15 (q4-q7) aapcs callee-save registers
	79	+ asm volatile (
	80	+ "1:\n\t"
	81	+ "vld1.u8 {d0}, [%[u_p]]!\n\t"
	82	+ "sub %[x],%[x],#16\n\t"
	83	+ "cmp %[x],#16\n\t"
	84	+ "vld1.u8 {d1}, [%[v_p]]!\n\t"
	85	+ "vld1.u8 {q1}, [%[y_p_even]]!\n\t"
	86	+ "vzip.u8 d0, d1\n\t"
	87	+ "vld1.u8 {q2}, [%[y_p_odd]]!\n\t"
	88	+ // use 2-element struct stores to zip up y with y&v
	89	+ "vst2.u8 {q0,q1}, [%[dest_even]]!\n\t"
	90	+ "vmov.u8 q1, q2\n\t"
	91	+ "vst2.u8 {q0,q1}, [%[dest_odd]]!\n\t"
	92	+ "bhs 1b\n\t"
	93	+ : [u_p] "+r" (u_p), [v_p] "+r" (v_p), [y_p_even] "+r" (y_p_even), [y_p_odd] "+r" (y_p_odd),
	94	+ [dest_even] "+r" (dest_even), [dest_odd] "+r" (dest_odd),
	95	+ [x] "+r" (x)
	96	+ :
	97	+ : "cc", "memory", "d0","d1","d2","d3","d4","d5"
	98	+ );
	99	+ if (x!=0)
	100	+ {
	101	+ // overlap final 16-pixel block to process requested width exactly
	102	+ x = 16-x;
	103	+ u_p -= x/2;
	104	+ v_p -= x/2;
	105	+ y_p_even -= x;
	106	+ y_p_odd -= x;
	107	+ dest_even -= x*2;
	108	+ dest_odd -= x*2;
	109	+ x = 16;
	110	+ // do another 16-pixel block
	111	+ }
	112	+ }
	113	+ while (x!=0);
	114	+
	115	+ dest_even += w * 2;
	116	+ dest_odd += w * 2;
	117	+
	118	+ u_p += ((uv_pitch << 1) - w) >> 1;
	119	+ v_p += ((uv_pitch << 1) - w) >> 1;
	120	+
	121	+ y_p_even += (y_pitch - w) + y_pitch;
	122	+ y_p_odd += (y_pitch - w) + y_pitch;
	123	+ }
	124	+ }
	125	+}
	126	+
	127	--- /tmp/omapfb-xv-generic.c 2009-02-03 10:52:18.000000000 +0100
	128	+++ git/src/omapfb-xv-generic.c 2009-02-03 10:52:24.000000000 +0100
	129	@@ -240,7 +240,7 @@
	130	uint8_t *yb = buf;
	131	uint8_t *ub = yb + (src_y_pitch * src_h);
	132	uint8_t *vb = ub + (src_uv_pitch * (src_h / 2));
	133	- uv12_to_uyvy(src_w & ~15,
	134	+ uv12_to_uyvy_neon(src_w & ~15,
	135	src_h & ~15,
	136	src_y_pitch,
	137	src_uv_pitch,
	138	@@ -256,7 +256,7 @@
	139	uint8_t *yb = buf;
	140	uint8_t *vb = yb + (src_y_pitch * src_h);
	141	uint8_t *ub = vb + (src_uv_pitch * (src_h / 2));
	142	- uv12_to_uyvy(src_w & ~15,
	143	+ uv12_to_uyvy_neon(src_w & ~15,
	144	src_h & ~15,
	145	src_y_pitch,
	146	src_uv_pitch,