summaryrefslogtreecommitdiffstats
path: root/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
diff options
context:
space:
mode:
author: Richard Purdie <rpurdie@linux.intel.com> 2010-08-27 15:14:24 +0100
committer: Richard Purdie <rpurdie@linux.intel.com> 2010-08-27 15:29:45 +0100
commit: 29d6678fd546377459ef75cf54abeef5b969b5cf (patch)
tree: 8edd65790e37a00d01c3f203f773fe4b5012db18 /meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
parent: da49de6885ee1bc424e70bc02f21f6ab920efb55 (diff)
download: poky-29d6678fd546377459ef75cf54abeef5b969b5cf.tar.gz
Major layout change to the packages directory
Having one monolithic packages directory makes it hard to find things and is generally overwhelming. This commit splits it into several logical sections roughly based on function; recipes.txt gives more information about the classifications used. The opportunity is also used to switch from "packages" to "recipes", as used in OpenEmbedded, since the term "packages" can be confusing to people and has many different meanings. Not all recipes have been classified yet; this is just a first pass at separating things out. Some packages are moved to meta-extras as they're no longer actively used or maintained. Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
Diffstat (limited to 'meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff')
-rw-r--r-- meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff 146
1 files changed, 146 insertions, 0 deletions
diff --git a/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff b/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
new file mode 100644
index 0000000000..325ca66f0c
--- /dev/null
+++ b/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
@@ -0,0 +1,146 @@
1--- /tmp/image-format-conversions.h 2009-02-03 10:18:04.000000000 +0100
2+++ git/src/image-format-conversions.h 2009-02-03 10:19:18.000000000 +0100
3@@ -30,6 +30,8 @@
4 /* Basic C implementation of YV12/I420 to UYVY conversion */
5 void uv12_to_uyvy(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
6
7+/* NEON implementation of YV12/I420 to UYVY conversion */
8+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
9
10 #endif /* __IMAGE_FORMAT_CONVERSIONS_H__ */
11
12--- /tmp/image-format-conversions.c 2009-02-03 10:18:04.000000000 +0100
13+++ git/src/image-format-conversions.c 2009-02-03 10:16:47.000000000 +0100
14@@ -2,6 +2,7 @@
15 * Copyright 2008 Kalle Vahlman, <zuh@iki.fi>
16 * Ilpo Ruotsalainen, <lonewolf@iki.fi>
17 * Tuomas Kulve, <tuomas.kulve@movial.com>
18+ * Ian Rickards, <ian.rickards@arm.com>
19 *
20 *
21 * Permission to use, copy, modify, distribute and sell this software and its
22@@ -89,3 +90,104 @@
23 }
24 }
25
26+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest)
27+{
28+ int x, y;
29+ uint8_t *dest_even = dest;
30+ uint8_t *dest_odd = dest + w * 2; /* second output row; each output row is w * 2 bytes */
31+ uint8_t *y_p_even = y_p;
32+ uint8_t *y_p_odd = y_p + y_pitch;
33+
34+ /* Convert planar YV12/I420 to packed UYVY two rows at a time: scalar loop when w < 16, NEON loop (16 pixels per pass) otherwise. */
35+ if (w<16)
36+ {
37+ for (y=0; y<h; y+=2)
38+ {
39+ for (x=0; x<w; x+=2)
40+ {
41+ /* Output two 2x1 macroblocks to form a 2x2 block from input; one u/v pair is shared by all four pixels */
42+ uint8_t u_val = *u_p++;
43+ uint8_t v_val = *v_p++;
44+
45+ /* Even row, first pixel */
46+ *dest_even++ = u_val;
47+ *dest_even++ = *y_p_even++;
48+
49+ /* Even row, second pixel */
50+ *dest_even++ = v_val;
51+ *dest_even++ = *y_p_even++;
52+
53+ /* Odd row, first pixel */
54+ *dest_odd++ = u_val;
55+ *dest_odd++ = *y_p_odd++;
56+
57+ /* Odd row, second pixel */
58+ *dest_odd++ = v_val;
59+ *dest_odd++ = *y_p_odd++;
60+ }
61+
62+ dest_even += w * 2; /* jump over the row that was written through dest_odd */
63+ dest_odd += w * 2; /* jump over the row dest_even writes on the next pass */
64+
65+ u_p += ((uv_pitch << 1) - w) >> 1; /* advance to the next chroma row */
66+ v_p += ((uv_pitch << 1) - w) >> 1;
67+
68+ y_p_even += (y_pitch - w) + y_pitch; /* finish this luma row, then skip the other one */
69+ y_p_odd += (y_pitch - w) + y_pitch;
70+ }
71+ }
72+ else
73+ {
74+ for (y=0; y<h; y+=2)
75+ {
76+ x=w;
77+ do {
78+ // avoid using d8-d15 (q4-q7) aapcs callee-save registers
79+ asm volatile (
80+ "1:\n\t"
81+ "vld1.u8 {d0}, [%[u_p]]!\n\t" // d0 = 8 u bytes, pointer post-incremented
82+ "sub %[x],%[x],#16\n\t" // 16 pixels are consumed per pass
83+ "cmp %[x],#16\n\t" // flags are consumed by the bhs at the end of the loop
84+ "vld1.u8 {d1}, [%[v_p]]!\n\t" // d1 = 8 v bytes
85+ "vld1.u8 {q1}, [%[y_p_even]]!\n\t" // q1 = 16 y bytes of the even row
86+ "vzip.u8 d0, d1\n\t" // q0 = u0 v0 u1 v1 ...
87+ "vld1.u8 {q2}, [%[y_p_odd]]!\n\t" // q2 = 16 y bytes of the odd row
88+ // use 2-element struct stores to interleave the zipped u/v bytes (q0) with y (q1)
89+ "vst2.u8 {q0,q1}, [%[dest_even]]!\n\t"
90+ "vmov.u8 q1, q2\n\t" // odd row reuses the same u/v bytes in q0
91+ "vst2.u8 {q0,q1}, [%[dest_odd]]!\n\t"
92+ "bhs 1b\n\t" // repeat while at least 16 pixels remain
93+ : [u_p] "+r" (u_p), [v_p] "+r" (v_p), [y_p_even] "+r" (y_p_even), [y_p_odd] "+r" (y_p_odd),
94+ [dest_even] "+r" (dest_even), [dest_odd] "+r" (dest_odd),
95+ [x] "+r" (x)
96+ :
97+ : "cc", "memory", "d0","d1","d2","d3","d4","d5"
98+ );
99+ if (x!=0)
100+ {
101+ // overlap final 16-pixel block to process requested width exactly: x pixels remain, so step every pointer back by 16-x pixels
102+ x = 16-x;
103+ u_p -= x/2;
104+ v_p -= x/2;
105+ y_p_even -= x;
106+ y_p_odd -= x;
107+ dest_even -= x*2;
108+ dest_odd -= x*2;
109+ x = 16;
110+ // do another 16-pixel block; the asm leaves x == 0 afterwards, which ends the do/while
111+ }
112+ }
113+ while (x!=0);
114+
115+ dest_even += w * 2; /* jump over the row that was written through dest_odd */
116+ dest_odd += w * 2;
117+
118+ u_p += ((uv_pitch << 1) - w) >> 1; /* advance to the next chroma row */
119+ v_p += ((uv_pitch << 1) - w) >> 1;
120+
121+ y_p_even += (y_pitch - w) + y_pitch; /* finish this luma row, then skip the other one */
122+ y_p_odd += (y_pitch - w) + y_pitch;
123+ }
124+ }
125+}
126+
127--- /tmp/omapfb-xv-generic.c 2009-02-03 10:52:18.000000000 +0100
128+++ git/src/omapfb-xv-generic.c 2009-02-03 10:52:24.000000000 +0100
129@@ -240,7 +240,7 @@
130 uint8_t *yb = buf;
131 uint8_t *ub = yb + (src_y_pitch * src_h);
132 uint8_t *vb = ub + (src_uv_pitch * (src_h / 2));
133- uv12_to_uyvy(src_w & ~15,
134+ uv12_to_uyvy_neon(src_w & ~15,
135 src_h & ~15,
136 src_y_pitch,
137 src_uv_pitch,
138@@ -256,7 +256,7 @@
139 uint8_t *yb = buf;
140 uint8_t *vb = yb + (src_y_pitch * src_h);
141 uint8_t *ub = vb + (src_uv_pitch * (src_h / 2));
142- uv12_to_uyvy(src_w & ~15,
143+ uv12_to_uyvy_neon(src_w & ~15,
144 src_h & ~15,
145 src_y_pitch,
146 src_uv_pitch,