summary | refs | log | tree | commit | diff | stats
path: root/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff')
-rw-r--r-- meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff 148
1 files changed, 0 insertions, 148 deletions
diff --git a/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff b/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
deleted file mode 100644
index fb3ac8c594..0000000000
--- a/meta/recipes-graphics/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
+++ /dev/null
@@ -1,148 +0,0 @@
1Upstream-Status: Pending
2
3--- /tmp/image-format-conversions.h 2009-02-03 10:18:04.000000000 +0100
4+++ git/src/image-format-conversions.h 2009-02-03 10:19:18.000000000 +0100
5@@ -30,6 +30,8 @@
6 /* Basic C implementation of YV12/I420 to UYVY conversion */
7 void uv12_to_uyvy(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
8
9+/* NEON implementation of YV12/I420 to UYVY conversion */
10+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
11
12 #endif /* __IMAGE_FORMAT_CONVERSIONS_H__ */
13
14--- /tmp/image-format-conversions.c 2009-02-03 10:18:04.000000000 +0100
15+++ git/src/image-format-conversions.c 2009-02-03 10:16:47.000000000 +0100
16@@ -2,6 +2,7 @@
17 * Copyright 2008 Kalle Vahlman, <zuh@iki.fi>
18 * Ilpo Ruotsalainen, <lonewolf@iki.fi>
19 * Tuomas Kulve, <tuomas.kulve@movial.com>
20+ * Ian Rickards, <ian.rickards@arm.com>
21 *
22 *
23 * Permission to use, copy, modify, distribute and sell this software and its
24@@ -89,3 +90,104 @@
25 }
26 }
27
28+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest)
29+{
30+ int x, y;
31+ uint8_t *dest_even = dest;
32+ uint8_t *dest_odd = dest + w * 2;
33+ uint8_t *y_p_even = y_p;
34+ uint8_t *y_p_odd = y_p + y_pitch;
35+
36+ /* Converts separate Y/U/V planes, two rows per pass, into packed UYVY at dest (destination stride is w * 2 bytes): scalar loop when w < 16, NEON loop otherwise. */
37+ if (w<16)
38+ {
39+ for (y=0; y<h; y+=2)
40+ {
41+ for (x=0; x<w; x+=2)
42+ {
43+ /* Emit a 2x2 pixel block as two 2x1 UYVY macroblocks (one per row); both rows share the same U and V sample */
44+ uint8_t u_val = *u_p++;
45+ uint8_t v_val = *v_p++;
46+
47+ /* Even row, first pixel */
48+ *dest_even++ = u_val;
49+ *dest_even++ = *y_p_even++;
50+
51+ /* Even row, second pixel */
52+ *dest_even++ = v_val;
53+ *dest_even++ = *y_p_even++;
54+
55+ /* Odd row, first pixel */
56+ *dest_odd++ = u_val;
57+ *dest_odd++ = *y_p_odd++;
58+
59+ /* Odd row, second pixel */
60+ *dest_odd++ = v_val;
61+ *dest_odd++ = *y_p_odd++;
62+ }
63+
64+ dest_even += w * 2;
65+ dest_odd += w * 2;
66+
67+ u_p += ((uv_pitch << 1) - w) >> 1;
68+ v_p += ((uv_pitch << 1) - w) >> 1;
69+
70+ y_p_even += (y_pitch - w) + y_pitch;
71+ y_p_odd += (y_pitch - w) + y_pitch;
72+ }
73+ }
74+ else
75+ {
76+ for (y=0; y<h; y+=2)
77+ {
78+ x=w;
79+ do {
80+ // only d0-d5 (q0-q2) are used: avoid d8-d15 (q4-q7), the aapcs callee-save registers
81+ asm volatile (
82+ "1:\n\t"
83+ "vld1.u8 {d0}, [%[u_p]]!\n\t"
84+ "sub %[x],%[x],#16\n\t"
85+ "cmp %[x],#16\n\t"
86+ "vld1.u8 {d1}, [%[v_p]]!\n\t"
87+ "vld1.u8 {q1}, [%[y_p_even]]!\n\t"
88+ "vzip.u8 d0, d1\n\t"
89+ "vld1.u8 {q2}, [%[y_p_odd]]!\n\t"
90+ // use 2-element struct stores to zip up y with u&v
91+ "vst2.u8 {q0,q1}, [%[dest_even]]!\n\t"
92+ "vmov.u8 q1, q2\n\t"
93+ "vst2.u8 {q0,q1}, [%[dest_odd]]!\n\t"
94+ "bhs 1b\n\t"
95+ : [u_p] "+r" (u_p), [v_p] "+r" (v_p), [y_p_even] "+r" (y_p_even), [y_p_odd] "+r" (y_p_odd),
96+ [dest_even] "+r" (dest_even), [dest_odd] "+r" (dest_odd),
97+ [x] "+r" (x)
98+ :
99+ : "cc", "memory", "d0","d1","d2","d3","d4","d5"
100+ );
101+ if (x!=0)
102+ {
103+ // overlap final 16-pixel block to process requested width exactly: step every pointer back by the shortfall (16 - remaining) so one more full block ends at column w
104+ x = 16-x;
105+ u_p -= x/2;
106+ v_p -= x/2;
107+ y_p_even -= x;
108+ y_p_odd -= x;
109+ dest_even -= x*2;
110+ dest_odd -= x*2;
111+ x = 16;
112+ // do another 16-pixel block
113+ }
114+ }
115+ while (x!=0);
116+
117+ dest_even += w * 2;
118+ dest_odd += w * 2;
119+
120+ u_p += ((uv_pitch << 1) - w) >> 1;
121+ v_p += ((uv_pitch << 1) - w) >> 1;
122+
123+ y_p_even += (y_pitch - w) + y_pitch;
124+ y_p_odd += (y_pitch - w) + y_pitch;
125+ }
126+ }
127+}
128+
129--- /tmp/omapfb-xv-generic.c 2009-02-03 10:52:18.000000000 +0100
130+++ git/src/omapfb-xv-generic.c 2009-02-03 10:52:24.000000000 +0100
131@@ -240,7 +240,7 @@
132 uint8_t *yb = buf;
133 uint8_t *ub = yb + (src_y_pitch * src_h);
134 uint8_t *vb = ub + (src_uv_pitch * (src_h / 2));
135- uv12_to_uyvy(src_w & ~15,
136+ uv12_to_uyvy_neon(src_w & ~15,
137 src_h & ~15,
138 src_y_pitch,
139 src_uv_pitch,
140@@ -256,7 +256,7 @@
141 uint8_t *yb = buf;
142 uint8_t *vb = yb + (src_y_pitch * src_h);
143 uint8_t *ub = vb + (src_uv_pitch * (src_h / 2));
144- uv12_to_uyvy(src_w & ~15,
145+ uv12_to_uyvy_neon(src_w & ~15,
146 src_h & ~15,
147 src_y_pitch,
148 src_uv_pitch,