summaryrefslogtreecommitdiffstats
path: root/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch')
-rw-r--r--meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch235
1 files changed, 235 insertions, 0 deletions
diff --git a/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch b/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch
new file mode 100644
index 0000000000..dc8a69f749
--- /dev/null
+++ b/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch
@@ -0,0 +1,235 @@
1From 524d1cc7acb753167fffdd08d8c10bf71e0634ba Mon Sep 17 00:00:00 2001
2From: Taekyun Kim <tkq.kim@samsung.com>
3Date: Tue, 20 Sep 2011 21:32:35 +0900
4Subject: [PATCH 4/8] ARM: NEON: Bilinear macro template for instruction scheduling
5
6This macro template takes 6 code blocks.
7
81. process_last_pixel
92. process_two_pixels
103. process_four_pixels
114. process_pixblock_head
125. process_pixblock_tail
136. process_pixblock_tail_head
14
15process_last_pixel does not need to update the horizontal weight; this
16is done by the template. The two- and four-pixel code blocks should update
17the horizontal weight themselves. The head/tail/tail_head blocks
18make up the unrolled core loop. You can apply instruction scheduling
19to the tail_head blocks.
20
21You can also specify the size of the pixel block. Supported sizes are 4
22and 8. If you want to use a mask, pass the BILINEAR_FLAG_USE_MASK flag
23to the template; you can then use the MASK register. When using the d8~d15
24registers, pass BILINEAR_FLAG_USE_ALL_NEON_REGS to make sure the
25registers are properly saved on the stack and later restored.
26---
27 pixman/pixman-arm-neon-asm-bilinear.S | 195 +++++++++++++++++++++++++++++++++
28 1 files changed, 195 insertions(+), 0 deletions(-)
29
30diff --git a/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman-arm-neon-asm-bilinear.S
31index c5ba929..784e5df 100644
32--- a/pixman/pixman-arm-neon-asm-bilinear.S
33+++ b/pixman/pixman-arm-neon-asm-bilinear.S
34@@ -773,3 +773,198 @@ generate_bilinear_scanline_func_src_a8_dst \
35 generate_bilinear_scanline_func_src_a8_dst \
36 pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \
37 8888, 8888, add, 2, 28
38+
39+.set BILINEAR_FLAG_USE_MASK, 1 /* bit 0 of 'flags': select the register layout that has a MASK register (r1) */
40+.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2 /* bit 1 of 'flags': vpush/vpop d8-d15 around the generated function body */
41+
42+/*
43+ * Main template macro for generating NEON optimized bilinear scanline functions.
44+ *
45+ * Bilinear scanline generator macro takes the following arguments:
46+ * fname - name of the function to generate
47+ * src_fmt - source color format (8888 or 0565)
48+ * dst_fmt - destination color format (8888 or 0565)
49+ * src/dst_bpp_shift - (1 << bpp_shift) is the size of src/dst pixel in bytes
50+ * process_last_pixel - code block that interpolates one pixel and does not
51+ * update the horizontal weight
52+ * process_two_pixels - code block that interpolates two pixels and updates
53+ * the horizontal weight
54+ * process_four_pixels - code block that interpolates four pixels and updates
55+ * the horizontal weight
56+ * process_pixblock_head - head part of middle loop
57+ * process_pixblock_tail - tail part of middle loop
58+ * process_pixblock_tail_head - tail_head of middle loop
59+ * pixblock_size - number of pixels processed in a single middle loop
60+ * prefetch_distance - prefetch in the source image by that many pixels ahead
61+ */
62+
63+.macro generate_bilinear_scanline_func \
64+ fname, \
65+ src_fmt, dst_fmt, src_bpp_shift, dst_bpp_shift, \
66+ bilinear_process_last_pixel, \
67+ bilinear_process_two_pixels, \
68+ bilinear_process_four_pixels, \
69+ bilinear_process_pixblock_head, \
70+ bilinear_process_pixblock_tail, \
71+ bilinear_process_pixblock_tail_head, \
72+ pixblock_size, \
73+ prefetch_distance, \
74+ flags
75+ /* flags - bitwise OR of BILINEAR_FLAG_USE_MASK and BILINEAR_FLAG_USE_ALL_NEON_REGS (each bit is tested separately below) */
76+pixman_asm_function fname /* helper macro defined elsewhere; presumably emits the global symbol/label for fname */
77+.if pixblock_size == 8 /* assembly-time check: only pixblock_size 8 or 4 is accepted */
78+.elseif pixblock_size == 4
79+.else
80+ .error unsupported pixblock size
81+.endif
82+
83+.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0 /* register layout without a MASK register */
84+ OUT .req r0
85+ TOP .req r1
86+ BOTTOM .req r2
87+ WT .req r3
88+ WB .req r4
89+ X .req r5
90+ UX .req r6
91+ WIDTH .req ip
92+ TMP1 .req r3 /* same register as WT */
93+ TMP2 .req r4 /* same register as WB */
94+ PF_OFFS .req r7
95+ TMP3 .req r8
96+ TMP4 .req r9
97+ STRIDE .req r2 /* same register as BOTTOM; see the sub/.unreq below */
98+
99+ mov ip, sp /* remember sp as it was on entry, before the push below moves it */
100+ push {r4, r5, r6, r7, r8, r9} /* save the registers used by the aliases above */
101+ mov PF_OFFS, #prefetch_distance
102+ ldmia ip, {WB, X, UX, WIDTH} /* load 4 words from the entry sp (presumably the stack-passed arguments); ip is both base and last destination (WIDTH) */
103+.else /* register layout with MASK in r1; everything after it shifts up by one register */
104+ OUT .req r0
105+ MASK .req r1
106+ TOP .req r2
107+ BOTTOM .req r3
108+ WT .req r4
109+ WB .req r5
110+ X .req r6
111+ UX .req r7
112+ WIDTH .req ip
113+ TMP1 .req r4 /* same register as WT */
114+ TMP2 .req r5 /* same register as WB */
115+ PF_OFFS .req r8
116+ TMP3 .req r9
117+ TMP4 .req r10
118+ STRIDE .req r3 /* same register as BOTTOM; see the sub/.unreq below */
119+
120+ mov ip, sp /* remember sp as it was on entry, before the push below moves it */
121+ push {r4, r5, r6, r7, r8, r9, r10, ip} /* ip is stored as well, 8 words in total; NOTE(review): presumably to keep sp 8-byte aligned - confirm */
122+ mov PF_OFFS, #prefetch_distance
123+ ldmia ip, {WT, WB, X, UX, WIDTH} /* load 5 words from the entry sp (presumably the stack-passed arguments); ip is both base and last destination (WIDTH) */
124+.endif
125+
126+ mul PF_OFFS, PF_OFFS, UX /* PF_OFFS = prefetch_distance * UX */
127+
128+.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
129+ vpush {d8-d15} /* save d8-d15 so the code blocks may use them; restored by the vpop before return */
130+.endif
131+
132+ sub STRIDE, BOTTOM, TOP /* STRIDE = BOTTOM - TOP; this overwrites BOTTOM, which shares the register */
133+ .unreq BOTTOM /* drop the alias so the overwritten value cannot be used by mistake */
134+
135+ cmp WIDTH, #0
136+ ble 3f /* nothing to do for WIDTH <= 0 (signed): go straight to the epilogue */
137+
138+ vdup.u16 q12, X /* every 16-bit lane of q12 (d24:d25) = low 16 bits of X */
139+ vdup.u16 q13, UX /* every 16-bit lane of q13 (d26:d27) = low 16 bits of UX */
140+ vdup.u8 d28, WT /* every byte of d28 = low 8 bits of WT */
141+ vdup.u8 d29, WB /* every byte of d29 = low 8 bits of WB */
142+ vadd.u16 d25, d25, d26 /* upper half of q12 becomes X + UX; the lower half (d24) stays X */
143+
144+ /* ensure good destination alignment */
145+ cmp WIDTH, #1
146+ blt 0f /* never taken: WIDTH >= 1 is guaranteed by the WIDTH <= 0 check above */
147+ tst OUT, #(1 << dst_bpp_shift) /* test the OUT address bit whose value is one destination pixel size */
148+ beq 0f /* bit clear: no single leading pixel needed */
149+ vshr.u16 q15, q12, #8 /* q15 = q12 >> 8 per lane; the template updates the horizontal weight for the last-pixel block */
150+ vadd.u16 q12, q12, q13 /* advance every lane of q12 by UX */
151+ bilinear_process_last_pixel
152+ sub WIDTH, WIDTH, #1
153+0:
154+ vadd.u16 q13, q13, q13 /* q13 = 2 * UX per lane */
155+ vshr.u16 q15, q12, #8 /* q15 = q12 >> 8 per lane, taken before the advance on the next line */
156+ vadd.u16 q12, q12, q13 /* advance every lane of q12 by 2 * UX */
157+
158+ cmp WIDTH, #2
159+ blt 0f /* fewer than two pixels left: skip the two-pixel alignment step */
160+ tst OUT, #(1 << (dst_bpp_shift + 1)) /* test the OUT address bit whose value is two destination pixel sizes */
161+ beq 0f
162+ bilinear_process_two_pixels
163+ sub WIDTH, WIDTH, #2
164+0:
165+.if pixblock_size == 8 /* the four-pixel alignment step is only generated for 8-pixel blocks */
166+ cmp WIDTH, #4
167+ blt 0f
168+ tst OUT, #(1 << (dst_bpp_shift + 2)) /* test the OUT address bit whose value is four destination pixel sizes */
169+ beq 0f
170+ bilinear_process_four_pixels
171+ sub WIDTH, WIDTH, #4
172+0:
173+.endif
174+ subs WIDTH, WIDTH, #pixblock_size /* pre-decrement for the first block; WIDTH is biased by -pixblock_size from here on */
175+ blt 1f /* less than one full block left: skip the main loop */
176+ mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift) /* NOTE(review): presumably turns the 16.16 fixed-point pixel offset into a byte offset - confirm */
177+ bilinear_process_pixblock_head
178+ subs WIDTH, WIDTH, #pixblock_size
179+ blt 5f /* only one full block: finish it with the tail, no tail_head iteration */
180+0:
181+ bilinear_process_pixblock_tail_head /* finishes the current block and starts the next one */
182+ subs WIDTH, WIDTH, #pixblock_size
183+ bge 0b /* loop while another full block remains */
184+5:
185+ bilinear_process_pixblock_tail /* finish the block started by the last head / tail_head */
186+1: /* WIDTH is negative here, but only multiples of pixblock_size were subtracted, so the low bits tested below still hold the remaining pixel count */
187+.if pixblock_size == 8
188+ tst WIDTH, #4
189+ beq 2f
190+ bilinear_process_four_pixels
191+2:
192+.endif
193+ /* handle the remaining trailing pixels */
194+ tst WIDTH, #2
195+ beq 2f
196+ bilinear_process_two_pixels
197+2:
198+ tst WIDTH, #1
199+ beq 3f
200+ bilinear_process_last_pixel
201+3: /* common epilogue; also the target of the early exit for WIDTH <= 0 */
202+.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
203+ vpop {d8-d15} /* matches the vpush above */
204+.endif
205+
206+.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
207+ pop {r4, r5, r6, r7, r8, r9} /* matches the push in the non-mask prologue */
208+.else
209+ pop {r4, r5, r6, r7, r8, r9, r10, ip} /* matches the push in the mask prologue; ip receives the value stored there */
210+.endif
211+ bx lr
212+
213+ .unreq OUT /* release all aliases so the next expansion of this macro can define them again */
214+ .unreq TOP
215+ .unreq WT
216+ .unreq WB
217+ .unreq X
218+ .unreq UX
219+ .unreq WIDTH
220+ .unreq TMP1
221+ .unreq TMP2
222+ .unreq PF_OFFS
223+ .unreq TMP3
224+ .unreq TMP4
225+ .unreq STRIDE
226+.if ((flags) & BILINEAR_FLAG_USE_MASK) != 0
227+ .unreq MASK /* MASK only exists in the mask layout */
228+.endif
229+
230+.endfunc /* presumably closes the function opened by pixman_asm_function */
231+
232+.endm
233--
2341.6.6.1
235