summary refs log tree commit diff stats
path: root/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch')
-rw-r--r-- meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch 118
1 files changed, 0 insertions, 118 deletions
diff --git a/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch b/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch
deleted file mode 100644
index 7c67001cc..000000000
--- a/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch
+++ /dev/null
@@ -1,118 +0,0 @@
1From 34ce640914e06f2e23a0a93a3a49ec0bfff7497b Mon Sep 17 00:00:00 2001
2From: Taekyun Kim <tkq.kim@samsung.com>
3Date: Mon, 26 Sep 2011 18:33:27 +0900
4Subject: [PATCH 7/8] ARM: NEON: Standard fast path src_n_8_8
5
6Performance numbers of before/after on cortex-a8 @ 1GHz
7
8- before
9L1: 28.05 L2: 28.26 M: 26.97 ( 4.48%) HT: 19.79 VT: 19.14 R: 17.61 RT: 9.88 ( 101Kops/s)
10
11- after
12L1:1430.28 L2:1252.10 M:421.93 ( 75.48%) HT:170.16 VT:138.03 R:145.86 RT: 35.51 ( 255Kops/s)
13---
14 pixman/pixman-arm-neon-asm.S | 66 ++++++++++++++++++++++++++++++++++++++++++
15 pixman/pixman-arm-neon.c | 3 ++
16 2 files changed, 69 insertions(+), 0 deletions(-)
17
18diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
19index 1db02db..da8f054 100644
20--- a/pixman/pixman-arm-neon-asm.S
21+++ b/pixman/pixman-arm-neon-asm.S
22@@ -1292,6 +1292,72 @@ generate_composite_function \
23
24 /******************************************************************************/
25
26+.macro pixman_composite_src_n_8_8_process_pixblock_head
27+ vmull.u8 q0, d24, d16
28+ vmull.u8 q1, d25, d16
29+ vmull.u8 q2, d26, d16
30+ vmull.u8 q3, d27, d16
31+ vrsra.u16 q0, q0, #8
32+ vrsra.u16 q1, q1, #8
33+ vrsra.u16 q2, q2, #8
34+ vrsra.u16 q3, q3, #8
35+.endm
36+
37+.macro pixman_composite_src_n_8_8_process_pixblock_tail
38+ vrshrn.u16 d28, q0, #8
39+ vrshrn.u16 d29, q1, #8
40+ vrshrn.u16 d30, q2, #8
41+ vrshrn.u16 d31, q3, #8
42+.endm
43+
44+.macro pixman_composite_src_n_8_8_process_pixblock_tail_head
45+ fetch_mask_pixblock
46+ PF add PF_X, PF_X, #8
47+ vrshrn.u16 d28, q0, #8
48+ PF tst PF_CTL, #0x0F
49+ vrshrn.u16 d29, q1, #8
50+ PF addne PF_X, PF_X, #8
51+ vrshrn.u16 d30, q2, #8
52+ PF subne PF_CTL, PF_CTL, #1
53+ vrshrn.u16 d31, q3, #8
54+ PF cmp PF_X, ORIG_W
55+ vmull.u8 q0, d24, d16
56+ PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
57+ vmull.u8 q1, d25, d16
58+ PF subge PF_X, PF_X, ORIG_W
59+ vmull.u8 q2, d26, d16
60+ PF subges PF_CTL, PF_CTL, #0x10
61+ vmull.u8 q3, d27, d16
62+ PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
63+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
64+ vrsra.u16 q0, q0, #8
65+ vrsra.u16 q1, q1, #8
66+ vrsra.u16 q2, q2, #8
67+ vrsra.u16 q3, q3, #8
68+.endm
69+
70+.macro pixman_composite_src_n_8_8_init
71+ add DUMMY, sp, #ARGS_STACK_OFFSET
72+ vld1.32 {d16[0]}, [DUMMY]
73+ vdup.8 d16, d16[3]
74+.endm
75+
76+.macro pixman_composite_src_n_8_8_cleanup
77+.endm
78+
79+generate_composite_function \
80+ pixman_composite_src_n_8_8_asm_neon, 0, 8, 8, \
81+ FLAG_DST_WRITEONLY, \
82+ 32, /* number of pixels, processed in a single block */ \
83+ 5, /* prefetch distance */ \
84+ pixman_composite_src_n_8_8_init, \
85+ pixman_composite_src_n_8_8_cleanup, \
86+ pixman_composite_src_n_8_8_process_pixblock_head, \
87+ pixman_composite_src_n_8_8_process_pixblock_tail, \
88+ pixman_composite_src_n_8_8_process_pixblock_tail_head
89+
90+/******************************************************************************/
91+
92 .macro pixman_composite_over_n_8_8888_process_pixblock_head
93 /* expecting deinterleaved source data in {d8, d9, d10, d11} */
94 /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
95diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
96index 3db9adf..ca139de 100644
97--- a/pixman/pixman-arm-neon.c
98+++ b/pixman/pixman-arm-neon.c
99@@ -92,6 +92,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
100 uint8_t, 1, uint32_t, 1)
101 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8888,
102 uint8_t, 1, uint32_t, 1)
103+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8,
104+ uint8_t, 1, uint8_t, 1)
105
106 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
107 uint32_t, 1, uint32_t, 1)
108@@ -295,6 +297,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
109 PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, neon_composite_src_n_8_8888),
110 PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, neon_composite_src_n_8_8888),
111 PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, neon_composite_src_n_8_8888),
112+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8, neon_composite_src_n_8_8),
113
114 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8),
115 PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565),
116--
1171.6.6.1
118