diff options
Diffstat (limited to 'meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch')
-rw-r--r-- | meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch | 118 |
1 files changed, 0 insertions, 118 deletions
diff --git a/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch b/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch deleted file mode 100644 index 7c67001cc..000000000 --- a/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch +++ /dev/null | |||
@@ -1,118 +0,0 @@ | |||
1 | From 34ce640914e06f2e23a0a93a3a49ec0bfff7497b Mon Sep 17 00:00:00 2001 | ||
2 | From: Taekyun Kim <tkq.kim@samsung.com> | ||
3 | Date: Mon, 26 Sep 2011 18:33:27 +0900 | ||
4 | Subject: [PATCH 7/8] ARM: NEON: Standard fast path src_n_8_8 | ||
5 | |||
6 | Performance numbers of before/after on cortex-a8 @ 1GHz | ||
7 | |||
8 | - before | ||
9 | L1: 28.05 L2: 28.26 M: 26.97 ( 4.48%) HT: 19.79 VT: 19.14 R: 17.61 RT: 9.88 ( 101Kops/s) | ||
10 | |||
11 | - after | ||
12 | L1:1430.28 L2:1252.10 M:421.93 ( 75.48%) HT:170.16 VT:138.03 R:145.86 RT: 35.51 ( 255Kops/s) | ||
13 | --- | ||
14 | pixman/pixman-arm-neon-asm.S | 66 ++++++++++++++++++++++++++++++++++++++++++ | ||
15 | pixman/pixman-arm-neon.c | 3 ++ | ||
16 | 2 files changed, 69 insertions(+), 0 deletions(-) | ||
17 | |||
18 | diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S | ||
19 | index 1db02db..da8f054 100644 | ||
20 | --- a/pixman/pixman-arm-neon-asm.S | ||
21 | +++ b/pixman/pixman-arm-neon-asm.S | ||
22 | @@ -1292,6 +1292,72 @@ generate_composite_function \ | ||
23 | |||
24 | /******************************************************************************/ | ||
25 | |||
26 | +.macro pixman_composite_src_n_8_8_process_pixblock_head | ||
27 | + vmull.u8 q0, d24, d16 | ||
28 | + vmull.u8 q1, d25, d16 | ||
29 | + vmull.u8 q2, d26, d16 | ||
30 | + vmull.u8 q3, d27, d16 | ||
31 | + vrsra.u16 q0, q0, #8 | ||
32 | + vrsra.u16 q1, q1, #8 | ||
33 | + vrsra.u16 q2, q2, #8 | ||
34 | + vrsra.u16 q3, q3, #8 | ||
35 | +.endm | ||
36 | + | ||
37 | +.macro pixman_composite_src_n_8_8_process_pixblock_tail | ||
38 | + vrshrn.u16 d28, q0, #8 | ||
39 | + vrshrn.u16 d29, q1, #8 | ||
40 | + vrshrn.u16 d30, q2, #8 | ||
41 | + vrshrn.u16 d31, q3, #8 | ||
42 | +.endm | ||
43 | + | ||
44 | +.macro pixman_composite_src_n_8_8_process_pixblock_tail_head | ||
45 | + fetch_mask_pixblock | ||
46 | + PF add PF_X, PF_X, #8 | ||
47 | + vrshrn.u16 d28, q0, #8 | ||
48 | + PF tst PF_CTL, #0x0F | ||
49 | + vrshrn.u16 d29, q1, #8 | ||
50 | + PF addne PF_X, PF_X, #8 | ||
51 | + vrshrn.u16 d30, q2, #8 | ||
52 | + PF subne PF_CTL, PF_CTL, #1 | ||
53 | + vrshrn.u16 d31, q3, #8 | ||
54 | + PF cmp PF_X, ORIG_W | ||
55 | + vmull.u8 q0, d24, d16 | ||
56 | + PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] | ||
57 | + vmull.u8 q1, d25, d16 | ||
58 | + PF subge PF_X, PF_X, ORIG_W | ||
59 | + vmull.u8 q2, d26, d16 | ||
60 | + PF subges PF_CTL, PF_CTL, #0x10 | ||
61 | + vmull.u8 q3, d27, d16 | ||
62 | + PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! | ||
63 | + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! | ||
64 | + vrsra.u16 q0, q0, #8 | ||
65 | + vrsra.u16 q1, q1, #8 | ||
66 | + vrsra.u16 q2, q2, #8 | ||
67 | + vrsra.u16 q3, q3, #8 | ||
68 | +.endm | ||
69 | + | ||
70 | +.macro pixman_composite_src_n_8_8_init | ||
71 | + add DUMMY, sp, #ARGS_STACK_OFFSET | ||
72 | + vld1.32 {d16[0]}, [DUMMY] | ||
73 | + vdup.8 d16, d16[3] | ||
74 | +.endm | ||
75 | + | ||
76 | +.macro pixman_composite_src_n_8_8_cleanup | ||
77 | +.endm | ||
78 | + | ||
79 | +generate_composite_function \ | ||
80 | + pixman_composite_src_n_8_8_asm_neon, 0, 8, 8, \ | ||
81 | + FLAG_DST_WRITEONLY, \ | ||
82 | + 32, /* number of pixels, processed in a single block */ \ | ||
83 | + 5, /* prefetch distance */ \ | ||
84 | + pixman_composite_src_n_8_8_init, \ | ||
85 | + pixman_composite_src_n_8_8_cleanup, \ | ||
86 | + pixman_composite_src_n_8_8_process_pixblock_head, \ | ||
87 | + pixman_composite_src_n_8_8_process_pixblock_tail, \ | ||
88 | + pixman_composite_src_n_8_8_process_pixblock_tail_head | ||
89 | + | ||
90 | +/******************************************************************************/ | ||
91 | + | ||
92 | .macro pixman_composite_over_n_8_8888_process_pixblock_head | ||
93 | /* expecting deinterleaved source data in {d8, d9, d10, d11} */ | ||
94 | /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ | ||
95 | diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c | ||
96 | index 3db9adf..ca139de 100644 | ||
97 | --- a/pixman/pixman-arm-neon.c | ||
98 | +++ b/pixman/pixman-arm-neon.c | ||
99 | @@ -92,6 +92,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, | ||
100 | uint8_t, 1, uint32_t, 1) | ||
101 | PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8888, | ||
102 | uint8_t, 1, uint32_t, 1) | ||
103 | +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8, | ||
104 | + uint8_t, 1, uint8_t, 1) | ||
105 | |||
106 | PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, | ||
107 | uint32_t, 1, uint32_t, 1) | ||
108 | @@ -295,6 +297,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = | ||
109 | PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, neon_composite_src_n_8_8888), | ||
110 | PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, neon_composite_src_n_8_8888), | ||
111 | PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, neon_composite_src_n_8_8888), | ||
112 | + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8, neon_composite_src_n_8_8), | ||
113 | |||
114 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), | ||
115 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), | ||
116 | -- | ||
117 | 1.6.6.1 | ||
118 | |||