diff options
Diffstat (limited to 'meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch')
-rw-r--r-- | meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch | 76 |
1 files changed, 76 insertions, 0 deletions
diff --git a/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch b/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch new file mode 100644 index 000000000..0e1846e31 --- /dev/null +++ b/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch | |||
@@ -0,0 +1,76 @@ | |||
1 | From 2de7e128fbdf528716b500cf27ed9a4358c931c9 Mon Sep 17 00:00:00 2001 | ||
2 | From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com> | ||
3 | Date: Fri, 24 Nov 2017 00:05:35 +0100 | ||
4 | Subject: [PATCH 2/2] Use ARM-NEON acceleration for float-multithreaded setups | ||
5 | MIME-Version: 1.0 | ||
6 | Content-Type: text/plain; charset=UTF-8 | ||
7 | Content-Transfer-Encoding: 8bit | ||
8 | |||
9 | Upstream-Status: Pending | ||
10 | |||
11 | Signed-off-by: Andreas Müller <schnitzeltony@gmail.com> | ||
12 | --- | ||
13 | src/rvoice/fluid_rvoice_mixer.c | 26 ++++++++++++++++++++++++++ | ||
14 | 1 file changed, 26 insertions(+) | ||
15 | |||
16 | diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c | ||
17 | index 9616518..dbf8057 100644 | ||
18 | --- a/src/rvoice/fluid_rvoice_mixer.c | ||
19 | +++ b/src/rvoice/fluid_rvoice_mixer.c | ||
20 | @@ -27,6 +27,10 @@ | ||
21 | #include "fluid_ladspa.h" | ||
22 | #include "fluid_synth.h" | ||
23 | |||
24 | +#if defined(__ARM_NEON__) | ||
25 | +#include "arm_neon.h" | ||
26 | +#endif | ||
27 | + | ||
28 | |||
29 | #define ENABLE_MIXER_THREADS 1 | ||
30 | |||
31 | @@ -794,20 +798,42 @@ fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dest, fluid_mixer_buffers_t* src) | ||
32 | if (minbuf > src->buf_count) | ||
33 | minbuf = src->buf_count; | ||
34 | for (i=0; i < minbuf; i++) { | ||
35 | +#if defined(__ARM_NEON__) && defined(WITH_FLOAT) | ||
36 | + for (j=0; j < scount; j+=4) { | ||
37 | + float32x4_t vleft = vld1q_f32(&dest->left_buf[i][j]); | ||
38 | + float32x4_t vright = vld1q_f32(&dest->right_buf[i][j]); | ||
39 | + vleft = vaddq_f32(vleft, vld1q_f32(&src->left_buf[i][j])); | ||
40 | + vright = vaddq_f32(vright, vld1q_f32(&src->right_buf[i][j])); | ||
41 | + vst1q_f32(&dest->left_buf[i][j], vleft); | ||
42 | + vst1q_f32(&dest->right_buf[i][j], vright); | ||
43 | + } | ||
44 | +#else | ||
45 | for (j=0; j < scount; j++) { | ||
46 | dest->left_buf[i][j] += src->left_buf[i][j]; | ||
47 | dest->right_buf[i][j] += src->right_buf[i][j]; | ||
48 | } | ||
49 | +#endif | ||
50 | } | ||
51 | |||
52 | minbuf = dest->fx_buf_count; | ||
53 | if (minbuf > src->fx_buf_count) | ||
54 | minbuf = src->fx_buf_count; | ||
55 | for (i=0; i < minbuf; i++) { | ||
56 | +#if defined(__ARM_NEON__) && defined(WITH_FLOAT) | ||
57 | + for (j=0; j < scount; j+=4) { | ||
58 | + float32x4_t vleft = vld1q_f32(&dest->fx_left_buf[i][j]); | ||
59 | + float32x4_t vright = vld1q_f32(&dest->fx_right_buf[i][j]); | ||
60 | + vleft = vaddq_f32(vleft, vld1q_f32(&src->fx_left_buf[i][j])); | ||
61 | + vright = vaddq_f32(vright, vld1q_f32(&src->fx_right_buf[i][j])); | ||
62 | + vst1q_f32(&dest->fx_left_buf[i][j], vleft); | ||
63 | + vst1q_f32(&dest->fx_right_buf[i][j], vright); | ||
64 | + } | ||
65 | +#else | ||
66 | for (j=0; j < scount; j++) { | ||
67 | dest->fx_left_buf[i][j] += src->fx_left_buf[i][j]; | ||
68 | dest->fx_right_buf[i][j] += src->fx_right_buf[i][j]; | ||
69 | } | ||
70 | +#endif | ||
71 | } | ||
72 | } | ||
73 | |||
74 | -- | ||
75 | 2.9.5 | ||
76 | |||