diff options
Diffstat (limited to 'meta-oe/recipes-multimedia/jack/jack/0002-jack_simdtests-add-application-checking-accurracy-an.patch')
-rw-r--r-- | meta-oe/recipes-multimedia/jack/jack/0002-jack_simdtests-add-application-checking-accurracy-an.patch | 433 |
1 files changed, 0 insertions, 433 deletions
diff --git a/meta-oe/recipes-multimedia/jack/jack/0002-jack_simdtests-add-application-checking-accurracy-an.patch b/meta-oe/recipes-multimedia/jack/jack/0002-jack_simdtests-add-application-checking-accurracy-an.patch deleted file mode 100644 index e0c9e8ca8..000000000 --- a/meta-oe/recipes-multimedia/jack/jack/0002-jack_simdtests-add-application-checking-accurracy-an.patch +++ /dev/null | |||
@@ -1,433 +0,0 @@ | |||
1 | From d0543c0628d2c0a6d898c694003e941fa189b393 Mon Sep 17 00:00:00 2001 | ||
2 | From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@googlemail.com> | ||
3 | Date: Sun, 15 Jan 2017 20:52:20 +0100 | ||
4 | Subject: [PATCH 2/2] jack_simdtests: add application checking accurracy and | ||
5 | performance of SIMD optimizations | ||
6 | MIME-Version: 1.0 | ||
7 | Content-Type: text/plain; charset=UTF-8 | ||
8 | Content-Transfer-Encoding: 8bit | ||
9 | |||
10 | Upstream-Status: Submitted [1] | ||
11 | |||
12 | [1] https://github.com/jackaudio/jack2/pull/250 | ||
13 | |||
14 | Signed-off-by: Andreas Müller <schnitzeltony@googlemail.com> | ||
15 | --- | ||
16 | example-clients/simdtests.cpp | 390 ++++++++++++++++++++++++++++++++++++++++++ | ||
17 | example-clients/wscript | 3 +- | ||
18 | 2 files changed, 392 insertions(+), 1 deletion(-) | ||
19 | create mode 100644 example-clients/simdtests.cpp | ||
20 | |||
21 | diff --git a/example-clients/simdtests.cpp b/example-clients/simdtests.cpp | ||
22 | new file mode 100644 | ||
23 | index 0000000..b74d50a | ||
24 | --- /dev/null | ||
25 | +++ b/example-clients/simdtests.cpp | ||
26 | @@ -0,0 +1,390 @@ | ||
27 | +/* | ||
28 | + * simdtests.cpp -- test accuracy and performance of SIMD optimizations | |
29 | + * | ||
30 | + * Copyright (C) 2017 Andreas Mueller. | ||
31 | + * | ||
32 | + * This program is free software; you can redistribute it and/or modify | ||
33 | + * it under the terms of the GNU General Public License as published by | ||
34 | + * the Free Software Foundation; either version 2 of the License, or | ||
35 | + * (at your option) any later version. | ||
36 | + * | ||
37 | + * This program is distributed in the hope that it will be useful, | ||
38 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
39 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
40 | + * GNU General Public License for more details. | ||
41 | + * | ||
42 | + * You should have received a copy of the GNU General Public License | ||
43 | + * along with this program; if not, write to the Free Software | ||
44 | + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
45 | + */ | ||
46 | + | ||
47 | +/* We must include all headers memops.c includes to avoid trouble with | |
48 | + * our namespace game below. | |
49 | + */ | ||
50 | +#include <stdio.h> | ||
51 | +#include <string.h> | ||
52 | +#include <math.h> | ||
53 | +#include <memory.h> | ||
54 | +#include <stdlib.h> | ||
55 | +#include <stdint.h> | ||
56 | +#include <limits.h> | ||
57 | +#ifdef __linux__ | ||
58 | +#include <endian.h> | ||
59 | +#endif | ||
60 | +#include "memops.h" | ||
61 | + | ||
62 | +#if defined (__SSE2__) && !defined (__sun__) | ||
63 | +#include <emmintrin.h> | ||
64 | +#ifdef __SSE4_1__ | ||
65 | +#include <smmintrin.h> | ||
66 | +#endif | ||
67 | +#endif | ||
68 | + | ||
69 | +#ifdef __ARM_NEON__ | ||
70 | +#include <arm_neon.h> | ||
71 | +#endif | ||
72 | + | ||
73 | +// our additional headers | ||
74 | +#include <time.h> | ||
75 | + | ||
76 | +/* Dirty: include mempos.c twice the second time with SIMD disabled | ||
77 | + * so we can compare aceelerated non accelerated | ||
78 | + */ | ||
79 | +namespace accelerated { | ||
80 | +#include "../common/memops.c" | ||
81 | +} | ||
82 | + | ||
83 | +namespace origerated { | ||
84 | +#ifdef __SSE2__ | ||
85 | +#undef __SSE2__ | ||
86 | +#endif | ||
87 | + | ||
88 | +#ifdef __ARM_NEON__ | ||
89 | +#undef __ARM_NEON__ | ||
90 | +#endif | ||
91 | + | ||
92 | +#include "../common/memops.c" | ||
93 | +} | ||
94 | + | ||
95 | +// define conversion function types | ||
96 | +typedef void (*t_jack_to_integer)( | ||
97 | + char *dst, | ||
98 | + jack_default_audio_sample_t *src, | ||
99 | + unsigned long nsamples, | ||
100 | + unsigned long dst_skip, | ||
101 | + dither_state_t *state); | ||
102 | + | ||
103 | +typedef void (*t_integer_to_jack)( | ||
104 | + jack_default_audio_sample_t *dst, | ||
105 | + char *src, | ||
106 | + unsigned long nsamples, | ||
107 | + unsigned long src_skip); | ||
108 | + | ||
109 | +// define/setup test case data | ||
110 | +typedef struct test_case_data { | ||
111 | + uint32_t frame_size; | ||
112 | + uint32_t sample_size; | ||
113 | + bool reverse; | ||
114 | + t_jack_to_integer jack_to_integer_accel; | ||
115 | + t_jack_to_integer jack_to_integer_orig; | ||
116 | + t_integer_to_jack integer_to_jack_accel; | ||
117 | + t_integer_to_jack integer_to_jack_orig; | ||
118 | + dither_state_t *ditherstate; | ||
119 | + const char *name; | ||
120 | +} test_case_data_t; | ||
121 | + | ||
122 | +test_case_data_t test_cases[] = { | ||
123 | + { | ||
124 | + 4, | ||
125 | + 3, | ||
126 | + true, | ||
127 | + accelerated::sample_move_d32u24_sSs, | ||
128 | + origerated::sample_move_d32u24_sSs, | ||
129 | + accelerated::sample_move_dS_s32u24s, | ||
130 | + origerated::sample_move_dS_s32u24s, | ||
131 | + NULL, | ||
132 | + "32u24s" }, | ||
133 | + { | ||
134 | + 4, | ||
135 | + 3, | ||
136 | + false, | ||
137 | + accelerated::sample_move_d32u24_sS, | ||
138 | + origerated::sample_move_d32u24_sS, | ||
139 | + accelerated::sample_move_dS_s32u24, | ||
140 | + origerated::sample_move_dS_s32u24, | ||
141 | + NULL, | ||
142 | + "32u24" }, | ||
143 | + { | ||
144 | + 3, | ||
145 | + 3, | ||
146 | + true, | ||
147 | + accelerated::sample_move_d24_sSs, | ||
148 | + origerated::sample_move_d24_sSs, | ||
149 | + accelerated::sample_move_dS_s24s, | ||
150 | + origerated::sample_move_dS_s24s, | ||
151 | + NULL, | ||
152 | + "24s" }, | ||
153 | + { | ||
154 | + 3, | ||
155 | + 3, | ||
156 | + false, | ||
157 | + accelerated::sample_move_d24_sS, | ||
158 | + origerated::sample_move_d24_sS, | ||
159 | + accelerated::sample_move_dS_s24, | ||
160 | + origerated::sample_move_dS_s24, | ||
161 | + NULL, | ||
162 | + "24" }, | ||
163 | + { | ||
164 | + 2, | ||
165 | + 2, | ||
166 | + true, | ||
167 | + accelerated::sample_move_d16_sSs, | ||
168 | + origerated::sample_move_d16_sSs, | ||
169 | + accelerated::sample_move_dS_s16s, | ||
170 | + origerated::sample_move_dS_s16s, | ||
171 | + NULL, | ||
172 | + "16s" }, | ||
173 | + { | ||
174 | + 2, | ||
175 | + 2, | ||
176 | + false, | ||
177 | + accelerated::sample_move_d16_sS, | ||
178 | + origerated::sample_move_d16_sS, | ||
179 | + accelerated::sample_move_dS_s16, | ||
180 | + origerated::sample_move_dS_s16, | ||
181 | + NULL, | ||
182 | + "16" }, | ||
183 | +}; | ||
184 | + | ||
185 | +// we need to repeat for better accuracy at time measurement | ||
186 | +const uint32_t retry_per_case = 1000; | ||
187 | + | ||
188 | +// setup test buffers | ||
189 | +#define TESTBUFF_SIZE 1024 | ||
190 | +jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE]; | ||
191 | +// integer buffers: max 4 bytes per value / * 2 for stereo | ||
192 | +char integerbuffer_accel[TESTBUFF_SIZE*4*2]; | ||
193 | +char integerbuffer_orig[TESTBUFF_SIZE*4*2]; | ||
194 | +// float buffers | ||
195 | +jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE]; | ||
196 | +jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE]; | ||
197 | + | ||
/*
 * Assemble the sample stored in one frame of a raw byte buffer into an
 * unsigned integer. Working on unsigned values makes comparing two results
 * and computing their distance easier.
 *
 * buff        - start of the byte buffer; it is only read, never written
 * offset      - byte offset of the frame inside buff
 * frame_size  - total number of bytes of the frame
 * sample_size - number of bytes that carry the sample
 * big_endian  - true:  the sample is taken from the first sample_size bytes
 *                      of the frame, most significant byte first
 *               false: the sample is taken from the last sample_size bytes
 *                      of the frame, least significant byte first
 *
 * Returns the assembled value. For sample_size > 4 only the lowest 32 bits
 * are kept.
 */
uint32_t extract_integer(
    const char* buff,
    uint32_t offset,
    uint32_t frame_size,
    uint32_t sample_size,
    bool big_endian)
{
    // bytes must be read unsigned, otherwise values >= 0x80 would sign-extend
    const unsigned char* frame = reinterpret_cast<const unsigned char*>(buff) + offset;
    uint32_t retval = 0;
    if(big_endian) {
        // walk from the most significant byte to the least significant one
        for(uint32_t i=0; i<sample_size; i++) {
            retval = (retval << 8) | frame[i];
        }
    }
    else {
        // the sample sits at the end of the frame; its last byte is the
        // most significant one, so walk backwards
        const unsigned char* first = frame + frame_size - sample_size;
        for(uint32_t i=sample_size; i>0; i--) {
            retval = (retval << 8) | first[i-1];
        }
    }
    return retval;
}
225 | + | ||
226 | +int main(int argc, char *argv[]) | ||
227 | +{ | ||
228 | +// parse_arguments(argc, argv); | ||
229 | + uint32_t maxerr_displayed = 10; | ||
230 | + | ||
231 | + // fill jackbuffer | ||
232 | + for(int i=0; i<TESTBUFF_SIZE; i++) { | ||
233 | + // ramp | ||
234 | + jack_default_audio_sample_t value = | ||
235 | + ((jack_default_audio_sample_t)((i % TESTBUFF_SIZE) - TESTBUFF_SIZE/2)) / (TESTBUFF_SIZE/2); | ||
236 | + // force clipping | ||
237 | + value *= 1.02; | ||
238 | + jackbuffer_source[i] = value; | ||
239 | + } | ||
240 | + | ||
241 | + for(uint32_t testcase=0; testcase<sizeof(test_cases)/sizeof(test_case_data_t); testcase++) { | ||
242 | + // test mono/stereo | ||
243 | + for(uint32_t channels=1; channels<=2; channels++) { | ||
244 | + ////////////////////////////////////////////////////////////////////////////// | ||
245 | + // jackfloat -> integer | ||
246 | + | ||
247 | + // clean target buffers | ||
248 | + memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel)); | ||
249 | + memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig)); | ||
250 | + // accel | ||
251 | + clock_t time_to_integer_accel = clock(); | ||
252 | + for(uint32_t repetition=0; repetition<retry_per_case; repetition++) | ||
253 | + { | ||
254 | + test_cases[testcase].jack_to_integer_accel( | ||
255 | + integerbuffer_accel, | ||
256 | + jackbuffer_source, | ||
257 | + TESTBUFF_SIZE, | ||
258 | + test_cases[testcase].frame_size*channels, | ||
259 | + test_cases[testcase].ditherstate); | ||
260 | + } | ||
261 | + float timediff_to_integer_accel = ((float)(clock() - time_to_integer_accel)) / CLOCKS_PER_SEC; | ||
262 | + // orig | ||
263 | + clock_t time_to_integer_orig = clock(); | ||
264 | + for(uint32_t repetition=0; repetition<retry_per_case; repetition++) | ||
265 | + { | ||
266 | + test_cases[testcase].jack_to_integer_orig( | ||
267 | + integerbuffer_orig, | ||
268 | + jackbuffer_source, | ||
269 | + TESTBUFF_SIZE, | ||
270 | + test_cases[testcase].frame_size*channels, | ||
271 | + test_cases[testcase].ditherstate); | ||
272 | + } | ||
273 | + float timediff_to_integer_orig = ((float)(clock() - time_to_integer_orig)) / CLOCKS_PER_SEC; | ||
274 | + // output performance results | ||
275 | + printf( | ||
276 | + "JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n", | ||
277 | + test_cases[testcase].name, | ||
278 | + channels, | ||
279 | + timediff_to_integer_orig, | ||
280 | + timediff_to_integer_accel, | ||
281 | + (timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0); | ||
282 | + uint32_t int_deviation_max = 0; | ||
283 | + uint32_t int_error_count = 0; | ||
284 | + // output error (avoid spam -> limit error lines per test case) | ||
285 | + for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) { | ||
286 | + uint32_t sample_offset = sample*test_cases[testcase].frame_size*channels; | ||
287 | + // compare both results | ||
288 | + uint32_t intval_accel=extract_integer( | ||
289 | + integerbuffer_accel, | ||
290 | + sample_offset, | ||
291 | + test_cases[testcase].frame_size, | ||
292 | + test_cases[testcase].sample_size, | ||
293 | +#if __BYTE_ORDER == __BIG_ENDIAN | ||
294 | + !test_cases[testcase].reverse); | ||
295 | +#else | ||
296 | + test_cases[testcase].reverse); | ||
297 | +#endif | ||
298 | + uint32_t intval_orig=extract_integer( | ||
299 | + integerbuffer_orig, | ||
300 | + sample_offset, | ||
301 | + test_cases[testcase].frame_size, | ||
302 | + test_cases[testcase].sample_size, | ||
303 | +#if __BYTE_ORDER == __BIG_ENDIAN | ||
304 | + !test_cases[testcase].reverse); | ||
305 | +#else | ||
306 | + test_cases[testcase].reverse); | ||
307 | +#endif | ||
308 | + if(intval_accel != intval_orig) { | ||
309 | + if(int_error_count<maxerr_displayed) { | ||
310 | + printf("Value error sample %u:", sample); | ||
311 | + printf(" Orig 0x"); | ||
312 | + char formatstr[10]; | ||
313 | + sprintf(formatstr, "%%0%uX", test_cases[testcase].sample_size*2); | ||
314 | + printf(formatstr, intval_orig); | ||
315 | + printf(" Accel 0x"); | ||
316 | + printf(formatstr, intval_accel); | ||
317 | + printf("\n"); | ||
318 | + } | ||
319 | + int_error_count++; | ||
320 | + uint32_t int_deviation; | ||
321 | + if(intval_accel > intval_orig) | ||
322 | + int_deviation = intval_accel-intval_orig; | ||
323 | + else | ||
324 | + int_deviation = intval_orig-intval_accel; | ||
325 | + if(int_deviation > int_deviation_max) | ||
326 | + int_deviation_max = int_deviation; | ||
327 | + } | ||
328 | + } | ||
329 | + printf( | ||
330 | + "JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n", | ||
331 | + test_cases[testcase].name, | ||
332 | + channels, | ||
333 | + int_error_count, | ||
334 | + int_deviation_max); | ||
335 | + | ||
336 | + ////////////////////////////////////////////////////////////////////////////// | ||
337 | + // integer -> jackfloat | ||
338 | + | ||
339 | + // clean target buffers | ||
340 | + memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel)); | ||
341 | + memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig)); | ||
342 | + // accel | ||
343 | + clock_t time_to_float_accel = clock(); | ||
344 | + for(uint32_t repetition=0; repetition<retry_per_case; repetition++) | ||
345 | + { | ||
346 | + test_cases[testcase].integer_to_jack_accel( | ||
347 | + jackfloatbuffer_accel, | ||
348 | + integerbuffer_orig, | ||
349 | + TESTBUFF_SIZE, | ||
350 | + test_cases[testcase].frame_size*channels); | ||
351 | + } | ||
352 | + float timediff_to_float_accel = ((float)(clock() - time_to_float_accel)) / CLOCKS_PER_SEC; | ||
353 | + // orig | ||
354 | + clock_t time_to_float_orig = clock(); | ||
355 | + for(uint32_t repetition=0; repetition<retry_per_case; repetition++) | ||
356 | + { | ||
357 | + test_cases[testcase].integer_to_jack_orig( | ||
358 | + jackfloatbuffer_orig, | ||
359 | + integerbuffer_orig, | ||
360 | + TESTBUFF_SIZE, | ||
361 | + test_cases[testcase].frame_size*channels); | ||
362 | + } | ||
363 | + float timediff_to_float_orig = ((float)(clock() - time_to_float_orig)) / CLOCKS_PER_SEC; | ||
364 | + // output performance results | ||
365 | + printf( | ||
366 | + "Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n", | ||
367 | + test_cases[testcase].name, | ||
368 | + channels, | ||
369 | + timediff_to_float_orig, | ||
370 | + timediff_to_float_accel, | ||
371 | + (timediff_to_float_orig/timediff_to_float_accel-1)*100.0); | ||
372 | + jack_default_audio_sample_t float_deviation_max = 0.0; | ||
373 | + uint32_t float_error_count = 0; | ||
374 | + // output error (avoid spam -> limit error lines per test case) | ||
375 | + for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) { | ||
376 | + // For easier estimation/readabilty we scale floats back to integer | ||
377 | + jack_default_audio_sample_t sample_scaling; | ||
378 | + switch(test_cases[testcase].sample_size) { | ||
379 | + case 2: | ||
380 | + sample_scaling = SAMPLE_16BIT_SCALING; | ||
381 | + break; | ||
382 | + default: | ||
383 | + sample_scaling = SAMPLE_24BIT_SCALING; | ||
384 | + break; | ||
385 | + } | ||
386 | + jack_default_audio_sample_t floatval_accel = jackfloatbuffer_accel[sample] * sample_scaling; | ||
387 | + jack_default_audio_sample_t floatval_orig = jackfloatbuffer_orig[sample] * sample_scaling; | ||
388 | + // compare both results | ||
389 | + jack_default_audio_sample_t float_deviation; | ||
390 | + if(floatval_accel > floatval_orig) | ||
391 | + float_deviation = floatval_accel-floatval_orig; | ||
392 | + else | ||
393 | + float_deviation = floatval_orig-floatval_accel; | ||
394 | + if(float_deviation > float_deviation_max) | ||
395 | + float_deviation_max = float_deviation; | ||
396 | + // deviation > half bit => error | ||
397 | + if(float_deviation > 0.5) { | ||
398 | + if(float_error_count<maxerr_displayed) { | ||
399 | + printf("Value error sample %u:", sample); | ||
400 | + printf(" Orig %8.1f Accel %8.1f\n", floatval_orig, floatval_accel); | ||
401 | + } | ||
402 | + float_error_count++; | ||
403 | + } | ||
404 | + } | ||
405 | + printf( | ||
406 | + "Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n", | ||
407 | + test_cases[testcase].name, | ||
408 | + channels, | ||
409 | + float_error_count, | ||
410 | + float_deviation_max); | ||
411 | + | ||
412 | + printf("\n"); | ||
413 | + } | ||
414 | + } | ||
415 | + return 0; | ||
416 | +} | ||
417 | diff --git a/example-clients/wscript b/example-clients/wscript | ||
418 | index ba67614..1b2f674 100644 | ||
419 | --- a/example-clients/wscript | ||
420 | +++ b/example-clients/wscript | ||
421 | @@ -28,7 +28,8 @@ example_programs = { | ||
422 | 'jack_net_master' : 'netmaster.c', | ||
423 | 'jack_latent_client' : 'latent_client.c', | ||
424 | 'jack_midi_dump' : 'midi_dump.c', | ||
425 | - 'jack_midi_latency_test' : 'midi_latency_test.c' | ||
426 | + 'jack_midi_latency_test' : 'midi_latency_test.c', | ||
427 | + 'jack_simdtests' : 'simdtests.cpp' | ||
428 | } | ||
429 | |||
430 | example_libs = { | ||
431 | -- | ||
432 | 2.5.5 | ||
433 | |||