diff options
| author | Paul Eggleton <paul.eggleton@linux.intel.com> | 2013-04-06 00:35:40 +0100 |
|---|---|---|
| committer | Martin Jansa <Martin.Jansa@gmail.com> | 2013-04-09 21:34:17 +0200 |
| commit | 23c711ed619a0362167720fe92fcc4dd02a8caea (patch) | |
| tree | 7dacf6cce64f2e205e0dfacaf204537f8f22dc37 /meta-oe | |
| parent | d5de18c7e9816d3428050f4e3ec6590259e6af00 (diff) | |
| download | meta-openembedded-23c711ed619a0362167720fe92fcc4dd02a8caea.tar.gz | |
libmad: remove
This is largely equivalent to the recipe in OE-Core apart from
LICENSE_FLAGS, insignificant patch differences, and an additional patch
for avr32 optimisation (and since there appears to be no public layer
for an avr32 machine, there's not a great deal of point in preserving
the latter).
Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com>
Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com>
Diffstat (limited to 'meta-oe')
6 files changed, 0 insertions, 3136 deletions
diff --git a/meta-oe/recipes-multimedia/libmad/files/add-pkgconfig.patch b/meta-oe/recipes-multimedia/libmad/files/add-pkgconfig.patch deleted file mode 100644 index 636b27a928..0000000000 --- a/meta-oe/recipes-multimedia/libmad/files/add-pkgconfig.patch +++ /dev/null | |||
| @@ -1,68 +0,0 @@ | |||
| 1 | Here is a patch for adding pkg-config support to libmad. | ||
| 2 | It would make life a bit easier for distro maintainers if this was applied. | ||
| 3 | In case you didn't know, pkg-config is a tool for providing LDFLAGS and | ||
| 4 | CFLAGS for packages using shared libraries. It's on freedesktop.org. | ||
| 5 | Debian has already been distributing the pkg-config file mad.pc with | ||
| 6 | libmad for some time, and people developing on debian (notably xmms2 | ||
| 7 | developers) have started relying on this support being present, causing | ||
| 8 | some confusion for people installing from source and on some BSDs which | ||
| 9 | do not provide mad.pc (google: pkgconfig libmad). | ||
| 10 | |||
| 11 | EMH | ||
| 12 | |||
| 13 | --h31gzZEtNLTqOjlF | ||
| 14 | Content-Type: text/plain; charset=us-ascii | ||
| 15 | Content-Disposition: attachment; filename="libmad-0.15.1b-pkgconfig.patch" | ||
| 16 | |||
| 17 | diff -Naur libmad-0.15.1b.old/configure.ac libmad-0.15.1b/configure.ac | ||
| 18 | --- libmad-0.15.1b.old/configure.ac 2004-01-23 10:41:32.000000000 +0100 | ||
| 19 | +++ libmad-0.15.1b/configure.ac 2004-08-07 02:25:24.633462168 +0200 | ||
| 20 | @@ -429,5 +429,5 @@ | ||
| 21 | dnl AC_SUBST(LTLIBOBJS) | ||
| 22 | |||
| 23 | AC_CONFIG_FILES([Makefile msvc++/Makefile \ | ||
| 24 | - libmad.list]) | ||
| 25 | + libmad.list mad.pc]) | ||
| 26 | AC_OUTPUT | ||
| 27 | diff -Naur libmad-0.15.1b.old/mad.pc.in libmad-0.15.1b/mad.pc.in | ||
| 28 | --- libmad-0.15.1b.old/mad.pc.in 1970-01-01 01:00:00.000000000 +0100 | ||
| 29 | +++ libmad-0.15.1b/mad.pc.in 2004-08-07 02:04:59.617692872 +0200 | ||
| 30 | @@ -0,0 +1,14 @@ | ||
| 31 | +# libmad pkg-config source file | ||
| 32 | + | ||
| 33 | +prefix=@prefix@ | ||
| 34 | +exec_prefix=@exec_prefix@ | ||
| 35 | +libdir=@libdir@ | ||
| 36 | +includedir=@includedir@ | ||
| 37 | + | ||
| 38 | +Name: mad | ||
| 39 | +Description: MPEG Audio Decoder | ||
| 40 | +Version: @VERSION@ | ||
| 41 | +Requires: | ||
| 42 | +Conflicts: | ||
| 43 | +Libs: -L${libdir} -lmad -lm | ||
| 44 | +Cflags: -I${includedir} | ||
| 45 | diff -Naur libmad-0.15.1b.old/Makefile.am libmad-0.15.1b/Makefile.am | ||
| 46 | --- libmad-0.15.1b.old/Makefile.am 2004-02-17 03:02:03.000000000 +0100 | ||
| 47 | +++ libmad-0.15.1b/Makefile.am 2004-08-07 02:03:19.859858368 +0200 | ||
| 48 | @@ -24,6 +24,9 @@ | ||
| 49 | SUBDIRS = | ||
| 50 | DIST_SUBDIRS = msvc++ | ||
| 51 | |||
| 52 | +pkgconfigdir = $(libdir)/pkgconfig | ||
| 53 | +pkgconfig_DATA = mad.pc | ||
| 54 | + | ||
| 55 | lib_LTLIBRARIES = libmad.la | ||
| 56 | include_HEADERS = mad.h | ||
| 57 | |||
| 58 | @@ -34,7 +37,8 @@ | ||
| 59 | minimad_LDADD = libmad.la | ||
| 60 | |||
| 61 | EXTRA_DIST = mad.h.sed \ | ||
| 62 | - CHANGES COPYRIGHT CREDITS README TODO VERSION | ||
| 63 | + CHANGES COPYRIGHT CREDITS README TODO VERSION \ | ||
| 64 | + mad.pc.in | ||
| 65 | |||
| 66 | exported_headers = version.h fixed.h bit.h timer.h stream.h frame.h \ | ||
| 67 | synth.h decoder.h | ||
| 68 | |||
diff --git a/meta-oe/recipes-multimedia/libmad/files/libmad-0.15.1b-avr32-optimization.patch b/meta-oe/recipes-multimedia/libmad/files/libmad-0.15.1b-avr32-optimization.patch deleted file mode 100644 index b74eea3225..0000000000 --- a/meta-oe/recipes-multimedia/libmad/files/libmad-0.15.1b-avr32-optimization.patch +++ /dev/null | |||
| @@ -1,2922 +0,0 @@ | |||
| 1 | diff --git a/bit.c b/bit.c | ||
| 2 | index c2bfb24..262ce3a 100644 | ||
| 3 | --- a/bit.c | ||
| 4 | +++ b/bit.c | ||
| 5 | @@ -25,12 +25,6 @@ | ||
| 6 | |||
| 7 | # include "global.h" | ||
| 8 | |||
| 9 | -# ifdef HAVE_LIMITS_H | ||
| 10 | -# include <limits.h> | ||
| 11 | -# else | ||
| 12 | -# define CHAR_BIT 8 | ||
| 13 | -# endif | ||
| 14 | - | ||
| 15 | # include "bit.h" | ||
| 16 | |||
| 17 | /* | ||
| 18 | @@ -81,6 +75,8 @@ unsigned short const crc_table[256] = { | ||
| 19 | |||
| 20 | # define CRC_POLY 0x8005 | ||
| 21 | |||
| 22 | +#ifndef FPM_AVR32 | ||
| 23 | + | ||
| 24 | /* | ||
| 25 | * NAME: bit->init() | ||
| 26 | * DESCRIPTION: initialize bit pointer struct | ||
| 27 | @@ -190,6 +186,8 @@ void mad_bit_write(struct mad_bitptr *bitptr, unsigned int len, | ||
| 28 | } | ||
| 29 | # endif | ||
| 30 | |||
| 31 | +#endif | ||
| 32 | + | ||
| 33 | /* | ||
| 34 | * NAME: bit->crc() | ||
| 35 | * DESCRIPTION: compute CRC-check word | ||
| 36 | diff --git a/bit.h b/bit.h | ||
| 37 | index 5a51570..70f550a 100644 | ||
| 38 | --- a/bit.h | ||
| 39 | +++ b/bit.h | ||
| 40 | @@ -22,6 +22,92 @@ | ||
| 41 | # ifndef LIBMAD_BIT_H | ||
| 42 | # define LIBMAD_BIT_H | ||
| 43 | |||
| 44 | +# ifdef HAVE_LIMITS_H | ||
| 45 | +# include <limits.h> | ||
| 46 | +# else | ||
| 47 | +# define CHAR_BIT 8 | ||
| 48 | +# endif | ||
| 49 | + | ||
| 50 | +#ifdef FPM_AVR32 | ||
| 51 | + | ||
| 52 | +struct mad_bitptr { | ||
| 53 | + unsigned char const *byte; | ||
| 54 | + unsigned int read_bytes; | ||
| 55 | +}; | ||
| 56 | + | ||
| 57 | +/* | ||
| 58 | + * NAME: bit->init() | ||
| 59 | + * DESCRIPTION: initialize bit pointer struct | ||
| 60 | + */ | ||
| 61 | +static void mad_bit_init(struct mad_bitptr *bitptr, unsigned char const *byte) | ||
| 62 | +{ | ||
| 63 | + bitptr->byte = byte; | ||
| 64 | + bitptr->read_bytes = 0; | ||
| 65 | +} | ||
| 66 | + | ||
| 67 | +/* | ||
| 68 | + * NAME: bit->length() | ||
| 69 | + * DESCRIPTION: return number of bits between start and end points | ||
| 70 | + */ | ||
| 71 | +static unsigned int mad_bit_length(struct mad_bitptr const *begin, | ||
| 72 | + struct mad_bitptr const *end) | ||
| 73 | +{ | ||
| 74 | + return (end->read_bytes - begin->read_bytes) + | ||
| 75 | + 8 * (end->byte - begin->byte); | ||
| 76 | +} | ||
| 77 | + | ||
| 78 | +/* | ||
| 79 | + * NAME: bit->nextbyte() | ||
| 80 | + * DESCRIPTION: return pointer to next unprocessed byte | ||
| 81 | + */ | ||
| 82 | +static unsigned char const *mad_bit_nextbyte(struct mad_bitptr const *bitptr) | ||
| 83 | +{ | ||
| 84 | + return bitptr->byte + ((bitptr->read_bytes + 0x7) >> 3); | ||
| 85 | +} | ||
| 86 | + | ||
| 87 | +/* | ||
| 88 | + * NAME: bit->skip() | ||
| 89 | + * DESCRIPTION: advance bit pointer | ||
| 90 | + */ | ||
| 91 | +static void mad_bit_skip(struct mad_bitptr *bitptr, unsigned int len) | ||
| 92 | +{ | ||
| 93 | + bitptr->read_bytes += len; | ||
| 94 | + bitptr->byte += (bitptr->read_bytes >> 3); | ||
| 95 | + bitptr->read_bytes &= 0x7; | ||
| 96 | +} | ||
| 97 | + | ||
| 98 | +/* | ||
| 99 | + * NAME: bit->read() | ||
| 100 | + * DESCRIPTION: read an arbitrary number of bits and return their UIMSBF value | ||
| 101 | + */ | ||
| 102 | +static unsigned long mad_bit_read(struct mad_bitptr *bitptr, unsigned int len) | ||
| 103 | +{ | ||
| 104 | + register unsigned long value; | ||
| 105 | + | ||
| 106 | + if (!len) | ||
| 107 | + return 0; | ||
| 108 | + | ||
| 109 | + value = *(unsigned int *)bitptr->byte; | ||
| 110 | + | ||
| 111 | + value <<= bitptr->read_bytes; | ||
| 112 | + value >>= (32 - len); | ||
| 113 | + | ||
| 114 | + bitptr->read_bytes += len; | ||
| 115 | + bitptr->byte += (bitptr->read_bytes >> 3); | ||
| 116 | + bitptr->read_bytes &= 0x7; | ||
| 117 | + | ||
| 118 | + return value; | ||
| 119 | +} | ||
| 120 | + | ||
| 121 | +# define mad_bit_finish(bitptr) /* nothing */ | ||
| 122 | + | ||
| 123 | +static unsigned long mad_bit_bitsleft(struct mad_bitptr *bitptr) | ||
| 124 | +{ | ||
| 125 | + return (8 - (bitptr)->read_bytes); | ||
| 126 | +} | ||
| 127 | + | ||
| 128 | +#else /* #ifdef FPM_AVR32 */ | ||
| 129 | + | ||
| 130 | struct mad_bitptr { | ||
| 131 | unsigned char const *byte; | ||
| 132 | unsigned short cache; | ||
| 133 | @@ -42,6 +128,8 @@ void mad_bit_skip(struct mad_bitptr *, unsigned int); | ||
| 134 | unsigned long mad_bit_read(struct mad_bitptr *, unsigned int); | ||
| 135 | void mad_bit_write(struct mad_bitptr *, unsigned int, unsigned long); | ||
| 136 | |||
| 137 | +#endif | ||
| 138 | + | ||
| 139 | unsigned short mad_bit_crc(struct mad_bitptr, unsigned int, unsigned short); | ||
| 140 | |||
| 141 | # endif | ||
| 142 | diff --git a/configure.ac b/configure.ac | ||
| 143 | index 9b79399..063cb9b 100644 | ||
| 144 | --- a/configure.ac | ||
| 145 | +++ b/configure.ac | ||
| 146 | @@ -274,13 +274,14 @@ fi | ||
| 147 | AC_MSG_CHECKING(for architecture-specific fixed-point math routines) | ||
| 148 | AC_ARG_ENABLE(fpm, AC_HELP_STRING([--enable-fpm=ARCH], | ||
| 149 | [use ARCH-specific fixed-point math routines | ||
| 150 | - (one of: intel, arm, mips, sparc, ppc, 64bit, default)]), | ||
| 151 | + (one of: intel, arm, avr32, mips, sparc, ppc, 64bit, default)]), | ||
| 152 | [ | ||
| 153 | case "$enableval" in | ||
| 154 | yes) ;; | ||
| 155 | no|default|approx) FPM="DEFAULT" ;; | ||
| 156 | intel|i?86) FPM="INTEL" ;; | ||
| 157 | arm) FPM="ARM" ;; | ||
| 158 | + avr32) FPM="AVR32" ;; | ||
| 159 | mips) FPM="MIPS" ;; | ||
| 160 | sparc) FPM="SPARC" ;; | ||
| 161 | ppc|powerpc) FPM="PPC" ;; | ||
| 162 | @@ -298,6 +299,7 @@ then | ||
| 163 | case "$host" in | ||
| 164 | i?86-*) FPM="INTEL" ;; | ||
| 165 | arm*-*) FPM="ARM" ;; | ||
| 166 | + avr32*-*) FPM="AVR32" ;; | ||
| 167 | mips*-*) FPM="MIPS" ;; | ||
| 168 | sparc*-*) FPM="SPARC" ;; | ||
| 169 | powerpc*-*) FPM="PPC" ;; | ||
| 170 | @@ -343,6 +345,11 @@ then | ||
| 171 | ASO="$ASO -DASO_IMDCT" | ||
| 172 | ASO_OBJS="imdct_l_arm.lo" | ||
| 173 | ;; | ||
| 174 | + avr32*-*) | ||
| 175 | + ASO="$ASO -DASO_INTERLEAVE2" | ||
| 176 | + ASO="$ASO -DASO_ZEROCHECK" | ||
| 177 | + ASO_OBJS="dct32_avr32.lo synth_avr32.lo imdct_avr32.lo" | ||
| 178 | + ;; | ||
| 179 | mips*-*) | ||
| 180 | ASO="$ASO -DASO_INTERLEAVE2" | ||
| 181 | ASO="$ASO -DASO_ZEROCHECK" | ||
| 182 | diff --git a/configure b/configure | ||
| 183 | index ee421cc..7a9f0c8 100755 | ||
| 184 | --- a/configure | ||
| 185 | +++ b/configure | ||
| 186 | @@ -1048,7 +1048,7 @@ Optional Features: | ||
| 187 | --enable-speed optimize for speed over accuracy | ||
| 188 | --enable-accuracy optimize for accuracy over speed | ||
| 189 | --enable-fpm=ARCH use ARCH-specific fixed-point math routines (one of: | ||
| 190 | - intel, arm, mips, sparc, ppc, 64bit, default) | ||
| 191 | + intel, arm, avr32, mips, sparc, ppc, 64bit, default) | ||
| 192 | --enable-sso use subband synthesis optimization | ||
| 193 | --disable-aso disable architecture-specific optimizations | ||
| 194 | --enable-strict-iso use strict ISO/IEC interpretations | ||
| 195 | @@ -21477,6 +21477,7 @@ if test "${enable_fpm+set}" = set; then | ||
| 196 | no|default|approx) FPM="DEFAULT" ;; | ||
| 197 | intel|i?86) FPM="INTEL" ;; | ||
| 198 | arm) FPM="ARM" ;; | ||
| 199 | + avr32) FPM="AVR32" ;; | ||
| 200 | mips) FPM="MIPS" ;; | ||
| 201 | sparc) FPM="SPARC" ;; | ||
| 202 | ppc|powerpc) FPM="PPC" ;; | ||
| 203 | @@ -21498,6 +21499,7 @@ then | ||
| 204 | case "$host" in | ||
| 205 | i?86-*) FPM="INTEL" ;; | ||
| 206 | arm*-*) FPM="ARM" ;; | ||
| 207 | + avr32*-*) FPM="AVR32" ;; | ||
| 208 | mips*-*) FPM="MIPS" ;; | ||
| 209 | sparc*-*) FPM="SPARC" ;; | ||
| 210 | powerpc*-*) FPM="PPC" ;; | ||
| 211 | @@ -21554,6 +21556,11 @@ then | ||
| 212 | ASO="$ASO -DASO_IMDCT" | ||
| 213 | ASO_OBJS="imdct_l_arm.lo" | ||
| 214 | ;; | ||
| 215 | + avr32*-*) | ||
| 216 | + ASO="$ASO -DASO_INTERLEAVE2" | ||
| 217 | + ASO="$ASO -DASO_ZEROCHECK" | ||
| 218 | + ASO_OBJS="dct32_avr32.lo synth_avr32.lo imdct_avr32.lo" | ||
| 219 | + ;; | ||
| 220 | mips*-*) | ||
| 221 | ASO="$ASO -DASO_INTERLEAVE2" | ||
| 222 | ASO="$ASO -DASO_ZEROCHECK" | ||
| 223 | diff --git a/dct32_avr32.S b/dct32_avr32.S | ||
| 224 | new file mode 100644 | ||
| 225 | index 0000000..7513340 | ||
| 226 | --- /dev/null | ||
| 227 | +++ b/dct32_avr32.S | ||
| 228 | @@ -0,0 +1,780 @@ | ||
| 229 | +/* | ||
| 230 | + Optimized 32-point Discrete Cosine Transform (DCT) | ||
| 231 | + Copyright 2003-2006 Atmel Corporation. | ||
| 232 | + | ||
| 233 | + Written by Ronny Pedersen, Atmel Norway | ||
| 234 | + | ||
| 235 | + This program is free software; you can redistribute it and/or modify | ||
| 236 | + it under the terms of the GNU General Public License as published by | ||
| 237 | + the Free Software Foundation; either version 2 of the License, or | ||
| 238 | + (at your option) any later version. | ||
| 239 | + | ||
| 240 | + This program is distributed in the hope that it will be useful, | ||
| 241 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 242 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 243 | + GNU General Public License for more details. | ||
| 244 | + | ||
| 245 | + You should have received a copy of the GNU General Public License | ||
| 246 | + along with this program; if not, write to the Free Software | ||
| 247 | + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | ||
| 248 | + | ||
| 249 | +#define SHIFT 12 | ||
| 250 | +#define MAD_F_SCALEBITS 28 | ||
| 251 | +#define SLOTS 8 | ||
| 252 | + | ||
| 253 | +#define MAD_F(x) ((x + (1 << 15)) >> 16) | ||
| 254 | + | ||
| 255 | +# define costab1 MAD_F(0x7fd8878e) | ||
| 256 | +# define costab2 MAD_F(0x7f62368f) | ||
| 257 | +# define costab3 MAD_F(0x7e9d55fc) | ||
| 258 | +# define costab4 MAD_F(0x7d8a5f40) | ||
| 259 | +# define costab5 MAD_F(0x7c29fbee) | ||
| 260 | +# define costab6 MAD_F(0x7a7d055b) | ||
| 261 | +# define costab7 MAD_F(0x78848414) | ||
| 262 | +# define costab8 MAD_F(0x7641af3d) | ||
| 263 | +# define costab9 MAD_F(0x73b5ebd1) | ||
| 264 | +# define costab10 MAD_F(0x70e2cbc6) | ||
| 265 | +# define costab11 MAD_F(0x6dca0d14) | ||
| 266 | +# define costab12 MAD_F(0x6a6d98a4) | ||
| 267 | +# define costab13 MAD_F(0x66cf8120) | ||
| 268 | +# define costab14 MAD_F(0x62f201ac) | ||
| 269 | +# define costab15 MAD_F(0x5ed77c8a) | ||
| 270 | +# define costab16 MAD_F(0x5a82799a) | ||
| 271 | +# define costab17 MAD_F(0x55f5a4d2) | ||
| 272 | +# define costab18 MAD_F(0x5133cc94) | ||
| 273 | +# define costab19 MAD_F(0x4c3fdff4) | ||
| 274 | +# define costab20 MAD_F(0x471cece7) | ||
| 275 | +# define costab21 MAD_F(0x41ce1e65) | ||
| 276 | +# define costab22 MAD_F(0x3c56ba70) | ||
| 277 | +# define costab23 MAD_F(0x36ba2014) | ||
| 278 | +# define costab24 MAD_F(0x30fbc54d) | ||
| 279 | +# define costab25 MAD_F(0x2b1f34eb) | ||
| 280 | +# define costab26 MAD_F(0x25280c5e) | ||
| 281 | +# define costab27 MAD_F(0x1f19f97b) | ||
| 282 | +# define costab28 MAD_F(0x18f8b83c) | ||
| 283 | +# define costab29 MAD_F(0x12c8106f) | ||
| 284 | +# define costab30 MAD_F(0x0c8bd35e) | ||
| 285 | +# define costab31 MAD_F(0x0647d97c) | ||
| 286 | + | ||
| 287 | + | ||
| 288 | + .macro butterfly2_in out1, out2, out3, out4, in, idx_in1, idx_in2, idx_in3, idx_in4, coeff1, coeff2, tmplo, tmphi | ||
| 289 | + mov \tmplo, \coeff1 | ||
| 290 | + ld.w \out1, \in[\idx_in1 * 4] | ||
| 291 | + ld.w \out2, \in[\idx_in2 * 4] | ||
| 292 | + ld.w \out3, \in[\idx_in3 * 4] | ||
| 293 | + ld.w \out4, \in[\idx_in4 * 4] | ||
| 294 | + sub \tmphi, \out1, \out2 | ||
| 295 | + add \out1, \out2 | ||
| 296 | + mulsatrndwh.w \out2, \tmphi, \tmplo:b | ||
| 297 | + | ||
| 298 | + sub \tmphi, \out3, \out4 | ||
| 299 | + mov \tmplo, \coeff2 | ||
| 300 | + add \out3, \out4 | ||
| 301 | + mulsatrndwh.w \out4, \tmphi, \tmplo:b | ||
| 302 | + .endm | ||
| 303 | + | ||
| 304 | + .macro butterfly2 in1, in2, in3, in4, coeff1, tmplo, tmphi, tmp | ||
| 305 | + mov \tmp, \coeff1 | ||
| 306 | + sub \tmphi, \in1, \in2 | ||
| 307 | + add \in1, \in2 | ||
| 308 | + mulsatrndwh.w \in2, \tmphi, \tmp:b | ||
| 309 | + | ||
| 310 | + sub \tmphi, \in3, \in4 | ||
| 311 | + add \in3, \in4 | ||
| 312 | + mulsatrndwh.w \in4, \tmphi, \tmp:b | ||
| 313 | + .endm | ||
| 314 | + | ||
| 315 | + .macro butterfly4 in1, in2, in3, in4, in5, in6, in7, in8, coeff1, tmplo, tmphi, tmp | ||
| 316 | + mov \tmp, \coeff1 | ||
| 317 | + sub \tmphi, \in1, \in2 | ||
| 318 | + add \in1, \in2 | ||
| 319 | + mulsatrndwh.w \in2, \tmphi, \tmp:b | ||
| 320 | + | ||
| 321 | + sub \tmphi, \in3, \in4 | ||
| 322 | + add \in3, \in4 | ||
| 323 | + mulsatrndwh.w \in4, \tmphi, \tmp:b | ||
| 324 | + | ||
| 325 | + sub \tmphi, \in5, \in6 | ||
| 326 | + add \in5, \in6 | ||
| 327 | + mulsatrndwh.w \in6, \tmphi, \tmp:b | ||
| 328 | + | ||
| 329 | + sub \tmphi, \in7, \in8 | ||
| 330 | + add \in7, \in8 | ||
| 331 | + mulsatrndwh.w \in8, \tmphi, \tmp:b | ||
| 332 | + .endm | ||
| 333 | + | ||
| 334 | + .macro scale reg | ||
| 335 | + .endm | ||
| 336 | + | ||
| 337 | +/*void dct32( mad_fixed_t const in[32], unsigned int slot, | ||
| 338 | + mad_fixed_t lo[16][8], mad_fixed_t hi[16][8]) */ | ||
| 339 | + | ||
| 340 | + .global dct32_avr32 | ||
| 341 | +dct32_avr32: | ||
| 342 | + stm --sp, r0-r7, r9-r11, lr | ||
| 343 | + | ||
| 344 | + sub sp, 32*4 | ||
| 345 | + | ||
| 346 | +/* t0 = in[0] + in[31]; t16 = MUL(in[0] - in[31], costab1); | ||
| 347 | + t1 = in[15] + in[16]; t17 = MUL(in[15] - in[16], costab31); */ | ||
| 348 | + butterfly2_in r4/*t0*/, r5/*t16*/, r6/*t1*/, r7/*t17*/, r12, 0, 31, 15, 16, costab1, costab31, r10, r11 | ||
| 349 | + | ||
| 350 | +/* t41 = t16 + t17; | ||
| 351 | + t59 = MUL(t16 - t17, costab2); | ||
| 352 | + t33 = t0 + t1; | ||
| 353 | + t50 = MUL(t0 - t1, costab2);*/ | ||
| 354 | + butterfly2 r5/*t41*/, r7/*t59*/, r4/*t33*/, r6/*t50*/, costab2, r10, r11, lr | ||
| 355 | + | ||
| 356 | +/* t2 = in[7] + in[24]; t18 = MUL(in[7] - in[24], costab15); | ||
| 357 | + t3 = in[8] + in[23]; t19 = MUL(in[8] - in[23], costab17); */ | ||
| 358 | + butterfly2_in r0/*t2*/, r1/*t18*/, r2/*t3*/, r3/*t19*/, r12, 7, 24, 8, 23, costab15, costab17, r10, r11 | ||
| 359 | + | ||
| 360 | +/* t42 = t18 + t19; | ||
| 361 | + t60 = MUL(t18 - t19, costab30); | ||
| 362 | + t34 = t2 + t3; | ||
| 363 | + t51 = MUL(t2 - t3, costab30); */ | ||
| 364 | + butterfly2 r1/*t42*/, r3/*t60*/, r0/*t34*/, r2/*t51*/, costab30, r10, r11, lr | ||
| 365 | + | ||
| 366 | +/* t73 = t41 + t42; t94 = MUL(t41 - t42, costab4); | ||
| 367 | + t83 = t59 + t60; t106 = MUL(t59 - t60, costab4); */ | ||
| 368 | + | ||
| 369 | + | ||
| 370 | +/* t69 = t33 + t34; t89 = MUL(t33 - t34, costab4); | ||
| 371 | + t78 = t50 + t51; t100 = MUL(t50 - t51, costab4); */ | ||
| 372 | + butterfly4 r5/*t73*/, r1/*t94*/, r7/*t83*/, r3/*t106*/,r4/*t69*/, r0/*t89*/, r6/*t78*/, r2/*t100*/, costab4, r10, r11, lr | ||
| 373 | + | ||
| 374 | +/* Store away the computed butterflies: | ||
| 375 | + sp[0-7] = t83, t78, t73, t69, t106, t100, t94, t89 */ | ||
| 376 | + stm sp, r0-r7 | ||
| 377 | + | ||
| 378 | + | ||
| 379 | +/* t4 = in[3] + in[28]; t20 = MUL(in[3] - in[28], costab7); | ||
| 380 | + t5 = in[12] + in[19]; t21 = MUL(in[12] - in[19], costab25); */ | ||
| 381 | + butterfly2_in r4/*t4*/, r5/*t20*/, r6/*t5*/, r7/*t21*/, r12, 3, 28, 12, 19, costab7, costab25, r10, r11 | ||
| 382 | + | ||
| 383 | +/* t43 = t20 + t21; | ||
| 384 | + t61 = MUL(t20 - t21, costab14); | ||
| 385 | + t35 = t4 + t5; | ||
| 386 | + t52 = MUL(t4 - t5, costab14); */ | ||
| 387 | + butterfly2 r5/*t43*/, r7/*t61*/, r4/*t35*/, r6/*t52*/, costab14, r10, r11, lr | ||
| 388 | + | ||
| 389 | +/* t6 = in[4] + in[27]; t22 = MUL(in[4] - in[27], costab9); | ||
| 390 | + t7 = in[11] + in[20]; t23 = MUL(in[11] - in[20], costab23); */ | ||
| 391 | + butterfly2_in r0/*t6*/, r1/*t22*/, r2/*t7*/, r3/*t23*/, r12, 4, 27, 11, 20, costab9, costab23, r10, r11 | ||
| 392 | + | ||
| 393 | +/* t44 = t22 + t23; | ||
| 394 | + t62 = MUL(t22 - t23, costab18); | ||
| 395 | + t36 = t6 + t7; | ||
| 396 | + t53 = MUL(t6 - t7, costab18); */ | ||
| 397 | + butterfly2 r1/*t44*/, r3/*t62*/, r0/*t36*/, r2/*t53*/, costab18, r10, r11, lr | ||
| 398 | + | ||
| 399 | +/* t74 = t43 + t44; t95 = MUL(t43 - t44, costab28); | ||
| 400 | + t84 = t61 + t62; t107 = MUL(t61 - t62, costab28); */ | ||
| 401 | + | ||
| 402 | +/* t70 = t35 + t36; t90 = MUL(t35 - t36, costab28); | ||
| 403 | + t79 = t52 + t53; t101 = MUL(t52 - t53, costab28); */ | ||
| 404 | + butterfly4 r5/*t74*/, r1/*t95*/, r7/*t84*/, r3/*t107*/, r4/*t70*/, r0/*t90*/, r6/*t79*/, r2/*t101*/, costab28, r10, r11, lr | ||
| 405 | + | ||
| 406 | +/* Store away the computed butterflies: | ||
| 407 | + sp[8-15] = t84, t79, t74, t70, t107, t101, t95, t90 */ | ||
| 408 | + sub r10, sp, -8*4 | ||
| 409 | + stm r10, r0-r7 | ||
| 410 | + | ||
| 411 | + | ||
| 412 | +/* t8 = in[1] + in[30]; t24 = MUL(in[1] - in[30], costab3); | ||
| 413 | + t9 = in[14] + in[17]; t25 = MUL(in[14] - in[17], costab29); */ | ||
| 414 | + butterfly2_in r4/*t8*/, r5/*t24*/, r6/*t9*/, r7/*t25*/, r12, 1, 30, 14, 17, costab3, costab29, r10, r11 | ||
| 415 | + | ||
| 416 | + | ||
| 417 | +/* t45 = t24 + t25; | ||
| 418 | + t63 = MUL(t24 - t25, costab6); | ||
| 419 | + t37 = t8 + t9; | ||
| 420 | + t54 = MUL(t8 - t9, costab6); */ | ||
| 421 | + butterfly2 r5/*t45*/, r7/*t63*/, r4/*t37*/, r6/*t54*/, costab6, r10, r11, lr | ||
| 422 | + | ||
| 423 | +/* t10 = in[6] + in[25]; t26 = MUL(in[6] - in[25], costab13); | ||
| 424 | + t11 = in[9] + in[22]; t27 = MUL(in[9] - in[22], costab19); */ | ||
| 425 | + butterfly2_in r0/*t10*/, r1/*t26*/, r2/*t11*/, r3/*t27*/, r12, 6, 25, 9, 22, costab13, costab19, r10, r11 | ||
| 426 | + | ||
| 427 | +/* t46 = t26 + t27; | ||
| 428 | + t64 = MUL(t26 - t27, costab26); | ||
| 429 | + t38 = t10 + t11; | ||
| 430 | + t55 = MUL(t10 - t11, costab26); */ | ||
| 431 | + butterfly2 r1/*t46*/, r3/*t64*/, r0/*t38*/, r2/*t55*/, costab26, r10, r11, lr | ||
| 432 | + | ||
| 433 | +/* t75 = t45 + t46; t96 = MUL(t45 - t46, costab12); | ||
| 434 | + t85 = t63 + t64; t108 = MUL(t63 - t64, costab12); */ | ||
| 435 | + | ||
| 436 | +/* t71 = t37 + t38; t91 = MUL(t37 - t38, costab12); | ||
| 437 | + t80 = t54 + t55; t102 = MUL(t54 - t55, costab12); */ | ||
| 438 | + butterfly4 r5/*t75*/, r1/*t96*/, r7/*t85*/, r3/*t108*/, r4/*t71*/, r0/*t91*/, r6/*t80*/, r2/*t102*/, costab12, r10, r11, lr | ||
| 439 | + | ||
| 440 | +/* Store away the computed butterflies: | ||
| 441 | + sp[16-23] = t85, t80, t75, t71, t108, t102, t96, t91 */ | ||
| 442 | + sub r10, sp, -16*4 | ||
| 443 | + stm r10, r0-r7 | ||
| 444 | + | ||
| 445 | +/* t12 = in[2] + in[29]; t28 = MUL(in[2] - in[29], costab5); | ||
| 446 | + t13 = in[13] + in[18]; t29 = MUL(in[13] - in[18], costab27); */ | ||
| 447 | + butterfly2_in r4/*t12*/, r5/*t28*/, r6/*t13*/, r7/*t29*/, r12, 2, 29, 13, 18, costab5, costab27, r10, r11 | ||
| 448 | + | ||
| 449 | +/* t47 = t28 + t29; | ||
| 450 | + t65 = MUL(t28 - t29, costab10); | ||
| 451 | + t39 = t12 + t13; | ||
| 452 | + t56 = MUL(t12 - t13, costab10); */ | ||
| 453 | + butterfly2 r5/*t47*/, r7/*t65*/, r4/*t39*/, r6/*t56*/, costab10, r10, r11, lr | ||
| 454 | + | ||
| 455 | +/* t14 = in[5] + in[26]; t30 = MUL(in[5] - in[26], costab11); | ||
| 456 | + t15 = in[10] + in[21]; t31 = MUL(in[10] - in[21], costab21);*/ | ||
| 457 | + butterfly2_in r0/*t14*/, r1/*t30*/, r2/*t15*/, r3/*t31*/, r12, 5, 26, 10, 21, costab11, costab21, r10, r11 | ||
| 458 | + | ||
| 459 | +/* t48 = t30 + t31; | ||
| 460 | + t66 = MUL(t30 - t31, costab22); | ||
| 461 | + t40 = t14 + t15; | ||
| 462 | + t57 = MUL(t14 - t15, costab22);*/ | ||
| 463 | + butterfly2 r1/*t48*/, r3/*t66*/, r0/*t40*/, r2/*t57*/, costab22, r10, r11, lr | ||
| 464 | + | ||
| 465 | +/* t76 = t47 + t48; t97 = MUL(t47 - t48, costab20); | ||
| 466 | + t86 = t65 + t66; t109 = MUL(t65 - t66, costab20);*/ | ||
| 467 | + | ||
| 468 | +/* t72 = t39 + t40; t92 = MUL(t39 - t40, costab20); | ||
| 469 | + t81 = t56 + t57; t103 = MUL(t56 - t57, costab20);*/ | ||
| 470 | + butterfly4 r5/*t76*/, r1/*t97*/, r7/*t86*/, r3/*t109*/,r4/*t72*/, r0/*t92*/, r6/*t81*/, r2/*t103*/, costab20, r10, r11, lr | ||
| 471 | + | ||
| 472 | +/* Store away the computed butterflies: | ||
| 473 | + sp[24-31] = t86, t81, t76, t72, t109, t103, t97, t92 */ | ||
| 474 | + sub r10, sp, -24*4 | ||
| 475 | + stm r10, r0-r7 | ||
| 476 | + | ||
| 477 | +/* We now have the following on the stack: | ||
| 478 | + | ||
| 479 | + sp[0-7] = t83, t78, t73, t69, t106, t100, t94, t89 | ||
| 480 | + sp[8-15] = t84, t79, t74, t70, t107, t101, t95, t90 | ||
| 481 | + sp[16-23] = t85, t80, t75, t71, t108, t102, t96, t91 | ||
| 482 | + sp[24-31] = t86, t81, t76, t72, t109, t103, t97, t92 */ | ||
| 483 | + | ||
| 484 | +/* Load {r0...r7} = { t72, t76, t71, t75, t70, t74, t69, t73 } */ | ||
| 485 | + ld.d r6, sp[2*4] | ||
| 486 | + ld.d r4, sp[10*4] | ||
| 487 | + ld.d r2, sp[18*4] | ||
| 488 | + ld.d r0, sp[26*4] | ||
| 489 | + | ||
| 490 | + | ||
| 491 | +/* t113 = t69 + t70; | ||
| 492 | + t141 = MUL(t69 - t70, costab8); | ||
| 493 | + | ||
| 494 | + t115 = t73 + t74; | ||
| 495 | + t144 = MUL(t73 - t74, costab8); */ | ||
| 496 | + butterfly2 r6/*t113*/, r4/*t141*/, r7/*t115*/, r5/*t144*/, costab8, r10, r11, lr | ||
| 497 | + | ||
| 498 | +/* t114 = t71 + t72; | ||
| 499 | + t142 = MUL(t71 - t72, costab24); | ||
| 500 | + | ||
| 501 | + t116 = t75 + t76; | ||
| 502 | + t145 = MUL(t75 - t76, costab24); */ | ||
| 503 | + butterfly2 r2/*t114*/, r0/*t142*/, r3/*t116*/, r1/*t145*/, costab24, r10, r11, lr | ||
| 504 | + | ||
| 505 | + | ||
| 506 | +/* | ||
| 507 | + t191 = t113 + t114; | ||
| 508 | + t192 = MUL(t113 - t114, costab16) | ||
| 509 | + | ||
| 510 | + t32 = t115 + t116; | ||
| 511 | + t177 = MUL(t115 - t116, costab16) ; | ||
| 512 | + | ||
| 513 | + t143 = t141 + t142; | ||
| 514 | + t190 = MUL(t141 - t142, costab16) ; | ||
| 515 | + | ||
| 516 | + t146 = t144 + t145; | ||
| 517 | + t184 = MUL(t144 - t145, costab16) ; */ | ||
| 518 | + butterfly4 r6/*t191*/, r2/*t192*/, r7/*t32*/, r3/*t177*/, r4/*t143*/, r0/*t190*/, r5/*t146*/, r1/*t184*/, costab16, r10, r11, lr | ||
| 519 | + | ||
| 520 | +/* Store away the computed butterflies: | ||
| 521 | + sp[2-3] = t32, t191 | ||
| 522 | + sp[10-11] = t146, t143 | ||
| 523 | + sp[18-19] = t177, t192 | ||
| 524 | + sp[26-27] = t184, t190 */ | ||
| 525 | + st.d sp[2*4] , r6 | ||
| 526 | + st.d sp[10*4], r4 | ||
| 527 | + st.d sp[18*4], r2 | ||
| 528 | + st.d sp[26*4], r0 | ||
| 529 | + | ||
| 530 | +/* Load {r0...r7} = { t81, t86, t80, t85, t79, t84, t78, t83 } */ | ||
| 531 | + ld.d r6, sp[0*4] | ||
| 532 | + ld.d r4, sp[8*4] | ||
| 533 | + ld.d r2, sp[16*4] | ||
| 534 | + ld.d r0, sp[24*4] | ||
| 535 | + | ||
| 536 | + | ||
| 537 | +/* t118 = t78 + t79; | ||
| 538 | + t148 = MUL(t78 - t79, costab8); | ||
| 539 | + | ||
| 540 | + t121 = t83 + t84; | ||
| 541 | + t152 = MUL(t83 - t84, costab8); */ | ||
| 542 | + butterfly2 r6/*t118*/, r4/*t148*/, r7/*t121*/, r5/*t152*/, costab8, r10, r11, lr | ||
| 543 | + | ||
| 544 | +/* t119 = t80 + t81; | ||
| 545 | + t149 = MUL(t80 - t81, costab24); | ||
| 546 | + | ||
| 547 | + t122 = t85 + t86; | ||
| 548 | + t153 = MUL(t85 - t86, costab24); */ | ||
| 549 | + butterfly2 r2/*t119*/, r0/*t149*/, r3/*t122*/, r1/*t153*/, costab24, r10, r11, lr | ||
| 550 | + | ||
| 551 | + | ||
| 552 | + | ||
| 553 | +/* t58 = t118 + t119; | ||
| 554 | + t178 = MUL(t118 - t119, costab16) ; | ||
| 555 | + | ||
| 556 | + t67 = t121 + t122; | ||
| 557 | + t179 = MUL(t121 - t122, costab16) ; | ||
| 558 | + | ||
| 559 | + t150 = t148 + t149; | ||
| 560 | + t185 = MUL(t148 - t149, costab16) ; | ||
| 561 | + | ||
| 562 | + t154 = t152 + t153; | ||
| 563 | + t186 = MUL(t152 - t153, costab16) ; */ | ||
| 564 | + butterfly4 r6/*t58*/, r2/*t178*/, r7/*t67*/, r3/*t179*/, r4/*t150*/, r0/*t185*/, r5/*t154*/, r1/*t186*/, costab16, r10, r11, lr | ||
| 565 | + | ||
| 566 | +/* Store away the computed butterflies: | ||
| 567 | + sp[0-1] = t67, t58 | ||
| 568 | + sp[8-9] = t154, t150 | ||
| 569 | + sp[16-17] = t179, t178 | ||
| 570 | + sp[24-25] = t186, t185 */ | ||
| 571 | + st.d sp[0*4] , r6 | ||
| 572 | + st.d sp[8*4], r4 | ||
| 573 | + st.d sp[16*4], r2 | ||
| 574 | + st.d sp[24*4], r0 | ||
| 575 | + | ||
| 576 | +/* Load {r0...r7} = { t92, t97, t91, t96, t90, t95, t89, t94 } */ | ||
| 577 | + ld.d r6, sp[6*4] | ||
| 578 | + ld.d r4, sp[14*4] | ||
| 579 | + ld.d r2, sp[22*4] | ||
| 580 | + ld.d r0, sp[30*4] | ||
| 581 | + | ||
| 582 | + | ||
| 583 | +/* t125 = t89 + t90; | ||
| 584 | + t157 = MUL(t89 - t90, costab8); | ||
| 585 | + | ||
| 586 | + t128 = t94 + t95; | ||
| 587 | + t161 = MUL(t94 - t95, costab8); */ | ||
| 588 | + butterfly2 r6/*t125*/, r4/*t157*/, r7/*t128*/, r5/*t161*/, costab8, r10, r11, lr | ||
| 589 | + | ||
| 590 | +/* t126 = t91 + t92; | ||
| 591 | + t158 = MUL(t91 - t92, costab24); | ||
| 592 | + | ||
| 593 | + t129 = t96 + t97; | ||
| 594 | + t162 = MUL(t96 - t97, costab24); */ | ||
| 595 | + butterfly2 r2/*t126*/, r0/*t158*/, r3/*t129*/, r1/*t162*/, costab24, r10, r11, lr | ||
| 596 | + | ||
| 597 | + | ||
| 598 | +/* | ||
| 599 | + t93 = t125 + t126; | ||
| 600 | + t180 = MUL(t125 - t126, costab16) ; | ||
| 601 | + | ||
| 602 | + t98 = t128 + t129; | ||
| 603 | + t181 = MUL(t128 - t129, costab16) ; | ||
| 604 | + | ||
| 605 | + t159 = t157 + t158; | ||
| 606 | + t187 = MUL(t157 - t158, costab16) ; | ||
| 607 | + | ||
| 608 | + t163 = t161 + t162; | ||
| 609 | + t188 = MUL(t161 - t162, costab16) ; */ | ||
| 610 | + butterfly4 r6/*t93*/, r2/*t180*/, r7/*t98*/, r3/*t181*/, r4/*t159*/, r0/*t187*/, r5/*t163*/, r1/*t188*/, costab16, r10, r11, lr | ||
| 611 | + | ||
| 612 | + | ||
| 613 | +/* Store away the computed butterflies: | ||
| 614 | + sp[6-7] = t98, t93 | ||
| 615 | + sp[14-15] = t163, t159 | ||
| 616 | + sp[22-23] = t181, t180 | ||
| 617 | + sp[30-31] = t188, t187 */ | ||
| 618 | + st.d sp[6*4] , r6 | ||
| 619 | + st.d sp[14*4], r4 | ||
| 620 | + st.d sp[22*4], r2 | ||
| 621 | + st.d sp[30*4], r0 | ||
| 622 | + | ||
| 623 | +/* Load {r0...r7} = { t103, t109, t102, t108, t101, t107, t100, t106 } */ | ||
| 624 | + ld.d r6, sp[4*4] | ||
| 625 | + ld.d r4, sp[12*4] | ||
| 626 | + ld.d r2, sp[20*4] | ||
| 627 | + ld.d r0, sp[28*4] | ||
| 628 | + | ||
| 629 | + | ||
| 630 | + | ||
| 631 | +/* t132 = t100 + t101; | ||
| 632 | + t166 = MUL(t100 - t101, costab8); | ||
| 633 | + | ||
| 634 | + t136 = t106 + t107; | ||
| 635 | + t171 = MUL(t106 - t107, costab8); */ | ||
| 636 | + butterfly2 r6/*t132*/, r4/*t166*/, r7/*t136*/, r5/*t171*/, costab8, r10, r11, lr | ||
| 637 | + | ||
| 638 | +/* t133 = t102 + t103; | ||
| 639 | + t167 = MUL(t102 - t103, costab24); | ||
| 640 | + | ||
| 641 | + t137 = t108 + t109; | ||
| 642 | + t172 = MUL(t108 - t109, costab24);*/ | ||
| 643 | + butterfly2 r2/*t133*/, r0/*t167*/, r3/*t137*/, r1/*t172*/, costab24, r10, r11, lr | ||
| 644 | + | ||
| 645 | + | ||
| 646 | +/* t104 = t132 + t133; | ||
| 647 | + t182 = MUL(t132 - t133, costab16) ; | ||
| 648 | + | ||
| 649 | + t110 = t136 + t137; | ||
| 650 | + t183 = MUL(t136 - t137, costab16) ; | ||
| 651 | + | ||
| 652 | + t168 = t166 + t167; | ||
| 653 | + t189 = MUL(t166 - t167, costab16) ; | ||
| 654 | + | ||
| 655 | + t173 = t171 + t172; | ||
| 656 | + t208 = MUL(t171 - t172, costab16) ; */ | ||
| 657 | + butterfly4 r6/*t104*/, r2/*t182*/, r7/*t110*/, r3/*t183*/, r4/*t168*/, r0/*t189*/, r5/*t173*/, r1/*t208*/, costab16, r10, r11, lr | ||
| 658 | + | ||
| 659 | +/* Store away the computed butterflies: | ||
| 660 | + sp[4-5] = t110, t104 | ||
| 661 | + sp[12-13] = t173, t168 | ||
| 662 | + sp[20-21] = t183, t182 | ||
| 663 | + sp[28-29] = t208, t189 */ | ||
| 664 | + st.d sp[4*4] , r6 | ||
| 665 | + st.d sp[12*4], r4 | ||
| 666 | + st.d sp[20*4], r2 | ||
| 667 | + st.d sp[28*4], r0 | ||
| 668 | + | ||
| 669 | +/* Now we have the following stack | ||
| 670 | + | ||
| 671 | + sp[0-7] = t67, t58 , t32, t191, t110, t104, t98, t93 | ||
| 672 | + sp[8-15] = t154, t150, t146, t143, t173, t168, t163, t159 | ||
| 673 | + sp[16-23] = t179, t178, t177, t192, t183, t182, t181, t180 | ||
| 674 | + sp[24-31] = t186, t185, t184, t190, t208, t189, t188, t187 | ||
| 675 | +*/ | ||
| 676 | + | ||
| 677 | + /* Get slot, lo and hi from stack */ | ||
| 678 | + lddsp lr, sp[32*4 + 4] /*slot*/ | ||
| 679 | + lddsp r12, sp[32*4 + 8] /*lo*/ | ||
| 680 | + lddsp r11, sp[32*4 + 12] /*hi*/ | ||
| 681 | + | ||
| 682 | + add r12, r12, lr << 2 | ||
| 683 | + add r11, r11, lr << 2 | ||
| 684 | + | ||
| 685 | + | ||
| 686 | +/* t49 = -(t67 * 2) + t32; | ||
| 687 | + hi[14][slot] = SHIFT(t32); | ||
| 688 | + t87 = -(t110 * 2) + t67; | ||
| 689 | + t138 = -(t173 * 2) + t110; | ||
| 690 | + t203 = -(t208 * 2) + t173; */ | ||
| 691 | + | ||
| 692 | + lddsp r0/*t67*/, sp[0] | ||
| 693 | + lddsp r1/*t32*/, sp[2*4] | ||
| 694 | + lddsp r2/*t110*/, sp[4*4] | ||
| 695 | + lddsp r3/*t173*/, sp[12*4] | ||
| 696 | + lddsp r5/*t208*/, sp[28*4] | ||
| 697 | + | ||
| 698 | + sub r4/*t49*/, r1, r0 << 1 | ||
| 699 | + scale r1 | ||
| 700 | + sub r0/*t87*/, r0, r2 << 1 | ||
| 701 | + st.w r11[14*SLOTS*4], r1 | ||
| 702 | + sub r2/*t138*/, r2, r3 << 1 | ||
| 703 | + sub r1/*t203*/, r3, r5 << 1 | ||
| 704 | + | ||
| 705 | +/* Live: r0 = t87, r1= t203, r2= t138, r4 = t49 | ||
| 706 | + Free: r3, r5, r6, r7, r8, r9, r10, lr */ | ||
| 707 | + | ||
| 708 | +/* t68 = (t98 * 2) + t49; | ||
| 709 | + hi[12][slot] = SHIFT(-t49); | ||
| 710 | + t130 = -(t163 * 2) + t98; | ||
| 711 | + t201 = -(t188 * 2) + t163; | ||
| 712 | + t200 = -(t186 * 2) + t154; | ||
| 713 | + t111 = (t154 * 2) + t87; | ||
| 714 | + t77 = -(-(t87 * 2) - t68); | ||
| 715 | + t88 = (t146 * 2) + t77; | ||
| 716 | + t199 = -(t184 * 2) + t146; | ||
| 717 | + hi[ 8][slot] = SHIFT(-t77); | ||
| 718 | + hi[10][slot] = SHIFT(t68);*/ | ||
| 719 | + lddsp r3/*t98*/, sp[6*4] | ||
| 720 | + lddsp r5/*t163*/, sp[14*4] | ||
| 721 | + lddsp r6/*t188*/, sp[30*4] | ||
| 722 | + lddsp r10/*t186*/, sp[24*4] | ||
| 723 | + | ||
| 724 | + add r7/*t68*/, r4, r3 << 1 | ||
| 725 | + neg r4 | ||
| 726 | + scale r4 | ||
| 727 | + lddsp r9/*t154*/, sp[8*4] | ||
| 728 | + sub r3/*t130*/, r3, r5 << 1 | ||
| 729 | + st.w r11[12*SLOTS*4], r4 | ||
| 730 | + sub r8/*t201*/, r5, r6 << 1 | ||
| 731 | + sub r4/*t200*/, r9, r10 << 1 | ||
| 732 | + lddsp lr/*t146*/, sp[10*4] | ||
| 733 | + lddsp r6/*t184*/, sp[26*4] | ||
| 734 | + add r10/*t111*/, r0, r9 << 1 | ||
| 735 | + add r5/*t77*/,r7, r0 << 1 | ||
| 736 | + add r0/*t88*/, r5, lr << 1 | ||
| 737 | + sub r6/*t199*/, lr, r6 << 1 | ||
| 738 | + neg r5 | ||
| 739 | + scale r5 | ||
| 740 | + scale r7 | ||
| 741 | + st.w r11[8*SLOTS*4], r5 | ||
| 742 | + st.w r11[10*SLOTS*4], r7 | ||
| 743 | + | ||
| 744 | +/* Live: r0 = t88, r1= t203, r2= t138, r3 = t130, r4 = t200, | ||
| 745 | + r6 = t199, r8 = t201, r10 = t111 | ||
| 746 | + Free: r5, r7, r9, lr */ | ||
| 747 | + | ||
| 748 | + | ||
| 749 | +/* | ||
| 750 | + t123 = -(-(t138 * 2) - t111); | ||
| 751 | + t174 = (t183 * 2) + t138; | ||
| 752 | + t99 = -(t111 * 2) + t88; | ||
| 753 | + hi[ 6][slot] = SHIFT(t88); */ | ||
| 754 | + lddsp r5/*t183*/, sp[20*4] | ||
| 755 | + | ||
| 756 | + add r7/*t123*/, r10, r2 << 1 | ||
| 757 | + sub r10/*t99*/, r0, r10 << 1 | ||
| 758 | + scale r0 | ||
| 759 | + add r2/*t174*/, r2, r5 << 1 | ||
| 760 | + st.w r11[6*SLOTS*4], r0 | ||
| 761 | + | ||
| 762 | +/* Live: r1 = t203, r2 = t174, r3 = t130, r4 = t200, | ||
| 763 | + r6 = t199, r7 = t123, r8 = t201, r10 = t99 | ||
| 764 | + Free: r0, r5, r9, lr */ | ||
| 765 | + | ||
| 766 | +/* t112 = -(t130 * 2) + t99; | ||
| 767 | + t164 = (t181 * 2) + t130; | ||
| 768 | + hi[ 4][slot] = SHIFT(-t99); */ | ||
| 769 | + lddsp r0/*t181*/, sp[22*4] | ||
| 770 | + | ||
| 771 | + sub r5/*t112*/, r10, r3 << 1 | ||
| 772 | + neg r10 | ||
| 773 | + scale r10 | ||
| 774 | + add r3/*t164*/, r3, r0 << 1 | ||
| 775 | + st.w r11[4*SLOTS*4], r10 | ||
| 776 | + | ||
| 777 | +/* Live: r1 = t203, r2 = t174, r3 = t164, r4 = t200, | ||
| 778 | + r5 = t112, r6 = t199, r7 = t123, r8 = t201 | ||
| 779 | + Free: r0, r9, r10, lr */ | ||
| 780 | + | ||
| 781 | + | ||
| 782 | +/* t117 = -(-(t123 * 2) - t112); | ||
| 783 | + t139 = (t179 * 2) + t123; | ||
| 784 | + hi[ 2][slot] = SHIFT(t112); */ | ||
| 785 | + lddsp r0/*t179*/, sp[16*4] | ||
| 786 | + | ||
| 787 | + add r9/*t117*/, r5, r7 << 1 | ||
| 788 | + scale r5 | ||
| 789 | + add r7/*t139*/, r7, r0 << 1 | ||
| 790 | + st.w r11[2*SLOTS*4], r5 | ||
| 791 | + | ||
| 792 | +/* Live: r1 = t203, r2 = t174, r3 = t164, r4 = t200, | ||
| 793 | + r6 = t199, r7 = t139, r8 = t201, r9 = t117 | ||
| 794 | + Free: r0, r5, r10, lr */ | ||
| 795 | + | ||
| 796 | +/* t155 = -(t174 * 2) + t139; | ||
| 797 | + t204 = -(-(t203 * 2) - t174); | ||
| 798 | + t124 = (t177 * 2) + t117; | ||
| 799 | + hi[ 0][slot] = SHIFT(-t117); | ||
| 800 | + t131 = -(t139 * 2) + t124; | ||
| 801 | + lo[ 1][slot] = SHIFT(t124);*/ | ||
| 802 | + lddsp r0/*t177*/, sp[18*4] | ||
| 803 | + | ||
| 804 | + sub r5/*t155*/, r7, r2 << 1 | ||
| 805 | + add r2/*t204*/, r2, r1 << 1 | ||
| 806 | + add r0/*t124*/, r9, r0 << 1 | ||
| 807 | + neg r9 | ||
| 808 | + scale r9 | ||
| 809 | + sub r7/*t131*/, r0, r7 << 1 | ||
| 810 | + scale r0 | ||
| 811 | + st.w r11[0*SLOTS*4], r9 | ||
| 812 | + st.w r12[1*SLOTS*4], r0 | ||
| 813 | + | ||
| 814 | +/* Live: r2 = t204, r3 = t164, r4 = t200, | ||
| 815 | + r5 = t155, r6 = t199, r7 = t131, r8 = t201 | ||
| 816 | + Free: r0, r1, r9, r10, lr */ | ||
| 817 | + | ||
| 818 | +/* t140 = (t164 * 2) + t131; | ||
| 819 | + lo[ 3][slot] = SHIFT(-t131); | ||
| 820 | + t202 = -(-(t201 * 2) - t164); */ | ||
| 821 | + add r0/*t140*/, r7, r3 << 1 | ||
| 822 | + neg r7 | ||
| 823 | + scale r7 | ||
| 824 | + add r3/*t202*/, r3, r8 << 1 | ||
| 825 | + st.w r12[3*SLOTS*4], r7 | ||
| 826 | + | ||
| 827 | +/* Live: r0 = t140, r2 = t204, r3 = t202, r4 = t200, | ||
| 828 | + r5 = t155, r6 = t199 | ||
| 829 | + Free: r1, r7, r8, r9, r10, lr */ | ||
| 830 | + | ||
| 831 | + | ||
| 832 | +/* t147 = -(-(t155 * 2) - t140); | ||
| 833 | + lo[ 5][slot] = SHIFT(t140); | ||
| 834 | + t175 = -(t200 * 2) + t155; | ||
| 835 | + t156 = -(t199 * 2) + t147; | ||
| 836 | + lo[ 7][slot] = SHIFT(-t147); */ | ||
| 837 | + add r1/*t147*/, r0, r5 << 1 | ||
| 838 | + scale r0 | ||
| 839 | + sub r5/*t175*/, r5, r4 << 1 | ||
| 840 | + sub r4/*t156*/, r1, r6 << 1 | ||
| 841 | + neg r1 | ||
| 842 | + scale r1 | ||
| 843 | + st.w r12[5*SLOTS*4], r0 | ||
| 844 | + st.w r12[7*SLOTS*4], r1 | ||
| 845 | + | ||
| 846 | +/* Live: r2 = t204, r3 = t202, | ||
| 847 | + r4 = t156, r5 = t175 | ||
| 848 | + Free: r0, r1, r6, r7, r8, r9, r10, lr */ | ||
| 849 | + | ||
| 850 | + | ||
| 851 | +/* t205 = -(-(t204 * 2) - t175); | ||
| 852 | + t165 = -(t175 * 2) + t156; | ||
| 853 | + lo[ 9][slot] = SHIFT(t156); | ||
| 854 | + t176 = -(t202 * 2) + t165; | ||
| 855 | + lo[11][slot] = SHIFT(-t165); | ||
| 856 | + t206 = -(-(t205 * 2) - t176); | ||
| 857 | + lo[15][slot] = SHIFT(-t206) | ||
| 858 | + lo[13][slot] = SHIFT(t176) */ | ||
| 859 | + add r0/*t205*/, r5, r2 << 1 | ||
| 860 | + sub r1/*t165*/, r4, r5 << 1 | ||
| 861 | + scale r4 | ||
| 862 | + sub r3/*t176*/, r1, r3 << 1 | ||
| 863 | + st.w r12[9*SLOTS*4], r4 | ||
| 864 | + neg r1 | ||
| 865 | + scale r1 | ||
| 866 | + add r6/*t206*/, r3, r0 << 1 | ||
| 867 | + neg r6 | ||
| 868 | + scale r6 | ||
| 869 | + scale r3 | ||
| 870 | + st.w r12[11*SLOTS*4], r1 | ||
| 871 | + st.w r12[15*SLOTS*4], r6 | ||
| 872 | + st.w r12[13*SLOTS*4], r3 | ||
| 873 | + | ||
| 874 | +/* t193 = -((t190 * 2) - t143) | ||
| 875 | + hi[ 7][slot] = SHIFT(t143); | ||
| 876 | + lo[ 8][slot] = SHIFT(-t193); | ||
| 877 | + t82 = -(t104 * 2) + t58; | ||
| 878 | + hi[13][slot] = SHIFT(t58); | ||
| 879 | + t134 = -(t168 * 2) + t104; | ||
| 880 | + t196 = -(t189 * 2) + t168; */ | ||
| 881 | + | ||
| 882 | + lddsp r0/*t190*/, sp[27*4] | ||
| 883 | + lddsp r1/*t143*/, sp[11*4] | ||
| 884 | + lddsp r2/*t104*/, sp[5*4] | ||
| 885 | + lddsp r3/*t58*/, sp[1*4] | ||
| 886 | + lddsp r4/*t168*/, sp[13*4] | ||
| 887 | + lddsp r5/*t189*/, sp[29*4] | ||
| 888 | + sub r0/*t193*/, r1, r0 << 1 | ||
| 889 | + neg r0 | ||
| 890 | + scale r1 | ||
| 891 | + scale r0 | ||
| 892 | + st.w r11[7*SLOTS*4], r1 | ||
| 893 | + st.w r12[8*SLOTS*4], r0 | ||
| 894 | + sub r0/*t82*/, r3, r2 << 1 | ||
| 895 | + scale r3 | ||
| 896 | + sub r2/*t134*/, r2, r4 << 1 | ||
| 897 | + sub r4/*t196*/, r4, r5 << 1 | ||
| 898 | + st.w r11[13*SLOTS*4], r3 | ||
| 899 | + | ||
| 900 | +/* Live: r0 = t82, r2 = t134, | ||
| 901 | + r4 = t196 | ||
| 902 | + Free: r1, r3, r5, r6, r7, r8, r9, r10, lr */ | ||
| 903 | + | ||
| 904 | + | ||
| 905 | + | ||
| 906 | +/* | ||
| 907 | + | ||
| 908 | + t207 = -(t185 * 2) + t150; | ||
| 909 | + t105 = (t150 * 2) + t82; | ||
| 910 | + hi[ 9][slot] = SHIFT(-t82); | ||
| 911 | + t120 = -(-(t134 * 2) - t105); | ||
| 912 | + hi[ 5][slot] = SHIFT(t105); | ||
| 913 | + t169 = (t182 * 2) + t134; | ||
| 914 | + | ||
| 915 | + t135 = (t178 * 2) + t120; | ||
| 916 | + hi[ 1][slot] = SHIFT(-t120); | ||
| 917 | + t197 = -(-(t196 * 2) - t169); | ||
| 918 | + t151 = -(t169 * 2) + t135; | ||
| 919 | + lo[ 2][slot] = SHIFT(t135); */ | ||
| 920 | + lddsp r1/*t185*/, sp[25*4] | ||
| 921 | + lddsp r3/*t150*/, sp[9*4] | ||
| 922 | + lddsp r5/*t182*/, sp[21*4] | ||
| 923 | + lddsp r8/*t178*/, sp[17*4] | ||
| 924 | + | ||
| 925 | + sub r6/*t207*/, r3, r1 << 1 | ||
| 926 | + add r3/*t105*/, r0, r3 << 1 | ||
| 927 | + neg r0 | ||
| 928 | + scale r0 | ||
| 929 | + add r7/*t120*/, r3, r2 << 1 | ||
| 930 | + scale r3 | ||
| 931 | + st.w r11[9*SLOTS*4], r0 | ||
| 932 | + st.w r11[5*SLOTS*4], r3 | ||
| 933 | + add r2/*t169*/, r2, r5 << 1 | ||
| 934 | + add r8/*t135*/, r7, r8 << 1 | ||
| 935 | + neg r7 | ||
| 936 | + scale r7 | ||
| 937 | + add r4/*t197*/, r2, r4 << 1 | ||
| 938 | + sub r2/*t151*/, r8, r2 << 1 | ||
| 939 | + scale r8 | ||
| 940 | + st.w r11[1*SLOTS*4], r7 | ||
| 941 | + st.w r12[2*SLOTS*4], r8 | ||
| 942 | + | ||
| 943 | +/* Live: r2 = t151, r4 = t197, r6 = t207 | ||
| 944 | + | ||
| 945 | + Free: r0, r1, r3, r5, r7, r8, r9, r10, lr */ | ||
| 946 | + | ||
| 947 | + | ||
| 948 | + | ||
| 949 | +/* t170 = -(t207 * 2) + t151; | ||
| 950 | + lo[ 6][slot] = SHIFT(-t151); | ||
| 951 | + | ||
| 952 | + t198 = -(-(t197 * 2) - t170); | ||
| 953 | + lo[10][slot] = SHIFT(t170); | ||
| 954 | + lo[14][slot] = SHIFT(-t198); | ||
| 955 | + | ||
| 956 | + t127 = -(t159 * 2) + t93; | ||
| 957 | + hi[11][slot] = SHIFT(t93); | ||
| 958 | + t194 = -(t187 * 2) + t159; */ | ||
| 959 | + lddsp r0/*t159*/, sp[15*4] | ||
| 960 | + lddsp r1/*t93*/, sp[7*4] | ||
| 961 | + lddsp r3/*t187*/, sp[31*4] | ||
| 962 | + sub r5/*t170*/, r2, r6 << 1 | ||
| 963 | + neg r2 | ||
| 964 | + scale r2 | ||
| 965 | + add r4/*t198*/,r5, r4 << 1 | ||
| 966 | + neg r4 | ||
| 967 | + scale r5 | ||
| 968 | + scale r4 | ||
| 969 | + st.w r12[6*SLOTS*4], r2 | ||
| 970 | + st.w r12[10*SLOTS*4], r5 | ||
| 971 | + st.w r12[14*SLOTS*4], r4 | ||
| 972 | + sub r7/*t127*/, r1, r0 << 1 | ||
| 973 | + scale r1 | ||
| 974 | + sub r0/*t194*/, r0, r3 << 1 | ||
| 975 | + st.w r11[11*SLOTS*4], r1 | ||
| 976 | + | ||
| 977 | + | ||
| 978 | +/* Live: r0 = t194, r7 = t127 | ||
| 979 | + Free: r1, r2, r3, r4, r6, r5, r8, r9, r10, lr */ | ||
| 980 | + | ||
| 981 | +/* t160 = (t180 * 2) + t127; | ||
| 982 | + hi[ 3][slot] = SHIFT(-t127); | ||
| 983 | + t195 = -(-(t194 * 2) - t160); | ||
| 984 | + lo[ 4][slot] = SHIFT(t160); | ||
| 985 | + lo[12][slot] = SHIFT(-t195); | ||
| 986 | + | ||
| 987 | + hi[15][slot] = SHIFT(t191); | ||
| 988 | + lo[ 0][slot] = SHIFT(t192); */ | ||
| 989 | + lddsp r1/*t180*/, sp[23*4] | ||
| 990 | + lddsp r2/*t191*/, sp[3*4] | ||
| 991 | + lddsp r3/*t192*/, sp[19*4] | ||
| 992 | + add r4/*t160*/, r7, r1 << 1 | ||
| 993 | + neg r7 | ||
| 994 | + scale r7 | ||
| 995 | + add r6/*t195*/, r4, r0 << 1 | ||
| 996 | + scale r4 | ||
| 997 | + neg r6 | ||
| 998 | + scale r6 | ||
| 999 | + st.w r11[3*SLOTS*4], r7 | ||
| 1000 | + st.w r12[4*SLOTS*4], r4 | ||
| 1001 | + st.w r12[12*SLOTS*4], r6 | ||
| 1002 | + scale r2 | ||
| 1003 | + scale r3 | ||
| 1004 | + st.w r11[15*SLOTS*4], r2 | ||
| 1005 | + st.w r12[0*SLOTS*4], r3 | ||
| 1006 | + | ||
| 1007 | + sub sp, -32*4 | ||
| 1008 | + ldm sp++,r0-r7, r9-r11, pc | ||
| 1009 | diff --git a/fixed.h b/fixed.h | ||
| 1010 | index 4b58abf..0a1350a 100644 | ||
| 1011 | --- a/fixed.h | ||
| 1012 | +++ b/fixed.h | ||
| 1013 | @@ -237,6 +237,46 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y) | ||
| 1014 | # define MAD_F_SCALEBITS MAD_F_FRACBITS | ||
| 1015 | # endif | ||
| 1016 | |||
| 1017 | +/* --- AVR32 ----------------------------------------------------------------- */ | ||
| 1018 | + | ||
| 1019 | +# elif defined(FPM_AVR32) | ||
| 1020 | + | ||
| 1021 | +typedef signed short mad_coeff_t; | ||
| 1022 | + | ||
| 1023 | +struct DWstruct {int high, low;}; /* the two int words that DWunion overlays on a long long */ | ||
| 1024 | + | ||
| 1025 | +typedef union { | ||
| 1026 | + struct DWstruct s; /* word-wise view; NOTE(review): high-before-low lines up with ll only on a big-endian target with 32-bit int - confirm */ | ||
| 1027 | + long long ll; /* whole-value view used for the multiply/accumulate in MAD_F_MLX / MAD_F_MLA */ | ||
| 1028 | +} DWunion; | ||
| 1029 | + | ||
| 1030 | +# define MAD_F_MLX(hi, lo, x, y) /* (hi, lo) = .high/.low words of the long long product x * y */ \ | ||
| 1031 | + { register DWunion __res; \ | ||
| 1032 | + __res.ll = (long long)(x) * (long long)(y); \ | ||
| 1033 | + /* arguments are parenthesized so expressions like a + b multiply as a whole; disabled asm alternative: asm ("muls.d\t%0, %1, %2" : "=r" (__res.ll) : "r" (x), "r" (y)); */ \ | ||
| 1034 | + (hi) = __res.s.high; \ | ||
| 1035 | + (lo) = __res.s.low; } | ||
| 1036 | + | ||
| 1037 | +# define MAD_F_MLA(hi, lo, x, y) /* loads (hi, lo) as the .high/.low words of a long long, adds x * y, writes both words back */ \ | ||
| 1038 | + { register DWunion __res; \ | ||
| 1039 | + __res.s.high = (hi); \ | ||
| 1040 | + __res.s.low = (lo); \ | ||
| 1041 | + __res.ll += (long long)(x) * (long long)(y); \ | ||
| 1042 | +/* arguments are parenthesized so compound expressions multiply as a whole; disabled asm alternative: asm ("macs.d\t%0, %1, %2" : "+r" (__res.ll) : "r" (x), "r" (y)); */ \ | ||
| 1043 | + (hi) = __res.s.high; \ | ||
| 1044 | + (lo) = __res.s.low; } | ||
| 1045 | + | ||
| 1046 | + | ||
| 1047 | +# define MAD_F_MLN(hi, lo) /* NOTE(review): presumably negates the 64-bit (hi, lo) pair in place - neg lo, add carry into hi, neg hi; confirm against the AVR32 instruction set */ \ | ||
| 1048 | + asm ("neg %0\n" \ | ||
| 1049 | + "acr %1\n" \ | ||
| 1050 | + "neg %1" \ | ||
| 1051 | + : "+r" (lo), "+r" (hi) \ | ||
| 1052 | + :: "cc") | ||
| 1053 | + | ||
| 1054 | + | ||
| 1055 | +# define MAD_F_SCALEBITS MAD_F_FRACBITS | ||
| 1056 | + | ||
| 1057 | /* --- ARM ----------------------------------------------------------------- */ | ||
| 1058 | |||
| 1059 | # elif defined(FPM_ARM) | ||
| 1060 | @@ -433,6 +473,8 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y) | ||
| 1061 | * | ||
| 1062 | * Pre-rounding is required to stay within the limits of compliance. | ||
| 1063 | */ | ||
| 1064 | +typedef signed int mad_coeff_t; | ||
| 1065 | + | ||
| 1066 | # if defined(OPT_SPEED) | ||
| 1067 | # define mad_f_mul(x, y) (((x) >> 12) * ((y) >> 16)) | ||
| 1068 | # else | ||
| 1069 | diff --git a/imdct_avr32.S b/imdct_avr32.S | ||
| 1070 | new file mode 100644 | ||
| 1071 | index 0000000..d0ee6b4 | ||
| 1072 | --- /dev/null | ||
| 1073 | +++ b/imdct_avr32.S | ||
| 1074 | @@ -0,0 +1,789 @@ | ||
| 1075 | +/* | ||
| 1076 | + Optimized 36-point Inverse Modified Discrete Cosine Transform (IMDCT) | ||
| 1077 | + Copyright 2003-2006 Atmel Corporation. | ||
| 1078 | + | ||
| 1079 | + Written by Ronny Pedersen, Atmel Norway | ||
| 1080 | + | ||
| 1081 | + This program is free software; you can redistribute it and/or modify | ||
| 1082 | + it under the terms of the GNU General Public License as published by | ||
| 1083 | + the Free Software Foundation; either version 2 of the License, or | ||
| 1084 | + (at your option) any later version. | ||
| 1085 | + | ||
| 1086 | + This program is distributed in the hope that it will be useful, | ||
| 1087 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 1088 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 1089 | + GNU General Public License for more details. | ||
| 1090 | + | ||
| 1091 | + You should have received a copy of the GNU General Public License | ||
| 1092 | + along with this program; if not, write to the Free Software | ||
| 1093 | + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | ||
| 1094 | + | ||
| 1095 | +#define MAD_F(x) (((x) + (1 << 13)) >> 14) /* add half of 1 << 14, then drop the low 14 bits; argument parenthesized so compound expressions round as a whole */ | ||
| 1096 | + | ||
| 1097 | + .public imdct36_avr32 | ||
| 1098 | + | ||
| 1099 | +/* | ||
| 1100 | + void imdct36(mad_fixed_t const x[18], mad_fixed_t y[36]) | ||
| 1101 | + { | ||
| 1102 | + mad_fixed_t tmp[18]; | ||
| 1103 | + int i; | ||
| 1104 | +*/ | ||
| 1105 | +/* DCT-IV */ | ||
| 1106 | +imdct36_avr32: | ||
| 1107 | + pushm r0-r7,r11,lr | ||
| 1108 | + sub sp, 4*18 | ||
| 1109 | +/* | ||
| 1110 | + { | ||
| 1111 | + mad_fixed_t tmp2[18]; | ||
| 1112 | + int i; | ||
| 1113 | + | ||
| 1114 | + /* scale[i] = 2 * cos(PI * (2 * i + 1) / (4 * 18)) */ | ||
| 1115 | +/* | ||
| 1116 | + static mad_fixed_t const scale[18] = { | ||
| 1117 | + MAD_F(0x1ff833fa), MAD_F(0x1fb9ea93), MAD_F(0x1f3dd120), | ||
| 1118 | + MAD_F(0x1e84d969), MAD_F(0x1d906bcf), MAD_F(0x1c62648b), | ||
| 1119 | + MAD_F(0x1afd100f), MAD_F(0x1963268b), MAD_F(0x1797c6a4), | ||
| 1120 | + MAD_F(0x159e6f5b), MAD_F(0x137af940), MAD_F(0x11318ef3), | ||
| 1121 | + MAD_F(0x0ec6a507), MAD_F(0x0c3ef153), MAD_F(0x099f61c5), | ||
| 1122 | + MAD_F(0x06ed12c5), MAD_F(0x042d4544), MAD_F(0x0165547c) | ||
| 1123 | + }; | ||
| 1124 | +*/ | ||
| 1125 | + | ||
| 1126 | + /* scaling */ | ||
| 1127 | + | ||
| 1128 | +/* | ||
| 1129 | + for (i = 0; i < 18; i += 3) { | ||
| 1130 | + tmp2[i + 0] = mad_f_mul(x[i + 0], scale[i + 0]); | ||
| 1131 | + tmp2[i + 1] = mad_f_mul(x[i + 1], scale[i + 1]); | ||
| 1132 | + tmp2[i + 2] = mad_f_mul(x[i + 2], scale[i + 2]); | ||
| 1133 | + } | ||
| 1134 | +*/ | ||
| 1135 | + /* even input butterfly */ | ||
| 1136 | + | ||
| 1137 | +/* | ||
| 1138 | + for (i = 0; i < 9; i += 3) { | ||
| 1139 | + tmp3[i + 0] = tmp2[i + 0] + tmp2[18 - (i + 0) - 1]; | ||
| 1140 | + tmp3[i + 1] = tmp2[i + 1] + tmp2[18 - (i + 1) - 1]; | ||
| 1141 | + tmp3[i + 2] = tmp2[i + 2] + tmp2[18 - (i + 2) - 1]; | ||
| 1142 | + } | ||
| 1143 | + for (i = 0; i < 9; i += 3) { | ||
| 1144 | + tmp4[i + 0] = tmp2[i + 0] - tmp2[18 - (i + 0) - 1]; | ||
| 1145 | + tmp4[i + 1] = tmp2[i + 1] - tmp2[18 - (i + 1) - 1]; | ||
| 1146 | + tmp4[i + 2] = tmp2[i + 2] - tmp2[18 - (i + 2) - 1]; | ||
| 1147 | + } | ||
| 1148 | +*/ | ||
| 1149 | + | ||
| 1150 | + ld.d r8, r12[0] /*r8 = x[1], r9 = x[0]*/ | ||
| 1151 | + ld.d r0, pc[scale_dctIV - .] /*r0 = {scale[2], scale[3]}, r1 = { scale[0], scale[1] }*/ | ||
| 1152 | + ld.d r2, r12[2*4] /*r2 = x[3], r3 = x[2]*/ | ||
| 1153 | + ld.d r4, pc[scale_dctIV - . + 14*2] /*r4 = {scale[16], scale[17]}, r5 = { scale[14], scale[15] }*/ | ||
| 1154 | + mulsatrndwh.w r9/*tmp2[0]*/, r9, r1:t /*tmp2[0] = mad_f_mul(x[0], scale[0]) */ | ||
| 1155 | + ld.d r6, r12[16*4] /*r6 = x[17], r7 = x[16]*/ | ||
| 1156 | + mulsatrndwh.w r8/*tmp2[1]*/, r8, r1:b /*tmp2[1] = mad_f_mul(x[1], scale[1]) */ | ||
| 1157 | + mulsatrndwh.w r3/*tmp2[2]*/, r3, r0:t /*tmp2[2] = mad_f_mul(x[2], scale[2]) */ | ||
| 1158 | + mulsatrndwh.w r2/*tmp2[3]*/, r2, r0:b /*tmp2[3] = mad_f_mul(x[3], scale[3]) */ | ||
| 1159 | + ld.d r0, r12[14*4] /*r0 = x[15], r1 = x[14]*/ | ||
| 1160 | + mulsatrndwh.w r7/*tmp2[16]*/, r7, r4:t /*tmp2[16] = mad_f_mul(x[16], scale[16]) */ | ||
| 1161 | + mulsatrndwh.w r6/*tmp2[17]*/, r6, r4:b /*tmp2[17] = mad_f_mul(x[17], scale[17]) */ | ||
| 1162 | + mulsatrndwh.w r1/*tmp2[14]*/, r1, r5:t /*tmp2[14] = mad_f_mul(x[14], scale[14]) */ | ||
| 1163 | + mulsatrndwh.w r0/*tmp2[15]*/, r0, r5:b /*tmp2[15] = mad_f_mul(x[15], scale[15]) */ | ||
| 1164 | + | ||
| 1165 | + ld.d r4, r12[4*4] /*r4 = x[5], r5 = x[4]*/ | ||
| 1166 | + | ||
| 1167 | + sub lr/*tmp4[0]*/, r9, r6 | ||
| 1168 | + add r6/*tmp3[0]*/, r9, r6 | ||
| 1169 | + sub r10/*tmp4[1]*/, r8, r7 | ||
| 1170 | + add r7/*tmp3[1]*/, r8, r7 | ||
| 1171 | + sub r9/*tmp4[2]*/, r3, r0 | ||
| 1172 | + add r0/*tmp3[2]*/, r3, r0 | ||
| 1173 | + sub r8/*tmp4[3]*/, r2, r1 | ||
| 1174 | + add r1/*tmp3[3]*/, r2, r1 | ||
| 1175 | + | ||
| 1176 | + ld.d r2, pc[scale_dctIV - . + 4*2] /*r2 = {scale[6], scale[7]}, r3 = { scale[4], scale[5] }*/ | ||
| 1177 | + | ||
| 1178 | + stm --sp, r8-r10, lr /*sp[0] = tmp4[0],sp[1] = tmp4[1], | ||
| 1179 | + sp[2] = tmp4[2],sp[3] = tmp4[3] */ | ||
| 1180 | + | ||
| 1181 | + /* Registers used: r0 = tmp3[2], r1 = tmp3[3], r6 = tmp3[0], r7 = tmp3[1], r12 = x | ||
| 1182 | + Free registers: r2-r5, r8-r11, lr | ||
| 1183 | + */ | ||
| 1184 | + ld.d r8, r12[6*4] /*r8 = x[7], r9 = x[6]*/ | ||
| 1185 | + ld.d r10, pc[scale_dctIV - . + 10*2] /*r10 = {scale[12], scale[13]}, r11 = { scale[10], scale[11] }*/ | ||
| 1186 | + mulsatrndwh.w r5/*tmp2[4]*/, r5, r3:t /*tmp2[4] = mad_f_mul(x[4], scale[4]) */ | ||
| 1187 | + mulsatrndwh.w r4/*tmp2[5]*/, r4, r3:b /*tmp2[5] = mad_f_mul(x[5], scale[5]) */ | ||
| 1188 | + mulsatrndwh.w r9/*tmp2[6]*/, r9, r2:t /*tmp2[6] = mad_f_mul(x[6], scale[6]) */ | ||
| 1189 | + mulsatrndwh.w r8/*tmp2[7]*/, r8, r2:b /*tmp2[7] = mad_f_mul(x[7], scale[7]) */ | ||
| 1190 | + | ||
| 1191 | + ld.d r2, r12[12*4] /*r2 = x[13], r3 = x[12]*/ | ||
| 1192 | + ld.w lr, r12[11*4] /*lr = x[11] */ | ||
| 1193 | + mulsatrndwh.w r3/*tmp2[12]*/, r3, r10:t /*tmp2[12] = mad_f_mul(x[12], scale[12]) */ | ||
| 1194 | + mulsatrndwh.w r2/*tmp2[13]*/, r2, r10:b /*tmp2[13] = mad_f_mul(x[13], scale[13]) */ | ||
| 1195 | + ld.w r10, r12[10*4] /*r10 = x[10] */ | ||
| 1196 | + mulsatrndwh.w lr/*tmp2[11]*/, lr, r11:b /*tmp2[11] = mad_f_mul(x[11], scale[11]) */ | ||
| 1197 | + mulsatrndwh.w r10/*tmp2[10]*/, r10, r11:t /*tmp2[10] = mad_f_mul(x[10], scale[10]) */ | ||
| 1198 | + | ||
| 1199 | + sub r11/*tmp4[4]*/, r5, r2 | ||
| 1200 | + add r2/*tmp3[4]*/, r5, r2 | ||
| 1201 | + sub r5/*tmp4[5]*/, r4, r3 | ||
| 1202 | + add r3/*tmp3[5]*/, r4, r3 | ||
| 1203 | + sub r4/*tmp4[6]*/, r9, lr | ||
| 1204 | + add lr/*tmp3[6]*/, r9, lr | ||
| 1205 | + sub r9/*tmp4[7]*/, r8, r10 | ||
| 1206 | + add r10/*tmp3[7]*/, r8, r10 | ||
| 1207 | + lddpc r8, scale_dctIV + 8*2 /*r8 = {scale[8], scale[9]} */ | ||
| 1208 | + | ||
| 1209 | + stm --sp, r4, r5, r9, r11 /*sp[0] = tmp4[4],sp[1] = tmp4[7], | ||
| 1210 | + sp[2] = tmp4[5],sp[3] = tmp4[6] */ | ||
| 1211 | + ld.d r4, r12[8*4] /*r4 = x[9], r5 = x[8]*/ | ||
| 1212 | + mulsatrndwh.w r5/*tmp2[8]*/, r5, r8:t /*tmp2[8] = mad_f_mul(x[8], scale[8]) */ | ||
| 1213 | + mulsatrndwh.w r4/*tmp2[9]*/, r4, r8:b /*tmp2[9] = mad_f_mul(x[9], scale[9]) */ | ||
| 1214 | + sub r9/*tmp4[8]*/, r5, r4 | ||
| 1215 | + add r5/*tmp3[8]*/, r5, r4 | ||
| 1216 | + | ||
| 1217 | + st.w --sp, r9 /* sp[0] = tmp4[8] */ | ||
| 1218 | + | ||
| 1219 | + /* Registers used: | ||
| 1220 | + | ||
| 1221 | + r0=tmp3[2], r1=tmp3[3], r2=tmp3[4], r3=tmp3[5], r5=tmp3[8], r6 = tmp3[0], | ||
| 1222 | + r7 = tmp3[1], r10=tmp3[7], lr=tmp3[6] | ||
| 1223 | + Free registers: | ||
| 1224 | + r4, r8, r9, r11, r12 | ||
| 1225 | + */ | ||
| 1226 | + | ||
| 1227 | + | ||
| 1228 | + /* SDCT-II */ | ||
| 1229 | +/* | ||
| 1230 | + | ||
| 1231 | + { | ||
| 1232 | + mad_fixed_t tmp3[9]; | ||
| 1233 | + int i; | ||
| 1234 | +*/ | ||
| 1235 | + /* scale[i] = 2 * cos(PI * (2 * i + 1) / (2 * 18)) */ | ||
| 1236 | +/* | ||
| 1237 | + static mad_fixed_t const scale[9] = { | ||
| 1238 | + MAD_F(0x1fe0d3b4), MAD_F(0x1ee8dd47), MAD_F(0x1d007930), | ||
| 1239 | + MAD_F(0x1a367e59), MAD_F(0x16a09e66), MAD_F(0x125abcf8), | ||
| 1240 | + MAD_F(0x0d8616bc), MAD_F(0x08483ee1), MAD_F(0x02c9fad7) | ||
| 1241 | + }; | ||
| 1242 | +*/ | ||
| 1243 | + /* divide the 18-point SDCT-II into two 9-point SDCT-IIs */ | ||
| 1244 | + | ||
| 1245 | + | ||
| 1246 | + /* fastdct */ | ||
| 1247 | + | ||
| 1248 | +/* | ||
| 1249 | + { | ||
| 1250 | + mad_fixed_t a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12; | ||
| 1251 | + mad_fixed_t a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25; | ||
| 1252 | + mad_fixed_t m0, m1, m2, m3, m4, m5, m6, m7; | ||
| 1253 | +*/ | ||
| 1254 | +// enum { | ||
| 1255 | +// c0 = MAD_F(0x1f838b8d), /* 2 * cos( 1 * PI / 18) */ | ||
| 1256 | +// c1 = MAD_F(0x1bb67ae8), /* 2 * cos( 3 * PI / 18) */ | ||
| 1257 | +// c2 = MAD_F(0x18836fa3), /* 2 * cos( 4 * PI / 18) */ | ||
| 1258 | +// c3 = MAD_F(0x1491b752), /* 2 * cos( 5 * PI / 18) */ | ||
| 1259 | +// c4 = MAD_F(0x0af1d43a), /* 2 * cos( 7 * PI / 18) */ | ||
| 1260 | +// c5 = MAD_F(0x058e86a0), /* 2 * cos( 8 * PI / 18) */ | ||
| 1261 | +// c6 = -MAD_F(0x1e11f642) /* 2 * cos(16 * PI / 18) */ | ||
| 1262 | +// }; | ||
| 1263 | + | ||
| 1264 | +/* | ||
| 1265 | + a2 = tmp3[6] + tmp3[2]; | ||
| 1266 | + a6 = tmp3[8] + tmp3[0]; | ||
| 1267 | + a11 = a2 - a6; | ||
| 1268 | + m5 = mad_f_mul(a11, -c6) ; | ||
| 1269 | + a4 = tmp3[1] + tmp3[7]; | ||
| 1270 | + | ||
| 1271 | + a18 = tmp3[4] + a4; | ||
| 1272 | + a19 = -2 * tmp3[4] + a4; | ||
| 1273 | + | ||
| 1274 | + a0 = tmp3[3] + tmp3[5]; | ||
| 1275 | + | ||
| 1276 | +*/ | ||
| 1277 | + add r11/*a4*/, r7, r10 | ||
| 1278 | + add r12/*a18*/, r2, r11 | ||
| 1279 | + sub r11/*a19*/, r11, r2<<1 | ||
| 1280 | + | ||
| 1281 | + add r4/*a2*/, lr, r0 | ||
| 1282 | + add r8/*a6*/, r5, r6 | ||
| 1283 | + sub r9/*a11*/, r4, r8 | ||
| 1284 | + | ||
| 1285 | + st.d --sp, r0 /* sp[0] = tmp3[3], sp[1] = tmp3[2]*/ | ||
| 1286 | + | ||
| 1287 | + mov r2, MAD_F(0x1e11f642) | ||
| 1288 | + mulsatrndwh.w r9/*m5*/, r9, r2:b | ||
| 1289 | + | ||
| 1290 | + add r2/*a0*/, r1, r3 | ||
| 1291 | + | ||
| 1292 | + /* Registers used: | ||
| 1293 | + | ||
| 1294 | + r2=a0, r3=tmp3[5], r4=a2, r5=tmp3[8], r6 = tmp3[0], | ||
| 1295 | + r7 = tmp3[1], r8=a6, r10=tmp3[7], r9=m5, r11=a19, r12=a18,lr=tmp3[6] | ||
| 1296 | + Free registers: | ||
| 1297 | + r0, r1 | ||
| 1298 | + */ | ||
| 1299 | + | ||
| 1300 | +/* | ||
| 1301 | + a8 = a0 + a2; | ||
| 1302 | + a12 = a8 + a6; | ||
| 1303 | + a10 = a0 - a6; | ||
| 1304 | + a9 = a0 - a2; | ||
| 1305 | + m7 = mad_f_mul(a9, -c2) ; | ||
| 1306 | + m6 = mad_f_mul(a10, -c5) ; | ||
| 1307 | +*/ | ||
| 1308 | + | ||
| 1309 | + add r0/*a8*/, r2, r4 | ||
| 1310 | + add r0/*a12*/, r8 | ||
| 1311 | + rsub r8/*a10*/, r2 | ||
| 1312 | + sub r2/*a9*/, r4 | ||
| 1313 | + mov r1, -MAD_F(0x18836fa3) | ||
| 1314 | + mulsatrndwh.w r2/*m7*/, r2, r1:b | ||
| 1315 | + mov r1, -MAD_F(0x058e86a0) | ||
| 1316 | + mulsatrndwh.w r8/*m6*/, r8, r1:b | ||
| 1317 | + | ||
| 1318 | + /* Registers used: | ||
| 1319 | + | ||
| 1320 | + r0=a12, r2=m7, r3=tmp3[5], r5=tmp3[8], r6 = tmp3[0], | ||
| 1321 | + r7 = tmp3[1], r8=m6, r10=tmp3[7], r9=m5, r11=a19, r12=a18,lr=tmp3[6] | ||
| 1322 | + Free registers: | ||
| 1323 | + r1, r4 | ||
| 1324 | + */ | ||
| 1325 | + | ||
| 1326 | + | ||
| 1327 | +/* | ||
| 1328 | + a21 = -a19 - (m5 << 1); | ||
| 1329 | + tmp[ 8] = a21 - (m6 << 1); | ||
| 1330 | + | ||
| 1331 | + a20 = a19 - (m5 << 1); | ||
| 1332 | + tmp[ 4] = (m7 << 1) + a20; | ||
| 1333 | + a22 = -a19 + (m6 << 1); | ||
| 1334 | + tmp[16] = a22 + (m7 << 1); | ||
| 1335 | + tmp[ 0] = a18 + a12; | ||
| 1336 | + tmp[12] = a12 - 2 * a18; | ||
| 1337 | +*/ | ||
| 1338 | + add r1/*a21*/, r11, r9 << 1 | ||
| 1339 | + neg r1 | ||
| 1340 | + sub r1/*tmp[8]*/, r1, r8 << 1 | ||
| 1341 | + stdsp sp[4*11/*tmp3[..] on the stack*/ + 8*4], r1 | ||
| 1342 | + sub r4/*a20*/, r11, r9 << 1 | ||
| 1343 | + add r4/*tmp[4]*/, r4, r2 << 1 | ||
| 1344 | + stdsp sp[4*11/*tmp3[..] on the stack*/ + 4*4], r4 | ||
| 1345 | + neg r11 | ||
| 1346 | + add r1/*a22*/, r11, r8 << 1 | ||
| 1347 | + add r1/*tmp[16]*/, r1, r2 << 1 | ||
| 1348 | + stdsp sp[4*11/*tmp3[..] on the stack*/ + 16*4], r1 | ||
| 1349 | + add r4, r12, r0 | ||
| 1350 | + sub r1, r0, r12 << 1 | ||
| 1351 | + stdsp sp[4*11/*tmp3[..] on the stack*/ + 0*4], r4 | ||
| 1352 | + stdsp sp[4*11/*tmp3[..] on the stack*/ + 12*4], r1 | ||
| 1353 | + | ||
| 1354 | + ld.d r0, sp++ | ||
| 1355 | + | ||
| 1356 | + /* Registers used: | ||
| 1357 | + | ||
| 1358 | + r0 = tmp3[2], r1 = tmp3[3], r3=tmp3[5], r5=tmp3[8], r6 = tmp3[0], | ||
| 1359 | + r7 = tmp3[1], r10=tmp3[7], r11=a19, lr=tmp3[6] | ||
| 1360 | + Free registers: | ||
| 1361 | + r2,r4,r8,r9,r12 | ||
| 1362 | + */ | ||
| 1363 | + | ||
| 1364 | +/* | ||
| 1365 | + a5 = tmp3[1] - tmp3[7]; | ||
| 1366 | + a7 = tmp3[8] - tmp3[0]; | ||
| 1367 | + a3 = tmp3[6] - tmp3[2]; | ||
| 1368 | + a1 = tmp3[3] - tmp3[5]; | ||
| 1369 | + a13 = a1 - a3; | ||
| 1370 | + a14 = a13 + a7; | ||
| 1371 | + m3 = mad_f_mul(a14, -c1) ; | ||
| 1372 | + m4 = mad_f_mul(a5, -c1) ; | ||
| 1373 | + tmp[ 6] = m3 << 1; | ||
| 1374 | +*/ | ||
| 1375 | + sub r7/*a5*/, r10 | ||
| 1376 | + sub r2/*a7*/, r5, r6 | ||
| 1377 | + sub r4/*a3*/, lr, r0 | ||
| 1378 | + sub r8/*a1*/, r1, r3 | ||
| 1379 | + sub r9/*a13*/, r8, r4 | ||
| 1380 | + add r12/*a14*/, r9, r2 | ||
| 1381 | + mov r0, -MAD_F(0x1bb67ae8) | ||
| 1382 | + mulsatrndwh.w r12/*m3*/, r12, r0:b | ||
| 1383 | + mulsatrndwh.w r7/*m4*/, r7, r0:b | ||
| 1384 | + lsl r12, 1 | ||
| 1385 | + stdsp sp[4*9/*tmp3[..] on the stack*/ + 6*4], r12 | ||
| 1386 | + | ||
| 1387 | + /* Registers used: | ||
| 1388 | + r2 = a7, r4 = a3, r7 = m4, r8 = a1, r12 = m3 | ||
| 1389 | + | ||
| 1390 | + Free registers: | ||
| 1391 | + r0, r1, r3, r5, r6, r10, r9, r11, lr | ||
| 1392 | + */ | ||
| 1393 | + | ||
| 1394 | + | ||
| 1395 | +/* | ||
| 1396 | + a15 = a3 + a7; | ||
| 1397 | + m2 = mad_f_mul(a15, -c4) ; | ||
| 1398 | + a17 = a1 + a3; | ||
| 1399 | + m0 = mad_f_mul(a17, -c3) ; | ||
| 1400 | + a23 = (m4 << 1) + (m2 << 1); | ||
| 1401 | + tmp[14] = a23 + (m0 << 1); */ | ||
| 1402 | + add r0/*a15*/, r4, r2 | ||
| 1403 | + mov r1, -MAD_F(0x0af1d43a) | ||
| 1404 | + mulsatrndwh.w r0/*m2*/, r0, r1:b | ||
| 1405 | + mov r3, -MAD_F(0x1491b752) | ||
| 1406 | + add r5/*a17*/, r8, r4 | ||
| 1407 | + mulsatrndwh.w r5/*m0*/, r5, r3:b | ||
| 1408 | + lsl r7, 1 | ||
| 1409 | + add r6/*a23*/, r7, r0 << 1 | ||
| 1410 | + add r6/*tmp[14]*/, r6, r5 << 1 | ||
| 1411 | + stdsp sp[4*9/*tmp3[..] on the stack*/ + 14*4], r6 | ||
| 1412 | + | ||
| 1413 | + /* Registers used: | ||
| 1414 | + r0 = m2, r2 = a7, r5 = m0, r7 = m4, r8 = a1 | ||
| 1415 | + | ||
| 1416 | + Free registers: | ||
| 1417 | + r1, r3, r4, r6, r10, r9, r11, lr | ||
| 1418 | + */ | ||
| 1419 | + | ||
| 1420 | +/* | ||
| 1421 | + a16 = a1 - a7; | ||
| 1422 | + m1 = mad_f_mul(a16, -c0) ; | ||
| 1423 | + a24 = (m4 << 1) - (m2 << 1); | ||
| 1424 | + tmp[10] = a24 - (m1 << 1); | ||
| 1425 | + | ||
| 1426 | + a25 = (m4 << 1) + (m1 << 1); | ||
| 1427 | + tmp[ 2] = (m0 << 1) - a25; | ||
| 1428 | +*/ | ||
| 1429 | + sub r3/*a16*/, r8, r2 | ||
| 1430 | + mov r4, -MAD_F(0x1f838b8d) | ||
| 1431 | + mulsatrndwh.w r3/*m1*/, r3, r4:b | ||
| 1432 | + sub r1/*a24*/, r7, r0 << 1 | ||
| 1433 | + sub r1/*tmp[10]*/, r1, r3 << 1 | ||
| 1434 | + stdsp sp[4*9/*tmp3[..] on the stack*/ + 10*4], r1 | ||
| 1435 | + add r7/*a25*/, r7, r3 << 1 | ||
| 1436 | + sub r7, r7, r5 << 1 | ||
| 1437 | + neg r7 | ||
| 1438 | + stdsp sp[4*9/*tmp3[..] on the stack*/ + 2*4], r7 | ||
| 1439 | + | ||
| 1440 | + | ||
| 1441 | + | ||
| 1442 | + | ||
| 1443 | + /* output to every other slot for convenience */ | ||
| 1444 | + | ||
| 1445 | + /*} */ | ||
| 1446 | + /* End fastdct */ | ||
| 1447 | + | ||
| 1448 | + /* odd input butterfly and scaling */ | ||
| 1449 | + | ||
| 1450 | + | ||
| 1451 | + /* On the stack: | ||
| 1452 | + sp[0] = tmp4[8], sp[1] = tmp4[4],sp[2] = tmp4[7], sp[3] = tmp4[5],sp[4] = tmp4[6] | ||
| 1453 | + sp[5] = tmp4[0], sp[6] = tmp4[1],sp[7] = tmp4[2],sp[8] = tmp4[3] | ||
| 1454 | + */ | ||
| 1455 | + | ||
| 1456 | + /* | ||
| 1457 | + tmp3[0] = mad_f_mul(tmp4[0], scale[0]); | ||
| 1458 | + tmp3[1] = mad_f_mul(tmp4[1], scale[1]) << 1; | ||
| 1459 | + tmp3[2] = mad_f_mul(tmp4[2], scale[2]); | ||
| 1460 | + tmp3[3] = mad_f_mul(tmp4[3], scale[3]) << 1; | ||
| 1461 | + tmp3[4] = mad_f_mul(tmp4[4], scale[4]); | ||
| 1462 | + tmp3[5] = mad_f_mul(tmp4[5], scale[5]); | ||
| 1463 | + tmp3[6] = mad_f_mul(tmp4[6], scale[6]) << 1; | ||
| 1464 | + tmp3[7] = mad_f_mul(tmp4[7], scale[7]); | ||
| 1465 | + tmp3[8] = mad_f_mul(tmp4[8], scale[8]) << 1; | ||
| 1466 | + */ | ||
| 1467 | + /* Registers used: | ||
| 1468 | + r1 = tmp4[3], r2 = tmp4[2], r3 = tmp4[1], r4 = tmp4[0], r7 = tmp4[6] | ||
| 1469 | + r10 = tmp4[5], r11 = tmp4[7], r12 = tmp4[4], lr = tmp4[8] | ||
| 1470 | + | ||
| 1471 | + Free registers: | ||
| 1472 | + r0, r5, r6, r8, r9 | ||
| 1473 | + */ | ||
| 1474 | + ld.d r8, pc[ scale_sdctII - . + 4*2] /* r8 = { scale[6], scale[7] }, r9 = { scale[4], scale[5]} */ | ||
| 1475 | + ldm sp++, r1, r2, r3, r4, r7, r10, r11, r12, lr | ||
| 1476 | + mov r5, MAD_F(0x02c9fad7) /* r5 = scale[8] */ | ||
| 1477 | + mulsatrndwh.w r5/*tmp3[8]*/, lr, r5:b | ||
| 1478 | + mulsatrndwh.w lr/*tmp3[6]*/, r7, r8:t | ||
| 1479 | + ld.d r6, pc[ scale_sdctII - . + 0*2] /* r6 = { scale[2], scale[3] }, r7 = { scale[0], scale[1]} */ | ||
| 1480 | + lsl lr, 1 | ||
| 1481 | + lsl r5, 1 | ||
| 1482 | + mulsatrndwh.w r0/*tmp3[2]*/, r2, r6:t | ||
| 1483 | + mulsatrndwh.w r1/*tmp3[3]*/, r1, r6:b | ||
| 1484 | + mulsatrndwh.w r6/*tmp3[0]*/, r4, r7:t | ||
| 1485 | + mulsatrndwh.w r7/*tmp3[1]*/, r3, r7:b | ||
| 1486 | + mulsatrndwh.w r3/*tmp3[5]*/, r10, r9:b | ||
| 1487 | + mulsatrndwh.w r2/*tmp3[4]*/, r12, r9:t | ||
| 1488 | + mulsatrndwh.w r9/*tmp3[7]*/, r11, r8:b | ||
| 1489 | + lsl r1, 1 | ||
| 1490 | + lsl r7, 1 | ||
| 1491 | + | ||
| 1492 | + | ||
| 1493 | + /* fastdct */ | ||
| 1494 | + | ||
| 1495 | +/* | ||
| 1496 | + { | ||
| 1497 | + mad_fixed_t a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12; | ||
| 1498 | + mad_fixed_t a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25; | ||
| 1499 | + mad_fixed_t m0, m1, m2, m3, m4, m5, m6, m7; | ||
| 1500 | +*/ | ||
| 1501 | +// enum { | ||
| 1502 | +// c0 = MAD_F(0x1f838b8d), /* 2 * cos( 1 * PI / 18) */ | ||
| 1503 | +// c1 = MAD_F(0x1bb67ae8), /* 2 * cos( 3 * PI / 18) */ | ||
| 1504 | +// c2 = MAD_F(0x18836fa3), /* 2 * cos( 4 * PI / 18) */ | ||
| 1505 | +// c3 = MAD_F(0x1491b752), /* 2 * cos( 5 * PI / 18) */ | ||
| 1506 | +// c4 = MAD_F(0x0af1d43a), /* 2 * cos( 7 * PI / 18) */ | ||
| 1507 | +// c5 = MAD_F(0x058e86a0), /* 2 * cos( 8 * PI / 18) */ | ||
| 1508 | +// c6 = -MAD_F(0x1e11f642) /* 2 * cos(16 * PI / 18) */ | ||
| 1509 | +// }; | ||
| 1510 | + | ||
| 1511 | + /* Registers used: | ||
| 1512 | + | ||
| 1513 | + r0=tmp3[2], r1=tmp3[3], r2=tmp3[4], r3=tmp3[5], r5=tmp3[8], r6 = tmp3[0], | ||
| 1514 | + r7 = tmp3[1], r9=tmp3[7], lr=tmp3[6] | ||
| 1515 | + Free registers: | ||
| 1516 | + r4, r8, r10, r11, r12 | ||
| 1517 | + */ | ||
| 1518 | + | ||
| 1519 | +/* | ||
| 1520 | + a2 = tmp3[6] + (tmp3[2] << 1); | ||
| 1521 | + a6 = tmp3[8] + (tmp3[0] << 1); | ||
| 1522 | + a11 = a2 - a6; | ||
| 1523 | + m5 = mad_f_mul(a11, c6) ; | ||
| 1524 | + a4 = tmp3[1] + (tmp3[7] << 1); | ||
| 1525 | + | ||
| 1526 | + a18 = (tmp3[4] << 1) + a4; | ||
| 1527 | + a19 = -2 * (tmp3[4] << 1) + a4; | ||
| 1528 | + | ||
| 1529 | + a0 = tmp3[3] + (tmp3[5] << 1); | ||
| 1530 | + | ||
| 1531 | +*/ | ||
| 1532 | + add r11/*a4*/, r7, r9 << 1 | ||
| 1533 | + add r12/*a18*/, r11, r2 << 1 | ||
| 1534 | + sub r11/*a19*/, r11, r2 << 2 | ||
| 1535 | + | ||
| 1536 | + add r4/*a2*/, lr, r0 << 1 | ||
| 1537 | + add r8/*a6*/, r5, r6 << 1 | ||
| 1538 | + sub r10/*a11*/, r4, r8 | ||
| 1539 | + | ||
| 1540 | + st.d --sp, r0 /* sp[0] = tmp3[3], sp[1] = tmp3[2]*/ | ||
| 1541 | + | ||
| 1542 | + mov r2, -MAD_F(0x1e11f642) | ||
| 1543 | + mulsatrndwh.w r10/*m5*/, r10, r2:b | ||
| 1544 | + | ||
| 1545 | + add r2/*a0*/, r1, r3 << 1 | ||
| 1546 | + | ||
| 1547 | + /* Registers used: | ||
| 1548 | + | ||
| 1549 | + r2=a0, r3=tmp3[5], r4=a2, r5=tmp3[8], r6 = tmp3[0], | ||
| 1550 | + r7 = tmp3[1], r8=a6, r9=tmp3[7], r10=m5, r11=a19, r12=a18,lr=tmp3[6] | ||
| 1551 | + Free registers: | ||
| 1552 | + r0, r1 | ||
| 1553 | + */ | ||
| 1554 | + | ||
| 1555 | +/* | ||
| 1556 | + a8 = a0 + a2; | ||
| 1557 | + a12 = a8 + a6; | ||
| 1558 | + a10 = a0 - a6; | ||
| 1559 | + a9 = a0 - a2; | ||
| 1560 | + m7 = mad_f_mul(a9, -c2) ; | ||
| 1561 | + m6 = mad_f_mul(a10, -c5) ; | ||
| 1562 | +*/ | ||
| 1563 | + | ||
| 1564 | + add r0/*a8*/, r2, r4 | ||
| 1565 | + add r0/*a12*/, r8 | ||
| 1566 | + rsub r8/*a10*/, r2 | ||
| 1567 | + sub r2/*a9*/, r4 | ||
| 1568 | + mov r1, -MAD_F(0x18836fa3) | ||
| 1569 | + mulsatrndwh.w r2/*m7*/, r2, r1:b | ||
| 1570 | + mov r1, -MAD_F(0x058e86a0) | ||
| 1571 | + mulsatrndwh.w r8/*m6*/, r8, r1:b | ||
| 1572 | + | ||
| 1573 | + /* Registers used: | ||
| 1574 | + | ||
| 1575 | + r0=a12, r2=m7, r3=tmp3[5], r5=tmp3[8], r6 = tmp3[0], | ||
| 1576 | + r7 = tmp3[1], r8=m6, r9=tmp3[7], r10=m5, r11=a19, r12=a18,lr=tmp3[6] | ||
| 1577 | + Free registers: | ||
| 1578 | + r1, r4 | ||
| 1579 | + */ | ||
| 1580 | + | ||
| 1581 | + | ||
| 1582 | +/* | ||
| 1583 | + a21 = -a19 + (m5 << 1); | ||
| 1584 | + tmp[ 9] = a21 - (m6 << 1); | ||
| 1585 | + | ||
| 1586 | + a20 = -(-a19 - (m5 << 1)); | ||
| 1587 | + tmp[ 5] = (m7 << 1) + a20; | ||
| 1588 | + a22 = -a19 + (m6 << 1); | ||
| 1589 | + tmp[17] = a22 + (m7 << 1); | ||
| 1590 | + tmp[ 1] = a18 + a12; | ||
| 1591 | + tmp[13] = a12 - 2 * a18; | ||
| 1592 | +*/ | ||
| 1593 | + sub r1/*a21*/, r11, r10 << 1 | ||
| 1594 | + neg r1 | ||
| 1595 | + sub r1/*tmp[9]*/, r1, r8 << 1 | ||
| 1596 | + stdsp sp[4*2/*tmp3[..] on the stack*/ + 9*4], r1 | ||
| 1597 | + add r4/*a20*/, r11, r10 << 1 | ||
| 1598 | + add r4/*tmp[5]*/, r4, r2 << 1 | ||
| 1599 | + stdsp sp[4*2/*tmp3[..] on the stack*/ + 5*4], r4 | ||
| 1600 | + neg r11 | ||
| 1601 | + add r1/*a22*/, r11, r8 << 1 | ||
| 1602 | + add r1/*tmp[17]*/, r1, r2 << 1 | ||
| 1603 | + stdsp sp[4*2/*tmp3[..] on the stack*/ + 17*4], r1 | ||
| 1604 | + add r4, r12, r0 | ||
| 1605 | + sub r1, r0, r12 << 1 | ||
| 1606 | + stdsp sp[4*2/*tmp3[..] on the stack*/ + 1*4], r4 | ||
| 1607 | + stdsp sp[4*2/*tmp3[..] on the stack*/ + 13*4], r1 | ||
| 1608 | + | ||
| 1609 | + ld.d r0, sp++ | ||
| 1610 | + | ||
| 1611 | + /* Registers used: | ||
| 1612 | + | ||
| 1613 | + r0 = tmp3[2], r1 = tmp3[3], r3=tmp3[5], r5=tmp3[8], r6 = tmp3[0], | ||
| 1614 | + r7 = tmp3[1], r9=tmp3[7], r11=a19, lr=tmp3[6] | ||
| 1615 | + Free registers: | ||
| 1616 | + r2,r4,r8,r10,r12 | ||
| 1617 | + */ | ||
| 1618 | + | ||
| 1619 | +/* | ||
| 1620 | + a5 = tmp3[1] - (tmp3[7] << 1); | ||
| 1621 | + a7 = tmp3[8] - (tmp3[0] << 1); | ||
| 1622 | + a3 = tmp3[6] - (tmp3[2] << 1); | ||
| 1623 | + a1 = tmp3[3] - (tmp3[5] << 1); | ||
| 1624 | + a13 = a1 - a3; | ||
| 1625 | + a14 = a13 + a7; | ||
| 1626 | + m3 = mad_f_mul(a14, -c1) ; | ||
| 1627 | + m4 = mad_f_mul(a5, -c1) ; | ||
| 1628 | + tmp[ 7] = m3 << 1; | ||
| 1629 | +*/ | ||
| 1630 | + sub r7/*a5*/, r7, r9 << 1 | ||
| 1631 | + sub r2/*a7*/, r5, r6 << 1 | ||
| 1632 | + sub r4/*a3*/, lr, r0 << 1 | ||
| 1633 | + sub r8/*a1*/, r1, r3 << 1 | ||
| 1634 | + sub r10/*a13*/, r8, r4 | ||
| 1635 | + add r12/*a14*/, r10, r2 | ||
| 1636 | + mov r0, -MAD_F(0x1bb67ae8) | ||
| 1637 | + mulsatrndwh.w r12/*m3*/, r12, r0:b | ||
| 1638 | + mulsatrndwh.w r7/*m4*/, r7, r0:b | ||
| 1639 | + lsl r12, 1 | ||
| 1640 | + stdsp sp[7*4], r12 | ||
| 1641 | + | ||
| 1642 | + /* Registers used: | ||
| 1643 | + r2 = a7, r4 = a3, r7 = m4, r8 = a1, r12 = m3 | ||
| 1644 | + | ||
| 1645 | + Free registers: | ||
| 1646 | + r0, r1, r3, r5, r6, r9, r10, r11, lr | ||
| 1647 | + */ | ||
| 1648 | + | ||
| 1649 | + | ||
| 1650 | +/* | ||
| 1651 | + a15 = a3 + a7; | ||
| 1652 | + m2 = mad_f_mul(a15, -c4) ; | ||
| 1653 | + a17 = a1 + a3; | ||
| 1654 | + m0 = mad_f_mul(a17, -c3) ; | ||
| 1655 | + a23 = (m4 << 1) + (m2 << 1); | ||
| 1656 | + tmp[15] = a23 + (m0 << 1); */ | ||
| 1657 | + add r0/*a15*/, r4, r2 | ||
| 1658 | + mov r1, -MAD_F(0x0af1d43a) | ||
| 1659 | + mulsatrndwh.w r0/*m2*/, r0, r1:b | ||
| 1660 | + mov r3, -MAD_F(0x1491b752) | ||
| 1661 | + add r5/*a17*/, r8, r4 | ||
| 1662 | + mulsatrndwh.w r5/*m0*/, r5, r3:b | ||
| 1663 | + lsl r7, 1 | ||
| 1664 | + add r6/*a23*/, r7, r0 << 1 | ||
| 1665 | + add r6/*tmp[15]*/, r6, r5 << 1 | ||
| 1666 | + stdsp sp[15*4], r6 | ||
| 1667 | + | ||
| 1668 | + /* Registers used: | ||
| 1669 | + r0 = m2, r2 = a7, r5 = m0, r7 = m4, r8 = a1 | ||
| 1670 | + | ||
| 1671 | + Free registers: | ||
| 1672 | + r1, r3, r4, r6, r9, r10, r11, lr | ||
| 1673 | + */ | ||
| 1674 | + | ||
| 1675 | +/* | ||
| 1676 | + a16 = a1 - a7; | ||
| 1677 | + m1 = mad_f_mul(a16, -c0) ; | ||
| 1678 | + a24 = (m4 << 1) - (m2 << 1); | ||
| 1679 | + tmp[11] = a24 - (m1 << 1); | ||
| 1680 | + | ||
| 1681 | + a25 = (m4 << 1) + (m1 << 1); | ||
| 1682 | + tmp[ 3] = (m0 << 1) - a25; | ||
| 1683 | +*/ | ||
| 1684 | + sub r3/*a16*/, r8, r2 | ||
| 1685 | + mov r4, -MAD_F(0x1f838b8d) | ||
| 1686 | + mulsatrndwh.w r3/*m1*/, r3, r4:b | ||
| 1687 | + sub r1/*a24*/, r7, r0 << 1 | ||
| 1688 | + sub r1/*tmp[11]*/, r1, r3 << 1 | ||
| 1689 | + stdsp sp[11*4], r1 | ||
| 1690 | + add r7/*a25*/, r7, r3 << 1 | ||
| 1691 | + sub r7, r7, r5 << 1 | ||
| 1692 | + neg r7 | ||
| 1693 | + lddsp r12, sp[4*18+4] /* Get y from stack */ | ||
| 1694 | + stdsp sp[3*4], r7 | ||
| 1695 | + | ||
| 1696 | + | ||
| 1697 | + /* output to every other slot for convenience */ | ||
| 1698 | + | ||
| 1699 | + /* End fastdct */ | ||
| 1700 | + | ||
| 1701 | + /* output accumulation */ | ||
| 1702 | + | ||
| 1703 | +/* for (i = 3; i < 18; i += 8) { | ||
| 1704 | + tmp[i + 0] -= tmp[(i + 0) - 2]; | ||
| 1705 | + tmp[i + 2] -= tmp[(i + 2) - 2]; | ||
| 1706 | + tmp[i + 4] -= tmp[(i + 4) - 2]; | ||
| 1707 | + tmp[i + 6] -= tmp[(i + 6) - 2]; | ||
| 1708 | + } | ||
| 1709 | + } | ||
| 1710 | +*/ | ||
| 1711 | + | ||
| 1712 | +/* End SDCT-II */ | ||
| 1713 | + | ||
| 1714 | + | ||
| 1715 | + | ||
| 1716 | + /* scale reduction and output accumulation */ | ||
| 1717 | + | ||
| 1718 | +/* | ||
| 1719 | + for (i = 1; i < 17; i += 4) { | ||
| 1720 | + tmp[i + 0] = tmp[i + 0] - tmp[(i + 0) - 1]; | ||
| 1721 | + tmp[i + 1] = tmp[i + 1] - tmp[(i + 1) - 1]; | ||
| 1722 | + tmp[i + 2] = tmp[i + 2] - tmp[(i + 2) - 1]; | ||
| 1723 | + tmp[i + 3] = tmp[i + 3] - tmp[(i + 3) - 1]; | ||
| 1724 | + } | ||
| 1725 | + tmp[17] = tmp[17] - tmp[16]; | ||
| 1726 | + } | ||
| 1727 | +*/ | ||
| 1728 | +/* End DCT-IV */ | ||
| 1729 | + | ||
| 1730 | + | ||
| 1731 | + /* convert 18-point DCT-IV to 36-point IMDCT */ | ||
| 1732 | + | ||
| 1733 | +/* | ||
| 1734 | + for (i = 0; i < 9; i += 3) { | ||
| 1735 | + y[i + 0] = tmp[9 + (i + 0)]; | ||
| 1736 | + y[i + 1] = tmp[9 + (i + 1)]; | ||
| 1737 | + y[i + 2] = tmp[9 + (i + 2)]; | ||
| 1738 | + } | ||
| 1739 | + for (i = 9; i < 27; i += 3) { | ||
| 1740 | + y[i + 0] = -tmp[36 - (9 + (i + 0)) - 1]; | ||
| 1741 | + y[i + 1] = -tmp[36 - (9 + (i + 1)) - 1]; | ||
| 1742 | + y[i + 2] = -tmp[36 - (9 + (i + 2)) - 1]; | ||
| 1743 | + } | ||
| 1744 | + for (i = 27; i < 36; i += 3) { | ||
| 1745 | + y[i + 0] = -tmp[(i + 0) - 27]; | ||
| 1746 | + y[i + 1] = -tmp[(i + 1) - 27]; | ||
| 1747 | + y[i + 2] = -tmp[(i + 2) - 27]; | ||
| 1748 | + } | ||
| 1749 | + } | ||
| 1750 | +*/ | ||
| 1751 | + | ||
| 1752 | + /* Registers used: | ||
| 1753 | + r0 = tmp[8], r1 = tmp[7], r2 = tmp[6], r3 = tmp[5], r4 = tmp[4] | ||
| 1754 | + r5 = tmp[3], r6 = tmp[2], r7 = tmp[1], r8 = tmp[0], r12 = y | ||
| 1755 | + | ||
| 1756 | + Free registers: | ||
| 1757 | + r9, r10, r11, lr | ||
| 1758 | + */ | ||
| 1759 | + | ||
| 1760 | + ldm sp++, r0-r8 /* Get tmp[0]-tmp[8] from stack */ | ||
| 1761 | + sub r5, r7 /* tmp[3] -= tmp[1]*/ | ||
| 1762 | + sub r3, r5 /* tmp[5] -= tmp[3]*/ | ||
| 1763 | + sub r1, r3 /* tmp[7] -= tmp[5]*/ | ||
| 1764 | + | ||
| 1765 | + sub r7, r8 /* tmp[1] -= tmp[0]*/ | ||
| 1766 | + sub r6, r7 /* tmp[2] -= tmp[1]*/ | ||
| 1767 | + sub r5, r6 /* tmp[3] -= tmp[2]*/ | ||
| 1768 | + neg r8 | ||
| 1769 | + st.w r12[26*4], r8 /* y[26] = -tmp[0] */ | ||
| 1770 | + st.w r12[27*4], r8 /* y[27] = -tmp[0] */ | ||
| 1771 | + neg r7 | ||
| 1772 | + neg r6 | ||
| 1773 | + st.w r12[25*4], r7 /* y[25] = -tmp[1] */ | ||
| 1774 | + st.w r12[24*4], r6 /* y[24] = -tmp[2] */ | ||
| 1775 | + st.d r12[28*4], r6 /* y[28] = -tmp[1], y[29] = -tmp[2]*/ | ||
| 1776 | + | ||
| 1777 | + sub r4, r5 /* tmp[4] -= tmp[3]*/ | ||
| 1778 | + sub r3, r4 /* tmp[5] -= tmp[4]*/ | ||
| 1779 | + neg r5 | ||
| 1780 | + neg r4 | ||
| 1781 | + st.w r12[23*4], r5 /* y[23] = -tmp[3] */ | ||
| 1782 | + st.w r12[22*4], r4 /* y[22] = -tmp[4] */ | ||
| 1783 | + st.d r12[30*4], r4 /* y[30] = -tmp[3], y[31] = -tmp[4]*/ | ||
| 1784 | + | ||
| 1785 | + ldm sp++, r4-r11,lr /* Get tmp[9]-tmp[17] from stack */ | ||
| 1786 | + | ||
| 1787 | + sub r2, r3 /* tmp[6] -= tmp[5]*/ | ||
| 1788 | + | ||
| 1789 | + sub lr, r1 /* tmp[9] -= tmp[7]*/ | ||
| 1790 | + sub r10, lr /* tmp[11] -= tmp[9]*/ | ||
| 1791 | + sub r8, r10 /* tmp[13] -= tmp[11]*/ | ||
| 1792 | + sub r6, r8 /* tmp[15] -= tmp[13]*/ | ||
| 1793 | + sub r4, r6 /* tmp[17] -= tmp[15]*/ | ||
| 1794 | + | ||
| 1795 | + sub r1, r2 /* tmp[7] -= tmp[6]*/ | ||
| 1796 | + sub r0, r1 /* tmp[8] -= tmp[7]*/ | ||
| 1797 | + neg r3 | ||
| 1798 | + neg r2 | ||
| 1799 | + st.w r12[21*4], r3 /* y[21] = -tmp[5] */ | ||
| 1800 | + st.w r12[20*4], r2 /* y[20] = -tmp[6] */ | ||
| 1801 | + st.d r12[32*4], r2 /* y[32] = -tmp[5], y[33] = -tmp[6]*/ | ||
| 1802 | + | ||
| 1803 | + sub lr, r0 /* tmp[9] -= tmp[8]*/ | ||
| 1804 | + sub r11, lr /* tmp[10] -= tmp[9]*/ | ||
| 1805 | + neg r1 | ||
| 1806 | + neg r0 | ||
| 1807 | + st.w r12[19*4], r1 /* y[19] = -tmp[7] */ | ||
| 1808 | + st.w r12[18*4], r0 /* y[18] = -tmp[8] */ | ||
| 1809 | + st.d r12[34*4], r0 /* y[34] = -tmp[7], y[35] = -tmp[8]*/ | ||
| 1810 | + | ||
| 1811 | + sub r10, r11 /* tmp[11] -= tmp[10]*/ | ||
| 1812 | + sub r9, r10 /* tmp[12] -= tmp[11]*/ | ||
| 1813 | + | ||
| 1814 | + st.w r12[0*4], lr /* y[0] = tmp[9]*/ | ||
| 1815 | + neg lr | ||
| 1816 | + st.w r12[17*4], lr /* y[17] = -tmp[9]*/ | ||
| 1817 | + st.d r12[1*4], r10 /* y[1] = tmp[10], y[2] = tmp[11] */ | ||
| 1818 | + neg r11 | ||
| 1819 | + neg r10 | ||
| 1820 | + st.w r12[16*4], r11 /* y[16] = -tmp[10] */ | ||
| 1821 | + st.w r12[15*4], r10 /* y[15] = -tmp[11] */ | ||
| 1822 | + | ||
| 1823 | + | ||
| 1824 | + sub r8, r9 /* tmp[13] -= tmp[12]*/ | ||
| 1825 | + sub r7, r8 /* tmp[14] -= tmp[13]*/ | ||
| 1826 | + st.d r12[3*4], r8 /* y[3] = tmp[12], y[4] = tmp[13] */ | ||
| 1827 | + neg r9 | ||
| 1828 | + neg r8 | ||
| 1829 | + st.w r12[14*4], r9 /* y[14] = -tmp[12] */ | ||
| 1830 | + st.w r12[13*4], r8 /* y[13] = -tmp[13] */ | ||
| 1831 | + | ||
| 1832 | + sub r6, r7 /* tmp[15] -= tmp[14]*/ | ||
| 1833 | + sub r5, r6 /* tmp[16] -= tmp[15]*/ | ||
| 1834 | + sub r4, r5 /* tmp[17] -= tmp[16]*/ | ||
| 1835 | + | ||
| 1836 | + st.d r12[5*4], r6 /* y[5] = tmp[14], y[6] = tmp[15] */ | ||
| 1837 | + neg r7 | ||
| 1838 | + neg r6 | ||
| 1839 | + st.w r12[12*4], r7 /* y[12] = -tmp[14] */ | ||
| 1840 | + st.w r12[11*4], r6 /* y[11] = -tmp[15] */ | ||
| 1841 | + | ||
| 1842 | + st.d r12[7*4], r4 /* y[7] = tmp[16], y[8] = tmp[17] */ | ||
| 1843 | + neg r5 | ||
| 1844 | + neg r4 | ||
| 1845 | + st.w r12[10*4], r5 /* y[10] = -tmp[16] */ | ||
| 1846 | + st.w r12[9*4], r4 /* y[9] = -tmp[17] */ | ||
| 1847 | + | ||
| 1848 | + popm r0-r7,r11,pc | ||
| 1849 | + | ||
| 1850 | + .align 2 | ||
| 1851 | +scale_dctIV: | ||
| 1852 | + .short MAD_F(0x1ff833fa), MAD_F(0x1fb9ea93), MAD_F(0x1f3dd120) | ||
| 1853 | + .short MAD_F(0x1e84d969), MAD_F(0x1d906bcf), MAD_F(0x1c62648b) | ||
| 1854 | + .short MAD_F(0x1afd100f), MAD_F(0x1963268b), MAD_F(0x1797c6a4) | ||
| 1855 | + .short MAD_F(0x159e6f5b), MAD_F(0x137af940), MAD_F(0x11318ef3) | ||
| 1856 | + .short MAD_F(0x0ec6a507), MAD_F(0x0c3ef153), MAD_F(0x099f61c5) | ||
| 1857 | + .short MAD_F(0x06ed12c5), MAD_F(0x042d4544), MAD_F(0x0165547c) | ||
| 1858 | + | ||
| 1859 | + .align 2 | ||
| 1860 | +scale_sdctII: | ||
| 1861 | + .short MAD_F(0x1fe0d3b4), MAD_F(0x1ee8dd47), MAD_F(0x1d007930) | ||
| 1862 | + .short MAD_F(0x1a367e59), MAD_F(0x16a09e66), MAD_F(0x125abcf8) | ||
| 1863 | + .short MAD_F(0x0d8616bc), MAD_F(0x08483ee1), MAD_F(0x02c9fad7) | ||
| 1864 | diff --git a/layer3.c b/layer3.c | ||
| 1865 | index 4e5d3fa..dffdab3 100644 | ||
| 1866 | --- a/layer3.c | ||
| 1867 | +++ b/layer3.c | ||
| 1868 | @@ -378,6 +378,11 @@ mad_fixed_t const ca[8] = { | ||
| 1869 | -MAD_F(0x003a2847) /* -0.014198569 */, -MAD_F(0x000f27b4) /* -0.003699975 */ | ||
| 1870 | }; | ||
| 1871 | |||
| 1872 | +#ifdef FPM_AVR32 | ||
| 1873 | +# undef MAD_F | ||
| 1874 | +# define MAD_F(x) ((x + (1 << 12)) >> 13) | ||
| 1875 | +#endif | ||
| 1876 | + | ||
| 1877 | /* | ||
| 1878 | * IMDCT coefficients for short blocks | ||
| 1879 | * derived from section 2.4.3.4.10.2 of ISO/IEC 11172-3 | ||
| 1880 | @@ -386,7 +391,7 @@ mad_fixed_t const ca[8] = { | ||
| 1881 | * imdct_s[i /odd][k] = cos((PI / 24) * (2 * (6 + (i-1)/2) + 7) * (2 * k + 1)) | ||
| 1882 | */ | ||
| 1883 | static | ||
| 1884 | -mad_fixed_t const imdct_s[6][6] = { | ||
| 1885 | +mad_coeff_t const imdct_s[6][6] = { | ||
| 1886 | # include "imdct_s.dat" | ||
| 1887 | }; | ||
| 1888 | |||
| 1889 | @@ -398,7 +403,7 @@ mad_fixed_t const imdct_s[6][6] = { | ||
| 1890 | * window_l[i] = sin((PI / 36) * (i + 1/2)) | ||
| 1891 | */ | ||
| 1892 | static | ||
| 1893 | -mad_fixed_t const window_l[36] = { | ||
| 1894 | +mad_coeff_t const window_l[36] = { | ||
| 1895 | MAD_F(0x00b2aa3e) /* 0.043619387 */, MAD_F(0x0216a2a2) /* 0.130526192 */, | ||
| 1896 | MAD_F(0x03768962) /* 0.216439614 */, MAD_F(0x04cfb0e2) /* 0.300705800 */, | ||
| 1897 | MAD_F(0x061f78aa) /* 0.382683432 */, MAD_F(0x07635284) /* 0.461748613 */, | ||
| 1898 | @@ -429,7 +434,7 @@ mad_fixed_t const window_l[36] = { | ||
| 1899 | * window_s[i] = sin((PI / 12) * (i + 1/2)) | ||
| 1900 | */ | ||
| 1901 | static | ||
| 1902 | -mad_fixed_t const window_s[12] = { | ||
| 1903 | +mad_coeff_t const window_s[12] = { | ||
| 1904 | MAD_F(0x0216a2a2) /* 0.130526192 */, MAD_F(0x061f78aa) /* 0.382683432 */, | ||
| 1905 | MAD_F(0x09bd7ca0) /* 0.608761429 */, MAD_F(0x0cb19346) /* 0.793353340 */, | ||
| 1906 | MAD_F(0x0ec835e8) /* 0.923879533 */, MAD_F(0x0fdcf549) /* 0.991444861 */, | ||
| 1907 | @@ -438,6 +443,11 @@ mad_fixed_t const window_s[12] = { | ||
| 1908 | MAD_F(0x061f78aa) /* 0.382683432 */, MAD_F(0x0216a2a2) /* 0.130526192 */, | ||
| 1909 | }; | ||
| 1910 | |||
| 1911 | +#ifdef FPM_AVR32 | ||
| 1912 | +# undef MAD_F | ||
| 1913 | +# define MAD_F(x) ((mad_fixed_t) (x##L)) | ||
| 1914 | +#endif | ||
| 1915 | + | ||
| 1916 | /* | ||
| 1917 | * coefficients for intensity stereo processing | ||
| 1918 | * derived from section 2.4.3.4.9.3 of ISO/IEC 11172-3 | ||
| 1919 | @@ -879,6 +889,42 @@ void III_exponents(struct channel const *channel, | ||
| 1920 | * NAME: III_requantize() | ||
| 1921 | * DESCRIPTION: requantize one (positive) value | ||
| 1922 | */ | ||
| 1923 | + | ||
| 1924 | +#if 0 | ||
| 1925 | +/*static*/ | ||
| 1926 | +mad_fixed_t III_requantize(unsigned int value, signed int exp) | ||
| 1927 | +{ | ||
| 1928 | + register mad_fixed_t tmp2, tmp3; | ||
| 1929 | + long long tmp_d; | ||
| 1930 | + | ||
| 1931 | + asm ("asr\t%0, %1, 2\n" | ||
| 1932 | + "ld.w\t%2, %4[%5 << 2]\n" | ||
| 1933 | + "sub\t%1, %1, %0 << 2\n" | ||
| 1934 | + "asr\t%3, %2, 7\n" | ||
| 1935 | + "andl\t%2, 0x7f, COH\n" | ||
| 1936 | + "add\t%0, %2\n" | ||
| 1937 | + "lsl\t%m0,%3,%0\n" | ||
| 1938 | + "neg\t%0\n" | ||
| 1939 | + "asr\t%3,%3,%0\n" | ||
| 1940 | + "add\t%2, %6, %1 << 2\n" | ||
| 1941 | + "ld.w\t%2, %2[12]\n" | ||
| 1942 | + "cp.w\t%0, 0\n" | ||
| 1943 | + "movlt\t%3, %m0\n" | ||
| 1944 | + "muls.d\t%0, %3, %2\n" | ||
| 1945 | + "cp.w\t%1, 0\n" | ||
| 1946 | + "breq\t0f\n" | ||
| 1947 | + "lsr\t%0, %0, 28\n" | ||
| 1948 | + "or\t%3, %0, %m0 << 4\n" | ||
| 1949 | + "0:\n" | ||
| 1950 | + : "=&r"(tmp_d), "+r"(exp), "=&r"(tmp2), "=&r"(tmp3) | ||
| 1951 | + : "r"(&rq_table), "r"(value), "r"(root_table)); | ||
| 1952 | + | ||
| 1953 | + | ||
| 1954 | + return tmp3; | ||
| 1955 | +} | ||
| 1956 | + | ||
| 1957 | +#else | ||
| 1958 | + | ||
| 1959 | static | ||
| 1960 | mad_fixed_t III_requantize(unsigned int value, signed int exp) | ||
| 1961 | { | ||
| 1962 | @@ -918,6 +964,7 @@ mad_fixed_t III_requantize(unsigned int value, signed int exp) | ||
| 1963 | |||
| 1964 | return frac ? mad_f_mul(requantized, root_table[3 + frac]) : requantized; | ||
| 1965 | } | ||
| 1966 | +#endif | ||
| 1967 | |||
| 1968 | /* we must take care that sz >= bits and sz < sizeof(long) lest bits == 0 */ | ||
| 1969 | # define MASK(cache, sz, bits) \ | ||
| 1970 | @@ -2054,27 +2101,42 @@ void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36]) | ||
| 1971 | } | ||
| 1972 | # endif | ||
| 1973 | |||
| 1974 | + | ||
| 1975 | +#ifdef FPM_AVR32 | ||
| 1976 | +# undef mad_f_mul | ||
| 1977 | +# define mad_f_mul(x, y) __builtin_mulsatrndwh_w(x, y) | ||
| 1978 | +#endif | ||
| 1979 | + | ||
| 1980 | /* | ||
| 1981 | * NAME: III_imdct_l() | ||
| 1982 | * DESCRIPTION: perform IMDCT and windowing for long blocks | ||
| 1983 | */ | ||
| 1984 | static | ||
| 1985 | -void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36], | ||
| 1986 | +void III_imdct_l(mad_fixed_t /*const*/ X[18], mad_fixed_t z[36], | ||
| 1987 | unsigned int block_type) | ||
| 1988 | { | ||
| 1989 | unsigned int i; | ||
| 1990 | + mad_fixed_t *z_ptr; | ||
| 1991 | + mad_coeff_t *w_ptr; | ||
| 1992 | |||
| 1993 | /* IMDCT */ | ||
| 1994 | |||
| 1995 | +#ifdef FPM_AVR32 | ||
| 1996 | + imdct36_avr32(X, z); | ||
| 1997 | +#else | ||
| 1998 | imdct36(X, z); | ||
| 1999 | +#endif | ||
| 2000 | |||
| 2001 | /* windowing */ | ||
| 2002 | |||
| 2003 | + z_ptr = &z[0]; | ||
| 2004 | + w_ptr = &window_l[0]; | ||
| 2005 | + | ||
| 2006 | switch (block_type) { | ||
| 2007 | case 0: /* normal window */ | ||
| 2008 | # if defined(ASO_INTERLEAVE1) | ||
| 2009 | { | ||
| 2010 | - register mad_fixed_t tmp1, tmp2; | ||
| 2011 | + register mad_coeff_t tmp1, tmp2; | ||
| 2012 | |||
| 2013 | tmp1 = window_l[0]; | ||
| 2014 | tmp2 = window_l[1]; | ||
| 2015 | @@ -2091,15 +2153,16 @@ void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36], | ||
| 2016 | } | ||
| 2017 | # elif defined(ASO_INTERLEAVE2) | ||
| 2018 | { | ||
| 2019 | - register mad_fixed_t tmp1, tmp2; | ||
| 2020 | + register mad_fixed_t tmp1; | ||
| 2021 | + register mad_coeff_t tmp2; | ||
| 2022 | |||
| 2023 | - tmp1 = z[0]; | ||
| 2024 | - tmp2 = window_l[0]; | ||
| 2025 | + tmp1 = *z_ptr; | ||
| 2026 | + tmp2 = *w_ptr++; | ||
| 2027 | |||
| 2028 | for (i = 0; i < 35; ++i) { | ||
| 2029 | - z[i] = mad_f_mul(tmp1, tmp2); | ||
| 2030 | - tmp1 = z[i + 1]; | ||
| 2031 | - tmp2 = window_l[i + 1]; | ||
| 2032 | + *z_ptr++ = mad_f_mul(tmp1, tmp2); | ||
| 2033 | + tmp1 = *z_ptr; | ||
| 2034 | + tmp2 = *w_ptr++; | ||
| 2035 | } | ||
| 2036 | |||
| 2037 | z[35] = mad_f_mul(tmp1, tmp2); | ||
| 2038 | @@ -2118,23 +2181,28 @@ void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36], | ||
| 2039 | |||
| 2040 | case 1: /* start block */ | ||
| 2041 | for (i = 0; i < 18; i += 3) { | ||
| 2042 | - z[i + 0] = mad_f_mul(z[i + 0], window_l[i + 0]); | ||
| 2043 | - z[i + 1] = mad_f_mul(z[i + 1], window_l[i + 1]); | ||
| 2044 | - z[i + 2] = mad_f_mul(z[i + 2], window_l[i + 2]); | ||
| 2045 | + *(z_ptr++) = mad_f_mul(*z_ptr, *w_ptr++); | ||
| 2046 | + *(z_ptr++) = mad_f_mul(*z_ptr, *w_ptr++); | ||
| 2047 | + *(z_ptr++) = mad_f_mul(*z_ptr, *w_ptr++); | ||
| 2048 | } | ||
| 2049 | + z_ptr += 6; | ||
| 2050 | + w_ptr = &window_s[6]; | ||
| 2051 | /* (i = 18; i < 24; ++i) z[i] unchanged */ | ||
| 2052 | - for (i = 24; i < 30; ++i) z[i] = mad_f_mul(z[i], window_s[i - 18]); | ||
| 2053 | - for (i = 30; i < 36; ++i) z[i] = 0; | ||
| 2054 | + for (i = 24; i < 30; ++i) *z_ptr++ = mad_f_mul(*z_ptr, *w_ptr++); | ||
| 2055 | + for (i = 30; i < 36; ++i) *z_ptr++ = 0; | ||
| 2056 | break; | ||
| 2057 | |||
| 2058 | case 3: /* stop block */ | ||
| 2059 | - for (i = 0; i < 6; ++i) z[i] = 0; | ||
| 2060 | - for (i = 6; i < 12; ++i) z[i] = mad_f_mul(z[i], window_s[i - 6]); | ||
| 2061 | + w_ptr = &window_s[0]; | ||
| 2062 | + for (i = 0; i < 6; ++i) *z_ptr++ = 0; | ||
| 2063 | + for (i = 6; i < 12; ++i) *z_ptr++ = mad_f_mul(*z_ptr, *w_ptr++); | ||
| 2064 | /* (i = 12; i < 18; ++i) z[i] unchanged */ | ||
| 2065 | + w_ptr = &window_l[18]; | ||
| 2066 | + z_ptr += 6; | ||
| 2067 | for (i = 18; i < 36; i += 3) { | ||
| 2068 | - z[i + 0] = mad_f_mul(z[i + 0], window_l[i + 0]); | ||
| 2069 | - z[i + 1] = mad_f_mul(z[i + 1], window_l[i + 1]); | ||
| 2070 | - z[i + 2] = mad_f_mul(z[i + 2], window_l[i + 2]); | ||
| 2071 | + *z_ptr++ = mad_f_mul(*z_ptr, *w_ptr++ ); | ||
| 2072 | + *z_ptr++ = mad_f_mul(*z_ptr, *w_ptr++); | ||
| 2073 | + *z_ptr++ = mad_f_mul(*z_ptr, *w_ptr++); | ||
| 2074 | } | ||
| 2075 | break; | ||
| 2076 | } | ||
| 2077 | @@ -2146,10 +2214,10 @@ void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36], | ||
| 2078 | * DESCRIPTION: perform IMDCT and windowing for short blocks | ||
| 2079 | */ | ||
| 2080 | static | ||
| 2081 | -void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]) | ||
| 2082 | +void III_imdct_s(mad_fixed_t /*const*/ X[18], mad_fixed_t z[36]) | ||
| 2083 | { | ||
| 2084 | mad_fixed_t y[36], *yptr; | ||
| 2085 | - mad_fixed_t const *wptr; | ||
| 2086 | + mad_coeff_t const *wptr; | ||
| 2087 | int w, i; | ||
| 2088 | register mad_fixed64hi_t hi; | ||
| 2089 | register mad_fixed64lo_t lo; | ||
| 2090 | @@ -2159,11 +2227,56 @@ void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]) | ||
| 2091 | yptr = &y[0]; | ||
| 2092 | |||
| 2093 | for (w = 0; w < 3; ++w) { | ||
| 2094 | - register mad_fixed_t const (*s)[6]; | ||
| 2095 | + register mad_coeff_t const (*s)[6]; | ||
| 2096 | |||
| 2097 | s = imdct_s; | ||
| 2098 | |||
| 2099 | for (i = 0; i < 3; ++i) { | ||
| 2100 | +#ifdef FPM_AVR32 | ||
| 2101 | + register long long int acc, tmp1, tmp2, tmp3, tmp4; | ||
| 2102 | + asm volatile ("ld.d\t%0, %5++\n" | ||
| 2103 | + "ld.d\t%1, %6[0]\n" | ||
| 2104 | + "ld.d\t%2, %6[2*4]\n" | ||
| 2105 | + "ld.d\t%3, %6[4*4]\n" | ||
| 2106 | + "mulwh.d\t%4, %m1, %m0:t\n" | ||
| 2107 | + "macwh.d\t%4, %1, %m0:b\n" | ||
| 2108 | + "ld.w\t%m0, %5++\n" | ||
| 2109 | + "macwh.d\t%4, %m2, %0:t\n" | ||
| 2110 | + "macwh.d\t%4, %2, %0:b\n" | ||
| 2111 | + "macwh.d\t%4, %m3, %m0:t\n" | ||
| 2112 | + "macwh.d\t%4, %3, %m0:b\n" | ||
| 2113 | + "ld.d\t%0, %5++\n" | ||
| 2114 | + "rol\t%4\n" | ||
| 2115 | + "rol\t%m4\n" | ||
| 2116 | + : "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), "=&r"(tmp4), | ||
| 2117 | + "=&r"(acc), "+r"(s) | ||
| 2118 | + : "r"(X)); | ||
| 2119 | + | ||
| 2120 | + asm volatile ("st.w\t%1[0], %m0\n" | ||
| 2121 | + "neg\t%m0\n" | ||
| 2122 | + "st.w\t%2[5*4], %m0\n" | ||
| 2123 | + : "+r"(acc) | ||
| 2124 | + : "r"(&yptr[i]), "r"(&yptr[-i])); | ||
| 2125 | + | ||
| 2126 | + asm volatile ("mulwh.d\t%4, %m1, %m0:t\n" | ||
| 2127 | + "macwh.d\t%4, %1, %m0:b\n" | ||
| 2128 | + "ld.w\t%m0, %5++\n" | ||
| 2129 | + "macwh.d\t%4, %m2, %0:t\n" | ||
| 2130 | + "macwh.d\t%4, %2, %0:b\n" | ||
| 2131 | + "macwh.d\t%4, %m3, %m0:t\n" | ||
| 2132 | + "macwh.d\t%4, %3, %m0:b\n" | ||
| 2133 | + "rol\t%4\n" | ||
| 2134 | + "rol\t%m4\n" | ||
| 2135 | + : "+r"(tmp1), "+r"(tmp2), "+r"(tmp3), "+r"(tmp4), | ||
| 2136 | + "=&r"(acc), "+r"(s) | ||
| 2137 | + : "r"(X)); | ||
| 2138 | + | ||
| 2139 | + asm volatile ( "st.w\t%1[6*4], %m0\n" | ||
| 2140 | + "st.w\t%2[11*4], %m0\n" | ||
| 2141 | + :: "r"(acc), "r"(&yptr[i]), "r"(&yptr[-i])); | ||
| 2142 | + | ||
| 2143 | + | ||
| 2144 | +#else | ||
| 2145 | MAD_F_ML0(hi, lo, X[0], (*s)[0]); | ||
| 2146 | MAD_F_MLA(hi, lo, X[1], (*s)[1]); | ||
| 2147 | MAD_F_MLA(hi, lo, X[2], (*s)[2]); | ||
| 2148 | @@ -2187,6 +2300,7 @@ void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]) | ||
| 2149 | yptr[11 - i] = yptr[i + 6]; | ||
| 2150 | |||
| 2151 | ++s; | ||
| 2152 | +#endif | ||
| 2153 | } | ||
| 2154 | |||
| 2155 | yptr += 12; | ||
| 2156 | @@ -2198,6 +2312,196 @@ void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]) | ||
| 2157 | yptr = &y[0]; | ||
| 2158 | wptr = &window_s[0]; | ||
| 2159 | |||
| 2160 | +#ifdef FPM_AVR32 | ||
| 2161 | + /* z[0] = 0; | ||
| 2162 | + z[1] = 0; | ||
| 2163 | + z[2] = 0; | ||
| 2164 | + z[3] = 0; | ||
| 2165 | + z[4] = 0; | ||
| 2166 | + z[5] = 0; | ||
| 2167 | + z[30] = 0; | ||
| 2168 | + z[31] = 0; | ||
| 2169 | + z[32] = 0; | ||
| 2170 | + z[33] = 0; | ||
| 2171 | + z[34] = 0; | ||
| 2172 | + z[35] = 0; | ||
| 2173 | + */ | ||
| 2174 | + { | ||
| 2175 | + register long long int tmp, tmp2, tmp3, w0123, w4567, w891011; | ||
| 2176 | + asm volatile ("mov\t%m0, 0\n" | ||
| 2177 | + "mov\t%0, %m0\n" | ||
| 2178 | + "st.d\t%1[0], %0\n" | ||
| 2179 | + "st.d\t%1[2*4], %0\n" | ||
| 2180 | + "st.d\t%1[4*4], %0\n" | ||
| 2181 | + "st.d\t%1[30*4], %0\n" | ||
| 2182 | + "st.d\t%1[32*4], %0\n" | ||
| 2183 | + "st.d\t%1[34*4], %0\n" | ||
| 2184 | + : "=&r"(tmp) : "r"(z)); | ||
| 2185 | + | ||
| 2186 | + | ||
| 2187 | + | ||
| 2188 | + /* | ||
| 2189 | + z[6] = mad_f_mul(yptr [0], wptr[0]); | ||
| 2190 | + z[7] = mad_f_mul(yptr [1], wptr[1]); | ||
| 2191 | + z[8] = mad_f_mul(yptr [2], wptr[2]); | ||
| 2192 | + z[9] = mad_f_mul(yptr [3], wptr[3]); | ||
| 2193 | + z[10] = mad_f_mul(yptr[4], wptr[4]); | ||
| 2194 | + z[11] = mad_f_mul(yptr[5], wptr[5]); | ||
| 2195 | + z[24] = mad_f_mul(yptr [30], wptr[6]); | ||
| 2196 | + z[25] = mad_f_mul(yptr [31], wptr[7]); | ||
| 2197 | + z[26] = mad_f_mul(yptr [32], wptr[8]); | ||
| 2198 | + z[27] = mad_f_mul(yptr [33], wptr[9]); | ||
| 2199 | + z[28] = mad_f_mul(yptr[34], wptr[10]); | ||
| 2200 | + z[29] = mad_f_mul(yptr[35], wptr[11]); | ||
| 2201 | + */ | ||
| 2202 | + | ||
| 2203 | + | ||
| 2204 | + asm volatile ("ld.d\t%0, %5[0*4]\n" | ||
| 2205 | + "ld.d\t%3, %6[0*4]\n" | ||
| 2206 | + "ld.d\t%1, %5[2*4]\n" | ||
| 2207 | + "ld.d\t%2, %5[4*4]\n" | ||
| 2208 | + "mulsatrndwh.w\t%m3, %m3, %m0:t\n" | ||
| 2209 | + "mulsatrndwh.w\t%3, %3, %m0:b\n" | ||
| 2210 | + "ld.d\t%4, %6[2*4]\n" | ||
| 2211 | + "st.d\t%7[6*4], %3\n" | ||
| 2212 | + | ||
| 2213 | + "mulsatrndwh.w\t%m4, %m4, %0:t\n" | ||
| 2214 | + "mulsatrndwh.w\t%4, %4, %0:b\n" | ||
| 2215 | + "ld.d\t%3, %6[4*4]\n" | ||
| 2216 | + "st.d\t%7[8*4], %4\n" | ||
| 2217 | + | ||
| 2218 | + "mulsatrndwh.w\t%m3, %m3, %m1:t\n" | ||
| 2219 | + "mulsatrndwh.w\t%3, %3, %m1:b\n" | ||
| 2220 | + "ld.d\t%4, %6[30*4]\n" | ||
| 2221 | + "st.d\t%7[10*4], %3\n" | ||
| 2222 | + | ||
| 2223 | + "mulsatrndwh.w\t%m4, %m4, %1:t\n" | ||
| 2224 | + "mulsatrndwh.w\t%4, %4, %1:b\n" | ||
| 2225 | + "ld.d\t%3, %6[32*4]\n" | ||
| 2226 | + "st.d\t%7[24*4], %4\n" | ||
| 2227 | + | ||
| 2228 | + "mulsatrndwh.w\t%m3, %m3, %m2:t\n" | ||
| 2229 | + "mulsatrndwh.w\t%3, %3, %m2:b\n" | ||
| 2230 | + "ld.d\t%4, %6[34*4]\n" | ||
| 2231 | + "st.d\t%7[26*4], %3\n" | ||
| 2232 | + | ||
| 2233 | + "mulsatrndwh.w\t%m4, %m4, %2:t\n" | ||
| 2234 | + "mulsatrndwh.w\t%4, %4, %2:b\n" | ||
| 2235 | + "st.d\t%7[28*4], %4\n" | ||
| 2236 | + | ||
| 2237 | + : "=&r"(w0123), "=&r"(w4567), "=&r"(w891011), "=&r"(tmp), "=&r"(tmp2) | ||
| 2238 | + : "r"(wptr), "r"(yptr), "r"(z)); | ||
| 2239 | + /* | ||
| 2240 | + MAD_F_ML0(hi, lo, yptr[6], wptr[6]); | ||
| 2241 | + MAD_F_MLA(hi, lo, yptr[12], wptr[0]); | ||
| 2242 | + z[12] = MAD_F_MLZ(hi, lo); | ||
| 2243 | + MAD_F_ML0(hi, lo, yptr[7], wptr[7]); | ||
| 2244 | + MAD_F_MLA(hi, lo, yptr[13], wptr[1]); | ||
| 2245 | + z[13] = MAD_F_MLZ(hi, lo); | ||
| 2246 | + MAD_F_ML0(hi, lo, yptr[8], wptr[8]); | ||
| 2247 | + MAD_F_MLA(hi, lo, yptr[14], wptr[2]); | ||
| 2248 | + z[14] = MAD_F_MLZ(hi, lo); | ||
| 2249 | + MAD_F_ML0(hi, lo, yptr[9], wptr[9]); | ||
| 2250 | + MAD_F_MLA(hi, lo, yptr[15], wptr[3]); | ||
| 2251 | + z[15] = MAD_F_MLZ(hi, lo); | ||
| 2252 | + MAD_F_ML0(hi, lo, yptr[10], wptr[10]); | ||
| 2253 | + MAD_F_MLA(hi, lo, yptr[16], wptr[4]); | ||
| 2254 | + z[16] = MAD_F_MLZ(hi, lo); | ||
| 2255 | + MAD_F_ML0(hi, lo, yptr[11], wptr[11]); | ||
| 2256 | + MAD_F_MLA(hi, lo, yptr[17], wptr[5]); | ||
| 2257 | + z[17] = MAD_F_MLZ(hi, lo); | ||
| 2258 | + | ||
| 2259 | + MAD_F_ML0(hi, lo, yptr[18], wptr[6]); | ||
| 2260 | + MAD_F_MLA(hi, lo, yptr[24], wptr[0]); | ||
| 2261 | + z[18] = MAD_F_MLZ(hi, lo); | ||
| 2262 | + MAD_F_ML0(hi, lo, yptr[19], wptr[7]); | ||
| 2263 | + MAD_F_MLA(hi, lo, yptr[25], wptr[1]); | ||
| 2264 | + z[19] = MAD_F_MLZ(hi, lo); | ||
| 2265 | + MAD_F_ML0(hi, lo, yptr[20], wptr[8]); | ||
| 2266 | + MAD_F_MLA(hi, lo, yptr[26], wptr[2]); | ||
| 2267 | + z[20] = MAD_F_MLZ(hi, lo); | ||
| 2268 | + MAD_F_ML0(hi, lo, yptr[21], wptr[9]); | ||
| 2269 | + MAD_F_MLA(hi, lo, yptr[27], wptr[3]); | ||
| 2270 | + z[21] = MAD_F_MLZ(hi, lo); | ||
| 2271 | + MAD_F_ML0(hi, lo, yptr[22], wptr[10]); | ||
| 2272 | + MAD_F_MLA(hi, lo, yptr[28], wptr[4]); | ||
| 2273 | + z[22] = MAD_F_MLZ(hi, lo); | ||
| 2274 | + MAD_F_ML0(hi, lo, yptr[23], wptr[11]); | ||
| 2275 | + MAD_F_MLA(hi, lo, yptr[29], wptr[5]); | ||
| 2276 | + z[23] = MAD_F_MLZ(hi, lo);*/ | ||
| 2277 | + | ||
| 2278 | + | ||
| 2279 | + asm volatile ("ld.d\t%0, %3[6*4]\n" | ||
| 2280 | + "ld.d\t%1, %3[12*4]\n" | ||
| 2281 | + "mulwh.d\t%2, %m0, %5:t\n" | ||
| 2282 | + "macwh.d\t%2, %m1, %m4:t\n" | ||
| 2283 | + "mulwh.d\t%0, %0, %5:b\n" | ||
| 2284 | + "macwh.d\t%0, %1, %m4:b\n" | ||
| 2285 | + "lsl\t%m2, 1\n" | ||
| 2286 | + "lsl\t%2, %m0, 1\n" | ||
| 2287 | + "st.d\t%6[12*4], %2\n" | ||
| 2288 | + | ||
| 2289 | + "ld.d\t%0, %3[18*4]\n" | ||
| 2290 | + "ld.d\t%1, %3[24*4]\n" | ||
| 2291 | + "mulwh.d\t%2, %m0, %5:t\n" | ||
| 2292 | + "macwh.d\t%2, %m1, %m4:t\n" | ||
| 2293 | + "mulwh.d\t%0, %0, %5:b\n" | ||
| 2294 | + "macwh.d\t%0, %1, %m4:b\n" | ||
| 2295 | + "lsl\t%m2, 1\n" | ||
| 2296 | + "lsl\t%2, %m0, 1\n" | ||
| 2297 | + "st.d\t%6[18*4], %2\n" | ||
| 2298 | + | ||
| 2299 | + : "=&r"(tmp), "=&r"(tmp2), "=&r"(tmp3) | ||
| 2300 | + : "r"(yptr), "r"(w0123), "r"(w4567), "r"(z)); | ||
| 2301 | + | ||
| 2302 | + asm volatile ("ld.d\t%0, %3[8*4]\n" | ||
| 2303 | + "ld.d\t%1, %3[14*4]\n" | ||
| 2304 | + "mulwh.d\t%2, %m0, %m5:t\n" | ||
| 2305 | + "macwh.d\t%2, %m1, %4:t\n" | ||
| 2306 | + "mulwh.d\t%0, %0, %m5:b\n" | ||
| 2307 | + "macwh.d\t%0, %1, %4:b\n" | ||
| 2308 | + "lsl\t%m2, 1\n" | ||
| 2309 | + "lsl\t%2, %m0, 1\n" | ||
| 2310 | + "st.d\t%6[14*4], %2\n" | ||
| 2311 | + | ||
| 2312 | + "ld.d\t%0, %3[20*4]\n" | ||
| 2313 | + "ld.d\t%1, %3[26*4]\n" | ||
| 2314 | + "mulwh.d\t%2, %m0, %m5:t\n" | ||
| 2315 | + "macwh.d\t%2, %m1, %4:t\n" | ||
| 2316 | + "mulwh.d\t%0, %0, %m5:b\n" | ||
| 2317 | + "macwh.d\t%0, %1, %4:b\n" | ||
| 2318 | + "lsl\t%m2, 1\n" | ||
| 2319 | + "lsl\t%2, %m0, 1\n" | ||
| 2320 | + "st.d\t%6[20*4], %2\n" | ||
| 2321 | + | ||
| 2322 | + : "=&r"(tmp), "=&r"(tmp2), "=&r"(tmp3) | ||
| 2323 | + : "r"(yptr), "r"(w0123), "r"(w891011), "r"(z)); | ||
| 2324 | + | ||
| 2325 | + asm volatile ("ld.d\t%0, %3[10*4]\n" | ||
| 2326 | + "ld.d\t%1, %3[16*4]\n" | ||
| 2327 | + "mulwh.d\t%2, %m0, %5:t\n" | ||
| 2328 | + "macwh.d\t%2, %m1, %m4:t\n" | ||
| 2329 | + "mulwh.d\t%0, %0, %5:b\n" | ||
| 2330 | + "macwh.d\t%0, %1, %m4:b\n" | ||
| 2331 | + "lsl\t%m2, 1\n" | ||
| 2332 | + "lsl\t%2, %m0, 1\n" | ||
| 2333 | + "st.d\t%6[16*4], %2\n" | ||
| 2334 | + | ||
| 2335 | + "ld.d\t%0, %3[22*4]\n" | ||
| 2336 | + "ld.d\t%1, %3[28*4]\n" | ||
| 2337 | + "mulwh.d\t%2, %m0, %5:t\n" | ||
| 2338 | + "macwh.d\t%2, %m1, %m4:t\n" | ||
| 2339 | + "mulwh.d\t%0, %0, %5:b\n" | ||
| 2340 | + "macwh.d\t%0, %1, %m4:b\n" | ||
| 2341 | + "lsl\t%m2, 1\n" | ||
| 2342 | + "lsl\t%2, %m0, 1\n" | ||
| 2343 | + "st.d\t%6[22*4], %2\n" | ||
| 2344 | + | ||
| 2345 | + : "=&r"(tmp), "=&r"(tmp2), "=&r"(tmp3) | ||
| 2346 | + : "r"(yptr), "r"(w4567), "r"(w891011), "r"(z)); | ||
| 2347 | + | ||
| 2348 | + } | ||
| 2349 | +#else | ||
| 2350 | for (i = 0; i < 6; ++i) { | ||
| 2351 | z[i + 0] = 0; | ||
| 2352 | z[i + 6] = mad_f_mul(yptr[ 0 + 0], wptr[0]); | ||
| 2353 | @@ -2218,8 +2522,15 @@ void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]) | ||
| 2354 | ++yptr; | ||
| 2355 | ++wptr; | ||
| 2356 | } | ||
| 2357 | +#endif | ||
| 2358 | } | ||
| 2359 | |||
| 2360 | +#ifdef FPM_AVR32 | ||
| 2361 | +# undef mad_f_mul | ||
| 2362 | +# define mad_f_mul(x, y) ((((x) + (1L << 11)) >> 12) * \ | ||
| 2363 | + (((y) + (1L << 15)) >> 16)) | ||
| 2364 | +#endif | ||
| 2365 | + | ||
| 2366 | /* | ||
| 2367 | * NAME: III_overlap() | ||
| 2368 | * DESCRIPTION: perform overlap-add of windowed IMDCT outputs | ||
| 2369 | diff --git a/synth.c b/synth.c | ||
| 2370 | index 1d28d43..f42d49b 100644 | ||
| 2371 | --- a/synth.c | ||
| 2372 | +++ b/synth.c | ||
| 2373 | @@ -29,20 +29,6 @@ | ||
| 2374 | # include "frame.h" | ||
| 2375 | # include "synth.h" | ||
| 2376 | |||
| 2377 | -/* | ||
| 2378 | - * NAME: synth->init() | ||
| 2379 | - * DESCRIPTION: initialize synth struct | ||
| 2380 | - */ | ||
| 2381 | -void mad_synth_init(struct mad_synth *synth) | ||
| 2382 | -{ | ||
| 2383 | - mad_synth_mute(synth); | ||
| 2384 | - | ||
| 2385 | - synth->phase = 0; | ||
| 2386 | - | ||
| 2387 | - synth->pcm.samplerate = 0; | ||
| 2388 | - synth->pcm.channels = 0; | ||
| 2389 | - synth->pcm.length = 0; | ||
| 2390 | -} | ||
| 2391 | |||
| 2392 | /* | ||
| 2393 | * NAME: synth->mute() | ||
| 2394 | @@ -88,6 +74,10 @@ void mad_synth_mute(struct mad_synth *synth) | ||
| 2395 | |||
| 2396 | /* FPM_DEFAULT without OPT_SSO will actually lose accuracy and performance */ | ||
| 2397 | |||
| 2398 | +# if defined(FPM_AVR32) | ||
| 2399 | +# define OPT_SSO | ||
| 2400 | +# endif | ||
| 2401 | + | ||
| 2402 | # if defined(FPM_DEFAULT) && !defined(OPT_SSO) | ||
| 2403 | # define OPT_SSO | ||
| 2404 | # endif | ||
| 2405 | @@ -522,9 +512,15 @@ void dct32(mad_fixed_t const in[32], unsigned int slot, | ||
| 2406 | # endif | ||
| 2407 | # define ML0(hi, lo, x, y) ((lo) = (x) * (y)) | ||
| 2408 | # define MLA(hi, lo, x, y) ((lo) += (x) * (y)) | ||
| 2409 | -# define MLN(hi, lo) ((lo) = -(lo)) | ||
| 2410 | -# define MLZ(hi, lo) ((void) (hi), (mad_fixed_t) (lo)) | ||
| 2411 | -# define SHIFT(x) ((x) >> 2) | ||
| 2412 | +# if defined(FPM_AVR32) | ||
| 2413 | +# define MLN(hi, lo) MAD_F_MLN((hi), (lo)) | ||
| 2414 | +# define MLZ(hi, lo) (hi) | ||
| 2415 | +# define SHIFT(x) ((x) << 2) | ||
| 2416 | +# else | ||
| 2417 | +# define MLN(hi, lo) ((lo) = -(lo)) | ||
| 2418 | +# define MLZ(hi, lo) ((void) (hi), (mad_fixed_t) (lo)) | ||
| 2419 | +# define SHIFT(x) ((x) >> 2) | ||
| 2420 | +# endif | ||
| 2421 | # define PRESHIFT(x) ((MAD_F(x) + (1L << 13)) >> 14) | ||
| 2422 | # else | ||
| 2423 | # define ML0(hi, lo, x, y) MAD_F_ML0((hi), (lo), (x), (y)) | ||
| 2424 | @@ -541,11 +537,54 @@ void dct32(mad_fixed_t const in[32], unsigned int slot, | ||
| 2425 | # endif | ||
| 2426 | # endif | ||
| 2427 | |||
| 2428 | +/* | ||
| 2429 | + * NAME: synth->init() | ||
| 2430 | + * DESCRIPTION: initialize synth struct | ||
| 2431 | + */ | ||
| 2432 | + | ||
| 2433 | +#ifdef FPM_AVR32 | ||
| 2434 | +short Dmod[17][33]; | ||
| 2435 | +#endif | ||
| 2436 | + | ||
| 2437 | static | ||
| 2438 | +#ifdef FPM_AVR32 | ||
| 2439 | +short const D[17][32] = { | ||
| 2440 | +#else | ||
| 2441 | mad_fixed_t const D[17][32] = { | ||
| 2442 | +#endif | ||
| 2443 | # include "D.dat" | ||
| 2444 | }; | ||
| 2445 | |||
| 2446 | +void mad_synth_init(struct mad_synth *synth) | ||
| 2447 | +{ | ||
| 2448 | + | ||
| 2449 | + mad_synth_mute(synth); | ||
| 2450 | + | ||
| 2451 | + synth->phase = 0; | ||
| 2452 | + | ||
| 2453 | + synth->pcm.samplerate = 0; | ||
| 2454 | + synth->pcm.channels = 0; | ||
| 2455 | + synth->pcm.length = 0; | ||
| 2456 | + | ||
| 2457 | +#ifdef FPM_AVR32 | ||
| 2458 | + { | ||
| 2459 | + int i, j; | ||
| 2460 | + for ( i = 0; i < 17; i++ ){ | ||
| 2461 | + for ( j = 0; j < 32; j++ ){ | ||
| 2462 | + if ( j & 1 ){ | ||
| 2463 | + Dmod[i][17 + (j >> 1)]= D[i][j]; | ||
| 2464 | + } else { | ||
| 2465 | + Dmod[i][(j >> 1)]= D[i][j]; | ||
| 2466 | + } | ||
| 2467 | + } | ||
| 2468 | + | ||
| 2469 | + Dmod[i][16]= Dmod[i][16+8]; | ||
| 2470 | + } | ||
| 2471 | + } | ||
| 2472 | +#endif | ||
| 2473 | + | ||
| 2474 | +} | ||
| 2475 | + | ||
| 2476 | # if defined(ASO_SYNTH) | ||
| 2477 | void synth_full(struct mad_synth *, struct mad_frame const *, | ||
| 2478 | unsigned int, unsigned int); | ||
| 2479 | @@ -560,9 +599,13 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, | ||
| 2480 | { | ||
| 2481 | unsigned int phase, ch, s, sb, pe, po; | ||
| 2482 | mad_fixed_t *pcm1, *pcm2, (*filter)[2][2][16][8]; | ||
| 2483 | - mad_fixed_t const (*sbsample)[36][32]; | ||
| 2484 | + mad_fixed_t /*const*/ (*sbsample)[36][32]; | ||
| 2485 | register mad_fixed_t (*fe)[8], (*fx)[8], (*fo)[8]; | ||
| 2486 | +#ifdef FPM_AVR32 | ||
| 2487 | + register short const (*Dptr)[32], *ptr; | ||
| 2488 | +#else | ||
| 2489 | register mad_fixed_t const (*Dptr)[32], *ptr; | ||
| 2490 | +#endif | ||
| 2491 | register mad_fixed64hi_t hi; | ||
| 2492 | register mad_fixed64lo_t lo; | ||
| 2493 | |||
| 2494 | @@ -573,6 +616,20 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, | ||
| 2495 | pcm1 = synth->pcm.samples[ch]; | ||
| 2496 | |||
| 2497 | for (s = 0; s < ns; ++s) { | ||
| 2498 | +# ifdef FPM_AVR32 | ||
| 2499 | +/* | ||
| 2500 | + int i; | ||
| 2501 | + for ( i = 0; i < 32; i++ ){ | ||
| 2502 | + (*sbsample)[s][i] = ((*sbsample)[s][i] + (1 << 13)) & 0xFFFFC000; | ||
| 2503 | + } | ||
| 2504 | +*/ | ||
| 2505 | + dct32_avr32((*sbsample)[s], phase >> 1, | ||
| 2506 | + (*filter)[0][phase & 1], (*filter)[1][phase & 1]); | ||
| 2507 | + /* printf("dct32: %d\n", GET_CYCLES);*/ | ||
| 2508 | + pcm1 = synth_avr32(phase, (mad_fixed_t *)filter, \ | ||
| 2509 | + pcm1, (short *)&Dmod[0]); | ||
| 2510 | + /* printf("synth_window: %d\n", GET_CYCLES);*/ | ||
| 2511 | +# else | ||
| 2512 | dct32((*sbsample)[s], phase >> 1, | ||
| 2513 | (*filter)[0][phase & 1], (*filter)[1][phase & 1]); | ||
| 2514 | |||
| 2515 | @@ -679,6 +736,7 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, | ||
| 2516 | MLA(hi, lo, (*fo)[7], ptr[ 2]); | ||
| 2517 | |||
| 2518 | *pcm1 = SHIFT(-MLZ(hi, lo)); | ||
| 2519 | +# endif | ||
| 2520 | pcm1 += 16; | ||
| 2521 | |||
| 2522 | phase = (phase + 1) % 16; | ||
| 2523 | diff --git a/synth_avr32.S b/synth_avr32.S | ||
| 2524 | new file mode 100644 | ||
| 2525 | index 0000000..701077b | ||
| 2526 | --- /dev/null | ||
| 2527 | +++ b/synth_avr32.S | ||
| 2528 | @@ -0,0 +1,394 @@ | ||
| 2529 | +/* | ||
| 2530 | + Optimized function for speeding up synthesis filter | ||
| 2531 | + in MPEG Audio Decoding. | ||
| 2532 | + Copyright 2003-2006 Atmel Corporation. | ||
| 2533 | + | ||
| 2534 | + Written by Ronny Pedersen and Lars Even Almås, Atmel Norway | ||
| 2535 | + | ||
| 2536 | + This program is free software; you can redistribute it and/or modify | ||
| 2537 | + it under the terms of the GNU General Public License as published by | ||
| 2538 | + the Free Software Foundation; either version 2 of the License, or | ||
| 2539 | + (at your option) any later version. | ||
| 2540 | + | ||
| 2541 | + This program is distributed in the hope that it will be useful, | ||
| 2542 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 2543 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 2544 | + GNU General Public License for more details. | ||
| 2545 | + | ||
| 2546 | + You should have received a copy of the GNU General Public License | ||
| 2547 | + along with this program; if not, write to the Free Software | ||
| 2548 | + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | ||
| 2549 | + | ||
| 2550 | + | ||
| 2551 | +/* ***************** | ||
| 2552 | + Defining macros | ||
| 2553 | + ***************** */ | ||
| 2554 | + | ||
| 2555 | + .macro window_1 f, ptr, acc, ptr_offset, mul, tmp1_lo, tmp1_hi, tmp2_lo, tmp2_hi, tmp3_lo, tmp3_hi | ||
| 2556 | + ld.d \tmp1_lo, \f[0*4] /* tmp1 = { f[0], f[1] } */ | ||
| 2557 | + ld.w \tmp2_lo, \ptr[0*2+\ptr_offset*2] /* tmp2_lo = { ptr[0], ptr[1] }*/ | ||
| 2558 | + ld.d \tmp3_lo, \f[6*4] /* tmp3 = { f[6], f[7] } */ | ||
| 2559 | + ld.w \tmp2_hi, \ptr[6*2+\ptr_offset*2] /* tmp2_hi = { ptr[6], ptr[7] }*/ | ||
| 2560 | + .if \mul | ||
| 2561 | + mulwh.d \acc, \tmp1_hi, \tmp2_lo:t /* f[0] * ptr[0]*/ | ||
| 2562 | + .else | ||
| 2563 | + macwh.d \acc, \tmp1_hi, \tmp2_lo:t /* f[0] * ptr[0]*/ | ||
| 2564 | + .endif | ||
| 2565 | + macwh.d \acc, \tmp3_lo, \tmp2_lo:b /* f[7] * ptr[1]*/ | ||
| 2566 | + ld.w \tmp2_lo, \ptr[2*2+\ptr_offset*2] /* tmp2_lo = { ptr[2], ptr[3] }*/ | ||
| 2567 | + macwh.d \acc, \tmp1_lo, \tmp2_hi:b /* f[1] * ptr[7]*/ | ||
| 2568 | + ld.d \tmp1_lo, \f[2*4] /* tmp1 = { f[2], f[3] } */ | ||
| 2569 | + | ||
| 2570 | + macwh.d \acc, \tmp3_hi, \tmp2_lo:t /* f[6] * ptr[2]*/ | ||
| 2571 | + macwh.d \acc, \tmp1_hi, \tmp2_hi:t /* f[2] * ptr[6]*/ | ||
| 2572 | + ld.d \tmp3_lo, \f[4*4] /* tmp3 = { f[4], f[5] } */ | ||
| 2573 | + ld.w \tmp2_hi, \ptr[4*2+\ptr_offset*2] /* tmp2_hi = { ptr[4], ptr[5] }*/ | ||
| 2574 | + macwh.d \acc, \tmp3_lo, \tmp2_lo:b /* f[5] * ptr[3]*/ | ||
| 2575 | + | ||
| 2576 | + macwh.d \acc, \tmp1_lo, \tmp2_hi:b /* f[3] * ptr[5]*/ | ||
| 2577 | + macwh.d \acc, \tmp3_hi, \tmp2_hi:t /* f[4] * ptr[4]*/ | ||
| 2578 | + .endm | ||
| 2579 | + | ||
| 2580 | + .macro window_2 f, ptr, acc, ptr_offset, mul, tmp1_lo, tmp1_hi, tmp2_lo, tmp2_hi, tmp3_lo, tmp3_hi | ||
| 2581 | + ld.d \tmp1_lo, \f[0*4] /* tmp1 = { f[0], f[1] } */ | ||
| 2582 | + ld.w \tmp2_lo, \ptr[7*2+\ptr_offset*2] /* tmp2_lo = { ptr[7], ptr[8] }*/ | ||
| 2583 | + ld.d \tmp3_lo, \f[2*4] /* tmp3 = { f[2], f[3] } */ | ||
| 2584 | + ld.w \tmp2_hi, \ptr[9*2+\ptr_offset*2] /* tmp2_hi = { ptr[9], ptr[10] }*/ | ||
| 2585 | + .if \mul | ||
| 2586 | + mulwh.d \acc, \tmp1_hi, \tmp2_lo:t /* f[0] * ptr[7]*/ | ||
| 2587 | + .else | ||
| 2588 | + macwh.d \acc, \tmp1_hi, \tmp2_lo:t /* f[0] * ptr[7]*/ | ||
| 2589 | + .endif | ||
| 2590 | + macwh.d \acc, \tmp1_lo, \tmp2_lo:b /* f[1] * ptr[8]*/ | ||
| 2591 | + | ||
| 2592 | + ld.d \tmp1_lo, \f[4*4] /* tmp1 = { f[4], f[5] } */ | ||
| 2593 | + ld.w \tmp2_lo, \ptr[11*2+\ptr_offset*2] /* tmp2_lo = { ptr[11], ptr[12] }*/ | ||
| 2594 | + | ||
| 2595 | + macwh.d \acc, \tmp3_hi, \tmp2_hi:t /* f[2] * ptr[9]*/ | ||
| 2596 | + macwh.d \acc, \tmp3_lo, \tmp2_hi:b /* f[3] * ptr[10]*/ | ||
| 2597 | + | ||
| 2598 | + ld.d \tmp3_lo, \f[6*4] /* tmp3 = { f[6], f[7] } */ | ||
| 2599 | + ld.w \tmp2_hi, \ptr[13*2+\ptr_offset*2] /* tmp2_hi = { ptr[13], ptr[14] }*/ | ||
| 2600 | + | ||
| 2601 | + macwh.d \acc, \tmp1_hi, \tmp2_lo:t /* f[4] * ptr[11]*/ | ||
| 2602 | + macwh.d \acc, \tmp1_lo, \tmp2_lo:b /* f[5] * ptr[12]*/ | ||
| 2603 | + macwh.d \acc, \tmp3_hi, \tmp2_hi:t /* f[6] * ptr[13]*/ | ||
| 2604 | + macwh.d \acc, \tmp3_lo, \tmp2_hi:b /* f[7] * ptr[14]*/ | ||
| 2605 | + .endm | ||
| 2606 | + | ||
| 2607 | + .macro scale res, d_lo, d_hi | ||
| 2608 | + lsl \d_hi, 2 | ||
| 2609 | + .endm | ||
| 2610 | + | ||
| 2611 | +/* ********************** | ||
| 2612 | + Starting main function | ||
| 2613 | + ********************** */ | ||
| 2614 | + | ||
| 2615 | +/* Function synth_avr32 is called from synth.c with arguments: | ||
| 2616 | + phase, filter, pcm1, &Dmod[0] */ | ||
| 2617 | + | ||
| 2618 | + .global synth_avr32 | ||
| 2619 | +synth_avr32: | ||
| 2620 | + pushm r0-r7, lr | ||
| 2621 | + sub sp, 8 | ||
| 2622 | + | ||
| 2623 | + /* R12 = phase, R11 = filter, R10 = pcm1, r9 = Dmod */ | ||
| 2624 | + bld r12, 0 | ||
| 2625 | + brcc synth_even | ||
| 2626 | + | ||
| 2627 | + /* Filter for odd phases */ | ||
| 2628 | + | ||
| 2629 | + /* fe = &(*filter)[0][1][0]; | ||
| 2630 | + fx = &(*filter)[0][0][0]; | ||
| 2631 | + fo = &(*filter)[1][0][0]; */ | ||
| 2632 | + sub lr /*fe*/, r11, -16*8*4 | ||
| 2633 | + sub r8 /*fo*/, r11, -16*8*4*2 | ||
| 2634 | + | ||
| 2635 | + /* pe = phase >> 1; */ | ||
| 2636 | + lsr r12, 1 | ||
| 2637 | + stdsp sp[4], r12 | ||
| 2638 | + /* ptr = (short const *)Dmod + pe; */ | ||
| 2639 | + add r12, r9, r12 << 1 | ||
| 2640 | + | ||
| 2641 | + /* ML0(hi, lo, (*fx)[0], ptr[0 + 17]); | ||
| 2642 | + MLA(hi, lo, (*fx)[1], ptr[7 + 17]); | ||
| 2643 | + MLA(hi, lo, (*fx)[2], ptr[6 + 17]); | ||
| 2644 | + MLA(hi, lo, (*fx)[3], ptr[5 + 17]); | ||
| 2645 | + MLA(hi, lo, (*fx)[4], ptr[4 + 17]); | ||
| 2646 | + MLA(hi, lo, (*fx)[5], ptr[3 + 17]); | ||
| 2647 | + MLA(hi, lo, (*fx)[6], ptr[2 + 17]); | ||
| 2648 | + MLA(hi, lo, (*fx)[7], ptr[1 + 17]); */ | ||
| 2649 | + window_1 r11/*fx*/,r12/*ptr*/,r0/*acc*/,17/*off*/,1/*mul*/,r2,r3,r4,r5,r6,r7 | ||
| 2650 | + | ||
| 2651 | + /* MLN(hi, lo); */ | ||
| 2652 | + neg r0 | ||
| 2653 | + acr r1 | ||
| 2654 | + neg r1 | ||
| 2655 | + | ||
| 2656 | + /* MLA(hi, lo, (*fe)[0], ptr[0]); | ||
| 2657 | + MLA(hi, lo, (*fe)[1], ptr[7]); | ||
| 2658 | + MLA(hi, lo, (*fe)[2], ptr[6]); | ||
| 2659 | + MLA(hi, lo, (*fe)[3], ptr[5]); | ||
| 2660 | + MLA(hi, lo, (*fe)[4], ptr[4]); | ||
| 2661 | + MLA(hi, lo, (*fe)[5], ptr[3]); | ||
| 2662 | + MLA(hi, lo, (*fe)[6], ptr[2]); | ||
| 2663 | + MLA(hi, lo, (*fe)[7], ptr[1]); */ | ||
| 2664 | + window_1 lr/*fe*/,r12/*ptr*/,r0/*acc*/,0/*off*/,0/*mac*/,r2,r3,r4,r5,r6,r7 | ||
| 2665 | + | ||
| 2666 | + /* *pcm1++ = SHIFT(MLZ(hi, lo)); | ||
| 2667 | + | ||
| 2668 | + pcm2 = pcm1 + 31; */ | ||
| 2669 | + scale r1, r0, r1 | ||
| 2670 | + st.w r10/*pcm_1*/++, r1 | ||
| 2671 | + sub r11/*pcm2*/, r10, -4*31 | ||
| 2672 | + | ||
| 2673 | + /* for (sb = 1; sb < 16; ++sb) { */ | ||
| 2674 | + mov r2, 15 | ||
| 2675 | + stdsp sp[0], r2 | ||
| 2676 | +odd_loop: | ||
| 2677 | + /* ++fe; | ||
| 2678 | + ptr += 33; */ | ||
| 2679 | + sub lr /*fe*/, -8*4 | ||
| 2680 | + sub r12, -33*2 | ||
| 2681 | + | ||
| 2682 | + /* ML0(hi, lo, (*fo)[0], ptr[0 + 17]); | ||
| 2683 | + MLA(hi, lo, (*fo)[1], ptr[7 + 17]); | ||
| 2684 | + MLA(hi, lo, (*fo)[2], ptr[6 + 17]); | ||
| 2685 | + MLA(hi, lo, (*fo)[3], ptr[5 + 17]); | ||
| 2686 | + MLA(hi, lo, (*fo)[4], ptr[4 + 17]); | ||
| 2687 | + MLA(hi, lo, (*fo)[5], ptr[3 + 17]); | ||
| 2688 | + MLA(hi, lo, (*fo)[6], ptr[2 + 17]); | ||
| 2689 | + MLA(hi, lo, (*fo)[7], ptr[1 + 17]); */ | ||
| 2690 | + window_1 r8/*fo*/,r12/*ptr*/,r0/*acc*/,17/*off*/,1/*mul*/,r2,r3,r4,r5,r6,r7 | ||
| 2691 | + /* MLN(hi, lo); */ | ||
| 2692 | + | ||
| 2693 | + neg r0 | ||
| 2694 | + acr r1 | ||
| 2695 | + neg r1 | ||
| 2696 | + | ||
| 2697 | + /* MLA(hi, lo, (*fe)[7], ptr[1]); | ||
| 2698 | + MLA(hi, lo, (*fe)[6], ptr[2]); | ||
| 2699 | + MLA(hi, lo, (*fe)[5], ptr[3]); | ||
| 2700 | + MLA(hi, lo, (*fe)[4], ptr[4]); | ||
| 2701 | + MLA(hi, lo, (*fe)[3], ptr[5]); | ||
| 2702 | + MLA(hi, lo, (*fe)[2], ptr[6]); | ||
| 2703 | + MLA(hi, lo, (*fe)[1], ptr[7]); | ||
| 2704 | + MLA(hi, lo, (*fe)[0], ptr[0]); */ | ||
| 2705 | + window_1 lr/*fe*/,r12/*ptr*/,r0/*acc*/,0/*off*/,0/*mac*/,r2,r3,r4,r5,r6,r7 | ||
| 2706 | + | ||
| 2707 | + /* ptr -= 2*pe; */ | ||
| 2708 | + lddsp r2, sp[4] | ||
| 2709 | + | ||
| 2710 | + /* *pcm1++ = SHIFT(MLZ(hi, lo)); */ | ||
| 2711 | + | ||
| 2712 | + scale r1, r0, r1 | ||
| 2713 | + sub r12/*ptr*/, r12, r2/*pe*/<< 2 | ||
| 2714 | + st.w r10/*pcm_1*/++, r1 | ||
| 2715 | + | ||
| 2716 | + | ||
| 2717 | + /* ML0(hi, lo, (*fe)[0], ptr[7 + 17]); | ||
| 2718 | + MLA(hi, lo, (*fe)[1], ptr[8 + 17]); | ||
| 2719 | + MLA(hi, lo, (*fe)[2], ptr[9 + 17]); | ||
| 2720 | + MLA(hi, lo, (*fe)[3], ptr[10 + 17]); | ||
| 2721 | + MLA(hi, lo, (*fe)[4], ptr[11 + 17]); | ||
| 2722 | + MLA(hi, lo, (*fe)[5], ptr[12 + 17]); | ||
| 2723 | + MLA(hi, lo, (*fe)[6], ptr[13 + 17]); | ||
| 2724 | + MLA(hi, lo, (*fe)[7], ptr[14 + 17]); */ | ||
| 2725 | + window_2 lr/*fe*/,r12/*ptr*/,r0/*acc*/,17/*off*/,1/*mul*/,r2,r3,r4,r5,r6,r7 | ||
| 2726 | + /* MLA(hi, lo, (*fo)[7], ptr[14]); | ||
| 2727 | + MLA(hi, lo, (*fo)[6], ptr[13]); | ||
| 2728 | + MLA(hi, lo, (*fo)[5], ptr[12]); | ||
| 2729 | + MLA(hi, lo, (*fo)[4], ptr[11]); | ||
| 2730 | + MLA(hi, lo, (*fo)[3], ptr[10]); | ||
| 2731 | + MLA(hi, lo, (*fo)[2], ptr[9]); | ||
| 2732 | + MLA(hi, lo, (*fo)[1], ptr[8]); | ||
| 2733 | + MLA(hi, lo, (*fo)[0], ptr[7]); */ | ||
| 2734 | + window_2 r8/*fo*/,r12/*ptr*/,r0/*acc*/,0/*off*/,0/*mac*/,r2,r3,r4,r5,r6,r7 | ||
| 2735 | + | ||
| 2736 | + | ||
| 2737 | + /* *pcm2-- = SHIFT(MLZ(hi, lo)); */ | ||
| 2738 | + lddsp r3, sp[4] | ||
| 2739 | + lddsp r2, sp[0] | ||
| 2740 | + scale r1, r0, r1 | ||
| 2741 | + st.w --r11/*pcm_2*/, r1 | ||
| 2742 | + | ||
| 2743 | + /* ptr += 2*pe; */ | ||
| 2744 | + add r12/*ptr*/, r12, r3/*pe*/<< 2 | ||
| 2745 | + | ||
| 2746 | + /* ++fo; | ||
| 2747 | + } */ | ||
| 2748 | + sub r8/*fo*/, -8*4 | ||
| 2749 | + | ||
| 2750 | + sub r2, 1 | ||
| 2751 | + stdsp sp[0], r2 | ||
| 2752 | + brne odd_loop | ||
| 2753 | + | ||
| 2754 | + /* ptr += 33; */ | ||
| 2755 | + sub r12/*ptr*/, -33*2 | ||
| 2756 | + | ||
| 2757 | + /* ML0(hi, lo, (*fo)[0], ptr[0 + 17]); | ||
| 2758 | + MLA(hi, lo, (*fo)[1], ptr[7 + 17]); | ||
| 2759 | + MLA(hi, lo, (*fo)[2], ptr[6 + 17]); | ||
| 2760 | + MLA(hi, lo, (*fo)[3], ptr[5 + 17]); | ||
| 2761 | + MLA(hi, lo, (*fo)[4], ptr[4 + 17]); | ||
| 2762 | + MLA(hi, lo, (*fo)[5], ptr[3 + 17]); | ||
| 2763 | + MLA(hi, lo, (*fo)[6], ptr[2 + 17]); | ||
| 2764 | + MLA(hi, lo, (*fo)[7], ptr[1 + 17]); */ | ||
| 2765 | + window_1 r8/*fo*/,r12/*ptr*/,r0/*acc*/,17/*off*/,1/*mul*/,r2,r3,r4,r5,r6,r7 | ||
| 2766 | + | ||
| 2767 | + rjmp synth_end | ||
| 2768 | +synth_even: | ||
| 2769 | + /* Filter for even phases */ | ||
| 2770 | + | ||
| 2771 | + /* fe = &(*filter)[0][0][0]; | ||
| 2772 | + fx = &(*filter)[0][1][0]; | ||
| 2773 | + fo = &(*filter)[1][1][0]; */ | ||
| 2774 | + sub lr /*fx*/, r11, -16*8*4 | ||
| 2775 | + sub r8 /*fo*/, r11, -(16*8*4*2 + 16*8*4) | ||
| 2776 | + | ||
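| 2777 | + /* po = ((phase - 1) & 0xF) >> 1; r12 keeps the unshifted value (phase - 1) & 0xE, i.e. 2*po, used directly as the byte offset into the short table */ | ||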
| 2778 | + sub r12, 1 | ||
| 2779 | + andl r12, 0xe, COH | ||
| 2780 | + stdsp sp[4], r12 | ||
| 2781 | + /* ptr = (short const *)Dmod + po; */ | ||
| 2782 | + add r12, r9, r12 | ||
| 2783 | + | ||
| 2784 | + /* ML0(hi, lo, (*fx)[0], ptr[0 + 17]); | ||
| 2785 | + MLA(hi, lo, (*fx)[1], ptr[7 + 17]); | ||
| 2786 | + MLA(hi, lo, (*fx)[2], ptr[6 + 17]); | ||
| 2787 | + MLA(hi, lo, (*fx)[3], ptr[5 + 17]); | ||
| 2788 | + MLA(hi, lo, (*fx)[4], ptr[4 + 17]); | ||
| 2789 | + MLA(hi, lo, (*fx)[5], ptr[3 + 17]); | ||
| 2790 | + MLA(hi, lo, (*fx)[6], ptr[2 + 17]); | ||
| 2791 | + MLA(hi, lo, (*fx)[7], ptr[1 + 17]); */ | ||
| 2792 | + window_1 lr/*fx*/,r12/*ptr*/,r0/*acc*/,17/*off*/,1/*mul*/,r2,r3,r4,r5,r6,r7 | ||
| 2793 | + | ||
| 2794 | + /* MLN(hi, lo); */ | ||
| 2795 | + neg r0 | ||
| 2796 | + acr r1 | ||
| 2797 | + neg r1 | ||
| 2798 | + | ||
| 2799 | + /* MLA(hi, lo, (*fe)[0], ptr[0 + 1]); | ||
| 2800 | + MLA(hi, lo, (*fe)[1], ptr[7 + 1]); | ||
| 2801 | + MLA(hi, lo, (*fe)[2], ptr[6 + 1]); | ||
| 2802 | + MLA(hi, lo, (*fe)[3], ptr[5 + 1]); | ||
| 2803 | + MLA(hi, lo, (*fe)[4], ptr[4 + 1]); | ||
| 2804 | + MLA(hi, lo, (*fe)[5], ptr[3 + 1]); | ||
| 2805 | + MLA(hi, lo, (*fe)[6], ptr[2 + 1]); | ||
| 2806 | + MLA(hi, lo, (*fe)[7], ptr[1 + 1]); */ | ||
| 2807 | + window_1 r11/*fe*/,r12/*ptr*/,r0/*acc*/,1/*off*/,0/*mac*/,r2,r3,r4,r5,r6,r7 | ||
| 2808 | + | ||
| 2809 | + /* *pcm1++ = SHIFT(MLZ(hi, lo)); | ||
| 2810 | + | ||
| 2811 | + pcm2 = pcm1 + 31; */ | ||
| 2812 | + scale r1, r0, r1 | ||
| 2813 | + st.w r10/*pcm_1*/++, r1 | ||
| 2814 | + sub lr/*pcm2*/, r10, -4*31 | ||
| 2815 | + | ||
| 2816 | + /* for (sb = 1; sb < 16; ++sb) { */ | ||
| 2817 | + mov r2, 15 | ||
| 2818 | + stdsp sp[0], r2 | ||
| 2819 | +even_loop: | ||
| 2820 | + /* ++fe; | ||
| 2821 | + ptr += 33; */ | ||
| 2822 | + sub r11 /*fe*/, -8*4 | ||
| 2823 | + sub r12, -33*2 | ||
| 2824 | + | ||
| 2825 | + /* ML0(hi, lo, (*fo)[0], ptr[0 + 17]); | ||
| 2826 | + MLA(hi, lo, (*fo)[1], ptr[7 + 17]); | ||
| 2827 | + MLA(hi, lo, (*fo)[2], ptr[6 + 17]); | ||
| 2828 | + MLA(hi, lo, (*fo)[3], ptr[5 + 17]); | ||
| 2829 | + MLA(hi, lo, (*fo)[4], ptr[4 + 17]); | ||
| 2830 | + MLA(hi, lo, (*fo)[5], ptr[3 + 17]); | ||
| 2831 | + MLA(hi, lo, (*fo)[6], ptr[2 + 17]); | ||
| 2832 | + MLA(hi, lo, (*fo)[7], ptr[1 + 17]); */ | ||
| 2833 | + window_1 r8/*fo*/,r12/*ptr*/,r0/*acc*/,17/*off*/,1/*mul*/,r2,r3,r4,r5,r6,r7 | ||
| 2834 | + /* MLN(hi, lo); */ | ||
| 2835 | + neg r0 | ||
| 2836 | + acr r1 | ||
| 2837 | + neg r1 | ||
| 2838 | + | ||
| 2839 | + /* MLA(hi, lo, (*fe)[7], ptr[1 + 1]); | ||
| 2840 | + MLA(hi, lo, (*fe)[6], ptr[2 + 1]); | ||
| 2841 | + MLA(hi, lo, (*fe)[5], ptr[3 + 1]); | ||
| 2842 | + MLA(hi, lo, (*fe)[4], ptr[4 + 1]); | ||
| 2843 | + MLA(hi, lo, (*fe)[3], ptr[5 + 1]); | ||
| 2844 | + MLA(hi, lo, (*fe)[2], ptr[6 + 1]); | ||
| 2845 | + MLA(hi, lo, (*fe)[1], ptr[7 + 1]); | ||
| 2846 | + MLA(hi, lo, (*fe)[0], ptr[0 + 1]); */ | ||
| 2847 | + window_1 r11/*fe*/,r12/*ptr*/,r0/*acc*/,1/*off*/,0/*mac*/,r2,r3,r4,r5,r6,r7 | ||
| 2848 | + | ||
| 2849 | + /* *pcm1++ = SHIFT(MLZ(hi, lo)); */ | ||
| 2850 | + lddsp r2, sp[4] | ||
| 2851 | + scale r1, r0, r1 | ||
| 2852 | + /* ptr -= 2*po; */ | ||
| 2853 | + sub r12/*ptr*/, r12, r2/*po*/<< 1 | ||
| 2854 | + st.w r10/*pcm_1*/++, r1 | ||
| 2855 | + | ||
| 2856 | + | ||
| 2857 | + /* ML0(hi, lo, (*fe)[0], ptr[7 + 17 - 1]); | ||
| 2858 | + MLA(hi, lo, (*fe)[1], ptr[8 + 17 - 1]); | ||
| 2859 | + MLA(hi, lo, (*fe)[2], ptr[9 + 17 - 1]); | ||
| 2860 | + MLA(hi, lo, (*fe)[3], ptr[10 + 17 - 1]); | ||
| 2861 | + MLA(hi, lo, (*fe)[4], ptr[11 + 17 - 1]); | ||
| 2862 | + MLA(hi, lo, (*fe)[5], ptr[12 + 17 - 1]); | ||
| 2863 | + MLA(hi, lo, (*fe)[6], ptr[13 + 17 - 1]); | ||
| 2864 | + MLA(hi, lo, (*fe)[7], ptr[14 + 17 - 1]); */ | ||
| 2865 | + window_2 r11/*fe*/,r12/*ptr*/,r0/*acc*/,16/*off*/,1/*mul*/,r2,r3,r4,r5,r6,r7 | ||
| 2866 | + /* MLA(hi, lo, (*fo)[7], ptr[14]); | ||
| 2867 | + MLA(hi, lo, (*fo)[6], ptr[13]); | ||
| 2868 | + MLA(hi, lo, (*fo)[5], ptr[12]); | ||
| 2869 | + MLA(hi, lo, (*fo)[4], ptr[11]); | ||
| 2870 | + MLA(hi, lo, (*fo)[3], ptr[10]); | ||
| 2871 | + MLA(hi, lo, (*fo)[2], ptr[9]); | ||
| 2872 | + MLA(hi, lo, (*fo)[1], ptr[8]); | ||
| 2873 | + MLA(hi, lo, (*fo)[0], ptr[7]); */ | ||
| 2874 | + window_2 r8/*fo*/,r12/*ptr*/,r0/*acc*/,0/*off*/,0/*mac*/,r2,r3,r4,r5,r6,r7 | ||
| 2875 | + | ||
| 2876 | + | ||
| 2877 | + /* *pcm2-- = SHIFT(MLZ(hi, lo)); */ | ||
| 2878 | + lddsp r3, sp[4] | ||
| 2879 | + lddsp r2, sp[0] | ||
| 2880 | + scale r1, r0, r1 | ||
| 2881 | + st.w --lr/*pcm_2*/, r1 | ||
| 2882 | + | ||
| 2883 | + /* ptr += 2*po; */ | ||
| 2884 | + add r12/*ptr*/, r12, r3/*po*/<< 1 | ||
| 2885 | + | ||
| 2886 | + /* ++fo; | ||
| 2887 | + } */ | ||
| 2888 | + sub r8/*fo*/, -8*4 | ||
| 2889 | + | ||
| 2890 | + sub r2, 1 | ||
| 2891 | + stdsp sp[0], r2 | ||
| 2892 | + brne even_loop | ||
| 2893 | + | ||
| 2894 | + /* ptr += 33; */ | ||
| 2895 | + sub r12/*ptr*/, -33*2 | ||
| 2896 | + | ||
| 2897 | + /* ML0(hi, lo, (*fo)[0], ptr[0 + 17]); | ||
| 2898 | + MLA(hi, lo, (*fo)[1], ptr[7 + 17]); | ||
| 2899 | + MLA(hi, lo, (*fo)[2], ptr[6 + 17]); | ||
| 2900 | + MLA(hi, lo, (*fo)[3], ptr[5 + 17]); | ||
| 2901 | + MLA(hi, lo, (*fo)[4], ptr[4 + 17]); | ||
| 2902 | + MLA(hi, lo, (*fo)[5], ptr[3 + 17]); | ||
| 2903 | + MLA(hi, lo, (*fo)[6], ptr[2 + 17]); | ||
| 2904 | + MLA(hi, lo, (*fo)[7], ptr[1 + 17]); */ | ||
| 2905 | + window_1 r8/*fo*/,r12/*ptr*/,r0/*acc*/,17/*off*/,1/*mul*/,r2,r3,r4,r5,r6,r7 | ||
| 2906 | + | ||
| 2907 | + | ||
| 2908 | + | ||
| 2909 | +synth_end: | ||
| 2910 | + /* *pcm1 = SHIFT(-MLZ(hi, lo)); */ | ||
| 2911 | + scale r1, r0, r1 | ||
| 2912 | + neg r1 | ||
| 2913 | + st.w r10/*pcm_1*/, r1 | ||
| 2914 | + | ||
| 2915 | + mov r12, r10 | ||
| 2916 | + sub sp, -8 | ||
| 2917 | + popm r0-r7, pc | ||
| 2918 | + | ||
| 2919 | + | ||
| 2920 | + | ||
| 2921 | + | ||
| 2922 | + | ||
diff --git a/meta-oe/recipes-multimedia/libmad/files/mad-mips-h-constraint.patch b/meta-oe/recipes-multimedia/libmad/files/mad-mips-h-constraint.patch deleted file mode 100644 index b65555e1fa..0000000000 --- a/meta-oe/recipes-multimedia/libmad/files/mad-mips-h-constraint.patch +++ /dev/null | |||
| @@ -1,70 +0,0 @@ | |||
| 1 | diff -ur libmad-0.15.1b-orig/fixed.h libmad-0.15.1b/fixed.h | ||
| 2 | --- libmad-0.15.1b-orig/fixed.h 2004-02-17 12:32:03.000000000 +1030 | ||
| 3 | +++ libmad-0.15.1b/fixed.h 2009-08-05 10:46:30.000000000 +0930 | ||
| 4 | @@ -299,6 +299,23 @@ | ||
| 5 | |||
| 6 | # elif defined(FPM_MIPS) | ||
| 7 | |||
| 8 | +/* Test for gcc >= maj.min, as per __GNUC_PREREQ in glibc */ | ||
| 9 | +#if defined (__GNUC__) && defined (__GNUC_MINOR__) | ||
| 10 | +#define __GNUC_PREREQ(maj, min) \ | ||
| 11 | + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) | ||
| 12 | +#else | ||
| 13 | +#define __GNUC_PREREQ(maj, min) 0 | ||
| 14 | +#endif | ||
| 15 | + | ||
| 16 | +#if __GNUC_PREREQ(4,4) | ||
| 17 | + typedef unsigned int u64_di_t __attribute__ ((mode (DI))); | ||
| 18 | +# define MAD_F_MLX(hi, lo, x, y) \ | ||
| 19 | + do { \ | ||
| 20 | + u64_di_t __ll = (u64_di_t) (x) * (y); \ | ||
| 21 | + hi = __ll >> 32; \ | ||
| 22 | + lo = __ll; \ | ||
| 23 | + } while (0) | ||
| 24 | +#else | ||
| 25 | /* | ||
| 26 | * This MIPS version is fast and accurate; the disposition of the least | ||
| 27 | * significant bit depends on OPT_ACCURACY via mad_f_scale64(). | ||
| 28 | @@ -328,6 +345,7 @@ | ||
| 29 | : "%r" ((x) >> 12), "r" ((y) >> 16)) | ||
| 30 | # define MAD_F_MLZ(hi, lo) ((mad_fixed_t) (lo)) | ||
| 31 | # endif | ||
| 32 | +#endif /* __GNUC_PREREQ(4,4) */ | ||
| 33 | |||
| 34 | # if defined(OPT_SPEED) | ||
| 35 | # define mad_f_scale64(hi, lo) \ | ||
| 36 | diff -ur libmad-0.15.1b-orig/mad.h libmad-0.15.1b/mad.h | ||
| 37 | --- libmad-0.15.1b-orig/mad.h 2004-02-17 13:25:44.000000000 +1030 | ||
| 38 | +++ libmad-0.15.1b/mad.h 2009-08-05 10:42:40.000000000 +0930 | ||
| 39 | @@ -344,6 +344,23 @@ | ||
| 40 | |||
| 41 | # elif defined(FPM_MIPS) | ||
| 42 | |||
| 43 | +/* Test for gcc >= maj.min, as per __GNUC_PREREQ in glibc */ | ||
| 44 | +#if defined (__GNUC__) && defined (__GNUC_MINOR__) | ||
| 45 | +#define __GNUC_PREREQ(maj, min) \ | ||
| 46 | + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) | ||
| 47 | +#else | ||
| 48 | +#define __GNUC_PREREQ(maj, min) 0 | ||
| 49 | +#endif | ||
| 50 | + | ||
| 51 | +#if __GNUC_PREREQ(4,4) | ||
| 52 | + typedef unsigned int u64_di_t __attribute__ ((mode (DI))); | ||
| 53 | +# define MAD_F_MLX(hi, lo, x, y) \ | ||
| 54 | + do { \ | ||
| 55 | + u64_di_t __ll = (u64_di_t) (x) * (y); \ | ||
| 56 | + hi = __ll >> 32; \ | ||
| 57 | + lo = __ll; \ | ||
| 58 | + } while (0) | ||
| 59 | +#else | ||
| 60 | /* | ||
| 61 | * This MIPS version is fast and accurate; the disposition of the least | ||
| 62 | * significant bit depends on OPT_ACCURACY via mad_f_scale64(). | ||
| 63 | @@ -373,6 +390,7 @@ | ||
| 64 | : "%r" ((x) >> 12), "r" ((y) >> 16)) | ||
| 65 | # define MAD_F_MLZ(hi, lo) ((mad_fixed_t) (lo)) | ||
| 66 | # endif | ||
| 67 | +#endif /* __GNUC_PREREQ(4,4) */ | ||
| 68 | |||
| 69 | # if defined(OPT_SPEED) | ||
| 70 | # define mad_f_scale64(hi, lo) \ | ||
diff --git a/meta-oe/recipes-multimedia/libmad/files/mad.diff b/meta-oe/recipes-multimedia/libmad/files/mad.diff deleted file mode 100644 index 851dc01201..0000000000 --- a/meta-oe/recipes-multimedia/libmad/files/mad.diff +++ /dev/null | |||
| @@ -1,24 +0,0 @@ | |||
| 1 | --- /tmp/configure.ac 2008-07-11 10:19:17.000000000 +0200 | ||
| 2 | +++ libmad-0.15.1b/configure.ac 2008-07-11 10:20:00.313198000 +0200 | ||
| 3 | @@ -140,21 +140,14 @@ | ||
| 4 | case "$optimize" in | ||
| 5 | -O|"-O "*) | ||
| 6 | optimize="-O" | ||
| 7 | - optimize="$optimize -fforce-mem" | ||
| 8 | - optimize="$optimize -fforce-addr" | ||
| 9 | : #x optimize="$optimize -finline-functions" | ||
| 10 | : #- optimize="$optimize -fstrength-reduce" | ||
| 11 | - optimize="$optimize -fthread-jumps" | ||
| 12 | - optimize="$optimize -fcse-follow-jumps" | ||
| 13 | - optimize="$optimize -fcse-skip-blocks" | ||
| 14 | : #x optimize="$optimize -frerun-cse-after-loop" | ||
| 15 | : #x optimize="$optimize -frerun-loop-opt" | ||
| 16 | : #x optimize="$optimize -fgcse" | ||
| 17 | optimize="$optimize -fexpensive-optimizations" | ||
| 18 | - optimize="$optimize -fregmove" | ||
| 19 | : #* optimize="$optimize -fdelayed-branch" | ||
| 20 | : #x optimize="$optimize -fschedule-insns" | ||
| 21 | - optimize="$optimize -fschedule-insns2" | ||
| 22 | : #? optimize="$optimize -ffunction-sections" | ||
| 23 | : #? optimize="$optimize -fcaller-saves" | ||
| 24 | : #> optimize="$optimize -funroll-loops" | ||
diff --git a/meta-oe/recipes-multimedia/libmad/libmad-0.15.1b/obsolete_automake_macros.patch b/meta-oe/recipes-multimedia/libmad/libmad-0.15.1b/obsolete_automake_macros.patch deleted file mode 100644 index b0f5f77e2a..0000000000 --- a/meta-oe/recipes-multimedia/libmad/libmad-0.15.1b/obsolete_automake_macros.patch +++ /dev/null | |||
| @@ -1,14 +0,0 @@ | |||
| 1 | Upstream-Status: Submitted (https://sourceforge.net/tracker/?group_id=12349&atid=112349) | ||
| 2 | |||
| 3 | Signed-off-by: Marko Lindqvist <cazfi74@gmail.com> | ||
| 4 | diff -Nurd libmad-0.15.1b/configure.ac libmad-0.15.1b/configure.ac | ||
| 5 | --- libmad-0.15.1b/configure.ac 2004-01-23 11:41:32.000000000 +0200 | ||
| 6 | +++ libmad-0.15.1b/configure.ac 2013-01-03 08:28:23.718693697 +0200 | ||
| 7 | @@ -28,7 +28,7 @@ | ||
| 8 | |||
| 9 | AM_INIT_AUTOMAKE | ||
| 10 | |||
| 11 | -AM_CONFIG_HEADER([config.h]) | ||
| 12 | +AC_CONFIG_HEADERS([config.h]) | ||
| 13 | |||
| 14 | dnl System type. | ||
diff --git a/meta-oe/recipes-multimedia/libmad/libmad_0.15.1b.bb b/meta-oe/recipes-multimedia/libmad/libmad_0.15.1b.bb deleted file mode 100644 index b5ff6988a4..0000000000 --- a/meta-oe/recipes-multimedia/libmad/libmad_0.15.1b.bb +++ /dev/null | |||
| @@ -1,38 +0,0 @@ | |||
| 1 | DESCRIPTION = "MPEG Audio Decoder Library" | ||
| 2 | SECTION = "libs" | ||
| 3 | |||
| 4 | DEPENDS = "libid3tag" | ||
| 5 | |||
| 6 | LICENSE = "GPLv2" | ||
| 7 | LIC_FILES_CHKSUM = "file://COPYING;md5=94d55d512a9ba36caa9b7df079bae19f" | ||
| 8 | |||
| 9 | PR = "r1" | ||
| 10 | |||
| 11 | SRC_URI = "${SOURCEFORGE_MIRROR}/mad/libmad-${PV}.tar.gz \ | ||
| 12 | file://add-pkgconfig.patch \ | ||
| 13 | file://mad.diff \ | ||
| 14 | file://mad-mips-h-constraint.patch \ | ||
| 15 | file://obsolete_automake_macros.patch \ | ||
| 16 | " | ||
| 17 | |||
| 18 | SRC_URI_append_avr32 = " file://libmad-0.15.1b-avr32-optimization.patch" | ||
| 19 | |||
| 20 | SRC_URI[md5sum] = "1be543bc30c56fb6bea1d7bf6a64e66c" | ||
| 21 | SRC_URI[sha256sum] = "bbfac3ed6bfbc2823d3775ebb931087371e142bb0e9bb1bee51a76a6e0078690" | ||
| 22 | |||
| 23 | S = "${WORKDIR}/libmad-${PV}" | ||
| 24 | |||
| 25 | inherit autotools pkgconfig | ||
| 26 | |||
| 27 | EXTRA_OECONF = "-enable-speed --enable-shared" | ||
| 28 | # The ASOs don't take any account of Thumb... | ||
| 29 | EXTRA_OECONF_append_thumb = " --disable-aso --enable-fpm=default" | ||
| 30 | EXTRA_OECONF_append_arm = " --enable-fpm=arm" | ||
| 31 | |||
| 32 | do_configure_prepend () { | ||
| 33 | # damn picky automake... | ||
| 34 | touch NEWS AUTHORS ChangeLog | ||
| 35 | } | ||
| 36 | |||
| 37 | ARM_INSTRUCTION_SET = "arm" | ||
| 38 | |||
