fix shine macros (#17725)

This commit is contained in:
gemu 2023-01-17 09:06:20 +01:00 committed by GitHub
parent d904e0aa7f
commit beb021210d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 55 additions and 55 deletions

View File

@ -138,7 +138,7 @@ void shine_iteration_loop(shine_global_config *config) {
*/
for (i=GRANULE_SIZE, config->l3loop->xrmax=0; i--;)
{
config->l3loop->xrsq[i] = mulsr(config->l3loop->xr[i],config->l3loop->xr[i]);
config->l3loop->xrsq[i] = asm_mulsr(config->l3loop->xr[i],config->l3loop->xr[i]);
config->l3loop->xrabs[i] = abs(config->l3loop->xr[i]);
if(config->l3loop->xrabs[i]>config->l3loop->xrmax)
config->l3loop->xrmax=config->l3loop->xrabs[i];
@ -408,7 +408,7 @@ int quantize(int ix[GRANULE_SIZE], int stepsize, shine_global_config *config )
/* a quick check to see if ixmax will be less than 8192 */
/* this speeds up the early calls to bin_search_StepSize */
if((mulr(config->l3loop->xrmax,scalei)) > 165140) /* 8192**(4/3) */
if((asm_mulr(config->l3loop->xrmax,scalei)) > 165140) /* 8192**(4/3) */
max = 16384; /* no point in continuing, stepsize not big enough */
else
for(i=0, max=0;i<GRANULE_SIZE;i++)
@ -416,7 +416,7 @@ int quantize(int ix[GRANULE_SIZE], int stepsize, shine_global_config *config )
/* This calculation is very sensitive. The multiply must round its
* result or bad things happen to the quality.
*/
ln = mulr(abs(config->l3loop->xr[i]),scalei);
ln = asm_mulr(abs(config->l3loop->xr[i]),scalei);
if(ln<10000) /* ln < 10000 catches most values */
ix[i] = config->l3loop->int2idx[ln]; /* quick look up method */

View File

@ -98,32 +98,32 @@ void shine_mdct_sub(shine_global_config *config, int stride) {
uint32_t vm_lo __attribute__((unused));
#endif
mul0(vm, vm_lo, mdct_in[35], config->mdct.cos_l[k][35]);
asm_mul0(vm, vm_lo, mdct_in[35], config->mdct.cos_l[k][35]);
for(j=35; j; j-=7) {
muladd(vm, vm_lo, mdct_in[j-1], config->mdct.cos_l[k][j-1]);
muladd(vm, vm_lo, mdct_in[j-2], config->mdct.cos_l[k][j-2]);
muladd(vm, vm_lo, mdct_in[j-3], config->mdct.cos_l[k][j-3]);
muladd(vm, vm_lo, mdct_in[j-4], config->mdct.cos_l[k][j-4]);
muladd(vm, vm_lo, mdct_in[j-5], config->mdct.cos_l[k][j-5]);
muladd(vm, vm_lo, mdct_in[j-6], config->mdct.cos_l[k][j-6]);
muladd(vm, vm_lo, mdct_in[j-7], config->mdct.cos_l[k][j-7]);
asm_muladd(vm, vm_lo, mdct_in[j-1], config->mdct.cos_l[k][j-1]);
asm_muladd(vm, vm_lo, mdct_in[j-2], config->mdct.cos_l[k][j-2]);
asm_muladd(vm, vm_lo, mdct_in[j-3], config->mdct.cos_l[k][j-3]);
asm_muladd(vm, vm_lo, mdct_in[j-4], config->mdct.cos_l[k][j-4]);
asm_muladd(vm, vm_lo, mdct_in[j-5], config->mdct.cos_l[k][j-5]);
asm_muladd(vm, vm_lo, mdct_in[j-6], config->mdct.cos_l[k][j-6]);
asm_muladd(vm, vm_lo, mdct_in[j-7], config->mdct.cos_l[k][j-7]);
}
mulz(vm, vm_lo);
asm_mulz(vm, vm_lo);
mdct_enc[band][k] = vm;
}
/* Perform aliasing reduction butterfly */
asm ("#cmuls:");
asm ("#asm_cmuls:");
if (band != 0)
{
cmuls(mdct_enc[band][0], mdct_enc[band-1][17-0], mdct_enc[band][0], mdct_enc[band-1][17-0], MDCT_CS0, MDCT_CA0);
cmuls(mdct_enc[band][1], mdct_enc[band-1][17-1], mdct_enc[band][1], mdct_enc[band-1][17-1], MDCT_CS1, MDCT_CA1);
cmuls(mdct_enc[band][2], mdct_enc[band-1][17-2], mdct_enc[band][2], mdct_enc[band-1][17-2], MDCT_CS2, MDCT_CA2);
cmuls(mdct_enc[band][3], mdct_enc[band-1][17-3], mdct_enc[band][3], mdct_enc[band-1][17-3], MDCT_CS3, MDCT_CA3);
cmuls(mdct_enc[band][4], mdct_enc[band-1][17-4], mdct_enc[band][4], mdct_enc[band-1][17-4], MDCT_CS4, MDCT_CA4);
cmuls(mdct_enc[band][5], mdct_enc[band-1][17-5], mdct_enc[band][5], mdct_enc[band-1][17-5], MDCT_CS5, MDCT_CA5);
cmuls(mdct_enc[band][6], mdct_enc[band-1][17-6], mdct_enc[band][6], mdct_enc[band-1][17-6], MDCT_CS6, MDCT_CA6);
cmuls(mdct_enc[band][7], mdct_enc[band-1][17-7], mdct_enc[band][7], mdct_enc[band-1][17-7], MDCT_CS7, MDCT_CA7);
asm_cmuls(mdct_enc[band][0], mdct_enc[band-1][17-0], mdct_enc[band][0], mdct_enc[band-1][17-0], MDCT_CS0, MDCT_CA0);
asm_cmuls(mdct_enc[band][1], mdct_enc[band-1][17-1], mdct_enc[band][1], mdct_enc[band-1][17-1], MDCT_CS1, MDCT_CA1);
asm_cmuls(mdct_enc[band][2], mdct_enc[band-1][17-2], mdct_enc[band][2], mdct_enc[band-1][17-2], MDCT_CS2, MDCT_CA2);
asm_cmuls(mdct_enc[band][3], mdct_enc[band-1][17-3], mdct_enc[band][3], mdct_enc[band-1][17-3], MDCT_CS3, MDCT_CA3);
asm_cmuls(mdct_enc[band][4], mdct_enc[band-1][17-4], mdct_enc[band][4], mdct_enc[band-1][17-4], MDCT_CS4, MDCT_CA4);
asm_cmuls(mdct_enc[band][5], mdct_enc[band-1][17-5], mdct_enc[band][5], mdct_enc[band-1][17-5], MDCT_CS5, MDCT_CA5);
asm_cmuls(mdct_enc[band][6], mdct_enc[band-1][17-6], mdct_enc[band][6], mdct_enc[band-1][17-6], MDCT_CS6, MDCT_CA6);
asm_cmuls(mdct_enc[band][7], mdct_enc[band-1][17-7], mdct_enc[band][7], mdct_enc[band-1][17-7], MDCT_CS7, MDCT_CA7);
}
}
}

View File

@ -66,15 +66,15 @@ void shine_window_filter_subband(int16_t **buffer, int32_t s[SBLIMIT], int ch, s
uint32_t s_value_lo __attribute__((unused));
#endif
mul0 (s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (0<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (0<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (1<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (1<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (2<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (2<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (3<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (3<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (4<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (4<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (5<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (5<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (6<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (6<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (7<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (7<<6)]);
mulz (s_value, s_value_lo);
asm_mul0 (s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (0<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (0<<6)]);
asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (1<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (1<<6)]);
asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (2<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (2<<6)]);
asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (3<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (3<<6)]);
asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (4<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (4<<6)]);
asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (5<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (5<<6)]);
asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (6<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (6<<6)]);
asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (7<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (7<<6)]);
asm_mulz (s_value, s_value_lo);
y[i] = s_value;
}
@ -88,17 +88,17 @@ void shine_window_filter_subband(int16_t **buffer, int32_t s[SBLIMIT], int ch, s
uint32_t s_value_lo __attribute__((unused));
#endif
mul0(s_value, s_value_lo, config->subband.fl[i][63], y[63]);
asm_mul0(s_value, s_value_lo, config->subband.fl[i][63], y[63]);
for (j=63; j; j-=7) {
muladd(s_value, s_value_lo, config->subband.fl[i][j-1], y[j-1]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-2], y[j-2]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-3], y[j-3]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-4], y[j-4]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-5], y[j-5]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-6], y[j-6]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-7], y[j-7]);
asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-1], y[j-1]);
asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-2], y[j-2]);
asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-3], y[j-3]);
asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-4], y[j-4]);
asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-5], y[j-5]);
asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-6], y[j-6]);
asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-7], y[j-7]);
}
mulz(s_value, s_value_lo);
asm_mulz(s_value, s_value_lo);
s[i] = s_value;
}
}

View File

@ -1,9 +1,9 @@
#include <stdint.h>
#ifndef mul
#ifndef asm_mul
//#define /// mul(a,b) (int32_t) ( ( ((int64_t) a) * ((int64_t) b) ) >>32 )
#define mul(x,y) \
#define asm_mul(x,y) \
({ \
register int32_t result; \
asm ("mulsh %0, %2, %1" : "=r" (result) : "r" (x), "r" (y)); \
@ -12,8 +12,8 @@
#endif
#ifndef muls //Not sure about this
#define muls(x,y) \
#ifndef asm_muls //Not sure about this
#define asm_muls(x,y) \
({ \
register int32_t result; \
asm ( \
@ -28,8 +28,8 @@
//#define muls(a,b) (int32_t) ( ( ((int64_t) a) * ((int64_t) b) ) >>31 )
#endif
#ifndef mulr //no rounding shortcut
#define mulr(x,y) \
#ifndef asm_mulr //no rounding shortcut
#define asm_mulr(x,y) \
({ \
register int32_t result; \
asm ("mulsh %0, %2, %1" : "=r" (result) : "r" (x), "r" (y)); \
@ -39,8 +39,8 @@
//#define mulr(a,b) (int32_t) ( ( ( ((int64_t) a) * ((int64_t) b)) + 0x80000000LL ) >>32 )
#endif
#ifndef mulsr //no rounding shortcut
#define mulsr(x,y) \
#ifndef asm_mulsr //no rounding shortcut
#define asm_mulsr(x,y) \
({ \
register int32_t result; \
asm ( \
@ -53,11 +53,11 @@
//#define mulsr(a,b) (int32_t) ( ( ( ((int64_t) a) * ((int64_t) b)) + 0x40000000LL ) >>31 )
#endif
#ifndef mul0
#define mul0(hi,lo,a,b) ((hi) = mul((a), (b)))
#ifndef asm_mul0
#define asm_mul0(hi,lo,a,b) ((hi) = asm_mul((a), (b)))
// This didn't seem to help either
#define muladd(hi, lo, x, y) \
#define asm_muladd(hi, lo, x, y) \
({ \
asm ( \
"mulsh a7, %2, %1\n\t" \
@ -67,8 +67,8 @@
})
//#define muladd(hi,lo,a,b) ((hi) += mul((a), (b)))
#define mulsub(hi, lo, x, y) \
//#define asm_muladd(hi,lo,a,b) ((hi) += mul((a), (b)))
#define asm_mulsub(hi, lo, x, y) \
({ \
asm ( \
"mulsh a8, %2, %1\n\t" \
@ -77,10 +77,10 @@
: "a8");\
})
//#define mulsub(hi,lo,a,b) ((hi) -= mul((a), (b)))
#define mulz(hi,lo)
#define asm_mulz(hi,lo)
#endif
#ifndef cmuls
#ifndef asm_cmuls
/*
#define cmuls(dre, dim, are, aim, bre, bim) \
do { \
@ -109,7 +109,7 @@ do { \
} while (0)*/
#define cmuls(dre, dim, are, aim, bre, bim) \
#define asm_cmuls(dre, dim, are, aim, bre, bim) \
do { \
int32_t tre; \
(tre) = (int32_t) (((int64_t) (are) * (int64_t) (bre) - (int64_t) (aim) * (int64_t) (bim)) >> 31); \