https://github.com/knik0/faac/commit/cb10c59666a3631547a9037c1576a34d8ca52df1 From cb10c59666a3631547a9037c1576a34d8ca52df1 Mon Sep 17 00:00:00 2001 From: enWILLYado Date: Mon, 2 Jun 2025 16:40:03 +0200 Subject: [PATCH] Fix SIMD calls (#59) * Fix SIMD calls Fast SIMD calls and secure align for result. * Update quantize.c Potential unaligned memory location. * Update quantize.c Only fix unaligned memory access. --- libfaac/quantize.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/libfaac/quantize.c b/libfaac/quantize.c index 870737a..02586c9 100644 --- a/libfaac/quantize.c +++ b/libfaac/quantize.c @@ -40,6 +40,19 @@ # define bit_SSE2 (1 << 26) #endif +#ifdef __SSE2__ +#ifdef _MSC_VER /* visual c++ */ +#define ALIGN16_BEG __declspec(align(16)) +#define ALIGN16_END +#else /* gcc or icc */ +#define ALIGN16_BEG +#define ALIGN16_END __attribute__((aligned(16))) +#endif +#else +#define ALIGN16_BEG +#define ALIGN16_END +#endif + #ifdef __GNUC__ #define GCC_VERSION (__GNUC__ * 10000 \ + __GNUC_MINOR__ * 100 \ @@ -182,7 +195,7 @@ static void qlevel(CoderInfo *coderInfo, int sfac; double rmsx; double etot; - int xitab[8 * MAXSHORTBAND]; + int ALIGN16_BEG xitab[8 * MAXSHORTBAND] ALIGN16_END; int *xi; int start, end; const double *xr; @@ -242,15 +255,18 @@ static void qlevel(CoderInfo *coderInfo, #ifdef __SSE2__ if (sse2) { + const __m128 zero = _mm_setzero_ps(); + const __m128 sfac = _mm_set1_ps(sfacfix); + const __m128 magic = _mm_set1_ps(MAGIC_NUMBER); for (cnt = 0; cnt < end; cnt += 4) { __m128 x = {xr[cnt], xr[cnt + 1], xr[cnt + 2], xr[cnt + 3]}; - x = _mm_max_ps(x, _mm_sub_ps((__m128){0, 0, 0, 0}, x)); - x = _mm_mul_ps(x, (__m128){sfacfix, sfacfix, sfacfix, sfacfix}); + x = _mm_max_ps(x, _mm_sub_ps(zero, x)); + x = _mm_mul_ps(x, sfac); x = _mm_mul_ps(x, _mm_sqrt_ps(x)); x = _mm_sqrt_ps(x); - x = _mm_add_ps(x, (__m128){MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER}); + x = _mm_add_ps(x, magic); *(__m128i*)(xi + cnt) = _mm_cvttps_epi32(x); }