Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/math/fft/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ if(CONFIG_MATH_32BIT_FFT)
endif()

if(CONFIG_MATH_FFT_MULTI)
list(APPEND base_files fft_multi.c)
list(APPEND base_files fft_multi.c fft_multi_generic.c fft_multi_hifi3.c)
endif()

is_zephyr(zephyr)
Expand Down
93 changes: 67 additions & 26 deletions src/math/fft/fft_32_hifi3.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,16 @@ void fft_execute_32(struct fft_plan *plan, bool ifft)
ae_int32x2 sample;
ae_int32x2 sample1;
ae_int32x2 sample2;
ae_int32x2 tw;
ae_int32x2 *inx = (ae_int32x2 *)plan->inb32;
ae_int32x2 *outx = (ae_int32x2 *)plan->outb32;
ae_int32x2 *outtop;
ae_int32x2 *outbottom;
ae_int32x2 *top_ptr;
ae_int32x2 *bot_ptr;
uint16_t *idx = &plan->bit_reverse_idx[0];
int depth, top, bottom, index;
int i, j, k, m, n;
const int32_t *tw_r;
const int32_t *tw_i;
int depth, i;
int j, k, m, n;
int size = plan->size;
int len = plan->len;

Expand All @@ -35,7 +38,6 @@ void fft_execute_32(struct fft_plan *plan, bool ifft)
if (!plan->inb32 || !plan->outb32)
return;

/* convert to complex conjugate for ifft */
/* step 1: re-arrange input in bit reverse order, and shrink the level to avoid overflow */
if (ifft) {
/* convert to complex conjugate for ifft */
Expand All @@ -54,43 +56,82 @@ void fft_execute_32(struct fft_plan *plan, bool ifft)
}
}

/* step 2: loop to do FFT transform in smaller size */
for (depth = 1; depth <= len; ++depth) {
/*
* Step 2a: First FFT stage (depth=1, m=2, n=1).
* All butterflies use twiddle factor W^0 = 1+0j,
* so the complex multiply is skipped entirely.
*/
top_ptr = outx;
bot_ptr = outx + 1;
for (k = 0; k < size; k += 2) {
sample1 = AE_L32X2_I(top_ptr, 0);
sample2 = AE_L32X2_I(bot_ptr, 0);
sample = AE_ADD32S(sample1, sample2);
AE_S32X2_I(sample, top_ptr, 0);
sample = AE_SUB32S(sample1, sample2);
AE_S32X2_I(sample, bot_ptr, 0);
top_ptr += 2;
bot_ptr += 2;
}

/* Step 2b: Remaining FFT stages (depth >= 2) */
for (depth = 2; depth <= len; ++depth) {
m = 1 << depth;
n = m >> 1;
i = FFT_SIZE_MAX >> depth;

top_ptr = outx;
bot_ptr = outx + n;

/* doing FFT transforms in size m */
for (k = 0; k < size; k += m) {
/* doing one FFT transform for size m */
for (j = 0; j < n; ++j) {
index = i * j;
top = k + j;
bottom = top + n;
/*
* j=0: twiddle factor W^0 = 1+0j,
* butterfly without complex multiply.
*/
sample1 = AE_L32X2_I(top_ptr, 0);
sample = AE_L32X2_I(bot_ptr, 0);
sample2 = AE_ADD32S(sample1, sample);
AE_S32X2_I(sample2, top_ptr, 0);
sample2 = AE_SUB32S(sample1, sample);
AE_S32X2_I(sample2, bot_ptr, 0);
top_ptr++;
bot_ptr++;

/* load twiddle factor to sample1 */
sample1 = twiddle_real_32[index];
sample2 = twiddle_imag_32[index];
sample1 = AE_SEL32_LH(sample1, sample2);
/* j=1..n-1: full butterfly with twiddle multiply */
tw_r = &twiddle_real_32[i];
tw_i = &twiddle_imag_32[i];
for (j = 1; j < n; ++j) {
/* load and combine twiddle factor {real, imag} */
sample1 = tw_r[0];
sample2 = tw_i[0];
tw = AE_SEL32_LH(sample1, sample2);

/* calculate the accumulator: twiddle * bottom */
sample2 = outx[bottom];
res = AE_MULF32S_HH(sample1, sample2);
AE_MULSF32S_LL(res, sample1, sample2);
res1 = AE_MULF32S_HL(sample1, sample2);
AE_MULAF32S_LH(res1, sample1, sample2);
sample2 = AE_L32X2_I(bot_ptr, 0);
res = AE_MULF32S_HH(tw, sample2);
AE_MULSF32S_LL(res, tw, sample2);
res1 = AE_MULF32S_HL(tw, sample2);
AE_MULAF32S_LH(res1, tw, sample2);
sample = AE_ROUND32X2F64SSYM(res, res1);
sample1 = outx[top];
sample1 = AE_L32X2_I(top_ptr, 0);

/* calculate the top output: top = top + accumulate */
sample2 = AE_ADD32S(sample1, sample);
outtop = outx + top;
AE_S32X2_I(sample2, outtop, 0);
AE_S32X2_I(sample2, top_ptr, 0);

/* calculate the bottom output: bottom = top - accumulate */
sample2 = AE_SUB32S(sample1, sample);
outbottom = outx + bottom;
AE_S32X2_I(sample2, outbottom, 0);
AE_S32X2_I(sample2, bot_ptr, 0);

top_ptr++;
bot_ptr++;
tw_r += i;
tw_i += i;
}
/* advance pointers past current group's bottom half */
top_ptr += n;
bot_ptr += n;
}
}

Expand Down
154 changes: 0 additions & 154 deletions src/math/fft/fft_multi.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,6 @@ LOG_MODULE_REGISTER(math_fft_multi, CONFIG_SOF_LOG_LEVEL);
SOF_DEFINE_REG_UUID(math_fft_multi);
DECLARE_TR_CTX(math_fft_multi_tr, SOF_UUID(math_fft_multi_uuid), LOG_LEVEL_INFO);

/* Constants for size 3 DFT */
#define DFT3_COEFR -1073741824 /* int32(-0.5 * 2^31) */
#define DFT3_COEFI 1859775393 /* int32(sqrt(3) / 2 * 2^31) */
#define DFT3_SCALE 715827883 /* int32(1/3*2^31) */

struct fft_multi_plan *mod_fft_multi_plan_new(struct processing_module *mod, void *inb,
void *outb, uint32_t size, int bits)
{
Expand Down Expand Up @@ -143,152 +138,3 @@ void mod_fft_multi_plan_free(struct processing_module *mod, struct fft_multi_pla
mod_free(mod, plan->bit_reverse_idx);
mod_free(mod, plan);
}

void dft3_32(struct icomplex32 *x_in, struct icomplex32 *y)
{
const struct icomplex32 c0 = {DFT3_COEFR, -DFT3_COEFI};
const struct icomplex32 c1 = {DFT3_COEFR, DFT3_COEFI};
struct icomplex32 x[3];
struct icomplex32 p1, p2, sum;
int i;

for (i = 0; i < 3; i++) {
x[i].real = Q_MULTSR_32X32((int64_t)x_in[i].real, DFT3_SCALE, 31, 31, 31);
x[i].imag = Q_MULTSR_32X32((int64_t)x_in[i].imag, DFT3_SCALE, 31, 31, 31);
}

/*
* | 1 1 1 |
* c = | 1 c0 c1 | , x = [ x0 x1 x2 ]
* | 1 c1 c0 |
*
* y(0) = c(0,0) * x(0) + c(1,0) * x(1) + c(2,0) * x(0)
* y(1) = c(0,1) * x(0) + c(1,1) * x(1) + c(2,1) * x(0)
* y(2) = c(0,2) * x(0) + c(1,2) * x(1) + c(2,2) * x(0)
*/

/* y(0) = 1 * x(0) + 1 * x(1) + 1 * x(2) */
icomplex32_adds(&x[0], &x[1], &sum);
icomplex32_adds(&x[2], &sum, &y[0]);

/* y(1) = 1 * x(0) + c0 * x(1) + c1 * x(2) */
icomplex32_mul(&c0, &x[1], &p1);
icomplex32_mul(&c1, &x[2], &p2);
icomplex32_adds(&p1, &p2, &sum);
icomplex32_adds(&x[0], &sum, &y[1]);

/* y(2) = 1 * x(0) + c1 * x(1) + c0 * x(2) */
icomplex32_mul(&c1, &x[1], &p1);
icomplex32_mul(&c0, &x[2], &p2);
icomplex32_adds(&p1, &p2, &sum);
icomplex32_adds(&x[0], &sum, &y[2]);
}

void fft_multi_execute_32(struct fft_multi_plan *plan, bool ifft)
{
struct icomplex32 x[FFT_MULTI_COUNT_MAX];
struct icomplex32 y[FFT_MULTI_COUNT_MAX];
struct icomplex32 t, c;
int i, j, k, m;

/* Handle 2^N FFT */
if (plan->num_ffts == 1) {
memset(plan->outb32, 0, plan->fft_size * sizeof(struct icomplex32));
fft_execute_32(plan->fft_plan[0], ifft);
return;
}

#ifdef DEBUG_DUMP_TO_FILE
FILE *fh1 = fopen("debug_fft_multi_int1.txt", "w");
FILE *fh2 = fopen("debug_fft_multi_int2.txt", "w");
FILE *fh3 = fopen("debug_fft_multi_twiddle.txt", "w");
FILE *fh4 = fopen("debug_fft_multi_dft_out.txt", "w");
#endif

/* convert to complex conjugate for IFFT */
if (ifft) {
for (i = 0; i < plan->total_size; i++)
icomplex32_conj(&plan->inb32[i]);
}

/* Copy input buffers */
k = 0;
for (i = 0; i < plan->fft_size; i++)
for (j = 0; j < plan->num_ffts; j++)
plan->tmp_i32[j][i] = plan->inb32[k++];

/* Clear output buffers and call individual FFTs*/
for (j = 0; j < plan->num_ffts; j++) {
memset(&plan->tmp_o32[j][0], 0, plan->fft_size * sizeof(struct icomplex32));
fft_execute_32(plan->fft_plan[j], 0);
}

#ifdef DEBUG_DUMP_TO_FILE
for (j = 0; j < plan->num_ffts; j++)
for (i = 0; i < plan->fft_size; i++)
fprintf(fh1, "%d %d\n", plan->tmp_o32[j][i].real, plan->tmp_o32[j][i].imag);
#endif

/* Multiply with twiddle factors */
m = FFT_MULTI_TWIDDLE_SIZE / 2 / plan->fft_size;
for (j = 1; j < plan->num_ffts; j++) {
for (i = 0; i < plan->fft_size; i++) {
c = plan->tmp_o32[j][i];
k = j * i * m;
t.real = multi_twiddle_real_32[k];
t.imag = multi_twiddle_imag_32[k];
//fprintf(fh3, "%d %d\n", t.real, t.imag);
icomplex32_mul(&t, &c, &plan->tmp_o32[j][i]);
}
}

#ifdef DEBUG_DUMP_TO_FILE
for (j = 0; j < plan->num_ffts; j++)
for (i = 0; i < plan->fft_size; i++)
fprintf(fh2, "%d %d\n", plan->tmp_o32[j][i].real, plan->tmp_o32[j][i].imag);
#endif

/* DFT of size 3 */
j = plan->fft_size;
k = 2 * plan->fft_size;
for (i = 0; i < plan->fft_size; i++) {
x[0] = plan->tmp_o32[0][i];
x[1] = plan->tmp_o32[1][i];
x[2] = plan->tmp_o32[2][i];
dft3_32(x, y);
plan->outb32[i] = y[0];
plan->outb32[i + j] = y[1];
plan->outb32[i + k] = y[2];
}

#ifdef DEBUG_DUMP_TO_FILE
for (i = 0; i < plan->total_size; i++)
fprintf(fh4, "%d %d\n", plan->outb32[i].real, plan->outb32[i].imag);
#endif

/* shift back for IFFT */

/* TODO: Check if time shift method for IFFT is more efficient or more accurate
* tmp = 1 / N * fft(X);
* x = tmp([1 N:-1:2])
*/
if (ifft) {
/*
* no need to divide N as it is already done in the input side
* for Q1.31 format. Instead, we need to multiply N to compensate
* the shrink we did in the FFT transform.
*/
for (i = 0; i < plan->total_size; i++) {
/* Need to negate imag part to match reference */
plan->outb32[i].imag = -plan->outb32[i].imag;
icomplex32_shift(&plan->outb32[i], plan->fft_plan[0]->len,
&plan->outb32[i]);
plan->outb32[i].real = sat_int32((int64_t)plan->outb32[i].real * 3);
plan->outb32[i].imag = sat_int32((int64_t)plan->outb32[i].imag * 3);
}
}

#ifdef DEBUG_DUMP_TO_FILE
fclose(fh1); fclose(fh2); fclose(fh3); fclose(fh4);
#endif
}
Loading
Loading