diff --git a/src/audio/stft_process/CMakeLists.txt b/src/audio/stft_process/CMakeLists.txt index 66ccb2276c58..e3a13e17bb93 100644 --- a/src/audio/stft_process/CMakeLists.txt +++ b/src/audio/stft_process/CMakeLists.txt @@ -8,6 +8,7 @@ else() add_local_sources(sof stft_process_setup.c) add_local_sources(sof stft_process_common.c) add_local_sources(sof stft_process-generic.c) + add_local_sources(sof stft_process-hifi3.c) if(CONFIG_IPC_MAJOR_4) add_local_sources(sof stft_process-ipc4.c) diff --git a/src/audio/stft_process/Kconfig b/src/audio/stft_process/Kconfig index b73bdebe3bb4..dcce64bbf53a 100644 --- a/src/audio/stft_process/Kconfig +++ b/src/audio/stft_process/Kconfig @@ -1,5 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause +rsource "Kconfig.simd" + config COMP_STFT_PROCESS tristate "STFT processing component" default n diff --git a/src/audio/stft_process/Kconfig.simd b/src/audio/stft_process/Kconfig.simd new file mode 100644 index 000000000000..1ec9ff6f0e61 --- /dev/null +++ b/src/audio/stft_process/Kconfig.simd @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: BSD-3-Clause + +comment "STFT Process optimization level select" + +choice "COMP_STFT_PROCESS_SIMD_LEVEL_SELECT" + prompt "choose which SIMD level used for STFT Process module" + depends on COMP_STFT_PROCESS + default COMP_STFT_PROCESS_HIFI_MAX + + config COMP_STFT_PROCESS_HIFI_MAX + prompt "SIMD will be selected by toolchain pre-defined header" + bool + help + When this is selected, the optimization level will be + determined by the toolchain pre-defined macros in the + core isa header file. + + config COMP_STFT_PROCESS_HIFI_3 + prompt "Choose HIFI3 intrinsic optimized STFT Process module" + bool + help + This option is used to build HIFI3 intrinsic optimized + STFT Process code. + + config COMP_STFT_PROCESS_HIFI_NONE + prompt "Choose generic C STFT Process module, no HIFI SIMD involved" + bool + help + This option is used to build STFT Process + with generic C code. 
+endchoice diff --git a/src/audio/stft_process/stft_process-generic.c b/src/audio/stft_process/stft_process-generic.c index 3399c24657a4..327c429369f9 100644 --- a/src/audio/stft_process/stft_process-generic.c +++ b/src/audio/stft_process/stft_process-generic.c @@ -2,395 +2,12 @@ // // Copyright(c) 2025 Intel Corporation. -#include -#include #include -#include -#include -#include +#include #include #include "stft_process.h" -#if CONFIG_FORMAT_S32LE -/** - * stft_process_source_s32() - Process S16_LE format. - * @mod: Pointer to module data. - * @source: Source for PCM samples data. - * @sink: Sink for PCM samples data. - * @frames: Number of audio data frames to process. - * - * This is the processing function for 16-bit signed integer PCM formats. The - * audio samples in every frame are re-order to channels order defined in - * component data channel_map[]. - * - * Return: Value zero for success, otherwise an error code. - */ -int stft_process_source_s32(struct stft_comp_data *cd, struct sof_source *source, int frames) -{ - struct stft_process_state *state = &cd->state; - struct stft_process_buffer *ibuf; - int32_t const *x, *x_start, *x_end; - int x_size; - int bytes = frames * cd->frame_bytes; - int frames_left = frames; - int ret; - int n1; - int n2; - int channels = cd->channels; - int n; - int i; - int j; - - /* Get pointer to source data in circular buffer */ - ret = source_get_data_s32(source, bytes, &x, &x_start, &x_size); - if (ret) - return ret; - - /* Set helper pointers to buffer end for wrap check. Then loop until all - * samples are processed. - */ - x_end = x_start + x_size; - - while (frames_left) { - /* Find out samples to process before first wrap or end of data. 
*/ - ibuf = &state->ibuf[0]; - n1 = (x_end - x) / cd->channels; - n2 = stft_process_buffer_samples_without_wrap(ibuf, ibuf->w_ptr); - n = MIN(n1, n2); - n = MIN(n, frames_left); - for (i = 0; i < n; i++) { - for (j = 0; j < channels; j++) { - ibuf = &state->ibuf[j]; - *ibuf->w_ptr++ = *x++; - } - } - - /* One of the buffers needs a wrap (or end of data), so check for wrap */ - for (j = 0; j < channels; j++) { - ibuf = &state->ibuf[j]; - ibuf->w_ptr = stft_process_buffer_wrap(ibuf, ibuf->w_ptr); - } - - if (x >= x_end) - x -= x_size; - - /* Update processed samples count for next loop iteration. */ - frames_left -= n; - } - - /* Update the source for bytes consumed. Return success. */ - source_release_data(source, bytes); - for (j = 0; j < channels; j++) { - ibuf = &state->ibuf[j]; - ibuf->s_avail += frames; - ibuf->s_free -= frames; - } - - return 0; -} - -/** - * stft_process_sink_s32() - Process S16_LE format. - * @mod: Pointer to module data. - * @source: Source for PCM samples data. - * @sink: Sink for PCM samples data. - * @frames: Number of audio data frames to process. - * - * This is the processing function for 16-bit signed integer PCM formats. The - * audio samples in every frame are re-order to channels order defined in - * component data channel_map[]. - * - * Return: Value zero for success, otherwise an error code. - */ -int stft_process_sink_s32(struct stft_comp_data *cd, struct sof_sink *sink, int frames) -{ - struct stft_process_state *state = &cd->state; - struct stft_process_buffer *obuf; - int32_t *y, *y_start, *y_end; - int frames_remain = frames; - int channels = cd->channels; - int bytes = frames * cd->frame_bytes; - int y_size; - int ret; - int ch, n1, n, i; - - /* Get pointer to sink data in circular buffer */ - ret = sink_get_buffer_s32(sink, bytes, &y, &y_start, &y_size); - if (ret) - return ret; - - /* Set helper pointers to buffer end for wrap check. Then loop until all - * samples are processed. 
- */ - y_end = y_start + y_size; - while (frames_remain) { - /* Find out samples to process before first wrap or end of data. */ - obuf = &state->obuf[0]; - n1 = (y_end - y) / cd->channels; - n = stft_process_buffer_samples_without_wrap(obuf, obuf->r_ptr); - n = MIN(n1, n); - n = MIN(n, frames_remain); - - for (i = 0; i < n; i++) { - for (ch = 0; ch < channels; ch++) { - obuf = &state->obuf[ch]; - *y++ = *obuf->r_ptr; - *obuf->r_ptr++ = 0; /* clear overlap add mix */ - } - } - - /* One of the buffers needs a wrap (or end of data), so check for wrap */ - for (ch = 0; ch < cd->channels; ch++) { - obuf = &state->obuf[ch]; - obuf->r_ptr = stft_process_buffer_wrap(obuf, obuf->r_ptr); - } - - if (y >= y_end) - y -= y_size; - - /* Update processed samples count for next loop iteration. */ - frames_remain -= n; - } - - /* Update the sink for bytes produced. Return success. */ - sink_commit_buffer(sink, bytes); - for (ch = 0; ch < channels; ch++) { - obuf = &state->obuf[ch]; - obuf->s_avail -= frames; - obuf->s_free += frames; - } - - return 0; -} -#endif /* CONFIG_FORMAT_S32LE */ - -#if CONFIG_FORMAT_S16LE -/** - * stft_process_source_s16() - Process S16_LE format. - * @mod: Pointer to module data. - * @source: Source for PCM samples data. - * @sink: Sink for PCM samples data. - * @frames: Number of audio data frames to process. - * - * This is the processing function for 16-bit signed integer PCM formats. The - * audio samples in every frame are re-order to channels order defined in - * component data channel_map[]. - * - * Return: Value zero for success, otherwise an error code. 
- */ -int stft_process_source_s16(struct stft_comp_data *cd, struct sof_source *source, int frames) -{ - struct stft_process_state *state = &cd->state; - struct stft_process_buffer *ibuf; - int16_t const *x, *x_start, *x_end; - int16_t in; - int x_size; - int channels = cd->channels; - int bytes = frames * cd->frame_bytes; - int frames_left = frames; - int ret; - int n1; - int n2; - int n; - int i; - int j; - - /* Get pointer to source data in circular buffer, get buffer start and size to - * check for wrap. The size in bytes is converted to number of s16 samples to - * control the samples process loop. If the number of bytes requested is not - * possible, an error is returned. - */ - ret = source_get_data_s16(source, bytes, &x, &x_start, &x_size); - if (ret) - return ret; - - /* Set helper pointers to buffer end for wrap check. Then loop until all - * samples are processed. - */ - x_end = x_start + x_size; - - while (frames_left) { - /* Find out samples to process before first wrap or end of data. */ - ibuf = &state->ibuf[0]; - n1 = (x_end - x) / cd->channels; - n2 = stft_process_buffer_samples_without_wrap(ibuf, ibuf->w_ptr); - n = MIN(n1, n2); - n = MIN(n, frames_left); - for (i = 0; i < n; i++) { - for (j = 0; j < channels; j++) { - ibuf = &state->ibuf[j]; - in = *x++; - *ibuf->w_ptr++ = (int32_t)in << 16; - } - } - - /* One of the buffers needs a wrap (or end of data), so check for wrap */ - for (j = 0; j < channels; j++) { - ibuf = &state->ibuf[j]; - ibuf->w_ptr = stft_process_buffer_wrap(ibuf, ibuf->w_ptr); - } - - if (x >= x_end) - x -= x_size; - - /* Update processed samples count for next loop iteration. */ - frames_left -= n; - } - - /* Update the source for bytes consumed. Return success. */ - source_release_data(source, bytes); - for (j = 0; j < channels; j++) { - ibuf = &state->ibuf[j]; - ibuf->s_avail += frames; - ibuf->s_free -= frames; - } - return 0; -} - -/** - * stft_process_sink_s16() - Process S16_LE format. - * @mod: Pointer to module data. 
- * @source: Source for PCM samples data. - * @sink: Sink for PCM samples data. - * @frames: Number of audio data frames to process. - * - * This is the processing function for 16-bit signed integer PCM formats. The - * audio samples in every frame are re-order to channels order defined in - * component data channel_map[]. - * - * Return: Value zero for success, otherwise an error code. - */ -int stft_process_sink_s16(struct stft_comp_data *cd, struct sof_sink *sink, int frames) -{ - struct stft_process_state *state = &cd->state; - struct stft_process_buffer *obuf; - int16_t *y, *y_start, *y_end; - int frames_remain = frames; - int channels = cd->channels; - int bytes = frames * cd->frame_bytes; - int y_size; - int ret; - int ch, n1, n, i; - - /* Get pointer to sink data in circular buffer */ - ret = sink_get_buffer_s16(sink, bytes, &y, &y_start, &y_size); - if (ret) - return ret; - - /* Set helper pointers to buffer end for wrap check. Then loop until all - * samples are processed. - */ - y_end = y_start + y_size; - while (frames_remain) { - /* Find out samples to process before first wrap or end of data. */ - obuf = &state->obuf[0]; - n1 = (y_end - y) / cd->channels; - n = stft_process_buffer_samples_without_wrap(obuf, obuf->r_ptr); - n = MIN(n1, n); - n = MIN(n, frames_remain); - - for (i = 0; i < n; i++) { - for (ch = 0; ch < channels; ch++) { - obuf = &state->obuf[ch]; - *y++ = sat_int16(Q_SHIFT_RND(*obuf->r_ptr, 31, 15)); - *obuf->r_ptr++ = 0; /* clear overlap add mix */ - } - } - - /* One of the buffers needs a wrap (or end of data), so check for wrap */ - for (ch = 0; ch < channels; ch++) { - obuf = &state->obuf[ch]; - obuf->r_ptr = stft_process_buffer_wrap(obuf, obuf->r_ptr); - } - - if (y >= y_end) - y -= y_size; - - /* Update processed samples count for next loop iteration. */ - frames_remain -= n; - } - - /* Update the sink for bytes produced. Return success. 
*/ - sink_commit_buffer(sink, bytes); - for (ch = 0; ch < channels; ch++) { - obuf = &state->obuf[ch]; - obuf->s_avail -= frames; - obuf->s_free += frames; - } - - return 0; -} -#endif /* CONFIG_FORMAT_S16LE */ - -void stft_process_fill_prev_samples(struct stft_process_buffer *buf, int32_t *prev_data, - int prev_data_length) -{ - /* Fill prev_data from input buffer */ - int32_t *r = buf->r_ptr; - int32_t *p = prev_data; - int copied; - int nmax; - int n; - - for (copied = 0; copied < prev_data_length; copied += n) { - nmax = prev_data_length - copied; - n = stft_process_buffer_samples_without_wrap(buf, r); - n = MIN(n, nmax); - memcpy(p, r, sizeof(int32_t) * n); /* Not using memcpy_s() due to speed need */ - p += n; - r += n; - r = stft_process_buffer_wrap(buf, r); - } - - buf->s_avail -= copied; - buf->s_free += copied; - buf->r_ptr = r; -} - -void stft_process_fill_fft_buffer(struct stft_process_state *state, int ch) -{ - struct stft_process_buffer *ibuf = &state->ibuf[ch]; - struct stft_process_fft *fft = &state->fft; - int32_t *prev_data = state->prev_data[ch]; - int32_t *r = ibuf->r_ptr; - int copied; - int nmax; - int idx; - int j; - int n; - - /* Copy overlapped samples from state buffer. Imaginary part of input - * remains zero. 
- */ - for (j = 0; j < state->prev_data_size; j++) { - fft->fft_buf[j].real = prev_data[j]; - fft->fft_buf[j].imag = 0; - } - - /* Copy hop size of new data from circular buffer */ - idx = state->prev_data_size; - for (copied = 0; copied < fft->fft_hop_size; copied += n) { - nmax = fft->fft_hop_size - copied; - n = stft_process_buffer_samples_without_wrap(ibuf, r); - n = MIN(n, nmax); - for (j = 0; j < n; j++) { - fft->fft_buf[idx].real = *r++; - fft->fft_buf[idx].imag = 0; - idx++; - } - r = stft_process_buffer_wrap(ibuf, r); - } - - ibuf->s_avail -= copied; - ibuf->s_free += copied; - ibuf->r_ptr = r; - - /* Copy for next time data back to overlap buffer */ - idx = fft->fft_hop_size; - for (j = 0; j < state->prev_data_size; j++) - prev_data[j] = fft->fft_buf[idx + j].real; -} - +#if SOF_USE_HIFI(NONE, COMP_STFT_PROCESS) void stft_process_overlap_add_ifft_buffer(struct stft_process_state *state, int ch) { struct stft_process_buffer *obuf = &state->obuf[ch]; @@ -434,3 +51,4 @@ void stft_process_apply_window(struct stft_process_state *state) sat_int32(Q_MULTSR_32X32((int64_t)fft->fft_buf[i + j].real, state->window[j], 31, 31, 31)); } +#endif /* SOF_USE_HIFI(NONE, COMP_STFT_PROCESS) */ diff --git a/src/audio/stft_process/stft_process-hifi3.c b/src/audio/stft_process/stft_process-hifi3.c new file mode 100644 index 000000000000..d05397e14c83 --- /dev/null +++ b/src/audio/stft_process/stft_process-hifi3.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2025 Intel Corporation. + +/** + * \file + * \brief HiFi3 SIMD-optimized helpers for the STFT processing component. + * + * This compilation unit provides HiFi3 intrinsic versions of selected + * hot-path helpers. It is guarded by SOF_USE_MIN_HIFI(3, COMP_STFT_PROCESS) + * so only one of the generic / hifi3 implementations is active. 
+ */ + +#include +#include +#include +#include +#include "stft_process.h" + +#if SOF_USE_MIN_HIFI(3, COMP_STFT_PROCESS) + +#include + +/** + * stft_process_apply_window() - Multiply FFT buffer by the analysis window. + * @state: STFT processing state that contains the FFT buffer and window. + * + * The real part of each icomplex32 sample in the FFT buffer is multiplied + * by the corresponding Q1.31 window coefficient. + */ +void stft_process_apply_window(struct stft_process_state *state) +{ + struct stft_process_fft *fft = &state->fft; + ae_int32 *buf; + const ae_int32x2 *win; + ae_f32x2 data01, data23; + ae_f32x2 win01, win23; + ae_int32x2 d0, d1; + int fft_size = fft->fft_size; + int i = fft->fft_fill_start_idx; + int j; + int n4; + + /* + * buf points to {real, imag} pairs (struct icomplex32). + * win points to scalar Q1.31 window coefficients. + * + * We load only the real part of each complex sample, multiply it by + * the window value, then store the updated real part back. + * The imaginary part is left untouched. + * + * Stride for buf is sizeof(ae_int32x2) = 8 bytes per complex sample. + * Stride for win is sizeof(ae_int32) = 4 bytes per scalar window value. + */ + buf = (ae_int32 *)&fft->fft_buf[i]; + win = (const ae_int32x2 *)state->window; + + assert(!(fft_size & 3)); + + /* Main loop: process 4 samples per iteration */ + n4 = fft_size >> 2; + for (j = 0; j < n4; j++) { + /* Load four FFT real part values, combine into data01 and data23, + * the first value goes to data01 high, the second to data01 low. 
+ */ + d0 = AE_L32_I(buf, 0 * sizeof(ae_int32x2)); + d1 = AE_L32_I(buf, 1 * sizeof(ae_int32x2)); + data01 = AE_SEL32_HH(d0, d1); + d0 = AE_L32_I(buf, 2 * sizeof(ae_int32x2)); + d1 = AE_L32_I(buf, 3 * sizeof(ae_int32x2)); + data23 = AE_SEL32_HH(d0, d1); + + /* Load four window coefficients, + * win[0] goes to win01 high, win[1] goes to win01 low + */ + AE_L32X2_IP(win01, win, sizeof(ae_int32x2)); + AE_L32X2_IP(win23, win, sizeof(ae_int32x2)); + + /* Multiply with window function */ + data01 = AE_MULFP32X2RS(data01, win01); + data23 = AE_MULFP32X2RS(data23, win23); + + /* Store back the updated real parts */ + AE_S32_L_IP(AE_SEL32_LH(data01, data01), buf, sizeof(ae_int32x2)); + AE_S32_L_IP(data01, buf, sizeof(ae_int32x2)); + AE_S32_L_IP(AE_SEL32_LH(data23, data23), buf, sizeof(ae_int32x2)); + AE_S32_L_IP(data23, buf, sizeof(ae_int32x2)); + } +} + +/** + * stft_process_overlap_add_ifft_buffer() - Overlap-add IFFT output to circular output buffer. + * @state: STFT processing state. + * @ch: Channel index. + * + * Each IFFT output sample is multiplied by gain_comp (Q1.31 x Q1.31) and + * added with saturation to the existing content of the circular output + * buffer. A single HiFi3 AE_MULAFP32X2RS multiply-accumulate handles both + * the multiply and the accumulation, two samples at a time. + * + * Note: obuf must hold an even number of samples and be 64-bit aligned. 
+ */ +void stft_process_overlap_add_ifft_buffer(struct stft_process_state *state, int ch) +{ + struct stft_process_buffer *obuf = &state->obuf[ch]; + struct stft_process_fft *fft = &state->fft; + ae_int32x2 *w = (ae_int32x2 *)obuf->w_ptr; + ae_int32 *fft_p = (ae_int32 *)&fft->fft_buf[fft->fft_fill_start_idx]; + int i, n; + int samples_remain = fft->fft_size; + + ae_f32x2 gain = AE_MOVDA32(state->gain_comp); + ae_f32x2 buffer_data; + ae_f32x2 fft_data; + ae_int32x2 d0, d1; + + while (samples_remain) { + n = stft_process_buffer_samples_without_wrap(obuf, (int32_t *)w); + n = MIN(samples_remain, n) >> 1; + for (i = 0; i < n; i++) { + /* Load two FFT real part values, combine into fft_data */ + AE_L32_IP(d0, fft_p, sizeof(ae_int32x2)); + AE_L32_IP(d1, fft_p, sizeof(ae_int32x2)); + fft_data = AE_SEL32_HH(d0, d1); + + /* Load buffer data, multiply fft_data with gain and accumulate */ + buffer_data = AE_L32X2_I(w, 0); + AE_MULAFP32X2RS(buffer_data, fft_data, gain); + AE_S32X2_IP(buffer_data, w, sizeof(ae_int32x2)); + } + w = (ae_int32x2 *)stft_process_buffer_wrap(obuf, (int32_t *)w); + samples_remain -= n << 1; + } + + obuf->w_ptr = stft_process_buffer_wrap(obuf, obuf->w_ptr + fft->fft_hop_size); + obuf->s_avail += fft->fft_hop_size; + obuf->s_free -= fft->fft_hop_size; +} + +#endif /* SOF_USE_MIN_HIFI(3, COMP_STFT_PROCESS) */ diff --git a/src/audio/stft_process/stft_process_common.c b/src/audio/stft_process/stft_process_common.c index 2fcaeb349b84..6ab3199082de 100644 --- a/src/audio/stft_process/stft_process_common.c +++ b/src/audio/stft_process/stft_process_common.c @@ -4,6 +4,10 @@ #include #include +#include +#include +#include +#include #include #include #include @@ -17,6 +21,7 @@ #include #include #include +#include #if STFT_DEBUG extern FILE *stft_debug_fft_in_fh; @@ -36,6 +41,313 @@ static void debug_print_to_file_complex(FILE *fh, struct icomplex32 *c, int n) } #endif +#if CONFIG_FORMAT_S32LE +int stft_process_source_s32(struct stft_comp_data *cd, struct 
sof_source *source, int frames) +{ + struct stft_process_state *state = &cd->state; + struct stft_process_buffer *ibuf; + int32_t const *x, *x_start, *x_end; + int x_size; + int bytes = frames * cd->frame_bytes; + int frames_left = frames; + int ret; + int n1; + int n2; + int channels = cd->channels; + int n; + int i; + int j; + + /* Get pointer to source data in circular buffer */ + ret = source_get_data_s32(source, bytes, &x, &x_start, &x_size); + if (ret) + return ret; + + /* Set helper pointers to buffer end for wrap check. Then loop until all + * samples are processed. + */ + x_end = x_start + x_size; + + while (frames_left) { + /* Find out samples to process before first wrap or end of data. */ + ibuf = &state->ibuf[0]; + n1 = (x_end - x) / cd->channels; + n2 = stft_process_buffer_samples_without_wrap(ibuf, ibuf->w_ptr); + n = MIN(n1, n2); + n = MIN(n, frames_left); + for (i = 0; i < n; i++) { + for (j = 0; j < channels; j++) { + ibuf = &state->ibuf[j]; + *ibuf->w_ptr++ = *x++; + } + } + + /* One of the buffers needs a wrap (or end of data), so check for wrap */ + for (j = 0; j < channels; j++) { + ibuf = &state->ibuf[j]; + ibuf->w_ptr = stft_process_buffer_wrap(ibuf, ibuf->w_ptr); + } + + if (x >= x_end) + x -= x_size; + + /* Update processed samples count for next loop iteration. */ + frames_left -= n; + } + + /* Update the source for bytes consumed. Return success. 
*/ + source_release_data(source, bytes); + for (j = 0; j < channels; j++) { + ibuf = &state->ibuf[j]; + ibuf->s_avail += frames; + ibuf->s_free -= frames; + } + + return 0; +} + +int stft_process_sink_s32(struct stft_comp_data *cd, struct sof_sink *sink, int frames) +{ + struct stft_process_state *state = &cd->state; + struct stft_process_buffer *obuf; + int32_t *y, *y_start, *y_end; + int frames_remain = frames; + int channels = cd->channels; + int bytes = frames * cd->frame_bytes; + int y_size; + int ret; + int ch, n1, n, i; + + /* Get pointer to sink data in circular buffer */ + ret = sink_get_buffer_s32(sink, bytes, &y, &y_start, &y_size); + if (ret) + return ret; + + /* Set helper pointers to buffer end for wrap check. Then loop until all + * samples are processed. + */ + y_end = y_start + y_size; + while (frames_remain) { + /* Find out samples to process before first wrap or end of data. */ + obuf = &state->obuf[0]; + n1 = (y_end - y) / cd->channels; + n = stft_process_buffer_samples_without_wrap(obuf, obuf->r_ptr); + n = MIN(n1, n); + n = MIN(n, frames_remain); + + for (i = 0; i < n; i++) { + for (ch = 0; ch < channels; ch++) { + obuf = &state->obuf[ch]; + *y++ = *obuf->r_ptr; + *obuf->r_ptr++ = 0; /* clear overlap add mix */ + } + } + + /* One of the buffers needs a wrap (or end of data), so check for wrap */ + for (ch = 0; ch < cd->channels; ch++) { + obuf = &state->obuf[ch]; + obuf->r_ptr = stft_process_buffer_wrap(obuf, obuf->r_ptr); + } + + if (y >= y_end) + y -= y_size; + + /* Update processed samples count for next loop iteration. */ + frames_remain -= n; + } + + /* Update the sink for bytes produced. Return success. 
*/ + sink_commit_buffer(sink, bytes); + for (ch = 0; ch < channels; ch++) { + obuf = &state->obuf[ch]; + obuf->s_avail -= frames; + obuf->s_free += frames; + } + + return 0; +} +#endif /* CONFIG_FORMAT_S32LE */ + +#if CONFIG_FORMAT_S16LE +int stft_process_source_s16(struct stft_comp_data *cd, struct sof_source *source, int frames) +{ + struct stft_process_state *state = &cd->state; + struct stft_process_buffer *ibuf; + int16_t const *x, *x_start, *x_end; + int16_t in; + int x_size; + int channels = cd->channels; + int bytes = frames * cd->frame_bytes; + int frames_left = frames; + int ret; + int n1; + int n2; + int n; + int i; + int j; + + ret = source_get_data_s16(source, bytes, &x, &x_start, &x_size); + if (ret) + return ret; + + x_end = x_start + x_size; + + while (frames_left) { + ibuf = &state->ibuf[0]; + n1 = (x_end - x) / cd->channels; + n2 = stft_process_buffer_samples_without_wrap(ibuf, ibuf->w_ptr); + n = MIN(n1, n2); + n = MIN(n, frames_left); + for (i = 0; i < n; i++) { + for (j = 0; j < channels; j++) { + ibuf = &state->ibuf[j]; + in = *x++; + *ibuf->w_ptr++ = (int32_t)in << 16; + } + } + + for (j = 0; j < channels; j++) { + ibuf = &state->ibuf[j]; + ibuf->w_ptr = stft_process_buffer_wrap(ibuf, ibuf->w_ptr); + } + + if (x >= x_end) + x -= x_size; + + frames_left -= n; + } + + source_release_data(source, bytes); + for (j = 0; j < channels; j++) { + ibuf = &state->ibuf[j]; + ibuf->s_avail += frames; + ibuf->s_free -= frames; + } + return 0; +} + +int stft_process_sink_s16(struct stft_comp_data *cd, struct sof_sink *sink, int frames) +{ + struct stft_process_state *state = &cd->state; + struct stft_process_buffer *obuf; + int16_t *y, *y_start, *y_end; + int frames_remain = frames; + int channels = cd->channels; + int bytes = frames * cd->frame_bytes; + int y_size; + int ret; + int ch, n1, n, i; + + ret = sink_get_buffer_s16(sink, bytes, &y, &y_start, &y_size); + if (ret) + return ret; + + y_end = y_start + y_size; + while (frames_remain) { + obuf = 
&state->obuf[0]; + n1 = (y_end - y) / cd->channels; + n = stft_process_buffer_samples_without_wrap(obuf, obuf->r_ptr); + n = MIN(n1, n); + n = MIN(n, frames_remain); + + for (i = 0; i < n; i++) { + for (ch = 0; ch < channels; ch++) { + obuf = &state->obuf[ch]; + *y++ = sat_int16(Q_SHIFT_RND(*obuf->r_ptr, 31, 15)); + *obuf->r_ptr++ = 0; /* clear overlap add mix */ + } + } + + for (ch = 0; ch < channels; ch++) { + obuf = &state->obuf[ch]; + obuf->r_ptr = stft_process_buffer_wrap(obuf, obuf->r_ptr); + } + + if (y >= y_end) + y -= y_size; + + frames_remain -= n; + } + + sink_commit_buffer(sink, bytes); + for (ch = 0; ch < channels; ch++) { + obuf = &state->obuf[ch]; + obuf->s_avail -= frames; + obuf->s_free += frames; + } + + return 0; +} +#endif /* CONFIG_FORMAT_S16LE */ + +void stft_process_fill_prev_samples(struct stft_process_buffer *buf, int32_t *prev_data, + int prev_data_length) +{ + int32_t *r = buf->r_ptr; + int32_t *p = prev_data; + int copied; + int nmax; + int n; + + for (copied = 0; copied < prev_data_length; copied += n) { + nmax = prev_data_length - copied; + n = stft_process_buffer_samples_without_wrap(buf, r); + n = MIN(n, nmax); + memcpy(p, r, sizeof(int32_t) * n); + p += n; + r += n; + r = stft_process_buffer_wrap(buf, r); + } + + buf->s_avail -= copied; + buf->s_free += copied; + buf->r_ptr = r; +} + +void stft_process_fill_fft_buffer(struct stft_process_state *state, int ch) +{ + struct stft_process_buffer *ibuf = &state->ibuf[ch]; + struct stft_process_fft *fft = &state->fft; + int32_t *prev_data = state->prev_data[ch]; + int32_t *r = ibuf->r_ptr; + int copied; + int nmax; + int idx; + int j; + int n; + + /* Copy overlapped samples from state buffer. Imaginary part of input + * remains zero. 
+ */ + for (j = 0; j < state->prev_data_size; j++) { + fft->fft_buf[j].real = prev_data[j]; + fft->fft_buf[j].imag = 0; + } + + /* Copy hop size of new data from circular buffer */ + idx = state->prev_data_size; + for (copied = 0; copied < fft->fft_hop_size; copied += n) { + nmax = fft->fft_hop_size - copied; + n = stft_process_buffer_samples_without_wrap(ibuf, r); + n = MIN(n, nmax); + for (j = 0; j < n; j++) { + fft->fft_buf[idx].real = *r++; + fft->fft_buf[idx].imag = 0; + idx++; + } + r = stft_process_buffer_wrap(ibuf, r); + } + + ibuf->s_avail -= copied; + ibuf->s_free += copied; + ibuf->r_ptr = r; + + /* Copy for next time data back to overlap buffer */ + idx = fft->fft_hop_size; + for (j = 0; j < state->prev_data_size; j++) + prev_data[j] = fft->fft_buf[idx + j].real; +} + LOG_MODULE_REGISTER(stft_process_common, CONFIG_SOF_LOG_LEVEL); /*