Skip to content

Commit

Permalink
Audio: aec: optimize acoustic echo cancellation processing
Browse files Browse the repository at this point in the history
This check-in optimizes the performance of the Acoustic Echo
Cancellation (AEC) implementation. The changes primarily
restructure loops and memory copy operations so that processor
cycles are used more efficiently.

Signed-off-by: shastry <[email protected]>
  • Loading branch information
ShriramShastry committed Feb 29, 2024
1 parent 3681e09 commit b612915
Show file tree
Hide file tree
Showing 2 changed files with 274 additions and 87 deletions.
275 changes: 207 additions & 68 deletions src/audio/google/google_rtc_audio_processing.c
Original file line number Diff line number Diff line change
Expand Up @@ -791,8 +791,6 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
size_t dst_buf_size;

size_t num_of_bytes_to_process;
size_t channel;
size_t buffer_offset;

struct sof_source *ref_stream, *src_stream;
struct sof_sink *dst_stream;
Expand Down Expand Up @@ -822,23 +820,58 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
/* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max
* 16int: linearize buffer, skip channels if > Max
*/
buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
/* Reduce cycle waste by streamlining the inner loop,
* converting from array indexing to pointer arithmetic,
* and putting data copy verification outside the loop.
*/
const int16_t *ref_data_end = ref + cd->num_frames * cd->num_aec_reference_channels;

/* Check that ref is within the valid range of the ref_buf buffer */
if (!ref || ref < (int16_t *)ref_buf_start || ref >= (int16_t *)ref_buf_end) {
/* ref does not point to valid int16_t data,
* return -EINVAL immediately to indicate an invalid argument was passed
*/
return -EINVAL;
}

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
cd->aec_reference_buffer_ptrs[channel][i] =
convert_int16_to_float(ref[channel]);
float **ref_ptr = cd->aec_reference_buffer_ptrs;
int s_chan;
int i;

/* Loop over frames and channels, converting data from int16 to float */
for (i = 0; i < cd->num_frames; ++i) {
for (s_chan = 0; s_chan < cd->num_aec_reference_channels; ++s_chan) {
(*ref_ptr)[s_chan] = convert_int16_to_float(*ref++);
ref_ptr++;
}
}

#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
cd->aec_reference_buffer[buffer_offset++] = ref[channel];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
int16_t *ref_buf = cd->aec_reference_buffer;
size_t sizeofrefbuffer = sizeof(cd->aec_reference_buffer);

}
/* Use memcpy to copy the data from ref buffer to ref_buf buffer until it reaches
* ref_data_end
* This assumes that the data in the ref buffer is contiguous
*/
size_t num_bytes = (ref_data_end - ref) * sizeof(*ref);

ref += cd->num_aec_reference_channels;
if ((void *)ref >= (void *)ref_buf_end)
ref = (void *)ref_buf_start;
if (num_bytes > sizeofrefbuffer) {
/* Handle the error: the source data is too large to fit in the
* destination buffer
*/
return -EINVAL;
}

memcpy(ref_buf, ref, num_bytes);

/* Update the ref and ref_buf pointers */
ref = ref_data_end;
ref_buf += (ref_data_end - ref);

#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
(const float **)
Expand All @@ -856,23 +889,64 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
assert(!ret);
src_buf_end = src_buf_start + src_buf_size;

buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->num_capture_channels; channel++)
/* The second optimization eliminates the inner loop
* and replaces it with pointer arithmetic for speedier access.
* To reduce cycle waste, the data copy check is moved outside of the loop.
*/
const int16_t *src_end = src + cd->num_frames * cd->config.output_fmt.channels_count;

/* Check if the calculated end of the source buffer exceeds the actual end of the buffer */
src_end = (int16_t *)cir_buf_wrap((void *)src_end,
(void *)src_buf_start, (void *)src_buf_end);

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]);
/* Declare a pointer to the process buffer */
float **proc_ptr = cd->process_buffer_ptrs;

/* Check for null pointers and buffer overflows */
if (!src || !proc_ptr || src >= (const int16_t *)src_end)
/* If there's an error, return -EINVAL immediately to indicate an
* invalid argument was passed
*/
return -EINVAL;

/* If there's no error, continue processing */
while (src != (const int16_t *)src_end) {
/* If the source pointer has reached or exceeded the end of the source
* buffer, wrap it back to the start
*/
src = (int16_t *)cir_buf_wrap((void *)src,
(void *)src_buf_start, (void *)src_buf_end);
/* Convert the source data from int16_t to float and store it in the
* process buffer
*/
*proc_ptr++ = convert_int16_to_float(src++);
}

#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
cd->process_buffer[buffer_offset++] = src[channel];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
/* Declare a pointer to the process buffer */
int16_t *proc_buf = cd->process_buffer;

/* Check for null pointers and buffer overflows */
if (!src || !proc_buf || src >= (int16_t *)src_end)
/* If there's an error, return -EINVAL immediately to indicate an
* invalid argument was passed
*/
return -EINVAL;

/* move pointer to next frame
* number of incoming channels may be < cd->num_capture_channels
/* If there's no error, continue processing */
while (src != (int16_t *)src_end) {
/* If the source pointer has reached or exceeded the end of the source
* buffer, wrap it back to the start
*/
src += cd->config.output_fmt.channels_count;
if ((void *)src >= (void *)src_buf_end)
src = (void *)src_buf_start;
src = (int16_t *)cir_buf_wrap((void *)src,
(void *)src_buf_start, (void *)src_buf_end);
/* Copy the source data to the process buffer */
*proc_buf++ = *src++;
}

#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */

source_release_data(src_stream, num_of_bytes_to_process);

/* call the library, use same in/out buffers */
Expand All @@ -893,26 +967,47 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
dst_buf_end = dst_buf_start + dst_buf_size;

/* process all channels in output stream */
buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) {
/* set data in processed channels, zeroize not processed */
if (channel < cd->num_capture_channels)
/* Calculate the end of the destination buffer based on the number of frames and
* channels
*/
int16_t *dst_end = dst + cd->num_frames * cd->config.output_fmt.channels_count;

/* Check if the calculated end of the destination buffer exceeds the actual end
* of the buffer
*/
dst_end = (int16_t *)cir_buf_wrap((void *)dst_end,
(void *)dst_buf_start, (void *)dst_buf_end);

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
dst[channel] = convert_float_to_int16(
cd->process_buffer_ptrs[channel][i]);
float **proc_ptr = cd->process_buffer_ptrs;

/* Check for null pointers and buffer overflows */
if (!dst || !proc_ptr || dst >= dst_end || *proc_ptr >= *proc_ptr + cd->num_frames)
/* If there's an error, return -EINVAL immediately to indicate an
* invalid argument was passed
*/
return -EINVAL;

/* Convert data from float to int16_t and store it in the destination buffer */
for (; dst != dst_end; ++dst, ++proc_ptr)
*dst = convert_float_to_int16(*proc_ptr);

#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
dst[channel] = cd->process_buffer[buffer_offset++];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
else
dst[channel] = 0;
}
int16_t *process_buffer = cd->process_buffer;

dst += cd->config.output_fmt.channels_count;
if ((void *)dst >= (void *)dst_buf_end)
dst = (void *)dst_buf_start;
}
/* Check for null pointers and buffer overflows */
if (!dst || !process_buffer || dst >= dst_end ||
process_buffer >= process_buffer + cd->num_frames)
/* If there's an error, return -EINVAL immediately to indicate an
* invalid argument was passed
*/
return -EINVAL;

/* Copy the data from the process buffer to the destination buffer */
for (; dst != dst_end; ++dst, ++process_buffer)
*dst = *process_buffer;

#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
sink_commit_buffer(dst_stream, num_of_bytes_to_process);

return 0;
Expand All @@ -928,6 +1023,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
int16_t *src, *dst, *ref;
uint32_t num_aec_reference_frames;
uint32_t num_aec_reference_bytes;
int ref_channels;
int aec_ref_product;
int num_samples_remaining;
int num_frames_remaining;
int channel;
Expand All @@ -950,25 +1047,33 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
ref_stream = ref_streamb->data;
ref = audio_stream_get_rptr(ref_stream);

/* Pre-calculate the number of channels in the reference stream for efficiency */
ref_channels = audio_stream_get_channels(ref_stream);

/* Pre-calculate the product of the number of AEC reference channels and the AEC
* reference frame index
*/
aec_ref_product = cd->num_aec_reference_channels * cd->aec_reference_frame_index;

num_aec_reference_frames = input_buffers[cd->aec_reference_source].size;
num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames;

num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream);
num_samples_remaining = num_aec_reference_frames * ref_channels;
while (num_samples_remaining) {
nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref);
n = MIN(num_samples_remaining, nmax);
for (i = 0; i < n; i += cd->num_aec_reference_channels) {
j = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
j = aec_ref_product;
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel)
cd->aec_reference_buffer[j++] = ref[channel];

ref += audio_stream_get_channels(ref_stream);
ref += ref_channels;
++cd->aec_reference_frame_index;

if (cd->aec_reference_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
cd->aec_reference_buffer);
cd->aec_reference_buffer);
cd->aec_reference_frame_index = 0;
/* Reset the product as the frame index is reset */
aec_ref_product = 0;
}
}
num_samples_remaining -= n;
Expand All @@ -984,6 +1089,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
src = audio_stream_get_rptr(mic_stream);
dst = audio_stream_get_wptr(out_stream);

/* Move out of loop */
int mic_stream_channels = audio_stream_get_channels(mic_stream);
frames = input_buffers[cd->raw_microphone_source].size;
num_frames_remaining = frames;

Expand All @@ -993,34 +1100,66 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
nmax = audio_stream_frames_without_wrap(out_stream, dst);
n = MIN(n, nmax);
for (i = 0; i < n; i++) {
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
cd->num_capture_channels]),
cd->num_frames * cd->num_capture_channels *
sizeof(cd->raw_mic_buffer[0]), src,
sizeof(int16_t) * cd->num_capture_channels);
++cd->raw_mic_buffer_frame_index;

memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
sizeof(cd->output_buffer[0]),
&(cd->output_buffer[cd->output_buffer_frame_index *
cd->num_capture_channels]),
sizeof(int16_t) * cd->num_capture_channels);
++cd->output_buffer_frame_index;

if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
cd->raw_mic_buffer,
cd->output_buffer);
cd->output_buffer_frame_index = 0;
cd->raw_mic_buffer_frame_index = 0;
/* If we haven't filled the buffer yet, copy the data */
if (cd->raw_mic_buffer_frame_index < cd->num_frames) {
size_t num_bytes = sizeof(int16_t) * cd->num_capture_channels;
size_t buffer_size = sizeof(cd->raw_mic_buffer);
size_t frame_index = cd->raw_mic_buffer_frame_index;
size_t buffer_used = frame_index * sizeof(int16_t);
size_t buffer_remaining = buffer_size - buffer_used;

if (num_bytes <= buffer_remaining) {
int16_t *buffer_start = cd->raw_mic_buffer;
size_t offset = frame_index * cd->num_capture_channels;

buffer_start += offset;
memcpy(buffer_start, src, num_bytes);
++cd->raw_mic_buffer_frame_index;
} else {
/* The source data is too big to fit in the
* destination buffer.
*/
return -EINVAL;
}
}

src += audio_stream_get_channels(mic_stream);
dst += audio_stream_get_channels(out_stream);
if (cd->output_buffer_frame_index < cd->num_frames) {
size_t num_bytes = sizeof(int16_t) * cd->num_capture_channels;
size_t buffer_size = sizeof(cd->output_buffer);
size_t frame_index = cd->output_buffer_frame_index;
size_t buffer_used = frame_index * sizeof(int16_t);
size_t buffer_remaining = buffer_size - buffer_used;

if (num_bytes <= buffer_remaining) {
int16_t *output_start = cd->output_buffer;
size_t offset = frame_index * cd->num_capture_channels;

output_start += offset;
memcpy(dst, output_start, num_bytes);
++cd->output_buffer_frame_index;
} else {
/* The source data is too big to fit in the
* destination buffer.
*/
return -EINVAL;
}
}

src += mic_stream_channels;
dst += mic_stream_channels;
}
num_frames_remaining -= n;
src = audio_stream_wrap(mic_stream, src);
dst = audio_stream_wrap(out_stream, dst);

/* If we've filled the buffer, process the data */
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
cd->raw_mic_buffer,
cd->output_buffer);
cd->output_buffer_frame_index = 0;
cd->raw_mic_buffer_frame_index = 0;
}
}

module_update_buffer_position(&input_buffers[cd->raw_microphone_source],
Expand Down
Loading

0 comments on commit b612915

Please sign in to comment.