Skip to content

Commit

Permalink
Audio: aec: optimize acoustic echo cancellation processing
Browse files Browse the repository at this point in the history
This commit introduces performance optimizations to the
Acoustic Echo Cancellation (AEC) implementation. The changes
primarily restructure loops and memory copy operations so that
the processing path consumes fewer cycles.

Signed-off-by: shastry <[email protected]>
  • Loading branch information
ShriramShastry committed Feb 21, 2024
1 parent 3681e09 commit 1fee8c6
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 86 deletions.
162 changes: 95 additions & 67 deletions src/audio/google/google_rtc_audio_processing.c
Original file line number Diff line number Diff line change
Expand Up @@ -822,22 +822,28 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
/* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max
* 16int: linearize buffer, skip channels if > Max
*/
// Optimization:Reduce cycle waste by streamlining the inner loop,
// converting from array indexing to pointer arithmetic,
// and putting data copy verification outside the loop.
buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
int16_t *ref_end = ref + cd->num_frames * cd->num_aec_reference_channels;

if ((void *)ref_end >= (void *)ref_buf_end)
ref_end = (void *)ref_buf_start;

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
cd->aec_reference_buffer_ptrs[channel][i] =
convert_int16_to_float(ref[channel]);
float **ref_ptr = cd->aec_reference_buffer_ptrs;

while (ref != ref_end)
*ref_ptr++ = convert_int16_to_float(ref++);

#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
cd->aec_reference_buffer[buffer_offset++] = ref[channel];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
int16_t *ref_buf = cd->aec_reference_buffer;

}
while (ref != ref_end)
*ref_buf++ = *ref++;

ref += cd->num_aec_reference_channels;
if ((void *)ref >= (void *)ref_buf_end)
ref = (void *)ref_buf_start;
}
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
Expand All @@ -855,23 +861,28 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
(const void **)&src_buf_start, &src_buf_size);
assert(!ret);
src_buf_end = src_buf_start + src_buf_size;

// The second optimization eliminates the inner loop
// and replaces it with pointer arithmetic for speedier access.
// To reduce cycle waste, the data copy check is moved outside of the loop.
buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->num_capture_channels; channel++)
int16_t *src_end = src + cd->num_frames * cd->config.output_fmt.channels_count;

if ((void *)src_end >= (void *)src_buf_end)
src_end = (void *)src_buf_start;

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]);
float **proc_ptr = cd->process_buffer_ptrs;

while (src != src_end)
*proc_ptr++ = convert_int16_to_float(src++);

#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
cd->process_buffer[buffer_offset++] = src[channel];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
int16_t *proc_buf = cd->process_buffer;

/* move pointer to next frame
* number of incoming channels may be < cd->num_capture_channels
*/
src += cd->config.output_fmt.channels_count;
if ((void *)src >= (void *)src_buf_end)
src = (void *)src_buf_start;
}
while (src != src_end)
*proc_buf++ = *src++;

#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */

source_release_data(src_stream, num_of_bytes_to_process);

Expand All @@ -894,24 +905,24 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,

/* process all channels in output stream */
buffer_offset = 0;
for (int i = 0; i < cd->num_frames; i++) {
for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) {
/* set data in processed channels, zeroize not processed */
if (channel < cd->num_capture_channels)
int16_t *dst_end = dst + cd->num_frames * cd->config.output_fmt.channels_count;

if ((void *)dst_end >= (void *)dst_buf_end)
dst_end = (void *)dst_buf_start;

#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
dst[channel] = convert_float_to_int16(
cd->process_buffer_ptrs[channel][i]);
float **proc_ptr = cd->process_buffer_ptrs;

while (dst != dst_end)
*dst++ = convert_float_to_int16(*proc_ptr++);

#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
dst[channel] = cd->process_buffer[buffer_offset++];
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
else
dst[channel] = 0;
}
int16_t *process_buffer = cd->process_buffer;

dst += cd->config.output_fmt.channels_count;
if ((void *)dst >= (void *)dst_buf_end)
dst = (void *)dst_buf_start;
}
while (dst != dst_end)
*dst++ = *process_buffer++;

#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */

sink_commit_buffer(dst_stream, num_of_bytes_to_process);

Expand All @@ -928,6 +939,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
int16_t *src, *dst, *ref;
uint32_t num_aec_reference_frames;
uint32_t num_aec_reference_bytes;
int ref_channels;
int aec_ref_product;
int num_samples_remaining;
int num_frames_remaining;
int channel;
Expand All @@ -950,25 +963,32 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
ref_stream = ref_streamb->data;
ref = audio_stream_get_rptr(ref_stream);

// Pre-calculate the number of channels in the reference stream for efficiency
ref_channels = audio_stream_get_channels(ref_stream);

// Pre-calculate the product of the number of AEC reference channels and the AEC
// reference frame index
aec_ref_product = cd->num_aec_reference_channels * cd->aec_reference_frame_index;

num_aec_reference_frames = input_buffers[cd->aec_reference_source].size;
num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames;

num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream);
num_samples_remaining = num_aec_reference_frames * ref_channels;
while (num_samples_remaining) {
nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref);
n = MIN(num_samples_remaining, nmax);
for (i = 0; i < n; i += cd->num_aec_reference_channels) {
j = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
j = aec_ref_product;
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel)
cd->aec_reference_buffer[j++] = ref[channel];

ref += audio_stream_get_channels(ref_stream);
ref += ref_channels;
++cd->aec_reference_frame_index;

if (cd->aec_reference_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
cd->aec_reference_buffer);
cd->aec_reference_buffer);
cd->aec_reference_frame_index = 0;
// Reset the product as the frame index is reset
aec_ref_product = 0;
}
}
num_samples_remaining -= n;
Expand All @@ -984,6 +1004,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
src = audio_stream_get_rptr(mic_stream);
dst = audio_stream_get_wptr(out_stream);

// Optimization: Move function call out of loop
int mic_stream_channels = audio_stream_get_channels(mic_stream);
frames = input_buffers[cd->raw_microphone_source].size;
num_frames_remaining = frames;

Expand All @@ -993,34 +1015,40 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
nmax = audio_stream_frames_without_wrap(out_stream, dst);
n = MIN(n, nmax);
for (i = 0; i < n; i++) {
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
cd->num_capture_channels]),
cd->num_frames * cd->num_capture_channels *
sizeof(cd->raw_mic_buffer[0]), src,
sizeof(int16_t) * cd->num_capture_channels);
++cd->raw_mic_buffer_frame_index;

memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
sizeof(cd->output_buffer[0]),
&(cd->output_buffer[cd->output_buffer_frame_index *
cd->num_capture_channels]),
sizeof(int16_t) * cd->num_capture_channels);
++cd->output_buffer_frame_index;

if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
cd->raw_mic_buffer,
cd->output_buffer);
cd->output_buffer_frame_index = 0;
cd->raw_mic_buffer_frame_index = 0;
// If we haven't filled the buffer yet, copy the data
if (cd->raw_mic_buffer_frame_index < cd->num_frames) {
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
cd->num_capture_channels]),
cd->num_frames * cd->num_capture_channels *
sizeof(cd->raw_mic_buffer[0]), src,
sizeof(int16_t) * cd->num_capture_channels);
++cd->raw_mic_buffer_frame_index;
}

src += audio_stream_get_channels(mic_stream);
dst += audio_stream_get_channels(out_stream);
if (cd->output_buffer_frame_index < cd->num_frames) {
memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
sizeof(cd->output_buffer[0]),
&(cd->output_buffer[cd->output_buffer_frame_index *
cd->num_capture_channels]),
sizeof(int16_t) * cd->num_capture_channels);
++cd->output_buffer_frame_index;
}

src += mic_stream_channels;
dst += mic_stream_channels;
}
num_frames_remaining -= n;
src = audio_stream_wrap(mic_stream, src);
dst = audio_stream_wrap(out_stream, dst);

// If we've filled the buffer, process the data
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
cd->raw_mic_buffer,
cd->output_buffer);
cd->output_buffer_frame_index = 0;
cd->raw_mic_buffer_frame_index = 0;
}
}

module_update_buffer_position(&input_buffers[cd->raw_microphone_source],
Expand Down
87 changes: 68 additions & 19 deletions src/audio/google/google_rtc_audio_processing_mock.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,34 +150,61 @@ int GoogleRtcAudioProcessingProcessCapture_float32(GoogleRtcAudioProcessingState
const float *const *src,
float * const *dest)
{
// Check if the input pointers are NULL
if (!state || !src || !dest)
return -1; // Return an error code

// Check if the num_output_channels, num_aec_reference_channels,
// and num_frames values are positive
if (state->num_output_channels <= 0 ||
state->num_aec_reference_channels <= 0 ||
state->num_frames <= 0) {
return -2; // Return an error code
}

float *ref = state->aec_reference;
float **mic = (float **)src;
int n, chan;
int num_frames = state->num_frames;
int num_output_channels = state->num_output_channels;
int num_aec_reference_channels = state->num_aec_reference_channels;

for (chan = 0; chan < state->num_output_channels; chan++) {
for (n = 0; n < state->num_frames; ++n) {
float mic_save = mic[chan][n]; /* allow same in/out buffer */
for (chan = 0; chan < num_output_channels; chan++) {
float *mic_chan = mic[chan];
float *dest_chan = dest[chan];
float *ref_chan = ref + chan * num_frames;

if (chan < state->num_aec_reference_channels)
dest[chan][n] = mic_save + ref[n + (chan * state->num_frames)];
if (chan < num_aec_reference_channels)
for (n = 0; n < num_frames; ++n)
dest_chan[n] = mic_chan[n] + ref_chan[n];
else
if (mic_chan != dest_chan)
memcpy_s(dest_chan,
num_frames * sizeof(float),
mic_chan,
num_frames * sizeof(float));
else
dest[chan][n] = mic_save;
}
memmove(dest_chan, mic_chan, num_frames * sizeof(float));
}
return 0;
}

int GoogleRtcAudioProcessingAnalyzeRender_float32(GoogleRtcAudioProcessingState *const state,
const float *const *data)
inline int GoogleRtcAudioProcessingAnalyzeRender_float32
(GoogleRtcAudioProcessingState * const restrict state,
const float * const *restrict data)
{
const size_t buffer_size =
sizeof(state->aec_reference[0])
* state->num_frames;
int channel;
int num_aec_reference_channels = state->num_aec_reference_channels;
int num_frames = state->num_frames;

for (channel = 0; channel < state->num_aec_reference_channels; channel++) {
memcpy_s(&state->aec_reference[channel * state->num_frames], buffer_size,
data[channel], buffer_size);
if (buffer_size > 0) {
for (channel = 0; channel < num_aec_reference_channels; channel++) {
memcpy_s(&state->aec_reference[channel * num_frames], buffer_size,
data[channel], buffer_size);
}
}

return 0;
Expand All @@ -187,18 +214,38 @@ int GoogleRtcAudioProcessingProcessCapture_int16(GoogleRtcAudioProcessingState *
const int16_t *const src,
int16_t *const dest)
{
if (!state || !src || !dest || !state->aec_reference)
return -1; // Return an error code if any of the pointers are null

int16_t *ref = state->aec_reference;
int n, chan;

// Fetch the values from memory once and store them in local variables
// This can potentially improve performance by reducing the number of memory accesses
int num_capture_channels = state->num_capture_channels;
int num_aec_reference_channels = state->num_aec_reference_channels;

for (chan = 0; chan < state->num_output_channels; chan++) {
int capture_index = chan;
int reference_index = chan;

for (n = 0; n < state->num_frames; ++n) {
int16_t mic_save = src[(n * state->num_capture_channels) + chan];
if (capture_index >= num_capture_channels ||
reference_index >= num_aec_reference_channels) {
return -1; // Return an error code if the indices are out of bounds
}

int16_t mic_save = src[capture_index];

if (chan < state->num_aec_reference_channels)
dest[(n * state->num_capture_channels) + chan] =
mic_save + ref[(n * state->num_aec_reference_channels) + chan];
// Use the local variables instead of fetching the values from memory
// each time
if (chan < num_aec_reference_channels)
dest[capture_index] = mic_save + ref[reference_index];
else
dest[(n * state->num_capture_channels) + chan] = mic_save;
dest[capture_index] = mic_save;

capture_index += num_capture_channels;
reference_index += num_aec_reference_channels;
}
}

Expand All @@ -212,8 +259,10 @@ int GoogleRtcAudioProcessingAnalyzeRender_int16(GoogleRtcAudioProcessingState *c
sizeof(state->aec_reference[0])
* state->num_frames
* state->num_aec_reference_channels;
memcpy_s(state->aec_reference, buffer_size,
data, buffer_size);

if (memcmp(state->aec_reference, data, buffer_size) != 0)
memcpy_s(state->aec_reference, buffer_size, data, buffer_size);

return 0;
}

Expand Down

0 comments on commit 1fee8c6

Please sign in to comment.