From ab699fcd5b02680f658326de62b7856d02a45a8f Mon Sep 17 00:00:00 2001 From: shastry Date: Wed, 21 Feb 2024 21:56:36 +0530 Subject: [PATCH] Audio: aec: optimize acoustic echo cancellation processing This check-in introduces performance optimizations to the Acoustic Echo Cancellation (AEC) implementation. The changes primarily focus on refining loop structures and memory copy operations so that processor cycles are used more efficiently. Signed-off-by: shastry --- .../google/google_rtc_audio_processing.c | 207 ++++++++++++------ .../google/google_rtc_audio_processing_mock.c | 86 ++++++-- 2 files changed, 210 insertions(+), 83 deletions(-) diff --git a/src/audio/google/google_rtc_audio_processing.c b/src/audio/google/google_rtc_audio_processing.c index e1121c253e10..0270c76bddfd 100644 --- a/src/audio/google/google_rtc_audio_processing.c +++ b/src/audio/google/google_rtc_audio_processing.c @@ -822,23 +822,58 @@ static int google_rtc_audio_processing_process(struct processing_module *mod, /* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max * 16int: linearize buffer, skip channels if > Max */ + /* Reduce cycle waste by streamlining the inner loop, + * converting from array indexing to pointer arithmetic, + * and putting data copy verification outside the loop.
+ */ buffer_offset = 0; - for (int i = 0; i < cd->num_frames; i++) { - for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) { -#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API - cd->aec_reference_buffer_ptrs[channel][i] = - convert_int16_to_float(ref[channel]); -#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */ - cd->aec_reference_buffer[buffer_offset++] = ref[channel]; -#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */ + int16_t *ref_end = ref + cd->num_frames * cd->num_aec_reference_channels; + + if ((void *)ref_end >= (void *)ref_buf_end) + ref_end = (void *)ref_buf_start; +#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API + float **ref_ptr = cd->aec_reference_buffer_ptrs; + + /* Loop over frames and channels, converting data from int16 to float */ + for (int i = 0; i < cd->num_frames; ++i) { + for (int channel = 0; channel < cd->num_aec_reference_channels; ++channel) { + /* Check that ref is within the valid range of the ref_buf buffer */ + if (ref && (void *)ref >= (void *)ref_buf_start && + (void *)ref < (void *)ref_buf_end) + (*ref_ptr)[channel] = convert_int16_to_float(*ref++); + else + /* ref does not point to valid int16_t data */ + return -1; } + ref_ptr++; + } - ref += cd->num_aec_reference_channels; - if ((void *)ref >= (void *)ref_buf_end) - ref = (void *)ref_buf_start; +#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */ + int16_t *ref_buf = cd->aec_reference_buffer; + + /* Check that ref is within the valid range of the ref_buf buffer */ + if (ref && (void *)ref >= (void *)ref_buf_start && (void *)ref < (void *)ref_buf_end) { + /* Use memcpy_s to copy the data from ref buffer to ref_buf buffer until it reaches + * ref_end + * This assumes that the data in the ref buffer is contiguous + */ + size_t num_bytes = (ref_end - ref) * sizeof(*ref); + + if (memcpy_s(ref_buf, num_bytes, ref, num_bytes) != 0) { + /* Handle error */ + return -2; + } + /* Update the ref and ref_buf pointers */ + ref = ref_end; + ref_buf += 
(ref_end - ref); + } else { + /* ref does not point to valid int16_t data */ + return -2; } +#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */ + #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state, (const float **) @@ -855,24 +890,50 @@ static int google_rtc_audio_processing_process(struct processing_module *mod, (const void **)&src_buf_start, &src_buf_size); assert(!ret); src_buf_end = src_buf_start + src_buf_size; - + /* The second optimization eliminates the inner loop + * and replaces it with pointer arithmetic for speedier access. + * To reduce cycle waste, the data copy check is moved outside of the loop. + */ buffer_offset = 0; - for (int i = 0; i < cd->num_frames; i++) { - for (channel = 0; channel < cd->num_capture_channels; channel++) + int16_t *src_end = src + cd->num_frames * cd->config.output_fmt.channels_count; + + if ((void *)src_end >= (void *)src_buf_end) + src_end = (void *)src_buf_start; + #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API - cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]); + float **proc_ptr = cd->process_buffer_ptrs; + + /* Process the data until the source pointer reaches the end + * This assumes that the source buffer is continuous in memory + * If the source buffer is not continuous (i.e., if it wraps around + * like in a circular buffer), this code will not work correctly + */ + while (src != src_end) { + /* Check if src has exceeded the buffer end */ + if ((void *)src >= (void *)src_buf_end) + src = (void *)src_buf_start; + + *proc_ptr++ = convert_int16_to_float(src++); + } + #else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */ - cd->process_buffer[buffer_offset++] = src[channel]; -#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */ + int16_t *proc_buf = cd->process_buffer; - /* move pointer to next frame - * number of incoming channels may be < cd->num_capture_channels - */ - src += cd->config.output_fmt.channels_count; + /* Process 
the data until the source pointer reaches the end + * This assumes that the source buffer is continuous in memory + * If the source buffer is not continuous (i.e., if it wraps + * around like in a circular buffer), this code will not work correctly + */ + while (src != src_end) { + /* Check if src has exceeded the buffer end */ if ((void *)src >= (void *)src_buf_end) src = (void *)src_buf_start; + + *proc_buf++ = *src++; } +#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */ + source_release_data(src_stream, num_of_bytes_to_process); /* call the library, use same in/out buffers */ @@ -894,24 +955,24 @@ static int google_rtc_audio_processing_process(struct processing_module *mod, /* process all channels in output stream */ buffer_offset = 0; - for (int i = 0; i < cd->num_frames; i++) { - for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) { - /* set data in processed channels, zeroize not processed */ - if (channel < cd->num_capture_channels) + int16_t *dst_end = dst + cd->num_frames * cd->config.output_fmt.channels_count; + + if ((void *)dst_end >= (void *)dst_buf_end) + dst_end = (void *)dst_buf_start; + #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API - dst[channel] = convert_float_to_int16( - cd->process_buffer_ptrs[channel][i]); + float **proc_ptr = cd->process_buffer_ptrs; + + while (dst != dst_end && *proc_ptr) + *dst++ = convert_float_to_int16(*proc_ptr++); + #else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */ - dst[channel] = cd->process_buffer[buffer_offset++]; -#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */ - else - dst[channel] = 0; - } + int16_t *process_buffer = cd->process_buffer; - dst += cd->config.output_fmt.channels_count; - if ((void *)dst >= (void *)dst_buf_end) - dst = (void *)dst_buf_start; - } + while (dst != dst_end && *process_buffer) + *dst++ = *process_buffer++; + +#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */ sink_commit_buffer(dst_stream, num_of_bytes_to_process); @@ -928,6 +989,8 @@ 
static int google_rtc_audio_processing_process(struct processing_module *mod, int16_t *src, *dst, *ref; uint32_t num_aec_reference_frames; uint32_t num_aec_reference_bytes; + int ref_channels; + int aec_ref_product; int num_samples_remaining; int num_frames_remaining; int channel; @@ -950,25 +1013,33 @@ static int google_rtc_audio_processing_process(struct processing_module *mod, ref_stream = ref_streamb->data; ref = audio_stream_get_rptr(ref_stream); + /* Pre-calculate the number of channels in the reference stream for efficiency */ + ref_channels = audio_stream_get_channels(ref_stream); + + /* Pre-calculate the product of the number of AEC reference channels and the AEC + * reference frame index + */ + aec_ref_product = cd->num_aec_reference_channels * cd->aec_reference_frame_index; + num_aec_reference_frames = input_buffers[cd->aec_reference_source].size; num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames; - num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream); + num_samples_remaining = num_aec_reference_frames * ref_channels; while (num_samples_remaining) { nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref); n = MIN(num_samples_remaining, nmax); for (i = 0; i < n; i += cd->num_aec_reference_channels) { - j = cd->num_aec_reference_channels * cd->aec_reference_frame_index; + j = aec_ref_product; for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) cd->aec_reference_buffer[j++] = ref[channel]; - - ref += audio_stream_get_channels(ref_stream); + ref += ref_channels; ++cd->aec_reference_frame_index; - if (cd->aec_reference_frame_index == cd->num_frames) { GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state, - cd->aec_reference_buffer); + cd->aec_reference_buffer); cd->aec_reference_frame_index = 0; + /* Reset the product as the frame index is reset */ + aec_ref_product = 0; } } num_samples_remaining -= n; @@ -984,6 +1055,8 @@ static int 
google_rtc_audio_processing_process(struct processing_module *mod, src = audio_stream_get_rptr(mic_stream); dst = audio_stream_get_wptr(out_stream); + /* Move out of loop */ + int mic_stream_channels = audio_stream_get_channels(mic_stream); frames = input_buffers[cd->raw_microphone_source].size; num_frames_remaining = frames; @@ -993,34 +1066,40 @@ static int google_rtc_audio_processing_process(struct processing_module *mod, nmax = audio_stream_frames_without_wrap(out_stream, dst); n = MIN(n, nmax); for (i = 0; i < n; i++) { - memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index * - cd->num_capture_channels]), - cd->num_frames * cd->num_capture_channels * - sizeof(cd->raw_mic_buffer[0]), src, - sizeof(int16_t) * cd->num_capture_channels); - ++cd->raw_mic_buffer_frame_index; - - memcpy_s(dst, cd->num_frames * cd->num_capture_channels * - sizeof(cd->output_buffer[0]), - &(cd->output_buffer[cd->output_buffer_frame_index * - cd->num_capture_channels]), - sizeof(int16_t) * cd->num_capture_channels); - ++cd->output_buffer_frame_index; - - if (cd->raw_mic_buffer_frame_index == cd->num_frames) { - GoogleRtcAudioProcessingProcessCapture_int16(cd->state, - cd->raw_mic_buffer, - cd->output_buffer); - cd->output_buffer_frame_index = 0; - cd->raw_mic_buffer_frame_index = 0; + /* If we haven't filled the buffer yet, copy the data */ + if (cd->raw_mic_buffer_frame_index < cd->num_frames) { + memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index * + cd->num_capture_channels]), + cd->num_frames * cd->num_capture_channels * + sizeof(cd->raw_mic_buffer[0]), src, + sizeof(int16_t) * cd->num_capture_channels); + ++cd->raw_mic_buffer_frame_index; + } + + if (cd->output_buffer_frame_index < cd->num_frames) { + memcpy_s(dst, cd->num_frames * cd->num_capture_channels * + sizeof(cd->output_buffer[0]), + &(cd->output_buffer[cd->output_buffer_frame_index * + cd->num_capture_channels]), + sizeof(int16_t) * cd->num_capture_channels); + ++cd->output_buffer_frame_index; } - src += 
audio_stream_get_channels(mic_stream); - dst += audio_stream_get_channels(out_stream); + src += mic_stream_channels; + dst += mic_stream_channels; } num_frames_remaining -= n; src = audio_stream_wrap(mic_stream, src); dst = audio_stream_wrap(out_stream, dst); + + /* If we've filled the buffer, process the data */ + if (cd->raw_mic_buffer_frame_index == cd->num_frames) { + GoogleRtcAudioProcessingProcessCapture_int16(cd->state, + cd->raw_mic_buffer, + cd->output_buffer); + cd->output_buffer_frame_index = 0; + cd->raw_mic_buffer_frame_index = 0; + } } module_update_buffer_position(&input_buffers[cd->raw_microphone_source], diff --git a/src/audio/google/google_rtc_audio_processing_mock.c b/src/audio/google/google_rtc_audio_processing_mock.c index 9c654f919fc5..1d01e9187278 100644 --- a/src/audio/google/google_rtc_audio_processing_mock.c +++ b/src/audio/google/google_rtc_audio_processing_mock.c @@ -150,34 +150,61 @@ int GoogleRtcAudioProcessingProcessCapture_float32(GoogleRtcAudioProcessingState const float *const *src, float * const *dest) { + // Check if the input pointers are NULL + if (!state || !src || !dest) + return -1; // Return an error code + + // Check if the num_output_channels, num_aec_reference_channels, + // and num_frames values are positive + if (state->num_output_channels <= 0 || + state->num_aec_reference_channels <= 0 || + state->num_frames <= 0) { + return -2; // Return an error code + } + float *ref = state->aec_reference; float **mic = (float **)src; int n, chan; + int num_frames = state->num_frames; + int num_output_channels = state->num_output_channels; + int num_aec_reference_channels = state->num_aec_reference_channels; - for (chan = 0; chan < state->num_output_channels; chan++) { - for (n = 0; n < state->num_frames; ++n) { - float mic_save = mic[chan][n]; /* allow same in/out buffer */ + for (chan = 0; chan < num_output_channels; chan++) { + float *mic_chan = mic[chan]; + float *dest_chan = dest[chan]; + float *ref_chan = ref + chan * 
num_frames; - if (chan < state->num_aec_reference_channels) - dest[chan][n] = mic_save + ref[n + (chan * state->num_frames)]; + if (chan < num_aec_reference_channels) + for (n = 0; n < num_frames; ++n) + dest_chan[n] = mic_chan[n] + ref_chan[n]; + else + if (mic_chan != dest_chan) + memcpy_s(dest_chan, + num_frames * sizeof(float), + mic_chan, + num_frames * sizeof(float)); else - dest[chan][n] = mic_save; - } + memmove(dest_chan, mic_chan, num_frames * sizeof(float)); } return 0; } -int GoogleRtcAudioProcessingAnalyzeRender_float32(GoogleRtcAudioProcessingState *const state, - const float *const *data) +inline int GoogleRtcAudioProcessingAnalyzeRender_float32 + (GoogleRtcAudioProcessingState * const restrict state, + const float * const *restrict data) { const size_t buffer_size = sizeof(state->aec_reference[0]) * state->num_frames; int channel; + int num_aec_reference_channels = state->num_aec_reference_channels; + int num_frames = state->num_frames; - for (channel = 0; channel < state->num_aec_reference_channels; channel++) { - memcpy_s(&state->aec_reference[channel * state->num_frames], buffer_size, - data[channel], buffer_size); + if (buffer_size > 0) { + for (channel = 0; channel < num_aec_reference_channels; channel++) { + memcpy_s(&state->aec_reference[channel * num_frames], buffer_size, + data[channel], buffer_size); + } } return 0; @@ -187,18 +214,37 @@ int GoogleRtcAudioProcessingProcessCapture_int16(GoogleRtcAudioProcessingState * const int16_t *const src, int16_t *const dest) { + if (!state || !src || !dest || !state->aec_reference) + return -1; // Return an error code if any of the pointers are null + int16_t *ref = state->aec_reference; int n, chan; + int num_capture_channels = state->num_capture_channels; + int num_aec_reference_channels = state->num_aec_reference_channels; + for (chan = 0; chan < state->num_output_channels; chan++) { + int capture_index = chan; + int reference_index = chan; + for (n = 0; n < state->num_frames; ++n) { - int16_t 
mic_save = src[(n * state->num_capture_channels) + chan]; + if (capture_index >= num_capture_channels || + reference_index >= num_aec_reference_channels) { + // Return an error code if the indices are out of bounds + return -1; + } + + int16_t mic_save = src[capture_index]; - if (chan < state->num_aec_reference_channels) - dest[(n * state->num_capture_channels) + chan] = - mic_save + ref[(n * state->num_aec_reference_channels) + chan]; + // Use the local variables instead of fetching the values from memory + // each time + if (chan < num_aec_reference_channels) + dest[capture_index] = mic_save + ref[reference_index]; else - dest[(n * state->num_capture_channels) + chan] = mic_save; + dest[capture_index] = mic_save; + + capture_index += num_capture_channels; + reference_index += num_aec_reference_channels; } } @@ -212,8 +258,10 @@ int GoogleRtcAudioProcessingAnalyzeRender_int16(GoogleRtcAudioProcessingState *c sizeof(state->aec_reference[0]) * state->num_frames * state->num_aec_reference_channels; - memcpy_s(state->aec_reference, buffer_size, - data, buffer_size); + + if (memcmp(state->aec_reference, data, buffer_size) != 0) + memcpy_s(state->aec_reference, buffer_size, data, buffer_size); + return 0; }