Audio: aec: optimize acoustic echo cancellation processing

This commit introduces performance optimizations to the Acoustic Echo Cancellation (AEC) implementation. The changes primarily restructure loops and memory copy operations to use fewer processing cycles.

Signed-off-by: shastry <[email protected]>
thesofproject · Feb 21, 2024 · 1fee8c6 · 1fee8c6
1 parent 3681e09
commit 1fee8c6
Show file tree

Hide file tree

Showing 2 changed files with 163 additions and 86 deletions.
diff --git a/src/audio/google/google_rtc_audio_processing.c b/src/audio/google/google_rtc_audio_processing.c
@@ -822,22 +822,28 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 	/* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max
 	 * 16int: linearize buffer, skip channels if > Max
 	 */
+	// Optimization: reduce wasted cycles by streamlining the inner loop,
+	// converting from array indexing to pointer arithmetic,
+	// and moving the buffer wrap-around check outside the loop.
 	buffer_offset = 0;
-	for (int i = 0; i < cd->num_frames; i++) {
-		for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
+	int16_t *ref_end = ref + cd->num_frames * cd->num_aec_reference_channels;
+
+	if ((void *)ref_end >= (void *)ref_buf_end)
+		ref_end = (void *)ref_buf_start;
+
 #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
-			cd->aec_reference_buffer_ptrs[channel][i] =
-					convert_int16_to_float(ref[channel]);
+	float **ref_ptr = cd->aec_reference_buffer_ptrs;
+
+	while (ref != ref_end)
+		*ref_ptr++ = convert_int16_to_float(ref++);
+
 #else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
-			cd->aec_reference_buffer[buffer_offset++] = ref[channel];
-#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
+	int16_t *ref_buf = cd->aec_reference_buffer;
 
-		}
+	while (ref != ref_end)
+		*ref_buf++ = *ref++;
 
-		ref += cd->num_aec_reference_channels;
-		if ((void *)ref >= (void *)ref_buf_end)
-			ref = (void *)ref_buf_start;
-	}
+#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
 
 #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
 	GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
@@ -855,23 +861,28 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 			      (const void **)&src_buf_start,  &src_buf_size);
 	assert(!ret);
 	src_buf_end = src_buf_start + src_buf_size;
-
+	// The second optimization eliminates the inner loop
+	// and replaces it with pointer arithmetic for speedier access.
+	// To reduce cycle waste, the data copy check is moved outside of the loop.
 	buffer_offset = 0;
-	for (int i = 0; i < cd->num_frames; i++) {
-		for (channel = 0; channel < cd->num_capture_channels; channel++)
+	int16_t *src_end = src + cd->num_frames * cd->config.output_fmt.channels_count;
+
+	if ((void *)src_end >= (void *)src_buf_end)
+		src_end = (void *)src_buf_start;
+
 #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
-		cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]);
+	float **proc_ptr = cd->process_buffer_ptrs;
+
+	while (src != src_end)
+		*proc_ptr++ = convert_int16_to_float(src++);
+
 #else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
-		cd->process_buffer[buffer_offset++] = src[channel];
-#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
+	int16_t *proc_buf = cd->process_buffer;
 
-		/* move pointer to next frame
-		 * number of incoming channels may be < cd->num_capture_channels
-		 */
-		src += cd->config.output_fmt.channels_count;
-		if ((void *)src >= (void *)src_buf_end)
-			src = (void *)src_buf_start;
-	}
+	while (src != src_end)
+		*proc_buf++ = *src++;
+
+#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
 
 	source_release_data(src_stream, num_of_bytes_to_process);
 
@@ -894,24 +905,24 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 
 	/* process all channels in output stream */
 	buffer_offset = 0;
-	for (int i = 0; i < cd->num_frames; i++) {
-		for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) {
-			/* set data in processed channels, zeroize not processed */
-			if (channel < cd->num_capture_channels)
+	int16_t *dst_end = dst + cd->num_frames * cd->config.output_fmt.channels_count;
+
+	if ((void *)dst_end >= (void *)dst_buf_end)
+		dst_end = (void *)dst_buf_start;
+
 #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
-				dst[channel] = convert_float_to_int16(
-						   cd->process_buffer_ptrs[channel][i]);
+	float **proc_ptr = cd->process_buffer_ptrs;
+
+	while (dst != dst_end)
+		*dst++ = convert_float_to_int16(*proc_ptr++);
+
 #else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
-				dst[channel] = cd->process_buffer[buffer_offset++];
-#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
-			else
-				dst[channel] = 0;
-		}
+	int16_t *process_buffer = cd->process_buffer;
 
-		dst += cd->config.output_fmt.channels_count;
-		if ((void *)dst >= (void *)dst_buf_end)
-			dst = (void *)dst_buf_start;
-	}
+	while (dst != dst_end)
+		*dst++ = *process_buffer++;
+
+#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
 
 	sink_commit_buffer(dst_stream, num_of_bytes_to_process);
 
@@ -928,6 +939,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 	int16_t *src, *dst, *ref;
 	uint32_t num_aec_reference_frames;
 	uint32_t num_aec_reference_bytes;
+	int ref_channels;
+	int aec_ref_product;
 	int num_samples_remaining;
 	int num_frames_remaining;
 	int channel;
@@ -950,25 +963,32 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 	ref_stream = ref_streamb->data;
 	ref = audio_stream_get_rptr(ref_stream);
 
+	// Pre-calculate the number of channels in the reference stream for efficiency
+	ref_channels = audio_stream_get_channels(ref_stream);
+
+	// Pre-calculate the product of the number of AEC reference channels and the AEC
+	// reference frame index
+	aec_ref_product = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
+
 	num_aec_reference_frames = input_buffers[cd->aec_reference_source].size;
 	num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames;
 
-	num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream);
+	num_samples_remaining = num_aec_reference_frames * ref_channels;
 	while (num_samples_remaining) {
 		nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref);
 		n = MIN(num_samples_remaining, nmax);
 		for (i = 0; i < n; i += cd->num_aec_reference_channels) {
-			j = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
+			j = aec_ref_product;
 			for (channel = 0; channel < cd->num_aec_reference_channels; ++channel)
 				cd->aec_reference_buffer[j++] = ref[channel];
-
-			ref += audio_stream_get_channels(ref_stream);
+			ref += ref_channels;
 			++cd->aec_reference_frame_index;
-
 			if (cd->aec_reference_frame_index == cd->num_frames) {
 				GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
-									    cd->aec_reference_buffer);
+									 cd->aec_reference_buffer);
 				cd->aec_reference_frame_index = 0;
+				// Reset the product as the frame index is reset
+				aec_ref_product = 0;
 			}
 		}
 		num_samples_remaining -= n;
@@ -984,6 +1004,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 	src = audio_stream_get_rptr(mic_stream);
 	dst = audio_stream_get_wptr(out_stream);
 
+	// Optimization: Move function call out of loop
+	int mic_stream_channels = audio_stream_get_channels(mic_stream);
 	frames = input_buffers[cd->raw_microphone_source].size;
 	num_frames_remaining = frames;
 
@@ -993,34 +1015,40 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 		nmax = audio_stream_frames_without_wrap(out_stream, dst);
 		n = MIN(n, nmax);
 		for (i = 0; i < n; i++) {
-			memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
-						      cd->num_capture_channels]),
-				 cd->num_frames * cd->num_capture_channels *
-				 sizeof(cd->raw_mic_buffer[0]), src,
-				 sizeof(int16_t) * cd->num_capture_channels);
-			++cd->raw_mic_buffer_frame_index;
-
-			memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
-				 sizeof(cd->output_buffer[0]),
-				 &(cd->output_buffer[cd->output_buffer_frame_index *
-						     cd->num_capture_channels]),
-				 sizeof(int16_t) * cd->num_capture_channels);
-			++cd->output_buffer_frame_index;
-
-			if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
-				GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
-									     cd->raw_mic_buffer,
-									     cd->output_buffer);
-				cd->output_buffer_frame_index = 0;
-				cd->raw_mic_buffer_frame_index = 0;
+			// If we haven't filled the buffer yet, copy the data
+			if (cd->raw_mic_buffer_frame_index < cd->num_frames) {
+				memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
+							      cd->num_capture_channels]),
+					 cd->num_frames * cd->num_capture_channels *
+					 sizeof(cd->raw_mic_buffer[0]), src,
+					 sizeof(int16_t) * cd->num_capture_channels);
+				++cd->raw_mic_buffer_frame_index;
 			}
 
-			src += audio_stream_get_channels(mic_stream);
-			dst += audio_stream_get_channels(out_stream);
+			if (cd->output_buffer_frame_index < cd->num_frames) {
+				memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
+					 sizeof(cd->output_buffer[0]),
+					 &(cd->output_buffer[cd->output_buffer_frame_index *
+							     cd->num_capture_channels]),
+					 sizeof(int16_t) * cd->num_capture_channels);
+				++cd->output_buffer_frame_index;
+			}
+
+			src += mic_stream_channels;
+			dst += mic_stream_channels;
 		}
 		num_frames_remaining -= n;
 		src = audio_stream_wrap(mic_stream, src);
 		dst = audio_stream_wrap(out_stream, dst);
+
+		// If we've filled the buffer, process the data
+		if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
+			GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
+								     cd->raw_mic_buffer,
+								     cd->output_buffer);
+			cd->output_buffer_frame_index = 0;
+			cd->raw_mic_buffer_frame_index = 0;
+		}
 	}
 
 	module_update_buffer_position(&input_buffers[cd->raw_microphone_source],

diff --git a/src/audio/google/google_rtc_audio_processing_mock.c b/src/audio/google/google_rtc_audio_processing_mock.c
@@ -150,34 +150,61 @@ int GoogleRtcAudioProcessingProcessCapture_float32(GoogleRtcAudioProcessingState
 						   const float *const *src,
 						   float * const *dest)
 {
+	// Check if the input pointers are NULL
+	if (!state || !src || !dest)
+		return -1;  // Return an error code
+
+	// Check if the num_output_channels, num_aec_reference_channels,
+	// and num_frames values are positive
+	if (state->num_output_channels <= 0 ||
+	    state->num_aec_reference_channels <= 0 ||
+	    state->num_frames <= 0) {
+		return -2;  // Return an error code
+	}
+
 	float *ref = state->aec_reference;
 	float **mic = (float **)src;
 	int n, chan;
+	int num_frames = state->num_frames;
+	int num_output_channels = state->num_output_channels;
+	int num_aec_reference_channels = state->num_aec_reference_channels;
 
-	for (chan = 0; chan < state->num_output_channels; chan++) {
-		for (n = 0; n < state->num_frames; ++n) {
-			float mic_save = mic[chan][n];	/* allow same in/out buffer */
+	for (chan = 0; chan < num_output_channels; chan++) {
+		float *mic_chan = mic[chan];
+		float *dest_chan = dest[chan];
+		float *ref_chan = ref + chan * num_frames;
 
-			if (chan < state->num_aec_reference_channels)
-				dest[chan][n] = mic_save + ref[n + (chan * state->num_frames)];
+		if (chan < num_aec_reference_channels)
+			for (n = 0; n < num_frames; ++n)
+				dest_chan[n] = mic_chan[n] + ref_chan[n];
+		else
+			if (mic_chan != dest_chan)
+				memcpy_s(dest_chan,
+					 num_frames * sizeof(float),
+					 mic_chan,
+					 num_frames * sizeof(float));
 			else
-				dest[chan][n] = mic_save;
-		}
+				memmove(dest_chan, mic_chan, num_frames * sizeof(float));
 	}
 	return 0;
 }
 
-int GoogleRtcAudioProcessingAnalyzeRender_float32(GoogleRtcAudioProcessingState *const state,
-						  const float *const *data)
+inline int GoogleRtcAudioProcessingAnalyzeRender_float32
+	   (GoogleRtcAudioProcessingState * const restrict state,
+	   const float * const *restrict data)
 {
 	const size_t buffer_size =
 		sizeof(state->aec_reference[0])
 		* state->num_frames;
 	int channel;
+	int num_aec_reference_channels = state->num_aec_reference_channels;
+	int num_frames = state->num_frames;
 
-	for (channel = 0; channel < state->num_aec_reference_channels; channel++) {
-		memcpy_s(&state->aec_reference[channel * state->num_frames], buffer_size,
-			 data[channel], buffer_size);
+	if (buffer_size > 0) {
+		for (channel = 0; channel < num_aec_reference_channels; channel++) {
+			memcpy_s(&state->aec_reference[channel * num_frames], buffer_size,
+				 data[channel], buffer_size);
+		}
 	}
 
 	return 0;
@@ -187,18 +214,38 @@ int GoogleRtcAudioProcessingProcessCapture_int16(GoogleRtcAudioProcessingState *
 						 const int16_t *const src,
 						 int16_t *const dest)
 {
+	if (!state || !src || !dest || !state->aec_reference)
+		return -1; // Return an error code if any of the pointers are null
+
 	int16_t *ref = state->aec_reference;
 	int n, chan;
 
+	// Fetch the values from memory once and store them in local variables
+	// This can potentially improve performance by reducing the number of memory accesses
+	int num_capture_channels = state->num_capture_channels;
+	int num_aec_reference_channels = state->num_aec_reference_channels;
+
 	for (chan = 0; chan < state->num_output_channels; chan++) {
+		int capture_index = chan;
+		int reference_index = chan;
+
 		for (n = 0; n < state->num_frames; ++n) {
-			int16_t mic_save = src[(n * state->num_capture_channels) + chan];
+			if (capture_index >= num_capture_channels ||
+			    reference_index >= num_aec_reference_channels) {
+				return -1; // Return an error code if the indices are out of bounds
+			}
+
+			int16_t mic_save = src[capture_index];
 
-			if (chan < state->num_aec_reference_channels)
-				dest[(n * state->num_capture_channels) + chan] =
-				   mic_save + ref[(n * state->num_aec_reference_channels) + chan];
+			// Use the local variables instead of fetching the values from memory
+			// each time
+			if (chan < num_aec_reference_channels)
+				dest[capture_index] = mic_save + ref[reference_index];
 			else
-				dest[(n * state->num_capture_channels) + chan] = mic_save;
+				dest[capture_index] = mic_save;
+
+			capture_index += num_capture_channels;
+			reference_index += num_aec_reference_channels;
 		}
 	}
 
@@ -212,8 +259,10 @@ int GoogleRtcAudioProcessingAnalyzeRender_int16(GoogleRtcAudioProcessingState *c
 		sizeof(state->aec_reference[0])
 		* state->num_frames
 		* state->num_aec_reference_channels;
-	memcpy_s(state->aec_reference, buffer_size,
-		 data, buffer_size);
+
+	if (memcmp(state->aec_reference, data, buffer_size) != 0)
+		memcpy_s(state->aec_reference, buffer_size, data, buffer_size);
+
 	return 0;
 }