zephyrproject-rtos · zephyrbot · Oct 15, 2023
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: Apache-2.0
+
+cmake_minimum_required(VERSION 3.20.0)
+find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
+project(log_blocking)
+
+target_sources(app PRIVATE src/main.c)
@@ -0,0 +1,22 @@
+# Copyright (c) 2023 Meta
+# SPDX-License-Identifier: Apache-2.0
+
+source "Kconfig.zephyr"
+
+config TEST_NUM_LOGS
+	int "Number of log messages per test"
+	default 5000
+
+config TEST_INPUT_LOG_RATE
+	int "Maximal input log rate for the test (in msg/s)"
+	default 1000
+	help
+	  Specify the maximum rate at which messages will be logged from the
+	  application.
+
+config TEST_OUTPUT_LOG_RATE
+	int "Maximal output log rate for the test (in msg/s)"
+	default 1000
+	help
+	  Specify the maximum rate at which log messages will be handled by
+	  the log backend.
@@ -0,0 +1,97 @@
+# Blocking the Log Processing Thread
+
+## Overview
+
+When the core log buffer becomes full, the logging subsystem can be configured to
+
+* Drop older log messages with `CONFIG_LOG_MODE_OVERFLOW=y` (**default**)
+* Drop newer log messages with `CONFIG_LOG_MODE_OVERFLOW=n`, or
+* Drop no log messages at all with `CONFIG_LOG_BLOCK_IN_THREAD=y`, `CONFIG_LOG_BLOCK_IN_THREAD_TIMEOUT_MS=-1`.
+
+In the last configuration, the log processing thread will block until space
+becomes available again in the core log buffer.
+
+> Warning ⚠️: Blocking the log processing thread is generally not recommended
+> and should only be attempted in advanced use cases.
+
+## Logging and Flow Rates
+
+There are roughly 4 scenarios we care about testing with
+`CONFIG_LOG_BLOCK_IN_THREAD`, and they can all be characterized by comparing
+log message flow rates. Typically, one would describe log message flow rates
+with units such as `[msg/s]`. However, in the table below, we are mainly
+concerned with the ratio of the Output Rate to the Input Rate, and in that
+case, the units themselves cancel-out. In the table we assume there exists an
+`N` such that `N > 1`.
+
+| Name           | Input Rate | Output Rate | Rate |
+|----------------|------------|-------------|------|
+| Input-Limited  | 1          | N           | 1    |
+| Matched        | 1          | 1           | 1    |
+| Output-Limited | 1          | 1/N         | 1/N  |
+| Stalled        | 0          | 0           | 0    |
+
+The resultant _Rate_ is always `Rate = MIN(Input Rate, Output Rate)`.
+
+Rate-limiting of any kind can be described approximately as _back pressure_.
+Back pressure is fine in short bursts but it can cause delays in application
+and driver code if the pressure is not relieved promptly.
+
+## Physical Sources of Backpressure
+
+Many log backends, such as UARTs, have a built-in hardware FIFO that
+inherently provides back-pressure; output log processing is rate-limited
+based on the baud rate of the UART. Other backends, such as UDP sockets or
+DMA, can provide significantly higher throughput but are still inherently
+rate-limited by the physical layer over which they operate, be it Gigabit
+Ethernet or PCI express.
+
+Even a trivial _message source_ or _message sink_ is still rate-limited by
+memory or the CPU. From that perspective, we can infer that there is a finite
+limit in the log processing rate for practical systems. That may be
+comforting to know, even if it is something astronomical like 1G `[msg/s]`.
+
+## Input-Limited Log Rate
+
+The ideal scenario is when the output "bandwidth" exceeds the input rate. If
+so configured, we minimize the liklihood that the log processing thread will
+stall. We can also be sure that the output will be able to relieve
+backpressure (i.e. the core log buffer usage will tend to zero over time).
+
+## Rate-Matched Input and Output
+
+When the input rate and output rates are equal, one might think this is the
+ideal scenario. In reality, it is not. The rates could be matched, but a
+sustained increase (or several small increases) in the input log rate, could
+cause the core log buffer to approach 100% capacity. Since the output log rate
+is still only matched with the input log rate, the core log buffer capacity
+would not decrease from 100%, and it would remain saturated.
+
+Logging has a tendency to be bursty, so it is definitely preferable to
+operate in the _Input-limited Log Rate_ regime.
+
+## Output-Limited Log Rate
+
+If the rate of output processing is less than the rate of input processing,
+the core log buffer will approach 100% capacity and, eventually, stall the
+log processing thread.
+
+## Stalling the Log Processing Thread
+
+When any log backend is unable to process logs for whatever reason,
+the output rate approaches 0 `[msg/s]`. If application or
+driver code continue to submit logs, the core log buffer approaches 100%
+capacity. Once the core log buffer is full, the log processing thread is
+unable to allocate new log messages and it will be stalled.
+
+Stalling a real-time application produces unexpected behaviour, so it is
+advised to avoid this for any non-negligible amount of time.
+
+It is absolutely critical that the log backend is capable of operating
+correctly _even when the log processing thread is blocking_ in order to
+automatically recover from a stall.
+
+On a live system, it may be necessary to manually perform remediation of log
+backends that are unable to recover from stalling the log processing thread.
+Remediation could involve disabling the log backend and freeing any in-use
+buffers.
@@ -0,0 +1,28 @@
+CONFIG_ZTEST=y
+
+CONFIG_TEST_LOGGING_DEFAULTS=n
+CONFIG_LOG=y
+CONFIG_LOG_PRINTK=n
+CONFIG_LOG_BUFFER_SIZE=512
+CONFIG_ASSERT=y
+CONFIG_MAIN_STACK_SIZE=2048
+
+# Disable any logs that could interfere.
+CONFIG_KERNEL_LOG_LEVEL_OFF=y
+CONFIG_SOC_LOG_LEVEL_OFF=y
+CONFIG_ARCH_LOG_LEVEL_OFF=y
+CONFIG_LOG_FUNC_NAME_PREFIX_DBG=n
+CONFIG_LOG_PROCESS_THREAD=y
+
+# Disable all potential default backends
+CONFIG_LOG_BACKEND_UART=n
+CONFIG_LOG_BACKEND_NATIVE_POSIX=n
+CONFIG_LOG_BACKEND_RTT=n
+CONFIG_LOG_BACKEND_XTENSA_SIM=n
+
+# Enable blocking in thread
+CONFIG_LOG_MODE_DEFERRED=y
+CONFIG_LOG_BLOCK_IN_THREAD=y
+
+# Block in thread indefinitely
+CONFIG_LOG_BLOCK_IN_THREAD_TIMEOUT_MS=-1
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2022 Nordic Semiconductor ASA
+ * Copyright (c) 2023 Meta
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <zephyr/kernel.h>
+#include <zephyr/logging/log.h>
+#include <zephyr/logging/log_ctrl.h>
+#include <zephyr/logging/log_backend.h>
+#include <zephyr/random/random.h>
+#include <zephyr/sys/util.h>
+#include <zephyr/ztest.h>
+
+/* maximum time to wait before aborting thread in case of 0 log messages / second */
+#define MAX_STALL_TIMEOUT_S 3
+/* maximum time (seconds) to wait for logging thread to finish */
+#define MAX_JOIN_TIMEOUT_S  1
+
+#define WILL_STALL (CONFIG_TEST_INPUT_LOG_RATE == 0 || CONFIG_TEST_OUTPUT_LOG_RATE == 0)
+
+#define MODULE_NAME     test
+
+LOG_MODULE_REGISTER(MODULE_NAME);
+
+struct mock_log_backend {
+	uint32_t dropped;
+	uint32_t handled;
+};
+
+static uint32_t end_ms;
+static uint32_t start_ms;
+static uint32_t test_source_id;
+static struct mock_log_backend mock_backend;
+
+static inline uint32_t then(void)
+{
+	return start_ms;
+}
+
+static inline uint32_t now(void)
+{
+	/* some platforms currently _not_ starting uptime at 0!! */
+	return k_uptime_get_32();
+}
+
+static inline uint32_t end(void)
+{
+	return end_ms;
+}
+
+static inline void create_start_end(void)
+{
+	start_ms = k_uptime_get_32();
+	end_ms = start_ms;
+	/* some "fuzz" in ms to account for odd variances */
+	end_ms += MAX_STALL_TIMEOUT_S * MSEC_PER_SEC;
+
+#if WILL_STALL
+	end_ms += MAX_STALL_TIMEOUT_S * MSEC_PER_SEC;
+#elif CONFIG_TEST_INPUT_LOG_RATE > 0 && CONFIG_TEST_INPUT_LOG_RATE <= CONFIG_TEST_OUTPUT_LOG_RATE
+	end_ms += MSEC_PER_SEC * DIV_ROUND_UP(CONFIG_TEST_NUM_LOGS, CONFIG_TEST_INPUT_LOG_RATE);
+#elif CONFIG_TEST_OUTPUT_LOG_RATE > 0 && CONFIG_TEST_INPUT_LOG_RATE > CONFIG_TEST_OUTPUT_LOG_RATE
+	end_ms += MSEC_PER_SEC * DIV_ROUND_UP(CONFIG_TEST_NUM_LOGS, CONFIG_TEST_OUTPUT_LOG_RATE);
+#else
+#error "Impossible scenario"
+#endif
+
+	TC_PRINT("Start time: %u ms\n", start_ms);
+	TC_PRINT("End   time: %u ms\n", end_ms);
+}
+
+static void handle_output(uint32_t i)
+{
+	while (true) {
+		if (i + 1 <= (CONFIG_TEST_OUTPUT_LOG_RATE * (now() - then())) / MSEC_PER_SEC) {
+			break;
+		}
+		k_msleep(1);
+	}
+
+	++mock_backend.handled;
+}
+
+static void handle_input(void)
+{
+	for (int i = 0; i < CONFIG_TEST_NUM_LOGS; i++) {
+		while (true) {
+			if (i + 1 <= CONFIG_TEST_INPUT_LOG_RATE * (now() - then()) / MSEC_PER_SEC) {
+				break;
+			}
+			zassert_true(now() <= end());
+			k_msleep(1);
+		}
+
+		LOG_INF("%u", i);
+	}
+}
+
+static void process(const struct log_backend *const backend, union log_msg_generic *msg)
+{
+	size_t len;
+	uint8_t *package = log_msg_get_package(&msg->log, &len);
+
+	package += 2 * sizeof(void *);
+
+	handle_output(*(uint32_t *)package);
+}
+
+static void mock_init(struct log_backend const *const backend)
+{
+}
+
+static void panic(struct log_backend const *const backend)
+{
+#if WILL_STALL
+	/* Don't panic! */
+	return;
+#endif
+
+	zassert_true(false);
+}
+
+static void dropped(const struct log_backend *const backend, uint32_t cnt)
+{
+	mock_backend.dropped += cnt;
+}
+
+static const struct log_backend_api log_blocking_api = {
+	.process = process,
+	.panic = panic,
+	.init = mock_init,
+	.dropped = dropped,
+};
+
+LOG_BACKEND_DEFINE(blocking_log_backend, log_blocking_api, true, NULL);
+
+BUILD_ASSERT(CONFIG_TEST_INPUT_LOG_RATE >= 0);
+BUILD_ASSERT(CONFIG_TEST_OUTPUT_LOG_RATE >= 0);
+
+static void print_input(void)
+{
+	TC_PRINT("CONFIG_TEST_NUM_LOGS: %d\n", CONFIG_TEST_NUM_LOGS);
+	TC_PRINT("CONFIG_TEST_INPUT_LOG_RATE: %d\n", CONFIG_TEST_INPUT_LOG_RATE);
+	TC_PRINT("CONFIG_TEST_OUTPUT_LOG_RATE: %d\n", CONFIG_TEST_OUTPUT_LOG_RATE);
+}
+
+static void print_output(void)
+{
+	TC_PRINT("Log backend dropped %u messages\n", mock_backend.dropped);
+	TC_PRINT("Log backend handled %u messages\n", mock_backend.handled);
+}
+
+static void test_blocking_thread_entry(void *p1, void *p2, void *p3)
+{
+	ARG_UNUSED(p1);
+	ARG_UNUSED(p2);
+	ARG_UNUSED(p3);
+
+	handle_input();
+}
+K_THREAD_DEFINE(test_blocking_thread, 4096, test_blocking_thread_entry, NULL, NULL, NULL,
+		K_HIGHEST_THREAD_PRIO, 0, UINT32_MAX);
+
+#if WILL_STALL
+ZTEST_EXPECT_FAIL(log_blocking, test_blocking);
+#endif
+ZTEST(log_blocking, test_blocking)
+{
+#if WILL_STALL
+	/*
+	 * This is a workaround for a possible bug in the testing subsys:
+	 * - comment-out ztest_test_fail() below
+	 * - run with:
+	 *   west build -p auto -b qemu_riscv64 -t run \
+	 *     -T tests/subsys/logging/log_blocking/logging.blocking.rate.stalled
+	 * - observe "Assertion failed at..."
+	 * - technically, testsuite should pass. Since ZTEST_EXPECT_FAIL() is set. Never gets there.
+	 * - run with:
+	 *   twister -i -p qemu_riscv64 -T tests/subsys/logging/log_blocking/
+	 * - observe "..FAILED : Timeout"
+	 * - possible conclusions:
+	 *   - test thread has not properly longjumped?
+	 *   - twister not detecting assertion failures?
+	 *   - twister expecting some other string and never sees it?
+	 */
+	ztest_test_fail();
+#endif
+
+	create_start_end();
+	k_thread_start(test_blocking_thread);
+	k_msleep(end() - now());
+
+#if WILL_STALL
+	k_thread_abort(test_blocking_thread);
+#endif
+	zassert_ok(k_thread_join(test_blocking_thread, K_SECONDS(MAX_JOIN_TIMEOUT_S)));
+
+	print_output();
+
+	zassert_equal(mock_backend.dropped, 0, "dropped %u / %u logs", mock_backend.dropped,
+		      CONFIG_TEST_NUM_LOGS);
+
+	zassert_equal(mock_backend.handled, CONFIG_TEST_NUM_LOGS, "handled %u / %u logs",
+		      mock_backend.handled, CONFIG_TEST_NUM_LOGS);
+}
+
+static void *setup(void)
+{
+	/*
+	 * This testsuite was added mainly to address a regression caused
+	 * by this subtle, but very different interpretation.
+	 */
+	__ASSERT(K_TIMEOUT_EQ(K_NO_WAIT, K_MSEC(-1)), "K_NO_WAIT should be equal to K_MSEC(-1)");
+	__ASSERT(!K_TIMEOUT_EQ(K_FOREVER, K_MSEC(-1)),
+		 "K_FOREVER should not be equal to K_MSEC(-1)");
+
+	test_source_id = log_source_id_get(STRINGIFY(MODULE_NAME));
+
+	print_input();
+
+	return NULL;
+}
+
+static void before(void *arg)
+{
+	memset(&mock_backend, 0, sizeof(mock_backend));
+}
+
+static void teardown(void *data)
+{
+	ARG_UNUSED(data);
+	log_backend_disable(&blocking_log_backend);
+}
+
+ZTEST_SUITE(log_blocking, NULL, setup, before, NULL, teardown);