diff --git a/tests/subsys/logging/log_blocking/CMakeLists.txt b/tests/subsys/logging/log_blocking/CMakeLists.txt new file mode 100644 index 000000000000..ca0f6392d692 --- /dev/null +++ b/tests/subsys/logging/log_blocking/CMakeLists.txt @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.20.0) +find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE}) +project(log_blocking) + +target_sources(app PRIVATE src/main.c) diff --git a/tests/subsys/logging/log_blocking/Kconfig b/tests/subsys/logging/log_blocking/Kconfig new file mode 100644 index 000000000000..ef650c97550b --- /dev/null +++ b/tests/subsys/logging/log_blocking/Kconfig @@ -0,0 +1,22 @@ +# Copyright (c) 2023 Meta +# SPDX-License-Identifier: Apache-2.0 + +source "Kconfig.zephyr" + +config TEST_NUM_LOGS + int "Number of log messages per test" + default 5000 + +config TEST_INPUT_LOG_RATE + int "Maximal input log rate for the test (in msg/s)" + default 1000 + help + Specify the maximum rate at which messages will be logged from the + application. + +config TEST_OUTPUT_LOG_RATE + int "Maximal output log rate for the test (in msg/s)" + default 1000 + help + Specify the maximum rate at which log messages will be handled by + the log backend. diff --git a/tests/subsys/logging/log_blocking/REAME.md b/tests/subsys/logging/log_blocking/REAME.md new file mode 100644 index 000000000000..05c53174a042 --- /dev/null +++ b/tests/subsys/logging/log_blocking/REAME.md @@ -0,0 +1,97 @@ +# Blocking the Log Processing Thread + +## Overview + +When the core log buffer becomes full, the logging subsystem can be configured to + +* Drop older log messages with `CONFIG_LOG_MODE_OVERFLOW=y` (**default**) +* Drop newer log messages with `CONFIG_LOG_MODE_OVERFLOW=n`, or +* Drop no log messages at all with `CONFIG_LOG_BLOCK_IN_THREAD=y`, `CONFIG_LOG_BLOCK_IN_THREAD_TIMEOUT_MS=-1`. + +In the last configuration, the log processing thread will block until space +becomes available again in the core log buffer. + +> Warning ⚠️: Blocking the log processing thread is generally not recommended +> and should only be attempted in advanced use cases. + +## Logging and Flow Rates + +There are roughly 4 scenarios we care about testing with +`CONFIG_LOG_BLOCK_IN_THREAD`, and they can all be characterized by comparing +log message flow rates. Typically, one would describe log message flow rates +with units such as `[msg/s]`. However, in the table below, we are mainly +concerned with the ratio of the Output Rate to the Input Rate, and in that +case, the units themselves cancel-out. In the table we assume there exists an +`N` such that `N > 1`. + +| Name | Input Rate | Output Rate | Rate | +|----------------|------------|-------------|------| +| Input-Limited | 1 | N | 1 | +| Matched | 1 | 1 | 1 | +| Output-Limited | 1 | 1/N | 1/N | +| Stalled | 0 | 0 | 0 | + +The resultant _Rate_ is always `Rate = MIN(Input Rate, Output Rate)`. + +Rate-limiting of any kind can be described approximately as _back pressure_. +Back pressure is fine in short bursts but it can cause delays in application +and driver code if the pressure is not relieved promptly. + +## Physical Sources of Backpressure + +Many log backends, such as UARTs, have a built-in hardware FIFO that +inherently provides back-pressure; output log processing is rate-limited +based on the baud rate of the UART. Other backends, such as UDP sockets or +DMA, can provide significantly higher throughput but are still inherently +rate-limited by the physical layer over which they operate, be it Gigabit +Ethernet or PCI express. + +Even a trivial _message source_ or _message sink_ is still rate-limited by +memory or the CPU. From that perspective, we can infer that there is a finite +limit in the log processing rate for practical systems. That may be +comforting to know, even if it is something astronomical like 1G `[msg/s]`. + +## Input-Limited Log Rate + +The ideal scenario is when the output "bandwidth" exceeds the input rate. If +so configured, we minimize the liklihood that the log processing thread will +stall. We can also be sure that the output will be able to relieve +backpressure (i.e. the core log buffer usage will tend to zero over time). + +## Rate-Matched Input and Output + +When the input rate and output rates are equal, one might think this is the +ideal scenario. In reality, it is not. The rates could be matched, but a +sustained increase (or several small increases) in the input log rate, could +cause the core log buffer to approach 100% capacity. Since the output log rate +is still only matched with the input log rate, the core log buffer capacity +would not decrease from 100%, and it would remain saturated. + +Logging has a tendency to be bursty, so it is definitely preferable to +operate in the _Input-limited Log Rate_ regime. + +## Output-Limited Log Rate + +If the rate of output processing is less than the rate of input processing, +the core log buffer will approach 100% capacity and, eventually, stall the +log processing thread. + +## Stalling the Log Processing Thread + +When any log backend is unable to process logs for whatever reason, +the output rate approaches 0 `[msg/s]`. If application or +driver code continue to submit logs, the core log buffer approaches 100% +capacity. Once the core log buffer is full, the log processing thread is +unable to allocate new log messages and it will be stalled. + +Stalling a real-time application produces unexpected behaviour, so it is +advised to avoid this for any non-negligible amount of time. + +It is absolutely critical that the log backend is capable of operating +correctly _even when the log processing thread is blocking_ in order to +automatically recover from a stall. + +On a live system, it may be necessary to manually perform remediation of log +backends that are unable to recover from stalling the log processing thread. +Remediation could involve disabling the log backend and freeing any in-use +buffers. diff --git a/tests/subsys/logging/log_blocking/prj.conf b/tests/subsys/logging/log_blocking/prj.conf new file mode 100644 index 000000000000..7dd3a6967685 --- /dev/null +++ b/tests/subsys/logging/log_blocking/prj.conf @@ -0,0 +1,28 @@ +CONFIG_ZTEST=y + +CONFIG_TEST_LOGGING_DEFAULTS=n +CONFIG_LOG=y +CONFIG_LOG_PRINTK=n +CONFIG_LOG_BUFFER_SIZE=512 +CONFIG_ASSERT=y +CONFIG_MAIN_STACK_SIZE=2048 + +# Disable any logs that could interfere. +CONFIG_KERNEL_LOG_LEVEL_OFF=y +CONFIG_SOC_LOG_LEVEL_OFF=y +CONFIG_ARCH_LOG_LEVEL_OFF=y +CONFIG_LOG_FUNC_NAME_PREFIX_DBG=n +CONFIG_LOG_PROCESS_THREAD=y + +# Disable all potential default backends +CONFIG_LOG_BACKEND_UART=n +CONFIG_LOG_BACKEND_NATIVE_POSIX=n +CONFIG_LOG_BACKEND_RTT=n +CONFIG_LOG_BACKEND_XTENSA_SIM=n + +# Enable blocking in thread +CONFIG_LOG_MODE_DEFERRED=y +CONFIG_LOG_BLOCK_IN_THREAD=y + +# Block in thread indefinitely +CONFIG_LOG_BLOCK_IN_THREAD_TIMEOUT_MS=-1 diff --git a/tests/subsys/logging/log_blocking/src/main.c b/tests/subsys/logging/log_blocking/src/main.c new file mode 100644 index 000000000000..8ce30a5c5f5d --- /dev/null +++ b/tests/subsys/logging/log_blocking/src/main.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2022 Nordic Semiconductor ASA + * Copyright (c) 2023 Meta + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* maximum time to wait before aborting thread in case of 0 log messages / second */ +#define MAX_STALL_TIMEOUT_S 3 +/* maximum time (seconds) to wait for logging thread to finish */ +#define MAX_JOIN_TIMEOUT_S 1 + +#define WILL_STALL (CONFIG_TEST_INPUT_LOG_RATE == 0 || CONFIG_TEST_OUTPUT_LOG_RATE == 0) + +#define MODULE_NAME test + +LOG_MODULE_REGISTER(MODULE_NAME); + +struct mock_log_backend { + uint32_t dropped; + uint32_t handled; +}; + +static uint32_t end_ms; +static uint32_t start_ms; +static uint32_t test_source_id; +static struct mock_log_backend mock_backend; + +static inline uint32_t then(void) +{ + return start_ms; +} + +static inline uint32_t now(void) +{ + /* some platforms currently _not_ starting uptime at 0!! */ + return k_uptime_get_32(); +} + +static inline uint32_t end(void) +{ + return end_ms; +} + +static inline void create_start_end(void) +{ + start_ms = k_uptime_get_32(); + end_ms = start_ms; + /* some "fuzz" in ms to account for odd variances */ + end_ms += MAX_STALL_TIMEOUT_S * MSEC_PER_SEC; + +#if WILL_STALL + end_ms += MAX_STALL_TIMEOUT_S * MSEC_PER_SEC; +#elif CONFIG_TEST_INPUT_LOG_RATE > 0 && CONFIG_TEST_INPUT_LOG_RATE <= CONFIG_TEST_OUTPUT_LOG_RATE + end_ms += MSEC_PER_SEC * DIV_ROUND_UP(CONFIG_TEST_NUM_LOGS, CONFIG_TEST_INPUT_LOG_RATE); +#elif CONFIG_TEST_OUTPUT_LOG_RATE > 0 && CONFIG_TEST_INPUT_LOG_RATE > CONFIG_TEST_OUTPUT_LOG_RATE + end_ms += MSEC_PER_SEC * DIV_ROUND_UP(CONFIG_TEST_NUM_LOGS, CONFIG_TEST_OUTPUT_LOG_RATE); +#else +#error "Impossible scenario" +#endif + + TC_PRINT("Start time: %u ms\n", start_ms); + TC_PRINT("End time: %u ms\n", end_ms); +} + +static void handle_output(uint32_t i) +{ + while (true) { + if (i + 1 <= (CONFIG_TEST_OUTPUT_LOG_RATE * (now() - then())) / MSEC_PER_SEC) { + break; + } + k_msleep(1); + } + + ++mock_backend.handled; +} + +static void handle_input(void) +{ + for (int i = 0; i < CONFIG_TEST_NUM_LOGS; i++) { + while (true) { + if (i + 1 <= CONFIG_TEST_INPUT_LOG_RATE * (now() - then()) / MSEC_PER_SEC) { + break; + } + zassert_true(now() <= end()); + k_msleep(1); + } + + LOG_INF("%u", i); + } +} + +static void process(const struct log_backend *const backend, union log_msg_generic *msg) +{ + size_t len; + uint8_t *package = log_msg_get_package(&msg->log, &len); + + package += 2 * sizeof(void *); + + handle_output(*(uint32_t *)package); +} + +static void mock_init(struct log_backend const *const backend) +{ +} + +static void panic(struct log_backend const *const backend) +{ +#if WILL_STALL + /* Don't panic! */ + return; +#endif + + zassert_true(false); +} + +static void dropped(const struct log_backend *const backend, uint32_t cnt) +{ + mock_backend.dropped += cnt; +} + +static const struct log_backend_api log_blocking_api = { + .process = process, + .panic = panic, + .init = mock_init, + .dropped = dropped, +}; + +LOG_BACKEND_DEFINE(blocking_log_backend, log_blocking_api, true, NULL); + +BUILD_ASSERT(CONFIG_TEST_INPUT_LOG_RATE >= 0); +BUILD_ASSERT(CONFIG_TEST_OUTPUT_LOG_RATE >= 0); + +static void print_input(void) +{ + TC_PRINT("CONFIG_TEST_NUM_LOGS: %d\n", CONFIG_TEST_NUM_LOGS); + TC_PRINT("CONFIG_TEST_INPUT_LOG_RATE: %d\n", CONFIG_TEST_INPUT_LOG_RATE); + TC_PRINT("CONFIG_TEST_OUTPUT_LOG_RATE: %d\n", CONFIG_TEST_OUTPUT_LOG_RATE); +} + +static void print_output(void) +{ + TC_PRINT("Log backend dropped %u messages\n", mock_backend.dropped); + TC_PRINT("Log backend handled %u messages\n", mock_backend.handled); +} + +static void test_blocking_thread_entry(void *p1, void *p2, void *p3) +{ + ARG_UNUSED(p1); + ARG_UNUSED(p2); + ARG_UNUSED(p3); + + handle_input(); +} +K_THREAD_DEFINE(test_blocking_thread, 4096, test_blocking_thread_entry, NULL, NULL, NULL, + K_HIGHEST_THREAD_PRIO, 0, UINT32_MAX); + +#if WILL_STALL +ZTEST_EXPECT_FAIL(log_blocking, test_blocking); +#endif +ZTEST(log_blocking, test_blocking) +{ +#if WILL_STALL + /* + * This is a workaround for a possible bug in the testing subsys: + * - comment-out ztest_test_fail() below + * - run with: + * west build -p auto -b qemu_riscv64 -t run \ + * -T tests/subsys/logging/log_blocking/logging.blocking.rate.stalled + * - observe "Assertion failed at..." + * - technically, testsuite should pass. Since ZTEST_EXPECT_FAIL() is set. Never gets there. + * - run with: + * twister -i -p qemu_riscv64 -T tests/subsys/logging/log_blocking/ + * - observe "..FAILED : Timeout" + * - possible conclusions: + * - test thread has not properly longjumped? + * - twister not detecting assertion failures? + * - twister expecting some other string and never sees it? + */ + ztest_test_fail(); +#endif + + create_start_end(); + k_thread_start(test_blocking_thread); + k_msleep(end() - now()); + +#if WILL_STALL + k_thread_abort(test_blocking_thread); +#endif + zassert_ok(k_thread_join(test_blocking_thread, K_SECONDS(MAX_JOIN_TIMEOUT_S))); + + print_output(); + + zassert_equal(mock_backend.dropped, 0, "dropped %u / %u logs", mock_backend.dropped, + CONFIG_TEST_NUM_LOGS); + + zassert_equal(mock_backend.handled, CONFIG_TEST_NUM_LOGS, "handled %u / %u logs", + mock_backend.handled, CONFIG_TEST_NUM_LOGS); +} + +static void *setup(void) +{ + /* + * This testsuite was added mainly to address a regression caused + * by this subtle, but very different interpretation. + */ + __ASSERT(K_TIMEOUT_EQ(K_NO_WAIT, K_MSEC(-1)), "K_NO_WAIT should be equal to K_MSEC(-1)"); + __ASSERT(!K_TIMEOUT_EQ(K_FOREVER, K_MSEC(-1)), + "K_FOREVER should not be equal to K_MSEC(-1)"); + + test_source_id = log_source_id_get(STRINGIFY(MODULE_NAME)); + + print_input(); + + return NULL; +} + +static void before(void *arg) +{ + memset(&mock_backend, 0, sizeof(mock_backend)); +} + +static void teardown(void *data) +{ + ARG_UNUSED(data); + log_backend_disable(&blocking_log_backend); +} + +ZTEST_SUITE(log_blocking, NULL, setup, before, NULL, teardown); diff --git a/tests/subsys/logging/log_blocking/testcase.yaml b/tests/subsys/logging/log_blocking/testcase.yaml new file mode 100644 index 000000000000..7d2880115bb8 --- /dev/null +++ b/tests/subsys/logging/log_blocking/testcase.yaml @@ -0,0 +1,28 @@ +common: + filter: CONFIG_QEMU_TARGET and not CONFIG_SMP + tags: + - log_api + - logging + integration_platforms: + - qemu_x86 +tests: + logging.blocking.rate.input_limited: + extra_configs: + - CONFIG_TEST_INPUT_LOG_RATE=500 + - CONFIG_TEST_OUTPUT_LOG_RATE=1000 + logging.blocking.rate.matched: + extra_configs: + - CONFIG_TEST_INPUT_LOG_RATE=1000 + - CONFIG_TEST_OUTPUT_LOG_RATE=1000 + logging.blocking.rate.output_limited: + extra_configs: + - CONFIG_TEST_INPUT_LOG_RATE=1000 + - CONFIG_TEST_OUTPUT_LOG_RATE=500 + logging.blocking.rate.stalled: + platform_exclude: + # twister fails with "unexpected eof" even though the test passes + - qemu_cortex_r5 + timeout: 5 + extra_configs: + - CONFIG_TEST_INPUT_LOG_RATE=1000 + - CONFIG_TEST_OUTPUT_LOG_RATE=0