diff --git a/docs/index.h b/docs/index.h index ded921d75..b760936c8 100644 --- a/docs/index.h +++ b/docs/index.h @@ -46,6 +46,7 @@ * \cond hardware_uart \defgroup hardware_uart hardware_uart \endcond * \cond hardware_vreg \defgroup hardware_vreg hardware_vreg \endcond * \cond hardware_watchdog \defgroup hardware_watchdog hardware_watchdog \endcond + * \cond hardware_xip_cache \defgroup hardware_xip_cache hardware_xip_cache \endcond * \cond hardware_xosc \defgroup hardware_xosc hardware_xosc \endcond * \cond hardware_powman hardware_powman * \cond hardware_hazard3 hardware_hazard3 diff --git a/src/cmake/rp2_common.cmake b/src/cmake/rp2_common.cmake index 2da2af6c1..726bab933 100644 --- a/src/cmake/rp2_common.cmake +++ b/src/cmake/rp2_common.cmake @@ -60,6 +60,7 @@ pico_add_subdirectory(rp2_common/hardware_timer) pico_add_subdirectory(rp2_common/hardware_uart) pico_add_subdirectory(rp2_common/hardware_vreg) pico_add_subdirectory(rp2_common/hardware_watchdog) +pico_add_subdirectory(rp2_common/hardware_xip_cache) pico_add_subdirectory(rp2_common/hardware_xosc) if (PICO_RP2350 OR PICO_COMBINED_DOCS) diff --git a/src/rp2_common/hardware_flash/CMakeLists.txt b/src/rp2_common/hardware_flash/CMakeLists.txt index b0538ac1c..0139b9e4e 100644 --- a/src/rp2_common/hardware_flash/CMakeLists.txt +++ b/src/rp2_common/hardware_flash/CMakeLists.txt @@ -1,2 +1,3 @@ pico_simple_hardware_target(flash) pico_mirrored_target_link_libraries(hardware_flash INTERFACE pico_bootrom) +pico_mirrored_target_link_libraries(hardware_flash INTERFACE hardware_xip_cache) diff --git a/src/rp2_common/hardware_flash/flash.c b/src/rp2_common/hardware_flash/flash.c index cd2eafe4f..8ae397c96 100644 --- a/src/rp2_common/hardware_flash/flash.c +++ b/src/rp2_common/hardware_flash/flash.c @@ -13,6 +13,7 @@ #else #include "hardware/structs/qmi.h" #endif +#include "hardware/xip_cache.h" #define FLASH_BLOCK_ERASE_CMD 0xd8 @@ -84,6 +85,8 @@ void __no_inline_not_in_flash_func(flash_range_erase)(uint32_t flash_offs, 
size_ rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE); assert(connect_internal_flash_func && flash_exit_xip_func && flash_range_erase_func && flash_flush_cache_func); flash_init_boot2_copyout(); + // Commit any pending writes to external RAM, to avoid losing them in the subsequent flush: + xip_cache_clean_all(); // No flash accesses after this point __compiler_memory_barrier(); @@ -112,6 +115,7 @@ void __no_inline_not_in_flash_func(flash_range_program)(uint32_t flash_offs, con rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE); assert(connect_internal_flash_func && flash_exit_xip_func && flash_range_program_func && flash_flush_cache_func); flash_init_boot2_copyout(); + xip_cache_clean_all(); __compiler_memory_barrier(); @@ -152,6 +156,8 @@ void __no_inline_not_in_flash_func(flash_do_cmd)(const uint8_t *txbuf, uint8_t * rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE); assert(connect_internal_flash_func && flash_exit_xip_func && flash_flush_cache_func); flash_init_boot2_copyout(); + xip_cache_clean_all(); + __compiler_memory_barrier(); connect_internal_flash_func(); flash_exit_xip_func(); diff --git a/src/rp2_common/hardware_xip_cache/CMakeLists.txt b/src/rp2_common/hardware_xip_cache/CMakeLists.txt new file mode 100644 index 000000000..7a94b0c05 --- /dev/null +++ b/src/rp2_common/hardware_xip_cache/CMakeLists.txt @@ -0,0 +1,3 @@ +pico_simple_hardware_target(xip_cache) + +pico_mirrored_target_link_libraries(hardware_xip_cache INTERFACE hardware_sync) diff --git a/src/rp2_common/hardware_xip_cache/include/hardware/xip_cache.h b/src/rp2_common/hardware_xip_cache/include/hardware/xip_cache.h new file mode 100644 index 000000000..286db8bdc --- /dev/null +++ b/src/rp2_common/hardware_xip_cache/include/hardware/xip_cache.h @@ -0,0 +1,210 @@ 
+/* + * Copyright (c) 2024 Raspberry Pi Ltd. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _HARDWARE_XIP_CACHE_H +#define _HARDWARE_XIP_CACHE_H + +#include "pico.h" +#include "hardware/regs/addressmap.h" + +/** \file xip_cache.h + * \defgroup hardware_xip_cache hardware_xip_cache + * + * \brief Low-level cache maintenance operations for the XIP cache + * + * These functions apply some maintenance operation to either the entire cache contents, or a range + * of offsets within the downstream address space. Offsets start from 0 (indicating the first byte + * of flash), so pointers should have XIP_BASE subtracted before passing into one of these + * functions. + * + * \if rp2040-specific + * The only valid cache maintenance operation on RP2040 is "invalidate", which tells the cache to + * forget everything it knows about some address. This is necessary after a programming operation, + * because the cache does not automatically know about any serial programming operations performed + * on the external flash device, and could return stale data. + * \endif + * + * \if rp2350-specific + * On RP2350, the three types of operation are: + * + * * Invalidate: tell the cache to forget everything it knows about some address. The next access to + * that address will fetch from downstream memory. + * + * * Clean: if the addressed cache line contains data not yet written to external memory, then write + * that data out now, and mark the line as "clean" (i.e. not containing uncommitted write data) + * + * * Pin: mark an address as always being resident in the cache. This persists until the line is + * invalidated, and can be used to allocate part of the cache for cache-as-SRAM use. + * + * When using both external flash and external RAM (e.g. PSRAM), a simple way to maintain coherence + * over flash programming operations is to: + * + * 1. Clean the entire cache (e.g. using xip_cache_clean_all()) + * + * 2. 
Erase + program the flash using serial SPI commands + * + * 3. Invalidate ("flush") the entire cache (e.g. using xip_cache_invalidate_all()) + * + * The invalidate ensures the programming is visible to subsequent reads. The clean ensures that the + * invalidate does not discard any cached PSRAM write data. + * + * \endif + * + */ + +// PICO_CONFIG: PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE, Enable/disable assertions in the hardware_xip_cache module, type=bool, default=0, group=hardware_xip_cache +#ifndef PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE +#define PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE 0 +#endif + +#define XIP_CACHE_LINE_SIZE _u(8) + +#define XIP_CACHE_SIZE (_u(16) * _u(1024)) + +#if PICO_RP2040 +#define XIP_CACHE_ADDRESS_SPACE_SIZE (_u(16) * _u(1024) * _u(1024)) +#else +#define XIP_CACHE_ADDRESS_SPACE_SIZE (XIP_END - XIP_BASE) +#endif + +// A read-only cache never requires cleaning (you can still call the functions, they are just no-ops) +#if PICO_RP2040 +#define XIP_CACHE_IS_READ_ONLY 1 +#else +#define XIP_CACHE_IS_READ_ONLY 0 +#endif + +#ifndef __ASSEMBLER__ + +#ifdef __cplusplus +extern "C" { +#endif + +/*! \brief Invalidate the cache for the entire XIP address space + * \ingroup hardware_xip_cache + * + * Invalidation ensures that subsequent reads will fetch data from the downstream memory, rather + * than using (potentially stale) cached data. + * + * This function is faster than calling xip_cache_invalidate_range() for the entire address space, + * because it iterates over cachelines instead of addresses. + * + * @note Any pending write data held in the cache is lost: you can force the cache to commit these + * writes first, by calling xip_cache_clean_all() + * + * @note Unlike flash_flush_cache(), this function affects *only* the cache line state. + * flash_flush_cache() calls a ROM API which can have other effects on some platforms, like + * cleaning up the bootrom's QSPI GPIO setup on RP2040. 
Prefer this function for general cache + * maintenance use, and prefer flash_flush_cache in sequences of ROM flash API calls. + */ +void xip_cache_invalidate_all(void); + +/*! \brief Invalidate a range of offsets within the XIP address space + * \ingroup hardware_xip_cache + * + * \param start_offset The first offset to be invalidated. Offset 0 means the first byte of XIP + * memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function. + * Must be 4-byte-aligned on RP2040. Must be aligned to the start of a cache line + * (XIP_CACHE_LINE_SIZE) on other platforms. + * + * \param size_bytes The number of bytes to invalidate. Must be a multiple of 4 bytes on RP2040. + * Must be a multiple of XIP_CACHE_LINE_SIZE on other platforms. + * + * Invalidation ensures that subsequent reads will fetch data from the downstream memory, rather + * than using (potentially stale) cached data. + + * @note Any pending write data held in the cache is lost: you can force the cache to commit these + * writes first, by calling xip_cache_clean_range() with the same parameters. Generally this is + * not necessary because invalidation is used with flash (write-behind via programming), and + * cleaning is used with PSRAM (writing through the cache). + * + */ +void xip_cache_invalidate_range(uintptr_t start_offset, uintptr_t size_bytes); + +#if !XIP_CACHE_IS_READ_ONLY + +/*! \brief Clean the cache for the entire XIP address space + * \ingroup hardware_xip_cache + * + * This causes the cache to write out all pending write data to the downstream memory. For example, + * when suspending the system with state retained in external PSRAM, this ensures all data has made + * it out to external PSRAM before powering down. + * + * This function is faster than calling xip_cache_clean_range() for the entire address space, + * because it iterates over cachelines instead of addresses. + * + * \if rp2040-specific + * On RP2040 this is a no-op, as the XIP cache is read-only.
This is indicated by the + * XIP_CACHE_IS_READ_ONLY macro. + * \endif + * + * \if rp2350-specific + * On RP2350, due to the workaround applied for RP2350-E11, this function also effectively + * invalidates all cache lines after cleaning them. The next access to each line will miss. Avoid + * this by calling xip_cache_clean_range() which does not suffer this issue. + * \endif + * + */ +void xip_cache_clean_all(void); + +/*! \brief Clean a range of offsets within the XIP address space + * \ingroup hardware_xip_cache + * + * This causes the cache to write out pending write data at these offsets to the downstream memory. + * + * \if rp2040-specific + * On RP2040 this is a no-op, as the XIP cache is read-only. This is indicated by the + * XIP_CACHE_IS_READ_ONLY macro. + * \endif + * + * \param start_offset The first offset to be cleaned. Offset 0 means the first byte of XIP + * memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function. + * Must be aligned to the start of a cache line (XIP_CACHE_LINE_SIZE). + * + * \param size_bytes The number of bytes to clean. Must be a multiple of XIP_CACHE_LINE_SIZE. + */ +void xip_cache_clean_range(uintptr_t start_offset, uintptr_t size_bytes); + +#else +// Stub these out inline to avoid generating a call to an empty function when they are no-ops +static inline void xip_cache_clean_all(void) {} +static inline void xip_cache_clean_range(uintptr_t start_offset, uintptr_t size_bytes) { + (void)start_offset; + (void)size_bytes; +} +#endif + +#if !PICO_RP2040 + +/*! \brief Pin a range of offsets within the XIP address space + * \ingroup hardware_xip_cache + * + * Pinning a line at an address allocates the line exclusively for use at that address. This means + * that all subsequent accesses to that address will hit the cache, and will not go to downstream + * memory. This persists until one of two things happens: + * + * * The line is invalidated, e.g.
via xip_cache_invalidate_all() + * + * * The same line is pinned at a different address (note lines are selected by address modulo + * XIP_CACHE_SIZE) + * + * \param start_offset The first offset to be pinned. Offset 0 means the first byte of XIP + * memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function. + * Must be aligned to the start of a cache line (XIP_CACHE_LINE_SIZE). + * + * \param size_bytes The number of bytes to pin. Must be a multiple of XIP_CACHE_LINE_SIZE. + * + */ +void xip_cache_pin_range(uintptr_t start_offset, uintptr_t size_bytes); +#endif + +#ifdef __cplusplus +} +#endif + +#endif // !__ASSEMBLER__ + +#endif // !_HARDWARE_XIP_CACHE_H diff --git a/src/rp2_common/hardware_xip_cache/xip_cache.c b/src/rp2_common/hardware_xip_cache/xip_cache.c new file mode 100644 index 000000000..c72b7de7d --- /dev/null +++ b/src/rp2_common/hardware_xip_cache/xip_cache.c @@ -0,0 +1,109 @@ +#include "hardware/xip_cache.h" +#include "hardware/structs/xip.h" +// For barrier macros: +#include "hardware/sync.h" + +// Implementation-private constants (exporting these would create a compatibility headache as they +// don't exist on all platforms; all of these operations are exposed through APIs anyways) + +#if !PICO_RP2040 +typedef enum { + XIP_CACHE_INVALIDATE_BY_SET_WAY = 0, + XIP_CACHE_CLEAN_BY_SET_WAY = 1, + XIP_CACHE_INVALIDATE_BY_ADDRESS = 2, + XIP_CACHE_CLEAN_BY_ADDRESS = 3, + XIP_CACHE_PIN_AT_ADDRESS = 7, + XIP_CACHE_OP_MAX = 7 +} cache_op_t; +#endif + +// Used to ensure subsequent accesses observe the new state of the maintained cache lines +#define __post_maintenance_barrier() do {__dsb(); __isb();} while (0) + +// All functions in this file are marked non-flash, even though they themselves may be executed +// safely from flash, because they are likely to be called during a flash programming operation +// (which makes flash execution momentarily unsafe) + +__always_inline static void check_xip_offset_range(uintptr_t
start_offset, uintptr_t size_bytes) { + // We use offsets, not addresses, for consistency with the flash API. This means the range of + // valid inputs starts at 0. + (void)start_offset; + (void)size_bytes; + valid_params_if(HARDWARE_XIP_CACHE, start_offset <= XIP_CACHE_ADDRESS_SPACE_SIZE); + valid_params_if(HARDWARE_XIP_CACHE, start_offset + size_bytes <= XIP_CACHE_ADDRESS_SPACE_SIZE); + // Check for unsigned wrapping too: + valid_params_if(HARDWARE_XIP_CACHE, start_offset + size_bytes >= start_offset); +} + +#if !PICO_RP2040 +// Generic code for RP2350-style caches: apply a maintenance operation to a range of offsets +static void __no_inline_not_in_flash_func(xip_cache_maintain)(uintptr_t start_offset, uintptr_t size_bytes, cache_op_t op) { + check_xip_offset_range(start_offset, size_bytes); + valid_params_if(HARDWARE_XIP_CACHE, (start_offset & (XIP_CACHE_LINE_SIZE - 1u)) == 0); + valid_params_if(HARDWARE_XIP_CACHE, (size_bytes & (XIP_CACHE_LINE_SIZE - 1u)) == 0); + valid_params_if(HARDWARE_XIP_CACHE, (uint)op <= (uint)XIP_CACHE_OP_MAX); + + uintptr_t end = start_offset + size_bytes; + for (uintptr_t offset = start_offset; offset < end; offset += XIP_CACHE_LINE_SIZE) { + *(io_wo_8 *) (XIP_MAINTENANCE_BASE + offset + (uintptr_t)op) = 0; + } + __post_maintenance_barrier(); +} +#endif + +void __no_inline_not_in_flash_func(xip_cache_invalidate_all)(void) { +#if PICO_RP2040 + xip_ctrl_hw->flush = 1; + // Read back to wait for completion + (void)xip_ctrl_hw->flush; + __post_maintenance_barrier(); +#else + xip_cache_maintain(XIP_CACHE_ADDRESS_SPACE_SIZE - XIP_CACHE_SIZE, XIP_CACHE_SIZE, XIP_CACHE_INVALIDATE_BY_SET_WAY); +#endif +} + +void __no_inline_not_in_flash_func(xip_cache_invalidate_range)(uintptr_t start_offset, uintptr_t size_bytes) { +#if PICO_RP2040 + // Accesses are at intervals of one half cache line (so 4 bytes) because RP2040's cache has two + // valid flags per cache line, and we need to clear both.
+ check_xip_offset_range(start_offset, size_bytes); + valid_params_if(HARDWARE_XIP_CACHE, (start_offset & 3u) == 0); + valid_params_if(HARDWARE_XIP_CACHE, (size_bytes & 3u) == 0); + + uintptr_t end = start_offset + size_bytes; + // On RP2040 you can invalidate a sector (half-line) by writing to its normal cached+allocating address + for (uintptr_t offset = start_offset; offset < end; offset += 4u) { + *(io_wo_32 *)(offset + XIP_BASE) = 0; + } + __post_maintenance_barrier(); + +#else + + xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_INVALIDATE_BY_ADDRESS); + +#endif +} + +#if !XIP_CACHE_IS_READ_ONLY +void __no_inline_not_in_flash_func(xip_cache_clean_all)(void) { + // Use addresses outside of the downstream QMI address range to work around RP2350-E11; this + // effectively performs a clean+invalidate (except being a no-op on pinned lines) due to the + // erroneous update of the tag. Consequently you will take a miss on the next access to the + // cleaned address. + xip_cache_maintain(XIP_END - XIP_BASE - XIP_CACHE_SIZE, XIP_CACHE_SIZE, XIP_CACHE_CLEAN_BY_SET_WAY); +} +#endif + +#if !XIP_CACHE_IS_READ_ONLY +void __no_inline_not_in_flash_func(xip_cache_clean_range)(uintptr_t start_offset, uintptr_t size_bytes) { + xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_CLEAN_BY_ADDRESS); +} +#endif + +#if !PICO_RP2040 +void __no_inline_not_in_flash_func(xip_cache_pin_range)(uintptr_t start_offset, uintptr_t size_bytes) { + valid_params_if(HARDWARE_XIP_CACHE, size_bytes <= XIP_CACHE_SIZE); + xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_PIN_AT_ADDRESS); +} +#endif + diff --git a/test/kitchen_sink/CMakeLists.txt b/test/kitchen_sink/CMakeLists.txt index 5395129d6..aaa3d31bb 100644 --- a/test/kitchen_sink/CMakeLists.txt +++ b/test/kitchen_sink/CMakeLists.txt @@ -24,6 +24,7 @@ set(KITCHEN_SINK_LIBS hardware_uart hardware_vreg hardware_watchdog + hardware_xip_cache hardware_xosc pico_aon_timer pico_bit_ops