diff --git a/doc/en/build.md b/doc/en/build.md index 2a4c1b5..40e63e1 100644 --- a/doc/en/build.md +++ b/doc/en/build.md @@ -16,3 +16,10 @@ Run `build.sh` from the project root directory. For Windows, run `build.ps1` ins ## Install the Library Run `cmake --install` in the `build/` directory. + +## Use the Library in `CMakeLists.txt` +```cmake +find_package(Papilio REQUIRED) + +target_link_libraries(main PRIVATE papilio::papilio) +``` diff --git a/doc/en/builtin_accessor.md b/doc/en/builtin_accessor.md index a8dfd0e..43fb5f3 100644 --- a/doc/en/builtin_accessor.md +++ b/doc/en/builtin_accessor.md @@ -12,9 +12,9 @@ Example: Given string `"hello world!"`, then - `[-1]`: Returns `'!'`. - `[-100]`: Returns null character. -### Slicing -The left-closed, right-open interval `[begin, end)` consists of index pair `begin`, `end` -Default values are `0` and `.length`, respectively +### Indexing by Range +The left-closed, right-open range `[begin, end)` is specified by the index pair `begin`, `end`. The default values are `0` and `.length`, respectively. + Example: Given string "hello world!" - `[:]`: Returns `"hello world!"` - `[:-1]`: Returns `"hello world"` @@ -24,7 +24,8 @@ Example: Given string "hello world!" ### Attributes - `size`:The number of *elements* in the string. That is, the string is regarded as a container whose value type is `char` (or other character type), and the result is its number of elements. - `length`:The number of *characters* in the string. -For string containing non-ASCII characters, these two values may not be equal. For example, the `size` of string `"ü"` is `2`, but its `length` is `1`; for string `L"ü"` (`wchar_t` string), its `size` and `length` are both `1`. + For strings containing non-ASCII characters, these two values may not be equal. + For example, the `size` of the string `"ü"` is `2`, but its `length` is `1`. For the string `L"ü"` (`wchar_t` string), its `size` and `length` are both `1`. 
## Tuples (`tuple` and `pair`) ### Indexing by Integer diff --git a/doc/en/builtin_formatter.md b/doc/en/builtin_formatter.md index f8b477b..638ad67 100644 --- a/doc/en/builtin_formatter.md +++ b/doc/en/builtin_formatter.md @@ -1,4 +1,7 @@ # Built-In Formatter +The format specifications of most built-in formatters are compatible with the corresponding parts of the standard library `<format>`. +See the [standard library documentation](https://en.cppreference.com/w/cpp/utility/format/spec) for a more detailed explanation. + ## Format Specification for Common Types Used by fundamental types, character and string. ``` @@ -6,6 +9,10 @@ fill-and-align sign # 0 width .precision L type ``` These arguments are all optional. +- **Note:** + In most cases the syntax is similar to the C-style `%`-formatting of the `printf` family, with the addition of `{}` and with `:` used instead of `%`. + For example, `%03.2f` can be translated to `{:03.2f}`. + ### Fill and Align Fill can be any character, followed by align option which is one of the `<`, `>` and `^`. Align Option: @@ -72,6 +79,7 @@ This option is only available for some types. It may cause the output to be affe ### Type #### String - None, `s`: Copy the string to the output. +- `?`: Copy the escaped string (see below) to the output. #### Integral Type (Except `bool` type) - `b`: Binary output. @@ -83,6 +91,7 @@ This option is only available for some types. It may cause the output to be affe #### Character Type - None, `c`: Copy the character to the output. +- `?`: Copy the escaped character (see below) to the output. - `b`, `B`, `d`, `o`, `x`, `X`: Use integer representation types with `static_cast(value)`. #### `bool` Type @@ -106,6 +115,34 @@ Infinite values and NaN are formatted to `inf` and `nan`, respectively. - None, `s`: Copy the corresponding string of the enumeration value to the output. - `b`, `B`, `d`, `o`, `x`, `X`: Use integer representation types with `static_cast>(value)`. 
-Note: The `enum_name` function defined in `` uses compiler extension to retrieve string from enumeration value. It has following limitations: -1. Only support enumeration values within the `[-128, 128)` range. -2. Output result of multiple enumerations with same value is compiler dependent. +Note: The `enum_name` function defined in `` uses a compiler extension to retrieve the string from an enumeration value. It has the following limitations: +1. Requires a compiler extension. If supported by the compiler, the implementation will define a `PAPILIO_HAS_ENUM_NAME` macro. +2. Only supports enumeration values within the `[-128, 128)` range. +3. The output result of multiple enumerations with the same value is compiler dependent. + +# Formatting escaped characters and strings +A character or string can be formatted as escaped to make it more suitable for debugging or for logging. + +For a character `C`: +- If `C` is one of the characters in the following table, the corresponding escape sequence is used. + | Character | Escape sequence | Notes | + | --------------- | --------------- | ----------------------------------------------- | + | Horizontal tab | `\t` | | + | New line | `\n` | | + | Carriage return | `\r` | | + | Double quote | `\"` | Used only if the output is double-quoted string | + | Single quote | `\'` | Used only if the output is single-quoted string | + | Backslash | `\\` | | + +- If `C` and the following characters form a sequence that is not printable, it is written as `\u{XX}`, where `XX` is its value in hexadecimal digits (for example, `\u{1b}`). + +- If `C` and the following characters cannot form a valid code point, each invalid code unit is written as `\x{XX}`, where `XX` is its value in hexadecimal digits (for example, `\x{c3}`). 
+ +## Example +```c++ +papilio::format("{:?} {:?}", '\'', '"'); // Returns "\\' \"" +papilio::format("{:?}", "hello\n"); // Returns "hello\\n" +papilio::format("{:?}", std::string("\0 \n \t \x02 \x1b", 9)); // Returns "\\u{0} \\n \\t \\u{2} \\u{1b}" +// Invalid UTF-8 +papilio::format("{:?}", "\xc3\x28"); // Returns "\\x{c3}(" +``` diff --git a/doc/en/formatter.md b/doc/en/formatter.md index 252c1a1..307b0b0 100644 --- a/doc/en/formatter.md +++ b/doc/en/formatter.md @@ -126,7 +126,7 @@ format("{:S}", used_adl_ex{}); // Returns "ADL (EX)" ``` # Overloaded `operator<<` -If a type does not implement the above format methods, but it has a overloaded `operator<<` of traditional C++, that overload will be used for outputting. +If a type does not implement the above format methods, but it has a overloaded `operator<<` for legacy stream output, that overload will be used for outputting. # Disabled Formatter Explicitly prevent a type from being formatted: diff --git a/doc/zh-CN/build.md b/doc/zh-CN/build.md index aec555d..434e58c 100644 --- a/doc/zh-CN/build.md +++ b/doc/zh-CN/build.md @@ -16,3 +16,10 @@ ## 安装库 在 `build/` 目录执行 `cmake --install .` 即可。 + +## 在 `CMakeLists.txt` 中使用库 +```cmake +find_package(Papilio REQUIRED) + +target_link_libraries(main PRIVATE papilio::papilio) +``` diff --git a/doc/zh-CN/builtin_accessor.md b/doc/zh-CN/builtin_accessor.md index 3c8681a..1afc174 100644 --- a/doc/zh-CN/builtin_accessor.md +++ b/doc/zh-CN/builtin_accessor.md @@ -11,11 +11,10 @@ - `[-1]`:返回 `'!'` - `[-100]`:返回空字符 -### 切片 -由索引对 `begin`、`end` 所组成的左闭右开区间 `[begin, end)` -默认值分别为 `0` 和 `.length` -示例:给定字符串 `"hello world!"` +### 索引范围 +由索引对 `begin`、`end` 所组成的左闭右开区间 `[begin, end)`。默认值分别为 `0` 和 `.length`。 +示例:给定字符串 `"hello world!"` - `[:]`:返回 `"hello world!"` - `[:-1]`:返回 `"hello world"` - `[6:-1]`:返回 `"world"` @@ -24,7 +23,8 @@ ### 属性 - `size`:字符串中的**元素**数。即将字符串视作一个值类型为 `char`(或其他字符类型)的容器,结果为其元素的个数。 - `length`:字符串中的**字符**数。 -对于含有非 ASCII 字符的字符串,这两个值可能会不相等。如对字符串 `"ü"` 而言,其 `size` 为 `2`,而 
`length` 则为 `1`;对字符串 `L"ü"` (`wchar_t` 字符串)而言,其 `size` 与 `length` 均为 `1`。 + 对于含有非 ASCII 字符的字符串,这两个值可能会不相等。 + 如对字符串 `"ü"` 而言,其 `size` 为 `2`,而 `length` 则为 `1`;对字符串 `L"ü"` (`wchar_t` 字符串)而言,其 `size` 与 `length` 均为 `1`。 ## 元组 (`tuple` 与 `pair`) ### 整数索引 diff --git a/doc/zh-CN/builtin_formatter.md b/doc/zh-CN/builtin_formatter.md index 8932dfd..96feeaf 100644 --- a/doc/zh-CN/builtin_formatter.md +++ b/doc/zh-CN/builtin_formatter.md @@ -1,4 +1,7 @@ # 内建格式化器(Formatter) +绝大部分的内建格式化器的格式说明都兼容标准库 `` 里对应部分的用法。 +可以参考[标准库文档](https://zh.cppreference.com/w/cpp/utility/format/spec)获取更详细的说明。 + ## 常见类型的格式说明 用于基本类型、字符和字符串。 ``` @@ -6,6 +9,10 @@ ``` 这些参数都是可选的。 +- **注**: + 大多数情况下,这个语法与 C 式(`printf` 族函数)的 `%` 格式化类似。仅增加了 `{}`, 并用 `:` 替换掉 `%`。 + 例如 `%03.2f` 可被转换为 `{:03.2f}`。 + ### 填充与对齐 填充可以为任意字符,后随对齐选项为 `<`、`>` 与 `^` 之一。 对齐选项: @@ -72,6 +79,7 @@ papilio::format("{:.<5.5s}", "文文文"); // "文文." ### 类型 #### 字符串类型 - 无、`s`:复制字符串到输出 +- `?`: 复制转义过的字符串(见下文)到输出 #### 整数类型(除 `bool` 类型) - `b`:二进制输出 @@ -83,6 +91,7 @@ papilio::format("{:.<5.5s}", "文文文"); // "文文." #### 字符类型 - 无、`c`:复制字符到输出 +- `?`: 复制转义过的字符(见下文)到输出 - `b`、`B`、`d`、`o`、`x`、`X`:以值 `static_cast(value)` 使用整数表示字符 #### `bool` 类型 @@ -106,6 +115,34 @@ papilio::format("{:.<5.5s}", "文文文"); // "文文." - 无、`s`:复制枚举值对应的字符串到输出中 - `b`、`B`、`d`、`o`、`x`、`X`:以值 `static_cast>(value)` 使用整数表示 -注意:`` 中定义的 `enum_name` 函数使用编译器扩展从枚举值中获取字符串,它有以下限制: -1. 仅支持 `[-128, 128)` 范围内的枚举值 -2. 具有相同值的多个枚举的输出结果取决于编译器 +注意:`` 中定义的 `enum_name` 函数使用编译器扩展从枚举值中获取字符串,它有以下限制: +1. 需要编译器拓展。如果编译器支持,实现会定义 `PAPILIO_HAS_ENUM_NAME` 宏 +2. 仅支持 `[-128, 128)` 范围内的枚举值 +3. 
具有相同值的多个枚举的输出结果取决于编译器 + +# 格式化输出转义过的字符与字符串 +字符或字符串可以在格式化时进行转义,使其更适合用于调试或记录日志。 + +对于字符 `C` 而言: +- 如果 C 是下表中的字符之一,那么使用对应的转义序列: + | 字符 | 转义序列 | 注解 | + | -------- | -------- | -------------------------------------- | + | 横向制表 | `\t` | | + | 换行 | `\n` | | + | 回车 | `\r` | | + | 双引号 | `\"` | 仅会在输出是用双引号包围的字符串时使用 | + | 单引号 | `\'` | 仅会在输出是用单引号包围的字符串时使用 | + | 反斜杠 | `\\` | | + +- 如果 `C` 及其后续字符形成不可打印的序列。 + +- 如果 `C` 及其后续字符不能形成有效的码点。将使用十六进制数字来表示无效的序列。 + +## 示例 +```c++ +papilio::format("{:?} {:?}", '\'', '"'); // 返回 "\\' \"" +papilio::format("{:?}", "hello\n"); // 返回 "hello\\n" +papilio::format("{:?}", std::string("\0 \n \t \x02 \x1b", 9)); // 返回 "\\u{0} \\n \\t \\u{2} \\u{1b}" +// 无效的 UTF-8 +papilio::format("{:?}", "\xc3\x28"); // 返回 "\\x{c3}(" +``` diff --git a/doc/zh-CN/formatter.md b/doc/zh-CN/formatter.md index 19c64cb..04c8f98 100644 --- a/doc/zh-CN/formatter.md +++ b/doc/zh-CN/formatter.md @@ -126,7 +126,7 @@ format("{:S}", used_adl_ex{}); // 返回 "ADL (EX)" ``` # `operator<<` 重载 -如果上述格式化方式某个类型均未实现,但该类型实现了 C++ 传统的 `operator<<` 重载,则该重载将会被用于输出。 +如果上述格式化方式某个类型均未实现,但该类型实现了用于流式输出的老式 `operator<<` 重载,则该重载将会被用于输出。 # 禁用格式化器(Disabled Formatter) 显式阻止某个类型被格式化: diff --git a/include/papilio/core.hpp b/include/papilio/core.hpp index 8600192..80b06c2 100755 --- a/include/papilio/core.hpp +++ b/include/papilio/core.hpp @@ -2251,13 +2251,24 @@ class format_context_traits format_context_traits() = delete; private: - static void append_hex_digits(context_type& ctx, int_type val) + static void append_hex_digits(context_type& ctx, int_type val, bool is_valid) { - format_to( - ctx, - PAPILIO_TSTRING_VIEW(char_type, "\\u{{{:x}}}"), - val - ); + if(is_valid) + { + format_to( + ctx, + PAPILIO_TSTRING_VIEW(char_type, "\\u{{{:x}}}"), + val + ); + } + else + { + format_to( + ctx, + PAPILIO_TSTRING_VIEW(char_type, "\\x{{{:x}}}"), + val + ); + } } template @@ -2267,7 +2278,7 @@ class format_context_traits { default: other_ch: - append_hex_digits(ctx, val); + append_hex_digits(ctx, val, true); break; case 
'\t': @@ -2316,7 +2327,8 @@ class format_context_traits val == '\r' || val == '\\' || (DoubleQuote && val == '"') || - (SingleQuote && val == '\''); + (SingleQuote && val == '\'') || + val < U' '; } public: @@ -2459,12 +2471,12 @@ class format_context_traits { if(PAPILIO_NS utf::is_leading_byte(str[i])) { - std::uint8_t size_bytes = utf::byte_count(str[i]); + std::uint8_t size_bytes = PAPILIO_NS utf::byte_count(str[i]); if(i + size_bytes > str.size()) { for(std::size_t j = i; j < str.size(); ++j) { - append_hex_digits(ctx, static_cast(str[j])); + append_hex_digits(ctx, static_cast(str[j]), false); } return; } @@ -2497,7 +2509,7 @@ class format_context_traits for(auto it = str.begin() + i; it != stop; ++it) { - append_hex_digits(ctx, static_cast(*it)); + append_hex_digits(ctx, static_cast(*it), false); } i += std::distance(str.begin() + i, stop); @@ -2508,7 +2520,7 @@ class format_context_traits } else { - append_hex_digits(ctx, str[i]); + append_hex_digits(ctx, str[i], false); ++i; } } @@ -2526,16 +2538,16 @@ class format_context_traits append_as_esc_seq(ctx, ch); ++i; } - else if(utf::is_high_surrogate(ch)) + else if(PAPILIO_NS utf::is_high_surrogate(ch)) { if(i + 1 >= str.size()) { - append_hex_digits(ctx, ch); + append_hex_digits(ctx, ch, false); return; } - else if(!utf::is_low_surrogate(str[i + 1])) + else if(!PAPILIO_NS utf::is_low_surrogate(str[i + 1])) { - append_hex_digits(ctx, ch); + append_hex_digits(ctx, ch, false); } else { @@ -2546,9 +2558,9 @@ class format_context_traits } else { - if(utf::is_low_surrogate(ch)) + if(PAPILIO_NS utf::is_low_surrogate(ch)) { - append_hex_digits(ctx, ch); + append_hex_digits(ctx, ch, false); } else { @@ -2586,7 +2598,7 @@ class format_context_traits * @code{.cpp} * append_escaped(ctx, "hello\t"); // Appends "hello\\t" * // Invalid UTF-8 - * append_escaped(ctx, "\xc3\x28"); // Appends "\u{c3}(" + * append_escaped(ctx, "\xc3\x28"); // Appends "\x{c3}(" * @endcode */ static void append_escaped(context_type& ctx, 
string_view_type str) @@ -5395,6 +5407,13 @@ class range_formatter context_t::append(ctx, m_opening); + // Possible implicit conversion when forwarding range values to the underlying formatter. + // Suppress related compiler warnings. +#ifdef PAPILIO_COMPILER_CLANG +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wsign-conversion" +#endif + bool first = true; for(auto&& i : rng) { @@ -5407,6 +5426,11 @@ class range_formatter context_t::advance_to(ctx, m_underlying.format(i, ctx)); } + +#ifdef PAPILIO_COMPILER_CLANG +# pragma clang diagnostic pop +#endif + context_t::append(ctx, m_closing); return context_t::out(ctx); diff --git a/include/papilio/utf/string.hpp b/include/papilio/utf/string.hpp index cd5d8c3..67a923a 100644 --- a/include/papilio/utf/string.hpp +++ b/include/papilio/utf/string.hpp @@ -985,16 +985,16 @@ class basic_string_container : public detail::str_impl) buf.reserve(std::ranges::size(r)); - for(CharT ch : r) - buf.push_back(ch); + for(auto&& ch : r) + buf.push_back(static_cast(ch)); assign(std::move(buf)); } else if constexpr(std::convertible_to, utf::codepoint>) { string_type buf; - for(utf::codepoint cp : r) - cp.append_to(buf); + for(auto&& cp : r) + utf::codepoint(cp).append_to(buf); assign(std::move(buf)); } diff --git a/test/test_core/format_context.cpp b/test/test_core/format_context.cpp index 8ab4855..5ea18fc 100644 --- a/test/test_core/format_context.cpp +++ b/test/test_core/format_context.cpp @@ -134,7 +134,7 @@ TYPED_TEST(format_context_suite, append_escaped) { context_t::append_escaped(ctx, reinterpret_cast("\xc3\x28")); - const auto expected_str = PAPILIO_TSTRING(TypeParam, "\\u{c3}("); + const auto expected_str = PAPILIO_TSTRING(TypeParam, "\\x{c3}("); EXPECT_EQ(result, expected_str); } } diff --git a/test/test_format/formatter.cpp b/test/test_format/formatter.cpp index 9567c15..9cbe5d4 100644 --- a/test/test_format/formatter.cpp +++ b/test/test_format/formatter.cpp @@ -205,6 +205,11 @@ TEST(fundamental_formatter, 
string) EXPECT_EQ(PAPILIO_NS format("{:?}", "hello\n\t\r"), "hello\\n\\t\\r"); EXPECT_EQ(PAPILIO_NS format(L"{:?}", L"hello\n\t\r"), L"hello\\n\\t\\r"); + EXPECT_EQ(PAPILIO_NS format("{:?}", std::string("\0 \n \t \x02 \x1b", 9)), "\\u{0} \\n \\t \\u{2} \\u{1b}"); + EXPECT_EQ(PAPILIO_NS format(L"{:?}", std::wstring(L"\0 \n \t \x02 \x1b", 9)), L"\\u{0} \\n \\t \\u{2} \\u{1b}"); + + EXPECT_EQ(PAPILIO_NS format("{:?}", "\xc3\x28"), "\\x{c3}("); + EXPECT_EQ(PAPILIO_NS format("{:s}", "hello"), "hello"); EXPECT_EQ(PAPILIO_NS format(L"{:s}", L"hello"), L"hello");