diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp index 12a2f494e0a7aa..21962c3bad378d 100644 --- a/deps/simdutf/simdutf.cpp +++ b/deps/simdutf/simdutf.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2024-12-26 12:42:33 -0500. Do not edit! */ +/* auto-generated on 2025-01-08 17:51:07 -0500. Do not edit! */ /* begin file src/simdutf.cpp */ #include "simdutf.h" // We include base64_tables once. @@ -17142,8 +17142,33 @@ size_t convert_masked_utf8_to_utf16(const char *input, for (int k = 0; k < 6; k++) { utf16_output[k] = buffer[k]; } // the loop might compiler to a couple of instructions. - utf16_output += 6; // We wrote 3 32-bit surrogate pairs. - return 12; // We consumed 12 bytes. + // We need some validation. See + // https://github.com/simdutf/simdutf/pull/631 +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + uint8x16_t expected_mask = simdutf_make_uint8x16_t( + 0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, + 0xc0, 0x0, 0x0, 0x0, 0x0); +#else + uint8x16_t expected_mask = {0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, + 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, + 0x0, 0x0, 0x0, 0x0}; +#endif +#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO + uint8x16_t expected = simdutf_make_uint8x16_t( + 0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, + 0x80, 0x0, 0x0, 0x0, 0x0); +#else + uint8x16_t expected = {0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, + 0xf0, 0x80, 0x80, 0x80, 0x0, 0x0, 0x0, 0x0}; +#endif + uint8x16_t check = vceqq_u8(vandq_u8(in, expected_mask), expected); + bool correct = (vminvq_u32(vreinterpretq_u32_u8(check)) == 0xFFFFFFFF); + // The validation is just three instructions and it is not on a critical + // path. + if (correct) { + utf16_output += 6; // We wrote 3 32-bit surrogate pairs. + } + return 12; // We consumed 12 bytes. } // 3 1-4 byte sequences uint8x16_t sh = vld1q_u8(reinterpret_cast( @@ -18634,6 +18659,12 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, } if (srclen == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation, 0}; } return {SUCCESS, 0, 0}; @@ -22881,6 +22912,12 @@ simdutf_warn_unused result implementation::base64_to_binary( } if (length == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation}; } return {SUCCESS, 0}; @@ -22926,6 +22963,12 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } if (length == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation, 0}; } return {SUCCESS, 0, 0}; @@ -22977,6 +23020,12 @@ simdutf_warn_unused result implementation::base64_to_binary( } if (length == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation}; } return {SUCCESS, 0}; @@ -23022,6 +23071,12 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } if (length == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation, 0}; } return {SUCCESS, 0, 0}; @@ -23058,6 +23113,8 @@ size_t implementation::binary_to_base64(const char *input, size_t length, #endif #if SIMDUTF_IMPLEMENTATION_ICELAKE /* begin file src/icelake/implementation.cpp */ +#include +#include /* begin file src/simdutf/icelake/begin.h */ @@ -26106,17 +26163,17 @@ bool validate_ascii(const char *buf, size_t len) { /* begin file src/icelake/icelake_utf32_validation.inl.cpp */ // file included directly -const char32_t *validate_utf32(const char32_t *buf, size_t len) { - if (len < 16) { - return buf; +bool validate_utf32(const char32_t *buf, size_t len) { + if (len == 0) { + return true; } - const char32_t *end = buf + len - 16; + const char32_t *end = buf + len; const __m512i offset = _mm512_set1_epi32((uint32_t)0xffff2000); __m512i currentmax = _mm512_setzero_si512(); __m512i currentoffsetmax = _mm512_setzero_si512(); - while (buf <= end) { + while (buf < end - 16) { __m512i utf32 = _mm512_loadu_si512((const __m512i *)buf); buf += 16; currentoffsetmax = @@ -26124,20 +26181,26 @@ const char32_t *validate_utf32(const char32_t *buf, size_t len) { currentmax = _mm512_max_epu32(utf32, currentmax); } + __m512i utf32 = + _mm512_maskz_loadu_epi32(__mmask16((1 << (end - buf)) - 1), buf); + currentoffsetmax = + _mm512_max_epu32(_mm512_add_epi32(utf32, offset), currentoffsetmax); + currentmax = _mm512_max_epu32(utf32, currentmax); + const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff); const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff); __m512i is_zero = _mm512_xor_si512(_mm512_max_epu32(currentmax, standardmax), standardmax); if (_mm512_test_epi8_mask(is_zero, is_zero) != 0) { - return nullptr; + return false; } is_zero = _mm512_xor_si512( _mm512_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); if (_mm512_test_epi8_mask(is_zero, is_zero) != 0) { - return nullptr; + return false; } - return buf; + return true; } /* end file src/icelake/icelake_utf32_validation.inl.cpp */ /* begin file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */ @@ -26556,6 +26619,12 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, } if (srclen == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation, 0}; } return {SUCCESS, 0, 0}; @@ -26753,24 +26822,76 @@ implementation::detect_encodings(const char *input, size_t length) const noexcept { // If there is a BOM, then we trust it. auto bom_encoding = simdutf::BOM::check_bom(input, length); - // todo: convert to a one-pass algorithm if (bom_encoding != encoding_type::unspecified) { return bom_encoding; } + int out = 0; - if (validate_utf8(input, length)) { + uint32_t utf16_err = (length % 2); + uint32_t utf32_err = (length % 4); + uint32_t ends_with_high = 0; + avx512_utf8_checker checker{}; + const __m512i offset = _mm512_set1_epi32((uint32_t)0xffff2000); + __m512i currentmax = _mm512_setzero_si512(); + __m512i currentoffsetmax = _mm512_setzero_si512(); + const char *ptr = input; + const char *end = ptr + length; + for (; end - ptr >= 64; ptr += 64) { + // utf8 checks + const __m512i data = _mm512_loadu_si512((const __m512i *)ptr); + checker.check_next_input(data); + + // utf16le_checks + __m512i diff = _mm512_sub_epi16(data, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + utf16_err |= (((highsurrogates << 1) | ends_with_high) != lowsurrogates); + ends_with_high = ((highsurrogates & 0x80000000) != 0); + + // utf32le checks + currentoffsetmax = + _mm512_max_epu32(_mm512_add_epi32(data, offset), currentoffsetmax); + currentmax = _mm512_max_epu32(data, currentmax); + } + + // last block with 0 <= len < 64 + __mmask64 read_mask = (__mmask64(1) << (end - ptr)) - 1; + const __m512i data = _mm512_maskz_loadu_epi8(read_mask, (const __m512i *)ptr); + checker.check_next_input(data); + + __m512i diff = _mm512_sub_epi16(data, _mm512_set1_epi16(uint16_t(0xD800))); + __mmask32 surrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); + __mmask32 highsurrogates = + _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); + __mmask32 lowsurrogates = surrogates ^ highsurrogates; + utf16_err |= (((highsurrogates << 1) | ends_with_high) != lowsurrogates); + + currentoffsetmax = + _mm512_max_epu32(_mm512_add_epi32(data, offset), currentoffsetmax); + currentmax = _mm512_max_epu32(data, currentmax); + + const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff); + const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff); + __m512i is_zero = + _mm512_xor_si512(_mm512_max_epu32(currentmax, standardmax), standardmax); + utf32_err |= (_mm512_test_epi8_mask(is_zero, is_zero) != 0); + is_zero = _mm512_xor_si512( + _mm512_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); + utf32_err |= (_mm512_test_epi8_mask(is_zero, is_zero) != 0); + checker.check_eof(); + bool is_valid_utf8 = !checker.errors(); + if (is_valid_utf8) { out |= encoding_type::UTF8; } - if ((length % 2) == 0) { - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - out |= encoding_type::UTF16_LE; - } + if (utf16_err == 0) { + out |= encoding_type::UTF16_LE; } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; - } + if (utf32_err == 0) { + out |= encoding_type::UTF32_LE; } return out; } @@ -27092,14 +27213,7 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors( simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - const char32_t *tail = icelake::validate_utf32(buf, len); - if (tail) { - return scalar::utf32::validate(tail, len - (tail - buf)); - } else { - // we come here if there was an error, or buf was nullptr which may happen - // for empty input. - return len == 0; - } + return icelake::validate_utf32(buf, len); } simdutf_warn_unused result implementation::validate_utf32_with_errors( @@ -27980,16 +28094,7 @@ implementation::count_utf8(const char *input, size_t length) const noexcept { } } - __m256i first_half = _mm512_extracti64x4_epi64(unrolled_popcount, 0); - __m256i second_half = _mm512_extracti64x4_epi64(unrolled_popcount, 1); - answer -= (size_t)_mm256_extract_epi64(first_half, 0) + - (size_t)_mm256_extract_epi64(first_half, 1) + - (size_t)_mm256_extract_epi64(first_half, 2) + - (size_t)_mm256_extract_epi64(first_half, 3) + - (size_t)_mm256_extract_epi64(second_half, 0) + - (size_t)_mm256_extract_epi64(second_half, 1) + - (size_t)_mm256_extract_epi64(second_half, 2) + - (size_t)_mm256_extract_epi64(second_half, 3); + answer -= _mm512_reduce_add_epi64(unrolled_popcount); return answer + scalar::utf8::count_code_points( reinterpret_cast(str + i), length - i); @@ -28175,16 +28280,7 @@ simdutf_warn_unused size_t implementation::utf8_length_from_latin1( eight_64bits, _mm512_sad_epu8(runner, _mm512_setzero_si512())); } - __m256i first_half = _mm512_extracti64x4_epi64(eight_64bits, 0); - __m256i second_half = _mm512_extracti64x4_epi64(eight_64bits, 1); - answer += (size_t)_mm256_extract_epi64(first_half, 0) + - (size_t)_mm256_extract_epi64(first_half, 1) + - (size_t)_mm256_extract_epi64(first_half, 2) + - (size_t)_mm256_extract_epi64(first_half, 3) + - (size_t)_mm256_extract_epi64(second_half, 0) + - (size_t)_mm256_extract_epi64(second_half, 1) + - (size_t)_mm256_extract_epi64(second_half, 2) + - (size_t)_mm256_extract_epi64(second_half, 3); + answer += _mm512_reduce_add_epi64(eight_64bits); } else if (answer > 0) { for (; i + sizeof(__m512i) <= length; i += sizeof(__m512i)) { __m512i latin = _mm512_loadu_si512((const __m512i *)(str + i)); @@ -31471,6 +31567,12 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, } if (srclen == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation, 0}; } return {SUCCESS, 0, 0}; @@ -33426,20 +33528,103 @@ implementation::detect_encodings(const char *input, if (bom_encoding != encoding_type::unspecified) { return bom_encoding; } + int out = 0; - if (validate_utf8(input, length)) { + uint32_t utf16_err = (length % 2); + uint32_t utf32_err = (length % 4); + uint32_t ends_with_high = 0; + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + const __m256i standardmax = _mm256_set1_epi32(0x10ffff); + const __m256i offset = _mm256_set1_epi32(0xffff2000); + const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff); + __m256i currentmax = _mm256_setzero_si256(); + __m256i currentoffsetmax = _mm256_setzero_si256(); + + utf8_checker c{}; + buf_block_reader<64> reader(reinterpret_cast(input), length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + // utf8 checks + c.check_next_input(in); + + // utf16le checks + auto in0 = simd16(in.chunks[0]); + auto in1 = simd16(in.chunks[1]); + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const auto in2 = simd16::pack(t0, t1); + const auto surrogates_wordmask = (in2 & v_f8) == v_d8; + const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); + const auto vL = (in2 & v_fc) == v_dc; + const uint32_t L = vL.to_bitmask(); + const uint32_t H = L ^ surrogates_bitmask; + utf16_err |= (((H << 1) | ends_with_high) != L); + ends_with_high = (H & 0x80000000) != 0; + + // utf32le checks + currentmax = _mm256_max_epu32(in.chunks[0], currentmax); + currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[0], offset), + currentoffsetmax); + currentmax = _mm256_max_epu32(in.chunks[1], currentmax); + currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[1], offset), + currentoffsetmax); + + reader.advance(); + } + + uint8_t block[64]{}; + size_t idx = reader.block_index(); + std::memcpy(block, &input[idx], length - idx); + simd::simd8x64 in(block); + c.check_next_input(in); + + // utf16le last block check + auto in0 = simd16(in.chunks[0]); + auto in1 = simd16(in.chunks[1]); + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const auto in2 = simd16::pack(t0, t1); + const auto surrogates_wordmask = (in2 & v_f8) == v_d8; + const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); + const auto vL = (in2 & v_fc) == v_dc; + const uint32_t L = vL.to_bitmask(); + const uint32_t H = L ^ surrogates_bitmask; + utf16_err |= (((H << 1) | ends_with_high) != L); + // this is required to check for last byte ending in high and end of input + // is reached + ends_with_high = (H & 0x80000000) != 0; + utf16_err |= ends_with_high; + + // utf32le last block check + currentmax = _mm256_max_epu32(in.chunks[0], currentmax); + currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[0], offset), + currentoffsetmax); + currentmax = _mm256_max_epu32(in.chunks[1], currentmax); + currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[1], offset), + currentoffsetmax); + + reader.advance(); + + c.check_eof(); + bool is_valid_utf8 = !c.errors(); + __m256i is_zero = + _mm256_xor_si256(_mm256_max_epu32(currentmax, standardmax), standardmax); + utf32_err |= (_mm256_testz_si256(is_zero, is_zero) == 0); + + is_zero = _mm256_xor_si256( + _mm256_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); + utf32_err |= (_mm256_testz_si256(is_zero, is_zero) == 0); + if (is_valid_utf8) { out |= encoding_type::UTF8; } - if ((length % 2) == 0) { - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - out |= encoding_type::UTF16_LE; - } + if (utf16_err == 0) { + out |= encoding_type::UTF16_LE; } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; - } + if (utf32_err == 0) { + out |= encoding_type::UTF32_LE; } return out; } @@ -36317,6 +36502,12 @@ simdutf_warn_unused result implementation::base64_to_binary( } if (length == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation}; } return {SUCCESS, 0}; @@ -36368,6 +36559,12 @@ simdutf_warn_unused result implementation::base64_to_binary( } if (length == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation}; } return {SUCCESS, 0}; @@ -38120,6 +38317,12 @@ simdutf_warn_unused result implementation::base64_to_binary( } if (length == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation}; } return {SUCCESS, 0}; @@ -38165,6 +38368,12 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } if (length == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation, 0}; } return {SUCCESS, 0, 0}; @@ -38216,6 +38425,12 @@ simdutf_warn_unused result implementation::base64_to_binary( } if (length == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation}; } return {SUCCESS, 0}; @@ -38261,6 +38476,12 @@ simdutf_warn_unused full_result implementation::base64_to_binary_details( } if (length == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation, 0}; } return {SUCCESS, 0, 0}; @@ -41328,6 +41549,12 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, } if (srclen == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation, 0}; } return {SUCCESS, 0, 0}; @@ -43282,24 +43509,138 @@ implementation::detect_encodings(const char *input, size_t length) const noexcept { // If there is a BOM, then we trust it. auto bom_encoding = simdutf::BOM::check_bom(input, length); - // todo: reimplement as a one-pass algorithm. if (bom_encoding != encoding_type::unspecified) { return bom_encoding; } + int out = 0; - if (validate_utf8(input, length)) { + uint32_t utf16_err = (length % 2); + uint32_t utf32_err = (length % 4); + uint32_t ends_with_high = 0; + const auto v_d8 = simd8::splat(0xd8); + const auto v_f8 = simd8::splat(0xf8); + const auto v_fc = simd8::splat(0xfc); + const auto v_dc = simd8::splat(0xdc); + const __m128i standardmax = _mm_set1_epi32(0x10ffff); + const __m128i offset = _mm_set1_epi32(0xffff2000); + const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff); + __m128i currentmax = _mm_setzero_si128(); + __m128i currentoffsetmax = _mm_setzero_si128(); + + utf8_checker c{}; + buf_block_reader<64> reader(reinterpret_cast(input), length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + // utf8 checks + c.check_next_input(in); + + // utf16le checks + auto in0 = simd16(in.chunks[0]); + auto in1 = simd16(in.chunks[1]); + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const auto packed1 = simd16::pack(t0, t1); + auto in2 = simd16(in.chunks[2]); + auto in3 = simd16(in.chunks[3]); + const auto t2 = in2.shr<8>(); + const auto t3 = in3.shr<8>(); + const auto packed2 = simd16::pack(t2, t3); + + const auto surrogates_wordmask_lo = (packed1 & v_f8) == v_d8; + const auto surrogates_wordmask_hi = (packed2 & v_f8) == v_d8; + const uint32_t surrogates_bitmask = + (surrogates_wordmask_hi.to_bitmask() << 16) | + surrogates_wordmask_lo.to_bitmask(); + const auto vL_lo = (packed1 & v_fc) == v_dc; + const auto vL_hi = (packed2 & v_fc) == v_dc; + const uint32_t L = (vL_hi.to_bitmask() << 16) | vL_lo.to_bitmask(); + const uint32_t H = L ^ surrogates_bitmask; + utf16_err |= (((H << 1) | ends_with_high) != L); + ends_with_high = (H & 0x80000000) != 0; + + // utf32le checks + currentmax = _mm_max_epu32(in.chunks[0], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[0], offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[1], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[1], offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[2], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[2], offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[3], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[3], offset), currentoffsetmax); + + reader.advance(); + } + + uint8_t block[64]{}; + size_t idx = reader.block_index(); + std::memcpy(block, &input[idx], length - idx); + simd::simd8x64 in(block); + c.check_next_input(in); + + // utf16le last block check + auto in0 = simd16(in.chunks[0]); + auto in1 = simd16(in.chunks[1]); + const auto t0 = in0.shr<8>(); + const auto t1 = in1.shr<8>(); + const auto packed1 = simd16::pack(t0, t1); + auto in2 = simd16(in.chunks[2]); + auto in3 = simd16(in.chunks[3]); + const auto t2 = in2.shr<8>(); + const auto t3 = in3.shr<8>(); + const auto packed2 = simd16::pack(t2, t3); + + const auto surrogates_wordmask_lo = (packed1 & v_f8) == v_d8; + const auto surrogates_wordmask_hi = (packed2 & v_f8) == v_d8; + const uint32_t surrogates_bitmask = + (surrogates_wordmask_hi.to_bitmask() << 16) | + surrogates_wordmask_lo.to_bitmask(); + const auto vL_lo = (packed1 & v_fc) == v_dc; + const auto vL_hi = (packed2 & v_fc) == v_dc; + const uint32_t L = (vL_hi.to_bitmask() << 16) | vL_lo.to_bitmask(); + const uint32_t H = L ^ surrogates_bitmask; + utf16_err |= (((H << 1) | ends_with_high) != L); + // this is required to check for last byte ending in high and end of input + // is reached + ends_with_high = (H & 0x80000000) != 0; + utf16_err |= ends_with_high; + + // utf32le last block check + currentmax = _mm_max_epu32(in.chunks[0], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[0], offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[1], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[1], offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[2], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[2], offset), currentoffsetmax); + currentmax = _mm_max_epu32(in.chunks[3], currentmax); + currentoffsetmax = + _mm_max_epu32(_mm_add_epi32(in.chunks[3], offset), currentoffsetmax); + + reader.advance(); + + c.check_eof(); + bool is_valid_utf8 = !c.errors(); + __m128i is_zero = + _mm_xor_si128(_mm_max_epu32(currentmax, standardmax), standardmax); + utf32_err |= (_mm_test_all_zeros(is_zero, is_zero) == 0); + + is_zero = _mm_xor_si128(_mm_max_epu32(currentoffsetmax, standardoffsetmax), + standardoffsetmax); + utf32_err |= (_mm_test_all_zeros(is_zero, is_zero) == 0); + if (is_valid_utf8) { out |= encoding_type::UTF8; } - if ((length % 2) == 0) { - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - out |= encoding_type::UTF16_LE; - } + if (utf16_err == 0) { + out |= encoding_type::UTF16_LE; } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; - } + if (utf32_err == 0) { + out |= encoding_type::UTF32_LE; } return out; } @@ -47336,6 +47677,12 @@ compress_decode_base64(char *dst, const char_type *src, size_t srclen, } if (srclen == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation, 0}; } return {SUCCESS, 0, 0}; @@ -53668,6 +54015,12 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, } if (srclen == 0) { if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, 0, 0}; + } else if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, 0, 0}; + } return {INVALID_BASE64_CHARACTER, equallocation, 0}; } return {SUCCESS, 0, 0}; diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h index 9a4b4580da91a1..4bec0cf300292a 100644 --- a/deps/simdutf/simdutf.h +++ b/deps/simdutf/simdutf.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-12-26 12:42:33 -0500. Do not edit! */ +/* auto-generated on 2025-01-08 17:51:07 -0500. Do not edit! */ /* begin file include/simdutf.h */ #ifndef SIMDUTF_H #define SIMDUTF_H @@ -55,21 +55,35 @@ #ifndef SIMDUTF_COMMON_DEFS_H #define SIMDUTF_COMMON_DEFS_H -#include /* begin file include/simdutf/portability.h */ #ifndef SIMDUTF_PORTABILITY_H #define SIMDUTF_PORTABILITY_H + +#include #include #include #include -#include -#include #ifndef _WIN32 // strcasecmp, strncasecmp #include #endif +#if defined(__apple_build_version__) + #if __apple_build_version__ < 14000000 + #define SIMDUTF_SPAN_DISABLED \ + 1 // apple-clang/13 doesn't support std::convertible_to + #endif +#endif + +#if SIMDUTF_CPLUSPLUS20 + #include + #if __cpp_concepts >= 201907L && __cpp_lib_span >= 202002L && \ + !defined(SIMDUTF_SPAN_DISABLED) + #define SIMDUTF_SPAN 1 + #endif +#endif + /** * We want to check that it is actually a little endian system at * compile-time. @@ -291,27 +305,6 @@ #define simdutf_strncasecmp strncasecmp #endif -#ifdef NDEBUG - - #ifdef SIMDUTF_VISUAL_STUDIO - #define SIMDUTF_UNREACHABLE() __assume(0) - #define SIMDUTF_ASSUME(COND) __assume(COND) - #else - #define SIMDUTF_UNREACHABLE() __builtin_unreachable(); - #define SIMDUTF_ASSUME(COND) \ - do { \ - if (!(COND)) \ - __builtin_unreachable(); \ - } while (0) - #endif - -#else // NDEBUG - - #define SIMDUTF_UNREACHABLE() assert(0); - #define SIMDUTF_ASSUME(COND) assert(COND) - -#endif - #if defined(__GNUC__) && !defined(__clang__) #if __GNUC__ >= 11 #define SIMDUTF_GCC11ORMORE 1 @@ -402,27 +395,6 @@ #endif // SIMDUTF_AVX512_H_ /* end file include/simdutf/avx512.h */ -#if defined(__GNUC__) - // Marks a block with a name so that MCA analysis can see it. - #define SIMDUTF_BEGIN_DEBUG_BLOCK(name) \ - __asm volatile("# LLVM-MCA-BEGIN " #name); - #define SIMDUTF_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); - #define SIMDUTF_DEBUG_BLOCK(name, block) \ - BEGIN_DEBUG_BLOCK(name); \ - block; \ - END_DEBUG_BLOCK(name); -#else - #define SIMDUTF_BEGIN_DEBUG_BLOCK(name) - #define SIMDUTF_END_DEBUG_BLOCK(name) - #define SIMDUTF_DEBUG_BLOCK(name, block) -#endif - -// Align to N-byte boundary -#define SIMDUTF_ROUNDUP_N(a, n) (((a) + ((n) - 1)) & ~((n) - 1)) -#define SIMDUTF_ROUNDDOWN_N(a, n) ((a) & ~((n) - 1)) - -#define SIMDUTF_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n) - 1)) == 0) - #if defined(SIMDUTF_REGULAR_VISUAL_STUDIO) #define SIMDUTF_DEPRECATED __declspec(deprecated) @@ -536,18 +508,11 @@ #endif #endif -/// If EXPR is an error, returns it. -#define SIMDUTF_TRY(EXPR) \ - { \ - auto _err = (EXPR); \ - if (_err) { \ - return _err; \ - } \ - } - #endif // SIMDUTF_COMMON_DEFS_H /* end file include/simdutf/common_defs.h */ /* begin file include/simdutf/encoding_types.h */ +#ifndef SIMDUTF_ENCODING_TYPES_H +#define SIMDUTF_ENCODING_TYPES_H #include namespace simdutf { @@ -591,6 +556,7 @@ size_t bom_byte_size(encoding_type bom); } // namespace BOM } // namespace simdutf +#endif /* end file include/simdutf/encoding_types.h */ /* begin file include/simdutf/error.h */ #ifndef SIMDUTF_ERROR_H @@ -675,22 +641,22 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION "5.7.2" +#define SIMDUTF_VERSION "6.0.3" namespace simdutf { enum { /** * The major version (MAJOR.minor.revision) of simdutf being used. */ - SIMDUTF_VERSION_MAJOR = 5, + SIMDUTF_VERSION_MAJOR = 6, /** * The minor version (major.MINOR.revision) of simdutf being used. */ - SIMDUTF_VERSION_MINOR = 7, + SIMDUTF_VERSION_MINOR = 0, /** * The revision (major.minor.REVISION) of simdutf being used. */ - SIMDUTF_VERSION_REVISION = 2 + SIMDUTF_VERSION_REVISION = 3 }; } // namespace simdutf @@ -699,11 +665,10 @@ enum { /* begin file include/simdutf/implementation.h */ #ifndef SIMDUTF_IMPLEMENTATION_H #define SIMDUTF_IMPLEMENTATION_H -#include #if !defined(SIMDUTF_NO_THREADS) #include #endif -#include +#include #include /* begin file include/simdutf/internal/isadetection.h */ /* From @@ -1031,8 +996,61 @@ static inline uint32_t detect_supported_architectures() { #endif // SIMDutf_INTERNAL_ISADETECTION_H /* end file include/simdutf/internal/isadetection.h */ +#if SIMDUTF_SPAN + #include + #include + #include +#endif + namespace simdutf { +#if SIMDUTF_SPAN +/// helpers placed in namespace detail are not a part of the public API +namespace detail { +/** + * matches a byte, in the many ways C++ allows. note that these + * are all distinct types. + */ +template +concept byte_like = std::is_same_v || // + std::is_same_v || // + std::is_same_v || // + std::is_same_v; + +template +concept is_byte_like = byte_like>; + +template +concept is_pointer = std::is_pointer_v; + +/** + * matches anything that behaves like std::span and points to character-like + * data such as: std::byte, char, unsigned char, signed char, std::int8_t, + * std::uint8_t + */ +template +concept input_span_of_byte_like = requires(const T &t) { + { t.size() } noexcept -> std::convertible_to; + { t.data() } noexcept -> is_pointer; + { *t.data() } noexcept -> is_byte_like; +}; + +template +concept is_mutable = !std::is_const_v>; + +/** + * like span_of_byte_like, but for an output span (intended to be written to) + */ +template +concept output_span_of_byte_like = requires(T &t) { + { t.size() } noexcept -> std::convertible_to; + { t.data() } noexcept -> is_pointer; + { *t.data() } noexcept -> is_byte_like; + { *t.data() } noexcept -> is_mutable; +}; +} // namespace detail +#endif + /** * Autodetect the encoding of the input, a single encoding is recommended. * E.g., the function might return simdutf::encoding_type::UTF8, @@ -1049,6 +1067,25 @@ simdutf_really_inline simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const uint8_t *input, size_t length) noexcept { return autodetect_encoding(reinterpret_cast(input), length); } +#if SIMDUTF_SPAN +/** + * Autodetect the encoding of the input, a single encoding is recommended. + * E.g., the function might return simdutf::encoding_type::UTF8, + * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or + * simdutf::encoding_type::UTF32_LE. + * + * @param input the string to analyze. can be a anything span-like that has a + * data() and size() that points to character data: std::string, + * std::string_view, std::vector, std::span etc. + * @return the detected encoding type + */ +simdutf_really_inline simdutf_warn_unused simdutf::encoding_type +autodetect_encoding( + const detail::input_span_of_byte_like auto &input) noexcept { + return autodetect_encoding(reinterpret_cast(input.data()), + input.size()); +} +#endif /** * Autodetect the possible encodings of the input in one pass. @@ -1067,6 +1104,13 @@ simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t *input, size_t length) noexcept { return detect_encodings(reinterpret_cast(input), length); } +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused int +detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept { + return detect_encodings(reinterpret_cast(input.data()), + input.size()); +} +#endif /** * Validate the UTF-8 string. This function may be best when you expect @@ -1080,6 +1124,13 @@ detect_encodings(const uint8_t *input, size_t length) noexcept { * @return true if and only if the string is valid UTF-8. */ simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused bool +validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept { + return validate_utf8(reinterpret_cast(input.data()), + input.size()); +} +#endif /** * Validate the UTF-8 string and stop on error. @@ -1095,6 +1146,13 @@ simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept; */ simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result validate_utf8_with_errors( + const detail::input_span_of_byte_like auto &input) noexcept { + return validate_utf8_with_errors(reinterpret_cast(input.data()), + input.size()); +} +#endif /** * Validate the ASCII string. @@ -1106,6 +1164,13 @@ simdutf_warn_unused result validate_utf8_with_errors(const char *buf, * @return true if and only if the string is valid ASCII. */ simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused bool +validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept { + return validate_ascii(reinterpret_cast(input.data()), + input.size()); +} +#endif /** * Validate the ASCII string and stop on error. It might be faster than @@ -1122,6 +1187,13 @@ simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept; */ simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result validate_ascii_with_errors( + const detail::input_span_of_byte_like auto &input) noexcept { + return validate_ascii_with_errors( + reinterpret_cast(input.data()), input.size()); +} +#endif /** * Using native endianness; Validate the UTF-16 string. @@ -1139,6 +1211,12 @@ simdutf_warn_unused result validate_ascii_with_errors(const char *buf, */ simdutf_warn_unused bool validate_utf16(const char16_t *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused bool +validate_utf16(std::span input) noexcept { + return validate_utf16(input.data(), input.size()); +} +#endif /** * Validate the UTF-16LE string. This function may be best when you expect @@ -1156,6 +1234,12 @@ simdutf_warn_unused bool validate_utf16(const char16_t *buf, */ simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused bool +validate_utf16le(std::span input) noexcept { + return validate_utf16le(input.data(), input.size()); +} +#endif /** * Validate the UTF-16BE string. This function may be best when you expect @@ -1173,6 +1257,12 @@ simdutf_warn_unused bool validate_utf16le(const char16_t *buf, */ simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused bool +validate_utf16be(std::span input) noexcept { + return validate_utf16be(input.data(), input.size()); +} +#endif /** * Using native endianness; Validate the UTF-16 string and stop on error. @@ -1193,6 +1283,12 @@ simdutf_warn_unused bool validate_utf16be(const char16_t *buf, */ simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +validate_utf16_with_errors(std::span input) noexcept { + return validate_utf16_with_errors(input.data(), input.size()); +} +#endif /** * Validate the UTF-16LE string and stop on error. It might be faster than @@ -1212,6 +1308,12 @@ simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, */ simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +validate_utf16le_with_errors(std::span input) noexcept { + return validate_utf16le_with_errors(input.data(), input.size()); +} +#endif /** * Validate the UTF-16BE string and stop on error. It might be faster than @@ -1231,6 +1333,12 @@ simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, */ simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +validate_utf16be_with_errors(std::span input) noexcept { + return validate_utf16be_with_errors(input.data(), input.size()); +} +#endif /** * Validate the UTF-32 string. This function may be best when you expect @@ -1248,6 +1356,12 @@ simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, */ simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused bool +validate_utf32(std::span input) noexcept { + return validate_utf32(input.data(), input.size()); +} +#endif /** * Validate the UTF-32 string and stop on error. It might be faster than @@ -1267,6 +1381,12 @@ simdutf_warn_unused bool validate_utf32(const char32_t *buf, */ simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +validate_utf32_with_errors(std::span input) noexcept { + return validate_utf32_with_errors(input.data(), input.size()); +} +#endif /** * Convert Latin1 string into UTF8 string. @@ -1281,6 +1401,15 @@ simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, size_t length, char *utf8_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8( + const detail::input_span_of_byte_like auto &latin1_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_latin1_to_utf8( + reinterpret_cast(latin1_input.data()), latin1_input.size(), + utf8_output.data()); +} +#endif /** * Convert Latin1 string into UTF8 string with output limit. @@ -1296,6 +1425,21 @@ simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, simdutf_warn_unused size_t convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output, size_t utf8_len) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8_safe( + const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + // implementation note: outputspan is a forwarding ref to avoid copying and + // allow both lvalues and rvalues. std::span can be copied without problems, + // but std::vector should not, and this function should accept both. it will + // allow using an owning rvalue ref (example: passing a temporary std::string) + // as output, but the user will quickly find out that he has no way of getting + // the data out of the object in that case. + return convert_latin1_to_utf8_safe( + input.data(), input.size(), reinterpret_cast(utf8_output.data()), + utf8_output.size()); +} +#endif /** * Convert possibly Latin1 string into UTF-16LE string. @@ -1309,6 +1453,15 @@ convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output, */ simdutf_warn_unused size_t convert_latin1_to_utf16le( const char *input, size_t length, char16_t *utf16_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16le( + const detail::input_span_of_byte_like auto &latin1_input, + std::span utf16_output) noexcept { + return convert_latin1_to_utf16le( + reinterpret_cast(latin1_input.data()), latin1_input.size(), + utf16_output.data()); +} +#endif /** * Convert Latin1 string into UTF-16BE string. @@ -1322,6 +1475,14 @@ simdutf_warn_unused size_t convert_latin1_to_utf16le( */ simdutf_warn_unused size_t convert_latin1_to_utf16be( const char *input, size_t length, char16_t *utf16_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input, + std::span output) noexcept { + return convert_latin1_to_utf16be(reinterpret_cast(input.data()), + input.size(), output.data()); +} +#endif /** * Convert Latin1 string into UTF-32 string. @@ -1335,6 +1496,15 @@ simdutf_warn_unused size_t convert_latin1_to_utf16be( */ simdutf_warn_unused size_t convert_latin1_to_utf32( const char *input, size_t length, char32_t *utf32_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf32( + const detail::input_span_of_byte_like auto &latin1_input, + std::span utf32_output) noexcept { + return convert_latin1_to_utf32( + reinterpret_cast(latin1_input.data()), latin1_input.size(), + utf32_output.data()); +} +#endif /** * Convert possibly broken UTF-8 string into latin1 string. @@ -1351,6 +1521,15 @@ simdutf_warn_unused size_t convert_latin1_to_utf32( simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, size_t length, char *latin1_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_latin1( + const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&output) noexcept { + return convert_utf8_to_latin1(reinterpret_cast(input.data()), + input.size(), + reinterpret_cast(output.data())); +} +#endif /** * Using native endianness, convert possibly broken UTF-8 string into a UTF-16 @@ -1367,6 +1546,14 @@ simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, */ simdutf_warn_unused size_t convert_utf8_to_utf16( const char *input, size_t length, char16_t *utf16_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input, + std::span output) noexcept { + return convert_utf8_to_utf16(reinterpret_cast(input.data()), + input.size(), output.data()); +} +#endif /** * Using native endianness, convert a Latin1 string into a UTF-16 string. @@ -1378,6 +1565,14 @@ simdutf_warn_unused size_t convert_utf8_to_utf16( */ simdutf_warn_unused size_t convert_latin1_to_utf16( const char *input, size_t length, char16_t *utf16_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input, + std::span output) noexcept { + return convert_latin1_to_utf16(reinterpret_cast(input.data()), + input.size(), output.data()); +} +#endif /** * Convert possibly broken UTF-8 string into UTF-16LE string. @@ -1393,6 +1588,15 @@ simdutf_warn_unused size_t convert_latin1_to_utf16( */ simdutf_warn_unused size_t convert_utf8_to_utf16le( const char *input, size_t length, char16_t *utf16_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input, + std::span utf16_output) noexcept { + return convert_utf8_to_utf16le( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); +} +#endif /** * Convert possibly broken UTF-8 string into UTF-16BE string. @@ -1408,6 +1612,15 @@ simdutf_warn_unused size_t convert_utf8_to_utf16le( */ simdutf_warn_unused size_t convert_utf8_to_utf16be( const char *input, size_t length, char16_t *utf16_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input, + std::span utf16_output) noexcept { + return convert_utf8_to_utf16be( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); +} +#endif /** * Convert possibly broken UTF-8 string into latin1 string with errors. @@ -1427,6 +1640,16 @@ simdutf_warn_unused size_t convert_utf8_to_utf16be( */ simdutf_warn_unused result convert_utf8_to_latin1_with_errors( const char *input, size_t length, char *latin1_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf8_to_latin1_with_errors( + const detail::input_span_of_byte_like auto &utf8_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_utf8_to_latin1_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Using native endianness, convert possibly broken UTF-8 string into UTF-16 @@ -1445,6 +1668,16 @@ simdutf_warn_unused result convert_utf8_to_latin1_with_errors( */ simdutf_warn_unused result convert_utf8_to_utf16_with_errors( const char *input, size_t length, char16_t *utf16_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf8_to_utf16_with_errors( + const detail::input_span_of_byte_like auto &utf8_input, + std::span utf16_output) noexcept { + return convert_utf8_to_utf16_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); +} +#endif /** * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error. @@ -1462,6 +1695,16 @@ simdutf_warn_unused result convert_utf8_to_utf16_with_errors( */ simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( const char *input, size_t length, char16_t *utf16_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf8_to_utf16le_with_errors( + const detail::input_span_of_byte_like auto &utf8_input, + std::span utf16_output) noexcept { + return convert_utf8_to_utf16le_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); +} +#endif /** * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error. @@ -1479,6 +1722,16 @@ simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( */ simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( const char *input, size_t length, char16_t *utf16_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf8_to_utf16be_with_errors( + const detail::input_span_of_byte_like auto &utf8_input, + std::span utf16_output) noexcept { + return convert_utf8_to_utf16be_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf16_output.data()); +} +#endif /** * Convert possibly broken UTF-8 string into UTF-32 string. @@ -1494,6 +1747,15 @@ simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( */ simdutf_warn_unused size_t convert_utf8_to_utf32( const char *input, size_t length, char32_t *utf32_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input, + std::span utf32_output) noexcept { + return convert_utf8_to_utf32( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf32_output.data()); +} +#endif /** * Convert possibly broken UTF-8 string into UTF-32 string and stop on error. @@ -1511,6 +1773,16 @@ simdutf_warn_unused size_t convert_utf8_to_utf32( */ simdutf_warn_unused result convert_utf8_to_utf32_with_errors( const char *input, size_t length, char32_t *utf32_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf8_to_utf32_with_errors( + const detail::input_span_of_byte_like auto &utf8_input, + std::span utf32_output) noexcept { + return convert_utf8_to_utf32_with_errors( + reinterpret_cast(utf8_input.data()), utf8_input.size(), + utf32_output.data()); +} +#endif /** * Convert valid UTF-8 string into latin1 string. @@ -1533,6 +1805,15 @@ simdutf_warn_unused result convert_utf8_to_utf32_with_errors( */ simdutf_warn_unused size_t convert_valid_utf8_to_latin1( const char *input, size_t length, char *latin1_output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_latin1( + const detail::input_span_of_byte_like auto &valid_utf8_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_valid_utf8_to_latin1( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), latin1_output.data()); +} +#endif /** * Using native endianness, convert valid UTF-8 string into a UTF-16 string. @@ -1546,6 +1827,15 @@ simdutf_warn_unused size_t convert_valid_utf8_to_latin1( */ simdutf_warn_unused size_t convert_valid_utf8_to_utf16( const char *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16( + const detail::input_span_of_byte_like auto &valid_utf8_input, + std::span utf16_output) noexcept { + return convert_valid_utf8_to_utf16( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf16_output.data()); +} +#endif /** * Convert valid UTF-8 string into UTF-16LE string. @@ -1559,6 +1849,15 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16( */ simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( const char *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( + const detail::input_span_of_byte_like auto &valid_utf8_input, + std::span utf16_output) noexcept { + return convert_valid_utf8_to_utf16le( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf16_output.data()); +} +#endif /** * Convert valid UTF-8 string into UTF-16BE string. @@ -1572,6 +1871,15 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( */ simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( const char *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( + const detail::input_span_of_byte_like auto &valid_utf8_input, + std::span utf16_output) noexcept { + return convert_valid_utf8_to_utf16be( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf16_output.data()); +} +#endif /** * Convert valid UTF-8 string into UTF-32 string. @@ -1585,6 +1893,15 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( */ simdutf_warn_unused size_t convert_valid_utf8_to_utf32( const char *input, size_t length, char32_t *utf32_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf32( + const detail::input_span_of_byte_like auto &valid_utf8_input, + std::span utf32_output) noexcept { + return convert_valid_utf8_to_utf32( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size(), utf32_output.data()); +} +#endif /** * Return the number of bytes that this Latin1 string would require in UTF-8 @@ -1596,6 +1913,13 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf32( */ simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_latin1( + const detail::input_span_of_byte_like auto &latin1_input) noexcept { + return utf8_length_from_latin1( + reinterpret_cast(latin1_input.data()), latin1_input.size()); +} +#endif /** * Compute the number of bytes that this UTF-8 string would require in Latin1 @@ -1612,6 +1936,14 @@ simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, */ simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t latin1_length_from_utf8( + const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { + return latin1_length_from_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); +} +#endif /** * Compute the number of 2-byte code units that this UTF-8 string would require @@ -1629,6 +1961,14 @@ simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, */ simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t utf16_length_from_utf8( + const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { + return utf16_length_from_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); +} +#endif /** * Compute the number of 4-byte code units that this UTF-8 string would require @@ -1648,6 +1988,14 @@ simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, */ simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf8( + const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { + return utf32_length_from_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); +} +#endif /** * Using native endianness, convert possibly broken UTF-16 string into UTF-8 @@ -1667,6 +2015,14 @@ simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Using native endianness, convert possibly broken UTF-16 string into Latin1 @@ -1685,6 +2041,15 @@ simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input, */ simdutf_warn_unused size_t convert_utf16_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_latin1( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_utf16_to_latin1( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Convert possibly broken UTF-16LE string into Latin1 string. @@ -1704,6 +2069,15 @@ simdutf_warn_unused size_t convert_utf16_to_latin1( */ simdutf_warn_unused size_t convert_utf16le_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_latin1( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_utf16le_to_latin1( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Convert possibly broken UTF-16BE string into Latin1 string. @@ -1721,6 +2095,15 @@ simdutf_warn_unused size_t convert_utf16le_to_latin1( */ simdutf_warn_unused size_t convert_utf16be_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_latin1( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_utf16be_to_latin1( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Convert possibly broken UTF-16LE string into UTF-8 string. @@ -1739,6 +2122,14 @@ simdutf_warn_unused size_t convert_utf16be_to_latin1( simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_utf8( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_utf16le_to_utf8(utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Convert possibly broken UTF-16BE string into UTF-8 string. @@ -1757,6 +2148,14 @@ simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_utf8( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_utf16be_to_utf8(utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Using native endianness, convert possibly broken UTF-16 string into Latin1 @@ -1776,6 +2175,16 @@ simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, */ simdutf_warn_unused result convert_utf16_to_latin1_with_errors( const char16_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf16_to_latin1_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_utf16_to_latin1_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Convert possibly broken UTF-16LE string into Latin1 string. @@ -1794,6 +2203,16 @@ simdutf_warn_unused result convert_utf16_to_latin1_with_errors( */ simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( const char16_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf16le_to_latin1_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_utf16le_to_latin1_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Convert possibly broken UTF-16BE string into Latin1 string. @@ -1814,6 +2233,16 @@ simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( */ simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( const char16_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf16be_to_latin1_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_utf16be_to_latin1_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Using native endianness, convert possibly broken UTF-16 string into UTF-8 @@ -1834,6 +2263,16 @@ simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( */ simdutf_warn_unused result convert_utf16_to_utf8_with_errors( const char16_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf16_to_utf8_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_utf16_to_utf8_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error. @@ -1853,6 +2292,16 @@ simdutf_warn_unused result convert_utf16_to_utf8_with_errors( */ simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( const char16_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf16le_to_utf8_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_utf16le_to_utf8_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error. @@ -1872,6 +2321,16 @@ simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( */ simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( const char16_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf16be_to_utf8_with_errors( + std::span utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_utf16be_to_utf8_with_errors( + utf16_input.data(), utf16_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Using native endianness, convert valid UTF-16 string into UTF-8 string. @@ -1888,6 +2347,15 @@ simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( */ simdutf_warn_unused size_t convert_valid_utf16_to_utf8( const char16_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_utf8( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_valid_utf16_to_utf8( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Using native endianness, convert UTF-16 string into Latin1 string. @@ -1910,6 +2378,15 @@ simdutf_warn_unused size_t convert_valid_utf16_to_utf8( */ simdutf_warn_unused size_t convert_valid_utf16_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_latin1( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_valid_utf16_to_latin1( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Convert valid UTF-16LE string into Latin1 string. @@ -1932,6 +2409,16 @@ simdutf_warn_unused size_t convert_valid_utf16_to_latin1( */ simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_valid_utf16le_to_latin1( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_valid_utf16le_to_latin1( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Convert valid UTF-16BE string into Latin1 string. @@ -1954,6 +2441,16 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( */ simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( const char16_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_valid_utf16be_to_latin1( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_valid_utf16be_to_latin1( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Convert valid UTF-16LE string into UTF-8 string. @@ -1971,6 +2468,15 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( */ simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( const char16_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_valid_utf16le_to_utf8( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Convert valid UTF-16BE string into UTF-8 string. @@ -1987,6 +2493,15 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( */ simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( const char16_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( + std::span valid_utf16_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_valid_utf16be_to_utf8( + valid_utf16_input.data(), valid_utf16_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Using native endianness, convert possibly broken UTF-16 string into UTF-32 @@ -2005,6 +2520,14 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( */ simdutf_warn_unused size_t convert_utf16_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_utf16_to_utf32(std::span utf16_input, + std::span utf32_output) noexcept { + return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(), + utf32_output.data()); +} +#endif /** * Convert possibly broken UTF-16LE string into UTF-32 string. @@ -2022,6 +2545,14 @@ simdutf_warn_unused size_t convert_utf16_to_utf32( */ simdutf_warn_unused size_t convert_utf16le_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_utf16le_to_utf32(std::span utf16_input, + std::span utf32_output) noexcept { + return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(), + utf32_output.data()); +} +#endif /** * Convert possibly broken UTF-16BE string into UTF-32 string. @@ -2039,6 +2570,14 @@ simdutf_warn_unused size_t convert_utf16le_to_utf32( */ simdutf_warn_unused size_t convert_utf16be_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_utf16be_to_utf32(std::span utf16_input, + std::span utf32_output) noexcept { + return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(), + utf32_output.data()); +} +#endif /** * Using native endianness, convert possibly broken UTF-16 string into @@ -2059,6 +2598,14 @@ simdutf_warn_unused size_t convert_utf16be_to_utf32( */ simdutf_warn_unused result convert_utf16_to_utf32_with_errors( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf16_to_utf32_with_errors(std::span utf16_input, + std::span utf32_output) noexcept { + return convert_utf16_to_utf32_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); +} +#endif /** * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error. @@ -2078,6 +2625,15 @@ simdutf_warn_unused result convert_utf16_to_utf32_with_errors( */ simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf16le_to_utf32_with_errors( + std::span utf16_input, + std::span utf32_output) noexcept { + return convert_utf16le_to_utf32_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); +} +#endif /** * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error. @@ -2097,6 +2653,15 @@ simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( */ simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf16be_to_utf32_with_errors( + std::span utf16_input, + std::span utf32_output) noexcept { + return convert_utf16be_to_utf32_with_errors( + utf16_input.data(), utf16_input.size(), utf32_output.data()); +} +#endif /** * Using native endianness, convert valid UTF-16 string into UTF-32 string. @@ -2114,6 +2679,14 @@ simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( */ simdutf_warn_unused size_t convert_valid_utf16_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_valid_utf16_to_utf32(std::span valid_utf16_input, + std::span utf32_output) noexcept { + return convert_valid_utf16_to_utf32( + valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); +} +#endif /** * Convert valid UTF-16LE string into UTF-32 string. @@ -2130,6 +2703,14 @@ simdutf_warn_unused size_t convert_valid_utf16_to_utf32( */ simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_valid_utf16le_to_utf32(std::span valid_utf16_input, + std::span utf32_output) noexcept { + return convert_valid_utf16le_to_utf32( + valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); +} +#endif /** * Convert valid UTF-16BE string into UTF-32 string. @@ -2146,8 +2727,16 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( */ simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_valid_utf16be_to_utf32(std::span valid_utf16_input, + std::span utf32_output) noexcept { + return convert_valid_utf16be_to_utf32( + valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); +} +#endif -/* +/** * Compute the number of bytes that this UTF-16LE/BE string would require in * Latin1 format. * @@ -2174,6 +2763,13 @@ simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; */ simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +utf8_length_from_utf16(std::span valid_utf16_input) noexcept { + return utf8_length_from_utf16(valid_utf16_input.data(), + valid_utf16_input.size()); +} +#endif /** * Compute the number of bytes that this UTF-16LE string would require in UTF-8 @@ -2188,6 +2784,13 @@ simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, */ simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +utf8_length_from_utf16le(std::span valid_utf16_input) noexcept { + return utf8_length_from_utf16le(valid_utf16_input.data(), + valid_utf16_input.size()); +} +#endif /** * Compute the number of bytes that this UTF-16BE string would require in UTF-8 @@ -2202,6 +2805,13 @@ simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, */ simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +utf8_length_from_utf16be(std::span valid_utf16_input) noexcept { + return utf8_length_from_utf16be(valid_utf16_input.data(), + valid_utf16_input.size()); +} +#endif /** * Convert possibly broken UTF-32 string into UTF-8 string. @@ -2219,6 +2829,14 @@ simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf8( + std::span utf32_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Convert possibly broken UTF-32 string into UTF-8 string and stop on error. @@ -2238,6 +2856,16 @@ simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, */ simdutf_warn_unused result convert_utf32_to_utf8_with_errors( const char32_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf32_to_utf8_with_errors( + std::span utf32_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_utf32_to_utf8_with_errors( + utf32_input.data(), utf32_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Convert valid UTF-32 string into UTF-8 string. @@ -2254,6 +2882,15 @@ simdutf_warn_unused result convert_utf32_to_utf8_with_errors( */ simdutf_warn_unused size_t convert_valid_utf32_to_utf8( const char32_t *input, size_t length, char *utf8_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf8( + std::span valid_utf32_input, + detail::output_span_of_byte_like auto &&utf8_output) noexcept { + return convert_valid_utf32_to_utf8( + valid_utf32_input.data(), valid_utf32_input.size(), + reinterpret_cast(utf8_output.data())); +} +#endif /** * Using native endianness, convert possibly broken UTF-32 string into a UTF-16 @@ -2271,6 +2908,14 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf8( */ simdutf_warn_unused size_t convert_utf32_to_utf16( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_utf32_to_utf16(std::span utf32_input, + std::span utf16_output) noexcept { + return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(), + utf16_output.data()); +} +#endif /** * Convert possibly broken UTF-32 string into UTF-16LE string. @@ -2287,6 +2932,14 @@ simdutf_warn_unused size_t convert_utf32_to_utf16( */ simdutf_warn_unused size_t convert_utf32_to_utf16le( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_utf32_to_utf16le(std::span utf32_input, + std::span utf16_output) noexcept { + return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(), + utf16_output.data()); +} +#endif /** * Convert possibly broken UTF-32 string into Latin1 string. @@ -2304,6 +2957,15 @@ simdutf_warn_unused size_t convert_utf32_to_utf16le( */ simdutf_warn_unused size_t convert_utf32_to_latin1( const char32_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_latin1( + std::span utf32_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_utf32_to_latin1( + utf32_input.data(), utf32_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Convert possibly broken UTF-32 string into Latin1 string and stop on error. @@ -2324,6 +2986,16 @@ simdutf_warn_unused size_t convert_utf32_to_latin1( */ simdutf_warn_unused result convert_utf32_to_latin1_with_errors( const char32_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf32_to_latin1_with_errors( + std::span utf32_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_utf32_to_latin1_with_errors( + utf32_input.data(), utf32_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Convert valid UTF-32 string into Latin1 string. @@ -2347,6 +3019,15 @@ simdutf_warn_unused result convert_utf32_to_latin1_with_errors( */ simdutf_warn_unused size_t convert_valid_utf32_to_latin1( const char32_t *input, size_t length, char *latin1_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_latin1( + std::span valid_utf32_input, + detail::output_span_of_byte_like auto &&latin1_output) noexcept { + return convert_valid_utf32_to_latin1( + valid_utf32_input.data(), valid_utf32_input.size(), + reinterpret_cast(latin1_output.data())); +} +#endif /** * Convert possibly broken UTF-32 string into UTF-16BE string. @@ -2363,6 +3044,14 @@ simdutf_warn_unused size_t convert_valid_utf32_to_latin1( */ simdutf_warn_unused size_t convert_utf32_to_utf16be( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_utf32_to_utf16be(std::span utf32_input, + std::span utf16_output) noexcept { + return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(), + utf16_output.data()); +} +#endif /** * Using native endianness, convert possibly broken UTF-32 string into UTF-16 @@ -2383,6 +3072,14 @@ simdutf_warn_unused size_t convert_utf32_to_utf16be( */ simdutf_warn_unused result convert_utf32_to_utf16_with_errors( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf32_to_utf16_with_errors(std::span utf32_input, + std::span utf16_output) noexcept { + return convert_utf32_to_utf16_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); +} +#endif /** * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error. @@ -2402,6 +3099,15 @@ simdutf_warn_unused result convert_utf32_to_utf16_with_errors( */ simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf32_to_utf16le_with_errors( + std::span utf32_input, + std::span utf16_output) noexcept { + return convert_utf32_to_utf16le_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); +} +#endif /** * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error. @@ -2421,6 +3127,15 @@ simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( */ simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result +convert_utf32_to_utf16be_with_errors( + std::span utf32_input, + std::span utf16_output) noexcept { + return convert_utf32_to_utf16be_with_errors( + utf32_input.data(), utf32_input.size(), utf16_output.data()); +} +#endif /** * Using native endianness, convert valid UTF-32 string into a UTF-16 string. @@ -2437,6 +3152,14 @@ simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( */ simdutf_warn_unused size_t convert_valid_utf32_to_utf16( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_valid_utf32_to_utf16(std::span valid_utf32_input, + std::span utf16_output) noexcept { + return convert_valid_utf32_to_utf16( + valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); +} +#endif /** * Convert valid UTF-32 string into UTF-16LE string. @@ -2453,6 +3176,14 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16( */ simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_valid_utf32_to_utf16le(std::span valid_utf32_input, + std::span utf16_output) noexcept { + return convert_valid_utf32_to_utf16le( + valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); +} +#endif /** * Convert valid UTF-32 string into UTF-16BE string. @@ -2469,6 +3200,14 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( */ simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +convert_valid_utf32_to_utf16be(std::span valid_utf32_input, + std::span utf16_output) noexcept { + return convert_valid_utf32_to_utf16be( + valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); +} +#endif /** * Change the endianness of the input. Can be used to go from UTF-16LE to @@ -2485,6 +3224,14 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( */ void change_endianness_utf16(const char16_t *input, size_t length, char16_t *output) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline void +change_endianness_utf16(std::span utf16_input, + std::span utf16_output) noexcept { + return change_endianness_utf16(utf16_input.data(), utf16_input.size(), + utf16_output.data()); +} +#endif /** * Compute the number of bytes that this UTF-32 string would require in UTF-8 @@ -2499,6 +3246,13 @@ void change_endianness_utf16(const char16_t *input, size_t length, */ simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +utf8_length_from_utf32(std::span valid_utf32_input) noexcept { + return utf8_length_from_utf32(valid_utf32_input.data(), + valid_utf32_input.size()); +} +#endif /** * Compute the number of two-byte code units that this UTF-32 string would @@ -2513,6 +3267,13 @@ simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, */ simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +utf16_length_from_utf32(std::span valid_utf32_input) noexcept { + return utf16_length_from_utf32(valid_utf32_input.data(), + valid_utf32_input.size()); +} +#endif /** * Using native endianness; Compute the number of bytes that this UTF-16 @@ -2531,6 +3292,13 @@ simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, */ simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +utf32_length_from_utf16(std::span valid_utf16_input) noexcept { + return utf32_length_from_utf16(valid_utf16_input.data(), + valid_utf16_input.size()); +} +#endif /** * Compute the number of bytes that this UTF-16LE string would require in UTF-32 @@ -2549,6 +3317,13 @@ simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, */ simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16le( + std::span valid_utf16_input) noexcept { + return utf32_length_from_utf16le(valid_utf16_input.data(), + valid_utf16_input.size()); +} +#endif /** * Compute the number of bytes that this UTF-16BE string would require in UTF-32 @@ -2567,6 +3342,13 @@ simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, */ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16be( + std::span valid_utf16_input) noexcept { + return utf32_length_from_utf16be(valid_utf16_input.data(), + valid_utf16_input.size()); +} +#endif /** * Count the number of code points (characters) in the string assuming that @@ -2584,6 +3366,12 @@ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, */ simdutf_warn_unused size_t count_utf16(const char16_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +count_utf16(std::span valid_utf16_input) noexcept { + return count_utf16(valid_utf16_input.data(), valid_utf16_input.size()); +} +#endif /** * Count the number of code points (characters) in the string assuming that @@ -2601,6 +3389,12 @@ simdutf_warn_unused size_t count_utf16(const char16_t *input, */ simdutf_warn_unused size_t count_utf16le(const char16_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +count_utf16le(std::span valid_utf16_input) noexcept { + return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size()); +} +#endif /** * Count the number of code points (characters) in the string assuming that @@ -2618,6 +3412,12 @@ simdutf_warn_unused size_t count_utf16le(const char16_t *input, */ simdutf_warn_unused size_t count_utf16be(const char16_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +count_utf16be(std::span valid_utf16_input) noexcept { + return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size()); +} +#endif /** * Count the number of code points (characters) in the string assuming that @@ -2633,6 +3433,13 @@ simdutf_warn_unused size_t count_utf16be(const char16_t *input, */ simdutf_warn_unused size_t count_utf8(const char *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t count_utf8( + const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { + return count_utf8(reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); +} +#endif /** * Given a valid UTF-8 string having a possibly truncated last character, @@ -2649,6 +3456,14 @@ simdutf_warn_unused size_t count_utf8(const char *input, * @return the length of the string in bytes, possibly shorter by 1 to 3 bytes */ simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length); +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t trim_partial_utf8( + const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { + return trim_partial_utf8( + reinterpret_cast(valid_utf8_input.data()), + valid_utf8_input.size()); +} +#endif /** * Given a valid UTF-16BE string having a possibly truncated last character, @@ -2666,6 +3481,13 @@ simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length); */ simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, size_t length); +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +trim_partial_utf16be(std::span valid_utf16_input) noexcept { + return trim_partial_utf16be(valid_utf16_input.data(), + valid_utf16_input.size()); +} +#endif /** * Given a valid UTF-16LE string having a possibly truncated last character, @@ -2683,6 +3505,13 @@ simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, */ simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, size_t length); +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +trim_partial_utf16le(std::span valid_utf16_input) noexcept { + return trim_partial_utf16le(valid_utf16_input.data(), + valid_utf16_input.size()); +} +#endif /** * Given a valid UTF-16 string having a possibly truncated last character, @@ -2700,6 +3529,12 @@ simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, */ simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, size_t length); +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +trim_partial_utf16(std::span valid_utf16_input) noexcept { + return trim_partial_utf16(valid_utf16_input.data(), valid_utf16_input.size()); +} +#endif // base64_options are used to specify the base64 encoding options. // ASCII spaces are ' ', '\t', '\n', '\r', '\f' @@ -2742,6 +3577,14 @@ enum last_chunk_handling_options : uint64_t { */ simdutf_warn_unused size_t maximal_binary_length_from_base64(const char *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +maximal_binary_length_from_base64( + const detail::input_span_of_byte_like auto &input) noexcept { + return maximal_binary_length_from_base64( + reinterpret_cast(input.data()), input.size()); +} +#endif /** * Provide the maximal binary length in bytes given the base64 input. @@ -2755,6 +3598,12 @@ maximal_binary_length_from_base64(const char *input, size_t length) noexcept; */ simdutf_warn_unused size_t maximal_binary_length_from_base64( const char16_t *input, size_t length) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +maximal_binary_length_from_base64(std::span input) noexcept { + return maximal_binary_length_from_base64(input.data(), input.size()); +} +#endif /** * Convert a base64 input to a binary output. @@ -2814,6 +3663,18 @@ simdutf_warn_unused result base64_to_binary( const char *input, size_t length, char *output, base64_options options = base64_default, last_chunk_handling_options last_chunk_options = loose) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result base64_to_binary( + const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose) noexcept { + return base64_to_binary(reinterpret_cast(input.data()), + input.size(), + reinterpret_cast(binary_output.data()), + options, last_chunk_options); +} +#endif /** * Provide the base64 length in bytes given the length of a binary input. @@ -2847,6 +3708,16 @@ simdutf_warn_unused size_t base64_length_from_binary( */ size_t binary_to_base64(const char *input, size_t length, char *output, base64_options options = base64_default) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused size_t +binary_to_base64(const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default) noexcept { + return binary_to_base64( + reinterpret_cast(input.data()), input.size(), + reinterpret_cast(binary_output.data()), options); +} +#endif /** * Convert a base64 input to a binary output. @@ -2909,6 +3780,17 @@ base64_to_binary(const char16_t *input, size_t length, char *output, base64_options options = base64_default, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result base64_to_binary( + std::span input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose) noexcept { + return base64_to_binary(input.data(), input.size(), + reinterpret_cast(binary_output.data()), + options, last_chunk_options); +} +#endif /** * Convert a base64 input to a binary output. @@ -2976,11 +3858,43 @@ base64_to_binary_safe(const char *input, size_t length, char *output, size_t &outlen, base64_options options = base64_default, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result base64_to_binary_safe( + const detail::input_span_of_byte_like auto &input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose) noexcept { + // we can't write the outlen to the provided output span, the user will have + // to pick it up from the returned value instead (assuming success). we still + // get the benefit of providing info of how long the output buffer is. + size_t outlen = binary_output.size(); + return base64_to_binary_safe(reinterpret_cast(input.data()), + input.size(), + reinterpret_cast(binary_output.data()), + outlen, options, last_chunk_options); +} +#endif + simdutf_warn_unused result base64_to_binary_safe(const char16_t *input, size_t length, char *output, size_t &outlen, base64_options options = base64_default, last_chunk_handling_options last_chunk_options = last_chunk_handling_options::loose) noexcept; +#if SIMDUTF_SPAN +simdutf_really_inline simdutf_warn_unused result base64_to_binary_safe( + std::span input, + detail::output_span_of_byte_like auto &&binary_output, + base64_options options = base64_default, + last_chunk_handling_options last_chunk_options = loose) noexcept { + // we can't write the outlen to the provided output span, the user will have + // to pick it up from the returned value instead (assuming success). we still + // get the benefit of providing info of how long the output buffer is. + size_t outlen = binary_output.size(); + return base64_to_binary_safe(input.data(), input.size(), + reinterpret_cast(binary_output.data()), + outlen, options, last_chunk_options); +} +#endif /** * An implementation of simdutf for a particular CPU architecture. @@ -4243,7 +5157,7 @@ class implementation { simdutf_warn_unused virtual size_t latin1_length_from_utf8(const char *input, size_t length) const noexcept = 0; - /* + /** * Compute the number of bytes that this UTF-16LE/BE string would require in * Latin1 format. * @@ -4289,7 +5203,7 @@ class implementation { simdutf_warn_unused virtual size_t utf32_length_from_latin1(size_t length) const noexcept = 0; - /* + /** * Compute the number of bytes that this UTF-16LE string would require in * UTF-32 format. * @@ -4310,7 +5224,7 @@ class implementation { utf32_length_from_utf16le(const char16_t *input, size_t length) const noexcept = 0; - /* + /** * Compute the number of bytes that this UTF-16BE string would require in * UTF-32 format. *