From 78ba48a60dbfe4999275b747dba2b435e09b6c62 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sun, 19 Jan 2025 05:18:59 -0700 Subject: [PATCH] Fixes to utf8_to_bytes derivative functions This commit turns them into inline functions instead of macros and changes the type of the free_me parameter to void **, which is a more accurate type for it. The new inline functions invoke their PERL_ARGS_ASSERT_ macros so that the NN checks generated in proto.h are actually used. --- embed.fnc | 10 +++++----- embed.h | 6 +++--- hv.c | 16 ++++++++-------- inline.h | 40 ++++++++++++++++++++++++++++++++++++++++ pp.c | 10 ++++++---- proto.h | 26 ++++++++++++++++---------- utf8.c | 4 ++-- utf8.h | 13 ------------- 8 files changed, 80 insertions(+), 45 deletions(-) diff --git a/embed.fnc b/embed.fnc index 05f0e1879c66..cc2b9ac7aaaf 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3711,16 +3711,16 @@ Adpx |U8 * |utf8_to_bytes |NN U8 *s \ |NN STRLEN *lenp Cp |bool |utf8_to_bytes_ |NN U8 **s_ptr \ |NN STRLEN *lenp \ - |NN U8 **free_me \ + |NN void **free_me \ |Perl_utf8_to_bytes_arg result_as -Admp |bool |utf8_to_bytes_new_pv \ +Adip |bool |utf8_to_bytes_new_pv \ |NN U8 const **s_ptr \ |NN STRLEN *lenp \ - |NN U8 *free_me -Admp |bool |utf8_to_bytes_overwrite \ + |NN void **free_me +Adip |bool |utf8_to_bytes_overwrite \ |NN U8 **s_ptr \ |NN STRLEN *lenp -Admp |bool |utf8_to_bytes_temp_pv \ +Adip |bool |utf8_to_bytes_temp_pv \ |NN U8 const **s_ptr \ |NN STRLEN *lenp EMXp |U8 * |utf16_to_utf8 |NN U8 *p \ diff --git a/embed.h b/embed.h index c34e5fc27216..883b2fb72bdb 100644 --- a/embed.h +++ b/embed.h @@ -863,9 +863,9 @@ # define utf8_length(a,b) Perl_utf8_length(aTHX_ a,b) # define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b) # define utf8_to_bytes_(a,b,c,d) Perl_utf8_to_bytes_(aTHX_ a,b,c,d) -# define utf8_to_bytes_new_pv(a,b,c) Perl_utf8_to_bytes_new_pv(aTHX,a,b,c) -# define utf8_to_bytes_overwrite(a,b) Perl_utf8_to_bytes_overwrite(aTHX,a,b) -# define utf8_to_bytes_temp_pv(a,b) Perl_utf8_to_bytes_temp_pv(aTHX,a,b) +# define utf8_to_bytes_new_pv(a,b,c) Perl_utf8_to_bytes_new_pv(aTHX_ a,b,c) +# define 
utf8_to_bytes_overwrite(a,b) Perl_utf8_to_bytes_overwrite(aTHX_ a,b) +# define utf8_to_bytes_temp_pv(a,b) Perl_utf8_to_bytes_temp_pv(aTHX_ a,b) # define utf8_to_uv Perl_utf8_to_uv # define utf8_to_uv_errors Perl_utf8_to_uv_errors # define utf8_to_uv_flags Perl_utf8_to_uv_flags diff --git a/hv.c b/hv.c index 59cc38c26ac9..5d2816d72737 100644 --- a/hv.c +++ b/hv.c @@ -1338,9 +1338,9 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, if (is_utf8 && !(k_flags & HVhek_KEYCANONICAL)) { const char * const keysave = key; - U8 * free_me = NULL; + void * free_me = NULL; - if (! utf8_to_bytes_new_pv(&key, &klen, &free_me)) { + if (! utf8_to_bytes_new_pv((const U8 **) &key, &klen, &free_me)) { k_flags |= HVhek_UTF8; } else { @@ -3270,8 +3270,8 @@ S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash) } else if (len < 0) { STRLEN tmplen = -len; /* See the note in hv_fetch(). --jhi */ - U8 * free_str = NULL; - if (! utf8_to_bytes_new_pv(&str, &tmplen, &free_str)) { + void * free_str = NULL; + if (! utf8_to_bytes_new_pv((const U8 **) &str, &tmplen, &free_str)) { k_flags = HVhek_UTF8; } else { @@ -3687,7 +3687,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain, PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PVN; U8 utf8_flag; - U8 * free_me = NULL; + void * free_me = NULL; if (flags & ~(REFCOUNTED_HE_KEY_UTF8|REFCOUNTED_HE_EXISTS)) Perl_croak(aTHX_ "panic: refcounted_he_fetch_pvn bad flags %" UVxf, @@ -3696,7 +3696,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain, goto ret; /* For searching purposes, canonicalise to Latin-1 where possible. 
*/ if ( flags & REFCOUNTED_HE_KEY_UTF8 - && utf8_to_bytes_new_pv(&keypv, &keylen, &free_me)) + && utf8_to_bytes_new_pv((const U8 **) &keypv, &keylen, &free_me)) { flags &= ~REFCOUNTED_HE_KEY_UTF8; } @@ -3821,7 +3821,7 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent, char hekflags; STRLEN key_offset = 1; struct refcounted_he *he; - U8 * free_me = NULL; + void * free_me = NULL; if (!value || value == &PL_sv_placeholder) { value_type = HVrhek_delete; @@ -3847,7 +3847,7 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent, /* Canonicalise to Latin-1 where possible. */ if ( (flags & REFCOUNTED_HE_KEY_UTF8) - && utf8_to_bytes_new_pv(&keypv, &keylen, &free_me)) + && utf8_to_bytes_new_pv((const U8 **) &keypv, &keylen, &free_me)) { flags &= ~REFCOUNTED_HE_KEY_UTF8; } diff --git a/inline.h b/inline.h index 490e09da4fca..bad3967673f4 100644 --- a/inline.h +++ b/inline.h @@ -1236,6 +1236,46 @@ Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp) return bytes_to_utf8_free_me(s, lenp, NULL); } +PERL_STATIC_INLINE bool +Perl_utf8_to_bytes_new_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp, void ** free_me) +{ + PERL_ARGS_ASSERT_UTF8_TO_BYTES_NEW_PV; + + /* utf8_to_bytes_() is declared to take a non-const s_ptr because it may + * change it, but NOT when called with PL_utf8_to_bytes_new_memory, so it + * is ok to cast away const */ + return utf8_to_bytes_((U8 **) s_ptr, lenp, free_me, + PL_utf8_to_bytes_new_memory); +} + +PERL_STATIC_INLINE bool +Perl_utf8_to_bytes_temp_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp) +{ + PERL_ARGS_ASSERT_UTF8_TO_BYTES_TEMP_PV; + + /* utf8_to_bytes_() asserts that free_me is non-NULL, but doesn't use it + * when called with PL_utf8_to_bytes_use_temporary */ + void* dummy = NULL; + + /* utf8_to_bytes_() is declared to take a non-const s_ptr because it may + * change it, but NOT when called with PL_utf8_to_bytes_use_temporary, so + * it is ok to cast away const */ + return utf8_to_bytes_((U8 **) s_ptr, lenp, &dummy, 
PL_utf8_to_bytes_use_temporary); +} + +PERL_STATIC_INLINE bool +Perl_utf8_to_bytes_overwrite(pTHX_ U8 **s_ptr, STRLEN *lenp) +{ + PERL_ARGS_ASSERT_UTF8_TO_BYTES_OVERWRITE; + + /* utf8_to_bytes_() asserts that free_me is non-NULL, but doesn't use it + * when called with PL_utf8_to_bytes_overwrite */ + void* dummy = NULL; + + return utf8_to_bytes_(s_ptr, lenp, &dummy, PL_utf8_to_bytes_overwrite); +} + /* =for apidoc valid_utf8_to_uvchr Like C>, but should only be called when it is diff --git a/pp.c b/pp.c index 00d6684c93d9..2b853656d1e6 100644 --- a/pp.c +++ b/pp.c @@ -793,7 +793,7 @@ S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping) s = SvPV(sv, len); if (chomping) { if (s && len) { - U8 *temp_buffer = NULL; + void *temp_buffer = NULL; s += --len; if (RsPARA(PL_rs)) { if (*s != '\n') @@ -817,7 +817,7 @@ S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping) /* Assumption is that rs is shorter than the scalar. */ if (SvUTF8(PL_rs)) { /* RS is utf8, scalar is 8 bit. */ - if (! utf8_to_bytes_new_pv(&rsptr, &rslen, + if (! utf8_to_bytes_new_pv((const U8 **) &rsptr, &rslen, &temp_buffer)) { /* Cannot downgrade, therefore cannot possibly @@ -3911,8 +3911,10 @@ PP(pp_index) if (little_utf8) { /* Well, maybe instead we might be able to downgrade the small string? 
*/ - U8 * free_little_p = NULL; - if (utf8_to_bytes_new_pv(&little_p, &llen, &free_little_p)) { + void * free_little_p = NULL; + if (utf8_to_bytes_new_pv((const U8 **) &little_p, &llen, + &free_little_p)) + { little_utf8 = false; /* Here 'little_p' is in byte form, and 'free_little_p' is diff --git a/proto.h b/proto.h index 40a98587be69..513965b4ef88 100644 --- a/proto.h +++ b/proto.h @@ -5350,19 +5350,10 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp); assert(s); assert(lenp) PERL_CALLCONV bool -Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, U8 **free_me, Perl_utf8_to_bytes_arg result_as); +Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void **free_me, Perl_utf8_to_bytes_arg result_as); #define PERL_ARGS_ASSERT_UTF8_TO_BYTES_ \ assert(s_ptr); assert(lenp); assert(free_me) -/* PERL_CALLCONV bool -Perl_utf8_to_bytes_new_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp, U8 *free_me); */ - -/* PERL_CALLCONV bool -Perl_utf8_to_bytes_overwrite(pTHX_ U8 **s_ptr, STRLEN *lenp); */ - -/* PERL_CALLCONV bool -Perl_utf8_to_bytes_temp_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp); */ - PERL_CALLCONV U8 * Perl_utf8_to_utf16_base(pTHX_ U8 *s, U8 *d, Size_t bytelen, Size_t *newlen, const bool high, const bool low); #define PERL_ARGS_ASSERT_UTF8_TO_UTF16_BASE \ @@ -10039,6 +10030,21 @@ Perl_utf8_hop_overshoot(const U8 *s, SSize_t off, const U8 * const start, const # define PERL_ARGS_ASSERT_UTF8_HOP_OVERSHOOT \ assert(s); assert(start); assert(end) +PERL_STATIC_INLINE bool +Perl_utf8_to_bytes_new_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp, void **free_me); +# define PERL_ARGS_ASSERT_UTF8_TO_BYTES_NEW_PV \ + assert(s_ptr); assert(lenp); assert(free_me) + +PERL_STATIC_INLINE bool +Perl_utf8_to_bytes_overwrite(pTHX_ U8 **s_ptr, STRLEN *lenp); +# define PERL_ARGS_ASSERT_UTF8_TO_BYTES_OVERWRITE \ + assert(s_ptr); assert(lenp) + +PERL_STATIC_INLINE bool +Perl_utf8_to_bytes_temp_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp); +# define PERL_ARGS_ASSERT_UTF8_TO_BYTES_TEMP_PV \ + assert(s_ptr); 
assert(lenp) + PERL_STATIC_INLINE bool Perl_utf8_to_uv_msgs(const U8 * const s0, const U8 *e, UV *cp_p, Size_t *advance_p, U32 flags, U32 *errors, AV **msgs); # define PERL_ARGS_ASSERT_UTF8_TO_UV_MSGS \ diff --git a/utf8.c b/utf8.c index 3c5898e7f05e..1ff620361526 100644 --- a/utf8.c +++ b/utf8.c @@ -2940,7 +2940,7 @@ New code should use the first three functions listed above. */ bool -Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, U8 ** free_me, +Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me, Perl_utf8_to_bytes_arg result_as) { PERL_ARGS_ASSERT_UTF8_TO_BYTES_; @@ -3219,7 +3219,7 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *lenp, bool *is_utf8p) PERL_ARGS_ASSERT_BYTES_FROM_UTF8; if (*is_utf8p) { - U8 * new_memory = NULL; + void * new_memory = NULL; if (utf8_to_bytes_new_pv(&s, lenp, &new_memory)) { *is_utf8p = false; diff --git a/utf8.h b/utf8.h index 6ed7c3304e4d..34e055e06892 100644 --- a/utf8.h +++ b/utf8.h @@ -1336,19 +1336,6 @@ typedef enum { PL_utf8_to_bytes_use_temporary, } Perl_utf8_to_bytes_arg; -/* INT2PTR() is because this parameter should not be used in this case, but - * there is a NN assertion for it. It causes that to pass but to still - * segfault if wrongly gets used */ -#define Perl_utf8_to_bytes_overwrite(mTHX, s, l) \ - Perl_utf8_to_bytes_(aTHX_ s, l, INT2PTR(U8 **, 1), \ - PL_utf8_to_bytes_overwrite) -#define Perl_utf8_to_bytes_new_pv(mTHX, s, l, f) \ - Perl_utf8_to_bytes_(aTHX_ (U8 **) s, l, f, \ - PL_utf8_to_bytes_new_memory) -#define Perl_utf8_to_bytes_temp_pv(mTHX, s, l) \ - Perl_utf8_to_bytes_(aTHX_ (U8 **) s, l, INT2PTR(U8 **, 1), \ - PL_utf8_to_bytes_use_temporary) - /* Do not use; should be deprecated. Use isUTF8_CHAR() instead; this is * retained solely for backwards compatibility */ #define IS_UTF8_CHAR(p, n) (isUTF8_CHAR(p, (p) + (n)) == n)