Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes to utf8_to_bytes derivative functions #22932

Merged
merged 1 commit into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -3711,16 +3711,16 @@ Adpx |U8 * |utf8_to_bytes |NN U8 *s \
|NN STRLEN *lenp
Cp |bool |utf8_to_bytes_ |NN U8 **s_ptr \
|NN STRLEN *lenp \
|NN U8 **free_me \
|NN void **free_me \
|Perl_utf8_to_bytes_arg result_as
Admp |bool |utf8_to_bytes_new_pv \
Adip |bool |utf8_to_bytes_new_pv \
|NN U8 const **s_ptr \
|NN STRLEN *lenp \
|NN U8 *free_me
Admp |bool |utf8_to_bytes_overwrite \
|NN void **free_me
Adip |bool |utf8_to_bytes_overwrite \
|NN U8 **s_ptr \
|NN STRLEN *lenp
Admp |bool |utf8_to_bytes_temp_pv \
Adip |bool |utf8_to_bytes_temp_pv \
|NN U8 const **s_ptr \
|NN STRLEN *lenp
EMXp |U8 * |utf16_to_utf8 |NN U8 *p \
Expand Down
6 changes: 3 additions & 3 deletions embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -863,9 +863,9 @@
# define utf8_length(a,b) Perl_utf8_length(aTHX_ a,b)
# define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b)
# define utf8_to_bytes_(a,b,c,d) Perl_utf8_to_bytes_(aTHX_ a,b,c,d)
# define utf8_to_bytes_new_pv(a,b,c) Perl_utf8_to_bytes_new_pv(aTHX,a,b,c)
# define utf8_to_bytes_overwrite(a,b) Perl_utf8_to_bytes_overwrite(aTHX,a,b)
# define utf8_to_bytes_temp_pv(a,b) Perl_utf8_to_bytes_temp_pv(aTHX,a,b)
# define utf8_to_bytes_new_pv(a,b,c) Perl_utf8_to_bytes_new_pv(aTHX_ a,b,c)
# define utf8_to_bytes_overwrite(a,b) Perl_utf8_to_bytes_overwrite(aTHX_ a,b)
# define utf8_to_bytes_temp_pv(a,b) Perl_utf8_to_bytes_temp_pv(aTHX_ a,b)
# define utf8_to_uv Perl_utf8_to_uv
# define utf8_to_uv_errors Perl_utf8_to_uv_errors
# define utf8_to_uv_flags Perl_utf8_to_uv_flags
Expand Down
16 changes: 8 additions & 8 deletions hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1338,9 +1338,9 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,

if (is_utf8 && !(k_flags & HVhek_KEYCANONICAL)) {
const char * const keysave = key;
U8 * free_me = NULL;
void * free_me = NULL;

if (! utf8_to_bytes_new_pv(&key, &klen, &free_me)) {
if (! utf8_to_bytes_new_pv((const U8 **) &key, &klen, &free_me)) {
k_flags |= HVhek_UTF8;
}
else {
Expand Down Expand Up @@ -3270,8 +3270,8 @@ S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash)
} else if (len < 0) {
STRLEN tmplen = -len;
/* See the note in hv_fetch(). --jhi */
U8 * free_str = NULL;
if (! utf8_to_bytes_new_pv(&str, &tmplen, &free_str)) {
void * free_str = NULL;
if (! utf8_to_bytes_new_pv((const U8 **) &str, &tmplen, &free_str)) {
k_flags = HVhek_UTF8;
}
else {
Expand Down Expand Up @@ -3687,7 +3687,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PVN;

U8 utf8_flag;
U8 * free_me = NULL;
void * free_me = NULL;

if (flags & ~(REFCOUNTED_HE_KEY_UTF8|REFCOUNTED_HE_EXISTS))
Perl_croak(aTHX_ "panic: refcounted_he_fetch_pvn bad flags %" UVxf,
Expand All @@ -3696,7 +3696,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
goto ret;
/* For searching purposes, canonicalise to Latin-1 where possible. */
if ( flags & REFCOUNTED_HE_KEY_UTF8
&& utf8_to_bytes_new_pv(&keypv, &keylen, &free_me))
&& utf8_to_bytes_new_pv((const U8 **) &keypv, &keylen, &free_me))
{
flags &= ~REFCOUNTED_HE_KEY_UTF8;
}
Expand Down Expand Up @@ -3821,7 +3821,7 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
char hekflags;
STRLEN key_offset = 1;
struct refcounted_he *he;
U8 * free_me = NULL;
void * free_me = NULL;

if (!value || value == &PL_sv_placeholder) {
value_type = HVrhek_delete;
Expand All @@ -3847,7 +3847,7 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,

/* Canonicalise to Latin-1 where possible. */
if ( (flags & REFCOUNTED_HE_KEY_UTF8)
&& utf8_to_bytes_new_pv(&keypv, &keylen, &free_me))
&& utf8_to_bytes_new_pv((const U8 **) &keypv, &keylen, &free_me))
{
flags &= ~REFCOUNTED_HE_KEY_UTF8;
}
Expand Down
36 changes: 35 additions & 1 deletion inline.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* inline.h
/*> inline.h
*
* Copyright (C) 2012 by Larry Wall and others
*
Expand Down Expand Up @@ -1236,6 +1236,40 @@ Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp)
return bytes_to_utf8_free_me(s, lenp, NULL);
}

PERL_STATIC_INLINE bool
Perl_utf8_to_bytes_new_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp, void ** free_me)
{
    /* Thin wrapper: forwards to utf8_to_bytes_() in its
     * PL_utf8_to_bytes_new_memory mode and returns its result unchanged.
     *
     * The worker's prototype has a non-const 's_ptr' because some of its
     * modes may change it.  The PL_utf8_to_bytes_new_memory mode is not one
     * of those, so stripping the const qualifier here is safe. */
    U8 ** const unqualified_s_ptr = (U8 **) s_ptr;

    return utf8_to_bytes_(unqualified_s_ptr, lenp, free_me,
                          PL_utf8_to_bytes_new_memory);
}

PERL_STATIC_INLINE bool
Perl_utf8_to_bytes_temp_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp)
{
    /* Thin wrapper: forwards to utf8_to_bytes_() in its
     * PL_utf8_to_bytes_use_temporary mode and returns its result unchanged.
     *
     * The worker insists on a non-NULL 'free_me' argument even though that
     * argument goes unused in this mode, so hand it the address of a
     * throwaway local. */
    void *placeholder = NULL;

    /* The worker's prototype has a non-const 's_ptr' because some of its
     * modes may change it.  PL_utf8_to_bytes_use_temporary is not one of
     * those, so stripping the const qualifier here is safe. */
    U8 ** const unqualified_s_ptr = (U8 **) s_ptr;

    return utf8_to_bytes_(unqualified_s_ptr, lenp, &placeholder,
                          PL_utf8_to_bytes_use_temporary);
}

PERL_STATIC_INLINE bool
Perl_utf8_to_bytes_overwrite(pTHX_ U8 **s_ptr, STRLEN *lenp)
{
    /* Thin wrapper: forwards to utf8_to_bytes_() in its
     * PL_utf8_to_bytes_overwrite mode and returns its result unchanged.
     *
     * The worker insists on a non-NULL 'free_me' argument even though that
     * argument goes unused in this mode, so hand it the address of a
     * throwaway local. */
    void *placeholder = NULL;
    void ** const ignored_free_me = &placeholder;

    return utf8_to_bytes_(s_ptr, lenp, ignored_free_me,
                          PL_utf8_to_bytes_overwrite);
}

/*
=for apidoc valid_utf8_to_uvchr
Like C<L<perlapi/utf8_to_uvchr_buf>>, but should only be called when it is
Expand Down
10 changes: 6 additions & 4 deletions pp.c
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
s = SvPV(sv, len);
if (chomping) {
if (s && len) {
U8 *temp_buffer = NULL;
void *temp_buffer = NULL;
s += --len;
if (RsPARA(PL_rs)) {
if (*s != '\n')
Expand All @@ -817,7 +817,7 @@ S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
/* Assumption is that rs is shorter than the scalar. */
if (SvUTF8(PL_rs)) {
/* RS is utf8, scalar is 8 bit. */
if (! utf8_to_bytes_new_pv(&rsptr, &rslen,
if (! utf8_to_bytes_new_pv((const U8 **) &rsptr, &rslen,
&temp_buffer))
{
/* Cannot downgrade, therefore cannot possibly
Expand Down Expand Up @@ -3911,8 +3911,10 @@ PP(pp_index)
if (little_utf8) {
/* Well, maybe instead we might be able to downgrade the small
string? */
U8 * free_little_p = NULL;
if (utf8_to_bytes_new_pv(&little_p, &llen, &free_little_p)) {
void * free_little_p = NULL;
if (utf8_to_bytes_new_pv((const U8 **) &little_p, &llen,
&free_little_p))
{
little_utf8 = false;

/* Here 'little_p' is in byte form, and 'free_little_p' is
Expand Down
26 changes: 16 additions & 10 deletions proto.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -2940,7 +2940,7 @@ New code should use the first three functions listed above.
*/

bool
Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, U8 ** free_me,
Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
Perl_utf8_to_bytes_arg result_as)
{
PERL_ARGS_ASSERT_UTF8_TO_BYTES_;
Expand Down Expand Up @@ -3219,7 +3219,7 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *lenp, bool *is_utf8p)
PERL_ARGS_ASSERT_BYTES_FROM_UTF8;

if (*is_utf8p) {
U8 * new_memory = NULL;
void * new_memory = NULL;
if (utf8_to_bytes_new_pv(&s, lenp, &new_memory)) {
*is_utf8p = false;

Expand Down
13 changes: 0 additions & 13 deletions utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -1336,19 +1336,6 @@ typedef enum {
PL_utf8_to_bytes_use_temporary,
} Perl_utf8_to_bytes_arg;

/* INT2PTR() is because this parameter should not be used in this case, but
* there is a NN assertion for it. It causes that to pass but to still
* segfault if wrongly gets used */
#define Perl_utf8_to_bytes_overwrite(mTHX, s, l) \
Perl_utf8_to_bytes_(aTHX_ s, l, INT2PTR(U8 **, 1), \
PL_utf8_to_bytes_overwrite)
#define Perl_utf8_to_bytes_new_pv(mTHX, s, l, f) \
Perl_utf8_to_bytes_(aTHX_ (U8 **) s, l, f, \
PL_utf8_to_bytes_new_memory)
#define Perl_utf8_to_bytes_temp_pv(mTHX, s, l) \
Perl_utf8_to_bytes_(aTHX_ (U8 **) s, l, INT2PTR(U8 **, 1), \
PL_utf8_to_bytes_use_temporary)

/* Do not use; should be deprecated. Use isUTF8_CHAR() instead; this is
* retained solely for backwards compatibility */
#define IS_UTF8_CHAR(p, n) (isUTF8_CHAR(p, (p) + (n)) == n)
Expand Down
Loading