diff --git a/utf8.c b/utf8.c index c06c546ae7e0..f1abd238cae3 100644 --- a/utf8.c +++ b/utf8.c @@ -1590,13 +1590,6 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, { PERL_ARGS_ASSERT_UTF8_TO_UV_MSGS_HELPER_; - const U8 * s = s0; - - U32 possible_problems; /* A bit is set here for each potential problem - found as we go along */ - UV uv; - SSize_t expectlen; /* How long should this sequence be? */ - /* Here, is one of: * a) malformed; * b) a problematic code point (surrogate, non-unicode, or nonchar); or @@ -1635,9 +1628,6 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, || UTF8_IS_NONCHAR(s0, e)); */ - s = s0; - possible_problems = 0; - expectlen = 0; if (errors) { *errors = 0; } @@ -1672,7 +1662,10 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, * allowed one, we could allow in something that shouldn't have been. */ + SSize_t expectlen = 0; /* How long should this sequence be? */ SSize_t curlen = 0; /* How many bytes have we processed so far */ + UV uv = 0; /* The accumulated code point, so far */ + const U8 * s = s0; /* Our current position examining the sequence */ /* Gives how many bytes are available, which may turn out to be less than * the expected length */ @@ -1683,15 +1676,18 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, * than a single character */ const U8 * send = e; + /* A bit is set here for each potential problem found as we go along */ + U32 possible_problems = 0; + + /* The above variables have to be initialized before the 'goto' */ + if (UNLIKELY(avail_len <= 0)) { possible_problems |= UTF8_GOT_EMPTY; goto ready_to_handle_errors; } - /* We now know we can examine the first byte of the input */ - expectlen = UTF8SKIP(s0); - - /* A continuation character can't start a valid sequence */ + /* We now know we can examine the first byte of the input. A continuation + * character can't start a valid sequence */ if (UNLIKELY(UTF8_IS_CONTINUATION(*s0))) { possible_problems |= UTF8_GOT_CONTINUATION; curlen = 1; @@ -1707,6 +1703,8 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, * to check for sure because it excludes start bytes like \xC0 that always * lead to overlongs.) */ + expectlen = UTF8SKIP(s0); /* How long should this sequence be? */ + /* Convert to I8 on EBCDIC (no-op on ASCII), then remove the leading bits * that indicate the number of bytes in the character's whole UTF-8 * sequence, leaving just the bits that are part of the value. */