diff --git a/utf8.c b/utf8.c
index 89f3f89df79a..dcc95f27f6f7 100644
--- a/utf8.c
+++ b/utf8.c
@@ -4748,7 +4748,13 @@ See also L.
 =for apidoc Amnh||UNI_DISPLAY_QQ
 =for apidoc Amnh||UNI_DISPLAY_REGEX
 =cut
+
+Undocumented is UNI_DISPLAY_TR_, which is used internally to display an operand
+of the tr/// operation.  These operands have a peculiar, deliberate UTF-8
+malformation, and this flag enables it to be handled properly.  It turns on
+ISPRINT and BACKSLASH as well.
 */
+
 char *
 Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
                     UV flags)
@@ -4770,6 +4776,17 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
             break;
         }

+        /* The minus is unambiguously the range indicator within a UTF-8 tr///
+         * operand.  UNI_DISPLAY_TR_ is a multi-bit mask that also contains
+         * ISPRINT and BACKSLASH, so require all of its bits rather than any */
+        if (UNLIKELY(   (flags & UNI_DISPLAY_TR_) == UNI_DISPLAY_TR_
+                     && *s == ILLEGAL_UTF8_BYTE))
+        {
+            sv_catpvs(dsv, "-");
+            next_len = 1;
+            continue;
+        }
+
         u = utf8_to_uvchr_buf(s, e, &next_len);
         assert(next_len > 0);

diff --git a/utf8.h b/utf8.h
index f95311637c34..faa3a429805e 100644
--- a/utf8.h
+++ b/utf8.h
@@ -1318,6 +1318,9 @@ point's representation.
 #define UNI_DISPLAY_BACKSLASH   0x0002
 #define UNI_DISPLAY_BACKSPACE   0x0004  /* Allow \b when also
                                            UNI_DISPLAY_BACKSLASH */
+#define UNI_DISPLAY_TR_         (  0x0008                       \
+                                 |UNI_DISPLAY_ISPRINT           \
+                                 |UNI_DISPLAY_BACKSLASH)
 #define UNI_DISPLAY_QQ          (UNI_DISPLAY_ISPRINT            \
                                 |UNI_DISPLAY_BACKSLASH          \
                                 |UNI_DISPLAY_BACKSPACE)