Skip to content

Commit

Permalink
Modify charset aliases: stop treating ISO-8859-1 as an alias of Windows-1252
Browse files Browse the repository at this point in the history
  • Loading branch information
PeterAlfredLee committed Dec 5, 2020
1 parent 813a6eb commit 0e7b475
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,9 @@ private static void addAll() {
addCharset(charset("windows-1250"), "cp1250", "windows-1250", "x-cp1250");
addCharset(charset("windows-1251"), "cp1251", "windows-1251", "x-cp1251");
addCharset(charset("windows-1252"), "ansi_x3.4-1968", "ascii", "cp1252", "cp819", "csisolatin1",
"ibm819", "iso-8859-1", "iso-ir-100", "iso8859-1", "iso88591", "iso_8859-1", "iso_8859-1:1987",
"l1", "latin1", "us-ascii", "windows-1252", "x-cp1252");
"ibm819", "l1", "latin1", "us-ascii", "windows-1252", "x-cp1252");
addCharset(charset("ISO-8859-1"), "iso-8859-1", "iso-ir-100", "iso8859-1", "iso88591",
"iso_8859-1", "iso_8859-1:1987");
addCharset(charset("windows-1253"), "cp1253", "windows-1253", "x-cp1253");
addCharset(charset("windows-1254"), "cp1254", "csisolatin5", "iso-8859-9", "iso-ir-148",
"iso8859-9", "iso88599", "iso_8859-9", "iso_8859-9:1989", "l5", "latin5", "windows-1254", "x-cp1254");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,7 @@ public void replacement() throws IOException {

@Test
public void iso88591() throws IOException {
// In the spec, iso-8859-1 is an alias for WINDOWS-1252
assertWindows1252("<meta charset='iso-8859-1'>");
assertCharset("<meta charset='iso-8859-1'>", StandardCharsets.ISO_8859_1);
}

@Test
Expand Down Expand Up @@ -294,10 +293,9 @@ public void withCompactComment() throws IOException {
@Test
public void withCharsetInContentType() throws IOException {
metadata.set(Metadata.CONTENT_TYPE, "text/html; Charset=ISO-8859-1");
// ISO-8859-1 is an alias for WINDOWS-1252, even if it's set at the transport layer level
assertWindows1252("");
assertWindows1252("<meta charset='UTF-8'>");
assertWindows1252("<meta http-equiv='content-type' content='charset=utf-8'>");
assertCharset("", StandardCharsets.ISO_8859_1);
assertCharset("<meta charset='UTF-8'>", StandardCharsets.ISO_8859_1);
assertCharset("<meta http-equiv='content-type' content='charset=utf-8'>", StandardCharsets.ISO_8859_1);
// if a BOM is present, it has precedence over transport layer information
assertCharset("\ufeff<meta charset='WINDOWS-1252'>", StandardCharsets.UTF_8);
assertCharset("\ufeff<meta charset='WINDOWS-1252'>", StandardCharsets.UTF_16LE);
Expand Down

0 comments on commit 0e7b475

Please sign in to comment.