Index: LayoutTests/fast/encoding/char-decoding-invalid-trail.html |
diff --git a/LayoutTests/fast/encoding/char-decoding-invalid-trail.html b/LayoutTests/fast/encoding/char-decoding-invalid-trail.html |
new file mode 100644 |
index 0000000000000000000000000000000000000000..919fbf78ca0753089a3670e27463920932371b88 |
--- /dev/null |
+++ b/LayoutTests/fast/encoding/char-decoding-invalid-trail.html |
@@ -0,0 +1,56 @@ |
+<!DOCTYPE html> |
+<script src="../../resources/js-test.js"></script> |
+<script src="resources/char-decoding-utils.js"></script> |
+<script> |
+ |
+description("Test encoding behavior for sequences with invalid trail bytes"); |
+ |
+// UTF-8 codec emits replacement characters |
+testDecode('UTF-8', '%C3%22', 'U+FFFD/U+0022'); |
+testDecode('UTF-8', '%E2%22', 'U+FFFD/U+0022'); |
+testDecode('UTF-8', '%E2%A0%22', 'U+FFFD/U+FFFD/U+0022'); |
+testDecode('UTF-8', '%F0%90%80%22', 'U+FFFD/U+FFFD/U+FFFD/U+0022'); |
+ |
+// When a trail byte is in the ASCII range and lead+trail does not |
+// encode a character, the ASCII trail byte has to be added back to |
+// the stream. Otherwise, lead+trail as a whole is turned to U+FFFD. |
+testDecode('EUC-KR', '%C4%22', 'U+FFFD/U+0022'); |
+testDecode('EUC-KR', '%C4%5C', 'U+FFFD/U+005C'); |
+testDecode('EUC-KR', '%C4%7B', 'U+FFFD/U+007B'); |
+testDecode('EUC-KR', '%C6%53', 'U+FFFD/U+0053'); |
+testDecode('EUC-KR', '%C7%41', 'U+FFFD/U+0041'); |
+testDecode('EUC-KR', '%C7%81', 'U+FFFD'); |
+testDecode('EUC-KR', '%FE%A1', 'U+FFFD'); |
+ |
+// TODO(jshin): Add more tests for EUC-JP. |
+testDecode('EUC-JP', '%8F%A1%A1', 'U+FFFD'); |
+testDecode('EUC-JP', '%8F%A1%81%22', 'U+FFFD/U+FFFD/U+0022'); |
+testDecode('EUC-JP', '%8F%A1%22', 'U+FFFD/U+FFFD/U+0022'); |
+testDecode('EUC-JP', '%8E%8E%A1', 'U+FFFD/U+FF61'); |
+testDecode('EUC-JP', '%8E%E0', 'U+FFFD/U+FFFD'); |
+ |
+testDecode('Big5', '%A1%22', 'U+FFFD/U+0022'); |
+testDecode('Big5', '%87%66', 'U+FFFD/U+0066'); |
+testDecode('Big5', '%89%44', 'U+FFFD/U+0044'); |
+testDecode('Big5', '%8A%63', 'U+FFFD/U+0063'); |
+testDecode('Big5', '%8B%54', 'U+FFFD/U+0054'); |
+testDecode('Big5', '%8D%41', 'U+FFFD/U+0041'); |
+testDecode('Big5', '%9B%61', 'U+FFFD/U+0061'); |
+testDecode('Big5', '%9F%4E', 'U+FFFD/U+004E'); |
+testDecode('Big5', '%A0%54', 'U+FFFD/U+0054'); |
+ |
+ |
+testDecode('Shift_JIS', '%82%23', 'U+FFFD/U+0023'); |
+testDecode('Shift_JIS', '%82%5C', 'U+FFFD/U+005C'); |
+testDecode('Shift_JIS', '%82%7A', 'U+FFFD/U+007A'); |
+testDecode('Shift_JIS', '%84%61', 'U+FFFD/U+0061'); |
+testDecode('Shift_JIS', '%85%7B', 'U+FFFD/U+007B'); |
+testDecode('Shift_JIS', '%87%7B', 'U+FFFD/U+007B'); |
+testDecode('Shift_JIS', '%98%7E', 'U+FFFD/U+007E'); |
+testDecode('Shift_JIS', '%FC%5B', 'U+FFFD/U+005B'); |
+ |
+// See https://www.w3.org/Bugs/Public/show_bug.cgi?id=28141#c4 |
+// We're different from the encoding spec as of 2015-03-18. |
+testDecode('shift_jis', '%EB%9F', 'U+FFFD'); |
+ |
+</script> |