OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 681 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
692 static const int kAllUtf8CharsSize = | 692 static const int kAllUtf8CharsSize = |
693 (unibrow::Utf8::kMaxOneByteChar + 1) + | 693 (unibrow::Utf8::kMaxOneByteChar + 1) + |
694 (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 + | 694 (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 + |
695 (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3; | 695 (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3; |
696 static const unsigned kAllUtf8CharsSizeU = | 696 static const unsigned kAllUtf8CharsSizeU = |
697 static_cast<unsigned>(kAllUtf8CharsSize); | 697 static_cast<unsigned>(kAllUtf8CharsSize); |
698 | 698 |
699 char buffer[kAllUtf8CharsSizeU]; | 699 char buffer[kAllUtf8CharsSizeU]; |
700 unsigned cursor = 0; | 700 unsigned cursor = 0; |
701 for (int i = 0; i <= kMaxUC16Char; i++) { | 701 for (int i = 0; i <= kMaxUC16Char; i++) { |
702 cursor += unibrow::Utf8::Encode(buffer + cursor, | 702 cursor += unibrow::Utf8::Encode(buffer + cursor, i, |
703 i, | 703 unibrow::Utf16::kNoPreviousCharacter, true); |
704 unibrow::Utf16::kNoPreviousCharacter); | |
705 } | 704 } |
706 DCHECK(cursor == kAllUtf8CharsSizeU); | 705 DCHECK(cursor == kAllUtf8CharsSizeU); |
707 | 706 |
708 i::Utf8ToUtf16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer), | 707 i::Utf8ToUtf16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer), |
709 kAllUtf8CharsSizeU); | 708 kAllUtf8CharsSizeU); |
| 709 int32_t bad = unibrow::Utf8::kBadChar; |
710 for (int i = 0; i <= kMaxUC16Char; i++) { | 710 for (int i = 0; i <= kMaxUC16Char; i++) { |
711 CHECK_EQU(i, stream.pos()); | 711 CHECK_EQU(i, stream.pos()); |
712 int32_t c = stream.Advance(); | 712 int32_t c = stream.Advance(); |
713 CHECK_EQ(i, c); | 713 if (i >= 0xd800 && i <= 0xdfff) { |
| 714 CHECK_EQ(bad, c); |
| 715 } else { |
| 716 CHECK_EQ(i, c); |
| 717 } |
714 CHECK_EQU(i + 1, stream.pos()); | 718 CHECK_EQU(i + 1, stream.pos()); |
715 } | 719 } |
716 for (int i = kMaxUC16Char; i >= 0; i--) { | 720 for (int i = kMaxUC16Char; i >= 0; i--) { |
717 CHECK_EQU(i + 1, stream.pos()); | 721 CHECK_EQU(i + 1, stream.pos()); |
718 stream.PushBack(i); | 722 stream.PushBack(i); |
719 CHECK_EQU(i, stream.pos()); | 723 CHECK_EQU(i, stream.pos()); |
720 } | 724 } |
721 int i = 0; | 725 int i = 0; |
722 while (stream.pos() < kMaxUC16CharU) { | 726 while (stream.pos() < kMaxUC16CharU) { |
723 CHECK_EQU(i, stream.pos()); | 727 CHECK_EQU(i, stream.pos()); |
724 int progress = static_cast<int>(stream.SeekForward(12)); | 728 int progress = static_cast<int>(stream.SeekForward(12)); |
725 i += progress; | 729 i += progress; |
726 int32_t c = stream.Advance(); | 730 int32_t c = stream.Advance(); |
727 if (i <= kMaxUC16Char) { | 731 if (i >= 0xd800 && i <= 0xdfff) { |
| 732 CHECK_EQ(bad, c); |
| 733 } else if (i <= kMaxUC16Char) { |
728 CHECK_EQ(i, c); | 734 CHECK_EQ(i, c); |
729 } else { | 735 } else { |
730 CHECK_EQ(-1, c); | 736 CHECK_EQ(-1, c); |
731 } | 737 } |
732 i += 1; | 738 i += 1; |
733 CHECK_EQU(i, stream.pos()); | 739 CHECK_EQU(i, stream.pos()); |
734 } | 740 } |
735 } | 741 } |
736 | 742 |
737 #undef CHECK_EQU | 743 #undef CHECK_EQU |
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
906 } | 912 } |
907 input_offset = 3; | 913 input_offset = 3; |
908 // 4 bytes of UTF-8 turn into 2 UTF-16 code units. | 914 // 4 bytes of UTF-8 turn into 2 UTF-16 code units. |
909 character_length -= 2; | 915 character_length -= 2; |
910 } else if (c >= 0xe0) { | 916 } else if (c >= 0xe0) { |
911 if ((c & 0xf) == 0 && ((s[i + 1] & 0x20) == 0)) { | 917 if ((c & 0xf) == 0 && ((s[i + 1] & 0x20) == 0)) { |
912 // This 3 byte sequence could have been coded as a 2 byte sequence. | 918 // This 3 byte sequence could have been coded as a 2 byte sequence. |
913 // Record a single kBadChar for the first byte and continue. | 919 // Record a single kBadChar for the first byte and continue. |
914 continue; | 920 continue; |
915 } | 921 } |
| 922 if (c == 0xed) { |
| 923 unsigned char d = s[i + 1]; |
| 924 if ((d < 0x80) || (d > 0x9f)) { |
| 925 // This 3 byte sequence is part of a surrogate pair which is not |
| 926 // supported by UTF-8. Record a single kBadChar for the first byte |
| 927 // and continue. |
| 928 continue; |
| 929 } |
| 930 } |
916 input_offset = 2; | 931 input_offset = 2; |
917 // 3 bytes of UTF-8 turn into 1 UTF-16 code unit. | 932 // 3 bytes of UTF-8 turn into 1 UTF-16 code unit. |
918 output_adjust = 2; | 933 output_adjust = 2; |
919 } else { | 934 } else { |
920 if ((c & 0x1e) == 0) { | 935 if ((c & 0x1e) == 0) { |
921 // This 2 byte sequence could have been coded as a 1 byte sequence. | 936 // This 2 byte sequence could have been coded as a 1 byte sequence. |
922 // Record a single kBadChar for the first byte and continue. | 937 // Record a single kBadChar for the first byte and continue. |
923 continue; | 938 continue; |
924 } | 939 } |
925 input_offset = 1; | 940 input_offset = 1; |
(...skipping 5663 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6589 "[a, ...]", | 6604 "[a, ...]", |
6590 "[..., ]", | 6605 "[..., ]", |
6591 "[..., ...]", | 6606 "[..., ...]", |
6592 "[ (...a)]", | 6607 "[ (...a)]", |
6593 NULL}; | 6608 NULL}; |
6594 // clang-format on | 6609 // clang-format on |
6595 static const ParserFlag always_flags[] = {kAllowHarmonySpreadArrays}; | 6610 static const ParserFlag always_flags[] = {kAllowHarmonySpreadArrays}; |
6596 RunParserSyncTest(context_data, data, kError, NULL, 0, always_flags, | 6611 RunParserSyncTest(context_data, data, kError, NULL, 0, always_flags, |
6597 arraysize(always_flags)); | 6612 arraysize(always_flags)); |
6598 } | 6613 } |
OLD | NEW |