Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: test/cctest/test-parsing.cc

Issue 1148653007: Update UTF-8 decoder to detect more special cases. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: updates Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « test/cctest/test-api.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 681 matching lines...) Expand 10 before | Expand all | Expand 10 after
692 static const int kAllUtf8CharsSize = 692 static const int kAllUtf8CharsSize =
693 (unibrow::Utf8::kMaxOneByteChar + 1) + 693 (unibrow::Utf8::kMaxOneByteChar + 1) +
694 (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 + 694 (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
695 (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3; 695 (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
696 static const unsigned kAllUtf8CharsSizeU = 696 static const unsigned kAllUtf8CharsSizeU =
697 static_cast<unsigned>(kAllUtf8CharsSize); 697 static_cast<unsigned>(kAllUtf8CharsSize);
698 698
699 char buffer[kAllUtf8CharsSizeU]; 699 char buffer[kAllUtf8CharsSizeU];
700 unsigned cursor = 0; 700 unsigned cursor = 0;
701 for (int i = 0; i <= kMaxUC16Char; i++) { 701 for (int i = 0; i <= kMaxUC16Char; i++) {
702 cursor += unibrow::Utf8::Encode(buffer + cursor, 702 cursor += unibrow::Utf8::Encode(buffer + cursor, i,
703 i, 703 unibrow::Utf16::kNoPreviousCharacter, true);
704 unibrow::Utf16::kNoPreviousCharacter);
705 } 704 }
706 DCHECK(cursor == kAllUtf8CharsSizeU); 705 DCHECK(cursor == kAllUtf8CharsSizeU);
707 706
708 i::Utf8ToUtf16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer), 707 i::Utf8ToUtf16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
709 kAllUtf8CharsSizeU); 708 kAllUtf8CharsSizeU);
709 int32_t bad = unibrow::Utf8::kBadChar;
710 for (int i = 0; i <= kMaxUC16Char; i++) { 710 for (int i = 0; i <= kMaxUC16Char; i++) {
711 CHECK_EQU(i, stream.pos()); 711 CHECK_EQU(i, stream.pos());
712 int32_t c = stream.Advance(); 712 int32_t c = stream.Advance();
713 CHECK_EQ(i, c); 713 if (i >= 0xd800 && i <= 0xdfff) {
714 CHECK_EQ(bad, c);
715 } else {
716 CHECK_EQ(i, c);
717 }
714 CHECK_EQU(i + 1, stream.pos()); 718 CHECK_EQU(i + 1, stream.pos());
715 } 719 }
716 for (int i = kMaxUC16Char; i >= 0; i--) { 720 for (int i = kMaxUC16Char; i >= 0; i--) {
717 CHECK_EQU(i + 1, stream.pos()); 721 CHECK_EQU(i + 1, stream.pos());
718 stream.PushBack(i); 722 stream.PushBack(i);
719 CHECK_EQU(i, stream.pos()); 723 CHECK_EQU(i, stream.pos());
720 } 724 }
721 int i = 0; 725 int i = 0;
722 while (stream.pos() < kMaxUC16CharU) { 726 while (stream.pos() < kMaxUC16CharU) {
723 CHECK_EQU(i, stream.pos()); 727 CHECK_EQU(i, stream.pos());
724 int progress = static_cast<int>(stream.SeekForward(12)); 728 int progress = static_cast<int>(stream.SeekForward(12));
725 i += progress; 729 i += progress;
726 int32_t c = stream.Advance(); 730 int32_t c = stream.Advance();
727 if (i <= kMaxUC16Char) { 731 if (i >= 0xd800 && i <= 0xdfff) {
732 CHECK_EQ(bad, c);
733 } else if (i <= kMaxUC16Char) {
728 CHECK_EQ(i, c); 734 CHECK_EQ(i, c);
729 } else { 735 } else {
730 CHECK_EQ(-1, c); 736 CHECK_EQ(-1, c);
731 } 737 }
732 i += 1; 738 i += 1;
733 CHECK_EQU(i, stream.pos()); 739 CHECK_EQU(i, stream.pos());
734 } 740 }
735 } 741 }
736 742
737 #undef CHECK_EQU 743 #undef CHECK_EQU
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
906 } 912 }
907 input_offset = 3; 913 input_offset = 3;
908 // 4 bytes of UTF-8 turn into 2 UTF-16 code units. 914 // 4 bytes of UTF-8 turn into 2 UTF-16 code units.
909 character_length -= 2; 915 character_length -= 2;
910 } else if (c >= 0xe0) { 916 } else if (c >= 0xe0) {
911 if ((c & 0xf) == 0 && ((s[i + 1] & 0x20) == 0)) { 917 if ((c & 0xf) == 0 && ((s[i + 1] & 0x20) == 0)) {
912 // This 3 byte sequence could have been coded as a 2 byte sequence. 918 // This 3 byte sequence could have been coded as a 2 byte sequence.
913 // Record a single kBadChar for the first byte and continue. 919 // Record a single kBadChar for the first byte and continue.
914 continue; 920 continue;
915 } 921 }
922 if (c == 0xed) {
923 unsigned char d = s[i + 1];
924 if ((d < 0x80) || (d > 0x9f)) {
925 // This 3 byte sequence is part of a surrogate pair which is not
926 // supported by UTF-8. Record a single kBadChar for the first byte
927 // and continue.
928 continue;
929 }
930 }
916 input_offset = 2; 931 input_offset = 2;
917 // 3 bytes of UTF-8 turn into 1 UTF-16 code unit. 932 // 3 bytes of UTF-8 turn into 1 UTF-16 code unit.
918 output_adjust = 2; 933 output_adjust = 2;
919 } else { 934 } else {
920 if ((c & 0x1e) == 0) { 935 if ((c & 0x1e) == 0) {
921 // This 2 byte sequence could have been coded as a 1 byte sequence. 936 // This 2 byte sequence could have been coded as a 1 byte sequence.
922 // Record a single kBadChar for the first byte and continue. 937 // Record a single kBadChar for the first byte and continue.
923 continue; 938 continue;
924 } 939 }
925 input_offset = 1; 940 input_offset = 1;
(...skipping 5663 matching lines...) Expand 10 before | Expand all | Expand 10 after
6589 "[a, ...]", 6604 "[a, ...]",
6590 "[..., ]", 6605 "[..., ]",
6591 "[..., ...]", 6606 "[..., ...]",
6592 "[ (...a)]", 6607 "[ (...a)]",
6593 NULL}; 6608 NULL};
6594 // clang-format on 6609 // clang-format on
6595 static const ParserFlag always_flags[] = {kAllowHarmonySpreadArrays}; 6610 static const ParserFlag always_flags[] = {kAllowHarmonySpreadArrays};
6596 RunParserSyncTest(context_data, data, kError, NULL, 0, always_flags, 6611 RunParserSyncTest(context_data, data, kError, NULL, 0, always_flags,
6597 arraysize(always_flags)); 6612 arraysize(always_flags));
6598 } 6613 }
OLD | NEW
« no previous file with comments | « test/cctest/test-api.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698