Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(649)

Side by Side Diff: test/cctest/test-api.cc

Issue 662003003: Script streaming: more UTF-8 handling fixes (again). (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: rebased Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner-character-streams.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 23618 matching lines...) Expand 10 before | Expand all | Expand 10 after
23629 " function foo() { return "; 23629 " function foo() { return ";
23630 char chunk2[] = " 13; }\n"; 23630 char chunk2[] = " 13; }\n";
23631 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; 23631 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
23632 23632
23633 RunStreamingTest(chunks); 23633 RunStreamingTest(chunks);
23634 } 23634 }
23635 } 23635 }
23636 23636
23637 23637
23638 TEST(StreamingUtf8Script) { 23638 TEST(StreamingUtf8Script) {
23639 // We'd want to write \uc481 instead of \xeb\x91\x80, but Windows compilers 23639 // We'd want to write \uc481 instead of \xec\x92\x81, but Windows compilers
23640 // don't like it. 23640 // don't like it.
23641 const char* chunk1 = 23641 const char* chunk1 =
23642 "function foo() {\n" 23642 "function foo() {\n"
23643 " // This function will contain an UTF-8 character which is not in\n" 23643 " // This function will contain an UTF-8 character which is not in\n"
23644 " // ASCII.\n" 23644 " // ASCII.\n"
23645 " var foob\xeb\x91\x80r = 13;\n" 23645 " var foob\xec\x92\x81r = 13;\n"
23646 " return foob\xeb\x91\x80r;\n" 23646 " return foob\xec\x92\x81r;\n"
23647 "}\n"; 23647 "}\n";
23648 const char* chunks[] = {chunk1, "foo(); ", NULL}; 23648 const char* chunks[] = {chunk1, "foo(); ", NULL};
23649 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); 23649 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
23650 } 23650 }
23651 23651
23652 23652
23653 TEST(StreamingUtf8ScriptWithSplitCharactersSanityCheck) { 23653 TEST(StreamingUtf8ScriptWithSplitCharactersSanityCheck) {
23654 // A sanity check to prove that the approach of splitting UTF-8 23654 // A sanity check to prove that the approach of splitting UTF-8
23655 // characters is correct. Here is an UTF-8 character which will take three 23655 // characters is correct. Here is an UTF-8 character which will take three
23656 // bytes. 23656 // bytes.
23657 const char* reference = "\xeb\x91\x80"; 23657 const char* reference = "\xec\x92\x81";
23658 CHECK(3u == strlen(reference)); // NOLINT - no CHECK_EQ for unsigned. 23658 CHECK(3u == strlen(reference)); // NOLINT - no CHECK_EQ for unsigned.
23659 23659
23660 char chunk1[] = 23660 char chunk1[] =
23661 "function foo() {\n" 23661 "function foo() {\n"
23662 " // This function will contain an UTF-8 character which is not in\n" 23662 " // This function will contain an UTF-8 character which is not in\n"
23663 " // ASCII.\n" 23663 " // ASCII.\n"
23664 " var foob"; 23664 " var foob";
23665 char chunk2[] = 23665 char chunk2[] =
23666 "XXXr = 13;\n" 23666 "XXXr = 13;\n"
23667 " return foob\xeb\x91\x80r;\n" 23667 " return foob\xec\x92\x81r;\n"
23668 "}\n"; 23668 "}\n";
23669 for (int i = 0; i < 3; ++i) { 23669 for (int i = 0; i < 3; ++i) {
23670 chunk2[i] = reference[i]; 23670 chunk2[i] = reference[i];
23671 } 23671 }
23672 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; 23672 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
23673 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); 23673 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
23674 } 23674 }
23675 23675
23676 23676
23677 TEST(StreamingUtf8ScriptWithSplitCharacters) { 23677 TEST(StreamingUtf8ScriptWithSplitCharacters) {
23678 // Stream data where a multi-byte UTF-8 character is split between two data 23678 // Stream data where a multi-byte UTF-8 character is split between two data
23679 // chunks. 23679 // chunks.
23680 const char* reference = "\xeb\x91\x80"; 23680 const char* reference = "\xec\x92\x81";
23681 char chunk1[] = 23681 char chunk1[] =
23682 "function foo() {\n" 23682 "function foo() {\n"
23683 " // This function will contain an UTF-8 character which is not in\n" 23683 " // This function will contain an UTF-8 character which is not in\n"
23684 " // ASCII.\n" 23684 " // ASCII.\n"
23685 " var foobX"; 23685 " var foobX";
23686 char chunk2[] = 23686 char chunk2[] =
23687 "XXr = 13;\n" 23687 "XXr = 13;\n"
23688 " return foob\xeb\x91\x80r;\n" 23688 " return foob\xec\x92\x81r;\n"
23689 "}\n"; 23689 "}\n";
23690 chunk1[strlen(chunk1) - 1] = reference[0]; 23690 chunk1[strlen(chunk1) - 1] = reference[0];
23691 chunk2[0] = reference[1]; 23691 chunk2[0] = reference[1];
23692 chunk2[1] = reference[2]; 23692 chunk2[1] = reference[2];
23693 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; 23693 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
23694 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); 23694 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
23695 } 23695 }
23696 23696
23697 23697
23698 TEST(StreamingUtf8ScriptWithSplitCharactersValidEdgeCases) { 23698 TEST(StreamingUtf8ScriptWithSplitCharactersValidEdgeCases) {
23699 // Tests edge cases which should still be decoded correctly. 23699 // Tests edge cases which should still be decoded correctly.
23700 23700
23701 // Case 1: a chunk contains only bytes for a split character (and no other 23701 // Case 1: a chunk contains only bytes for a split character (and no other
23702 // data). This kind of a chunk would be exceptionally small, but we should 23702 // data). This kind of a chunk would be exceptionally small, but we should
23703 // still decode it correctly. 23703 // still decode it correctly.
23704 const char* reference = "\xeb\x91\x80"; 23704 const char* reference = "\xec\x92\x81";
23705 // The small chunk is at the beginning of the split character 23705 // The small chunk is at the beginning of the split character
23706 { 23706 {
23707 char chunk1[] = 23707 char chunk1[] =
23708 "function foo() {\n" 23708 "function foo() {\n"
23709 " // This function will contain an UTF-8 character which is not in\n" 23709 " // This function will contain an UTF-8 character which is not in\n"
23710 " // ASCII.\n" 23710 " // ASCII.\n"
23711 " var foob"; 23711 " var foob";
23712 char chunk2[] = "XX"; 23712 char chunk2[] = "XX";
23713 char chunk3[] = 23713 char chunk3[] =
23714 "Xr = 13;\n" 23714 "Xr = 13;\n"
23715 " return foob\xeb\x91\x80r;\n" 23715 " return foob\xec\x92\x81r;\n"
23716 "}\n"; 23716 "}\n";
23717 chunk2[0] = reference[0]; 23717 chunk2[0] = reference[0];
23718 chunk2[1] = reference[1]; 23718 chunk2[1] = reference[1];
23719 chunk3[0] = reference[2]; 23719 chunk3[0] = reference[2];
23720 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL}; 23720 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};
23721 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); 23721 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
23722 } 23722 }
23723 // The small chunk is at the end of a character 23723 // The small chunk is at the end of a character
23724 { 23724 {
23725 char chunk1[] = 23725 char chunk1[] =
23726 "function foo() {\n" 23726 "function foo() {\n"
23727 " // This function will contain an UTF-8 character which is not in\n" 23727 " // This function will contain an UTF-8 character which is not in\n"
23728 " // ASCII.\n" 23728 " // ASCII.\n"
23729 " var foobX"; 23729 " var foobX";
23730 char chunk2[] = "XX"; 23730 char chunk2[] = "XX";
23731 char chunk3[] = 23731 char chunk3[] =
23732 "r = 13;\n" 23732 "r = 13;\n"
23733 " return foob\xeb\x91\x80r;\n" 23733 " return foob\xec\x92\x81r;\n"
23734 "}\n"; 23734 "}\n";
23735 chunk1[strlen(chunk1) - 1] = reference[0]; 23735 chunk1[strlen(chunk1) - 1] = reference[0];
23736 chunk2[0] = reference[1]; 23736 chunk2[0] = reference[1];
23737 chunk2[1] = reference[2]; 23737 chunk2[1] = reference[2];
23738 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL}; 23738 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};
23739 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); 23739 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
23740 } 23740 }
23741 // Case 2: the script ends with a multi-byte character. Make sure that it's 23741 // Case 2: the script ends with a multi-byte character. Make sure that it's
23742 // decoded correctly and not just ignored. 23742 // decoded correctly and not just ignored.
23743 { 23743 {
23744 char chunk1[] = 23744 char chunk1[] =
23745 "var foob\xeb\x91\x80 = 13;\n" 23745 "var foob\xec\x92\x81 = 13;\n"
23746 "foob\xeb\x91\x80"; 23746 "foob\xec\x92\x81";
23747 const char* chunks[] = {chunk1, NULL}; 23747 const char* chunks[] = {chunk1, NULL};
23748 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); 23748 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
23749 } 23749 }
23750 } 23750 }
23751 23751
23752 23752
23753 TEST(StreamingUtf8ScriptWithSplitCharactersInvalidEdgeCases) { 23753 TEST(StreamingUtf8ScriptWithSplitCharactersInvalidEdgeCases) {
23754 // Test cases where a UTF-8 character is split over several chunks. Those 23754 // Test cases where a UTF-8 character is split over several chunks. Those
23755 // cases are not supported (the embedder should give the data in big enough 23755 // cases are not supported (the embedder should give the data in big enough
23756 // chunks), but we shouldn't crash, just produce a parse error. 23756 // chunks), but we shouldn't crash, just produce a parse error.
23757 const char* reference = "\xeb\x91\x80"; 23757 const char* reference = "\xec\x92\x81";
23758 char chunk1[] = 23758 char chunk1[] =
23759 "function foo() {\n" 23759 "function foo() {\n"
23760 " // This function will contain an UTF-8 character which is not in\n" 23760 " // This function will contain an UTF-8 character which is not in\n"
23761 " // ASCII.\n" 23761 " // ASCII.\n"
23762 " var foobX"; 23762 " var foobX";
23763 char chunk2[] = "X"; 23763 char chunk2[] = "X";
23764 char chunk3[] = 23764 char chunk3[] =
23765 "Xr = 13;\n" 23765 "Xr = 13;\n"
23766 " return foob\xeb\x91\x80r;\n" 23766 " return foob\xec\x92\x81r;\n"
23767 "}\n"; 23767 "}\n";
23768 chunk1[strlen(chunk1) - 1] = reference[0]; 23768 chunk1[strlen(chunk1) - 1] = reference[0];
23769 chunk2[0] = reference[1]; 23769 chunk2[0] = reference[1];
23770 chunk3[0] = reference[2]; 23770 chunk3[0] = reference[2];
23771 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL}; 23771 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};
23772 23772
23773 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false); 23773 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false);
23774 } 23774 }
23775 23775
23776 23776
(...skipping 21 matching lines...) Expand all
23798 const v8::ScriptCompiler::CachedData* cached_data = source.GetCachedData(); 23798 const v8::ScriptCompiler::CachedData* cached_data = source.GetCachedData();
23799 CHECK(cached_data != NULL); 23799 CHECK(cached_data != NULL);
23800 CHECK(cached_data->data != NULL); 23800 CHECK(cached_data->data != NULL);
23801 CHECK_GT(cached_data->length, 0); 23801 CHECK_GT(cached_data->length, 0);
23802 } 23802 }
23803 23803
23804 23804
23805 TEST(StreamingScriptWithInvalidUtf8) { 23805 TEST(StreamingScriptWithInvalidUtf8) {
23806 // Regression test for a crash: test that invalid UTF-8 bytes in the end of a 23806 // Regression test for a crash: test that invalid UTF-8 bytes in the end of a
23807 // chunk don't produce a crash. 23807 // chunk don't produce a crash.
23808 const char* reference = "\xeb\x91\x80\x80\x80"; 23808 const char* reference = "\xec\x92\x81\x80\x80";
23809 char chunk1[] = 23809 char chunk1[] =
23810 "function foo() {\n" 23810 "function foo() {\n"
23811 " // This function will contain an UTF-8 character which is not in\n" 23811 " // This function will contain an UTF-8 character which is not in\n"
23812 " // ASCII.\n" 23812 " // ASCII.\n"
23813 " var foobXXXXX"; // Too many bytes which look like incomplete chars! 23813 " var foobXXXXX"; // Too many bytes which look like incomplete chars!
23814 char chunk2[] = 23814 char chunk2[] =
23815 "r = 13;\n" 23815 "r = 13;\n"
23816 " return foob\xeb\x91\x80\x80\x80r;\n" 23816 " return foob\xec\x92\x81\x80\x80r;\n"
23817 "}\n"; 23817 "}\n";
23818 for (int i = 0; i < 5; ++i) chunk1[strlen(chunk1) - 5 + i] = reference[i]; 23818 for (int i = 0; i < 5; ++i) chunk1[strlen(chunk1) - 5 + i] = reference[i];
23819 23819
23820 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; 23820 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
23821 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false); 23821 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false);
23822 } 23822 }
23823 23823
23824 23824
23825 TEST(StreamingUtf8ScriptWithMultipleMultibyteCharactersSomeSplit) { 23825 TEST(StreamingUtf8ScriptWithMultipleMultibyteCharactersSomeSplit) {
23826 // Regression test: Stream data where there are several multi-byte UTF-8 23826 // Regression test: Stream data where there are several multi-byte UTF-8
23827 // characters in a sequence and one of them is split between two data chunks. 23827 // characters in a sequence and one of them is split between two data chunks.
23828 const char* reference = "\xeb\x91\x80"; 23828 const char* reference = "\xec\x92\x81";
23829 char chunk1[] = 23829 char chunk1[] =
23830 "function foo() {\n" 23830 "function foo() {\n"
23831 " // This function will contain an UTF-8 character which is not in\n" 23831 " // This function will contain an UTF-8 character which is not in\n"
23832 " // ASCII.\n" 23832 " // ASCII.\n"
23833 " var foob\xeb\x91\x80X"; 23833 " var foob\xec\x92\x81X";
23834 char chunk2[] = 23834 char chunk2[] =
23835 "XXr = 13;\n" 23835 "XXr = 13;\n"
23836 " return foob\xeb\x91\x80\xeb\x91\x80r;\n" 23836 " return foob\xec\x92\x81\xec\x92\x81r;\n"
23837 "}\n"; 23837 "}\n";
23838 chunk1[strlen(chunk1) - 1] = reference[0]; 23838 chunk1[strlen(chunk1) - 1] = reference[0];
23839 chunk2[0] = reference[1]; 23839 chunk2[0] = reference[1];
23840 chunk2[1] = reference[2];
23841 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
23842 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
23843 }
23844
23845
23846 TEST(StreamingUtf8ScriptWithMultipleMultibyteCharactersSomeSplit2) {
23847 // Another regression test, similar to the previous one. The difference is
23848 // that the split character is not the last one in the sequence.
23849 const char* reference = "\xec\x92\x81";
23850 char chunk1[] =
23851 "function foo() {\n"
23852 " // This function will contain an UTF-8 character which is not in\n"
23853 " // ASCII.\n"
23854 " var foobX";
23855 char chunk2[] =
23856 "XX\xec\x92\x81r = 13;\n"
23857 " return foob\xec\x92\x81\xec\x92\x81r;\n"
23858 "}\n";
23859 chunk1[strlen(chunk1) - 1] = reference[0];
23860 chunk2[0] = reference[1];
23840 chunk2[1] = reference[2]; 23861 chunk2[1] = reference[2];
23841 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; 23862 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
23842 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); 23863 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
23843 } 23864 }
OLDNEW
« no previous file with comments | « src/scanner-character-streams.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698