test/cctest/test-api.cc - Issue 662003003: Script streaming: more UTF-8 handling fixes (again).

Side by Side Diff: test/cctest/test-api.cc

Issue 662003003: Script streaming: more UTF-8 handling fixes (again). (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: rebased Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 23618 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
23629 " function foo() { return ";	23629 " function foo() { return ";

23630 char chunk2[] = " 13; }\n";	23630 char chunk2[] = " 13; }\n";

23631 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};	23631 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};

23632	23632

23633 RunStreamingTest(chunks);	23633 RunStreamingTest(chunks);

23634 }	23634 }

23635 }	23635 }

23636	23636

23637	23637

23638 TEST(StreamingUtf8Script) {	23638 TEST(StreamingUtf8Script) {

23639 // We'd want to write \uc481 instead of \xeb\x91\x80, but Windows compilers	23639 // We'd want to write \uc481 instead of \xec\x92\x81, but Windows compilers

23640 // don't like it.	23640 // don't like it.

23641 const char* chunk1 =	23641 const char* chunk1 =

23642 "function foo() {\n"	23642 "function foo() {\n"

23643 " // This function will contain an UTF-8 character which is not in\n"	23643 " // This function will contain an UTF-8 character which is not in\n"

23644 " // ASCII.\n"	23644 " // ASCII.\n"

23645 " var foob\xeb\x91\x80r = 13;\n"	23645 " var foob\xec\x92\x81r = 13;\n"

23646 " return foob\xeb\x91\x80r;\n"	23646 " return foob\xec\x92\x81r;\n"

23647 "}\n";	23647 "}\n";

23648 const char* chunks[] = {chunk1, "foo(); ", NULL};	23648 const char* chunks[] = {chunk1, "foo(); ", NULL};

23649 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);	23649 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);

23650 }	23650 }

23651	23651

23652	23652

23653 TEST(StreamingUtf8ScriptWithSplitCharactersSanityCheck) {	23653 TEST(StreamingUtf8ScriptWithSplitCharactersSanityCheck) {

23654 // A sanity check to prove that the approach of splitting UTF-8	23654 // A sanity check to prove that the approach of splitting UTF-8

23655 // characters is correct. Here is an UTF-8 character which will take three	23655 // characters is correct. Here is an UTF-8 character which will take three

23656 // bytes.	23656 // bytes.

23657 const char* reference = "\xeb\x91\x80";	23657 const char* reference = "\xec\x92\x81";

23658 CHECK(3u == strlen(reference)); // NOLINT - no CHECK_EQ for unsigned.	23658 CHECK(3u == strlen(reference)); // NOLINT - no CHECK_EQ for unsigned.

23659	23659

23660 char chunk1[] =	23660 char chunk1[] =

23661 "function foo() {\n"	23661 "function foo() {\n"

23662 " // This function will contain an UTF-8 character which is not in\n"	23662 " // This function will contain an UTF-8 character which is not in\n"

23663 " // ASCII.\n"	23663 " // ASCII.\n"

23664 " var foob";	23664 " var foob";

23665 char chunk2[] =	23665 char chunk2[] =

23666 "XXXr = 13;\n"	23666 "XXXr = 13;\n"

23667 " return foob\xeb\x91\x80r;\n"	23667 " return foob\xec\x92\x81r;\n"

23668 "}\n";	23668 "}\n";

23669 for (int i = 0; i < 3; ++i) {	23669 for (int i = 0; i < 3; ++i) {

23670 chunk2[i] = reference[i];	23670 chunk2[i] = reference[i];

23671 }	23671 }

23672 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};	23672 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};

23673 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);	23673 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);

23674 }	23674 }

23675	23675

23676	23676

23677 TEST(StreamingUtf8ScriptWithSplitCharacters) {	23677 TEST(StreamingUtf8ScriptWithSplitCharacters) {

23678 // Stream data where a multi-byte UTF-8 character is split between two data	23678 // Stream data where a multi-byte UTF-8 character is split between two data

23679 // chunks.	23679 // chunks.

23680 const char* reference = "\xeb\x91\x80";	23680 const char* reference = "\xec\x92\x81";

23681 char chunk1[] =	23681 char chunk1[] =

23682 "function foo() {\n"	23682 "function foo() {\n"

23683 " // This function will contain an UTF-8 character which is not in\n"	23683 " // This function will contain an UTF-8 character which is not in\n"

23684 " // ASCII.\n"	23684 " // ASCII.\n"

23685 " var foobX";	23685 " var foobX";

23686 char chunk2[] =	23686 char chunk2[] =

23687 "XXr = 13;\n"	23687 "XXr = 13;\n"

23688 " return foob\xeb\x91\x80r;\n"	23688 " return foob\xec\x92\x81r;\n"

23689 "}\n";	23689 "}\n";

23690 chunk1[strlen(chunk1) - 1] = reference[0];	23690 chunk1[strlen(chunk1) - 1] = reference[0];

23691 chunk2[0] = reference[1];	23691 chunk2[0] = reference[1];

23692 chunk2[1] = reference[2];	23692 chunk2[1] = reference[2];

23693 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};	23693 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};

23694 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);	23694 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);

23695 }	23695 }

23696	23696

23697	23697

23698 TEST(StreamingUtf8ScriptWithSplitCharactersValidEdgeCases) {	23698 TEST(StreamingUtf8ScriptWithSplitCharactersValidEdgeCases) {

23699 // Tests edge cases which should still be decoded correctly.	23699 // Tests edge cases which should still be decoded correctly.

23700	23700

23701 // Case 1: a chunk contains only bytes for a split character (and no other	23701 // Case 1: a chunk contains only bytes for a split character (and no other

23702 // data). This kind of a chunk would be exceptionally small, but we should	23702 // data). This kind of a chunk would be exceptionally small, but we should

23703 // still decode it correctly.	23703 // still decode it correctly.

23704 const char* reference = "\xeb\x91\x80";	23704 const char* reference = "\xec\x92\x81";

23705 // The small chunk is at the beginning of the split character	23705 // The small chunk is at the beginning of the split character

23706 {	23706 {

23707 char chunk1[] =	23707 char chunk1[] =

23708 "function foo() {\n"	23708 "function foo() {\n"

23709 " // This function will contain an UTF-8 character which is not in\n"	23709 " // This function will contain an UTF-8 character which is not in\n"

23710 " // ASCII.\n"	23710 " // ASCII.\n"

23711 " var foob";	23711 " var foob";

23712 char chunk2[] = "XX";	23712 char chunk2[] = "XX";

23713 char chunk3[] =	23713 char chunk3[] =

23714 "Xr = 13;\n"	23714 "Xr = 13;\n"

23715 " return foob\xeb\x91\x80r;\n"	23715 " return foob\xec\x92\x81r;\n"

23716 "}\n";	23716 "}\n";

23717 chunk2[0] = reference[0];	23717 chunk2[0] = reference[0];

23718 chunk2[1] = reference[1];	23718 chunk2[1] = reference[1];

23719 chunk3[0] = reference[2];	23719 chunk3[0] = reference[2];

23720 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};	23720 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};

23721 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);	23721 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);

23722 }	23722 }

23723 // The small chunk is at the end of a character	23723 // The small chunk is at the end of a character

23724 {	23724 {

23725 char chunk1[] =	23725 char chunk1[] =

23726 "function foo() {\n"	23726 "function foo() {\n"

23727 " // This function will contain an UTF-8 character which is not in\n"	23727 " // This function will contain an UTF-8 character which is not in\n"

23728 " // ASCII.\n"	23728 " // ASCII.\n"

23729 " var foobX";	23729 " var foobX";

23730 char chunk2[] = "XX";	23730 char chunk2[] = "XX";

23731 char chunk3[] =	23731 char chunk3[] =

23732 "r = 13;\n"	23732 "r = 13;\n"

23733 " return foob\xeb\x91\x80r;\n"	23733 " return foob\xec\x92\x81r;\n"

23734 "}\n";	23734 "}\n";

23735 chunk1[strlen(chunk1) - 1] = reference[0];	23735 chunk1[strlen(chunk1) - 1] = reference[0];

23736 chunk2[0] = reference[1];	23736 chunk2[0] = reference[1];

23737 chunk2[1] = reference[2];	23737 chunk2[1] = reference[2];

23738 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};	23738 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};

23739 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);	23739 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);

23740 }	23740 }

23741 // Case 2: the script ends with a multi-byte character. Make sure that it's	23741 // Case 2: the script ends with a multi-byte character. Make sure that it's

23742 // decoded correctly and not just ignored.	23742 // decoded correctly and not just ignored.

23743 {	23743 {

23744 char chunk1[] =	23744 char chunk1[] =

23745 "var foob\xeb\x91\x80 = 13;\n"	23745 "var foob\xec\x92\x81 = 13;\n"

23746 "foob\xeb\x91\x80";	23746 "foob\xec\x92\x81";

23747 const char* chunks[] = {chunk1, NULL};	23747 const char* chunks[] = {chunk1, NULL};

23748 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);	23748 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);

23749 }	23749 }

23750 }	23750 }

23751	23751

23752	23752

23753 TEST(StreamingUtf8ScriptWithSplitCharactersInvalidEdgeCases) {	23753 TEST(StreamingUtf8ScriptWithSplitCharactersInvalidEdgeCases) {

23754 // Test cases where a UTF-8 character is split over several chunks. Those	23754 // Test cases where a UTF-8 character is split over several chunks. Those

23755 // cases are not supported (the embedder should give the data in big enough	23755 // cases are not supported (the embedder should give the data in big enough

23756 // chunks), but we shouldn't crash, just produce a parse error.	23756 // chunks), but we shouldn't crash, just produce a parse error.

23757 const char* reference = "\xeb\x91\x80";	23757 const char* reference = "\xec\x92\x81";

23758 char chunk1[] =	23758 char chunk1[] =

23759 "function foo() {\n"	23759 "function foo() {\n"

23760 " // This function will contain an UTF-8 character which is not in\n"	23760 " // This function will contain an UTF-8 character which is not in\n"

23761 " // ASCII.\n"	23761 " // ASCII.\n"

23762 " var foobX";	23762 " var foobX";

23763 char chunk2[] = "X";	23763 char chunk2[] = "X";

23764 char chunk3[] =	23764 char chunk3[] =

23765 "Xr = 13;\n"	23765 "Xr = 13;\n"

23766 " return foob\xeb\x91\x80r;\n"	23766 " return foob\xec\x92\x81r;\n"

23767 "}\n";	23767 "}\n";

23768 chunk1[strlen(chunk1) - 1] = reference[0];	23768 chunk1[strlen(chunk1) - 1] = reference[0];

23769 chunk2[0] = reference[1];	23769 chunk2[0] = reference[1];

23770 chunk3[0] = reference[2];	23770 chunk3[0] = reference[2];

23771 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};	23771 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};

23772	23772

23773 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false);	23773 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false);

23774 }	23774 }

23775	23775

23776	23776

(...skipping 21 matching lines...) Expand all Loading...
23798 const v8::ScriptCompiler::CachedData* cached_data = source.GetCachedData();	23798 const v8::ScriptCompiler::CachedData* cached_data = source.GetCachedData();

23799 CHECK(cached_data != NULL);	23799 CHECK(cached_data != NULL);

23800 CHECK(cached_data->data != NULL);	23800 CHECK(cached_data->data != NULL);

23801 CHECK_GT(cached_data->length, 0);	23801 CHECK_GT(cached_data->length, 0);

23802 }	23802 }

23803	23803

23804	23804

23805 TEST(StreamingScriptWithInvalidUtf8) {	23805 TEST(StreamingScriptWithInvalidUtf8) {

23806 // Regression test for a crash: test that invalid UTF-8 bytes in the end of a	23806 // Regression test for a crash: test that invalid UTF-8 bytes in the end of a

23807 // chunk don't produce a crash.	23807 // chunk don't produce a crash.

23808 const char* reference = "\xeb\x91\x80\x80\x80";	23808 const char* reference = "\xec\x92\x81\x80\x80";

23809 char chunk1[] =	23809 char chunk1[] =

23810 "function foo() {\n"	23810 "function foo() {\n"

23811 " // This function will contain an UTF-8 character which is not in\n"	23811 " // This function will contain an UTF-8 character which is not in\n"

23812 " // ASCII.\n"	23812 " // ASCII.\n"

23813 " var foobXXXXX"; // Too many bytes which look like incomplete chars!	23813 " var foobXXXXX"; // Too many bytes which look like incomplete chars!

23814 char chunk2[] =	23814 char chunk2[] =

23815 "r = 13;\n"	23815 "r = 13;\n"

23816 " return foob\xeb\x91\x80\x80\x80r;\n"	23816 " return foob\xec\x92\x81\x80\x80r;\n"

23817 "}\n";	23817 "}\n";

23818 for (int i = 0; i < 5; ++i) chunk1[strlen(chunk1) - 5 + i] = reference[i];	23818 for (int i = 0; i < 5; ++i) chunk1[strlen(chunk1) - 5 + i] = reference[i];

23819	23819

23820 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};	23820 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};

23821 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false);	23821 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false);

23822 }	23822 }

23823	23823

23824	23824

23825 TEST(StreamingUtf8ScriptWithMultipleMultibyteCharactersSomeSplit) {	23825 TEST(StreamingUtf8ScriptWithMultipleMultibyteCharactersSomeSplit) {

23826 // Regression test: Stream data where there are several multi-byte UTF-8	23826 // Regression test: Stream data where there are several multi-byte UTF-8

23827 // characters in a sequence and one of them is split between two data chunks.	23827 // characters in a sequence and one of them is split between two data chunks.

23828 const char* reference = "\xeb\x91\x80";	23828 const char* reference = "\xec\x92\x81";

23829 char chunk1[] =	23829 char chunk1[] =

23830 "function foo() {\n"	23830 "function foo() {\n"

23831 " // This function will contain an UTF-8 character which is not in\n"	23831 " // This function will contain an UTF-8 character which is not in\n"

23832 " // ASCII.\n"	23832 " // ASCII.\n"

23833 " var foob\xeb\x91\x80X";	23833 " var foob\xec\x92\x81X";

23834 char chunk2[] =	23834 char chunk2[] =

23835 "XXr = 13;\n"	23835 "XXr = 13;\n"

23836 " return foob\xeb\x91\x80\xeb\x91\x80r;\n"	23836 " return foob\xec\x92\x81\xec\x92\x81r;\n"

23837 "}\n";	23837 "}\n";

23838 chunk1[strlen(chunk1) - 1] = reference[0];	23838 chunk1[strlen(chunk1) - 1] = reference[0];

23839 chunk2[0] = reference[1];	23839 chunk2[0] = reference[1];

	23840 chunk2[1] = reference[2];

	23841 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};

	23842 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);

	23843 }

	23844

	23845

	23846 TEST(StreamingUtf8ScriptWithMultipleMultibyteCharactersSomeSplit2) {

	23847 // Another regression test, similar to the previous one. The difference is

	23848 // that the split character is not the last one in the sequence.

	23849 const char* reference = "\xec\x92\x81";

	23850 char chunk1[] =

	23851 "function foo() {\n"

	23852 " // This function will contain an UTF-8 character which is not in\n"

	23853 " // ASCII.\n"

	23854 " var foobX";

	23855 char chunk2[] =

	23856 "XX\xec\x92\x81r = 13;\n"

	23857 " return foob\xec\x92\x81\xec\x92\x81r;\n"

	23858 "}\n";

	23859 chunk1[strlen(chunk1) - 1] = reference[0];

	23860 chunk2[0] = reference[1];

23840 chunk2[1] = reference[2];	23861 chunk2[1] = reference[2];

23841 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};	23862 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};

23842 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);	23863 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);

23843 }	23864 }

OLD	NEW

« no previous file with comments | « src/scanner-character-streams.cc ('k') | no next file » | no next file with comments »