OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 23618 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
23629 " function foo() { return "; | 23629 " function foo() { return "; |
23630 char chunk2[] = " 13; }\n"; | 23630 char chunk2[] = " 13; }\n"; |
23631 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; | 23631 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; |
23632 | 23632 |
23633 RunStreamingTest(chunks); | 23633 RunStreamingTest(chunks); |
23634 } | 23634 } |
23635 } | 23635 } |
23636 | 23636 |
23637 | 23637 |
23638 TEST(StreamingUtf8Script) { | 23638 TEST(StreamingUtf8Script) { |
23639 // We'd want to write \uc481 instead of \xeb\x91\x80, but Windows compilers | 23639 // We'd want to write \uc481 instead of \xec\x92\x81, but Windows compilers |
23640 // don't like it. | 23640 // don't like it. |
23641 const char* chunk1 = | 23641 const char* chunk1 = |
23642 "function foo() {\n" | 23642 "function foo() {\n" |
23643 " // This function will contain an UTF-8 character which is not in\n" | 23643 " // This function will contain an UTF-8 character which is not in\n" |
23644 " // ASCII.\n" | 23644 " // ASCII.\n" |
23645 " var foob\xeb\x91\x80r = 13;\n" | 23645 " var foob\xec\x92\x81r = 13;\n" |
23646 " return foob\xeb\x91\x80r;\n" | 23646 " return foob\xec\x92\x81r;\n" |
23647 "}\n"; | 23647 "}\n"; |
23648 const char* chunks[] = {chunk1, "foo(); ", NULL}; | 23648 const char* chunks[] = {chunk1, "foo(); ", NULL}; |
23649 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); | 23649 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); |
23650 } | 23650 } |
23651 | 23651 |
23652 | 23652 |
23653 TEST(StreamingUtf8ScriptWithSplitCharactersSanityCheck) { | 23653 TEST(StreamingUtf8ScriptWithSplitCharactersSanityCheck) { |
23654 // A sanity check to prove that the approach of splitting UTF-8 | 23654 // A sanity check to prove that the approach of splitting UTF-8 |
23655 // characters is correct. Here is an UTF-8 character which will take three | 23655 // characters is correct. Here is an UTF-8 character which will take three |
23656 // bytes. | 23656 // bytes. |
23657 const char* reference = "\xeb\x91\x80"; | 23657 const char* reference = "\xec\x92\x81"; |
23658 CHECK(3u == strlen(reference)); // NOLINT - no CHECK_EQ for unsigned. | 23658 CHECK(3u == strlen(reference)); // NOLINT - no CHECK_EQ for unsigned. |
23659 | 23659 |
23660 char chunk1[] = | 23660 char chunk1[] = |
23661 "function foo() {\n" | 23661 "function foo() {\n" |
23662 " // This function will contain an UTF-8 character which is not in\n" | 23662 " // This function will contain an UTF-8 character which is not in\n" |
23663 " // ASCII.\n" | 23663 " // ASCII.\n" |
23664 " var foob"; | 23664 " var foob"; |
23665 char chunk2[] = | 23665 char chunk2[] = |
23666 "XXXr = 13;\n" | 23666 "XXXr = 13;\n" |
23667 " return foob\xeb\x91\x80r;\n" | 23667 " return foob\xec\x92\x81r;\n" |
23668 "}\n"; | 23668 "}\n"; |
23669 for (int i = 0; i < 3; ++i) { | 23669 for (int i = 0; i < 3; ++i) { |
23670 chunk2[i] = reference[i]; | 23670 chunk2[i] = reference[i]; |
23671 } | 23671 } |
23672 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; | 23672 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; |
23673 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); | 23673 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); |
23674 } | 23674 } |
23675 | 23675 |
23676 | 23676 |
23677 TEST(StreamingUtf8ScriptWithSplitCharacters) { | 23677 TEST(StreamingUtf8ScriptWithSplitCharacters) { |
23678 // Stream data where a multi-byte UTF-8 character is split between two data | 23678 // Stream data where a multi-byte UTF-8 character is split between two data |
23679 // chunks. | 23679 // chunks. |
23680 const char* reference = "\xeb\x91\x80"; | 23680 const char* reference = "\xec\x92\x81"; |
23681 char chunk1[] = | 23681 char chunk1[] = |
23682 "function foo() {\n" | 23682 "function foo() {\n" |
23683 " // This function will contain an UTF-8 character which is not in\n" | 23683 " // This function will contain an UTF-8 character which is not in\n" |
23684 " // ASCII.\n" | 23684 " // ASCII.\n" |
23685 " var foobX"; | 23685 " var foobX"; |
23686 char chunk2[] = | 23686 char chunk2[] = |
23687 "XXr = 13;\n" | 23687 "XXr = 13;\n" |
23688 " return foob\xeb\x91\x80r;\n" | 23688 " return foob\xec\x92\x81r;\n" |
23689 "}\n"; | 23689 "}\n"; |
23690 chunk1[strlen(chunk1) - 1] = reference[0]; | 23690 chunk1[strlen(chunk1) - 1] = reference[0]; |
23691 chunk2[0] = reference[1]; | 23691 chunk2[0] = reference[1]; |
23692 chunk2[1] = reference[2]; | 23692 chunk2[1] = reference[2]; |
23693 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; | 23693 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; |
23694 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); | 23694 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); |
23695 } | 23695 } |
23696 | 23696 |
23697 | 23697 |
23698 TEST(StreamingUtf8ScriptWithSplitCharactersValidEdgeCases) { | 23698 TEST(StreamingUtf8ScriptWithSplitCharactersValidEdgeCases) { |
23699 // Tests edge cases which should still be decoded correctly. | 23699 // Tests edge cases which should still be decoded correctly. |
23700 | 23700 |
23701 // Case 1: a chunk contains only bytes for a split character (and no other | 23701 // Case 1: a chunk contains only bytes for a split character (and no other |
23702 // data). This kind of a chunk would be exceptionally small, but we should | 23702 // data). This kind of a chunk would be exceptionally small, but we should |
23703 // still decode it correctly. | 23703 // still decode it correctly. |
23704 const char* reference = "\xeb\x91\x80"; | 23704 const char* reference = "\xec\x92\x81"; |
23705 // The small chunk is at the beginning of the split character | 23705 // The small chunk is at the beginning of the split character |
23706 { | 23706 { |
23707 char chunk1[] = | 23707 char chunk1[] = |
23708 "function foo() {\n" | 23708 "function foo() {\n" |
23709 " // This function will contain an UTF-8 character which is not in\n" | 23709 " // This function will contain an UTF-8 character which is not in\n" |
23710 " // ASCII.\n" | 23710 " // ASCII.\n" |
23711 " var foob"; | 23711 " var foob"; |
23712 char chunk2[] = "XX"; | 23712 char chunk2[] = "XX"; |
23713 char chunk3[] = | 23713 char chunk3[] = |
23714 "Xr = 13;\n" | 23714 "Xr = 13;\n" |
23715 " return foob\xeb\x91\x80r;\n" | 23715 " return foob\xec\x92\x81r;\n" |
23716 "}\n"; | 23716 "}\n"; |
23717 chunk2[0] = reference[0]; | 23717 chunk2[0] = reference[0]; |
23718 chunk2[1] = reference[1]; | 23718 chunk2[1] = reference[1]; |
23719 chunk3[0] = reference[2]; | 23719 chunk3[0] = reference[2]; |
23720 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL}; | 23720 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL}; |
23721 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); | 23721 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); |
23722 } | 23722 } |
23723 // The small chunk is at the end of a character | 23723 // The small chunk is at the end of a character |
23724 { | 23724 { |
23725 char chunk1[] = | 23725 char chunk1[] = |
23726 "function foo() {\n" | 23726 "function foo() {\n" |
23727 " // This function will contain an UTF-8 character which is not in\n" | 23727 " // This function will contain an UTF-8 character which is not in\n" |
23728 " // ASCII.\n" | 23728 " // ASCII.\n" |
23729 " var foobX"; | 23729 " var foobX"; |
23730 char chunk2[] = "XX"; | 23730 char chunk2[] = "XX"; |
23731 char chunk3[] = | 23731 char chunk3[] = |
23732 "r = 13;\n" | 23732 "r = 13;\n" |
23733 " return foob\xeb\x91\x80r;\n" | 23733 " return foob\xec\x92\x81r;\n" |
23734 "}\n"; | 23734 "}\n"; |
23735 chunk1[strlen(chunk1) - 1] = reference[0]; | 23735 chunk1[strlen(chunk1) - 1] = reference[0]; |
23736 chunk2[0] = reference[1]; | 23736 chunk2[0] = reference[1]; |
23737 chunk2[1] = reference[2]; | 23737 chunk2[1] = reference[2]; |
23738 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL}; | 23738 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL}; |
23739 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); | 23739 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); |
23740 } | 23740 } |
23741 // Case 2: the script ends with a multi-byte character. Make sure that it's | 23741 // Case 2: the script ends with a multi-byte character. Make sure that it's |
23742 // decoded correctly and not just ignored. | 23742 // decoded correctly and not just ignored. |
23743 { | 23743 { |
23744 char chunk1[] = | 23744 char chunk1[] = |
23745 "var foob\xeb\x91\x80 = 13;\n" | 23745 "var foob\xec\x92\x81 = 13;\n" |
23746 "foob\xeb\x91\x80"; | 23746 "foob\xec\x92\x81"; |
23747 const char* chunks[] = {chunk1, NULL}; | 23747 const char* chunks[] = {chunk1, NULL}; |
23748 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); | 23748 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); |
23749 } | 23749 } |
23750 } | 23750 } |
23751 | 23751 |
23752 | 23752 |
23753 TEST(StreamingUtf8ScriptWithSplitCharactersInvalidEdgeCases) { | 23753 TEST(StreamingUtf8ScriptWithSplitCharactersInvalidEdgeCases) { |
23754 // Test cases where a UTF-8 character is split over several chunks. Those | 23754 // Test cases where a UTF-8 character is split over several chunks. Those |
23755 // cases are not supported (the embedder should give the data in big enough | 23755 // cases are not supported (the embedder should give the data in big enough |
23756 // chunks), but we shouldn't crash, just produce a parse error. | 23756 // chunks), but we shouldn't crash, just produce a parse error. |
23757 const char* reference = "\xeb\x91\x80"; | 23757 const char* reference = "\xec\x92\x81"; |
23758 char chunk1[] = | 23758 char chunk1[] = |
23759 "function foo() {\n" | 23759 "function foo() {\n" |
23760 " // This function will contain an UTF-8 character which is not in\n" | 23760 " // This function will contain an UTF-8 character which is not in\n" |
23761 " // ASCII.\n" | 23761 " // ASCII.\n" |
23762 " var foobX"; | 23762 " var foobX"; |
23763 char chunk2[] = "X"; | 23763 char chunk2[] = "X"; |
23764 char chunk3[] = | 23764 char chunk3[] = |
23765 "Xr = 13;\n" | 23765 "Xr = 13;\n" |
23766 " return foob\xeb\x91\x80r;\n" | 23766 " return foob\xec\x92\x81r;\n" |
23767 "}\n"; | 23767 "}\n"; |
23768 chunk1[strlen(chunk1) - 1] = reference[0]; | 23768 chunk1[strlen(chunk1) - 1] = reference[0]; |
23769 chunk2[0] = reference[1]; | 23769 chunk2[0] = reference[1]; |
23770 chunk3[0] = reference[2]; | 23770 chunk3[0] = reference[2]; |
23771 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL}; | 23771 const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL}; |
23772 | 23772 |
23773 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false); | 23773 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false); |
23774 } | 23774 } |
23775 | 23775 |
23776 | 23776 |
(...skipping 21 matching lines...) Expand all Loading... |
23798 const v8::ScriptCompiler::CachedData* cached_data = source.GetCachedData(); | 23798 const v8::ScriptCompiler::CachedData* cached_data = source.GetCachedData(); |
23799 CHECK(cached_data != NULL); | 23799 CHECK(cached_data != NULL); |
23800 CHECK(cached_data->data != NULL); | 23800 CHECK(cached_data->data != NULL); |
23801 CHECK_GT(cached_data->length, 0); | 23801 CHECK_GT(cached_data->length, 0); |
23802 } | 23802 } |
23803 | 23803 |
23804 | 23804 |
23805 TEST(StreamingScriptWithInvalidUtf8) { | 23805 TEST(StreamingScriptWithInvalidUtf8) { |
23806 // Regression test for a crash: test that invalid UTF-8 bytes in the end of a | 23806 // Regression test for a crash: test that invalid UTF-8 bytes in the end of a |
23807 // chunk don't produce a crash. | 23807 // chunk don't produce a crash. |
23808 const char* reference = "\xeb\x91\x80\x80\x80"; | 23808 const char* reference = "\xec\x92\x81\x80\x80"; |
23809 char chunk1[] = | 23809 char chunk1[] = |
23810 "function foo() {\n" | 23810 "function foo() {\n" |
23811 " // This function will contain an UTF-8 character which is not in\n" | 23811 " // This function will contain an UTF-8 character which is not in\n" |
23812 " // ASCII.\n" | 23812 " // ASCII.\n" |
23813 " var foobXXXXX"; // Too many bytes which look like incomplete chars! | 23813 " var foobXXXXX"; // Too many bytes which look like incomplete chars! |
23814 char chunk2[] = | 23814 char chunk2[] = |
23815 "r = 13;\n" | 23815 "r = 13;\n" |
23816 " return foob\xeb\x91\x80\x80\x80r;\n" | 23816 " return foob\xec\x92\x81\x80\x80r;\n" |
23817 "}\n"; | 23817 "}\n"; |
23818 for (int i = 0; i < 5; ++i) chunk1[strlen(chunk1) - 5 + i] = reference[i]; | 23818 for (int i = 0; i < 5; ++i) chunk1[strlen(chunk1) - 5 + i] = reference[i]; |
23819 | 23819 |
23820 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; | 23820 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; |
23821 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false); | 23821 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false); |
23822 } | 23822 } |
23823 | 23823 |
23824 | 23824 |
23825 TEST(StreamingUtf8ScriptWithMultipleMultibyteCharactersSomeSplit) { | 23825 TEST(StreamingUtf8ScriptWithMultipleMultibyteCharactersSomeSplit) { |
23826 // Regression test: Stream data where there are several multi-byte UTF-8 | 23826 // Regression test: Stream data where there are several multi-byte UTF-8 |
23827 // characters in a sequence and one of them is split between two data chunks. | 23827 // characters in a sequence and one of them is split between two data chunks. |
23828 const char* reference = "\xeb\x91\x80"; | 23828 const char* reference = "\xec\x92\x81"; |
23829 char chunk1[] = | 23829 char chunk1[] = |
23830 "function foo() {\n" | 23830 "function foo() {\n" |
23831 " // This function will contain an UTF-8 character which is not in\n" | 23831 " // This function will contain an UTF-8 character which is not in\n" |
23832 " // ASCII.\n" | 23832 " // ASCII.\n" |
23833 " var foob\xeb\x91\x80X"; | 23833 " var foob\xec\x92\x81X"; |
23834 char chunk2[] = | 23834 char chunk2[] = |
23835 "XXr = 13;\n" | 23835 "XXr = 13;\n" |
23836 " return foob\xeb\x91\x80\xeb\x91\x80r;\n" | 23836 " return foob\xec\x92\x81\xec\x92\x81r;\n" |
23837 "}\n"; | 23837 "}\n"; |
23838 chunk1[strlen(chunk1) - 1] = reference[0]; | 23838 chunk1[strlen(chunk1) - 1] = reference[0]; |
23839 chunk2[0] = reference[1]; | 23839 chunk2[0] = reference[1]; |
| 23840 chunk2[1] = reference[2]; |
| 23841 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; |
| 23842 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); |
| 23843 } |
| 23844 |
| 23845 |
| 23846 TEST(StreamingUtf8ScriptWithMultipleMultibyteCharactersSomeSplit2) { |
| 23847 // Another regression test, similar to the previous one. The difference is |
| 23848 // that the split character is not the last one in the sequence. |
| 23849 const char* reference = "\xec\x92\x81"; |
| 23850 char chunk1[] = |
| 23851 "function foo() {\n" |
| 23852 " // This function will contain an UTF-8 character which is not in\n" |
| 23853 " // ASCII.\n" |
| 23854 " var foobX"; |
| 23855 char chunk2[] = |
| 23856 "XX\xec\x92\x81r = 13;\n" |
| 23857 " return foob\xec\x92\x81\xec\x92\x81r;\n" |
| 23858 "}\n"; |
| 23859 chunk1[strlen(chunk1) - 1] = reference[0]; |
| 23860 chunk2[0] = reference[1]; |
23840 chunk2[1] = reference[2]; | 23861 chunk2[1] = reference[2]; |
23841 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; | 23862 const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; |
23842 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); | 23863 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8); |
23843 } | 23864 } |
OLD | NEW |