test/cctest/test-api.cc - Issue 708823002: Streaming API: detect UTF-8 BOM.

Side by Side Diff: test/cctest/test-api.cc

Issue 708823002: Streaming API: detect UTF-8 BOM. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: . Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 23830 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
23841 memcpy(copy, chunks_[index_], len);	23841 memcpy(copy, chunks_[index_], len);

23842 *src = copy;	23842 *src = copy;

23843 ++index_;	23843 ++index_;

23844 return len;	23844 return len;

23845 }	23845 }

23846	23846

23847 // Helper for constructing a string from chunks (the compilation needs it	23847 // Helper for constructing a string from chunks (the compilation needs it

23848 // too).	23848 // too).

// Concatenates the NULL-terminated array |chunks| of C strings into a single
// newly allocated, NUL-terminated string. If the first chunk starts with the
// UTF-8 byte order mark (0xEF 0xBB 0xBF), those three bytes are dropped; this
// simulates what the embedder does before handing the full source over.
// The caller owns the returned buffer and must release it with delete[].
static char* FullSourceString(const char** chunks) {
  const size_t kBomLength = 3;

  // First pass: compute the total length and detect a BOM in the first chunk.
  size_t total_len = 0;
  bool has_bom = false;
  for (size_t i = 0; chunks[i] != NULL; ++i) {
    const size_t len = strlen(chunks[i]);
    total_len += len;
    if (i == 0 && len >= kBomLength &&
        chunks[i][0] == static_cast<char>(0xef) &&
        chunks[i][1] == static_cast<char>(0xbb) &&
        chunks[i][2] == static_cast<char>(0xbf)) {
      total_len -= kBomLength;
      has_bom = true;
    }
  }

  // Second pass: copy the chunks back to back.
  char* full_string = new char[total_len + 1];
  size_t offset = 0;
  for (size_t i = 0; chunks[i] != NULL; ++i) {
    // Only the first chunk can carry the BOM. Skipping three bytes of every
    // chunk would truncate later chunks and underflow |len| for chunks
    // shorter than the BOM.
    const size_t skip = (i == 0 && has_bom) ? kBomLength : 0;
    const size_t len = strlen(chunks[i]) - skip;
    memcpy(full_string + offset, chunks[i] + skip, len);
    offset += len;
  }
  full_string[total_len] = 0;
  return full_string;
}

23864	23880

23865 private:	23881 private:

23866 const char** chunks_;	23882 const char** chunks_;

23867 unsigned index_;	23883 unsigned index_;

23868 };	23884 };

23869	23885

(...skipping 23 matching lines...) Expand all Loading...
23893	23909

23894 // The possible errors are only produced while compiling.	23910 // The possible errors are only produced while compiling.

23895 CHECK_EQ(false, try_catch.HasCaught());	23911 CHECK_EQ(false, try_catch.HasCaught());

23896	23912

23897 v8::Handle<Script> script = v8::ScriptCompiler::Compile(	23913 v8::Handle<Script> script = v8::ScriptCompiler::Compile(

23898 isolate, &source, v8_str(full_source), origin);	23914 isolate, &source, v8_str(full_source), origin);

23899 if (expected_success) {	23915 if (expected_success) {

23900 CHECK(!script.IsEmpty());	23916 CHECK(!script.IsEmpty());

23901 v8::Handle<Value> result(script->Run());	23917 v8::Handle<Value> result(script->Run());

23902 // All scripts are supposed to return the fixed value 13 when ran.	23918 // All scripts are supposed to return the fixed value 13 when ran.

	23919 CHECK_EQ(false, try_catch.HasCaught());

23903 CHECK_EQ(13, result->Int32Value());	23920 CHECK_EQ(13, result->Int32Value());

23904 } else {	23921 } else {

23905 CHECK(script.IsEmpty());	23922 CHECK(script.IsEmpty());

23906 CHECK(try_catch.HasCaught());	23923 CHECK(try_catch.HasCaught());

23907 }	23924 }

23908 delete[] full_source;	23925 delete[] full_source;

23909 }	23926 }

23910	23927

23911	23928

23912 TEST(StreamingSimpleScript) {	23929 TEST(StreamingSimpleScript) {

(...skipping 271 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
24184 char chunk2[] =	24201 char chunk2[] =

24185 "XX\xec\x92\x81r = 13;\n"	24202 "XX\xec\x92\x81r = 13;\n"

24186 " return foob\xec\x92\x81\xec\x92\x81r;\n"	24203 " return foob\xec\x92\x81\xec\x92\x81r;\n"

24187 "}\n";	24204 "}\n";

24188 chunk1[strlen(chunk1) - 1] = reference[0];	24205 chunk1[strlen(chunk1) - 1] = reference[0];

24189 chunk2[0] = reference[1];	24206 chunk2[0] = reference[1];

24190 chunk2[1] = reference[2];	24207 chunk2[1] = reference[2];

24191 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};	24208 const char* chunks[] = {chunk1, chunk2, "foo();", NULL};

24192 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);	24209 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);

24193 }	24210 }

	24211

	24212

	24213 TEST(StreamingUtf8ScriptWithByteOrderMark) {

	24214 // Some Windows editors add the "UTF-8 BOM". If we don't handle it properly,

	24215 // some scripts might be parsed successfully but the positions of lazy

	24216 // functions will be off.

	24217 i::FLAG_min_preparse_length = 0;

	24218 i::FLAG_lazy = true;

	24219 // Note that in this case the byte order mark doesn't actually cause a parse

	24220 // error, since it's a UTF-8 character followed by a comment.

	24221 char chunk1[] =

	24222 "XXX// That's the BOM.\n"

	24223 "function this_is_lazy() {\n"

	24224 " return 13;\n"

	24225 "}\n";

	24226 chunk1[0] = static_cast<char>(0xef);

	24227 chunk1[1] = static_cast<char>(0xbb);

	24228 chunk1[2] = static_cast<char>(0xbf);

	24229 const char* chunks[] = {chunk1, "this_is_lazy();", NULL};

	24230 RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);

	24231 }

OLD	NEW

« no previous file with comments | « src/scanner-character-streams.cc ('k') | no next file » | no next file with comments »