Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(296)

Side by Side Diff: test/cctest/test-api.cc

Issue 121173009: String::WriteUtf8: Add REPLACE_INVALID_UTF8 option (Closed) Base URL: git://github.com/v8/v8.git@master
Patch Set: Fix mistake in test case, finish patch Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« src/unicode-inl.h ('K') | « src/unicode-inl.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 7535 matching lines...) Expand 10 before | Expand all | Expand 10 after
7546 7546
7547 7547
7548 THREADED_TEST(StringWrite) { 7548 THREADED_TEST(StringWrite) {
7549 LocalContext context; 7549 LocalContext context;
7550 v8::HandleScope scope(context->GetIsolate()); 7550 v8::HandleScope scope(context->GetIsolate());
7551 v8::Handle<String> str = v8_str("abcde"); 7551 v8::Handle<String> str = v8_str("abcde");
7552 // abc<Icelandic eth><Unicode snowman>. 7552 // abc<Icelandic eth><Unicode snowman>.
7553 v8::Handle<String> str2 = v8_str("abc\303\260\342\230\203"); 7553 v8::Handle<String> str2 = v8_str("abc\303\260\342\230\203");
7554 v8::Handle<String> str3 = v8::String::NewFromUtf8( 7554 v8::Handle<String> str3 = v8::String::NewFromUtf8(
7555 context->GetIsolate(), "abc\0def", v8::String::kNormalString, 7); 7555 context->GetIsolate(), "abc\0def", v8::String::kNormalString, 7);
7556 // "ab" + lead surrogate + "cd" + trail surrogate + "ef"
7557 uint16_t orphans[8] = { 0x61, 0x62, 0xd800, 0x63, 0x64, 0xdc00, 0x65, 0x66 };
7558 v8::Handle<String> orphans_str = v8::String::NewFromTwoByte(
7559 context->GetIsolate(), orphans, v8::String::kNormalString, 8);
7560 // single lead surrogate
7561 uint16_t lead[1] = { 0xd800 };
7562 v8::Handle<String> lead_str = v8::String::NewFromTwoByte(
7563 context->GetIsolate(), lead, v8::String::kNormalString, 1);
7564 // single trail surrogate
7565 uint16_t trail[1] = { 0xdc00 };
7566 v8::Handle<String> trail_str = v8::String::NewFromTwoByte(
7567 context->GetIsolate(), trail, v8::String::kNormalString, 1);
7568 // surrogate pair
7569 uint16_t pair[2] = { 0xd800, 0xdc00 };
7570 v8::Handle<String> pair_str = v8::String::NewFromTwoByte(
7571 context->GetIsolate(), pair, v8::String::kNormalString, 2);
haimuiba 2014/01/17 08:33:07 This mistake was the missing piece in the puzzle.
7556 const int kStride = 4; // Must match stride in for loops in JS below. 7572 const int kStride = 4; // Must match stride in for loops in JS below.
7557 CompileRun( 7573 CompileRun(
7558 "var left = '';" 7574 "var left = '';"
7559 "for (var i = 0; i < 0xd800; i += 4) {" 7575 "for (var i = 0; i < 0xd800; i += 4) {"
7560 " left = left + String.fromCharCode(i);" 7576 " left = left + String.fromCharCode(i);"
7561 "}"); 7577 "}");
7562 CompileRun( 7578 CompileRun(
7563 "var right = '';" 7579 "var right = '';"
7564 "for (var i = 0; i < 0xd800; i += 4) {" 7580 "for (var i = 0; i < 0xd800; i += 4) {"
7565 " right = String.fromCharCode(i) + right;" 7581 " right = String.fromCharCode(i) + right;"
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
7619 CHECK_EQ(3, len); 7635 CHECK_EQ(3, len);
7620 CHECK_EQ(3, charlen); 7636 CHECK_EQ(3, charlen);
7621 CHECK_EQ(0, strncmp(utf8buf, "abc\1", 4)); 7637 CHECK_EQ(0, strncmp(utf8buf, "abc\1", 4));
7622 7638
7623 memset(utf8buf, 0x1, 1000); 7639 memset(utf8buf, 0x1, 1000);
7624 len = str2->WriteUtf8(utf8buf, 2, &charlen); 7640 len = str2->WriteUtf8(utf8buf, 2, &charlen);
7625 CHECK_EQ(2, len); 7641 CHECK_EQ(2, len);
7626 CHECK_EQ(2, charlen); 7642 CHECK_EQ(2, charlen);
7627 CHECK_EQ(0, strncmp(utf8buf, "ab\1", 3)); 7643 CHECK_EQ(0, strncmp(utf8buf, "ab\1", 3));
7628 7644
7645 // allow orphan surrogates by default
7646 memset(utf8buf, 0x1, 1000);
7647 len = orphans_str->WriteUtf8(utf8buf, sizeof(utf8buf), &charlen);
7648 CHECK_EQ(13, len);
7649 CHECK_EQ(8, charlen);
7650 CHECK_EQ(0, strcmp(utf8buf, "ab\355\240\200cd\355\260\200ef"));
7651
7652 // replace orphan surrogates with unicode replacement character
7653 memset(utf8buf, 0x1, 1000);
7654 len = orphans_str->WriteUtf8(utf8buf,
7655 sizeof(utf8buf),
7656 &charlen,
7657 String::REPLACE_INVALID_UTF8);
7658 CHECK_EQ(13, len);
7659 CHECK_EQ(8, charlen);
7660 CHECK_EQ(0, strcmp(utf8buf, "ab\357\277\275cd\357\277\275ef"));
7661
7662 // replace single lead surrogate with unicode replacement character
7663 memset(utf8buf, 0x1, 1000);
7664 len = lead_str->WriteUtf8(utf8buf,
7665 sizeof(utf8buf),
7666 &charlen,
7667 String::REPLACE_INVALID_UTF8);
7668 CHECK_EQ(4, len);
7669 CHECK_EQ(1, charlen);
7670 CHECK_EQ(0, strcmp(utf8buf, "\357\277\275"));
7671
7672 // replace single trail surrogate with unicode replacement character
7673 memset(utf8buf, 0x1, 1000);
7674 len = trail_str->WriteUtf8(utf8buf,
7675 sizeof(utf8buf),
7676 &charlen,
7677 String::REPLACE_INVALID_UTF8);
7678 CHECK_EQ(4, len);
7679 CHECK_EQ(1, charlen);
7680 CHECK_EQ(0, strcmp(utf8buf, "\357\277\275"));
7681
7682 // do not replace / write anything if surrogate pair does not fit the buffer
7683 // space
7684 memset(utf8buf, 0x1, 1000);
7685 len = pair_str->WriteUtf8(utf8buf,
7686 3,
7687 &charlen,
7688 String::REPLACE_INVALID_UTF8);
7689 CHECK_EQ(0, len);
7690 CHECK_EQ(0, charlen);
7691
7629 memset(utf8buf, 0x1, sizeof(utf8buf)); 7692 memset(utf8buf, 0x1, sizeof(utf8buf));
7630 len = GetUtf8Length(left_tree); 7693 len = GetUtf8Length(left_tree);
7631 int utf8_expected = 7694 int utf8_expected =
7632 (0x80 + (0x800 - 0x80) * 2 + (0xd800 - 0x800) * 3) / kStride; 7695 (0x80 + (0x800 - 0x80) * 2 + (0xd800 - 0x800) * 3) / kStride;
7633 CHECK_EQ(utf8_expected, len); 7696 CHECK_EQ(utf8_expected, len);
7634 len = left_tree->WriteUtf8(utf8buf, utf8_expected, &charlen); 7697 len = left_tree->WriteUtf8(utf8buf, utf8_expected, &charlen);
7635 CHECK_EQ(utf8_expected, len); 7698 CHECK_EQ(utf8_expected, len);
7636 CHECK_EQ(0xd800 / kStride, charlen); 7699 CHECK_EQ(0xd800 / kStride, charlen);
7637 CHECK_EQ(0xed, static_cast<unsigned char>(utf8buf[utf8_expected - 3])); 7700 CHECK_EQ(0xed, static_cast<unsigned char>(utf8buf[utf8_expected - 3]));
7638 CHECK_EQ(0x9f, static_cast<unsigned char>(utf8buf[utf8_expected - 2])); 7701 CHECK_EQ(0x9f, static_cast<unsigned char>(utf8buf[utf8_expected - 2]));
(...skipping 13436 matching lines...) Expand 10 before | Expand all | Expand 10 after
21075 } 21138 }
21076 for (int i = 0; i < runs; i++) { 21139 for (int i = 0; i < runs; i++) {
21077 Local<String> expected; 21140 Local<String> expected;
21078 if (i != 0) { 21141 if (i != 0) {
21079 CHECK_EQ(v8_str("escape value"), values[i]); 21142 CHECK_EQ(v8_str("escape value"), values[i]);
21080 } else { 21143 } else {
21081 CHECK(values[i].IsEmpty()); 21144 CHECK(values[i].IsEmpty());
21082 } 21145 }
21083 } 21146 }
21084 } 21147 }
OLD | NEW
« src/unicode-inl.h ('K') | « src/unicode-inl.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698