Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(343)

Side by Side Diff: src/heap.cc

Issue 2832050: Convert Unicode code points outside the basic multilingual plane to the replacement character. (Closed)
Patch Set: Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright 2009 the V8 project authors. All rights reserved. 1 // Copyright 2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2848 matching lines...) Expand 10 before | Expand all | Expand 10 after
2859 SeqAsciiString* string_result = SeqAsciiString::cast(result); 2859 SeqAsciiString* string_result = SeqAsciiString::cast(result);
2860 for (int i = 0; i < string.length(); i++) { 2860 for (int i = 0; i < string.length(); i++) {
2861 string_result->SeqAsciiStringSet(i, string[i]); 2861 string_result->SeqAsciiStringSet(i, string[i]);
2862 } 2862 }
2863 return result; 2863 return result;
2864 } 2864 }
2865 2865
2866 2866
2867 Object* Heap::AllocateStringFromUtf8(Vector<const char> string, 2867 Object* Heap::AllocateStringFromUtf8(Vector<const char> string,
2868 PretenureFlag pretenure) { 2868 PretenureFlag pretenure) {
2869 // V8 only supports characters in the Basic Multilingual Plane.
2870 const uc32 kMaxSupportedChar = 0xFFFF;
2869 // Count the number of characters in the UTF-8 string and check if 2871 // Count the number of characters in the UTF-8 string and check if
2870 // it is an ASCII string. 2872 // it is an ASCII string.
2871 Access<Scanner::Utf8Decoder> decoder(Scanner::utf8_decoder()); 2873 Access<Scanner::Utf8Decoder> decoder(Scanner::utf8_decoder());
2872 decoder->Reset(string.start(), string.length()); 2874 decoder->Reset(string.start(), string.length());
2873 int chars = 0; 2875 int chars = 0;
2874 bool is_ascii = true; 2876 bool is_ascii = true;
2875 while (decoder->has_more()) { 2877 while (decoder->has_more()) {
2876 uc32 r = decoder->GetNext(); 2878 uc32 r = decoder->GetNext();
2877 if (r > String::kMaxAsciiCharCode) is_ascii = false; 2879 if (r > String::kMaxAsciiCharCode) is_ascii = false;
2878 chars++; 2880 chars++;
2879 } 2881 }
2880 2882
2881 // If the string is ascii, we do not need to convert the characters 2883 // If the string is ascii, we do not need to convert the characters
2882 // since UTF8 is backwards compatible with ascii. 2884 // since UTF8 is backwards compatible with ascii.
2883 if (is_ascii) return AllocateStringFromAscii(string, pretenure); 2885 if (is_ascii) return AllocateStringFromAscii(string, pretenure);
2884 2886
2885 Object* result = AllocateRawTwoByteString(chars, pretenure); 2887 Object* result = AllocateRawTwoByteString(chars, pretenure);
2886 if (result->IsFailure()) return result; 2888 if (result->IsFailure()) return result;
2887 2889
2888 // Convert and copy the characters into the new object. 2890 // Convert and copy the characters into the new object.
2889 String* string_result = String::cast(result); 2891 String* string_result = String::cast(result);
2890 decoder->Reset(string.start(), string.length()); 2892 decoder->Reset(string.start(), string.length());
2891 for (int i = 0; i < chars; i++) { 2893 for (int i = 0; i < chars; i++) {
2892 uc32 r = decoder->GetNext(); 2894 uc32 r = decoder->GetNext();
2895 if (r > kMaxSupportedChar) { r = unibrow::Utf8::kBadChar; }
2893 string_result->Set(i, r); 2896 string_result->Set(i, r);
2894 } 2897 }
2895 return result; 2898 return result;
2896 } 2899 }
2897 2900
2898 2901
2899 Object* Heap::AllocateStringFromTwoByte(Vector<const uc16> string, 2902 Object* Heap::AllocateStringFromTwoByte(Vector<const uc16> string,
2900 PretenureFlag pretenure) { 2903 PretenureFlag pretenure) {
2901 // Check if the string is an ASCII string. 2904 // Check if the string is an ASCII string.
2902 int i = 0; 2905 int i = 0;
(...skipping 1919 matching lines...) Expand 10 before | Expand all | Expand 10 after
4822 void ExternalStringTable::TearDown() { 4825 void ExternalStringTable::TearDown() {
4823 new_space_strings_.Free(); 4826 new_space_strings_.Free();
4824 old_space_strings_.Free(); 4827 old_space_strings_.Free();
4825 } 4828 }
4826 4829
4827 4830
4828 List<Object*> ExternalStringTable::new_space_strings_; 4831 List<Object*> ExternalStringTable::new_space_strings_;
4829 List<Object*> ExternalStringTable::old_space_strings_; 4832 List<Object*> ExternalStringTable::old_space_strings_;
4830 4833
4831 } } // namespace v8::internal 4834 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698