Chromium Code Reviews| Index: src/builtins/builtins-intl.cc |
| diff --git a/src/builtins/builtins-intl.cc b/src/builtins/builtins-intl.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..5ef5841e14bbad3df8f961b02e0ec7a0ae356615 |
| --- /dev/null |
| +++ b/src/builtins/builtins-intl.cc |
| @@ -0,0 +1,197 @@ |
| +// Copyright 2017 the V8 project authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "src/builtins/builtins-regexp.h" |
| +#include "src/builtins/builtins-utils.h" |
| +#include "src/builtins/builtins.h" |
| +#include "src/code-factory.h" |
| +#include "src/code-stub-assembler.h" |
| +#include "src/regexp/regexp-utils.h" |
| +#include "src/string-case.h" |
| +#include "src/unicode-inl.h" |
| +#include "src/unicode.h" |
| + |
| +namespace v8 { |
| +namespace internal { |
| + |
| +namespace { |
| + |
| +// Reports whether upper-casing |character| yields something that no longer |
| +// fits into one byte. Only the micro sign (0xb5) and y with umlauts (0xff) |
| +// behave this way. |
| +inline bool ToUpperOverflows(uc32 character) { |
| +  switch (character) { |
| +    case 0xb5:  // micro sign |
| +    case 0xff:  // y with umlauts |
| +      return true; |
| +    default: |
| +      return false; |
| +  } |
| +} |
| + |
| +template <class Converter> |
| +MUST_USE_RESULT static Object* ConvertCaseHelper( |
| + Isolate* isolate, String* string, SeqString* result, int result_length, |
| + unibrow::Mapping<Converter, 128>* mapping) { |
| + DisallowHeapAllocation no_gc; |
| + // Converts |string| into |result|. The caller tries this twice, once with the |
| + // assumption that the result is no longer than the input and, if that breaks, |
| + // again with the exact length, which the first try returns as a Smi. Otherwise |
| + // returns |result| if any character changed and |string| if none did. |
| + // |
| + // NOTE: This assumes that the upper/lower case of an ASCII |
| + // character is also ASCII. This is currently the case, but it |
| + // might break in the future if we implement more context and locale |
| + // dependent upper/lower conversions. |
| + bool has_changed_character = false; |
| + |
| + // Convert all characters through |mapping|, assuming that they will fit |
| + // in the buffer. |
| + StringCharacterStream stream(string); |
| + unibrow::uchar chars[Converter::kMaxWidth]; |
| + // We can assume the string is not empty; ConvertCase returns those early. |
| + uc32 current = stream.GetNext(); |
| + bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString(); |
| + for (int i = 0; i < result_length;) { |
| + bool has_next = stream.HasMore(); |
| + uc32 next = has_next ? stream.GetNext() : 0; |
| + int char_length = mapping->get(current, next, chars); |
| + if (char_length == 0) { |
| + // The case conversion of this character is the character itself. |
| + result->Set(i, current); |
| + i++; |
| + } else if (char_length == 1 && |
| + (ignore_overflow || !ToUpperOverflows(current))) { |
| + // Common case: the letter converts to one character that fits in |result|. |
| + DCHECK(static_cast<uc32>(chars[0]) != current); |
| + result->Set(i, chars[0]); |
| + has_changed_character = true; |
| + i++; |
| + } else if (result_length == string->length()) { |
| + bool overflows = ToUpperOverflows(current); |
| + // We've assumed that the result would be as long as the |
| + // input but here is a character that converts to several |
| + // characters or no longer fits into one byte. No matter, we |
| + // calculate the exact length of the result and try the whole |
| + // thing again. |
| + // |
| + // Note that this leaves room for optimization. We could just |
| + // memcpy what we already have to the result string. Also, since |
| + // the result string is the last object allocated, we could |
| + // "realloc" it and probably, in the vast majority of cases, |
| + // extend the existing string to be able to hold the full result. |
| + int next_length = 0; |
| + if (has_next) { |
| + next_length = mapping->get(next, 0, chars); |
| + if (next_length == 0) next_length = 1; |
| + } |
| + int current_length = i + char_length + next_length; |
| + while (stream.HasMore()) { |
| + current = stream.GetNext(); |
| + overflows |= ToUpperOverflows(current); |
| + // NOTE: we use 0 as the next character here because, while |
| + // the next character may affect what a character converts to, |
| + // it does not in any case affect the length of what it converts |
| + // to. |
| + int char_length = mapping->get(current, 0, chars); |
| + if (char_length == 0) char_length = 1; |
| + current_length += char_length; |
| + if (current_length > String::kMaxLength) { |
| + AllowHeapAllocation allocate_error_and_return; |
| + THROW_NEW_ERROR_RETURN_FAILURE(isolate, |
| + NewInvalidStringLengthError()); |
| + } |
| + } |
| + // Try again with the real length. The length is returned negated if |
| + // upper-casing needs a two-byte string but |result| is not two-byte. |
| + return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length) |
| + : Smi::FromInt(current_length); |
| + } else { |
| + for (int j = 0; j < char_length; j++) { |
| + result->Set(i, chars[j]); |
| + i++; |
| + } |
| + has_changed_character = true; |
| + } |
| + current = next; |
| + } |
| + if (has_changed_character) { |
| + return result; |
| + } else { |
| + // If we didn't actually change anything in doing the conversion |
| + // we simply return the original string and let the converted string |
| + // become garbage; there is no reason to keep two identical strings |
| + // alive. |
| + return string; |
| + } |
| +} |
| + |
| +template <class Converter> |
| +MUST_USE_RESULT static Object* ConvertCase( |

Dan Ehrenberg
2017/03/14 16:19:56
It looks like this case conversion code was copied
|
| + Handle<String> s, Isolate* isolate, |
| + unibrow::Mapping<Converter, 128>* mapping) { |
| + s = String::Flatten(s); |
| + int length = s->length(); |
| + // Return empty strings as-is; ConvertCaseHelper assumes a non-empty string. |
| + if (length == 0) return *s; |
| + |
| + // Overview: a string that is one-byte underneath first tries FastAsciiConvert. |
| + // Otherwise (or if that stops early) ConvertCaseHelper fills a same-length |
| + // buffer; if it answers with a Smi, it is rerun on a buffer of that exact |
| + // length, which is two-byte when the Smi is negative or |s| is not one-byte. |
| + // NOTE: The fast path assumes the upper/lower case of an ASCII character is |
| + // also ASCII; locale-dependent conversions could break that in the future. |
| + if (s->IsOneByteRepresentationUnderneath()) { |
| + // Fast-path output buffer: same length as input. |
| + Handle<SeqOneByteString> result = |
| + isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); |
| + DisallowHeapAllocation no_gc; |
| + String::FlatContent flat_content = s->GetFlatContent(); |
| + DCHECK(flat_content.IsFlat()); |
| + bool has_changed_character = false; |
| + int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>( |
| + reinterpret_cast<char*>(result->GetChars()), |
| + reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()), |
| + length, &has_changed_character); |
| + // If the fast path stopped early, discard its result and fall through. |
| + if (index_to_first_unprocessed == length) |
| + return has_changed_character ? *result : *s; |
| + } |
| + |
| + Handle<SeqString> result; // First try: same length as input. |
| + if (s->IsOneByteRepresentation()) { |
| + result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); |
| + } else { |
| + result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked(); |
| + } |
| + |
| + Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping); |
| + if (answer->IsException(isolate) || answer->IsString()) return answer; |
| + |
| + DCHECK(answer->IsSmi()); |
| + length = Smi::cast(answer)->value(); |
| + if (s->IsOneByteRepresentation() && length > 0) { |
| + ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| + isolate, result, isolate->factory()->NewRawOneByteString(length)); |
| + } else { |
| + if (length < 0) length = -length; |
| + ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| + isolate, result, isolate->factory()->NewRawTwoByteString(length)); |
| + } |
| + return ConvertCaseHelper(isolate, *s, *result, length, mapping); |
| +} |
| + |
| +} // namespace |
| + |
| +// Builtin labelled "String.prototype.toLowerCase": hands the string produced |
| +// by TO_THIS_STRING to ConvertCase together with the isolate's lower-case |
| +// mapping and returns whatever ConvertCase returns. |
| +BUILTIN(StringPrototypeToLowerCaseI18N) { |
| +  HandleScope scope(isolate); |
| +  TO_THIS_STRING(string, "String.prototype.toLowerCase"); |
| +  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping(); |
| +  return ConvertCase(string, isolate, lower_mapping); |
| +} |
| + |
| +// Builtin labelled "String.prototype.toUpperCase": hands the string produced |
| +// by TO_THIS_STRING to ConvertCase together with the isolate's upper-case |
| +// mapping and returns whatever ConvertCase returns. |
| +BUILTIN(StringPrototypeToUpperCaseI18N) { |
| +  HandleScope scope(isolate); |
| +  TO_THIS_STRING(string, "String.prototype.toUpperCase"); |
| +  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping(); |
| +  return ConvertCase(string, isolate, upper_mapping); |
| +} |
| + |
| +} // namespace internal |
| +} // namespace v8 |