src/builtins/builtins-intl.cc - Issue 2728763006: Migrate some case conversion functions from JS to CPP builtins

Side by Side Diff: src/builtins/builtins-intl.cc

Issue 2728763006: Migrate some case conversion functions from JS to CPP builtins (Closed)

Patch Set: "this also doesn't work" (created 3 years, 9 months ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 // Copyright 2017 the V8 project authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "src/builtins/builtins-regexp.h"

	6 #include "src/builtins/builtins-utils.h"

	7 #include "src/builtins/builtins.h"

	8 #include "src/code-factory.h"

	9 #include "src/code-stub-assembler.h"

	10 #include "src/regexp/regexp-utils.h"

	11 #include "src/string-case.h"

	12 #include "src/unicode-inl.h"

	13 #include "src/unicode.h"

	14

	15 namespace v8 {

	16 namespace internal {

	17

	18 namespace {

	19

	20 inline bool ToUpperOverflows(uc32 character) {

	21 // y with umlauts and the micro sign are the only characters that stop

	22 // fitting into one-byte when converting to uppercase.

	23 static const uc32 yuml_code = 0xff;

	24 static const uc32 micro_code = 0xb5;

	25 return (character == yuml_code \|\| character == micro_code);

	26 }

	27

	28 template <class Converter>

	29 MUST_USE_RESULT static Object* ConvertCaseHelper(

	30 Isolate* isolate, String* string, SeqString* result, int result_length,

	31 unibrow::Mapping<Converter, 128>* mapping) {

	32 DisallowHeapAllocation no_gc;

	33 // We try this twice, once with the assumption that the result is no longer

	34 // than the input and, if that assumption breaks, again with the exact

	35 // length. This may not be pretty, but it is nicer than what was here before

	36 // and I hereby claim my vaffel-is.

	37 //

	38 // NOTE: This assumes that the upper/lower case of an ASCII

	39 // character is also ASCII. This is currently the case, but it

	40 // might break in the future if we implement more context and locale

	41 // dependent upper/lower conversions.

	42 bool has_changed_character = false;

	43

	44 // Convert all characters to upper case, assuming that they will fit

	45 // in the buffer

	46 StringCharacterStream stream(string);

	47 unibrow::uchar chars[Converter::kMaxWidth];

	48 // We can assume that the string is not empty

	49 uc32 current = stream.GetNext();

	50 bool ignore_overflow = Converter::kIsToLower \|\| result->IsSeqTwoByteString();

	51 for (int i = 0; i < result_length;) {

	52 bool has_next = stream.HasMore();

	53 uc32 next = has_next ? stream.GetNext() : 0;

	54 int char_length = mapping->get(current, next, chars);

	55 if (char_length == 0) {

	56 // The case conversion of this character is the character itself.

	57 result->Set(i, current);

	58 i++;

	59 } else if (char_length == 1 &&

	60 (ignore_overflow \|\| !ToUpperOverflows(current))) {

	61 // Common case: converting the letter resulted in one character.

	62 DCHECK(static_cast<uc32>(chars[0]) != current);

	63 result->Set(i, chars[0]);

	64 has_changed_character = true;

	65 i++;

	66 } else if (result_length == string->length()) {

	67 bool overflows = ToUpperOverflows(current);

	68 // We've assumed that the result would be as long as the

	69 // input but here is a character that converts to several

	70 // characters. No matter, we calculate the exact length

	71 // of the result and try the whole thing again.

	72 //

	73 // Note that this leaves room for optimization. We could just

	74 // memcpy what we already have to the result string. Also,

	75 // the result string is the last object allocated we could

	76 // "realloc" it and probably, in the vast majority of cases,

	77 // extend the existing string to be able to hold the full

	78 // result.

	79 int next_length = 0;

	80 if (has_next) {

	81 next_length = mapping->get(next, 0, chars);

	82 if (next_length == 0) next_length = 1;

	83 }

	84 int current_length = i + char_length + next_length;

	85 while (stream.HasMore()) {

	86 current = stream.GetNext();

	87 overflows \|= ToUpperOverflows(current);

	88 // NOTE: we use 0 as the next character here because, while

	89 // the next character may affect what a character converts to,

	90 // it does not in any case affect the length of what it convert

	91 // to.

	92 int char_length = mapping->get(current, 0, chars);

	93 if (char_length == 0) char_length = 1;

	94 current_length += char_length;

	95 if (current_length > String::kMaxLength) {

	96 AllowHeapAllocation allocate_error_and_return;

	97 THROW_NEW_ERROR_RETURN_FAILURE(isolate,

	98 NewInvalidStringLengthError());

	99 }

	100 }

	101 // Try again with the real length. Return signed if we need

	102 // to allocate a two-byte string for to uppercase.

	103 return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)

	104 : Smi::FromInt(current_length);

	105 } else {

	106 for (int j = 0; j < char_length; j++) {

	107 result->Set(i, chars[j]);

	108 i++;

	109 }

	110 has_changed_character = true;

	111 }

	112 current = next;

	113 }

	114 if (has_changed_character) {

	115 return result;

	116 } else {

	117 // If we didn't actually change anything in doing the conversion

	118 // we simple return the result and let the converted string

	119 // become garbage; there is no reason to keep two identical strings

	120 // alive.

	121 return string;

	122 }

	123 }

	124

	125 template <class Converter>

	126 MUST_USE_RESULT static Object* ConvertCase(
	Dan Ehrenberg 2017/03/14 16:19:56 It looks like this case conversion code was copied It looks like this case conversion code was copied from src/builtins/builtins-string.cc . Instead, we should be using the case conversion code in src/runtime/runtime-i18n.cc . This ConvertCase function has the same name, but it does something different--calling out to ICU rather than our own, custom case conversion ("unibrow").
	127 Handle<String> s, Isolate* isolate,

	128 unibrow::Mapping<Converter, 128>* mapping) {

	129 s = String::Flatten(s);

	130 int length = s->length();

	131 // Assume that the string is not empty; we need this assumption later

	132 if (length == 0) return *s;

	133

	134 // Simpler handling of ASCII strings.

	135 //

	136 // NOTE: This assumes that the upper/lower case of an ASCII

	137 // character is also ASCII. This is currently the case, but it

	138 // might break in the future if we implement more context and locale

	139 // dependent upper/lower conversions.

	140 if (s->IsOneByteRepresentationUnderneath()) {

	141 // Same length as input.

	142 Handle<SeqOneByteString> result =

	143 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();

	144 DisallowHeapAllocation no_gc;

	145 String::FlatContent flat_content = s->GetFlatContent();

	146 DCHECK(flat_content.IsFlat());

	147 bool has_changed_character = false;

	148 int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(

	149 reinterpret_cast<char*>(result->GetChars()),

	150 reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),

	151 length, &has_changed_character);

	152 // If not ASCII, we discard the result and take the 2 byte path.

	153 if (index_to_first_unprocessed == length)

	154 return has_changed_character ? result : s;

	155 }

	156

	157 Handle<SeqString> result; // Same length as input.

	158 if (s->IsOneByteRepresentation()) {

	159 result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();

	160 } else {

	161 result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();

	162 }

	163

	164 Object* answer = ConvertCaseHelper(isolate, s, result, length, mapping);

	165 if (answer->IsException(isolate) \|\| answer->IsString()) return answer;

	166

	167 DCHECK(answer->IsSmi());

	168 length = Smi::cast(answer)->value();

	169 if (s->IsOneByteRepresentation() && length > 0) {

	170 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(

	171 isolate, result, isolate->factory()->NewRawOneByteString(length));

	172 } else {

	173 if (length < 0) length = -length;

	174 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(

	175 isolate, result, isolate->factory()->NewRawTwoByteString(length));

	176 }

	177 return ConvertCaseHelper(isolate, s, result, length, mapping);

	178 }

	179

	180 } // namespace

	181

	182 BUILTIN(StringPrototypeToLowerCaseI18N) {

	183 HandleScope scope(isolate);

	184 TO_THIS_STRING(string, "String.prototype.toLowerCase");

	185 return ConvertCase(string, isolate,

	186 isolate->runtime_state()->to_lower_mapping());

	187 }

	188

	189 BUILTIN(StringPrototypeToUpperCaseI18N) {

	190 HandleScope scope(isolate);

	191 TO_THIS_STRING(string, "String.prototype.toUpperCase");

	192 return ConvertCase(string, isolate,

	193 isolate->runtime_state()->to_upper_mapping());

	194 }

	195

	196 } // namespace internal

	197 } // namespace v8

OLD	NEW

« no previous file with comments | « src/builtins/builtins.h ('k') | src/v8.gyp » ('j') | no next file with comments »