Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(160)

Side by Side Diff: src/builtins/builtins-intl.cc

Issue 2728763006: Migrate some case conversion functions from JS to CPP builtins (Closed)
Patch Set: this also doesn't work Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/builtins/builtins.h ('k') | src/v8.gyp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/builtins/builtins-regexp.h"
6 #include "src/builtins/builtins-utils.h"
7 #include "src/builtins/builtins.h"
8 #include "src/code-factory.h"
9 #include "src/code-stub-assembler.h"
10 #include "src/regexp/regexp-utils.h"
11 #include "src/string-case.h"
12 #include "src/unicode-inl.h"
13 #include "src/unicode.h"
14
15 namespace v8 {
16 namespace internal {
17
18 namespace {
19
20 inline bool ToUpperOverflows(uc32 character) {
21 // y with umlauts and the micro sign are the only characters that stop
22 // fitting into one-byte when converting to uppercase.
23 static const uc32 yuml_code = 0xff;
24 static const uc32 micro_code = 0xb5;
25 return (character == yuml_code || character == micro_code);
26 }
27
28 template <class Converter>
29 MUST_USE_RESULT static Object* ConvertCaseHelper(
30 Isolate* isolate, String* string, SeqString* result, int result_length,
31 unibrow::Mapping<Converter, 128>* mapping) {
32 DisallowHeapAllocation no_gc;
33 // We try this twice, once with the assumption that the result is no longer
34 // than the input and, if that assumption breaks, again with the exact
35 // length. This may not be pretty, but it is nicer than what was here before
36 // and I hereby claim my vaffel-is.
37 //
38 // NOTE: This assumes that the upper/lower case of an ASCII
39 // character is also ASCII. This is currently the case, but it
40 // might break in the future if we implement more context and locale
41 // dependent upper/lower conversions.
42 bool has_changed_character = false;
43
44 // Convert all characters to upper case, assuming that they will fit
45 // in the buffer
46 StringCharacterStream stream(string);
47 unibrow::uchar chars[Converter::kMaxWidth];
48 // We can assume that the string is not empty
49 uc32 current = stream.GetNext();
50 bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
51 for (int i = 0; i < result_length;) {
52 bool has_next = stream.HasMore();
53 uc32 next = has_next ? stream.GetNext() : 0;
54 int char_length = mapping->get(current, next, chars);
55 if (char_length == 0) {
56 // The case conversion of this character is the character itself.
57 result->Set(i, current);
58 i++;
59 } else if (char_length == 1 &&
60 (ignore_overflow || !ToUpperOverflows(current))) {
61 // Common case: converting the letter resulted in one character.
62 DCHECK(static_cast<uc32>(chars[0]) != current);
63 result->Set(i, chars[0]);
64 has_changed_character = true;
65 i++;
66 } else if (result_length == string->length()) {
67 bool overflows = ToUpperOverflows(current);
68 // We've assumed that the result would be as long as the
69 // input but here is a character that converts to several
70 // characters. No matter, we calculate the exact length
71 // of the result and try the whole thing again.
72 //
73 // Note that this leaves room for optimization. We could just
74 // memcpy what we already have to the result string. Also,
75 // the result string is the last object allocated we could
76 // "realloc" it and probably, in the vast majority of cases,
77 // extend the existing string to be able to hold the full
78 // result.
79 int next_length = 0;
80 if (has_next) {
81 next_length = mapping->get(next, 0, chars);
82 if (next_length == 0) next_length = 1;
83 }
84 int current_length = i + char_length + next_length;
85 while (stream.HasMore()) {
86 current = stream.GetNext();
87 overflows |= ToUpperOverflows(current);
88 // NOTE: we use 0 as the next character here because, while
89 // the next character may affect what a character converts to,
90 // it does not in any case affect the length of what it convert
91 // to.
92 int char_length = mapping->get(current, 0, chars);
93 if (char_length == 0) char_length = 1;
94 current_length += char_length;
95 if (current_length > String::kMaxLength) {
96 AllowHeapAllocation allocate_error_and_return;
97 THROW_NEW_ERROR_RETURN_FAILURE(isolate,
98 NewInvalidStringLengthError());
99 }
100 }
101 // Try again with the real length. Return signed if we need
102 // to allocate a two-byte string for to uppercase.
103 return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
104 : Smi::FromInt(current_length);
105 } else {
106 for (int j = 0; j < char_length; j++) {
107 result->Set(i, chars[j]);
108 i++;
109 }
110 has_changed_character = true;
111 }
112 current = next;
113 }
114 if (has_changed_character) {
115 return result;
116 } else {
117 // If we didn't actually change anything in doing the conversion
118 // we simple return the result and let the converted string
119 // become garbage; there is no reason to keep two identical strings
120 // alive.
121 return string;
122 }
123 }
124
125 template <class Converter>
126 MUST_USE_RESULT static Object* ConvertCase(
Dan Ehrenberg 2017/03/14 16:19:56 It looks like this case conversion code was copied
127 Handle<String> s, Isolate* isolate,
128 unibrow::Mapping<Converter, 128>* mapping) {
129 s = String::Flatten(s);
130 int length = s->length();
131 // Assume that the string is not empty; we need this assumption later
132 if (length == 0) return *s;
133
134 // Simpler handling of ASCII strings.
135 //
136 // NOTE: This assumes that the upper/lower case of an ASCII
137 // character is also ASCII. This is currently the case, but it
138 // might break in the future if we implement more context and locale
139 // dependent upper/lower conversions.
140 if (s->IsOneByteRepresentationUnderneath()) {
141 // Same length as input.
142 Handle<SeqOneByteString> result =
143 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
144 DisallowHeapAllocation no_gc;
145 String::FlatContent flat_content = s->GetFlatContent();
146 DCHECK(flat_content.IsFlat());
147 bool has_changed_character = false;
148 int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
149 reinterpret_cast<char*>(result->GetChars()),
150 reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
151 length, &has_changed_character);
152 // If not ASCII, we discard the result and take the 2 byte path.
153 if (index_to_first_unprocessed == length)
154 return has_changed_character ? *result : *s;
155 }
156
157 Handle<SeqString> result; // Same length as input.
158 if (s->IsOneByteRepresentation()) {
159 result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
160 } else {
161 result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
162 }
163
164 Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
165 if (answer->IsException(isolate) || answer->IsString()) return answer;
166
167 DCHECK(answer->IsSmi());
168 length = Smi::cast(answer)->value();
169 if (s->IsOneByteRepresentation() && length > 0) {
170 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
171 isolate, result, isolate->factory()->NewRawOneByteString(length));
172 } else {
173 if (length < 0) length = -length;
174 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
175 isolate, result, isolate->factory()->NewRawTwoByteString(length));
176 }
177 return ConvertCaseHelper(isolate, *s, *result, length, mapping);
178 }
179
180 } // namespace
181
182 BUILTIN(StringPrototypeToLowerCaseI18N) {
183 HandleScope scope(isolate);
184 TO_THIS_STRING(string, "String.prototype.toLowerCase");
185 return ConvertCase(string, isolate,
186 isolate->runtime_state()->to_lower_mapping());
187 }
188
189 BUILTIN(StringPrototypeToUpperCaseI18N) {
190 HandleScope scope(isolate);
191 TO_THIS_STRING(string, "String.prototype.toUpperCase");
192 return ConvertCase(string, isolate,
193 isolate->runtime_state()->to_upper_mapping());
194 }
195
196 } // namespace internal
197 } // namespace v8
OLDNEW
« no previous file with comments | « src/builtins/builtins.h ('k') | src/v8.gyp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698