Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Side by Side Diff: src/intl.cc

Issue 2859203002: [string] Move String.p.toLowerCase to CSA (Closed)
Patch Set: Address comments Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_INTL_SUPPORT 5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled. 6 #error Internationalization is expected to be enabled.
7 #endif // V8_INTL_SUPPORT 7 #endif // V8_INTL_SUPPORT
8 8
9 #include "src/intl.h" 9 #include "src/intl.h"
10 10
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after
134 uint16_t ch = s->Get(index); 134 uint16_t ch = s->Get(index);
135 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { 135 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
136 return index; 136 return index;
137 } 137 }
138 } 138 }
139 return length; 139 return length;
140 } 140 }
141 141
142 } // namespace 142 } // namespace
143 143
144 const uint8_t* ToLatin1LowerTable() { return &kToLower[0]; }
145
144 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat, 146 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
145 std::unique_ptr<uc16[]>* dest, 147 std::unique_ptr<uc16[]>* dest,
146 int32_t length) { 148 int32_t length) {
147 DCHECK(flat.IsFlat()); 149 DCHECK(flat.IsFlat());
148 if (flat.IsOneByte()) { 150 if (flat.IsOneByte()) {
149 if (!*dest) { 151 if (!*dest) {
150 dest->reset(NewArray<uc16>(length)); 152 dest->reset(NewArray<uc16>(length));
151 CopyChars(dest->get(), flat.ToOneByteVector().start(), length); 153 CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
152 } 154 }
153 return reinterpret_cast<const UChar*>(dest->get()); 155 return reinterpret_cast<const UChar*>(dest->get());
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 return *result; 196 return *result;
195 } 197 }
196 if (U_SUCCESS(status)) { 198 if (U_SUCCESS(status)) {
197 DCHECK(dest_length < result->length()); 199 DCHECK(dest_length < result->length());
198 return *Handle<SeqTwoByteString>::cast( 200 return *Handle<SeqTwoByteString>::cast(
199 SeqString::Truncate(result, dest_length)); 201 SeqString::Truncate(result, dest_length));
200 } 202 }
201 return *s; 203 return *s;
202 } 204 }
203 205
206 // A stripped-down version of ConvertToLower that can only handle flat one-byte
207 // strings and does not allocate.
208 // Called from TF builtins.
209 MUST_USE_RESULT Object* ConvertOneByteToLower(String* src, String* dst,
210 Isolate* isolate) {
211 DCHECK_EQ(src->length(), dst->length());
212 DCHECK(src->IsOneByteRepresentation());
213 DCHECK(src->IsFlat());
214 DCHECK(dst->IsSeqOneByteString());
215
216 DisallowHeapAllocation no_gc;
217
218 const int length = src->length();
219
220 const uint8_t* src_data = src->GetFlatContent().ToOneByteVector().start();
221 uint8_t* dst_data = SeqOneByteString::cast(dst)->GetChars();
222
223 bool has_changed_character = false;
224 int index_to_first_unprocessed = FastAsciiConvert<true>(
225 reinterpret_cast<char*>(dst_data),
226 reinterpret_cast<const char*>(src_data), length, &has_changed_character);
227
228 // If not ASCII, we keep the result up to index_to_first_unprocessed and
229 // process the rest.
Camillo Bruni 2017/05/05 11:44:26 nit: this comment probably makes more sense before
jgruber 2017/05/05 15:26:58 Yep, thought so as well. Done.
230 if (index_to_first_unprocessed == length) {
231 return has_changed_character ? dst : src;
232 }
233
234 for (int index = index_to_first_unprocessed; index < length; ++index) {
235 dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
236 }
237
238 return dst;
239 }
240
204 MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) { 241 MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) {
205 if (!s->HasOnlyOneByteChars()) { 242 if (!s->HasOnlyOneByteChars()) {
206 // Use a slower implementation for strings with characters beyond U+00FF. 243 // Use a slower implementation for strings with characters beyond U+00FF.
207 return LocaleConvertCase(s, isolate, false, ""); 244 return LocaleConvertCase(s, isolate, false, "");
208 } 245 }
209 246
210 int length = s->length(); 247 int length = s->length();
211 248
212 // We depend here on the invariant that the length of a Latin1 249 // We depend here on the invariant that the length of a Latin1
213 // string is invariant under ToLowerCase, and the result always 250 // string is invariant under ToLowerCase, and the result always
214 // fits in the Latin1 range in the *root locale*. It does not hold 251 // fits in the Latin1 range in the *root locale*. It does not hold
215 // for ToUpperCase even in the root locale. 252 // for ToUpperCase even in the root locale.
216 253
217 // Scan the string for uppercase and non-ASCII characters for strings 254 // Scan the string for uppercase and non-ASCII characters for strings
218 // shorter than a machine-word without any memory allocation overhead. 255 // shorter than a machine-word without any memory allocation overhead.
219 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert() 256 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
220 // to two parts, one for scanning the prefix with no change and the other for 257 // to two parts, one for scanning the prefix with no change and the other for
221 // handling ASCII-only characters. 258 // handling ASCII-only characters.
222 int index_to_first_unprocessed = length; 259 int index_to_first_unprocessed = length;
223 const bool is_short = length < static_cast<int>(sizeof(uintptr_t)); 260 const bool is_short = length < static_cast<int>(sizeof(uintptr_t));
224 if (is_short) { 261 if (is_short) {
225 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length); 262 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
226 // Nothing to do if the string is all ASCII with no uppercase. 263 // Nothing to do if the string is all ASCII with no uppercase.
227 if (index_to_first_unprocessed == length) return *s; 264 if (index_to_first_unprocessed == length) return *s;
228 } 265 }
229 266
230 Handle<SeqOneByteString> result = 267 Handle<SeqOneByteString> result =
231 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); 268 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
232 269
270 if (s->IsOneByteRepresentation()) {
271 return ConvertOneByteToLower(*s, *result, isolate);
272 }
273
233 DisallowHeapAllocation no_gc; 274 DisallowHeapAllocation no_gc;
234 DCHECK(s->IsFlat()); 275 DCHECK(s->IsFlat());
276 DCHECK(s->IsTwoByteRepresentation());
235 String::FlatContent flat = s->GetFlatContent(); 277 String::FlatContent flat = s->GetFlatContent();
278 DCHECK(flat.IsTwoByte());
279
236 uint8_t* dest = result->GetChars(); 280 uint8_t* dest = result->GetChars();
237 if (flat.IsOneByte()) { 281 if (index_to_first_unprocessed == length) {
238 const uint8_t* src = flat.ToOneByteVector().start(); 282 DCHECK(!is_short);
239 bool has_changed_character = false; 283 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
240 index_to_first_unprocessed = FastAsciiConvert<true>( 284 }
241 reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(src), 285 // Nothing to do if the string is all ASCII with no uppercase.
242 length, &has_changed_character); 286 if (index_to_first_unprocessed == length) return *s;
243 // If not ASCII, we keep the result up to index_to_first_unprocessed and 287 const uint16_t* src = flat.ToUC16Vector().start();
244 // process the rest. 288 CopyChars(dest, src, index_to_first_unprocessed);
245 if (index_to_first_unprocessed == length) 289 for (int index = index_to_first_unprocessed; index < length; ++index) {
246 return has_changed_character ? *result : *s; 290 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
247
248 for (int index = index_to_first_unprocessed; index < length; ++index) {
249 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
250 }
251 } else {
252 if (index_to_first_unprocessed == length) {
253 DCHECK(!is_short);
254 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
255 }
256 // Nothing to do if the string is all ASCII with no uppercase.
257 if (index_to_first_unprocessed == length) return *s;
258 const uint16_t* src = flat.ToUC16Vector().start();
259 CopyChars(dest, src, index_to_first_unprocessed);
260 for (int index = index_to_first_unprocessed; index < length; ++index) {
261 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
262 }
263 } 291 }
264 292
265 return *result; 293 return *result;
266 } 294 }
267 295
268 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) { 296 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) {
269 int32_t length = s->length(); 297 int32_t length = s->length();
270 if (s->HasOnlyOneByteChars() && length > 0) { 298 if (s->HasOnlyOneByteChars() && length > 0) {
271 Handle<SeqOneByteString> result = 299 Handle<SeqOneByteString> result =
272 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); 300 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
379 407
380 void ICUTimezoneCache::Clear() { 408 void ICUTimezoneCache::Clear() {
381 delete timezone_; 409 delete timezone_;
382 timezone_ = nullptr; 410 timezone_ = nullptr;
383 timezone_name_[0] = '\0'; 411 timezone_name_[0] = '\0';
384 dst_timezone_name_[0] = '\0'; 412 dst_timezone_name_[0] = '\0';
385 } 413 }
386 414
387 } // namespace internal 415 } // namespace internal
388 } // namespace v8 416 } // namespace v8
OLDNEW
« src/builtins/builtins-intl-gen.cc ('K') | « src/intl.h ('k') | src/v8.gyp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698