Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_INTL_SUPPORT | 5 #ifndef V8_INTL_SUPPORT |
| 6 #error Internationalization is expected to be enabled. | 6 #error Internationalization is expected to be enabled. |
| 7 #endif // V8_INTL_SUPPORT | 7 #endif // V8_INTL_SUPPORT |
| 8 | 8 |
| 9 #include "src/intl.h" | 9 #include "src/intl.h" |
| 10 | 10 |
| (...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 134 uint16_t ch = s->Get(index); | 134 uint16_t ch = s->Get(index); |
| 135 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { | 135 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { |
| 136 return index; | 136 return index; |
| 137 } | 137 } |
| 138 } | 138 } |
| 139 return length; | 139 return length; |
| 140 } | 140 } |
| 141 | 141 |
| 142 } // namespace | 142 } // namespace |
| 143 | 143 |
| 144 const uint8_t* ToLatin1LowerTable() { return &kToLower[0]; } | |
| 145 | |
| 144 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat, | 146 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat, |
| 145 std::unique_ptr<uc16[]>* dest, | 147 std::unique_ptr<uc16[]>* dest, |
| 146 int32_t length) { | 148 int32_t length) { |
| 147 DCHECK(flat.IsFlat()); | 149 DCHECK(flat.IsFlat()); |
| 148 if (flat.IsOneByte()) { | 150 if (flat.IsOneByte()) { |
| 149 if (!*dest) { | 151 if (!*dest) { |
| 150 dest->reset(NewArray<uc16>(length)); | 152 dest->reset(NewArray<uc16>(length)); |
| 151 CopyChars(dest->get(), flat.ToOneByteVector().start(), length); | 153 CopyChars(dest->get(), flat.ToOneByteVector().start(), length); |
| 152 } | 154 } |
| 153 return reinterpret_cast<const UChar*>(dest->get()); | 155 return reinterpret_cast<const UChar*>(dest->get()); |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 194 return *result; | 196 return *result; |
| 195 } | 197 } |
| 196 if (U_SUCCESS(status)) { | 198 if (U_SUCCESS(status)) { |
| 197 DCHECK(dest_length < result->length()); | 199 DCHECK(dest_length < result->length()); |
| 198 return *Handle<SeqTwoByteString>::cast( | 200 return *Handle<SeqTwoByteString>::cast( |
| 199 SeqString::Truncate(result, dest_length)); | 201 SeqString::Truncate(result, dest_length)); |
| 200 } | 202 } |
| 201 return *s; | 203 return *s; |
| 202 } | 204 } |
| 203 | 205 |
| 206 // A stripped-down version of ConvertToLower that can only handle flat one-byte | |
| 207 // strings and does not allocate. | |
| 208 // Called from TF builtins. | |
| 209 MUST_USE_RESULT Object* ConvertOneByteToLower(String* src, String* dst, | |
| 210 Isolate* isolate) { | |
| 211 DCHECK_EQ(src->length(), dst->length()); | |
| 212 DCHECK(src->IsOneByteRepresentation()); | |
| 213 DCHECK(src->IsFlat()); | |
| 214 DCHECK(dst->IsSeqOneByteString()); | |
| 215 | |
| 216 DisallowHeapAllocation no_gc; | |
| 217 | |
| 218 const int length = src->length(); | |
| 219 | |
| 220 const uint8_t* src_data = src->GetFlatContent().ToOneByteVector().start(); | |
| 221 uint8_t* dst_data = SeqOneByteString::cast(dst)->GetChars(); | |
| 222 | |
| 223 bool has_changed_character = false; | |
| 224 int index_to_first_unprocessed = FastAsciiConvert<true>( | |
| 225 reinterpret_cast<char*>(dst_data), | |
| 226 reinterpret_cast<const char*>(src_data), length, &has_changed_character); | |
| 227 | |
| 228 // If not ASCII, we keep the result up to index_to_first_unprocessed and | |
| 229 // process the rest. | |
|
Camillo Bruni
2017/05/05 11:44:26
nit: this comment probably makes more sense before…
jgruber
2017/05/05 15:26:58
Yep, thought so as well. Done.
| |
| 230 if (index_to_first_unprocessed == length) { | |
| 231 return has_changed_character ? dst : src; | |
| 232 } | |
| 233 | |
| 234 for (int index = index_to_first_unprocessed; index < length; ++index) { | |
| 235 dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index])); | |
| 236 } | |
| 237 | |
| 238 return dst; | |
| 239 } | |
| 240 | |
| 204 MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) { | 241 MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) { |
| 205 if (!s->HasOnlyOneByteChars()) { | 242 if (!s->HasOnlyOneByteChars()) { |
| 206 // Use a slower implementation for strings with characters beyond U+00FF. | 243 // Use a slower implementation for strings with characters beyond U+00FF. |
| 207 return LocaleConvertCase(s, isolate, false, ""); | 244 return LocaleConvertCase(s, isolate, false, ""); |
| 208 } | 245 } |
| 209 | 246 |
| 210 int length = s->length(); | 247 int length = s->length(); |
| 211 | 248 |
| 212 // We depend here on the invariant that the length of a Latin1 | 249 // We depend here on the invariant that the length of a Latin1 |
| 213 // string is invariant under ToLowerCase, and the result always | 250 // string is invariant under ToLowerCase, and the result always |
| 214 // fits in the Latin1 range in the *root locale*. It does not hold | 251 // fits in the Latin1 range in the *root locale*. It does not hold |
| 215 // for ToUpperCase even in the root locale. | 252 // for ToUpperCase even in the root locale. |
| 216 | 253 |
| 217 // Scan the string for uppercase and non-ASCII characters for strings | 254 // Scan the string for uppercase and non-ASCII characters for strings |
| 218 // shorter than a machine-word without any memory allocation overhead. | 255 // shorter than a machine-word without any memory allocation overhead. |
| 219 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert() | 256 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert() |
| 220 // to two parts, one for scanning the prefix with no change and the other for | 257 // to two parts, one for scanning the prefix with no change and the other for |
| 221 // handling ASCII-only characters. | 258 // handling ASCII-only characters. |
| 222 int index_to_first_unprocessed = length; | 259 int index_to_first_unprocessed = length; |
| 223 const bool is_short = length < static_cast<int>(sizeof(uintptr_t)); | 260 const bool is_short = length < static_cast<int>(sizeof(uintptr_t)); |
| 224 if (is_short) { | 261 if (is_short) { |
| 225 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length); | 262 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length); |
| 226 // Nothing to do if the string is all ASCII with no uppercase. | 263 // Nothing to do if the string is all ASCII with no uppercase. |
| 227 if (index_to_first_unprocessed == length) return *s; | 264 if (index_to_first_unprocessed == length) return *s; |
| 228 } | 265 } |
| 229 | 266 |
| 230 Handle<SeqOneByteString> result = | 267 Handle<SeqOneByteString> result = |
| 231 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); | 268 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); |
| 232 | 269 |
| 270 if (s->IsOneByteRepresentation()) { | |
| 271 return ConvertOneByteToLower(*s, *result, isolate); | |
| 272 } | |
| 273 | |
| 233 DisallowHeapAllocation no_gc; | 274 DisallowHeapAllocation no_gc; |
| 234 DCHECK(s->IsFlat()); | 275 DCHECK(s->IsFlat()); |
| 276 DCHECK(s->IsTwoByteRepresentation()); | |
| 235 String::FlatContent flat = s->GetFlatContent(); | 277 String::FlatContent flat = s->GetFlatContent(); |
| 278 DCHECK(flat.IsTwoByte()); | |
| 279 | |
| 236 uint8_t* dest = result->GetChars(); | 280 uint8_t* dest = result->GetChars(); |
| 237 if (flat.IsOneByte()) { | 281 if (index_to_first_unprocessed == length) { |
| 238 const uint8_t* src = flat.ToOneByteVector().start(); | 282 DCHECK(!is_short); |
| 239 bool has_changed_character = false; | 283 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length); |
| 240 index_to_first_unprocessed = FastAsciiConvert<true>( | 284 } |
| 241 reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(src), | 285 // Nothing to do if the string is all ASCII with no uppercase. |
| 242 length, &has_changed_character); | 286 if (index_to_first_unprocessed == length) return *s; |
| 243 // If not ASCII, we keep the result up to index_to_first_unprocessed and | 287 const uint16_t* src = flat.ToUC16Vector().start(); |
| 244 // process the rest. | 288 CopyChars(dest, src, index_to_first_unprocessed); |
| 245 if (index_to_first_unprocessed == length) | 289 for (int index = index_to_first_unprocessed; index < length; ++index) { |
| 246 return has_changed_character ? *result : *s; | 290 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index])); |
| 247 | |
| 248 for (int index = index_to_first_unprocessed; index < length; ++index) { | |
| 249 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index])); | |
| 250 } | |
| 251 } else { | |
| 252 if (index_to_first_unprocessed == length) { | |
| 253 DCHECK(!is_short); | |
| 254 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length); | |
| 255 } | |
| 256 // Nothing to do if the string is all ASCII with no uppercase. | |
| 257 if (index_to_first_unprocessed == length) return *s; | |
| 258 const uint16_t* src = flat.ToUC16Vector().start(); | |
| 259 CopyChars(dest, src, index_to_first_unprocessed); | |
| 260 for (int index = index_to_first_unprocessed; index < length; ++index) { | |
| 261 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index])); | |
| 262 } | |
| 263 } | 291 } |
| 264 | 292 |
| 265 return *result; | 293 return *result; |
| 266 } | 294 } |
| 267 | 295 |
| 268 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) { | 296 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) { |
| 269 int32_t length = s->length(); | 297 int32_t length = s->length(); |
| 270 if (s->HasOnlyOneByteChars() && length > 0) { | 298 if (s->HasOnlyOneByteChars() && length > 0) { |
| 271 Handle<SeqOneByteString> result = | 299 Handle<SeqOneByteString> result = |
| 272 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); | 300 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 379 | 407 |
| 380 void ICUTimezoneCache::Clear() { | 408 void ICUTimezoneCache::Clear() { |
| 381 delete timezone_; | 409 delete timezone_; |
| 382 timezone_ = nullptr; | 410 timezone_ = nullptr; |
| 383 timezone_name_[0] = '\0'; | 411 timezone_name_[0] = '\0'; |
| 384 dst_timezone_name_[0] = '\0'; | 412 dst_timezone_name_[0] = '\0'; |
| 385 } | 413 } |
| 386 | 414 |
| 387 } // namespace internal | 415 } // namespace internal |
| 388 } // namespace v8 | 416 } // namespace v8 |
| OLD | NEW |