OLD | NEW |
---|---|
1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_INTL_SUPPORT | 5 #ifndef V8_INTL_SUPPORT |
6 #error Internationalization is expected to be enabled. | 6 #error Internationalization is expected to be enabled. |
7 #endif // V8_INTL_SUPPORT | 7 #endif // V8_INTL_SUPPORT |
8 | 8 |
9 #include "src/intl.h" | 9 #include "src/intl.h" |
10 | 10 |
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
134 uint16_t ch = s->Get(index); | 134 uint16_t ch = s->Get(index); |
135 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { | 135 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { |
136 return index; | 136 return index; |
137 } | 137 } |
138 } | 138 } |
139 return length; | 139 return length; |
140 } | 140 } |
141 | 141 |
142 } // namespace | 142 } // namespace |
143 | 143 |
144 const uint8_t* ToLatin1LowerTable() { return &kToLower[0]; } | |
145 | |
144 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat, | 146 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat, |
145 std::unique_ptr<uc16[]>* dest, | 147 std::unique_ptr<uc16[]>* dest, |
146 int32_t length) { | 148 int32_t length) { |
147 DCHECK(flat.IsFlat()); | 149 DCHECK(flat.IsFlat()); |
148 if (flat.IsOneByte()) { | 150 if (flat.IsOneByte()) { |
149 if (!*dest) { | 151 if (!*dest) { |
150 dest->reset(NewArray<uc16>(length)); | 152 dest->reset(NewArray<uc16>(length)); |
151 CopyChars(dest->get(), flat.ToOneByteVector().start(), length); | 153 CopyChars(dest->get(), flat.ToOneByteVector().start(), length); |
152 } | 154 } |
153 return reinterpret_cast<const UChar*>(dest->get()); | 155 return reinterpret_cast<const UChar*>(dest->get()); |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
194 return *result; | 196 return *result; |
195 } | 197 } |
196 if (U_SUCCESS(status)) { | 198 if (U_SUCCESS(status)) { |
197 DCHECK(dest_length < result->length()); | 199 DCHECK(dest_length < result->length()); |
198 return *Handle<SeqTwoByteString>::cast( | 200 return *Handle<SeqTwoByteString>::cast( |
199 SeqString::Truncate(result, dest_length)); | 201 SeqString::Truncate(result, dest_length)); |
200 } | 202 } |
201 return *s; | 203 return *s; |
202 } | 204 } |
203 | 205 |
206 // A stripped-down version of ConvertToLower that can only handle flat one-byte | |
207 // strings and does not allocate. | |
208 // Called from TF builtins. | |
209 MUST_USE_RESULT Object* ConvertOneByteToLower(String* src, String* dst, | |
210 Isolate* isolate) { | |
211 DCHECK_EQ(src->length(), dst->length()); | |
212 DCHECK(src->IsOneByteRepresentation()); | |
213 DCHECK(src->IsFlat()); | |
214 DCHECK(dst->IsSeqOneByteString()); | |
215 | |
216 DisallowHeapAllocation no_gc; | |
217 | |
218 const int length = src->length(); | |
219 | |
220 const uint8_t* src_data = src->GetFlatContent().ToOneByteVector().start(); | |
221 uint8_t* dst_data = SeqOneByteString::cast(dst)->GetChars(); | |
222 | |
223 bool has_changed_character = false; | |
224 int index_to_first_unprocessed = FastAsciiConvert<true>( | |
225 reinterpret_cast<char*>(dst_data), | |
226 reinterpret_cast<const char*>(src_data), length, &has_changed_character); | |
227 | |
228 // If not ASCII, we keep the result up to index_to_first_unprocessed and | |
229 // process the rest. | |
Camillo Bruni
2017/05/05 11:44:26
nit: this comment probably makes more sense before
jgruber
2017/05/05 15:26:58
Yep, thought so as well. Done.
| |
230 if (index_to_first_unprocessed == length) { | |
231 return has_changed_character ? dst : src; | |
232 } | |
233 | |
234 for (int index = index_to_first_unprocessed; index < length; ++index) { | |
235 dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index])); | |
236 } | |
237 | |
238 return dst; | |
239 } | |
240 | |
204 MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) { | 241 MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) { |
205 if (!s->HasOnlyOneByteChars()) { | 242 if (!s->HasOnlyOneByteChars()) { |
206 // Use a slower implementation for strings with characters beyond U+00FF. | 243 // Use a slower implementation for strings with characters beyond U+00FF. |
207 return LocaleConvertCase(s, isolate, false, ""); | 244 return LocaleConvertCase(s, isolate, false, ""); |
208 } | 245 } |
209 | 246 |
210 int length = s->length(); | 247 int length = s->length(); |
211 | 248 |
212 // We depend here on the invariant that the length of a Latin1 | 249 // We depend here on the invariant that the length of a Latin1 |
213 // string is invariant under ToLowerCase, and the result always | 250 // string is invariant under ToLowerCase, and the result always |
214 // fits in the Latin1 range in the *root locale*. It does not hold | 251 // fits in the Latin1 range in the *root locale*. It does not hold |
215 // for ToUpperCase even in the root locale. | 252 // for ToUpperCase even in the root locale. |
216 | 253 |
217 // Scan the string for uppercase and non-ASCII characters for strings | 254 // Scan the string for uppercase and non-ASCII characters for strings |
218 // shorter than a machine-word without any memory allocation overhead. | 255 // shorter than a machine-word without any memory allocation overhead. |
219 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert() | 256 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert() |
220 // to two parts, one for scanning the prefix with no change and the other for | 257 // to two parts, one for scanning the prefix with no change and the other for |
221 // handling ASCII-only characters. | 258 // handling ASCII-only characters. |
222 int index_to_first_unprocessed = length; | 259 int index_to_first_unprocessed = length; |
223 const bool is_short = length < static_cast<int>(sizeof(uintptr_t)); | 260 const bool is_short = length < static_cast<int>(sizeof(uintptr_t)); |
224 if (is_short) { | 261 if (is_short) { |
225 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length); | 262 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length); |
226 // Nothing to do if the string is all ASCII with no uppercase. | 263 // Nothing to do if the string is all ASCII with no uppercase. |
227 if (index_to_first_unprocessed == length) return *s; | 264 if (index_to_first_unprocessed == length) return *s; |
228 } | 265 } |
229 | 266 |
230 Handle<SeqOneByteString> result = | 267 Handle<SeqOneByteString> result = |
231 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); | 268 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); |
232 | 269 |
270 if (s->IsOneByteRepresentation()) { | |
271 return ConvertOneByteToLower(*s, *result, isolate); | |
272 } | |
273 | |
233 DisallowHeapAllocation no_gc; | 274 DisallowHeapAllocation no_gc; |
234 DCHECK(s->IsFlat()); | 275 DCHECK(s->IsFlat()); |
276 DCHECK(s->IsTwoByteRepresentation()); | |
235 String::FlatContent flat = s->GetFlatContent(); | 277 String::FlatContent flat = s->GetFlatContent(); |
278 DCHECK(flat.IsTwoByte()); | |
279 | |
236 uint8_t* dest = result->GetChars(); | 280 uint8_t* dest = result->GetChars(); |
237 if (flat.IsOneByte()) { | 281 if (index_to_first_unprocessed == length) { |
238 const uint8_t* src = flat.ToOneByteVector().start(); | 282 DCHECK(!is_short); |
239 bool has_changed_character = false; | 283 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length); |
240 index_to_first_unprocessed = FastAsciiConvert<true>( | 284 } |
241 reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(src), | 285 // Nothing to do if the string is all ASCII with no uppercase. |
242 length, &has_changed_character); | 286 if (index_to_first_unprocessed == length) return *s; |
243 // If not ASCII, we keep the result up to index_to_first_unprocessed and | 287 const uint16_t* src = flat.ToUC16Vector().start(); |
244 // process the rest. | 288 CopyChars(dest, src, index_to_first_unprocessed); |
245 if (index_to_first_unprocessed == length) | 289 for (int index = index_to_first_unprocessed; index < length; ++index) { |
246 return has_changed_character ? *result : *s; | 290 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index])); |
247 | |
248 for (int index = index_to_first_unprocessed; index < length; ++index) { | |
249 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index])); | |
250 } | |
251 } else { | |
252 if (index_to_first_unprocessed == length) { | |
253 DCHECK(!is_short); | |
254 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length); | |
255 } | |
256 // Nothing to do if the string is all ASCII with no uppercase. | |
257 if (index_to_first_unprocessed == length) return *s; | |
258 const uint16_t* src = flat.ToUC16Vector().start(); | |
259 CopyChars(dest, src, index_to_first_unprocessed); | |
260 for (int index = index_to_first_unprocessed; index < length; ++index) { | |
261 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index])); | |
262 } | |
263 } | 291 } |
264 | 292 |
265 return *result; | 293 return *result; |
266 } | 294 } |
267 | 295 |
268 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) { | 296 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) { |
269 int32_t length = s->length(); | 297 int32_t length = s->length(); |
270 if (s->HasOnlyOneByteChars() && length > 0) { | 298 if (s->HasOnlyOneByteChars() && length > 0) { |
271 Handle<SeqOneByteString> result = | 299 Handle<SeqOneByteString> result = |
272 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); | 300 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
379 | 407 |
380 void ICUTimezoneCache::Clear() { | 408 void ICUTimezoneCache::Clear() { |
381 delete timezone_; | 409 delete timezone_; |
382 timezone_ = nullptr; | 410 timezone_ = nullptr; |
383 timezone_name_[0] = '\0'; | 411 timezone_name_[0] = '\0'; |
384 dst_timezone_name_[0] = '\0'; | 412 dst_timezone_name_[0] = '\0'; |
385 } | 413 } |
386 | 414 |
387 } // namespace internal | 415 } // namespace internal |
388 } // namespace v8 | 416 } // namespace v8 |
OLD | NEW |