OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 6227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6238 str, ALLOW_TRAILING_JUNK, OS::nan_value()); | 6238 str, ALLOW_TRAILING_JUNK, OS::nan_value()); |
6239 | 6239 |
6240 // Create a number object from the value. | 6240 // Create a number object from the value. |
6241 return isolate->heap()->NumberFromDouble(value); | 6241 return isolate->heap()->NumberFromDouble(value); |
6242 } | 6242 } |
6243 | 6243 |
6244 | 6244 |
6245 template <class Converter> | 6245 template <class Converter> |
6246 MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( | 6246 MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( |
6247 Isolate* isolate, | 6247 Isolate* isolate, |
6248 String* s, | 6248 String* string, |
6249 String::Encoding result_encoding, | 6249 SeqString* result, |
6250 int length, | 6250 int result_length, |
6251 int input_string_length, | |
6252 unibrow::Mapping<Converter, 128>* mapping) { | 6251 unibrow::Mapping<Converter, 128>* mapping) { |
| 6252 DisallowHeapAllocation no_gc; |
6253 // We try this twice, once with the assumption that the result is no longer | 6253 // We try this twice, once with the assumption that the result is no longer |
6254 // than the input and, if that assumption breaks, again with the exact | 6254 // than the input and, if that assumption breaks, again with the exact |
6255 // length. This may not be pretty, but it is nicer than what was here before | 6255 // length. This may not be pretty, but it is nicer than what was here before |
6256 // and I hereby claim my vaffel-is. | 6256 // and I hereby claim my vaffel-is. |
6257 // | 6257 // |
6258 // Allocate the resulting string. | |
6259 // | |
6260 // NOTE: This assumes that the upper/lower case of an ASCII | 6258 // NOTE: This assumes that the upper/lower case of an ASCII |
6261 // character is also ASCII. This is currently the case, but it | 6259 // character is also ASCII. This is currently the case, but it |
6262 // might break in the future if we implement more context and locale | 6260 // might break in the future if we implement more context and locale |
6263 // dependent upper/lower conversions. | 6261 // dependent upper/lower conversions. |
6264 Object* o; | |
6265 { MaybeObject* maybe_o = result_encoding == String::ONE_BYTE_ENCODING | |
6266 ? isolate->heap()->AllocateRawOneByteString(length) | |
6267 : isolate->heap()->AllocateRawTwoByteString(length); | |
6268 if (!maybe_o->ToObject(&o)) return maybe_o; | |
6269 } | |
6270 String* result = String::cast(o); | |
6271 bool has_changed_character = false; | 6262 bool has_changed_character = false; |
6272 | 6263 |
6273 DisallowHeapAllocation no_gc; | |
6274 | |
6275 // Convert all characters to the target case, assuming that they will fit | 6264 // Convert all characters to the target case, assuming that they will fit |
6276 // in the buffer. | 6265 // in the buffer. |
6277 Access<ConsStringIteratorOp> op( | 6266 Access<ConsStringIteratorOp> op( |
6278 isolate->runtime_state()->string_iterator()); | 6267 isolate->runtime_state()->string_iterator()); |
6279 StringCharacterStream stream(s, op.value()); | 6268 StringCharacterStream stream(string, op.value()); |
6280 unibrow::uchar chars[Converter::kMaxWidth]; | 6269 unibrow::uchar chars[Converter::kMaxWidth]; |
6281 // We can assume that the string is not empty | 6270 // We can assume that the string is not empty |
6282 uc32 current = stream.GetNext(); | 6271 uc32 current = stream.GetNext(); |
6283 // y with umlauts is the only character that stops fitting into one-byte | 6272 // y with umlauts is the only character that stops fitting into one-byte |
6284 // when converting to uppercase. | 6273 // when converting to uppercase. |
6285 static const uc32 yuml_code = 0xff; | 6274 static const uc32 yuml_code = 0xff; |
6286 bool ignore_yuml = result->IsSeqTwoByteString() || Converter::kIsToLower; | 6275 bool ignore_yuml = result->IsSeqTwoByteString() || Converter::kIsToLower; |
6287 for (int i = 0; i < length;) { | 6276 for (int i = 0; i < result_length;) { |
6288 bool has_next = stream.HasMore(); | 6277 bool has_next = stream.HasMore(); |
6289 uc32 next = has_next ? stream.GetNext() : 0; | 6278 uc32 next = has_next ? stream.GetNext() : 0; |
6290 int char_length = mapping->get(current, next, chars); | 6279 int char_length = mapping->get(current, next, chars); |
6291 if (char_length == 0) { | 6280 if (char_length == 0) { |
6292 // The case conversion of this character is the character itself. | 6281 // The case conversion of this character is the character itself. |
6293 result->Set(i, current); | 6282 result->Set(i, current); |
6294 i++; | 6283 i++; |
6295 } else if (char_length == 1 && (ignore_yuml || current != yuml_code)) { | 6284 } else if (char_length == 1 && (ignore_yuml || current != yuml_code)) { |
6296 // Common case: converting the letter resulted in one character. | 6285 // Common case: converting the letter resulted in one character. |
6297 ASSERT(static_cast<uc32>(chars[0]) != current); | 6286 ASSERT(static_cast<uc32>(chars[0]) != current); |
6298 result->Set(i, chars[0]); | 6287 result->Set(i, chars[0]); |
6299 has_changed_character = true; | 6288 has_changed_character = true; |
6300 i++; | 6289 i++; |
6301 } else if (length == input_string_length) { | 6290 } else if (result_length == string->length()) { |
6302 bool found_yuml = (current == yuml_code); | 6291 bool found_yuml = (current == yuml_code); |
6303 // We've assumed that the result would be as long as the | 6292 // We've assumed that the result would be as long as the |
6304 // input but here is a character that converts to several | 6293 // input but here is a character that converts to several |
6305 // characters. No matter, we calculate the exact length | 6294 // characters. No matter, we calculate the exact length |
6306 // of the result and try the whole thing again. | 6295 // of the result and try the whole thing again. |
6307 // | 6296 // |
6308 // Note that this leaves room for optimization. We could just | 6297 // Note that this leaves room for optimization. We could just |
6309 // memcpy what we already have to the result string. Also, | 6298 // memcpy what we already have to the result string. Also, |
6310 // since the result string is the last object allocated we could | 6299 // since the result string is the last object allocated we could |
6311 // "realloc" it and probably, in the vast majority of cases, | 6300 // "realloc" it and probably, in the vast majority of cases, |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6345 } | 6334 } |
6346 current = next; | 6335 current = next; |
6347 } | 6336 } |
6348 if (has_changed_character) { | 6337 if (has_changed_character) { |
6349 return result; | 6338 return result; |
6350 } else { | 6339 } else { |
6351 // If we didn't actually change anything in doing the conversion | 6340 // If we didn't actually change anything in doing the conversion |
6352 // we simply return the input and let the converted string | 6341 // we simply return the input and let the converted string |
6353 // become garbage; there is no reason to keep two identical strings | 6342 // become garbage; there is no reason to keep two identical strings |
6354 // alive. | 6343 // alive. |
6355 return s; | 6344 return string; |
6356 } | 6345 } |
6357 } | 6346 } |
6358 | 6347 |
6359 | 6348 |
6360 namespace { | 6349 namespace { |
6361 | 6350 |
6362 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; | 6351 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; |
6363 static const uintptr_t kAsciiMask = kOneInEveryByte << 7; | 6352 static const uintptr_t kAsciiMask = kOneInEveryByte << 7; |
6364 | 6353 |
6365 // Given a word and two range boundaries returns a word with high bit | 6354 // Given a word and two range boundaries returns a word with high bit |
(...skipping 10 matching lines...) Expand all Loading... |
6376 // Has high bit set in every w byte less than n. | 6365 // Has high bit set in every w byte less than n. |
6377 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; | 6366 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; |
6378 // Has high bit set in every w byte greater than m. | 6367 // Has high bit set in every w byte greater than m. |
6379 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); | 6368 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); |
6380 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); | 6369 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); |
6381 } | 6370 } |
6382 | 6371 |
6383 | 6372 |
6384 #ifdef DEBUG | 6373 #ifdef DEBUG |
6385 static bool CheckFastAsciiConvert(char* dst, | 6374 static bool CheckFastAsciiConvert(char* dst, |
6386 char* src, | 6375 const char* src, |
6387 int length, | 6376 int length, |
6388 bool changed, | 6377 bool changed, |
6389 bool is_to_lower) { | 6378 bool is_to_lower) { |
6390 bool expected_changed = false; | 6379 bool expected_changed = false; |
6391 for (int i = 0; i < length; i++) { | 6380 for (int i = 0; i < length; i++) { |
6392 if (dst[i] == src[i]) continue; | 6381 if (dst[i] == src[i]) continue; |
6393 expected_changed = true; | 6382 expected_changed = true; |
6394 if (is_to_lower) { | 6383 if (is_to_lower) { |
6395 ASSERT('A' <= src[i] && src[i] <= 'Z'); | 6384 ASSERT('A' <= src[i] && src[i] <= 'Z'); |
6396 ASSERT(dst[i] == src[i] + ('a' - 'A')); | 6385 ASSERT(dst[i] == src[i] + ('a' - 'A')); |
6397 } else { | 6386 } else { |
6398 ASSERT('a' <= src[i] && src[i] <= 'z'); | 6387 ASSERT('a' <= src[i] && src[i] <= 'z'); |
6399 ASSERT(dst[i] == src[i] - ('a' - 'A')); | 6388 ASSERT(dst[i] == src[i] - ('a' - 'A')); |
6400 } | 6389 } |
6401 } | 6390 } |
6402 return (expected_changed == changed); | 6391 return (expected_changed == changed); |
6403 } | 6392 } |
6404 #endif | 6393 #endif |
6405 | 6394 |
6406 | 6395 |
6407 template<class Converter> | 6396 template<class Converter> |
6408 static bool FastAsciiConvert(char* dst, | 6397 static bool FastAsciiConvert(char* dst, |
6409 char* src, | 6398 const char* src, |
6410 int length, | 6399 int length, |
6411 bool* changed_out) { | 6400 bool* changed_out) { |
6412 #ifdef DEBUG | 6401 #ifdef DEBUG |
6413 char* saved_dst = dst; | 6402 char* saved_dst = dst; |
6414 char* saved_src = src; | 6403 const char* saved_src = src; |
6415 #endif | 6404 #endif |
6416 DisallowHeapAllocation no_gc; | 6405 DisallowHeapAllocation no_gc; |
6417 // We rely on the distance between upper and lower case letters | 6406 // We rely on the distance between upper and lower case letters |
6418 // being a known power of 2. | 6407 // being a known power of 2. |
6419 ASSERT('a' - 'A' == (1 << 5)); | 6408 ASSERT('a' - 'A' == (1 << 5)); |
6420 // Boundaries for the range of input characters that require conversion. | 6409 // Boundaries for the range of input characters that require conversion. |
6421 static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1; | 6410 static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1; |
6422 static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1; | 6411 static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1; |
6423 bool changed = false; | 6412 bool changed = false; |
6424 uintptr_t or_acc = 0; | 6413 uintptr_t or_acc = 0; |
6425 char* const limit = src + length; | 6414 const char* const limit = src + length; |
6426 #ifdef V8_HOST_CAN_READ_UNALIGNED | 6415 #ifdef V8_HOST_CAN_READ_UNALIGNED |
6427 // Process the prefix of the input that requires no conversion one | 6416 // Process the prefix of the input that requires no conversion one |
6428 // (machine) word at a time. | 6417 // (machine) word at a time. |
6429 while (src <= limit - sizeof(uintptr_t)) { | 6418 while (src <= limit - sizeof(uintptr_t)) { |
6430 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | 6419 const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src); |
6431 or_acc |= w; | 6420 or_acc |= w; |
6432 if (AsciiRangeMask(w, lo, hi) != 0) { | 6421 if (AsciiRangeMask(w, lo, hi) != 0) { |
6433 changed = true; | 6422 changed = true; |
6434 break; | 6423 break; |
6435 } | 6424 } |
6436 *reinterpret_cast<uintptr_t*>(dst) = w; | 6425 *reinterpret_cast<uintptr_t*>(dst) = w; |
6437 src += sizeof(uintptr_t); | 6426 src += sizeof(uintptr_t); |
6438 dst += sizeof(uintptr_t); | 6427 dst += sizeof(uintptr_t); |
6439 } | 6428 } |
6440 // Process the remainder of the input performing conversion when | 6429 // Process the remainder of the input performing conversion when |
6441 // required one word at a time. | 6430 // required one word at a time. |
6442 while (src <= limit - sizeof(uintptr_t)) { | 6431 while (src <= limit - sizeof(uintptr_t)) { |
6443 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | 6432 const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src); |
6444 or_acc |= w; | 6433 or_acc |= w; |
6445 uintptr_t m = AsciiRangeMask(w, lo, hi); | 6434 uintptr_t m = AsciiRangeMask(w, lo, hi); |
6446 // The mask has high (7th) bit set in every byte that needs | 6435 // The mask has high (7th) bit set in every byte that needs |
6447 // conversion and we know that the distance between cases is | 6436 // conversion and we know that the distance between cases is |
6448 // 1 << 5. | 6437 // 1 << 5. |
6449 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); | 6438 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); |
6450 src += sizeof(uintptr_t); | 6439 src += sizeof(uintptr_t); |
6451 dst += sizeof(uintptr_t); | 6440 dst += sizeof(uintptr_t); |
6452 } | 6441 } |
6453 #endif | 6442 #endif |
(...skipping 22 matching lines...) Expand all Loading... |
6476 } | 6465 } |
6477 | 6466 |
6478 } // namespace | 6467 } // namespace |
6479 | 6468 |
6480 | 6469 |
6481 template <class Converter> | 6470 template <class Converter> |
6482 MUST_USE_RESULT static MaybeObject* ConvertCase( | 6471 MUST_USE_RESULT static MaybeObject* ConvertCase( |
6483 Arguments args, | 6472 Arguments args, |
6484 Isolate* isolate, | 6473 Isolate* isolate, |
6485 unibrow::Mapping<Converter, 128>* mapping) { | 6474 unibrow::Mapping<Converter, 128>* mapping) { |
6486 SealHandleScope shs(isolate); | 6475 HandleScope handle_scope(isolate); |
6487 CONVERT_ARG_CHECKED(String, s, 0); | 6476 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
6488 s = s->TryFlattenGetString(); | 6477 s = FlattenGetString(s); |
6489 | 6478 int length = s->length(); |
6490 const int length = s->length(); | |
6491 // Assume that the string is not empty; we need this assumption later | 6479 // Assume that the string is not empty; we need this assumption later |
6492 if (length == 0) return s; | 6480 if (length == 0) return *s; |
6493 | 6481 |
6494 // Simpler handling of ASCII strings. | 6482 // Simpler handling of ASCII strings. |
6495 // | 6483 // |
6496 // NOTE: This assumes that the upper/lower case of an ASCII | 6484 // NOTE: This assumes that the upper/lower case of an ASCII |
6497 // character is also ASCII. This is currently the case, but it | 6485 // character is also ASCII. This is currently the case, but it |
6498 // might break in the future if we implement more context and locale | 6486 // might break in the future if we implement more context and locale |
6499 // dependent upper/lower conversions. | 6487 // dependent upper/lower conversions. |
6500 if (s->IsSeqOneByteString()) { | 6488 if (s->IsOneByteRepresentationUnderneath()) { |
6501 Object* o; | 6489 Handle<SeqOneByteString> result = |
6502 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length); | 6490 isolate->factory()->NewRawOneByteString(length); |
6503 if (!maybe_o->ToObject(&o)) return maybe_o; | 6491 |
6504 } | 6492 DisallowHeapAllocation no_gc; |
6505 SeqOneByteString* result = SeqOneByteString::cast(o); | 6493 String::FlatContent flat_content = s->GetFlatContent(); |
| 6494 ASSERT(flat_content.IsFlat()); |
6506 bool has_changed_character = false; | 6495 bool has_changed_character = false; |
6507 bool is_ascii = FastAsciiConvert<Converter>( | 6496 bool is_ascii = FastAsciiConvert<Converter>( |
6508 reinterpret_cast<char*>(result->GetChars()), | 6497 reinterpret_cast<char*>(result->GetChars()), |
6509 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), | 6498 reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()), |
6510 length, | 6499 length, |
6511 &has_changed_character); | 6500 &has_changed_character); |
6512 // If not ASCII, we discard the result and take the 2 byte path. | 6501 // If not ASCII, we discard the result and take the 2 byte path. |
6513 if (is_ascii) { | 6502 if (is_ascii) return has_changed_character ? *result : *s; |
6514 return has_changed_character ? result : s; | |
6515 } | |
6516 } | 6503 } |
6517 | 6504 |
6518 String::Encoding result_encoding = s->IsOneByteRepresentation() | 6505 Handle<SeqString> result; |
6519 ? String::ONE_BYTE_ENCODING : String::TWO_BYTE_ENCODING; | 6506 if (s->IsOneByteRepresentation()) { |
| 6507 result = isolate->factory()->NewRawOneByteString(length); |
| 6508 } else { |
| 6509 result = isolate->factory()->NewRawTwoByteString(length); |
| 6510 } |
| 6511 MaybeObject* maybe = ConvertCaseHelper(isolate, *s, *result, length, mapping); |
6520 Object* answer; | 6512 Object* answer; |
6521 { MaybeObject* maybe_answer = ConvertCaseHelper( | 6513 if (!maybe->ToObject(&answer)) return maybe; |
6522 isolate, s, result_encoding, length, length, mapping); | 6514 if (answer->IsString()) return answer; |
6523 if (!maybe_answer->ToObject(&answer)) return maybe_answer; | 6515 |
| 6516 ASSERT(answer->IsSmi()); |
| 6517 length = Smi::cast(answer)->value(); |
| 6518 if (s->IsOneByteRepresentation() && length > 0) { |
| 6519 result = isolate->factory()->NewRawOneByteString(length); |
| 6520 } else { |
| 6521 if (length < 0) length = -length; |
| 6522 result = isolate->factory()->NewRawTwoByteString(length); |
6524 } | 6523 } |
6525 if (answer->IsSmi()) { | 6524 return ConvertCaseHelper(isolate, *s, *result, length, mapping); |
6526 int new_length = Smi::cast(answer)->value(); | |
6527 if (new_length < 0) { | |
6528 result_encoding = String::TWO_BYTE_ENCODING; | |
6529 new_length = -new_length; | |
6530 } | |
6531 MaybeObject* maybe_answer = ConvertCaseHelper( | |
6532 isolate, s, result_encoding, new_length, length, mapping); | |
6533 if (!maybe_answer->ToObject(&answer)) return maybe_answer; | |
6534 } | |
6535 return answer; | |
6536 } | 6525 } |
6537 | 6526 |
6538 | 6527 |
6539 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToLowerCase) { | 6528 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToLowerCase) { |
6540 return ConvertCase( | 6529 return ConvertCase( |
6541 args, isolate, isolate->runtime_state()->to_lower_mapping()); | 6530 args, isolate, isolate->runtime_state()->to_lower_mapping()); |
6542 } | 6531 } |
6543 | 6532 |
6544 | 6533 |
6545 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) { | 6534 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) { |
(...skipping 8419 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
14965 // Handle last resort GC and make sure to allow future allocations | 14954 // Handle last resort GC and make sure to allow future allocations |
14966 // to grow the heap without causing GCs (if possible). | 14955 // to grow the heap without causing GCs (if possible). |
14967 isolate->counters()->gc_last_resort_from_js()->Increment(); | 14956 isolate->counters()->gc_last_resort_from_js()->Increment(); |
14968 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, | 14957 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, |
14969 "Runtime::PerformGC"); | 14958 "Runtime::PerformGC"); |
14970 } | 14959 } |
14971 } | 14960 } |
14972 | 14961 |
14973 | 14962 |
14974 } } // namespace v8::internal | 14963 } } // namespace v8::internal |
OLD | NEW |