| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 6227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6238 str, ALLOW_TRAILING_JUNK, OS::nan_value()); | 6238 str, ALLOW_TRAILING_JUNK, OS::nan_value()); |
| 6239 | 6239 |
| 6240 // Create a number object from the value. | 6240 // Create a number object from the value. |
| 6241 return isolate->heap()->NumberFromDouble(value); | 6241 return isolate->heap()->NumberFromDouble(value); |
| 6242 } | 6242 } |
| 6243 | 6243 |
| 6244 | 6244 |
| 6245 template <class Converter> | 6245 template <class Converter> |
| 6246 MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( | 6246 MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( |
| 6247 Isolate* isolate, | 6247 Isolate* isolate, |
| 6248 String* s, | 6248 String* string, |
| 6249 String::Encoding result_encoding, | 6249 SeqString* result, |
| 6250 int length, | 6250 int result_length, |
| 6251 int input_string_length, | |
| 6252 unibrow::Mapping<Converter, 128>* mapping) { | 6251 unibrow::Mapping<Converter, 128>* mapping) { |
| 6252 DisallowHeapAllocation no_gc; |
| 6253 // We try this twice, once with the assumption that the result is no longer | 6253 // We try this twice, once with the assumption that the result is no longer |
| 6254 // than the input and, if that assumption breaks, again with the exact | 6254 // than the input and, if that assumption breaks, again with the exact |
| 6255 // length. This may not be pretty, but it is nicer than what was here before | 6255 // length. This may not be pretty, but it is nicer than what was here before |
| 6256 // and I hereby claim my vaffel-is. | 6256 // and I hereby claim my vaffel-is. |
| 6257 // | 6257 // |
| 6258 // Allocate the resulting string. | |
| 6259 // | |
| 6260 // NOTE: This assumes that the upper/lower case of an ASCII | 6258 // NOTE: This assumes that the upper/lower case of an ASCII |
| 6261 // character is also ASCII. This is currently the case, but it | 6259 // character is also ASCII. This is currently the case, but it |
| 6262 // might break in the future if we implement more context and locale | 6260 // might break in the future if we implement more context and locale |
| 6263 // dependent upper/lower conversions. | 6261 // dependent upper/lower conversions. |
| 6264 Object* o; | |
| 6265 { MaybeObject* maybe_o = result_encoding == String::ONE_BYTE_ENCODING | |
| 6266 ? isolate->heap()->AllocateRawOneByteString(length) | |
| 6267 : isolate->heap()->AllocateRawTwoByteString(length); | |
| 6268 if (!maybe_o->ToObject(&o)) return maybe_o; | |
| 6269 } | |
| 6270 String* result = String::cast(o); | |
| 6271 bool has_changed_character = false; | 6262 bool has_changed_character = false; |
| 6272 | 6263 |
| 6273 DisallowHeapAllocation no_gc; | |
| 6274 | |
| 6275 // Convert all characters to upper case, assuming that they will fit | 6264 // Convert all characters to upper case, assuming that they will fit |
| 6276 // in the buffer | 6265 // in the buffer |
| 6277 Access<ConsStringIteratorOp> op( | 6266 Access<ConsStringIteratorOp> op( |
| 6278 isolate->runtime_state()->string_iterator()); | 6267 isolate->runtime_state()->string_iterator()); |
| 6279 StringCharacterStream stream(s, op.value()); | 6268 StringCharacterStream stream(string, op.value()); |
| 6280 unibrow::uchar chars[Converter::kMaxWidth]; | 6269 unibrow::uchar chars[Converter::kMaxWidth]; |
| 6281 // We can assume that the string is not empty | 6270 // We can assume that the string is not empty |
| 6282 uc32 current = stream.GetNext(); | 6271 uc32 current = stream.GetNext(); |
| 6283 // y with umlauts is the only character that stops fitting into one-byte | 6272 // y with umlauts is the only character that stops fitting into one-byte |
| 6284 // when converting to uppercase. | 6273 // when converting to uppercase. |
| 6285 static const uc32 yuml_code = 0xff; | 6274 static const uc32 yuml_code = 0xff; |
| 6286 bool ignore_yuml = result->IsSeqTwoByteString() || Converter::kIsToLower; | 6275 bool ignore_yuml = result->IsSeqTwoByteString() || Converter::kIsToLower; |
| 6287 for (int i = 0; i < length;) { | 6276 for (int i = 0; i < result_length;) { |
| 6288 bool has_next = stream.HasMore(); | 6277 bool has_next = stream.HasMore(); |
| 6289 uc32 next = has_next ? stream.GetNext() : 0; | 6278 uc32 next = has_next ? stream.GetNext() : 0; |
| 6290 int char_length = mapping->get(current, next, chars); | 6279 int char_length = mapping->get(current, next, chars); |
| 6291 if (char_length == 0) { | 6280 if (char_length == 0) { |
| 6292 // The case conversion of this character is the character itself. | 6281 // The case conversion of this character is the character itself. |
| 6293 result->Set(i, current); | 6282 result->Set(i, current); |
| 6294 i++; | 6283 i++; |
| 6295 } else if (char_length == 1 && (ignore_yuml || current != yuml_code)) { | 6284 } else if (char_length == 1 && (ignore_yuml || current != yuml_code)) { |
| 6296 // Common case: converting the letter resulted in one character. | 6285 // Common case: converting the letter resulted in one character. |
| 6297 ASSERT(static_cast<uc32>(chars[0]) != current); | 6286 ASSERT(static_cast<uc32>(chars[0]) != current); |
| 6298 result->Set(i, chars[0]); | 6287 result->Set(i, chars[0]); |
| 6299 has_changed_character = true; | 6288 has_changed_character = true; |
| 6300 i++; | 6289 i++; |
| 6301 } else if (length == input_string_length) { | 6290 } else if (result_length == string->length()) { |
| 6302 bool found_yuml = (current == yuml_code); | 6291 bool found_yuml = (current == yuml_code); |
| 6303 // We've assumed that the result would be as long as the | 6292 // We've assumed that the result would be as long as the |
| 6304 // input but here is a character that converts to several | 6293 // input but here is a character that converts to several |
| 6305 // characters. No matter, we calculate the exact length | 6294 // characters. No matter, we calculate the exact length |
| 6306 // of the result and try the whole thing again. | 6295 // of the result and try the whole thing again. |
| 6307 // | 6296 // |
| 6308 // Note that this leaves room for optimization. We could just | 6297 // Note that this leaves room for optimization. We could just |
| 6309 // memcpy what we already have to the result string. Also, | 6298 // memcpy what we already have to the result string. Also, |
| 6310 // since the result string is the last object allocated, we could | 6299 // since the result string is the last object allocated, we could |
| 6311 // "realloc" it and probably, in the vast majority of cases, | 6300 // "realloc" it and probably, in the vast majority of cases, |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6345 } | 6334 } |
| 6346 current = next; | 6335 current = next; |
| 6347 } | 6336 } |
| 6348 if (has_changed_character) { | 6337 if (has_changed_character) { |
| 6349 return result; | 6338 return result; |
| 6350 } else { | 6339 } else { |
| 6351 // If we didn't actually change anything in doing the conversion | 6340 // If we didn't actually change anything in doing the conversion |
| 6352 // we simply return the input and let the converted string | 6341 // we simply return the input and let the converted string |
| 6353 // become garbage; there is no reason to keep two identical strings | 6342 // become garbage; there is no reason to keep two identical strings |
| 6354 // alive. | 6343 // alive. |
| 6355 return s; | 6344 return string; |
| 6356 } | 6345 } |
| 6357 } | 6346 } |
| 6358 | 6347 |
| 6359 | 6348 |
| 6360 namespace { | 6349 namespace { |
| 6361 | 6350 |
| 6362 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; | 6351 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; |
| 6363 static const uintptr_t kAsciiMask = kOneInEveryByte << 7; | 6352 static const uintptr_t kAsciiMask = kOneInEveryByte << 7; |
| 6364 | 6353 |
| 6365 // Given a word and two range boundaries returns a word with high bit | 6354 // Given a word and two range boundaries returns a word with high bit |
| (...skipping 10 matching lines...) Expand all Loading... |
| 6376 // Has high bit set in every w byte less than n. | 6365 // Has high bit set in every w byte less than n. |
| 6377 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; | 6366 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; |
| 6378 // Has high bit set in every w byte greater than m. | 6367 // Has high bit set in every w byte greater than m. |
| 6379 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); | 6368 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); |
| 6380 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); | 6369 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); |
| 6381 } | 6370 } |
| 6382 | 6371 |
| 6383 | 6372 |
| 6384 #ifdef DEBUG | 6373 #ifdef DEBUG |
| 6385 static bool CheckFastAsciiConvert(char* dst, | 6374 static bool CheckFastAsciiConvert(char* dst, |
| 6386 char* src, | 6375 const char* src, |
| 6387 int length, | 6376 int length, |
| 6388 bool changed, | 6377 bool changed, |
| 6389 bool is_to_lower) { | 6378 bool is_to_lower) { |
| 6390 bool expected_changed = false; | 6379 bool expected_changed = false; |
| 6391 for (int i = 0; i < length; i++) { | 6380 for (int i = 0; i < length; i++) { |
| 6392 if (dst[i] == src[i]) continue; | 6381 if (dst[i] == src[i]) continue; |
| 6393 expected_changed = true; | 6382 expected_changed = true; |
| 6394 if (is_to_lower) { | 6383 if (is_to_lower) { |
| 6395 ASSERT('A' <= src[i] && src[i] <= 'Z'); | 6384 ASSERT('A' <= src[i] && src[i] <= 'Z'); |
| 6396 ASSERT(dst[i] == src[i] + ('a' - 'A')); | 6385 ASSERT(dst[i] == src[i] + ('a' - 'A')); |
| 6397 } else { | 6386 } else { |
| 6398 ASSERT('a' <= src[i] && src[i] <= 'z'); | 6387 ASSERT('a' <= src[i] && src[i] <= 'z'); |
| 6399 ASSERT(dst[i] == src[i] - ('a' - 'A')); | 6388 ASSERT(dst[i] == src[i] - ('a' - 'A')); |
| 6400 } | 6389 } |
| 6401 } | 6390 } |
| 6402 return (expected_changed == changed); | 6391 return (expected_changed == changed); |
| 6403 } | 6392 } |
| 6404 #endif | 6393 #endif |
| 6405 | 6394 |
| 6406 | 6395 |
| 6407 template<class Converter> | 6396 template<class Converter> |
| 6408 static bool FastAsciiConvert(char* dst, | 6397 static bool FastAsciiConvert(char* dst, |
| 6409 char* src, | 6398 const char* src, |
| 6410 int length, | 6399 int length, |
| 6411 bool* changed_out) { | 6400 bool* changed_out) { |
| 6412 #ifdef DEBUG | 6401 #ifdef DEBUG |
| 6413 char* saved_dst = dst; | 6402 char* saved_dst = dst; |
| 6414 char* saved_src = src; | 6403 const char* saved_src = src; |
| 6415 #endif | 6404 #endif |
| 6416 DisallowHeapAllocation no_gc; | 6405 DisallowHeapAllocation no_gc; |
| 6417 // We rely on the distance between upper and lower case letters | 6406 // We rely on the distance between upper and lower case letters |
| 6418 // being a known power of 2. | 6407 // being a known power of 2. |
| 6419 ASSERT('a' - 'A' == (1 << 5)); | 6408 ASSERT('a' - 'A' == (1 << 5)); |
| 6420 // Boundaries for the range of input characters that require conversion. | 6409 // Boundaries for the range of input characters that require conversion. |
| 6421 static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1; | 6410 static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1; |
| 6422 static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1; | 6411 static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1; |
| 6423 bool changed = false; | 6412 bool changed = false; |
| 6424 uintptr_t or_acc = 0; | 6413 uintptr_t or_acc = 0; |
| 6425 char* const limit = src + length; | 6414 const char* const limit = src + length; |
| 6426 #ifdef V8_HOST_CAN_READ_UNALIGNED | 6415 #ifdef V8_HOST_CAN_READ_UNALIGNED |
| 6427 // Process the prefix of the input that requires no conversion one | 6416 // Process the prefix of the input that requires no conversion one |
| 6428 // (machine) word at a time. | 6417 // (machine) word at a time. |
| 6429 while (src <= limit - sizeof(uintptr_t)) { | 6418 while (src <= limit - sizeof(uintptr_t)) { |
| 6430 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | 6419 const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src); |
| 6431 or_acc |= w; | 6420 or_acc |= w; |
| 6432 if (AsciiRangeMask(w, lo, hi) != 0) { | 6421 if (AsciiRangeMask(w, lo, hi) != 0) { |
| 6433 changed = true; | 6422 changed = true; |
| 6434 break; | 6423 break; |
| 6435 } | 6424 } |
| 6436 *reinterpret_cast<uintptr_t*>(dst) = w; | 6425 *reinterpret_cast<uintptr_t*>(dst) = w; |
| 6437 src += sizeof(uintptr_t); | 6426 src += sizeof(uintptr_t); |
| 6438 dst += sizeof(uintptr_t); | 6427 dst += sizeof(uintptr_t); |
| 6439 } | 6428 } |
| 6440 // Process the remainder of the input performing conversion when | 6429 // Process the remainder of the input performing conversion when |
| 6441 // required one word at a time. | 6430 // required one word at a time. |
| 6442 while (src <= limit - sizeof(uintptr_t)) { | 6431 while (src <= limit - sizeof(uintptr_t)) { |
| 6443 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | 6432 const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src); |
| 6444 or_acc |= w; | 6433 or_acc |= w; |
| 6445 uintptr_t m = AsciiRangeMask(w, lo, hi); | 6434 uintptr_t m = AsciiRangeMask(w, lo, hi); |
| 6446 // The mask has high (7th) bit set in every byte that needs | 6435 // The mask has high (7th) bit set in every byte that needs |
| 6447 // conversion and we know that the distance between cases is | 6436 // conversion and we know that the distance between cases is |
| 6448 // 1 << 5. | 6437 // 1 << 5. |
| 6449 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); | 6438 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); |
| 6450 src += sizeof(uintptr_t); | 6439 src += sizeof(uintptr_t); |
| 6451 dst += sizeof(uintptr_t); | 6440 dst += sizeof(uintptr_t); |
| 6452 } | 6441 } |
| 6453 #endif | 6442 #endif |
| (...skipping 22 matching lines...) Expand all Loading... |
| 6476 } | 6465 } |
| 6477 | 6466 |
| 6478 } // namespace | 6467 } // namespace |
| 6479 | 6468 |
| 6480 | 6469 |
| 6481 template <class Converter> | 6470 template <class Converter> |
| 6482 MUST_USE_RESULT static MaybeObject* ConvertCase( | 6471 MUST_USE_RESULT static MaybeObject* ConvertCase( |
| 6483 Arguments args, | 6472 Arguments args, |
| 6484 Isolate* isolate, | 6473 Isolate* isolate, |
| 6485 unibrow::Mapping<Converter, 128>* mapping) { | 6474 unibrow::Mapping<Converter, 128>* mapping) { |
| 6486 SealHandleScope shs(isolate); | 6475 HandleScope handle_scope(isolate); |
| 6487 CONVERT_ARG_CHECKED(String, s, 0); | 6476 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
| 6488 s = s->TryFlattenGetString(); | 6477 s = FlattenGetString(s); |
| 6489 | 6478 int length = s->length(); |
| 6490 const int length = s->length(); | |
| 6491 // Assume that the string is not empty; we need this assumption later | 6479 // Assume that the string is not empty; we need this assumption later |
| 6492 if (length == 0) return s; | 6480 if (length == 0) return *s; |
| 6493 | 6481 |
| 6494 // Simpler handling of ASCII strings. | 6482 // Simpler handling of ASCII strings. |
| 6495 // | 6483 // |
| 6496 // NOTE: This assumes that the upper/lower case of an ASCII | 6484 // NOTE: This assumes that the upper/lower case of an ASCII |
| 6497 // character is also ASCII. This is currently the case, but it | 6485 // character is also ASCII. This is currently the case, but it |
| 6498 // might break in the future if we implement more context and locale | 6486 // might break in the future if we implement more context and locale |
| 6499 // dependent upper/lower conversions. | 6487 // dependent upper/lower conversions. |
| 6500 if (s->IsSeqOneByteString()) { | 6488 if (s->IsOneByteRepresentationUnderneath()) { |
| 6501 Object* o; | 6489 Handle<SeqOneByteString> result = |
| 6502 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length); | 6490 isolate->factory()->NewRawOneByteString(length); |
| 6503 if (!maybe_o->ToObject(&o)) return maybe_o; | 6491 |
| 6504 } | 6492 DisallowHeapAllocation no_gc; |
| 6505 SeqOneByteString* result = SeqOneByteString::cast(o); | 6493 String::FlatContent flat_content = s->GetFlatContent(); |
| 6494 ASSERT(flat_content.IsFlat()); |
| 6506 bool has_changed_character = false; | 6495 bool has_changed_character = false; |
| 6507 bool is_ascii = FastAsciiConvert<Converter>( | 6496 bool is_ascii = FastAsciiConvert<Converter>( |
| 6508 reinterpret_cast<char*>(result->GetChars()), | 6497 reinterpret_cast<char*>(result->GetChars()), |
| 6509 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), | 6498 reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()), |
| 6510 length, | 6499 length, |
| 6511 &has_changed_character); | 6500 &has_changed_character); |
| 6512 // If not ASCII, we discard the result and take the 2 byte path. | 6501 // If not ASCII, we discard the result and take the 2 byte path. |
| 6513 if (is_ascii) { | 6502 if (is_ascii) return has_changed_character ? *result : *s; |
| 6514 return has_changed_character ? result : s; | |
| 6515 } | |
| 6516 } | 6503 } |
| 6517 | 6504 |
| 6518 String::Encoding result_encoding = s->IsOneByteRepresentation() | 6505 Handle<SeqString> result; |
| 6519 ? String::ONE_BYTE_ENCODING : String::TWO_BYTE_ENCODING; | 6506 if (s->IsOneByteRepresentation()) { |
| 6507 result = isolate->factory()->NewRawOneByteString(length); |
| 6508 } else { |
| 6509 result = isolate->factory()->NewRawTwoByteString(length); |
| 6510 } |
| 6511 MaybeObject* maybe = ConvertCaseHelper(isolate, *s, *result, length, mapping); |
| 6520 Object* answer; | 6512 Object* answer; |
| 6521 { MaybeObject* maybe_answer = ConvertCaseHelper( | 6513 if (!maybe->ToObject(&answer)) return maybe; |
| 6522 isolate, s, result_encoding, length, length, mapping); | 6514 if (answer->IsString()) return answer; |
| 6523 if (!maybe_answer->ToObject(&answer)) return maybe_answer; | 6515 |
| 6516 ASSERT(answer->IsSmi()); |
| 6517 length = Smi::cast(answer)->value(); |
| 6518 if (s->IsOneByteRepresentation() && length > 0) { |
| 6519 result = isolate->factory()->NewRawOneByteString(length); |
| 6520 } else { |
| 6521 if (length < 0) length = -length; |
| 6522 result = isolate->factory()->NewRawTwoByteString(length); |
| 6524 } | 6523 } |
| 6525 if (answer->IsSmi()) { | 6524 return ConvertCaseHelper(isolate, *s, *result, length, mapping); |
| 6526 int new_length = Smi::cast(answer)->value(); | |
| 6527 if (new_length < 0) { | |
| 6528 result_encoding = String::TWO_BYTE_ENCODING; | |
| 6529 new_length = -new_length; | |
| 6530 } | |
| 6531 MaybeObject* maybe_answer = ConvertCaseHelper( | |
| 6532 isolate, s, result_encoding, new_length, length, mapping); | |
| 6533 if (!maybe_answer->ToObject(&answer)) return maybe_answer; | |
| 6534 } | |
| 6535 return answer; | |
| 6536 } | 6525 } |
| 6537 | 6526 |
| 6538 | 6527 |
| 6539 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToLowerCase) { | 6528 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToLowerCase) { |
| 6540 return ConvertCase( | 6529 return ConvertCase( |
| 6541 args, isolate, isolate->runtime_state()->to_lower_mapping()); | 6530 args, isolate, isolate->runtime_state()->to_lower_mapping()); |
| 6542 } | 6531 } |
| 6543 | 6532 |
| 6544 | 6533 |
| 6545 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) { | 6534 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) { |
| (...skipping 8419 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 14965 // Handle last resort GC and make sure to allow future allocations | 14954 // Handle last resort GC and make sure to allow future allocations |
| 14966 // to grow the heap without causing GCs (if possible). | 14955 // to grow the heap without causing GCs (if possible). |
| 14967 isolate->counters()->gc_last_resort_from_js()->Increment(); | 14956 isolate->counters()->gc_last_resort_from_js()->Increment(); |
| 14968 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, | 14957 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, |
| 14969 "Runtime::PerformGC"); | 14958 "Runtime::PerformGC"); |
| 14970 } | 14959 } |
| 14971 } | 14960 } |
| 14972 | 14961 |
| 14973 | 14962 |
| 14974 } } // namespace v8::internal | 14963 } } // namespace v8::internal |
| OLD | NEW |