Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(37)

Side by Side Diff: src/runtime.cc

Issue 180063002: Use fast path for sliced and external strings in ConvertCase. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | test/mjsunit/string-case.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 6227 matching lines...) Expand 10 before | Expand all | Expand 10 after
6238 str, ALLOW_TRAILING_JUNK, OS::nan_value()); 6238 str, ALLOW_TRAILING_JUNK, OS::nan_value());
6239 6239
6240 // Create a number object from the value. 6240 // Create a number object from the value.
6241 return isolate->heap()->NumberFromDouble(value); 6241 return isolate->heap()->NumberFromDouble(value);
6242 } 6242 }
6243 6243
6244 6244
6245 template <class Converter> 6245 template <class Converter>
6246 MUST_USE_RESULT static MaybeObject* ConvertCaseHelper( 6246 MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(
6247 Isolate* isolate, 6247 Isolate* isolate,
6248 String* s, 6248 String* string,
6249 String::Encoding result_encoding, 6249 SeqString* result,
6250 int length, 6250 int result_length,
6251 int input_string_length,
6252 unibrow::Mapping<Converter, 128>* mapping) { 6251 unibrow::Mapping<Converter, 128>* mapping) {
6252 DisallowHeapAllocation no_gc;
6253 // We try this twice, once with the assumption that the result is no longer 6253 // We try this twice, once with the assumption that the result is no longer
6254 // than the input and, if that assumption breaks, again with the exact 6254 // than the input and, if that assumption breaks, again with the exact
6255 // length. This may not be pretty, but it is nicer than what was here before 6255 // length. This may not be pretty, but it is nicer than what was here before
6256 // and I hereby claim my vaffel-is. 6256 // and I hereby claim my vaffel-is.
6257 // 6257 //
6258 // Allocate the resulting string.
6259 //
6260 // NOTE: This assumes that the upper/lower case of an ASCII 6258 // NOTE: This assumes that the upper/lower case of an ASCII
6261 // character is also ASCII. This is currently the case, but it 6259 // character is also ASCII. This is currently the case, but it
6262 // might break in the future if we implement more context and locale 6260 // might break in the future if we implement more context and locale
6263 // dependent upper/lower conversions. 6261 // dependent upper/lower conversions.
6264 Object* o;
6265 { MaybeObject* maybe_o = result_encoding == String::ONE_BYTE_ENCODING
6266 ? isolate->heap()->AllocateRawOneByteString(length)
6267 : isolate->heap()->AllocateRawTwoByteString(length);
6268 if (!maybe_o->ToObject(&o)) return maybe_o;
6269 }
6270 String* result = String::cast(o);
6271 bool has_changed_character = false; 6262 bool has_changed_character = false;
6272 6263
6273 DisallowHeapAllocation no_gc;
6274
6275 // Convert all characters to upper case, assuming that they will fit 6264 // Convert all characters to upper case, assuming that they will fit
6276 // in the buffer 6265 // in the buffer
6277 Access<ConsStringIteratorOp> op( 6266 Access<ConsStringIteratorOp> op(
6278 isolate->runtime_state()->string_iterator()); 6267 isolate->runtime_state()->string_iterator());
6279 StringCharacterStream stream(s, op.value()); 6268 StringCharacterStream stream(string, op.value());
6280 unibrow::uchar chars[Converter::kMaxWidth]; 6269 unibrow::uchar chars[Converter::kMaxWidth];
6281 // We can assume that the string is not empty 6270 // We can assume that the string is not empty
6282 uc32 current = stream.GetNext(); 6271 uc32 current = stream.GetNext();
6283 // y with umlauts is the only character that stops fitting into one-byte 6272 // y with umlauts is the only character that stops fitting into one-byte
6284 // when converting to uppercase. 6273 // when converting to uppercase.
6285 static const uc32 yuml_code = 0xff; 6274 static const uc32 yuml_code = 0xff;
6286 bool ignore_yuml = result->IsSeqTwoByteString() || Converter::kIsToLower; 6275 bool ignore_yuml = result->IsSeqTwoByteString() || Converter::kIsToLower;
6287 for (int i = 0; i < length;) { 6276 for (int i = 0; i < result_length;) {
6288 bool has_next = stream.HasMore(); 6277 bool has_next = stream.HasMore();
6289 uc32 next = has_next ? stream.GetNext() : 0; 6278 uc32 next = has_next ? stream.GetNext() : 0;
6290 int char_length = mapping->get(current, next, chars); 6279 int char_length = mapping->get(current, next, chars);
6291 if (char_length == 0) { 6280 if (char_length == 0) {
6292 // The case conversion of this character is the character itself. 6281 // The case conversion of this character is the character itself.
6293 result->Set(i, current); 6282 result->Set(i, current);
6294 i++; 6283 i++;
6295 } else if (char_length == 1 && (ignore_yuml || current != yuml_code)) { 6284 } else if (char_length == 1 && (ignore_yuml || current != yuml_code)) {
6296 // Common case: converting the letter resulted in one character. 6285 // Common case: converting the letter resulted in one character.
6297 ASSERT(static_cast<uc32>(chars[0]) != current); 6286 ASSERT(static_cast<uc32>(chars[0]) != current);
6298 result->Set(i, chars[0]); 6287 result->Set(i, chars[0]);
6299 has_changed_character = true; 6288 has_changed_character = true;
6300 i++; 6289 i++;
6301 } else if (length == input_string_length) { 6290 } else if (result_length == string->length()) {
6302 bool found_yuml = (current == yuml_code); 6291 bool found_yuml = (current == yuml_code);
6303 // We've assumed that the result would be as long as the 6292 // We've assumed that the result would be as long as the
6304 // input but here is a character that converts to several 6293 // input but here is a character that converts to several
6305 // characters. No matter, we calculate the exact length 6294 // characters. No matter, we calculate the exact length
6306 // of the result and try the whole thing again. 6295 // of the result and try the whole thing again.
6307 // 6296 //
6308 // Note that this leaves room for optimization. We could just 6297 // Note that this leaves room for optimization. We could just
6309 // memcpy what we already have to the result string. Also, 6298 // memcpy what we already have to the result string. Also,
6310 // the result string is the last object allocated we could 6299 // the result string is the last object allocated we could
6311 // "realloc" it and probably, in the vast majority of cases, 6300 // "realloc" it and probably, in the vast majority of cases,
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
6345 } 6334 }
6346 current = next; 6335 current = next;
6347 } 6336 }
6348 if (has_changed_character) { 6337 if (has_changed_character) {
6349 return result; 6338 return result;
6350 } else { 6339 } else {
6351 // If we didn't actually change anything in doing the conversion 6340 // If we didn't actually change anything in doing the conversion
6352 // we simple return the result and let the converted string 6341 // we simple return the result and let the converted string
6353 // become garbage; there is no reason to keep two identical strings 6342 // become garbage; there is no reason to keep two identical strings
6354 // alive. 6343 // alive.
6355 return s; 6344 return string;
6356 } 6345 }
6357 } 6346 }
6358 6347
6359 6348
6360 namespace { 6349 namespace {
6361 6350
6362 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; 6351 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;
6363 static const uintptr_t kAsciiMask = kOneInEveryByte << 7; 6352 static const uintptr_t kAsciiMask = kOneInEveryByte << 7;
6364 6353
6365 // Given a word and two range boundaries returns a word with high bit 6354 // Given a word and two range boundaries returns a word with high bit
(...skipping 10 matching lines...) Expand all
6376 // Has high bit set in every w byte less than n. 6365 // Has high bit set in every w byte less than n.
6377 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; 6366 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;
6378 // Has high bit set in every w byte greater than m. 6367 // Has high bit set in every w byte greater than m.
6379 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); 6368 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);
6380 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); 6369 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));
6381 } 6370 }
6382 6371
6383 6372
6384 #ifdef DEBUG 6373 #ifdef DEBUG
6385 static bool CheckFastAsciiConvert(char* dst, 6374 static bool CheckFastAsciiConvert(char* dst,
6386 char* src, 6375 const char* src,
6387 int length, 6376 int length,
6388 bool changed, 6377 bool changed,
6389 bool is_to_lower) { 6378 bool is_to_lower) {
6390 bool expected_changed = false; 6379 bool expected_changed = false;
6391 for (int i = 0; i < length; i++) { 6380 for (int i = 0; i < length; i++) {
6392 if (dst[i] == src[i]) continue; 6381 if (dst[i] == src[i]) continue;
6393 expected_changed = true; 6382 expected_changed = true;
6394 if (is_to_lower) { 6383 if (is_to_lower) {
6395 ASSERT('A' <= src[i] && src[i] <= 'Z'); 6384 ASSERT('A' <= src[i] && src[i] <= 'Z');
6396 ASSERT(dst[i] == src[i] + ('a' - 'A')); 6385 ASSERT(dst[i] == src[i] + ('a' - 'A'));
6397 } else { 6386 } else {
6398 ASSERT('a' <= src[i] && src[i] <= 'z'); 6387 ASSERT('a' <= src[i] && src[i] <= 'z');
6399 ASSERT(dst[i] == src[i] - ('a' - 'A')); 6388 ASSERT(dst[i] == src[i] - ('a' - 'A'));
6400 } 6389 }
6401 } 6390 }
6402 return (expected_changed == changed); 6391 return (expected_changed == changed);
6403 } 6392 }
6404 #endif 6393 #endif
6405 6394
6406 6395
6407 template<class Converter> 6396 template<class Converter>
6408 static bool FastAsciiConvert(char* dst, 6397 static bool FastAsciiConvert(char* dst,
6409 char* src, 6398 const char* src,
6410 int length, 6399 int length,
6411 bool* changed_out) { 6400 bool* changed_out) {
6412 #ifdef DEBUG 6401 #ifdef DEBUG
6413 char* saved_dst = dst; 6402 char* saved_dst = dst;
6414 char* saved_src = src; 6403 const char* saved_src = src;
6415 #endif 6404 #endif
6416 DisallowHeapAllocation no_gc; 6405 DisallowHeapAllocation no_gc;
6417 // We rely on the distance between upper and lower case letters 6406 // We rely on the distance between upper and lower case letters
6418 // being a known power of 2. 6407 // being a known power of 2.
6419 ASSERT('a' - 'A' == (1 << 5)); 6408 ASSERT('a' - 'A' == (1 << 5));
6420 // Boundaries for the range of input characters than require conversion. 6409 // Boundaries for the range of input characters than require conversion.
6421 static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1; 6410 static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1;
6422 static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1; 6411 static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1;
6423 bool changed = false; 6412 bool changed = false;
6424 uintptr_t or_acc = 0; 6413 uintptr_t or_acc = 0;
6425 char* const limit = src + length; 6414 const char* const limit = src + length;
6426 #ifdef V8_HOST_CAN_READ_UNALIGNED 6415 #ifdef V8_HOST_CAN_READ_UNALIGNED
6427 // Process the prefix of the input that requires no conversion one 6416 // Process the prefix of the input that requires no conversion one
6428 // (machine) word at a time. 6417 // (machine) word at a time.
6429 while (src <= limit - sizeof(uintptr_t)) { 6418 while (src <= limit - sizeof(uintptr_t)) {
6430 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); 6419 const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
6431 or_acc |= w; 6420 or_acc |= w;
6432 if (AsciiRangeMask(w, lo, hi) != 0) { 6421 if (AsciiRangeMask(w, lo, hi) != 0) {
6433 changed = true; 6422 changed = true;
6434 break; 6423 break;
6435 } 6424 }
6436 *reinterpret_cast<uintptr_t*>(dst) = w; 6425 *reinterpret_cast<uintptr_t*>(dst) = w;
6437 src += sizeof(uintptr_t); 6426 src += sizeof(uintptr_t);
6438 dst += sizeof(uintptr_t); 6427 dst += sizeof(uintptr_t);
6439 } 6428 }
6440 // Process the remainder of the input performing conversion when 6429 // Process the remainder of the input performing conversion when
6441 // required one word at a time. 6430 // required one word at a time.
6442 while (src <= limit - sizeof(uintptr_t)) { 6431 while (src <= limit - sizeof(uintptr_t)) {
6443 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); 6432 const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
6444 or_acc |= w; 6433 or_acc |= w;
6445 uintptr_t m = AsciiRangeMask(w, lo, hi); 6434 uintptr_t m = AsciiRangeMask(w, lo, hi);
6446 // The mask has high (7th) bit set in every byte that needs 6435 // The mask has high (7th) bit set in every byte that needs
6447 // conversion and we know that the distance between cases is 6436 // conversion and we know that the distance between cases is
6448 // 1 << 5. 6437 // 1 << 5.
6449 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); 6438 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);
6450 src += sizeof(uintptr_t); 6439 src += sizeof(uintptr_t);
6451 dst += sizeof(uintptr_t); 6440 dst += sizeof(uintptr_t);
6452 } 6441 }
6453 #endif 6442 #endif
(...skipping 22 matching lines...) Expand all
6476 } 6465 }
6477 6466
6478 } // namespace 6467 } // namespace
6479 6468
6480 6469
6481 template <class Converter> 6470 template <class Converter>
6482 MUST_USE_RESULT static MaybeObject* ConvertCase( 6471 MUST_USE_RESULT static MaybeObject* ConvertCase(
6483 Arguments args, 6472 Arguments args,
6484 Isolate* isolate, 6473 Isolate* isolate,
6485 unibrow::Mapping<Converter, 128>* mapping) { 6474 unibrow::Mapping<Converter, 128>* mapping) {
6486 SealHandleScope shs(isolate); 6475 HandleScope handle_scope(isolate);
6487 CONVERT_ARG_CHECKED(String, s, 0); 6476 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
6488 s = s->TryFlattenGetString(); 6477 s = FlattenGetString(s);
6489 6478 int length = s->length();
6490 const int length = s->length();
6491 // Assume that the string is not empty; we need this assumption later 6479 // Assume that the string is not empty; we need this assumption later
6492 if (length == 0) return s; 6480 if (length == 0) return *s;
6493 6481
6494 // Simpler handling of ASCII strings. 6482 // Simpler handling of ASCII strings.
6495 // 6483 //
6496 // NOTE: This assumes that the upper/lower case of an ASCII 6484 // NOTE: This assumes that the upper/lower case of an ASCII
6497 // character is also ASCII. This is currently the case, but it 6485 // character is also ASCII. This is currently the case, but it
6498 // might break in the future if we implement more context and locale 6486 // might break in the future if we implement more context and locale
6499 // dependent upper/lower conversions. 6487 // dependent upper/lower conversions.
6500 if (s->IsSeqOneByteString()) { 6488 if (s->IsOneByteRepresentationUnderneath()) {
6501 Object* o; 6489 Handle<SeqOneByteString> result =
6502 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length); 6490 isolate->factory()->NewRawOneByteString(length);
6503 if (!maybe_o->ToObject(&o)) return maybe_o; 6491
6504 } 6492 DisallowHeapAllocation no_gc;
6505 SeqOneByteString* result = SeqOneByteString::cast(o); 6493 String::FlatContent flat_content = s->GetFlatContent();
6494 ASSERT(flat_content.IsFlat());
6506 bool has_changed_character = false; 6495 bool has_changed_character = false;
6507 bool is_ascii = FastAsciiConvert<Converter>( 6496 bool is_ascii = FastAsciiConvert<Converter>(
6508 reinterpret_cast<char*>(result->GetChars()), 6497 reinterpret_cast<char*>(result->GetChars()),
6509 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), 6498 reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
6510 length, 6499 length,
6511 &has_changed_character); 6500 &has_changed_character);
6512 // If not ASCII, we discard the result and take the 2 byte path. 6501 // If not ASCII, we discard the result and take the 2 byte path.
6513 if (is_ascii) { 6502 if (is_ascii) return has_changed_character ? *result : *s;
6514 return has_changed_character ? result : s;
6515 }
6516 } 6503 }
6517 6504
6518 String::Encoding result_encoding = s->IsOneByteRepresentation() 6505 Handle<SeqString> result;
6519 ? String::ONE_BYTE_ENCODING : String::TWO_BYTE_ENCODING; 6506 if (s->IsOneByteRepresentation()) {
6507 result = isolate->factory()->NewRawOneByteString(length);
6508 } else {
6509 result = isolate->factory()->NewRawTwoByteString(length);
6510 }
6511 MaybeObject* maybe = ConvertCaseHelper(isolate, *s, *result, length, mapping);
6520 Object* answer; 6512 Object* answer;
6521 { MaybeObject* maybe_answer = ConvertCaseHelper( 6513 if (!maybe->ToObject(&answer)) return maybe;
6522 isolate, s, result_encoding, length, length, mapping); 6514 if (answer->IsString()) return answer;
6523 if (!maybe_answer->ToObject(&answer)) return maybe_answer; 6515
6516 ASSERT(answer->IsSmi());
6517 length = Smi::cast(answer)->value();
6518 if (s->IsOneByteRepresentation() && length > 0) {
6519 result = isolate->factory()->NewRawOneByteString(length);
6520 } else {
6521 if (length < 0) length = -length;
6522 result = isolate->factory()->NewRawTwoByteString(length);
6524 } 6523 }
6525 if (answer->IsSmi()) { 6524 return ConvertCaseHelper(isolate, *s, *result, length, mapping);
6526 int new_length = Smi::cast(answer)->value();
6527 if (new_length < 0) {
6528 result_encoding = String::TWO_BYTE_ENCODING;
6529 new_length = -new_length;
6530 }
6531 MaybeObject* maybe_answer = ConvertCaseHelper(
6532 isolate, s, result_encoding, new_length, length, mapping);
6533 if (!maybe_answer->ToObject(&answer)) return maybe_answer;
6534 }
6535 return answer;
6536 } 6525 }
6537 6526
6538 6527
6539 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToLowerCase) { 6528 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToLowerCase) {
6540 return ConvertCase( 6529 return ConvertCase(
6541 args, isolate, isolate->runtime_state()->to_lower_mapping()); 6530 args, isolate, isolate->runtime_state()->to_lower_mapping());
6542 } 6531 }
6543 6532
6544 6533
6545 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) { 6534 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) {
(...skipping 8419 matching lines...) Expand 10 before | Expand all | Expand 10 after
14965 // Handle last resort GC and make sure to allow future allocations 14954 // Handle last resort GC and make sure to allow future allocations
14966 // to grow the heap without causing GCs (if possible). 14955 // to grow the heap without causing GCs (if possible).
14967 isolate->counters()->gc_last_resort_from_js()->Increment(); 14956 isolate->counters()->gc_last_resort_from_js()->Increment();
14968 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, 14957 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,
14969 "Runtime::PerformGC"); 14958 "Runtime::PerformGC");
14970 } 14959 }
14971 } 14960 }
14972 14961
14973 14962
14974 } } // namespace v8::internal 14963 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | test/mjsunit/string-case.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698