src/runtime.cc - Issue 180063002: Use fast path for sliced and external strings in ConvertCase.

Side by Side Diff: src/runtime.cc

Issue 180063002: Use fast path for sliced and external strings in ConvertCase. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 6227 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6238 str, ALLOW_TRAILING_JUNK, OS::nan_value());	6238 str, ALLOW_TRAILING_JUNK, OS::nan_value());

6239	6239

6240 // Create a number object from the value.	6240 // Create a number object from the value.

6241 return isolate->heap()->NumberFromDouble(value);	6241 return isolate->heap()->NumberFromDouble(value);

6242 }	6242 }

6243	6243

6244	6244

6245 template <class Converter>	6245 template <class Converter>

6246 MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(	6246 MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(

6247 Isolate* isolate,	6247 Isolate* isolate,

6248 String* s,	6248 String* string,

6249 String::Encoding result_encoding,	6249 SeqString* result,

6250 int length,	6250 int result_length,

6251 int input_string_length,

6252 unibrow::Mapping<Converter, 128>* mapping) {	6251 unibrow::Mapping<Converter, 128>* mapping) {

	6252 DisallowHeapAllocation no_gc;

6253 // We try this twice, once with the assumption that the result is no longer	6253 // We try this twice, once with the assumption that the result is no longer

6254 // than the input and, if that assumption breaks, again with the exact	6254 // than the input and, if that assumption breaks, again with the exact

6255 // length. This may not be pretty, but it is nicer than what was here before	6255 // length. This may not be pretty, but it is nicer than what was here before

6256 // and I hereby claim my vaffel-is.	6256 // and I hereby claim my vaffel-is.

6257 //	6257 //

6258 // Allocate the resulting string.

6259 //

6260 // NOTE: This assumes that the upper/lower case of an ASCII	6258 // NOTE: This assumes that the upper/lower case of an ASCII

6261 // character is also ASCII. This is currently the case, but it	6259 // character is also ASCII. This is currently the case, but it

6262 // might break in the future if we implement more context and locale	6260 // might break in the future if we implement more context and locale

6263 // dependent upper/lower conversions.	6261 // dependent upper/lower conversions.

6264 Object* o;

6265 { MaybeObject* maybe_o = result_encoding == String::ONE_BYTE_ENCODING

6266 ? isolate->heap()->AllocateRawOneByteString(length)

6267 : isolate->heap()->AllocateRawTwoByteString(length);

6268 if (!maybe_o->ToObject(&o)) return maybe_o;

6269 }

6270 String* result = String::cast(o);

6271 bool has_changed_character = false;	6262 bool has_changed_character = false;

6272	6263

6273 DisallowHeapAllocation no_gc;

6274

6275 // Convert all characters to upper case, assuming that they will fit	6264 // Convert all characters to upper case, assuming that they will fit

6276 // in the buffer	6265 // in the buffer

6277 Access<ConsStringIteratorOp> op(	6266 Access<ConsStringIteratorOp> op(

6278 isolate->runtime_state()->string_iterator());	6267 isolate->runtime_state()->string_iterator());

6279 StringCharacterStream stream(s, op.value());	6268 StringCharacterStream stream(string, op.value());

6280 unibrow::uchar chars[Converter::kMaxWidth];	6269 unibrow::uchar chars[Converter::kMaxWidth];

6281 // We can assume that the string is not empty	6270 // We can assume that the string is not empty

6282 uc32 current = stream.GetNext();	6271 uc32 current = stream.GetNext();

6283 // y with umlauts is the only character that stops fitting into one-byte	6272 // y with umlauts is the only character that stops fitting into one-byte

6284 // when converting to uppercase.	6273 // when converting to uppercase.

6285 static const uc32 yuml_code = 0xff;	6274 static const uc32 yuml_code = 0xff;

6286 bool ignore_yuml = result->IsSeqTwoByteString() \|\| Converter::kIsToLower;	6275 bool ignore_yuml = result->IsSeqTwoByteString() \|\| Converter::kIsToLower;

6287 for (int i = 0; i < length;) {	6276 for (int i = 0; i < result_length;) {

6288 bool has_next = stream.HasMore();	6277 bool has_next = stream.HasMore();

6289 uc32 next = has_next ? stream.GetNext() : 0;	6278 uc32 next = has_next ? stream.GetNext() : 0;

6290 int char_length = mapping->get(current, next, chars);	6279 int char_length = mapping->get(current, next, chars);

6291 if (char_length == 0) {	6280 if (char_length == 0) {

6292 // The case conversion of this character is the character itself.	6281 // The case conversion of this character is the character itself.

6293 result->Set(i, current);	6282 result->Set(i, current);

6294 i++;	6283 i++;

6295 } else if (char_length == 1 && (ignore_yuml \|\| current != yuml_code)) {	6284 } else if (char_length == 1 && (ignore_yuml \|\| current != yuml_code)) {

6296 // Common case: converting the letter resulted in one character.	6285 // Common case: converting the letter resulted in one character.

6297 ASSERT(static_cast<uc32>(chars[0]) != current);	6286 ASSERT(static_cast<uc32>(chars[0]) != current);

6298 result->Set(i, chars[0]);	6287 result->Set(i, chars[0]);

6299 has_changed_character = true;	6288 has_changed_character = true;

6300 i++;	6289 i++;

6301 } else if (length == input_string_length) {	6290 } else if (result_length == string->length()) {

6302 bool found_yuml = (current == yuml_code);	6291 bool found_yuml = (current == yuml_code);

6303 // We've assumed that the result would be as long as the	6292 // We've assumed that the result would be as long as the

6304 // input but here is a character that converts to several	6293 // input but here is a character that converts to several

6305 // characters. No matter, we calculate the exact length	6294 // characters. No matter, we calculate the exact length

6306 // of the result and try the whole thing again.	6295 // of the result and try the whole thing again.

6307 //	6296 //

6308 // Note that this leaves room for optimization. We could just	6297 // Note that this leaves room for optimization. We could just

6309 // memcpy what we already have to the result string. Also,	6298 // memcpy what we already have to the result string. Also,

6310 // the result string is the last object allocated we could	6299 // the result string is the last object allocated we could

6311 // "realloc" it and probably, in the vast majority of cases,	6300 // "realloc" it and probably, in the vast majority of cases,

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6345 }	6334 }

6346 current = next;	6335 current = next;

6347 }	6336 }

6348 if (has_changed_character) {	6337 if (has_changed_character) {

6349 return result;	6338 return result;

6350 } else {	6339 } else {

6351 // If we didn't actually change anything in doing the conversion	6340 // If we didn't actually change anything in doing the conversion

6352 // we simple return the result and let the converted string	6341 // we simple return the result and let the converted string

6353 // become garbage; there is no reason to keep two identical strings	6342 // become garbage; there is no reason to keep two identical strings

6354 // alive.	6343 // alive.

6355 return s;	6344 return string;

6356 }	6345 }

6357 }	6346 }

6358	6347

6359	6348

6360 namespace {	6349 namespace {

6361	6350

6362 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;	6351 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;

6363 static const uintptr_t kAsciiMask = kOneInEveryByte << 7;	6352 static const uintptr_t kAsciiMask = kOneInEveryByte << 7;

6364	6353

6365 // Given a word and two range boundaries returns a word with high bit	6354 // Given a word and two range boundaries returns a word with high bit

(...skipping 10 matching lines...) Expand all Loading...
6376 // Has high bit set in every w byte less than n.	6365 // Has high bit set in every w byte less than n.

6377 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;	6366 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;

6378 // Has high bit set in every w byte greater than m.	6367 // Has high bit set in every w byte greater than m.

6379 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);	6368 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);

6380 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));	6369 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));

6381 }	6370 }

6382	6371

6383	6372

6384 #ifdef DEBUG	6373 #ifdef DEBUG

6385 static bool CheckFastAsciiConvert(char* dst,	6374 static bool CheckFastAsciiConvert(char* dst,

6386 char* src,	6375 const char* src,

6387 int length,	6376 int length,

6388 bool changed,	6377 bool changed,

6389 bool is_to_lower) {	6378 bool is_to_lower) {

6390 bool expected_changed = false;	6379 bool expected_changed = false;

6391 for (int i = 0; i < length; i++) {	6380 for (int i = 0; i < length; i++) {

6392 if (dst[i] == src[i]) continue;	6381 if (dst[i] == src[i]) continue;

6393 expected_changed = true;	6382 expected_changed = true;

6394 if (is_to_lower) {	6383 if (is_to_lower) {

6395 ASSERT('A' <= src[i] && src[i] <= 'Z');	6384 ASSERT('A' <= src[i] && src[i] <= 'Z');

6396 ASSERT(dst[i] == src[i] + ('a' - 'A'));	6385 ASSERT(dst[i] == src[i] + ('a' - 'A'));

6397 } else {	6386 } else {

6398 ASSERT('a' <= src[i] && src[i] <= 'z');	6387 ASSERT('a' <= src[i] && src[i] <= 'z');

6399 ASSERT(dst[i] == src[i] - ('a' - 'A'));	6388 ASSERT(dst[i] == src[i] - ('a' - 'A'));

6400 }	6389 }

6401 }	6390 }

6402 return (expected_changed == changed);	6391 return (expected_changed == changed);

6403 }	6392 }

6404 #endif	6393 #endif

6405	6394

6406	6395

6407 template<class Converter>	6396 template<class Converter>

6408 static bool FastAsciiConvert(char* dst,	6397 static bool FastAsciiConvert(char* dst,

6409 char* src,	6398 const char* src,

6410 int length,	6399 int length,

6411 bool* changed_out) {	6400 bool* changed_out) {

6412 #ifdef DEBUG	6401 #ifdef DEBUG

6413 char* saved_dst = dst;	6402 char* saved_dst = dst;

6414 char* saved_src = src;	6403 const char* saved_src = src;

6415 #endif	6404 #endif

6416 DisallowHeapAllocation no_gc;	6405 DisallowHeapAllocation no_gc;

6417 // We rely on the distance between upper and lower case letters	6406 // We rely on the distance between upper and lower case letters

6418 // being a known power of 2.	6407 // being a known power of 2.

6419 ASSERT('a' - 'A' == (1 << 5));	6408 ASSERT('a' - 'A' == (1 << 5));

6420 // Boundaries for the range of input characters than require conversion.	6409 // Boundaries for the range of input characters than require conversion.

6421 static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1;	6410 static const char lo = Converter::kIsToLower ? 'A' - 1 : 'a' - 1;

6422 static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1;	6411 static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1;

6423 bool changed = false;	6412 bool changed = false;

6424 uintptr_t or_acc = 0;	6413 uintptr_t or_acc = 0;

6425 char* const limit = src + length;	6414 const char* const limit = src + length;

6426 #ifdef V8_HOST_CAN_READ_UNALIGNED	6415 #ifdef V8_HOST_CAN_READ_UNALIGNED

6427 // Process the prefix of the input that requires no conversion one	6416 // Process the prefix of the input that requires no conversion one

6428 // (machine) word at a time.	6417 // (machine) word at a time.

6429 while (src <= limit - sizeof(uintptr_t)) {	6418 while (src <= limit - sizeof(uintptr_t)) {

6430 uintptr_t w = reinterpret_cast<uintptr_t>(src);	6419 const uintptr_t w = reinterpret_cast<const uintptr_t>(src);

6431 or_acc \|= w;	6420 or_acc \|= w;

6432 if (AsciiRangeMask(w, lo, hi) != 0) {	6421 if (AsciiRangeMask(w, lo, hi) != 0) {

6433 changed = true;	6422 changed = true;

6434 break;	6423 break;

6435 }	6424 }

6436 reinterpret_cast<uintptr_t>(dst) = w;	6425 reinterpret_cast<uintptr_t>(dst) = w;

6437 src += sizeof(uintptr_t);	6426 src += sizeof(uintptr_t);

6438 dst += sizeof(uintptr_t);	6427 dst += sizeof(uintptr_t);

6439 }	6428 }

6440 // Process the remainder of the input performing conversion when	6429 // Process the remainder of the input performing conversion when

6441 // required one word at a time.	6430 // required one word at a time.

6442 while (src <= limit - sizeof(uintptr_t)) {	6431 while (src <= limit - sizeof(uintptr_t)) {

6443 uintptr_t w = reinterpret_cast<uintptr_t>(src);	6432 const uintptr_t w = reinterpret_cast<const uintptr_t>(src);

6444 or_acc \|= w;	6433 or_acc \|= w;

6445 uintptr_t m = AsciiRangeMask(w, lo, hi);	6434 uintptr_t m = AsciiRangeMask(w, lo, hi);

6446 // The mask has high (7th) bit set in every byte that needs	6435 // The mask has high (7th) bit set in every byte that needs

6447 // conversion and we know that the distance between cases is	6436 // conversion and we know that the distance between cases is

6448 // 1 << 5.	6437 // 1 << 5.

6449 reinterpret_cast<uintptr_t>(dst) = w ^ (m >> 2);	6438 reinterpret_cast<uintptr_t>(dst) = w ^ (m >> 2);

6450 src += sizeof(uintptr_t);	6439 src += sizeof(uintptr_t);

6451 dst += sizeof(uintptr_t);	6440 dst += sizeof(uintptr_t);

6452 }	6441 }

6453 #endif	6442 #endif

(...skipping 22 matching lines...) Expand all Loading...
6476 }	6465 }

6477	6466

6478 } // namespace	6467 } // namespace

6479	6468

6480	6469

6481 template <class Converter>	6470 template <class Converter>

6482 MUST_USE_RESULT static MaybeObject* ConvertCase(	6471 MUST_USE_RESULT static MaybeObject* ConvertCase(

6483 Arguments args,	6472 Arguments args,

6484 Isolate* isolate,	6473 Isolate* isolate,

6485 unibrow::Mapping<Converter, 128>* mapping) {	6474 unibrow::Mapping<Converter, 128>* mapping) {

6486 SealHandleScope shs(isolate);	6475 HandleScope handle_scope(isolate);

6487 CONVERT_ARG_CHECKED(String, s, 0);	6476 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);

6488 s = s->TryFlattenGetString();	6477 s = FlattenGetString(s);

6489	6478 int length = s->length();

6490 const int length = s->length();

6491 // Assume that the string is not empty; we need this assumption later	6479 // Assume that the string is not empty; we need this assumption later

6492 if (length == 0) return s;	6480 if (length == 0) return *s;

6493	6481

6494 // Simpler handling of ASCII strings.	6482 // Simpler handling of ASCII strings.

6495 //	6483 //

6496 // NOTE: This assumes that the upper/lower case of an ASCII	6484 // NOTE: This assumes that the upper/lower case of an ASCII

6497 // character is also ASCII. This is currently the case, but it	6485 // character is also ASCII. This is currently the case, but it

6498 // might break in the future if we implement more context and locale	6486 // might break in the future if we implement more context and locale

6499 // dependent upper/lower conversions.	6487 // dependent upper/lower conversions.

6500 if (s->IsSeqOneByteString()) {	6488 if (s->IsOneByteRepresentationUnderneath()) {

6501 Object* o;	6489 Handle<SeqOneByteString> result =

6502 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length);	6490 isolate->factory()->NewRawOneByteString(length);

6503 if (!maybe_o->ToObject(&o)) return maybe_o;	6491

6504 }	6492 DisallowHeapAllocation no_gc;

6505 SeqOneByteString* result = SeqOneByteString::cast(o);	6493 String::FlatContent flat_content = s->GetFlatContent();

	6494 ASSERT(flat_content.IsFlat());

6506 bool has_changed_character = false;	6495 bool has_changed_character = false;

6507 bool is_ascii = FastAsciiConvert<Converter>(	6496 bool is_ascii = FastAsciiConvert<Converter>(

6508 reinterpret_cast<char*>(result->GetChars()),	6497 reinterpret_cast<char*>(result->GetChars()),

6509 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),	6498 reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),

6510 length,	6499 length,

6511 &has_changed_character);	6500 &has_changed_character);

6512 // If not ASCII, we discard the result and take the 2 byte path.	6501 // If not ASCII, we discard the result and take the 2 byte path.

6513 if (is_ascii) {	6502 if (is_ascii) return has_changed_character ? result : s;

6514 return has_changed_character ? result : s;

6515 }

6516 }	6503 }

6517	6504

6518 String::Encoding result_encoding = s->IsOneByteRepresentation()	6505 Handle<SeqString> result;

6519 ? String::ONE_BYTE_ENCODING : String::TWO_BYTE_ENCODING;	6506 if (s->IsOneByteRepresentation()) {

	6507 result = isolate->factory()->NewRawOneByteString(length);

	6508 } else {

	6509 result = isolate->factory()->NewRawTwoByteString(length);

	6510 }

	6511 MaybeObject* maybe = ConvertCaseHelper(isolate, s, result, length, mapping);

6520 Object* answer;	6512 Object* answer;

6521 { MaybeObject* maybe_answer = ConvertCaseHelper(	6513 if (!maybe->ToObject(&answer)) return maybe;

6522 isolate, s, result_encoding, length, length, mapping);	6514 if (answer->IsString()) return answer;

6523 if (!maybe_answer->ToObject(&answer)) return maybe_answer;	6515

	6516 ASSERT(answer->IsSmi());

	6517 length = Smi::cast(answer)->value();

	6518 if (s->IsOneByteRepresentation() && length > 0) {

	6519 result = isolate->factory()->NewRawOneByteString(length);

	6520 } else {

	6521 if (length < 0) length = -length;

	6522 result = isolate->factory()->NewRawTwoByteString(length);

6524 }	6523 }

6525 if (answer->IsSmi()) {	6524 return ConvertCaseHelper(isolate, s, result, length, mapping);

6526 int new_length = Smi::cast(answer)->value();

6527 if (new_length < 0) {

6528 result_encoding = String::TWO_BYTE_ENCODING;

6529 new_length = -new_length;

6530 }

6531 MaybeObject* maybe_answer = ConvertCaseHelper(

6532 isolate, s, result_encoding, new_length, length, mapping);

6533 if (!maybe_answer->ToObject(&answer)) return maybe_answer;

6534 }

6535 return answer;

6536 }	6525 }

6537	6526

6538	6527

6539 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToLowerCase) {	6528 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToLowerCase) {

6540 return ConvertCase(	6529 return ConvertCase(

6541 args, isolate, isolate->runtime_state()->to_lower_mapping());	6530 args, isolate, isolate->runtime_state()->to_lower_mapping());

6542 }	6531 }

6543	6532

6544	6533

6545 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) {	6534 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToUpperCase) {

(...skipping 8419 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
14965 // Handle last resort GC and make sure to allow future allocations	14954 // Handle last resort GC and make sure to allow future allocations

14966 // to grow the heap without causing GCs (if possible).	14955 // to grow the heap without causing GCs (if possible).

14967 isolate->counters()->gc_last_resort_from_js()->Increment();	14956 isolate->counters()->gc_last_resort_from_js()->Increment();

14968 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,	14957 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,

14969 "Runtime::PerformGC");	14958 "Runtime::PerformGC");

14970 }	14959 }

14971 }	14960 }

14972	14961

14973	14962

14974 } } // namespace v8::internal	14963 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « no previous file | test/mjsunit/string-case.js » ('j') | no next file with comments »