Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(385)

Side by Side Diff: src/runtime.cc

Issue 4189001: Faster ascii string case conversion. (Closed)
Patch Set: Fix typo Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 4618 matching lines...) Expand 10 before | Expand all | Expand 10 after
4629 // we simple return the result and let the converted string 4629 // we simple return the result and let the converted string
4630 // become garbage; there is no reason to keep two identical strings 4630 // become garbage; there is no reason to keep two identical strings
4631 // alive. 4631 // alive.
4632 return s; 4632 return s;
4633 } 4633 }
4634 } 4634 }
4635 4635
4636 4636
4637 namespace { 4637 namespace {
4638 4638
4639 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;
4640
4641
4642 // Given a word and two range boundaries returns a word with high bit
4643 // set in every byte iff the corresponding input byte was strictly in
4644 // the range (m, n). All the other bits in the result are cleared.
antonm 2010/10/26 17:17:33 maybe follow more standard [;) intervals
Vitaly Repeshko 2010/10/26 18:14:48 I want to make usages of m and n similar, so eithe
4645 // This function is only useful when it can be inlined and the
4646 // boundaries are statically known.
4647 // Requires: the input word and the boundaries must be ascii.
antonm 2010/10/26 17:17:33 I think ascii requires additional clarification.
Vitaly Repeshko 2010/10/26 18:14:48 Agreed. Extended the comment.
4648 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {
4649 // Every byte in an ascii string is less than or equal to 0x7F.
4650 ASSERT((w & (kOneInEveryByte * 0x7F)) == w);
4651 ASSERT(0 < m && m < n && n < 0x7F);
4652 // Has high bit set in every w byte less than n.
4653 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;
4654 // Has high bit set in every w byte greater than m.
4655 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);
4656 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));
4657 }
4658
4659
4660 enum AsciiCaseConversion {
4661 ASCII_TO_LOWER,
4662 ASCII_TO_UPPER
4663 };
4664
4665
4666 template <AsciiCaseConversion dir>
4667 struct FastAsciiConverter {
4668 static bool Convert(char* dst, char* src, int length) {
4669 #ifdef DEBUG
4670 char* saved_dst = dst;
4671 char* saved_src = src;
4672 #endif
4673 // We rely on the distance between lower and upper case letters
4674 // being a power of 2.
4675 ASSERT('a' - 'A' == (1 << 5));
4676 // Boundaries for the range of input characters than require conversion.
4677 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1;
4678 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1;
4679 bool changed = false;
4680 char* const limit = src + length;
4681 #ifdef V8_HOST_CAN_READ_UNALIGNED
4682 // Process the prefix of the input that requires no conversion one
4683 // (machine) word at a time.
4684 while (src <= limit - sizeof(uintptr_t)) {
antonm 2010/10/26 17:17:33 Maybe convert src to uintptr_t*? That would allow
Vitaly Repeshko 2010/10/26 18:14:48 Ahh, I'm really afraid of the compiler emitting di
4685 uintptr_t w = *reinterpret_cast<uintptr_t*>(src);
4686 if (AsciiRangeMask(w, lo, hi) != 0) {
4687 changed = true;
4688 break;
4689 }
4690 *reinterpret_cast<uintptr_t*>(dst) = w;
4691 src += sizeof(uintptr_t);
4692 dst += sizeof(uintptr_t);
4693 }
4694 // Process the remainder of the input performing conversion when
4695 // required one word at a time.
4696 while (src <= limit - sizeof(uintptr_t)) {
4697 uintptr_t w = *reinterpret_cast<uintptr_t*>(src);
4698 uintptr_t m = AsciiRangeMask(w, lo, hi);
4699 // The mask has high (7th) bit set in every byte that needs
4700 // conversion and we know that the distance between cases is
4701 // 1 << 5.
4702 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);
4703 src += sizeof(uintptr_t);
4704 dst += sizeof(uintptr_t);
4705 }
4706 #endif
4707 // Process the last few bytes of the input (or the whole input if
4708 // unaligned accesses are not supported).
4709 while (src < limit) {
4710 char c = *src;
4711 if (lo < c && c < hi) {
4712 c ^= (1 << 5);
4713 changed = true;
4714 }
4715 *dst = c;
4716 ++src;
4717 ++dst;
4718 }
4719 #ifdef DEBUG
4720 CheckConvert(saved_dst, saved_src, length, changed);
4721 #endif
4722 return changed;
4723 }
4724
4725 #ifdef DEBUG
4726 static void CheckConvert(char* dst, char* src, int length, bool changed) {
4727 bool expected_changed = false;
4728 for (int i = 0; i < length; i++) {
4729 if (dst[i] == src[i]) continue;
4730 expected_changed = true;
4731 if (dir == ASCII_TO_LOWER) {
4732 ASSERT('A' <= src[i] && src[i] <= 'Z');
4733 ASSERT(dst[i] == src[i] + ('a' - 'A'));
4734 } else {
4735 ASSERT(dir == ASCII_TO_UPPER);
4736 ASSERT('a' <= src[i] && src[i] <= 'z');
4737 ASSERT(dst[i] == src[i] - ('a' - 'A'));
4738 }
4739 }
4740 ASSERT(expected_changed == changed);
4741 }
4742 #endif
4743 };
4744
4745
4639 struct ToLowerTraits { 4746 struct ToLowerTraits {
4640 typedef unibrow::ToLowercase UnibrowConverter; 4747 typedef unibrow::ToLowercase UnibrowConverter;
4641 4748
4642 static bool ConvertAscii(char* dst, char* src, int length) { 4749 typedef FastAsciiConverter<ASCII_TO_LOWER> AsciiConverter;
4643 bool changed = false;
4644 for (int i = 0; i < length; ++i) {
4645 char c = src[i];
4646 if ('A' <= c && c <= 'Z') {
4647 c += ('a' - 'A');
4648 changed = true;
4649 }
4650 dst[i] = c;
4651 }
4652 return changed;
4653 }
4654 }; 4750 };
4655 4751
4656 4752
4657 struct ToUpperTraits { 4753 struct ToUpperTraits {
4658 typedef unibrow::ToUppercase UnibrowConverter; 4754 typedef unibrow::ToUppercase UnibrowConverter;
4659 4755
4660 static bool ConvertAscii(char* dst, char* src, int length) { 4756 typedef FastAsciiConverter<ASCII_TO_UPPER> AsciiConverter;
4661 bool changed = false;
4662 for (int i = 0; i < length; ++i) {
4663 char c = src[i];
4664 if ('a' <= c && c <= 'z') {
4665 c -= ('a' - 'A');
4666 changed = true;
4667 }
4668 dst[i] = c;
4669 }
4670 return changed;
4671 }
4672 }; 4757 };
4673 4758
4674 } // namespace 4759 } // namespace
4675 4760
4676 4761
4677 template <typename ConvertTraits> 4762 template <typename ConvertTraits>
4678 static Object* ConvertCase( 4763 static Object* ConvertCase(
4679 Arguments args, 4764 Arguments args,
4680 unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) { 4765 unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) {
4681 NoHandleAllocation ha; 4766 NoHandleAllocation ha;
4682 CONVERT_CHECKED(String, s, args[0]); 4767 CONVERT_CHECKED(String, s, args[0]);
4683 s = s->TryFlattenGetString(); 4768 s = s->TryFlattenGetString();
4684 4769
4685 const int length = s->length(); 4770 const int length = s->length();
4686 // Assume that the string is not empty; we need this assumption later 4771 // Assume that the string is not empty; we need this assumption later
4687 if (length == 0) return s; 4772 if (length == 0) return s;
4688 4773
4689 // Simpler handling of ascii strings. 4774 // Simpler handling of ascii strings.
4690 // 4775 //
4691 // NOTE: This assumes that the upper/lower case of an ascii 4776 // NOTE: This assumes that the upper/lower case of an ascii
4692 // character is also ascii. This is currently the case, but it 4777 // character is also ascii. This is currently the case, but it
4693 // might break in the future if we implement more context and locale 4778 // might break in the future if we implement more context and locale
4694 // dependent upper/lower conversions. 4779 // dependent upper/lower conversions.
4695 if (s->IsSeqAsciiString()) { 4780 if (s->IsSeqAsciiString()) {
4696 Object* o = Heap::AllocateRawAsciiString(length); 4781 Object* o = Heap::AllocateRawAsciiString(length);
4697 if (o->IsFailure()) return o; 4782 if (o->IsFailure()) return o;
4698 SeqAsciiString* result = SeqAsciiString::cast(o); 4783 SeqAsciiString* result = SeqAsciiString::cast(o);
4699 bool has_changed_character = ConvertTraits::ConvertAscii( 4784 bool has_changed_character = ConvertTraits::AsciiConverter::Convert(
4700 result->GetChars(), SeqAsciiString::cast(s)->GetChars(), length); 4785 result->GetChars(), SeqAsciiString::cast(s)->GetChars(), length);
4701 return has_changed_character ? result : s; 4786 return has_changed_character ? result : s;
4702 } 4787 }
4703 4788
4704 Object* answer = ConvertCaseHelper(s, length, length, mapping); 4789 Object* answer = ConvertCaseHelper(s, length, length, mapping);
4705 if (answer->IsSmi()) { 4790 if (answer->IsSmi()) {
4706 // Retry with correct length. 4791 // Retry with correct length.
4707 answer = ConvertCaseHelper(s, Smi::cast(answer)->value(), length, mapping); 4792 answer = ConvertCaseHelper(s, Smi::cast(answer)->value(), length, mapping);
4708 } 4793 }
4709 return answer; // This may be a failure. 4794 return answer; // This may be a failure.
(...skipping 5440 matching lines...) Expand 10 before | Expand all | Expand 10 after
10150 } else { 10235 } else {
10151 // Handle last resort GC and make sure to allow future allocations 10236 // Handle last resort GC and make sure to allow future allocations
10152 // to grow the heap without causing GCs (if possible). 10237 // to grow the heap without causing GCs (if possible).
10153 Counters::gc_last_resort_from_js.Increment(); 10238 Counters::gc_last_resort_from_js.Increment();
10154 Heap::CollectAllGarbage(false); 10239 Heap::CollectAllGarbage(false);
10155 } 10240 }
10156 } 10241 }
10157 10242
10158 10243
10159 } } // namespace v8::internal 10244 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/globals.h ('k') | test/mjsunit/string-case.js » ('j') | test/mjsunit/string-case.js » ('J')

Powered by Google App Engine
This is Rietveld 408576698