OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 5657 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5668 // become garbage; there is no reason to keep two identical strings | 5668 // become garbage; there is no reason to keep two identical strings |
5669 // alive. | 5669 // alive. |
5670 return s; | 5670 return s; |
5671 } | 5671 } |
5672 } | 5672 } |
5673 | 5673 |
5674 | 5674 |
5675 namespace { | 5675 namespace { |
5676 | 5676 |
5677 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; | 5677 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; |
5678 #ifdef ENABLE_LATIN_1 | |
5679 static const uintptr_t kAsciiMask = kOneInEveryByte << 7; | 5678 static const uintptr_t kAsciiMask = kOneInEveryByte << 7; |
5680 #endif | |
5681 | 5679 |
5682 // Given a word and two range boundaries, returns a word with the high bit | 5680 // Given a word and two range boundaries, returns a word with the high bit |
5683 // set in every byte iff the corresponding input byte was strictly in | 5681 // set in every byte iff the corresponding input byte was strictly in |
5684 // the range (m, n). All the other bits in the result are cleared. | 5682 // the range (m, n). All the other bits in the result are cleared. |
5685 // This function is only useful when it can be inlined and the | 5683 // This function is only useful when it can be inlined and the |
5686 // boundaries are statically known. | 5684 // boundaries are statically known. |
5687 // Requires: all bytes in the input word and the boundaries must be | 5685 // Requires: all bytes in the input word and the boundaries must be |
5688 // ASCII (less than 0x7F). | 5686 // ASCII (less than 0x7F). |
5689 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) { | 5687 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) { |
5690 // Use strict inequalities since in edge cases the function could be | 5688 // Use strict inequalities since in edge cases the function could be |
5691 // further simplified. | 5689 // further simplified. |
5692 ASSERT(0 < m && m < n); | 5690 ASSERT(0 < m && m < n); |
5693 #ifndef ENABLE_LATIN_1 | |
5694 // Every byte in an ASCII string is less than or equal to 0x7F. | |
5695 ASSERT((w & (kOneInEveryByte * 0x7F)) == w); | |
5696 ASSERT(n < 0x7F); | |
5697 #endif | |
5698 // Has high bit set in every w byte less than n. | 5691 // Has high bit set in every w byte less than n. |
5699 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; | 5692 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; |
5700 // Has high bit set in every w byte greater than m. | 5693 // Has high bit set in every w byte greater than m. |
5701 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); | 5694 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); |
5702 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); | 5695 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); |
5703 } | 5696 } |
5704 | 5697 |
5705 | 5698 |
5706 enum AsciiCaseConversion { | 5699 enum AsciiCaseConversion { |
5707 ASCII_TO_LOWER, | 5700 ASCII_TO_LOWER, |
5708 ASCII_TO_UPPER | 5701 ASCII_TO_UPPER |
5709 }; | 5702 }; |
5710 | 5703 |
5711 | 5704 |
5712 template <AsciiCaseConversion dir> | 5705 template <AsciiCaseConversion dir> |
5713 struct FastAsciiConverter { | 5706 struct FastAsciiConverter { |
5714 #ifdef ENABLE_LATIN_1 | |
5715 static bool Convert(char* dst, char* src, int length, bool* changed_out) { | 5707 static bool Convert(char* dst, char* src, int length, bool* changed_out) { |
5716 #else | |
5717 static bool Convert(char* dst, char* src, int length) { | |
5718 #endif | |
5719 #ifdef DEBUG | 5708 #ifdef DEBUG |
5720 char* saved_dst = dst; | 5709 char* saved_dst = dst; |
5721 char* saved_src = src; | 5710 char* saved_src = src; |
5722 #endif | 5711 #endif |
5723 // We rely on the distance between upper and lower case letters | 5712 // We rely on the distance between upper and lower case letters |
5724 // being a known power of 2. | 5713 // being a known power of 2. |
5725 ASSERT('a' - 'A' == (1 << 5)); | 5714 ASSERT('a' - 'A' == (1 << 5)); |
5726 // Boundaries for the range of input characters that require conversion. | 5715 // Boundaries for the range of input characters that require conversion. |
5727 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1; | 5716 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1; |
5728 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1; | 5717 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1; |
5729 bool changed = false; | 5718 bool changed = false; |
5730 #ifdef ENABLE_LATIN_1 | |
5731 uintptr_t or_acc = 0; | 5719 uintptr_t or_acc = 0; |
5732 #endif | |
5733 char* const limit = src + length; | 5720 char* const limit = src + length; |
5734 #ifdef V8_HOST_CAN_READ_UNALIGNED | 5721 #ifdef V8_HOST_CAN_READ_UNALIGNED |
5735 // Process the prefix of the input that requires no conversion one | 5722 // Process the prefix of the input that requires no conversion one |
5736 // (machine) word at a time. | 5723 // (machine) word at a time. |
5737 while (src <= limit - sizeof(uintptr_t)) { | 5724 while (src <= limit - sizeof(uintptr_t)) { |
5738 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | 5725 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); |
5739 #ifdef ENABLE_LATIN_1 | |
5740 or_acc |= w; | 5726 or_acc |= w; |
5741 #endif | |
5742 if (AsciiRangeMask(w, lo, hi) != 0) { | 5727 if (AsciiRangeMask(w, lo, hi) != 0) { |
5743 changed = true; | 5728 changed = true; |
5744 break; | 5729 break; |
5745 } | 5730 } |
5746 *reinterpret_cast<uintptr_t*>(dst) = w; | 5731 *reinterpret_cast<uintptr_t*>(dst) = w; |
5747 src += sizeof(uintptr_t); | 5732 src += sizeof(uintptr_t); |
5748 dst += sizeof(uintptr_t); | 5733 dst += sizeof(uintptr_t); |
5749 } | 5734 } |
5750 // Process the remainder of the input performing conversion when | 5735 // Process the remainder of the input performing conversion when |
5751 // required one word at a time. | 5736 // required one word at a time. |
5752 while (src <= limit - sizeof(uintptr_t)) { | 5737 while (src <= limit - sizeof(uintptr_t)) { |
5753 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | 5738 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); |
5754 #ifdef ENABLE_LATIN_1 | |
5755 or_acc |= w; | 5739 or_acc |= w; |
5756 #endif | |
5757 uintptr_t m = AsciiRangeMask(w, lo, hi); | 5740 uintptr_t m = AsciiRangeMask(w, lo, hi); |
5758 // The mask has high (7th) bit set in every byte that needs | 5741 // The mask has high (7th) bit set in every byte that needs |
5759 // conversion and we know that the distance between cases is | 5742 // conversion and we know that the distance between cases is |
5760 // 1 << 5. | 5743 // 1 << 5. |
5761 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); | 5744 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); |
5762 src += sizeof(uintptr_t); | 5745 src += sizeof(uintptr_t); |
5763 dst += sizeof(uintptr_t); | 5746 dst += sizeof(uintptr_t); |
5764 } | 5747 } |
5765 #endif | 5748 #endif |
5766 // Process the last few bytes of the input (or the whole input if | 5749 // Process the last few bytes of the input (or the whole input if |
5767 // unaligned access is not supported). | 5750 // unaligned access is not supported). |
5768 while (src < limit) { | 5751 while (src < limit) { |
5769 char c = *src; | 5752 char c = *src; |
5770 #ifdef ENABLE_LATIN_1 | |
5771 or_acc |= c; | 5753 or_acc |= c; |
5772 #endif | |
5773 if (lo < c && c < hi) { | 5754 if (lo < c && c < hi) { |
5774 c ^= (1 << 5); | 5755 c ^= (1 << 5); |
5775 changed = true; | 5756 changed = true; |
5776 } | 5757 } |
5777 *dst = c; | 5758 *dst = c; |
5778 ++src; | 5759 ++src; |
5779 ++dst; | 5760 ++dst; |
5780 } | 5761 } |
5781 #ifdef ENABLE_LATIN_1 | |
5782 if ((or_acc & kAsciiMask) != 0) { | 5762 if ((or_acc & kAsciiMask) != 0) { |
5783 return false; | 5763 return false; |
5784 } | 5764 } |
5785 #endif | |
5786 #ifdef DEBUG | 5765 #ifdef DEBUG |
5787 CheckConvert(saved_dst, saved_src, length, changed); | 5766 CheckConvert(saved_dst, saved_src, length, changed); |
5788 #endif | 5767 #endif |
5789 #ifdef ENABLE_LATIN_1 | |
5790 *changed_out = changed; | 5768 *changed_out = changed; |
5791 return true; | 5769 return true; |
5792 #else | |
5793 return changed; | |
5794 #endif | |
5795 } | 5770 } |
5796 | 5771 |
5797 #ifdef DEBUG | 5772 #ifdef DEBUG |
5798 static void CheckConvert(char* dst, char* src, int length, bool changed) { | 5773 static void CheckConvert(char* dst, char* src, int length, bool changed) { |
5799 bool expected_changed = false; | 5774 bool expected_changed = false; |
5800 for (int i = 0; i < length; i++) { | 5775 for (int i = 0; i < length; i++) { |
5801 if (dst[i] == src[i]) continue; | 5776 if (dst[i] == src[i]) continue; |
5802 expected_changed = true; | 5777 expected_changed = true; |
5803 if (dir == ASCII_TO_LOWER) { | 5778 if (dir == ASCII_TO_LOWER) { |
5804 ASSERT('A' <= src[i] && src[i] <= 'Z'); | 5779 ASSERT('A' <= src[i] && src[i] <= 'Z'); |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5849 // NOTE: This assumes that the upper/lower case of an ASCII | 5824 // NOTE: This assumes that the upper/lower case of an ASCII |
5850 // character is also ASCII. This is currently the case, but it | 5825 // character is also ASCII. This is currently the case, but it |
5851 // might break in the future if we implement more context and locale | 5826 // might break in the future if we implement more context and locale |
5852 // dependent upper/lower conversions. | 5827 // dependent upper/lower conversions. |
5853 if (s->IsSeqOneByteString()) { | 5828 if (s->IsSeqOneByteString()) { |
5854 Object* o; | 5829 Object* o; |
5855 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length); | 5830 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length); |
5856 if (!maybe_o->ToObject(&o)) return maybe_o; | 5831 if (!maybe_o->ToObject(&o)) return maybe_o; |
5857 } | 5832 } |
5858 SeqOneByteString* result = SeqOneByteString::cast(o); | 5833 SeqOneByteString* result = SeqOneByteString::cast(o); |
5859 #ifndef ENABLE_LATIN_1 | |
5860 bool has_changed_character = ConvertTraits::AsciiConverter::Convert( | |
5861 reinterpret_cast<char*>(result->GetChars()), | |
5862 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), | |
5863 length); | |
5864 return has_changed_character ? result : s; | |
5865 #else | |
5866 bool has_changed_character; | 5834 bool has_changed_character; |
5867 bool is_ascii = ConvertTraits::AsciiConverter::Convert( | 5835 bool is_ascii = ConvertTraits::AsciiConverter::Convert( |
5868 reinterpret_cast<char*>(result->GetChars()), | 5836 reinterpret_cast<char*>(result->GetChars()), |
5869 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), | 5837 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), |
5870 length, | 5838 length, |
5871 &has_changed_character); | 5839 &has_changed_character); |
5872 // If not ASCII, we discard the result and take the 2 byte path. | 5840 // If not ASCII, we discard the result and take the 2 byte path. |
5873 if (is_ascii) { | 5841 if (is_ascii) { |
5874 return has_changed_character ? result : s; | 5842 return has_changed_character ? result : s; |
5875 } | 5843 } |
5876 #endif | |
5877 } | 5844 } |
5878 | 5845 |
5879 Object* answer; | 5846 Object* answer; |
5880 { MaybeObject* maybe_answer = | 5847 { MaybeObject* maybe_answer = |
5881 ConvertCaseHelper(isolate, s, length, length, mapping); | 5848 ConvertCaseHelper(isolate, s, length, length, mapping); |
5882 if (!maybe_answer->ToObject(&answer)) return maybe_answer; | 5849 if (!maybe_answer->ToObject(&answer)) return maybe_answer; |
5883 } | 5850 } |
5884 if (answer->IsSmi()) { | 5851 if (answer->IsSmi()) { |
5885 // Retry with correct length. | 5852 // Retry with correct length. |
5886 { MaybeObject* maybe_answer = | 5853 { MaybeObject* maybe_answer = |
(...skipping 7528 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
13415 // Handle last resort GC and make sure to allow future allocations | 13382 // Handle last resort GC and make sure to allow future allocations |
13416 // to grow the heap without causing GCs (if possible). | 13383 // to grow the heap without causing GCs (if possible). |
13417 isolate->counters()->gc_last_resort_from_js()->Increment(); | 13384 isolate->counters()->gc_last_resort_from_js()->Increment(); |
13418 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, | 13385 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, |
13419 "Runtime::PerformGC"); | 13386 "Runtime::PerformGC"); |
13420 } | 13387 } |
13421 } | 13388 } |
13422 | 13389 |
13423 | 13390 |
13424 } } // namespace v8::internal | 13391 } } // namespace v8::internal |
OLD | NEW |