Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: src/runtime.cc

Issue 12700008: remove latin-1 flag (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: removed SeqOneByteStringVerify Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/objects-inl.h ('k') | src/string-search.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 5657 matching lines...) Expand 10 before | Expand all | Expand 10 after
5668 // become garbage; there is no reason to keep two identical strings 5668 // become garbage; there is no reason to keep two identical strings
5669 // alive. 5669 // alive.
5670 return s; 5670 return s;
5671 } 5671 }
5672 } 5672 }
5673 5673
5674 5674
5675 namespace { 5675 namespace {
5676 5676
5677 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; 5677 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;
5678 #ifdef ENABLE_LATIN_1
5679 static const uintptr_t kAsciiMask = kOneInEveryByte << 7; 5678 static const uintptr_t kAsciiMask = kOneInEveryByte << 7;
5680 #endif
5681 5679
5682 // Given a word and two range boundaries returns a word with high bit 5680 // Given a word and two range boundaries returns a word with high bit
5683 // set in every byte iff the corresponding input byte was strictly in 5681 // set in every byte iff the corresponding input byte was strictly in
5684 // the range (m, n). All the other bits in the result are cleared. 5682 // the range (m, n). All the other bits in the result are cleared.
5685 // This function is only useful when it can be inlined and the 5683 // This function is only useful when it can be inlined and the
5686 // boundaries are statically known. 5684 // boundaries are statically known.
5687 // Requires: all bytes in the input word and the boundaries must be 5685 // Requires: all bytes in the input word and the boundaries must be
5688 // ASCII (less than 0x7F). 5686 // ASCII (less than 0x7F).
5689 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) { 5687 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {
5690 // Use strict inequalities since in edge cases the function could be 5688 // Use strict inequalities since in edge cases the function could be
5691 // further simplified. 5689 // further simplified.
5692 ASSERT(0 < m && m < n); 5690 ASSERT(0 < m && m < n);
5693 #ifndef ENABLE_LATIN_1
5694 // Every byte in an ASCII string is less than or equal to 0x7F.
5695 ASSERT((w & (kOneInEveryByte * 0x7F)) == w);
5696 ASSERT(n < 0x7F);
5697 #endif
5698 // Has high bit set in every w byte less than n. 5691 // Has high bit set in every w byte less than n.
5699 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; 5692 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;
5700 // Has high bit set in every w byte greater than m. 5693 // Has high bit set in every w byte greater than m.
5701 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); 5694 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);
5702 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); 5695 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));
5703 } 5696 }
5704 5697
5705 5698
5706 enum AsciiCaseConversion { 5699 enum AsciiCaseConversion {
5707 ASCII_TO_LOWER, 5700 ASCII_TO_LOWER,
5708 ASCII_TO_UPPER 5701 ASCII_TO_UPPER
5709 }; 5702 };
5710 5703
5711 5704
5712 template <AsciiCaseConversion dir> 5705 template <AsciiCaseConversion dir>
5713 struct FastAsciiConverter { 5706 struct FastAsciiConverter {
5714 #ifdef ENABLE_LATIN_1
5715 static bool Convert(char* dst, char* src, int length, bool* changed_out) { 5707 static bool Convert(char* dst, char* src, int length, bool* changed_out) {
5716 #else
5717 static bool Convert(char* dst, char* src, int length) {
5718 #endif
5719 #ifdef DEBUG 5708 #ifdef DEBUG
5720 char* saved_dst = dst; 5709 char* saved_dst = dst;
5721 char* saved_src = src; 5710 char* saved_src = src;
5722 #endif 5711 #endif
5723 // We rely on the distance between upper and lower case letters 5712 // We rely on the distance between upper and lower case letters
5724 // being a known power of 2. 5713 // being a known power of 2.
5725 ASSERT('a' - 'A' == (1 << 5)); 5714 ASSERT('a' - 'A' == (1 << 5));
5726 // Boundaries for the range of input characters that require conversion. 5715 // Boundaries for the range of input characters that require conversion.
5727 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1; 5716 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1;
5728 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1; 5717 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1;
5729 bool changed = false; 5718 bool changed = false;
5730 #ifdef ENABLE_LATIN_1
5731 uintptr_t or_acc = 0; 5719 uintptr_t or_acc = 0;
5732 #endif
5733 char* const limit = src + length; 5720 char* const limit = src + length;
5734 #ifdef V8_HOST_CAN_READ_UNALIGNED 5721 #ifdef V8_HOST_CAN_READ_UNALIGNED
5735 // Process the prefix of the input that requires no conversion one 5722 // Process the prefix of the input that requires no conversion one
5736 // (machine) word at a time. 5723 // (machine) word at a time.
5737 while (src <= limit - sizeof(uintptr_t)) { 5724 while (src <= limit - sizeof(uintptr_t)) {
5738 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); 5725 uintptr_t w = *reinterpret_cast<uintptr_t*>(src);
5739 #ifdef ENABLE_LATIN_1
5740 or_acc |= w; 5726 or_acc |= w;
5741 #endif
5742 if (AsciiRangeMask(w, lo, hi) != 0) { 5727 if (AsciiRangeMask(w, lo, hi) != 0) {
5743 changed = true; 5728 changed = true;
5744 break; 5729 break;
5745 } 5730 }
5746 *reinterpret_cast<uintptr_t*>(dst) = w; 5731 *reinterpret_cast<uintptr_t*>(dst) = w;
5747 src += sizeof(uintptr_t); 5732 src += sizeof(uintptr_t);
5748 dst += sizeof(uintptr_t); 5733 dst += sizeof(uintptr_t);
5749 } 5734 }
5750 // Process the remainder of the input performing conversion when 5735 // Process the remainder of the input performing conversion when
5751 // required one word at a time. 5736 // required one word at a time.
5752 while (src <= limit - sizeof(uintptr_t)) { 5737 while (src <= limit - sizeof(uintptr_t)) {
5753 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); 5738 uintptr_t w = *reinterpret_cast<uintptr_t*>(src);
5754 #ifdef ENABLE_LATIN_1
5755 or_acc |= w; 5739 or_acc |= w;
5756 #endif
5757 uintptr_t m = AsciiRangeMask(w, lo, hi); 5740 uintptr_t m = AsciiRangeMask(w, lo, hi);
5758 // The mask has high (7th) bit set in every byte that needs 5741 // The mask has high (7th) bit set in every byte that needs
5759 // conversion and we know that the distance between cases is 5742 // conversion and we know that the distance between cases is
5760 // 1 << 5. 5743 // 1 << 5.
5761 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); 5744 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);
5762 src += sizeof(uintptr_t); 5745 src += sizeof(uintptr_t);
5763 dst += sizeof(uintptr_t); 5746 dst += sizeof(uintptr_t);
5764 } 5747 }
5765 #endif 5748 #endif
5766 // Process the last few bytes of the input (or the whole input if 5749 // Process the last few bytes of the input (or the whole input if
5767 // unaligned access is not supported). 5750 // unaligned access is not supported).
5768 while (src < limit) { 5751 while (src < limit) {
5769 char c = *src; 5752 char c = *src;
5770 #ifdef ENABLE_LATIN_1
5771 or_acc |= c; 5753 or_acc |= c;
5772 #endif
5773 if (lo < c && c < hi) { 5754 if (lo < c && c < hi) {
5774 c ^= (1 << 5); 5755 c ^= (1 << 5);
5775 changed = true; 5756 changed = true;
5776 } 5757 }
5777 *dst = c; 5758 *dst = c;
5778 ++src; 5759 ++src;
5779 ++dst; 5760 ++dst;
5780 } 5761 }
5781 #ifdef ENABLE_LATIN_1
5782 if ((or_acc & kAsciiMask) != 0) { 5762 if ((or_acc & kAsciiMask) != 0) {
5783 return false; 5763 return false;
5784 } 5764 }
5785 #endif
5786 #ifdef DEBUG 5765 #ifdef DEBUG
5787 CheckConvert(saved_dst, saved_src, length, changed); 5766 CheckConvert(saved_dst, saved_src, length, changed);
5788 #endif 5767 #endif
5789 #ifdef ENABLE_LATIN_1
5790 *changed_out = changed; 5768 *changed_out = changed;
5791 return true; 5769 return true;
5792 #else
5793 return changed;
5794 #endif
5795 } 5770 }
5796 5771
5797 #ifdef DEBUG 5772 #ifdef DEBUG
5798 static void CheckConvert(char* dst, char* src, int length, bool changed) { 5773 static void CheckConvert(char* dst, char* src, int length, bool changed) {
5799 bool expected_changed = false; 5774 bool expected_changed = false;
5800 for (int i = 0; i < length; i++) { 5775 for (int i = 0; i < length; i++) {
5801 if (dst[i] == src[i]) continue; 5776 if (dst[i] == src[i]) continue;
5802 expected_changed = true; 5777 expected_changed = true;
5803 if (dir == ASCII_TO_LOWER) { 5778 if (dir == ASCII_TO_LOWER) {
5804 ASSERT('A' <= src[i] && src[i] <= 'Z'); 5779 ASSERT('A' <= src[i] && src[i] <= 'Z');
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
5849 // NOTE: This assumes that the upper/lower case of an ASCII 5824 // NOTE: This assumes that the upper/lower case of an ASCII
5850 // character is also ASCII. This is currently the case, but it 5825 // character is also ASCII. This is currently the case, but it
5851 // might break in the future if we implement more context and locale 5826 // might break in the future if we implement more context and locale
5852 // dependent upper/lower conversions. 5827 // dependent upper/lower conversions.
5853 if (s->IsSeqOneByteString()) { 5828 if (s->IsSeqOneByteString()) {
5854 Object* o; 5829 Object* o;
5855 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length); 5830 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length);
5856 if (!maybe_o->ToObject(&o)) return maybe_o; 5831 if (!maybe_o->ToObject(&o)) return maybe_o;
5857 } 5832 }
5858 SeqOneByteString* result = SeqOneByteString::cast(o); 5833 SeqOneByteString* result = SeqOneByteString::cast(o);
5859 #ifndef ENABLE_LATIN_1
5860 bool has_changed_character = ConvertTraits::AsciiConverter::Convert(
5861 reinterpret_cast<char*>(result->GetChars()),
5862 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),
5863 length);
5864 return has_changed_character ? result : s;
5865 #else
5866 bool has_changed_character; 5834 bool has_changed_character;
5867 bool is_ascii = ConvertTraits::AsciiConverter::Convert( 5835 bool is_ascii = ConvertTraits::AsciiConverter::Convert(
5868 reinterpret_cast<char*>(result->GetChars()), 5836 reinterpret_cast<char*>(result->GetChars()),
5869 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), 5837 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),
5870 length, 5838 length,
5871 &has_changed_character); 5839 &has_changed_character);
5872 // If not ASCII, we discard the result and take the 2 byte path. 5840 // If not ASCII, we discard the result and take the 2 byte path.
5873 if (is_ascii) { 5841 if (is_ascii) {
5874 return has_changed_character ? result : s; 5842 return has_changed_character ? result : s;
5875 } 5843 }
5876 #endif
5877 } 5844 }
5878 5845
5879 Object* answer; 5846 Object* answer;
5880 { MaybeObject* maybe_answer = 5847 { MaybeObject* maybe_answer =
5881 ConvertCaseHelper(isolate, s, length, length, mapping); 5848 ConvertCaseHelper(isolate, s, length, length, mapping);
5882 if (!maybe_answer->ToObject(&answer)) return maybe_answer; 5849 if (!maybe_answer->ToObject(&answer)) return maybe_answer;
5883 } 5850 }
5884 if (answer->IsSmi()) { 5851 if (answer->IsSmi()) {
5885 // Retry with correct length. 5852 // Retry with correct length.
5886 { MaybeObject* maybe_answer = 5853 { MaybeObject* maybe_answer =
(...skipping 7528 matching lines...) Expand 10 before | Expand all | Expand 10 after
13415 // Handle last resort GC and make sure to allow future allocations 13382 // Handle last resort GC and make sure to allow future allocations
13416 // to grow the heap without causing GCs (if possible). 13383 // to grow the heap without causing GCs (if possible).
13417 isolate->counters()->gc_last_resort_from_js()->Increment(); 13384 isolate->counters()->gc_last_resort_from_js()->Increment();
13418 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, 13385 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,
13419 "Runtime::PerformGC"); 13386 "Runtime::PerformGC");
13420 } 13387 }
13421 } 13388 }
13422 13389
13423 13390
13424 } } // namespace v8::internal 13391 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/objects-inl.h ('k') | src/string-search.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698