src/runtime.cc - Issue 12700008: remove latin-1 flag

Side by Side Diff: src/runtime.cc

Issue 12700008: remove latin-1 flag (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: removed SeqOneByteStringVerify Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 5657 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5668 // become garbage; there is no reason to keep two identical strings	5668 // become garbage; there is no reason to keep two identical strings

5669 // alive.	5669 // alive.

5670 return s;	5670 return s;

5671 }	5671 }

5672 }	5672 }

5673	5673

5674	5674

5675 namespace {	5675 namespace {

5676	5676

5677 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;	5677 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;

5678 #ifdef ENABLE_LATIN_1

5679 static const uintptr_t kAsciiMask = kOneInEveryByte << 7;	5678 static const uintptr_t kAsciiMask = kOneInEveryByte << 7;

5680 #endif

5681	5679

5682 // Given a word and two range boundaries returns a word with high bit	5680 // Given a word and two range boundaries returns a word with high bit

5683 // set in every byte iff the corresponding input byte was strictly in	5681 // set in every byte iff the corresponding input byte was strictly in

5684 // the range (m, n). All the other bits in the result are cleared.	5682 // the range (m, n). All the other bits in the result are cleared.

5685 // This function is only useful when it can be inlined and the	5683 // This function is only useful when it can be inlined and the

5686 // boundaries are statically known.	5684 // boundaries are statically known.

5687 // Requires: all bytes in the input word and the boundaries must be	5685 // Requires: all bytes in the input word and the boundaries must be

5688 // ASCII (less than 0x7F).	5686 // ASCII (less than 0x7F).

5689 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {	5687 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {

5690 // Use strict inequalities since in edge cases the function could be	5688 // Use strict inequalities since in edge cases the function could be

5691 // further simplified.	5689 // further simplified.

5692 ASSERT(0 < m && m < n);	5690 ASSERT(0 < m && m < n);

5693 #ifndef ENABLE_LATIN_1

5694 // Every byte in an ASCII string is less than or equal to 0x7F.

5695 ASSERT((w & (kOneInEveryByte * 0x7F)) == w);

5696 ASSERT(n < 0x7F);

5697 #endif

5698 // Has high bit set in every w byte less than n.	5691 // Has high bit set in every w byte less than n.

5699 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;	5692 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;

5700 // Has high bit set in every w byte greater than m.	5693 // Has high bit set in every w byte greater than m.

5701 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);	5694 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);

5702 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));	5695 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));

5703 }	5696 }

5704	5697

5705	5698

5706 enum AsciiCaseConversion {	5699 enum AsciiCaseConversion {

5707 ASCII_TO_LOWER,	5700 ASCII_TO_LOWER,

5708 ASCII_TO_UPPER	5701 ASCII_TO_UPPER

5709 };	5702 };

5710	5703

5711	5704

5712 template <AsciiCaseConversion dir>	5705 template <AsciiCaseConversion dir>

5713 struct FastAsciiConverter {	5706 struct FastAsciiConverter {

5714 #ifdef ENABLE_LATIN_1

5715 static bool Convert(char* dst, char* src, int length, bool* changed_out) {	5707 static bool Convert(char* dst, char* src, int length, bool* changed_out) {

5716 #else

5717 static bool Convert(char* dst, char* src, int length) {

5718 #endif

5719 #ifdef DEBUG	5708 #ifdef DEBUG

5720 char* saved_dst = dst;	5709 char* saved_dst = dst;

5721 char* saved_src = src;	5710 char* saved_src = src;

5722 #endif	5711 #endif

5723 // We rely on the distance between upper and lower case letters	5712 // We rely on the distance between upper and lower case letters

5724 // being a known power of 2.	5713 // being a known power of 2.

5725 ASSERT('a' - 'A' == (1 << 5));	5714 ASSERT('a' - 'A' == (1 << 5));

5726 // Boundaries for the range of input characters that require conversion.	5715 // Boundaries for the range of input characters that require conversion.

5727 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1;	5716 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1;

5728 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1;	5717 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1;

5729 bool changed = false;	5718 bool changed = false;

5730 #ifdef ENABLE_LATIN_1

5731 uintptr_t or_acc = 0;	5719 uintptr_t or_acc = 0;

5732 #endif

5733 char* const limit = src + length;	5720 char* const limit = src + length;

5734 #ifdef V8_HOST_CAN_READ_UNALIGNED	5721 #ifdef V8_HOST_CAN_READ_UNALIGNED

5735 // Process the prefix of the input that requires no conversion one	5722 // Process the prefix of the input that requires no conversion one

5736 // (machine) word at a time.	5723 // (machine) word at a time.

5737 while (src <= limit - sizeof(uintptr_t)) {	5724 while (src <= limit - sizeof(uintptr_t)) {

5738 uintptr_t w = *reinterpret_cast<uintptr_t*>(src);	5725 uintptr_t w = *reinterpret_cast<uintptr_t*>(src);

5739 #ifdef ENABLE_LATIN_1

5740 or_acc |= w;	5726 or_acc |= w;

5741 #endif

5742 if (AsciiRangeMask(w, lo, hi) != 0) {	5727 if (AsciiRangeMask(w, lo, hi) != 0) {

5743 changed = true;	5728 changed = true;

5744 break;	5729 break;

5745 }	5730 }

5746 *reinterpret_cast<uintptr_t*>(dst) = w;	5731 *reinterpret_cast<uintptr_t*>(dst) = w;

5747 src += sizeof(uintptr_t);	5732 src += sizeof(uintptr_t);

5748 dst += sizeof(uintptr_t);	5733 dst += sizeof(uintptr_t);

5749 }	5734 }

5750 // Process the remainder of the input performing conversion when	5735 // Process the remainder of the input performing conversion when

5751 // required one word at a time.	5736 // required one word at a time.

5752 while (src <= limit - sizeof(uintptr_t)) {	5737 while (src <= limit - sizeof(uintptr_t)) {

5753 uintptr_t w = *reinterpret_cast<uintptr_t*>(src);	5738 uintptr_t w = *reinterpret_cast<uintptr_t*>(src);

5754 #ifdef ENABLE_LATIN_1

5755 or_acc |= w;	5739 or_acc |= w;

5756 #endif

5757 uintptr_t m = AsciiRangeMask(w, lo, hi);	5740 uintptr_t m = AsciiRangeMask(w, lo, hi);

5758 // The mask has high (7th) bit set in every byte that needs	5741 // The mask has high (7th) bit set in every byte that needs

5759 // conversion and we know that the distance between cases is	5742 // conversion and we know that the distance between cases is

5760 // 1 << 5.	5743 // 1 << 5.

5761 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);	5744 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);

5762 src += sizeof(uintptr_t);	5745 src += sizeof(uintptr_t);

5763 dst += sizeof(uintptr_t);	5746 dst += sizeof(uintptr_t);

5764 }	5747 }

5765 #endif	5748 #endif

5766 // Process the last few bytes of the input (or the whole input if	5749 // Process the last few bytes of the input (or the whole input if

5767 // unaligned access is not supported).	5750 // unaligned access is not supported).

5768 while (src < limit) {	5751 while (src < limit) {

5769 char c = *src;	5752 char c = *src;

5770 #ifdef ENABLE_LATIN_1

5771 or_acc |= c;	5753 or_acc |= c;

5772 #endif

5773 if (lo < c && c < hi) {	5754 if (lo < c && c < hi) {

5774 c ^= (1 << 5);	5755 c ^= (1 << 5);

5775 changed = true;	5756 changed = true;

5776 }	5757 }

5777 *dst = c;	5758 *dst = c;

5778 ++src;	5759 ++src;

5779 ++dst;	5760 ++dst;

5780 }	5761 }

5781 #ifdef ENABLE_LATIN_1

5782 if ((or_acc & kAsciiMask) != 0) {	5762 if ((or_acc & kAsciiMask) != 0) {

5783 return false;	5763 return false;

5784 }	5764 }

5785 #endif

5786 #ifdef DEBUG	5765 #ifdef DEBUG

5787 CheckConvert(saved_dst, saved_src, length, changed);	5766 CheckConvert(saved_dst, saved_src, length, changed);

5788 #endif	5767 #endif

5789 #ifdef ENABLE_LATIN_1

5790 *changed_out = changed;	5768 *changed_out = changed;

5791 return true;	5769 return true;

5792 #else

5793 return changed;

5794 #endif

5795 }	5770 }

5796	5771

5797 #ifdef DEBUG	5772 #ifdef DEBUG

5798 static void CheckConvert(char* dst, char* src, int length, bool changed) {	5773 static void CheckConvert(char* dst, char* src, int length, bool changed) {

5799 bool expected_changed = false;	5774 bool expected_changed = false;

5800 for (int i = 0; i < length; i++) {	5775 for (int i = 0; i < length; i++) {

5801 if (dst[i] == src[i]) continue;	5776 if (dst[i] == src[i]) continue;

5802 expected_changed = true;	5777 expected_changed = true;

5803 if (dir == ASCII_TO_LOWER) {	5778 if (dir == ASCII_TO_LOWER) {

5804 ASSERT('A' <= src[i] && src[i] <= 'Z');	5779 ASSERT('A' <= src[i] && src[i] <= 'Z');

(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5849 // NOTE: This assumes that the upper/lower case of an ASCII	5824 // NOTE: This assumes that the upper/lower case of an ASCII

5850 // character is also ASCII. This is currently the case, but it	5825 // character is also ASCII. This is currently the case, but it

5851 // might break in the future if we implement more context and locale	5826 // might break in the future if we implement more context and locale

5852 // dependent upper/lower conversions.	5827 // dependent upper/lower conversions.

5853 if (s->IsSeqOneByteString()) {	5828 if (s->IsSeqOneByteString()) {

5854 Object* o;	5829 Object* o;

5855 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length);	5830 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length);

5856 if (!maybe_o->ToObject(&o)) return maybe_o;	5831 if (!maybe_o->ToObject(&o)) return maybe_o;

5857 }	5832 }

5858 SeqOneByteString* result = SeqOneByteString::cast(o);	5833 SeqOneByteString* result = SeqOneByteString::cast(o);

5859 #ifndef ENABLE_LATIN_1

5860 bool has_changed_character = ConvertTraits::AsciiConverter::Convert(

5861 reinterpret_cast<char*>(result->GetChars()),

5862 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),

5863 length);

5864 return has_changed_character ? result : s;

5865 #else

5866 bool has_changed_character;	5834 bool has_changed_character;

5867 bool is_ascii = ConvertTraits::AsciiConverter::Convert(	5835 bool is_ascii = ConvertTraits::AsciiConverter::Convert(

5868 reinterpret_cast<char*>(result->GetChars()),	5836 reinterpret_cast<char*>(result->GetChars()),

5869 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),	5837 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),

5870 length,	5838 length,

5871 &has_changed_character);	5839 &has_changed_character);

5872 // If not ASCII, we discard the result and take the 2 byte path.	5840 // If not ASCII, we discard the result and take the 2 byte path.

5873 if (is_ascii) {	5841 if (is_ascii) {

5874 return has_changed_character ? result : s;	5842 return has_changed_character ? result : s;

5875 }	5843 }

5876 #endif

5877 }	5844 }

5878	5845

5879 Object* answer;	5846 Object* answer;

5880 { MaybeObject* maybe_answer =	5847 { MaybeObject* maybe_answer =

5881 ConvertCaseHelper(isolate, s, length, length, mapping);	5848 ConvertCaseHelper(isolate, s, length, length, mapping);

5882 if (!maybe_answer->ToObject(&answer)) return maybe_answer;	5849 if (!maybe_answer->ToObject(&answer)) return maybe_answer;

5883 }	5850 }

5884 if (answer->IsSmi()) {	5851 if (answer->IsSmi()) {

5885 // Retry with correct length.	5852 // Retry with correct length.

5886 { MaybeObject* maybe_answer =	5853 { MaybeObject* maybe_answer =

(...skipping 7528 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
13415 // Handle last resort GC and make sure to allow future allocations	13382 // Handle last resort GC and make sure to allow future allocations

13416 // to grow the heap without causing GCs (if possible).	13383 // to grow the heap without causing GCs (if possible).

13417 isolate->counters()->gc_last_resort_from_js()->Increment();	13384 isolate->counters()->gc_last_resort_from_js()->Increment();

13418 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,	13385 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,

13419 "Runtime::PerformGC");	13386 "Runtime::PerformGC");

13420 }	13387 }

13421 }	13388 }

13422	13389

13423	13390

13424 } } // namespace v8::internal	13391 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/objects-inl.h ('k') | src/string-search.h » ('j') | no next file with comments »