OLD | NEW |
---|---|
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 4618 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4629 // we simple return the result and let the converted string | 4629 // we simple return the result and let the converted string |
4630 // become garbage; there is no reason to keep two identical strings | 4630 // become garbage; there is no reason to keep two identical strings |
4631 // alive. | 4631 // alive. |
4632 return s; | 4632 return s; |
4633 } | 4633 } |
4634 } | 4634 } |
4635 | 4635 |
4636 | 4636 |
4637 namespace { | 4637 namespace { |
4638 | 4638 |
4639 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; | |
4640 | |
4641 | |
4642 // Given a word and two range boundaries returns a word with high bit | |
4643 // set in every byte iff the corresponding input byte was strictly in | |
4644 // the range (m, n). All the other bits in the result are cleared. | |
antonm
2010/10/26 17:17:33
maybe follow more standard [;) intervals
Vitaly Repeshko
2010/10/26 18:14:48
I want to make usages of m and n similar, so eithe
| |
4645 // This function is only useful when it can be inlined and the | |
4646 // boundaries are statically known. | |
4647 // Requires: the input word and the boundaries must be ascii. | |
antonm
2010/10/26 17:17:33
I think ascii requires additional clarification.
Vitaly Repeshko
2010/10/26 18:14:48
Agreed. Extended the comment.
| |
4648 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) { | |
4649 // Every byte in an ascii string is less than or equal to 0x7F. | |
4650 ASSERT((w & (kOneInEveryByte * 0x7F)) == w); | |
4651 ASSERT(0 < m && m < n && n < 0x7F); | |
4652 // Has high bit set in every w byte less than n. | |
4653 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; | |
4654 // Has high bit set in every w byte greater than m. | |
4655 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); | |
4656 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); | |
4657 } | |
4658 | |
4659 | |
4660 enum AsciiCaseConversion { | |
4661 ASCII_TO_LOWER, | |
4662 ASCII_TO_UPPER | |
4663 }; | |
4664 | |
4665 | |
4666 template <AsciiCaseConversion dir> | |
4667 struct FastAsciiConverter { | |
4668 static bool Convert(char* dst, char* src, int length) { | |
4669 #ifdef DEBUG | |
4670 char* saved_dst = dst; | |
4671 char* saved_src = src; | |
4672 #endif | |
4673 // We rely on the distance between lower and upper case letters | |
4674 // being a power of 2. | |
4675 ASSERT('a' - 'A' == (1 << 5)); | |
4676 // Boundaries for the range of input characters than require conversion. | |
4677 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1; | |
4678 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1; | |
4679 bool changed = false; | |
4680 char* const limit = src + length; | |
4681 #ifdef V8_HOST_CAN_READ_UNALIGNED | |
4682 // Process the prefix of the input that requires no conversion one | |
4683 // (machine) word at a time. | |
4684 while (src <= limit - sizeof(uintptr_t)) { | |
antonm
2010/10/26 17:17:33
Maybe convert src to uintptr_t*? That would allow
Vitaly Repeshko
2010/10/26 18:14:48
Ahh, I'm really afraid of the compiler emitting di
| |
4685 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | |
4686 if (AsciiRangeMask(w, lo, hi) != 0) { | |
4687 changed = true; | |
4688 break; | |
4689 } | |
4690 *reinterpret_cast<uintptr_t*>(dst) = w; | |
4691 src += sizeof(uintptr_t); | |
4692 dst += sizeof(uintptr_t); | |
4693 } | |
4694 // Process the remainder of the input performing conversion when | |
4695 // required one word at a time. | |
4696 while (src <= limit - sizeof(uintptr_t)) { | |
4697 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | |
4698 uintptr_t m = AsciiRangeMask(w, lo, hi); | |
4699 // The mask has high (7th) bit set in every byte that needs | |
4700 // conversion and we know that the distance between cases is | |
4701 // 1 << 5. | |
4702 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); | |
4703 src += sizeof(uintptr_t); | |
4704 dst += sizeof(uintptr_t); | |
4705 } | |
4706 #endif | |
4707 // Process the last few bytes of the input (or the whole input if | |
4708 // unaligned accesses are not supported). | |
4709 while (src < limit) { | |
4710 char c = *src; | |
4711 if (lo < c && c < hi) { | |
4712 c ^= (1 << 5); | |
4713 changed = true; | |
4714 } | |
4715 *dst = c; | |
4716 ++src; | |
4717 ++dst; | |
4718 } | |
4719 #ifdef DEBUG | |
4720 CheckConvert(saved_dst, saved_src, length, changed); | |
4721 #endif | |
4722 return changed; | |
4723 } | |
4724 | |
4725 #ifdef DEBUG | |
4726 static void CheckConvert(char* dst, char* src, int length, bool changed) { | |
4727 bool expected_changed = false; | |
4728 for (int i = 0; i < length; i++) { | |
4729 if (dst[i] == src[i]) continue; | |
4730 expected_changed = true; | |
4731 if (dir == ASCII_TO_LOWER) { | |
4732 ASSERT('A' <= src[i] && src[i] <= 'Z'); | |
4733 ASSERT(dst[i] == src[i] + ('a' - 'A')); | |
4734 } else { | |
4735 ASSERT(dir == ASCII_TO_UPPER); | |
4736 ASSERT('a' <= src[i] && src[i] <= 'z'); | |
4737 ASSERT(dst[i] == src[i] - ('a' - 'A')); | |
4738 } | |
4739 } | |
4740 ASSERT(expected_changed == changed); | |
4741 } | |
4742 #endif | |
4743 }; | |
4744 | |
4745 | |
4639 struct ToLowerTraits { | 4746 struct ToLowerTraits { |
4640 typedef unibrow::ToLowercase UnibrowConverter; | 4747 typedef unibrow::ToLowercase UnibrowConverter; |
4641 | 4748 |
4642 static bool ConvertAscii(char* dst, char* src, int length) { | 4749 typedef FastAsciiConverter<ASCII_TO_LOWER> AsciiConverter; |
4643 bool changed = false; | |
4644 for (int i = 0; i < length; ++i) { | |
4645 char c = src[i]; | |
4646 if ('A' <= c && c <= 'Z') { | |
4647 c += ('a' - 'A'); | |
4648 changed = true; | |
4649 } | |
4650 dst[i] = c; | |
4651 } | |
4652 return changed; | |
4653 } | |
4654 }; | 4750 }; |
4655 | 4751 |
4656 | 4752 |
4657 struct ToUpperTraits { | 4753 struct ToUpperTraits { |
4658 typedef unibrow::ToUppercase UnibrowConverter; | 4754 typedef unibrow::ToUppercase UnibrowConverter; |
4659 | 4755 |
4660 static bool ConvertAscii(char* dst, char* src, int length) { | 4756 typedef FastAsciiConverter<ASCII_TO_UPPER> AsciiConverter; |
4661 bool changed = false; | |
4662 for (int i = 0; i < length; ++i) { | |
4663 char c = src[i]; | |
4664 if ('a' <= c && c <= 'z') { | |
4665 c -= ('a' - 'A'); | |
4666 changed = true; | |
4667 } | |
4668 dst[i] = c; | |
4669 } | |
4670 return changed; | |
4671 } | |
4672 }; | 4757 }; |
4673 | 4758 |
4674 } // namespace | 4759 } // namespace |
4675 | 4760 |
4676 | 4761 |
4677 template <typename ConvertTraits> | 4762 template <typename ConvertTraits> |
4678 static Object* ConvertCase( | 4763 static Object* ConvertCase( |
4679 Arguments args, | 4764 Arguments args, |
4680 unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) { | 4765 unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) { |
4681 NoHandleAllocation ha; | 4766 NoHandleAllocation ha; |
4682 CONVERT_CHECKED(String, s, args[0]); | 4767 CONVERT_CHECKED(String, s, args[0]); |
4683 s = s->TryFlattenGetString(); | 4768 s = s->TryFlattenGetString(); |
4684 | 4769 |
4685 const int length = s->length(); | 4770 const int length = s->length(); |
4686 // Assume that the string is not empty; we need this assumption later | 4771 // Assume that the string is not empty; we need this assumption later |
4687 if (length == 0) return s; | 4772 if (length == 0) return s; |
4688 | 4773 |
4689 // Simpler handling of ascii strings. | 4774 // Simpler handling of ascii strings. |
4690 // | 4775 // |
4691 // NOTE: This assumes that the upper/lower case of an ascii | 4776 // NOTE: This assumes that the upper/lower case of an ascii |
4692 // character is also ascii. This is currently the case, but it | 4777 // character is also ascii. This is currently the case, but it |
4693 // might break in the future if we implement more context and locale | 4778 // might break in the future if we implement more context and locale |
4694 // dependent upper/lower conversions. | 4779 // dependent upper/lower conversions. |
4695 if (s->IsSeqAsciiString()) { | 4780 if (s->IsSeqAsciiString()) { |
4696 Object* o = Heap::AllocateRawAsciiString(length); | 4781 Object* o = Heap::AllocateRawAsciiString(length); |
4697 if (o->IsFailure()) return o; | 4782 if (o->IsFailure()) return o; |
4698 SeqAsciiString* result = SeqAsciiString::cast(o); | 4783 SeqAsciiString* result = SeqAsciiString::cast(o); |
4699 bool has_changed_character = ConvertTraits::ConvertAscii( | 4784 bool has_changed_character = ConvertTraits::AsciiConverter::Convert( |
4700 result->GetChars(), SeqAsciiString::cast(s)->GetChars(), length); | 4785 result->GetChars(), SeqAsciiString::cast(s)->GetChars(), length); |
4701 return has_changed_character ? result : s; | 4786 return has_changed_character ? result : s; |
4702 } | 4787 } |
4703 | 4788 |
4704 Object* answer = ConvertCaseHelper(s, length, length, mapping); | 4789 Object* answer = ConvertCaseHelper(s, length, length, mapping); |
4705 if (answer->IsSmi()) { | 4790 if (answer->IsSmi()) { |
4706 // Retry with correct length. | 4791 // Retry with correct length. |
4707 answer = ConvertCaseHelper(s, Smi::cast(answer)->value(), length, mapping); | 4792 answer = ConvertCaseHelper(s, Smi::cast(answer)->value(), length, mapping); |
4708 } | 4793 } |
4709 return answer; // This may be a failure. | 4794 return answer; // This may be a failure. |
(...skipping 5440 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
10150 } else { | 10235 } else { |
10151 // Handle last resort GC and make sure to allow future allocations | 10236 // Handle last resort GC and make sure to allow future allocations |
10152 // to grow the heap without causing GCs (if possible). | 10237 // to grow the heap without causing GCs (if possible). |
10153 Counters::gc_last_resort_from_js.Increment(); | 10238 Counters::gc_last_resort_from_js.Increment(); |
10154 Heap::CollectAllGarbage(false); | 10239 Heap::CollectAllGarbage(false); |
10155 } | 10240 } |
10156 } | 10241 } |
10157 | 10242 |
10158 | 10243 |
10159 } } // namespace v8::internal | 10244 } } // namespace v8::internal |
OLD | NEW |