Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 2209 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2220 char_mask = String::kMaxUC16CharCode; | 2220 char_mask = String::kMaxUC16CharCode; |
| 2221 char_shift = 16; | 2221 char_shift = 16; |
| 2222 } | 2222 } |
| 2223 for (int k = 0; k < elms_->length(); k++) { | 2223 for (int k = 0; k < elms_->length(); k++) { |
| 2224 TextElement elm = elms_->at(k); | 2224 TextElement elm = elms_->at(k); |
| 2225 if (elm.type == TextElement::ATOM) { | 2225 if (elm.type == TextElement::ATOM) { |
| 2226 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2226 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 2227 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2227 for (int i = 0; i < characters && i < quarks.length(); i++) { |
| 2228 QuickCheckDetails::Position* pos = | 2228 QuickCheckDetails::Position* pos = |
| 2229 details->positions(characters_filled_in); | 2229 details->positions(characters_filled_in); |
| 2230 uc16 c = quarks[i]; | |
| 2231 if (c > char_mask) { | |
| 2232 // If we expect a non-ASCII character from an ASCII string, | |
| 2233 // there is no way we can match. Not even case independent | |
| 2234 // matching can turn an ASCII character into non-ASCII or | |
| 2235 // vice versa. | |
| 2236 details->set_cannot_match(); | |
| 2237 return; | |
| 2238 } | |
| 2230 if (compiler->ignore_case()) { | 2239 if (compiler->ignore_case()) { |
| 2231 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 2240 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 2232 uc16 c = quarks[i]; | |
| 2233 int length = uncanonicalize.get(c, '\0', chars); | 2241 int length = uncanonicalize.get(c, '\0', chars); |
| 2234 if (length < 2) { | 2242 if (length < 2) { |
| 2235 // This letter has no case equivalents, so it's nice and simple | 2243 // This letter has no case equivalents, so it's nice and simple |
| 2236 // and the mask-compare will determine definitely whether we have | 2244 // and the mask-compare will determine definitely whether we have |
| 2237 // a match at this character position. | 2245 // a match at this character position. |
| 2238 pos->mask = char_mask; | 2246 pos->mask = char_mask; |
| 2239 pos->value = c; | 2247 pos->value = c; |
| 2240 pos->determines_perfectly = true; | 2248 pos->determines_perfectly = true; |
| 2241 } else { | 2249 } else { |
| 2242 uint32_t common_bits = char_mask; | 2250 uint32_t common_bits = char_mask; |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 2255 pos->determines_perfectly = true; | 2263 pos->determines_perfectly = true; |
| 2256 } | 2264 } |
| 2257 pos->mask = common_bits; | 2265 pos->mask = common_bits; |
| 2258 pos->value = bits; | 2266 pos->value = bits; |
| 2259 } | 2267 } |
| 2260 } else { | 2268 } else { |
| 2261 // Don't ignore case. Nice simple case where the mask-compare will | 2269 // Don't ignore case. Nice simple case where the mask-compare will |
| 2262 // determine definitely whether we have a match at this character | 2270 // determine definitely whether we have a match at this character |
| 2263 // position. | 2271 // position. |
| 2264 pos->mask = char_mask; | 2272 pos->mask = char_mask; |
| 2265 pos->value = quarks[i]; | 2273 pos->value = c; |
| 2266 pos->determines_perfectly = true; | 2274 pos->determines_perfectly = true; |
| 2267 } | 2275 } |
| 2268 characters_filled_in++; | 2276 characters_filled_in++; |
| 2269 ASSERT(characters_filled_in <= details->characters()); | 2277 ASSERT(characters_filled_in <= details->characters()); |
| 2270 if (characters_filled_in == details->characters()) { | 2278 if (characters_filled_in == details->characters()) { |
| 2271 return; | 2279 return; |
| 2272 } | 2280 } |
| 2273 } | 2281 } |
| 2274 } else { | 2282 } else { |
| 2275 QuickCheckDetails::Position* pos = | 2283 QuickCheckDetails::Position* pos = |
| (...skipping 375 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2651 TextElement elm = elms_->at(i); | 2659 TextElement elm = elms_->at(i); |
| 2652 int cp_offset = trace->cp_offset() + elm.cp_offset; | 2660 int cp_offset = trace->cp_offset() + elm.cp_offset; |
| 2653 if (elm.type == TextElement::ATOM) { | 2661 if (elm.type == TextElement::ATOM) { |
| 2654 if (pass == NON_ASCII_MATCH || | 2662 if (pass == NON_ASCII_MATCH || |
| 2655 pass == CHARACTER_MATCH || | 2663 pass == CHARACTER_MATCH || |
| 2656 pass == CASE_CHARACTER_MATCH) { | 2664 pass == CASE_CHARACTER_MATCH) { |
| 2657 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2665 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 2658 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 2666 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
| 2659 bool bound_checked = true; // Most ops will check their bounds. | 2667 bool bound_checked = true; // Most ops will check their bounds. |
| 2660 if (first_element_checked && i == 0 && j == 0) continue; | 2668 if (first_element_checked && i == 0 && j == 0) continue; |
| 2661 if (quick_check != NULL && | |
| 2662 elm.cp_offset + j < quick_check->characters() && | |
| 2663 quick_check->positions(elm.cp_offset + j)->determines_perfectly) { | |
| 2664 continue; | |
| 2665 } | |
| 2666 if (pass == NON_ASCII_MATCH) { | 2669 if (pass == NON_ASCII_MATCH) { |
| 2667 ASSERT(ascii); | 2670 ASSERT(ascii); |
| 2668 if (quarks[j] > String::kMaxAsciiCharCode) { | 2671 if (quarks[j] > String::kMaxAsciiCharCode) { |
| 2669 assembler->GoTo(backtrack); | 2672 assembler->GoTo(backtrack); |
| 2670 return; | 2673 return; |
| 2671 } | 2674 } |
| 2672 } else if (pass == CHARACTER_MATCH) { | 2675 } else { |
|
Christian Plesner Hansen
2009/02/11 11:46:54
This code is disgustingly complicated. Your code
| |
| 2673 if (compiler->ignore_case()) { | 2676 if (quick_check != NULL && |
| 2674 bound_checked = EmitAtomNonLetter(assembler, | 2677 elm.cp_offset + j < quick_check->characters() && |
| 2675 quarks[j], | 2678 quick_check->positions(elm.cp_offset + j)-> |
| 2676 backtrack, | 2679 determines_perfectly) { |
| 2677 cp_offset + j, | 2680 continue; |
| 2678 *checked_up_to < cp_offset + j, | 2681 } |
| 2679 preloaded); | 2682 if (pass == CHARACTER_MATCH) { |
| 2683 if (compiler->ignore_case()) { | |
| 2684 bound_checked = EmitAtomNonLetter( | |
| 2685 assembler, | |
| 2686 quarks[j], | |
| 2687 backtrack, | |
| 2688 cp_offset + j, | |
| 2689 *checked_up_to < cp_offset + j, | |
| 2690 preloaded); | |
| 2691 } else { | |
| 2692 if (!preloaded) { | |
| 2693 assembler->LoadCurrentCharacter( | |
| 2694 cp_offset + j, | |
| 2695 backtrack, | |
| 2696 *checked_up_to < cp_offset + j); | |
| 2697 } | |
| 2698 assembler->CheckNotCharacter(quarks[j], backtrack); | |
| 2699 } | |
| 2680 } else { | 2700 } else { |
| 2681 if (!preloaded) { | 2701 ASSERT_EQ(pass, CASE_CHARACTER_MATCH); |
| 2682 assembler->LoadCurrentCharacter(cp_offset + j, | 2702 ASSERT(compiler->ignore_case()); |
| 2683 backtrack, | 2703 bound_checked = EmitAtomLetter(assembler, |
| 2684 *checked_up_to < cp_offset + j); | 2704 compiler->ascii(), |
| 2705 quarks[j], | |
| 2706 backtrack, | |
| 2707 cp_offset + j, | |
| 2708 *checked_up_to < cp_offset + j, | |
| 2709 preloaded); | |
| 2710 } | |
| 2711 if (bound_checked) { | |
| 2712 if (cp_offset + j > *checked_up_to) { | |
| 2713 *checked_up_to = cp_offset + j; | |
| 2685 } | 2714 } |
| 2686 assembler->CheckNotCharacter(quarks[j], backtrack); | |
| 2687 } | |
| 2688 } else { | |
| 2689 ASSERT_EQ(pass, CASE_CHARACTER_MATCH); | |
| 2690 ASSERT(compiler->ignore_case()); | |
| 2691 bound_checked = EmitAtomLetter(assembler, | |
| 2692 compiler->ascii(), | |
| 2693 quarks[j], | |
| 2694 backtrack, | |
| 2695 cp_offset + j, | |
| 2696 *checked_up_to < cp_offset + j, | |
| 2697 preloaded); | |
| 2698 } | |
| 2699 if (pass != NON_ASCII_MATCH && bound_checked) { | |
| 2700 if (cp_offset + j > *checked_up_to) { | |
| 2701 *checked_up_to = cp_offset + j; | |
| 2702 } | 2715 } |
| 2703 } | 2716 } |
| 2704 } | 2717 } |
| 2705 } | 2718 } |
| 2706 } else { | 2719 } else { |
| 2707 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 2720 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); |
| 2708 if (first_element_checked && i == 0) continue; | 2721 if (first_element_checked && i == 0) continue; |
| 2709 if (quick_check != NULL && | 2722 if (quick_check != NULL && |
| 2710 elm.cp_offset < quick_check->characters() && | 2723 elm.cp_offset < quick_check->characters() && |
| 2711 quick_check->positions(elm.cp_offset)->determines_perfectly) { | 2724 quick_check->positions(elm.cp_offset)->determines_perfectly) { |
| (...skipping 2155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4867 EmbeddedVector<byte, 1024> codes; | 4880 EmbeddedVector<byte, 1024> codes; |
| 4868 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4881 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| 4869 return compiler.Assemble(&macro_assembler, | 4882 return compiler.Assemble(&macro_assembler, |
| 4870 node, | 4883 node, |
| 4871 data->capture_count, | 4884 data->capture_count, |
| 4872 pattern); | 4885 pattern); |
| 4873 } | 4886 } |
| 4874 | 4887 |
| 4875 | 4888 |
| 4876 }} // namespace v8::internal | 4889 }} // namespace v8::internal |
| OLD | NEW |