Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(433)

Side by Side Diff: src/jsregexp.cc

Issue 20258: * Issue 227 Fix. (Closed)
Patch Set: Created 11 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/mjsunit/regress/regress-227.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2209 matching lines...) Expand 10 before | Expand all | Expand 10 after
2220 char_mask = String::kMaxUC16CharCode; 2220 char_mask = String::kMaxUC16CharCode;
2221 char_shift = 16; 2221 char_shift = 16;
2222 } 2222 }
2223 for (int k = 0; k < elms_->length(); k++) { 2223 for (int k = 0; k < elms_->length(); k++) {
2224 TextElement elm = elms_->at(k); 2224 TextElement elm = elms_->at(k);
2225 if (elm.type == TextElement::ATOM) { 2225 if (elm.type == TextElement::ATOM) {
2226 Vector<const uc16> quarks = elm.data.u_atom->data(); 2226 Vector<const uc16> quarks = elm.data.u_atom->data();
2227 for (int i = 0; i < characters && i < quarks.length(); i++) { 2227 for (int i = 0; i < characters && i < quarks.length(); i++) {
2228 QuickCheckDetails::Position* pos = 2228 QuickCheckDetails::Position* pos =
2229 details->positions(characters_filled_in); 2229 details->positions(characters_filled_in);
2230 uc16 c = quarks[i];
2231 if (c > char_mask) {
2232 // If we expect a non-ASCII character from an ASCII string,
2233 // there is no way we can match. Not even case independent
2234 // matching can turn an ASCII character into non-ASCII or
2235 // vice versa.
2236 details->set_cannot_match();
2237 return;
2238 }
2230 if (compiler->ignore_case()) { 2239 if (compiler->ignore_case()) {
2231 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 2240 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
2232 uc16 c = quarks[i];
2233 int length = uncanonicalize.get(c, '\0', chars); 2241 int length = uncanonicalize.get(c, '\0', chars);
2234 if (length < 2) { 2242 if (length < 2) {
2235 // This letter has no case equivalents, so it's nice and simple 2243 // This letter has no case equivalents, so it's nice and simple
2236 // and the mask-compare will determine definitely whether we have 2244 // and the mask-compare will determine definitely whether we have
2237 // a match at this character position. 2245 // a match at this character position.
2238 pos->mask = char_mask; 2246 pos->mask = char_mask;
2239 pos->value = c; 2247 pos->value = c;
2240 pos->determines_perfectly = true; 2248 pos->determines_perfectly = true;
2241 } else { 2249 } else {
2242 uint32_t common_bits = char_mask; 2250 uint32_t common_bits = char_mask;
(...skipping 12 matching lines...) Expand all
2255 pos->determines_perfectly = true; 2263 pos->determines_perfectly = true;
2256 } 2264 }
2257 pos->mask = common_bits; 2265 pos->mask = common_bits;
2258 pos->value = bits; 2266 pos->value = bits;
2259 } 2267 }
2260 } else { 2268 } else {
2261 // Don't ignore case. Nice simple case where the mask-compare will 2269 // Don't ignore case. Nice simple case where the mask-compare will
2262 // determine definitely whether we have a match at this character 2270 // determine definitely whether we have a match at this character
2263 // position. 2271 // position.
2264 pos->mask = char_mask; 2272 pos->mask = char_mask;
2265 pos->value = quarks[i]; 2273 pos->value = c;
2266 pos->determines_perfectly = true; 2274 pos->determines_perfectly = true;
2267 } 2275 }
2268 characters_filled_in++; 2276 characters_filled_in++;
2269 ASSERT(characters_filled_in <= details->characters()); 2277 ASSERT(characters_filled_in <= details->characters());
2270 if (characters_filled_in == details->characters()) { 2278 if (characters_filled_in == details->characters()) {
2271 return; 2279 return;
2272 } 2280 }
2273 } 2281 }
2274 } else { 2282 } else {
2275 QuickCheckDetails::Position* pos = 2283 QuickCheckDetails::Position* pos =
(...skipping 375 matching lines...) Expand 10 before | Expand all | Expand 10 after
2651 TextElement elm = elms_->at(i); 2659 TextElement elm = elms_->at(i);
2652 int cp_offset = trace->cp_offset() + elm.cp_offset; 2660 int cp_offset = trace->cp_offset() + elm.cp_offset;
2653 if (elm.type == TextElement::ATOM) { 2661 if (elm.type == TextElement::ATOM) {
2654 if (pass == NON_ASCII_MATCH || 2662 if (pass == NON_ASCII_MATCH ||
2655 pass == CHARACTER_MATCH || 2663 pass == CHARACTER_MATCH ||
2656 pass == CASE_CHARACTER_MATCH) { 2664 pass == CASE_CHARACTER_MATCH) {
2657 Vector<const uc16> quarks = elm.data.u_atom->data(); 2665 Vector<const uc16> quarks = elm.data.u_atom->data();
2658 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { 2666 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
2659 bool bound_checked = true; // Most ops will check their bounds. 2667 bool bound_checked = true; // Most ops will check their bounds.
2660 if (first_element_checked && i == 0 && j == 0) continue; 2668 if (first_element_checked && i == 0 && j == 0) continue;
2661 if (quick_check != NULL &&
2662 elm.cp_offset + j < quick_check->characters() &&
2663 quick_check->positions(elm.cp_offset + j)->determines_perfectly) {
2664 continue;
2665 }
2666 if (pass == NON_ASCII_MATCH) { 2669 if (pass == NON_ASCII_MATCH) {
2667 ASSERT(ascii); 2670 ASSERT(ascii);
2668 if (quarks[j] > String::kMaxAsciiCharCode) { 2671 if (quarks[j] > String::kMaxAsciiCharCode) {
2669 assembler->GoTo(backtrack); 2672 assembler->GoTo(backtrack);
2670 return; 2673 return;
2671 } 2674 }
2672 } else if (pass == CHARACTER_MATCH) { 2675 } else {
Christian Plesner Hansen (2009/02/11 11:46:54): This code is disgustingly complicated. Your code [...]
2673 if (compiler->ignore_case()) { 2676 if (quick_check != NULL &&
2674 bound_checked = EmitAtomNonLetter(assembler, 2677 elm.cp_offset + j < quick_check->characters() &&
2675 quarks[j], 2678 quick_check->positions(elm.cp_offset + j)->
2676 backtrack, 2679 determines_perfectly) {
2677 cp_offset + j, 2680 continue;
2678 *checked_up_to < cp_offset + j, 2681 }
2679 preloaded); 2682 if (pass == CHARACTER_MATCH) {
2683 if (compiler->ignore_case()) {
2684 bound_checked = EmitAtomNonLetter(
2685 assembler,
2686 quarks[j],
2687 backtrack,
2688 cp_offset + j,
2689 *checked_up_to < cp_offset + j,
2690 preloaded);
2691 } else {
2692 if (!preloaded) {
2693 assembler->LoadCurrentCharacter(
2694 cp_offset + j,
2695 backtrack,
2696 *checked_up_to < cp_offset + j);
2697 }
2698 assembler->CheckNotCharacter(quarks[j], backtrack);
2699 }
2680 } else { 2700 } else {
2681 if (!preloaded) { 2701 ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
2682 assembler->LoadCurrentCharacter(cp_offset + j, 2702 ASSERT(compiler->ignore_case());
2683 backtrack, 2703 bound_checked = EmitAtomLetter(assembler,
2684 *checked_up_to < cp_offset + j); 2704 compiler->ascii(),
2705 quarks[j],
2706 backtrack,
2707 cp_offset + j,
2708 *checked_up_to < cp_offset + j,
2709 preloaded);
2710 }
2711 if (bound_checked) {
2712 if (cp_offset + j > *checked_up_to) {
2713 *checked_up_to = cp_offset + j;
2685 } 2714 }
2686 assembler->CheckNotCharacter(quarks[j], backtrack);
2687 }
2688 } else {
2689 ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
2690 ASSERT(compiler->ignore_case());
2691 bound_checked = EmitAtomLetter(assembler,
2692 compiler->ascii(),
2693 quarks[j],
2694 backtrack,
2695 cp_offset + j,
2696 *checked_up_to < cp_offset + j,
2697 preloaded);
2698 }
2699 if (pass != NON_ASCII_MATCH && bound_checked) {
2700 if (cp_offset + j > *checked_up_to) {
2701 *checked_up_to = cp_offset + j;
2702 } 2715 }
2703 } 2716 }
2704 } 2717 }
2705 } 2718 }
2706 } else { 2719 } else {
2707 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); 2720 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
2708 if (first_element_checked && i == 0) continue; 2721 if (first_element_checked && i == 0) continue;
2709 if (quick_check != NULL && 2722 if (quick_check != NULL &&
2710 elm.cp_offset < quick_check->characters() && 2723 elm.cp_offset < quick_check->characters() &&
2711 quick_check->positions(elm.cp_offset)->determines_perfectly) { 2724 quick_check->positions(elm.cp_offset)->determines_perfectly) {
(...skipping 2155 matching lines...) Expand 10 before | Expand all | Expand 10 after
4867 EmbeddedVector<byte, 1024> codes; 4880 EmbeddedVector<byte, 1024> codes;
4868 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4881 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4869 return compiler.Assemble(&macro_assembler, 4882 return compiler.Assemble(&macro_assembler,
4870 node, 4883 node,
4871 data->capture_count, 4884 data->capture_count,
4872 pattern); 4885 pattern);
4873 } 4886 }
4874 4887
4875 4888
4876 }} // namespace v8::internal 4889 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | test/mjsunit/regress/regress-227.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698