Chromium Code Reviews

Side by Side Diff: src/jsregexp.cc

Issue 11759008: Introduce ENABLE_LATIN_1 compile flag (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: a bunch of sign conversions Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | | Annotate | Revision Log
« no previous file with comments | « src/json-parser.h ('k') | src/log.cc » ('j') | src/objects.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1663 matching lines...)
1674 bool ascii_subject, 1674 bool ascii_subject,
1675 unibrow::uchar* letters) { 1675 unibrow::uchar* letters) {
1676 int length = 1676 int length =
1677 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); 1677 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
1678 // Unibrow returns 0 or 1 for characters where case independence is 1678 // Unibrow returns 0 or 1 for characters where case independence is
1679 // trivial. 1679 // trivial.
1680 if (length == 0) { 1680 if (length == 0) {
1681 letters[0] = character; 1681 letters[0] = character;
1682 length = 1; 1682 length = 1;
1683 } 1683 }
1684 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { 1684 if (!ascii_subject || character <= String::kMaxOneByteCharCode) {
1685 return length; 1685 return length;
1686 } 1686 }
1687 // The standard requires that non-ASCII characters cannot have ASCII 1687 // The standard requires that non-ASCII characters cannot have ASCII
1688 // character codes in their equivalence class. 1688 // character codes in their equivalence class.
1689 return 0; 1689 return 0;
1690 } 1690 }
1691 1691
1692 1692
1693 static inline bool EmitSimpleCharacter(Isolate* isolate, 1693 static inline bool EmitSimpleCharacter(Isolate* isolate,
1694 RegExpCompiler* compiler, 1694 RegExpCompiler* compiler,
(...skipping 30 matching lines...)
1725 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1725 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1726 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); 1726 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
1727 if (length < 1) { 1727 if (length < 1) {
1728 // This can't match. Must be an ASCII subject and a non-ASCII character. 1728 // This can't match. Must be an ASCII subject and a non-ASCII character.
1729 // We do not need to do anything since the ASCII pass already handled this. 1729 // We do not need to do anything since the ASCII pass already handled this.
1730 return false; // Bounds not checked. 1730 return false; // Bounds not checked.
1731 } 1731 }
1732 bool checked = false; 1732 bool checked = false;
1733 // We handle the length > 1 case in a later pass. 1733 // We handle the length > 1 case in a later pass.
1734 if (length == 1) { 1734 if (length == 1) {
1735 if (ascii && c > String::kMaxAsciiCharCodeU) { 1735 if (ascii && c > String::kMaxOneByteCharCodeU) {
1736 // Can't match - see above. 1736 // Can't match - see above.
1737 return false; // Bounds not checked. 1737 return false; // Bounds not checked.
1738 } 1738 }
1739 if (!preloaded) { 1739 if (!preloaded) {
1740 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1740 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1741 checked = check; 1741 checked = check;
1742 } 1742 }
1743 macro_assembler->CheckNotCharacter(c, on_failure); 1743 macro_assembler->CheckNotCharacter(c, on_failure);
1744 } 1744 }
1745 return checked; 1745 return checked;
1746 } 1746 }
1747 1747
1748 1748
1749 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, 1749 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
1750 bool ascii, 1750 bool ascii,
1751 uc16 c1, 1751 uc16 c1,
1752 uc16 c2, 1752 uc16 c2,
1753 Label* on_failure) { 1753 Label* on_failure) {
1754 uc16 char_mask; 1754 uc16 char_mask;
1755 if (ascii) { 1755 if (ascii) {
1756 char_mask = String::kMaxAsciiCharCode; 1756 char_mask = String::kMaxOneByteCharCode;
1757 } else { 1757 } else {
1758 char_mask = String::kMaxUtf16CodeUnit; 1758 char_mask = String::kMaxUtf16CodeUnit;
1759 } 1759 }
1760 uc16 exor = c1 ^ c2; 1760 uc16 exor = c1 ^ c2;
1761 // Check whether exor has only one bit set. 1761 // Check whether exor has only one bit set.
1762 if (((exor - 1) & exor) == 0) { 1762 if (((exor - 1) & exor) == 0) {
1763 // If c1 and c2 differ only by one bit. 1763 // If c1 and c2 differ only by one bit.
1764 // Ecma262UnCanonicalize always gives the highest number last. 1764 // Ecma262UnCanonicalize always gives the highest number last.
1765 ASSERT(c2 > c1); 1765 ASSERT(c2 > c1);
1766 uc16 mask = char_mask ^ exor; 1766 uc16 mask = char_mask ^ exor;
(...skipping 233 matching lines...)
2000 // encoding space can be quickly tested with a table lookup, so we don't 2000 // encoding space can be quickly tested with a table lookup, so we don't
2001 // wish to do binary chop search at a smaller granularity than that. A 2001 // wish to do binary chop search at a smaller granularity than that. A
2002 // 128-character space can take up a lot of space in the ranges array if, 2002 // 128-character space can take up a lot of space in the ranges array if,
2003 // for example, we only want to match every second character (eg. the lower 2003 // for example, we only want to match every second character (eg. the lower
2004 // case characters on some Unicode pages). 2004 // case characters on some Unicode pages).
2005 int binary_chop_index = (end_index + start_index) / 2; 2005 int binary_chop_index = (end_index + start_index) / 2;
2006 // The first test ensures that we get to the code that handles the ASCII 2006 // The first test ensures that we get to the code that handles the ASCII
2007 // range with a single not-taken branch, speeding up this important 2007 // range with a single not-taken branch, speeding up this important
2008 // character range (even non-ASCII charset-based text has spaces and 2008 // character range (even non-ASCII charset-based text has spaces and
2009 // punctuation). 2009 // punctuation).
2010 if (*border - 1 > String::kMaxAsciiCharCode && // ASCII case. 2010 if (*border - 1 > String::kMaxOneByteCharCode && // ASCII case.
Yang 2013/01/07 16:15:07 Change the trailing comment: with kMaxOneByteCharCode this is no longer just the "ASCII case".
2011 end_index - start_index > (*new_start_index - start_index) * 2 && 2011 end_index - start_index > (*new_start_index - start_index) * 2 &&
2012 last - first > kSize * 2 && 2012 last - first > kSize * 2 &&
2013 binary_chop_index > *new_start_index && 2013 binary_chop_index > *new_start_index &&
2014 ranges->at(binary_chop_index) >= first + 2 * kSize) { 2014 ranges->at(binary_chop_index) >= first + 2 * kSize) {
2015 int scan_forward_for_section_border = binary_chop_index;; 2015 int scan_forward_for_section_border = binary_chop_index;;
2016 int new_border = (ranges->at(binary_chop_index) | kMask) + 1; 2016 int new_border = (ranges->at(binary_chop_index) | kMask) + 1;
2017 2017
2018 while (scan_forward_for_section_border < end_index) { 2018 while (scan_forward_for_section_border < end_index) {
2019 if (ranges->at(scan_forward_for_section_border) > new_border) { 2019 if (ranges->at(scan_forward_for_section_border) > new_border) {
2020 *new_start_index = scan_forward_for_section_border; 2020 *new_start_index = scan_forward_for_section_border;
(...skipping 170 matching lines...)
2191 max_char, 2191 max_char,
2192 &dummy, 2192 &dummy,
2193 flip ? odd_label : even_label, 2193 flip ? odd_label : even_label,
2194 flip ? even_label : odd_label); 2194 flip ? even_label : odd_label);
2195 } 2195 }
2196 } 2196 }
2197 2197
2198 2198
2199 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 2199 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
2200 RegExpCharacterClass* cc, 2200 RegExpCharacterClass* cc,
2201 bool ascii, 2201 bool ascii,
Yang 2013/01/07 16:15:07 Renaming the `ascii` parameter would make sense, since it now selects the one-byte limit (kMaxOneByteCharCode) rather than ASCII.
2202 Label* on_failure, 2202 Label* on_failure,
2203 int cp_offset, 2203 int cp_offset,
2204 bool check_offset, 2204 bool check_offset,
2205 bool preloaded, 2205 bool preloaded,
2206 Zone* zone) { 2206 Zone* zone) {
2207 ZoneList<CharacterRange>* ranges = cc->ranges(zone); 2207 ZoneList<CharacterRange>* ranges = cc->ranges(zone);
2208 if (!CharacterRange::IsCanonical(ranges)) { 2208 if (!CharacterRange::IsCanonical(ranges)) {
2209 CharacterRange::Canonicalize(ranges); 2209 CharacterRange::Canonicalize(ranges);
2210 } 2210 }
2211 2211
2212 int max_char; 2212 int max_char;
2213 if (ascii) { 2213 if (ascii) {
2214 max_char = String::kMaxAsciiCharCode; 2214 max_char = String::kMaxOneByteCharCode;
2215 } else { 2215 } else {
2216 max_char = String::kMaxUtf16CodeUnit; 2216 max_char = String::kMaxUtf16CodeUnit;
2217 } 2217 }
2218 2218
2219 int range_count = ranges->length(); 2219 int range_count = ranges->length();
2220 2220
2221 int last_valid_range = range_count - 1; 2221 int last_valid_range = range_count - 1;
2222 while (last_valid_range >= 0) { 2222 while (last_valid_range >= 0) {
2223 CharacterRange& range = ranges->at(last_valid_range); 2223 CharacterRange& range = ranges->at(last_valid_range);
2224 if (range.from() <= max_char) { 2224 if (range.from() <= max_char) {
(...skipping 277 matching lines...)
2502 static inline uint32_t SmearBitsRight(uint32_t v) { 2502 static inline uint32_t SmearBitsRight(uint32_t v) {
2503 v |= v >> 1; 2503 v |= v >> 1;
2504 v |= v >> 2; 2504 v |= v >> 2;
2505 v |= v >> 4; 2505 v |= v >> 4;
2506 v |= v >> 8; 2506 v |= v >> 8;
2507 v |= v >> 16; 2507 v |= v >> 16;
2508 return v; 2508 return v;
2509 } 2509 }
2510 2510
2511 2511
2512 bool QuickCheckDetails::Rationalize(bool asc) { 2512 bool QuickCheckDetails::Rationalize(bool asc) {
Yang 2013/01/07 16:15:07 Ditto: the parameter name `asc` should be changed as well.
2513 bool found_useful_op = false; 2513 bool found_useful_op = false;
2514 uint32_t char_mask; 2514 uint32_t char_mask;
2515 if (asc) { 2515 if (asc) {
2516 char_mask = String::kMaxAsciiCharCode; 2516 char_mask = String::kMaxOneByteCharCode;
2517 } else { 2517 } else {
2518 char_mask = String::kMaxUtf16CodeUnit; 2518 char_mask = String::kMaxUtf16CodeUnit;
2519 } 2519 }
2520 mask_ = 0; 2520 mask_ = 0;
2521 value_ = 0; 2521 value_ = 0;
2522 int char_shift = 0; 2522 int char_shift = 0;
2523 for (int i = 0; i < characters_; i++) { 2523 for (int i = 0; i < characters_; i++) {
2524 Position* pos = &positions_[i]; 2524 Position* pos = &positions_[i];
2525 if ((pos->mask & String::kMaxAsciiCharCode) != 0) { 2525 if ((pos->mask & String::kMaxOneByteCharCode) != 0) {
2526 found_useful_op = true; 2526 found_useful_op = true;
2527 } 2527 }
2528 mask_ |= (pos->mask & char_mask) << char_shift; 2528 mask_ |= (pos->mask & char_mask) << char_shift;
2529 value_ |= (pos->value & char_mask) << char_shift; 2529 value_ |= (pos->value & char_mask) << char_shift;
2530 char_shift += asc ? 8 : 16; 2530 char_shift += asc ? 8 : 16;
2531 } 2531 }
2532 return found_useful_op; 2532 return found_useful_op;
2533 } 2533 }
2534 2534
2535 2535
(...skipping 21 matching lines...)
2557 details->characters()); 2557 details->characters());
2558 } 2558 }
2559 2559
2560 2560
2561 bool need_mask = true; 2561 bool need_mask = true;
2562 2562
2563 if (details->characters() == 1) { 2563 if (details->characters() == 1) {
2564 // If number of characters preloaded is 1 then we used a byte or 16 bit 2564 // If number of characters preloaded is 1 then we used a byte or 16 bit
2565 // load so the value is already masked down. 2565 // load so the value is already masked down.
2566 uint32_t char_mask; 2566 uint32_t char_mask;
2567 if (compiler->ascii()) { 2567 if (compiler->ascii()) {
Yang 2013/01/07 16:15:07 Maybe onebyte() instead of ascii()?
2568 char_mask = String::kMaxAsciiCharCode; 2568 char_mask = String::kMaxOneByteCharCode;
2569 } else { 2569 } else {
2570 char_mask = String::kMaxUtf16CodeUnit; 2570 char_mask = String::kMaxUtf16CodeUnit;
2571 } 2571 }
2572 if ((mask & char_mask) == char_mask) need_mask = false; 2572 if ((mask & char_mask) == char_mask) need_mask = false;
2573 mask &= char_mask; 2573 mask &= char_mask;
2574 } else { 2574 } else {
2575 // For 2-character preloads in ASCII mode or 1-character preloads in 2575 // For 2-character preloads in ASCII mode or 1-character preloads in
2576 // TWO_BYTE mode we also use a 16 bit load with zero extend. 2576 // TWO_BYTE mode we also use a 16 bit load with zero extend.
2577 if (details->characters() == 2 && compiler->ascii()) { 2577 if (details->characters() == 2 && compiler->ascii()) {
2578 if ((mask & 0x7f7f) == 0x7f7f) need_mask = false; 2578 if ((mask & 0xffff) == 0xffff) need_mask = false;
2579 } else if (details->characters() == 1 && !compiler->ascii()) { 2579 } else if (details->characters() == 1 && !compiler->ascii()) {
2580 if ((mask & 0xffff) == 0xffff) need_mask = false; 2580 if ((mask & 0xffff) == 0xffff) need_mask = false;
2581 } else { 2581 } else {
2582 if (mask == 0xffffffff) need_mask = false; 2582 if (mask == 0xffffffff) need_mask = false;
2583 } 2583 }
2584 } 2584 }
2585 2585
2586 if (fall_through_on_failure) { 2586 if (fall_through_on_failure) {
2587 if (need_mask) { 2587 if (need_mask) {
2588 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); 2588 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
(...skipping 21 matching lines...)
2610 // generating a quick check. 2610 // generating a quick check.
2611 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, 2611 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
2612 RegExpCompiler* compiler, 2612 RegExpCompiler* compiler,
2613 int characters_filled_in, 2613 int characters_filled_in,
2614 bool not_at_start) { 2614 bool not_at_start) {
2615 Isolate* isolate = Isolate::Current(); 2615 Isolate* isolate = Isolate::Current();
2616 ASSERT(characters_filled_in < details->characters()); 2616 ASSERT(characters_filled_in < details->characters());
2617 int characters = details->characters(); 2617 int characters = details->characters();
2618 int char_mask; 2618 int char_mask;
2619 if (compiler->ascii()) { 2619 if (compiler->ascii()) {
2620 char_mask = String::kMaxAsciiCharCode; 2620 char_mask = String::kMaxOneByteCharCode;
2621 } else { 2621 } else {
2622 char_mask = String::kMaxUtf16CodeUnit; 2622 char_mask = String::kMaxUtf16CodeUnit;
2623 } 2623 }
2624 for (int k = 0; k < elms_->length(); k++) { 2624 for (int k = 0; k < elms_->length(); k++) {
2625 TextElement elm = elms_->at(k); 2625 TextElement elm = elms_->at(k);
2626 if (elm.type == TextElement::ATOM) { 2626 if (elm.type == TextElement::ATOM) {
2627 Vector<const uc16> quarks = elm.data.u_atom->data(); 2627 Vector<const uc16> quarks = elm.data.u_atom->data();
2628 for (int i = 0; i < characters && i < quarks.length(); i++) { 2628 for (int i = 0; i < characters && i < quarks.length(); i++) {
2629 QuickCheckDetails::Position* pos = 2629 QuickCheckDetails::Position* pos =
2630 details->positions(characters_filled_in); 2630 details->positions(characters_filled_in);
(...skipping 227 matching lines...)
2858 VisitMarker marker(info()); 2858 VisitMarker marker(info());
2859 int element_count = elms_->length(); 2859 int element_count = elms_->length();
2860 for (int i = 0; i < element_count; i++) { 2860 for (int i = 0; i < element_count; i++) {
2861 TextElement elm = elms_->at(i); 2861 TextElement elm = elms_->at(i);
2862 if (elm.type == TextElement::ATOM) { 2862 if (elm.type == TextElement::ATOM) {
2863 Vector<const uc16> quarks = elm.data.u_atom->data(); 2863 Vector<const uc16> quarks = elm.data.u_atom->data();
2864 for (int j = 0; j < quarks.length(); j++) { 2864 for (int j = 0; j < quarks.length(); j++) {
2865 // We don't need special handling for case independence 2865 // We don't need special handling for case independence
2866 // because of the rule that case independence cannot make 2866 // because of the rule that case independence cannot make
2867 // a non-ASCII character match an ASCII character. 2867 // a non-ASCII character match an ASCII character.
2868 if (quarks[j] > String::kMaxAsciiCharCode) { 2868 if (quarks[j] > String::kMaxOneByteCharCode) {
Yang 2013/01/07 16:15:07 Does the comment still hold true for the Latin1 character range?
2869 return set_replacement(NULL); 2869 return set_replacement(NULL);
2870 } 2870 }
2871 } 2871 }
2872 } else { 2872 } else {
2873 ASSERT(elm.type == TextElement::CHAR_CLASS); 2873 ASSERT(elm.type == TextElement::CHAR_CLASS);
2874 RegExpCharacterClass* cc = elm.data.u_char_class; 2874 RegExpCharacterClass* cc = elm.data.u_char_class;
2875 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 2875 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2876 if (!CharacterRange::IsCanonical(ranges)) { 2876 if (!CharacterRange::IsCanonical(ranges)) {
2877 CharacterRange::Canonicalize(ranges); 2877 CharacterRange::Canonicalize(ranges);
2878 } 2878 }
2879 // Now they are in order so we only need to look at the first. 2879 // Now they are in order so we only need to look at the first.
2880 int range_count = ranges->length(); 2880 int range_count = ranges->length();
2881 if (cc->is_negated()) { 2881 if (cc->is_negated()) {
2882 if (range_count != 0 && 2882 if (range_count != 0 &&
2883 ranges->at(0).from() == 0 && 2883 ranges->at(0).from() == 0 &&
2884 ranges->at(0).to() >= String::kMaxAsciiCharCode) { 2884 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
2885 return set_replacement(NULL); 2885 return set_replacement(NULL);
2886 } 2886 }
2887 } else { 2887 } else {
2888 if (range_count == 0 || 2888 if (range_count == 0 ||
2889 ranges->at(0).from() > String::kMaxAsciiCharCode) { 2889 ranges->at(0).from() > String::kMaxOneByteCharCode) {
2890 return set_replacement(NULL); 2890 return set_replacement(NULL);
2891 } 2891 }
2892 } 2892 }
2893 } 2893 }
2894 } 2894 }
2895 return FilterSuccessor(depth - 1); 2895 return FilterSuccessor(depth - 1);
2896 } 2896 }
2897 2897
2898 2898
2899 RegExpNode* LoopChoiceNode::FilterASCII(int depth) { 2899 RegExpNode* LoopChoiceNode::FilterASCII(int depth) {
(...skipping 392 matching lines...)
3292 int cp_offset = trace->cp_offset() + elm.cp_offset; 3292 int cp_offset = trace->cp_offset() + elm.cp_offset;
3293 if (elm.type == TextElement::ATOM) { 3293 if (elm.type == TextElement::ATOM) {
3294 Vector<const uc16> quarks = elm.data.u_atom->data(); 3294 Vector<const uc16> quarks = elm.data.u_atom->data();
3295 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { 3295 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
3296 if (first_element_checked && i == 0 && j == 0) continue; 3296 if (first_element_checked && i == 0 && j == 0) continue;
3297 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue; 3297 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue;
3298 EmitCharacterFunction* emit_function = NULL; 3298 EmitCharacterFunction* emit_function = NULL;
3299 switch (pass) { 3299 switch (pass) {
3300 case NON_ASCII_MATCH: 3300 case NON_ASCII_MATCH:
3301 ASSERT(ascii); 3301 ASSERT(ascii);
3302 if (quarks[j] > String::kMaxAsciiCharCode) { 3302 if (quarks[j] > String::kMaxOneByteCharCode) {
3303 assembler->GoTo(backtrack); 3303 assembler->GoTo(backtrack);
3304 return; 3304 return;
3305 } 3305 }
3306 break; 3306 break;
3307 case NON_LETTER_CHARACTER_MATCH: 3307 case NON_LETTER_CHARACTER_MATCH:
3308 emit_function = &EmitAtomNonLetter; 3308 emit_function = &EmitAtomNonLetter;
3309 break; 3309 break;
3310 case SIMPLE_CHARACTER_MATCH: 3310 case SIMPLE_CHARACTER_MATCH:
3311 emit_function = &EmitSimpleCharacter; 3311 emit_function = &EmitSimpleCharacter;
3312 break; 3312 break;
(...skipping 178 matching lines...)
3491 ZoneList<CharacterRange>* ranges = node->ranges(zone()); 3491 ZoneList<CharacterRange>* ranges = node->ranges(zone());
3492 if (!CharacterRange::IsCanonical(ranges)) { 3492 if (!CharacterRange::IsCanonical(ranges)) {
3493 CharacterRange::Canonicalize(ranges); 3493 CharacterRange::Canonicalize(ranges);
3494 } 3494 }
3495 if (node->is_negated()) { 3495 if (node->is_negated()) {
3496 return ranges->length() == 0 ? on_success() : NULL; 3496 return ranges->length() == 0 ? on_success() : NULL;
3497 } 3497 }
3498 if (ranges->length() != 1) return NULL; 3498 if (ranges->length() != 1) return NULL;
3499 uint32_t max_char; 3499 uint32_t max_char;
3500 if (compiler->ascii()) { 3500 if (compiler->ascii()) {
3501 max_char = String::kMaxAsciiCharCode; 3501 max_char = String::kMaxOneByteCharCode;
3502 } else { 3502 } else {
3503 max_char = String::kMaxUtf16CodeUnit; 3503 max_char = String::kMaxUtf16CodeUnit;
3504 } 3504 }
3505 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL; 3505 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;
3506 } 3506 }
3507 3507
3508 3508
3509 // Finds the fixed match length of a sequence of nodes that goes from 3509 // Finds the fixed match length of a sequence of nodes that goes from
3510 // this alternative and back to this choice node. If there are variable 3510 // this alternative and back to this choice node. If there are variable
3511 // length nodes or other complications in the way then return a sentinel 3511 // length nodes or other complications in the way then return a sentinel
(...skipping 179 matching lines...)
3691 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; 3691 for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
3692 } 3692 }
3693 } 3693 }
3694 3694
3695 3695
3696 BoyerMooreLookahead::BoyerMooreLookahead( 3696 BoyerMooreLookahead::BoyerMooreLookahead(
3697 int length, RegExpCompiler* compiler, Zone* zone) 3697 int length, RegExpCompiler* compiler, Zone* zone)
3698 : length_(length), 3698 : length_(length),
3699 compiler_(compiler) { 3699 compiler_(compiler) {
3700 if (compiler->ascii()) { 3700 if (compiler->ascii()) {
3701 max_char_ = String::kMaxAsciiCharCode; 3701 max_char_ = String::kMaxOneByteCharCode;
3702 } else { 3702 } else {
3703 max_char_ = String::kMaxUtf16CodeUnit; 3703 max_char_ = String::kMaxUtf16CodeUnit;
3704 } 3704 }
3705 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); 3705 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
3706 for (int i = 0; i < length; i++) { 3706 for (int i = 0; i < length; i++) {
3707 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone); 3707 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);
3708 } 3708 }
3709 } 3709 }
3710 3710
3711 3711
(...skipping 1618 matching lines...)
5330 } 5330 }
5331 5331
5332 5332
5333 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, 5333 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
5334 bool is_ascii, 5334 bool is_ascii,
5335 Zone* zone) { 5335 Zone* zone) {
5336 Isolate* isolate = Isolate::Current(); 5336 Isolate* isolate = Isolate::Current();
5337 uc16 bottom = from(); 5337 uc16 bottom = from();
5338 uc16 top = to(); 5338 uc16 top = to();
5339 if (is_ascii) { 5339 if (is_ascii) {
5340 if (bottom > String::kMaxAsciiCharCode) return; 5340 if (bottom > String::kMaxOneByteCharCode) return;
5341 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; 5341 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
5342 } 5342 }
5343 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5343 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5344 if (top == bottom) { 5344 if (top == bottom) {
5345 // If this is a singleton we just expand the one character. 5345 // If this is a singleton we just expand the one character.
5346 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); 5346 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
5347 for (int i = 0; i < length; i++) { 5347 for (int i = 0; i < length; i++) {
5348 uc32 chr = chars[i]; 5348 uc32 chr = chars[i];
5349 if (chr != bottom) { 5349 if (chr != bottom) {
5350 ranges->Add(CharacterRange::Singleton(chars[i]), zone); 5350 ranges->Add(CharacterRange::Singleton(chars[i]), zone);
5351 } 5351 }
(...skipping 526 matching lines...)
5878 if (offset >= bm->length()) { 5878 if (offset >= bm->length()) {
5879 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5879 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5880 return; 5880 return;
5881 } 5881 }
5882 uc16 character = atom->data()[j]; 5882 uc16 character = atom->data()[j];
5883 if (bm->compiler()->ignore_case()) { 5883 if (bm->compiler()->ignore_case()) {
5884 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5884 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5885 int length = GetCaseIndependentLetters( 5885 int length = GetCaseIndependentLetters(
5886 ISOLATE, 5886 ISOLATE,
5887 character, 5887 character,
5888 bm->max_char() == String::kMaxAsciiCharCode, 5888 bm->max_char() == String::kMaxOneByteCharCode,
5889 chars); 5889 chars);
5890 for (int j = 0; j < length; j++) { 5890 for (int j = 0; j < length; j++) {
5891 bm->Set(offset, chars[j]); 5891 bm->Set(offset, chars[j]);
5892 } 5892 }
5893 } else { 5893 } else {
5894 if (character <= max_char) bm->Set(offset, character); 5894 if (character <= max_char) bm->Set(offset, character);
5895 } 5895 }
5896 } 5896 }
5897 } else { 5897 } else {
5898 ASSERT(text.type == TextElement::CHAR_CLASS); 5898 ASSERT(text.type == TextElement::CHAR_CLASS);
(...skipping 260 matching lines...)
6159 } 6159 }
6160 6160
6161 return compiler.Assemble(&macro_assembler, 6161 return compiler.Assemble(&macro_assembler,
6162 node, 6162 node,
6163 data->capture_count, 6163 data->capture_count,
6164 pattern); 6164 pattern);
6165 } 6165 }
6166 6166
6167 6167
6168 }} // namespace v8::internal 6168 }} // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/json-parser.h ('k') | src/log.cc » ('j') | src/objects.h » ('J')

Powered by Google App Engine