| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 915 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 926 } | 926 } |
| 927 | 927 |
| 928 | 928 |
| 929 void RegExpText::AppendToText(RegExpText* text, Zone* zone) { | 929 void RegExpText::AppendToText(RegExpText* text, Zone* zone) { |
| 930 for (int i = 0; i < elements()->length(); i++) | 930 for (int i = 0; i < elements()->length(); i++) |
| 931 text->AddElement(elements()->at(i), zone); | 931 text->AddElement(elements()->at(i), zone); |
| 932 } | 932 } |
| 933 | 933 |
| 934 | 934 |
| 935 TextElement TextElement::Atom(RegExpAtom* atom) { | 935 TextElement TextElement::Atom(RegExpAtom* atom) { |
| 936 TextElement result = TextElement(ATOM); | 936 return TextElement(ATOM, atom); |
| 937 result.data.u_atom = atom; | |
| 938 return result; | |
| 939 } | 937 } |
| 940 | 938 |
| 941 | 939 |
| 942 TextElement TextElement::CharClass( | 940 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { |
| 943 RegExpCharacterClass* char_class) { | 941 return TextElement(CHAR_CLASS, char_class); |
| 944 TextElement result = TextElement(CHAR_CLASS); | |
| 945 result.data.u_char_class = char_class; | |
| 946 return result; | |
| 947 } | 942 } |
| 948 | 943 |
| 949 | 944 |
| 950 int TextElement::length() { | 945 int TextElement::length() const { |
| 951 if (text_type == ATOM) { | 946 switch (text_type()) { |
| 952 return data.u_atom->length(); | 947 case ATOM: |
| 953 } else { | 948 return atom()->length(); |
| 954 ASSERT(text_type == CHAR_CLASS); | 949 |
| 955 return 1; | 950 case CHAR_CLASS: |
| 951 return 1; |
| 956 } | 952 } |
| 953 UNREACHABLE(); |
| 954 return 0; |
| 957 } | 955 } |
| 958 | 956 |
| 959 | 957 |
| 960 DispatchTable* ChoiceNode::GetTable(bool ignore_case) { | 958 DispatchTable* ChoiceNode::GetTable(bool ignore_case) { |
| 961 if (table_ == NULL) { | 959 if (table_ == NULL) { |
| 962 table_ = new(zone()) DispatchTable(zone()); | 960 table_ = new(zone()) DispatchTable(zone()); |
| 963 DispatchTableConstructor cons(table_, ignore_case, zone()); | 961 DispatchTableConstructor cons(table_, ignore_case, zone()); |
| 964 cons.BuildTable(this); | 962 cons.BuildTable(this); |
| 965 } | 963 } |
| 966 return table_; | 964 return table_; |
| (...skipping 1587 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2554 ASSERT(characters_filled_in < details->characters()); | 2552 ASSERT(characters_filled_in < details->characters()); |
| 2555 int characters = details->characters(); | 2553 int characters = details->characters(); |
| 2556 int char_mask; | 2554 int char_mask; |
| 2557 if (compiler->ascii()) { | 2555 if (compiler->ascii()) { |
| 2558 char_mask = String::kMaxOneByteCharCode; | 2556 char_mask = String::kMaxOneByteCharCode; |
| 2559 } else { | 2557 } else { |
| 2560 char_mask = String::kMaxUtf16CodeUnit; | 2558 char_mask = String::kMaxUtf16CodeUnit; |
| 2561 } | 2559 } |
| 2562 for (int k = 0; k < elms_->length(); k++) { | 2560 for (int k = 0; k < elms_->length(); k++) { |
| 2563 TextElement elm = elms_->at(k); | 2561 TextElement elm = elms_->at(k); |
| 2564 if (elm.text_type == TextElement::ATOM) { | 2562 if (elm.text_type() == TextElement::ATOM) { |
| 2565 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2563 Vector<const uc16> quarks = elm.atom()->data(); |
| 2566 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2564 for (int i = 0; i < characters && i < quarks.length(); i++) { |
| 2567 QuickCheckDetails::Position* pos = | 2565 QuickCheckDetails::Position* pos = |
| 2568 details->positions(characters_filled_in); | 2566 details->positions(characters_filled_in); |
| 2569 uc16 c = quarks[i]; | 2567 uc16 c = quarks[i]; |
| 2570 if (c > char_mask) { | 2568 if (c > char_mask) { |
| 2571 // If we expect a non-ASCII character from an ASCII string, | 2569 // If we expect a non-ASCII character from an ASCII string, |
| 2572 // there is no way we can match. Not even case independent | 2570 // there is no way we can match. Not even case independent |
| 2573 // matching can turn an ASCII character into non-ASCII or | 2571 // matching can turn an ASCII character into non-ASCII or |
| 2574 // vice versa. | 2572 // vice versa. |
| 2575 details->set_cannot_match(); | 2573 details->set_cannot_match(); |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2617 } | 2615 } |
| 2618 characters_filled_in++; | 2616 characters_filled_in++; |
| 2619 ASSERT(characters_filled_in <= details->characters()); | 2617 ASSERT(characters_filled_in <= details->characters()); |
| 2620 if (characters_filled_in == details->characters()) { | 2618 if (characters_filled_in == details->characters()) { |
| 2621 return; | 2619 return; |
| 2622 } | 2620 } |
| 2623 } | 2621 } |
| 2624 } else { | 2622 } else { |
| 2625 QuickCheckDetails::Position* pos = | 2623 QuickCheckDetails::Position* pos = |
| 2626 details->positions(characters_filled_in); | 2624 details->positions(characters_filled_in); |
| 2627 RegExpCharacterClass* tree = elm.data.u_char_class; | 2625 RegExpCharacterClass* tree = elm.char_class(); |
| 2628 ZoneList<CharacterRange>* ranges = tree->ranges(zone()); | 2626 ZoneList<CharacterRange>* ranges = tree->ranges(zone()); |
| 2629 if (tree->is_negated()) { | 2627 if (tree->is_negated()) { |
| 2630 // A quick check uses multi-character mask and compare. There is no | 2628 // A quick check uses multi-character mask and compare. There is no |
| 2631 // useful way to incorporate a negative char class into this scheme | 2629 // useful way to incorporate a negative char class into this scheme |
| 2632 // so we just conservatively create a mask and value that will always | 2630 // so we just conservatively create a mask and value that will always |
| 2633 // succeed. | 2631 // succeed. |
| 2634 pos->mask = 0; | 2632 pos->mask = 0; |
| 2635 pos->value = 0; | 2633 pos->value = 0; |
| 2636 } else { | 2634 } else { |
| 2637 int first_range = 0; | 2635 int first_range = 0; |
| (...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2807 | 2805 |
| 2808 | 2806 |
| 2809 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { | 2807 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { |
| 2810 if (info()->replacement_calculated) return replacement(); | 2808 if (info()->replacement_calculated) return replacement(); |
| 2811 if (depth < 0) return this; | 2809 if (depth < 0) return this; |
| 2812 ASSERT(!info()->visited); | 2810 ASSERT(!info()->visited); |
| 2813 VisitMarker marker(info()); | 2811 VisitMarker marker(info()); |
| 2814 int element_count = elms_->length(); | 2812 int element_count = elms_->length(); |
| 2815 for (int i = 0; i < element_count; i++) { | 2813 for (int i = 0; i < element_count; i++) { |
| 2816 TextElement elm = elms_->at(i); | 2814 TextElement elm = elms_->at(i); |
| 2817 if (elm.text_type == TextElement::ATOM) { | 2815 if (elm.text_type() == TextElement::ATOM) { |
| 2818 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2816 Vector<const uc16> quarks = elm.atom()->data(); |
| 2819 for (int j = 0; j < quarks.length(); j++) { | 2817 for (int j = 0; j < quarks.length(); j++) { |
| 2820 uint16_t c = quarks[j]; | 2818 uint16_t c = quarks[j]; |
| 2821 if (c <= String::kMaxOneByteCharCode) continue; | 2819 if (c <= String::kMaxOneByteCharCode) continue; |
| 2822 if (!ignore_case) return set_replacement(NULL); | 2820 if (!ignore_case) return set_replacement(NULL); |
| 2823 // Here, we need to check for characters whose upper and lower cases | 2821 // Here, we need to check for characters whose upper and lower cases |
| 2824 // are outside the Latin-1 range. | 2822 // are outside the Latin-1 range. |
| 2825 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); | 2823 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
| 2826 // Character is outside Latin-1 completely | 2824 // Character is outside Latin-1 completely |
| 2827 if (converted == 0) return set_replacement(NULL); | 2825 if (converted == 0) return set_replacement(NULL); |
| 2828 // Convert quark to Latin-1 in place. | 2826 // Convert quark to Latin-1 in place. |
| 2829 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); | 2827 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); |
| 2830 copy[j] = converted; | 2828 copy[j] = converted; |
| 2831 } | 2829 } |
| 2832 } else { | 2830 } else { |
| 2833 ASSERT(elm.text_type == TextElement::CHAR_CLASS); | 2831 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); |
| 2834 RegExpCharacterClass* cc = elm.data.u_char_class; | 2832 RegExpCharacterClass* cc = elm.char_class(); |
| 2835 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2833 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
| 2836 if (!CharacterRange::IsCanonical(ranges)) { | 2834 if (!CharacterRange::IsCanonical(ranges)) { |
| 2837 CharacterRange::Canonicalize(ranges); | 2835 CharacterRange::Canonicalize(ranges); |
| 2838 } | 2836 } |
| 2839 // Now they are in order so we only need to look at the first. | 2837 // Now they are in order so we only need to look at the first. |
| 2840 int range_count = ranges->length(); | 2838 int range_count = ranges->length(); |
| 2841 if (cc->is_negated()) { | 2839 if (cc->is_negated()) { |
| 2842 if (range_count != 0 && | 2840 if (range_count != 0 && |
| 2843 ranges->at(0).from() == 0 && | 2841 ranges->at(0).from() == 0 && |
| 2844 ranges->at(0).to() >= String::kMaxOneByteCharCode) { | 2842 ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
| (...skipping 404 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3249 bool first_element_checked, | 3247 bool first_element_checked, |
| 3250 int* checked_up_to) { | 3248 int* checked_up_to) { |
| 3251 Isolate* isolate = Isolate::Current(); | 3249 Isolate* isolate = Isolate::Current(); |
| 3252 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3250 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 3253 bool ascii = compiler->ascii(); | 3251 bool ascii = compiler->ascii(); |
| 3254 Label* backtrack = trace->backtrack(); | 3252 Label* backtrack = trace->backtrack(); |
| 3255 QuickCheckDetails* quick_check = trace->quick_check_performed(); | 3253 QuickCheckDetails* quick_check = trace->quick_check_performed(); |
| 3256 int element_count = elms_->length(); | 3254 int element_count = elms_->length(); |
| 3257 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 3255 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
| 3258 TextElement elm = elms_->at(i); | 3256 TextElement elm = elms_->at(i); |
| 3259 int cp_offset = trace->cp_offset() + elm.cp_offset; | 3257 int cp_offset = trace->cp_offset() + elm.cp_offset(); |
| 3260 if (elm.text_type == TextElement::ATOM) { | 3258 if (elm.text_type() == TextElement::ATOM) { |
| 3261 Vector<const uc16> quarks = elm.data.u_atom->data(); | 3259 Vector<const uc16> quarks = elm.atom()->data(); |
| 3262 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 3260 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
| 3263 if (first_element_checked && i == 0 && j == 0) continue; | 3261 if (first_element_checked && i == 0 && j == 0) continue; |
| 3264 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue; | 3262 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; |
| 3265 EmitCharacterFunction* emit_function = NULL; | 3263 EmitCharacterFunction* emit_function = NULL; |
| 3266 switch (pass) { | 3264 switch (pass) { |
| 3267 case NON_ASCII_MATCH: | 3265 case NON_ASCII_MATCH: |
| 3268 ASSERT(ascii); | 3266 ASSERT(ascii); |
| 3269 if (quarks[j] > String::kMaxOneByteCharCode) { | 3267 if (quarks[j] > String::kMaxOneByteCharCode) { |
| 3270 assembler->GoTo(backtrack); | 3268 assembler->GoTo(backtrack); |
| 3271 return; | 3269 return; |
| 3272 } | 3270 } |
| 3273 break; | 3271 break; |
| 3274 case NON_LETTER_CHARACTER_MATCH: | 3272 case NON_LETTER_CHARACTER_MATCH: |
| (...skipping 13 matching lines...) Expand all Loading... |
| 3288 compiler, | 3286 compiler, |
| 3289 quarks[j], | 3287 quarks[j], |
| 3290 backtrack, | 3288 backtrack, |
| 3291 cp_offset + j, | 3289 cp_offset + j, |
| 3292 *checked_up_to < cp_offset + j, | 3290 *checked_up_to < cp_offset + j, |
| 3293 preloaded); | 3291 preloaded); |
| 3294 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 3292 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
| 3295 } | 3293 } |
| 3296 } | 3294 } |
| 3297 } else { | 3295 } else { |
| 3298 ASSERT_EQ(elm.text_type, TextElement::CHAR_CLASS); | 3296 ASSERT_EQ(TextElement::CHAR_CLASS, elm.text_type()); |
| 3299 if (pass == CHARACTER_CLASS_MATCH) { | 3297 if (pass == CHARACTER_CLASS_MATCH) { |
| 3300 if (first_element_checked && i == 0) continue; | 3298 if (first_element_checked && i == 0) continue; |
| 3301 if (DeterminedAlready(quick_check, elm.cp_offset)) continue; | 3299 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
| 3302 RegExpCharacterClass* cc = elm.data.u_char_class; | 3300 RegExpCharacterClass* cc = elm.char_class(); |
| 3303 EmitCharClass(assembler, | 3301 EmitCharClass(assembler, |
| 3304 cc, | 3302 cc, |
| 3305 ascii, | 3303 ascii, |
| 3306 backtrack, | 3304 backtrack, |
| 3307 cp_offset, | 3305 cp_offset, |
| 3308 *checked_up_to < cp_offset, | 3306 *checked_up_to < cp_offset, |
| 3309 preloaded, | 3307 preloaded, |
| 3310 zone()); | 3308 zone()); |
| 3311 UpdateBoundsCheck(cp_offset, checked_up_to); | 3309 UpdateBoundsCheck(cp_offset, checked_up_to); |
| 3312 } | 3310 } |
| 3313 } | 3311 } |
| 3314 } | 3312 } |
| 3315 } | 3313 } |
| 3316 | 3314 |
| 3317 | 3315 |
| 3318 int TextNode::Length() { | 3316 int TextNode::Length() { |
| 3319 TextElement elm = elms_->last(); | 3317 TextElement elm = elms_->last(); |
| 3320 ASSERT(elm.cp_offset >= 0); | 3318 ASSERT(elm.cp_offset() >= 0); |
| 3321 if (elm.text_type == TextElement::ATOM) { | 3319 return elm.cp_offset() + elm.length(); |
| 3322 return elm.cp_offset + elm.data.u_atom->data().length(); | |
| 3323 } else { | |
| 3324 return elm.cp_offset + 1; | |
| 3325 } | |
| 3326 } | 3320 } |
| 3327 | 3321 |
| 3328 | 3322 |
| 3329 bool TextNode::SkipPass(int int_pass, bool ignore_case) { | 3323 bool TextNode::SkipPass(int int_pass, bool ignore_case) { |
| 3330 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); | 3324 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); |
| 3331 if (ignore_case) { | 3325 if (ignore_case) { |
| 3332 return pass == SIMPLE_CHARACTER_MATCH; | 3326 return pass == SIMPLE_CHARACTER_MATCH; |
| 3333 } else { | 3327 } else { |
| 3334 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; | 3328 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; |
| 3335 } | 3329 } |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3417 cp_offset_ = 0; | 3411 cp_offset_ = 0; |
| 3418 } | 3412 } |
| 3419 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); | 3413 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); |
| 3420 } | 3414 } |
| 3421 | 3415 |
| 3422 | 3416 |
| 3423 void TextNode::MakeCaseIndependent(bool is_ascii) { | 3417 void TextNode::MakeCaseIndependent(bool is_ascii) { |
| 3424 int element_count = elms_->length(); | 3418 int element_count = elms_->length(); |
| 3425 for (int i = 0; i < element_count; i++) { | 3419 for (int i = 0; i < element_count; i++) { |
| 3426 TextElement elm = elms_->at(i); | 3420 TextElement elm = elms_->at(i); |
| 3427 if (elm.text_type == TextElement::CHAR_CLASS) { | 3421 if (elm.text_type() == TextElement::CHAR_CLASS) { |
| 3428 RegExpCharacterClass* cc = elm.data.u_char_class; | 3422 RegExpCharacterClass* cc = elm.char_class(); |
| 3429 // None of the standard character classes is different in the case | 3423 // None of the standard character classes is different in the case |
| 3430 // independent case and it slows us down if we don't know that. | 3424 // independent case and it slows us down if we don't know that. |
| 3431 if (cc->is_standard(zone())) continue; | 3425 if (cc->is_standard(zone())) continue; |
| 3432 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 3426 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
| 3433 int range_count = ranges->length(); | 3427 int range_count = ranges->length(); |
| 3434 for (int j = 0; j < range_count; j++) { | 3428 for (int j = 0; j < range_count; j++) { |
| 3435 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone()); | 3429 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone()); |
| 3436 } | 3430 } |
| 3437 } | 3431 } |
| 3438 } | 3432 } |
| 3439 } | 3433 } |
| 3440 | 3434 |
| 3441 | 3435 |
| 3442 int TextNode::GreedyLoopTextLength() { | 3436 int TextNode::GreedyLoopTextLength() { |
| 3443 TextElement elm = elms_->at(elms_->length() - 1); | 3437 TextElement elm = elms_->at(elms_->length() - 1); |
| 3444 if (elm.text_type == TextElement::CHAR_CLASS) { | 3438 return elm.cp_offset() + elm.length(); |
| 3445 return elm.cp_offset + 1; | |
| 3446 } else { | |
| 3447 return elm.cp_offset + elm.data.u_atom->data().length(); | |
| 3448 } | |
| 3449 } | 3439 } |
| 3450 | 3440 |
| 3451 | 3441 |
| 3452 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( | 3442 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( |
| 3453 RegExpCompiler* compiler) { | 3443 RegExpCompiler* compiler) { |
| 3454 if (elms_->length() != 1) return NULL; | 3444 if (elms_->length() != 1) return NULL; |
| 3455 TextElement elm = elms_->at(0); | 3445 TextElement elm = elms_->at(0); |
| 3456 if (elm.text_type != TextElement::CHAR_CLASS) return NULL; | 3446 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; |
| 3457 RegExpCharacterClass* node = elm.data.u_char_class; | 3447 RegExpCharacterClass* node = elm.char_class(); |
| 3458 ZoneList<CharacterRange>* ranges = node->ranges(zone()); | 3448 ZoneList<CharacterRange>* ranges = node->ranges(zone()); |
| 3459 if (!CharacterRange::IsCanonical(ranges)) { | 3449 if (!CharacterRange::IsCanonical(ranges)) { |
| 3460 CharacterRange::Canonicalize(ranges); | 3450 CharacterRange::Canonicalize(ranges); |
| 3461 } | 3451 } |
| 3462 if (node->is_negated()) { | 3452 if (node->is_negated()) { |
| 3463 return ranges->length() == 0 ? on_success() : NULL; | 3453 return ranges->length() == 0 ? on_success() : NULL; |
| 3464 } | 3454 } |
| 3465 if (ranges->length() != 1) return NULL; | 3455 if (ranges->length() != 1) return NULL; |
| 3466 uint32_t max_char; | 3456 uint32_t max_char; |
| 3467 if (compiler->ascii()) { | 3457 if (compiler->ascii()) { |
| (...skipping 1053 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4521 } | 4511 } |
| 4522 } | 4512 } |
| 4523 | 4513 |
| 4524 | 4514 |
| 4525 void DotPrinter::VisitText(TextNode* that) { | 4515 void DotPrinter::VisitText(TextNode* that) { |
| 4526 Zone* zone = that->zone(); | 4516 Zone* zone = that->zone(); |
| 4527 stream()->Add(" n%p [label=\"", that); | 4517 stream()->Add(" n%p [label=\"", that); |
| 4528 for (int i = 0; i < that->elements()->length(); i++) { | 4518 for (int i = 0; i < that->elements()->length(); i++) { |
| 4529 if (i > 0) stream()->Add(" "); | 4519 if (i > 0) stream()->Add(" "); |
| 4530 TextElement elm = that->elements()->at(i); | 4520 TextElement elm = that->elements()->at(i); |
| 4531 switch (elm.text_type) { | 4521 switch (elm.text_type()) { |
| 4532 case TextElement::ATOM: { | 4522 case TextElement::ATOM: { |
| 4533 stream()->Add("'%w'", elm.data.u_atom->data()); | 4523 stream()->Add("'%w'", elm.atom()->data()); |
| 4534 break; | 4524 break; |
| 4535 } | 4525 } |
| 4536 case TextElement::CHAR_CLASS: { | 4526 case TextElement::CHAR_CLASS: { |
| 4537 RegExpCharacterClass* node = elm.data.u_char_class; | 4527 RegExpCharacterClass* node = elm.char_class(); |
| 4538 stream()->Add("["); | 4528 stream()->Add("["); |
| 4539 if (node->is_negated()) | 4529 if (node->is_negated()) |
| 4540 stream()->Add("^"); | 4530 stream()->Add("^"); |
| 4541 for (int j = 0; j < node->ranges(zone)->length(); j++) { | 4531 for (int j = 0; j < node->ranges(zone)->length(); j++) { |
| 4542 CharacterRange range = node->ranges(zone)->at(j); | 4532 CharacterRange range = node->ranges(zone)->at(j); |
| 4543 stream()->Add("%k-%k", range.from(), range.to()); | 4533 stream()->Add("%k-%k", range.from(), range.to()); |
| 4544 } | 4534 } |
| 4545 stream()->Add("]"); | 4535 stream()->Add("]"); |
| 4546 break; | 4536 break; |
| 4547 } | 4537 } |
| (...skipping 1161 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5709 } | 5699 } |
| 5710 | 5700 |
| 5711 | 5701 |
| 5712 void TextNode::CalculateOffsets() { | 5702 void TextNode::CalculateOffsets() { |
| 5713 int element_count = elements()->length(); | 5703 int element_count = elements()->length(); |
| 5714 // Set up the offsets of the elements relative to the start. This is a fixed | 5704 // Set up the offsets of the elements relative to the start. This is a fixed |
| 5715 // quantity since a TextNode can only contain fixed-width things. | 5705 // quantity since a TextNode can only contain fixed-width things. |
| 5716 int cp_offset = 0; | 5706 int cp_offset = 0; |
| 5717 for (int i = 0; i < element_count; i++) { | 5707 for (int i = 0; i < element_count; i++) { |
| 5718 TextElement& elm = elements()->at(i); | 5708 TextElement& elm = elements()->at(i); |
| 5719 elm.cp_offset = cp_offset; | 5709 elm.set_cp_offset(cp_offset); |
| 5720 if (elm.text_type == TextElement::ATOM) { | 5710 cp_offset += elm.length(); |
| 5721 cp_offset += elm.data.u_atom->data().length(); | |
| 5722 } else { | |
| 5723 cp_offset++; | |
| 5724 } | |
| 5725 } | 5711 } |
| 5726 } | 5712 } |
| 5727 | 5713 |
| 5728 | 5714 |
| 5729 void Analysis::VisitText(TextNode* that) { | 5715 void Analysis::VisitText(TextNode* that) { |
| 5730 if (ignore_case_) { | 5716 if (ignore_case_) { |
| 5731 that->MakeCaseIndependent(is_ascii_); | 5717 that->MakeCaseIndependent(is_ascii_); |
| 5732 } | 5718 } |
| 5733 EnsureAnalyzed(that->on_success()); | 5719 EnsureAnalyzed(that->on_success()); |
| 5734 if (!has_failed()) { | 5720 if (!has_failed()) { |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5830 bool not_at_start) { | 5816 bool not_at_start) { |
| 5831 if (initial_offset >= bm->length()) return; | 5817 if (initial_offset >= bm->length()) return; |
| 5832 int offset = initial_offset; | 5818 int offset = initial_offset; |
| 5833 int max_char = bm->max_char(); | 5819 int max_char = bm->max_char(); |
| 5834 for (int i = 0; i < elements()->length(); i++) { | 5820 for (int i = 0; i < elements()->length(); i++) { |
| 5835 if (offset >= bm->length()) { | 5821 if (offset >= bm->length()) { |
| 5836 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5822 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 5837 return; | 5823 return; |
| 5838 } | 5824 } |
| 5839 TextElement text = elements()->at(i); | 5825 TextElement text = elements()->at(i); |
| 5840 if (text.text_type == TextElement::ATOM) { | 5826 if (text.text_type() == TextElement::ATOM) { |
| 5841 RegExpAtom* atom = text.data.u_atom; | 5827 RegExpAtom* atom = text.atom(); |
| 5842 for (int j = 0; j < atom->length(); j++, offset++) { | 5828 for (int j = 0; j < atom->length(); j++, offset++) { |
| 5843 if (offset >= bm->length()) { | 5829 if (offset >= bm->length()) { |
| 5844 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5830 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 5845 return; | 5831 return; |
| 5846 } | 5832 } |
| 5847 uc16 character = atom->data()[j]; | 5833 uc16 character = atom->data()[j]; |
| 5848 if (bm->compiler()->ignore_case()) { | 5834 if (bm->compiler()->ignore_case()) { |
| 5849 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 5835 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 5850 int length = GetCaseIndependentLetters( | 5836 int length = GetCaseIndependentLetters( |
| 5851 ISOLATE, | 5837 ISOLATE, |
| 5852 character, | 5838 character, |
| 5853 bm->max_char() == String::kMaxOneByteCharCode, | 5839 bm->max_char() == String::kMaxOneByteCharCode, |
| 5854 chars); | 5840 chars); |
| 5855 for (int j = 0; j < length; j++) { | 5841 for (int j = 0; j < length; j++) { |
| 5856 bm->Set(offset, chars[j]); | 5842 bm->Set(offset, chars[j]); |
| 5857 } | 5843 } |
| 5858 } else { | 5844 } else { |
| 5859 if (character <= max_char) bm->Set(offset, character); | 5845 if (character <= max_char) bm->Set(offset, character); |
| 5860 } | 5846 } |
| 5861 } | 5847 } |
| 5862 } else { | 5848 } else { |
| 5863 ASSERT(text.text_type == TextElement::CHAR_CLASS); | 5849 ASSERT_EQ(TextElement::CHAR_CLASS, text.text_type()); |
| 5864 RegExpCharacterClass* char_class = text.data.u_char_class; | 5850 RegExpCharacterClass* char_class = text.char_class(); |
| 5865 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); | 5851 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); |
| 5866 if (char_class->is_negated()) { | 5852 if (char_class->is_negated()) { |
| 5867 bm->SetAll(offset); | 5853 bm->SetAll(offset); |
| 5868 } else { | 5854 } else { |
| 5869 for (int k = 0; k < ranges->length(); k++) { | 5855 for (int k = 0; k < ranges->length(); k++) { |
| 5870 CharacterRange& range = ranges->at(k); | 5856 CharacterRange& range = ranges->at(k); |
| 5871 if (range.from() > max_char) continue; | 5857 if (range.from() > max_char) continue; |
| 5872 int to = Min(max_char, static_cast<int>(range.to())); | 5858 int to = Min(max_char, static_cast<int>(range.to())); |
| 5873 bm->SetInterval(offset, Interval(range.from(), to)); | 5859 bm->SetInterval(offset, Interval(range.from(), to)); |
| 5874 } | 5860 } |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5966 last = range.to() + 1; | 5952 last = range.to() + 1; |
| 5967 } | 5953 } |
| 5968 } | 5954 } |
| 5969 } | 5955 } |
| 5970 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit)); | 5956 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit)); |
| 5971 } | 5957 } |
| 5972 | 5958 |
| 5973 | 5959 |
| 5974 void DispatchTableConstructor::VisitText(TextNode* that) { | 5960 void DispatchTableConstructor::VisitText(TextNode* that) { |
| 5975 TextElement elm = that->elements()->at(0); | 5961 TextElement elm = that->elements()->at(0); |
| 5976 switch (elm.text_type) { | 5962 switch (elm.text_type()) { |
| 5977 case TextElement::ATOM: { | 5963 case TextElement::ATOM: { |
| 5978 uc16 c = elm.data.u_atom->data()[0]; | 5964 uc16 c = elm.atom()->data()[0]; |
| 5979 AddRange(CharacterRange(c, c)); | 5965 AddRange(CharacterRange(c, c)); |
| 5980 break; | 5966 break; |
| 5981 } | 5967 } |
| 5982 case TextElement::CHAR_CLASS: { | 5968 case TextElement::CHAR_CLASS: { |
| 5983 RegExpCharacterClass* tree = elm.data.u_char_class; | 5969 RegExpCharacterClass* tree = elm.char_class(); |
| 5984 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone()); | 5970 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone()); |
| 5985 if (tree->is_negated()) { | 5971 if (tree->is_negated()) { |
| 5986 AddInverse(ranges); | 5972 AddInverse(ranges); |
| 5987 } else { | 5973 } else { |
| 5988 for (int i = 0; i < ranges->length(); i++) | 5974 for (int i = 0; i < ranges->length(); i++) |
| 5989 AddRange(ranges->at(i)); | 5975 AddRange(ranges->at(i)); |
| 5990 } | 5976 } |
| 5991 break; | 5977 break; |
| 5992 } | 5978 } |
| 5993 default: { | 5979 default: { |
| (...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6125 } | 6111 } |
| 6126 | 6112 |
| 6127 return compiler.Assemble(&macro_assembler, | 6113 return compiler.Assemble(&macro_assembler, |
| 6128 node, | 6114 node, |
| 6129 data->capture_count, | 6115 data->capture_count, |
| 6130 pattern); | 6116 pattern); |
| 6131 } | 6117 } |
| 6132 | 6118 |
| 6133 | 6119 |
| 6134 }} // namespace v8::internal | 6120 }} // namespace v8::internal |
| OLD | NEW |