| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 917 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 928 } | 928 } |
| 929 | 929 |
| 930 | 930 |
| 931 void RegExpText::AppendToText(RegExpText* text, Zone* zone) { | 931 void RegExpText::AppendToText(RegExpText* text, Zone* zone) { |
| 932 for (int i = 0; i < elements()->length(); i++) | 932 for (int i = 0; i < elements()->length(); i++) |
| 933 text->AddElement(elements()->at(i), zone); | 933 text->AddElement(elements()->at(i), zone); |
| 934 } | 934 } |
| 935 | 935 |
| 936 | 936 |
| 937 TextElement TextElement::Atom(RegExpAtom* atom) { | 937 TextElement TextElement::Atom(RegExpAtom* atom) { |
| 938 TextElement result = TextElement(ATOM); | 938 return TextElement(ATOM, atom); |
| 939 result.data.u_atom = atom; | |
| 940 return result; | |
| 941 } | 939 } |
| 942 | 940 |
| 943 | 941 |
| 944 TextElement TextElement::CharClass( | 942 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { |
| 945 RegExpCharacterClass* char_class) { | 943 return TextElement(CHAR_CLASS, char_class); |
| 946 TextElement result = TextElement(CHAR_CLASS); | |
| 947 result.data.u_char_class = char_class; | |
| 948 return result; | |
| 949 } | 944 } |
| 950 | 945 |
| 951 | 946 |
| 952 int TextElement::length() { | 947 int TextElement::length() const { |
| 953 if (text_type == ATOM) { | 948 switch (text_type()) { |
| 954 return data.u_atom->length(); | 949 case ATOM: |
| 955 } else { | 950 return atom()->length(); |
| 956 ASSERT(text_type == CHAR_CLASS); | 951 |
| 957 return 1; | 952 case CHAR_CLASS: |
| 953 return 1; |
| 958 } | 954 } |
| 955 UNREACHABLE(); |
| 956 return 0; |
| 959 } | 957 } |
| 960 | 958 |
| 961 | 959 |
| 962 DispatchTable* ChoiceNode::GetTable(bool ignore_case) { | 960 DispatchTable* ChoiceNode::GetTable(bool ignore_case) { |
| 963 if (table_ == NULL) { | 961 if (table_ == NULL) { |
| 964 table_ = new(zone()) DispatchTable(zone()); | 962 table_ = new(zone()) DispatchTable(zone()); |
| 965 DispatchTableConstructor cons(table_, ignore_case, zone()); | 963 DispatchTableConstructor cons(table_, ignore_case, zone()); |
| 966 cons.BuildTable(this); | 964 cons.BuildTable(this); |
| 967 } | 965 } |
| 968 return table_; | 966 return table_; |
| (...skipping 1587 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2556 ASSERT(characters_filled_in < details->characters()); | 2554 ASSERT(characters_filled_in < details->characters()); |
| 2557 int characters = details->characters(); | 2555 int characters = details->characters(); |
| 2558 int char_mask; | 2556 int char_mask; |
| 2559 if (compiler->ascii()) { | 2557 if (compiler->ascii()) { |
| 2560 char_mask = String::kMaxOneByteCharCode; | 2558 char_mask = String::kMaxOneByteCharCode; |
| 2561 } else { | 2559 } else { |
| 2562 char_mask = String::kMaxUtf16CodeUnit; | 2560 char_mask = String::kMaxUtf16CodeUnit; |
| 2563 } | 2561 } |
| 2564 for (int k = 0; k < elms_->length(); k++) { | 2562 for (int k = 0; k < elms_->length(); k++) { |
| 2565 TextElement elm = elms_->at(k); | 2563 TextElement elm = elms_->at(k); |
| 2566 if (elm.text_type == TextElement::ATOM) { | 2564 if (elm.text_type() == TextElement::ATOM) { |
| 2567 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2565 Vector<const uc16> quarks = elm.atom()->data(); |
| 2568 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2566 for (int i = 0; i < characters && i < quarks.length(); i++) { |
| 2569 QuickCheckDetails::Position* pos = | 2567 QuickCheckDetails::Position* pos = |
| 2570 details->positions(characters_filled_in); | 2568 details->positions(characters_filled_in); |
| 2571 uc16 c = quarks[i]; | 2569 uc16 c = quarks[i]; |
| 2572 if (c > char_mask) { | 2570 if (c > char_mask) { |
| 2573 // If we expect a non-ASCII character from an ASCII string, | 2571 // If we expect a non-ASCII character from an ASCII string, |
| 2574 // there is no way we can match. Not even case independent | 2572 // there is no way we can match. Not even case independent |
| 2575 // matching can turn an ASCII character into non-ASCII or | 2573 // matching can turn an ASCII character into non-ASCII or |
| 2576 // vice versa. | 2574 // vice versa. |
| 2577 details->set_cannot_match(); | 2575 details->set_cannot_match(); |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2619 } | 2617 } |
| 2620 characters_filled_in++; | 2618 characters_filled_in++; |
| 2621 ASSERT(characters_filled_in <= details->characters()); | 2619 ASSERT(characters_filled_in <= details->characters()); |
| 2622 if (characters_filled_in == details->characters()) { | 2620 if (characters_filled_in == details->characters()) { |
| 2623 return; | 2621 return; |
| 2624 } | 2622 } |
| 2625 } | 2623 } |
| 2626 } else { | 2624 } else { |
| 2627 QuickCheckDetails::Position* pos = | 2625 QuickCheckDetails::Position* pos = |
| 2628 details->positions(characters_filled_in); | 2626 details->positions(characters_filled_in); |
| 2629 RegExpCharacterClass* tree = elm.data.u_char_class; | 2627 RegExpCharacterClass* tree = elm.char_class(); |
| 2630 ZoneList<CharacterRange>* ranges = tree->ranges(zone()); | 2628 ZoneList<CharacterRange>* ranges = tree->ranges(zone()); |
| 2631 if (tree->is_negated()) { | 2629 if (tree->is_negated()) { |
| 2632 // A quick check uses multi-character mask and compare. There is no | 2630 // A quick check uses multi-character mask and compare. There is no |
| 2633 // useful way to incorporate a negative char class into this scheme | 2631 // useful way to incorporate a negative char class into this scheme |
| 2634 // so we just conservatively create a mask and value that will always | 2632 // so we just conservatively create a mask and value that will always |
| 2635 // succeed. | 2633 // succeed. |
| 2636 pos->mask = 0; | 2634 pos->mask = 0; |
| 2637 pos->value = 0; | 2635 pos->value = 0; |
| 2638 } else { | 2636 } else { |
| 2639 int first_range = 0; | 2637 int first_range = 0; |
| (...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2809 | 2807 |
| 2810 | 2808 |
| 2811 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { | 2809 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { |
| 2812 if (info()->replacement_calculated) return replacement(); | 2810 if (info()->replacement_calculated) return replacement(); |
| 2813 if (depth < 0) return this; | 2811 if (depth < 0) return this; |
| 2814 ASSERT(!info()->visited); | 2812 ASSERT(!info()->visited); |
| 2815 VisitMarker marker(info()); | 2813 VisitMarker marker(info()); |
| 2816 int element_count = elms_->length(); | 2814 int element_count = elms_->length(); |
| 2817 for (int i = 0; i < element_count; i++) { | 2815 for (int i = 0; i < element_count; i++) { |
| 2818 TextElement elm = elms_->at(i); | 2816 TextElement elm = elms_->at(i); |
| 2819 if (elm.text_type == TextElement::ATOM) { | 2817 if (elm.text_type() == TextElement::ATOM) { |
| 2820 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2818 Vector<const uc16> quarks = elm.atom()->data(); |
| 2821 for (int j = 0; j < quarks.length(); j++) { | 2819 for (int j = 0; j < quarks.length(); j++) { |
| 2822 uint16_t c = quarks[j]; | 2820 uint16_t c = quarks[j]; |
| 2823 if (c <= String::kMaxOneByteCharCode) continue; | 2821 if (c <= String::kMaxOneByteCharCode) continue; |
| 2824 if (!ignore_case) return set_replacement(NULL); | 2822 if (!ignore_case) return set_replacement(NULL); |
| 2825 // Here, we need to check for characters whose upper and lower cases | 2823 // Here, we need to check for characters whose upper and lower cases |
| 2826 // are outside the Latin-1 range. | 2824 // are outside the Latin-1 range. |
| 2827 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); | 2825 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
| 2828 // Character is outside Latin-1 completely | 2826 // Character is outside Latin-1 completely |
| 2829 if (converted == 0) return set_replacement(NULL); | 2827 if (converted == 0) return set_replacement(NULL); |
| 2830 // Convert quark to Latin-1 in place. | 2828 // Convert quark to Latin-1 in place. |
| 2831 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); | 2829 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); |
| 2832 copy[j] = converted; | 2830 copy[j] = converted; |
| 2833 } | 2831 } |
| 2834 } else { | 2832 } else { |
| 2835 ASSERT(elm.text_type == TextElement::CHAR_CLASS); | 2833 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); |
| 2836 RegExpCharacterClass* cc = elm.data.u_char_class; | 2834 RegExpCharacterClass* cc = elm.char_class(); |
| 2837 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2835 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
| 2838 if (!CharacterRange::IsCanonical(ranges)) { | 2836 if (!CharacterRange::IsCanonical(ranges)) { |
| 2839 CharacterRange::Canonicalize(ranges); | 2837 CharacterRange::Canonicalize(ranges); |
| 2840 } | 2838 } |
| 2841 // Now they are in order so we only need to look at the first. | 2839 // Now they are in order so we only need to look at the first. |
| 2842 int range_count = ranges->length(); | 2840 int range_count = ranges->length(); |
| 2843 if (cc->is_negated()) { | 2841 if (cc->is_negated()) { |
| 2844 if (range_count != 0 && | 2842 if (range_count != 0 && |
| 2845 ranges->at(0).from() == 0 && | 2843 ranges->at(0).from() == 0 && |
| 2846 ranges->at(0).to() >= String::kMaxOneByteCharCode) { | 2844 ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
| (...skipping 404 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3251 bool first_element_checked, | 3249 bool first_element_checked, |
| 3252 int* checked_up_to) { | 3250 int* checked_up_to) { |
| 3253 Isolate* isolate = Isolate::Current(); | 3251 Isolate* isolate = Isolate::Current(); |
| 3254 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3252 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 3255 bool ascii = compiler->ascii(); | 3253 bool ascii = compiler->ascii(); |
| 3256 Label* backtrack = trace->backtrack(); | 3254 Label* backtrack = trace->backtrack(); |
| 3257 QuickCheckDetails* quick_check = trace->quick_check_performed(); | 3255 QuickCheckDetails* quick_check = trace->quick_check_performed(); |
| 3258 int element_count = elms_->length(); | 3256 int element_count = elms_->length(); |
| 3259 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 3257 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
| 3260 TextElement elm = elms_->at(i); | 3258 TextElement elm = elms_->at(i); |
| 3261 int cp_offset = trace->cp_offset() + elm.cp_offset; | 3259 int cp_offset = trace->cp_offset() + elm.cp_offset(); |
| 3262 if (elm.text_type == TextElement::ATOM) { | 3260 if (elm.text_type() == TextElement::ATOM) { |
| 3263 Vector<const uc16> quarks = elm.data.u_atom->data(); | 3261 Vector<const uc16> quarks = elm.atom()->data(); |
| 3264 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 3262 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
| 3265 if (first_element_checked && i == 0 && j == 0) continue; | 3263 if (first_element_checked && i == 0 && j == 0) continue; |
| 3266 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue; | 3264 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; |
| 3267 EmitCharacterFunction* emit_function = NULL; | 3265 EmitCharacterFunction* emit_function = NULL; |
| 3268 switch (pass) { | 3266 switch (pass) { |
| 3269 case NON_ASCII_MATCH: | 3267 case NON_ASCII_MATCH: |
| 3270 ASSERT(ascii); | 3268 ASSERT(ascii); |
| 3271 if (quarks[j] > String::kMaxOneByteCharCode) { | 3269 if (quarks[j] > String::kMaxOneByteCharCode) { |
| 3272 assembler->GoTo(backtrack); | 3270 assembler->GoTo(backtrack); |
| 3273 return; | 3271 return; |
| 3274 } | 3272 } |
| 3275 break; | 3273 break; |
| 3276 case NON_LETTER_CHARACTER_MATCH: | 3274 case NON_LETTER_CHARACTER_MATCH: |
| (...skipping 13 matching lines...) Expand all Loading... |
| 3290 compiler, | 3288 compiler, |
| 3291 quarks[j], | 3289 quarks[j], |
| 3292 backtrack, | 3290 backtrack, |
| 3293 cp_offset + j, | 3291 cp_offset + j, |
| 3294 *checked_up_to < cp_offset + j, | 3292 *checked_up_to < cp_offset + j, |
| 3295 preloaded); | 3293 preloaded); |
| 3296 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 3294 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
| 3297 } | 3295 } |
| 3298 } | 3296 } |
| 3299 } else { | 3297 } else { |
| 3300 ASSERT_EQ(elm.text_type, TextElement::CHAR_CLASS); | 3298 ASSERT_EQ(TextElement::CHAR_CLASS, elm.text_type()); |
| 3301 if (pass == CHARACTER_CLASS_MATCH) { | 3299 if (pass == CHARACTER_CLASS_MATCH) { |
| 3302 if (first_element_checked && i == 0) continue; | 3300 if (first_element_checked && i == 0) continue; |
| 3303 if (DeterminedAlready(quick_check, elm.cp_offset)) continue; | 3301 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
| 3304 RegExpCharacterClass* cc = elm.data.u_char_class; | 3302 RegExpCharacterClass* cc = elm.char_class(); |
| 3305 EmitCharClass(assembler, | 3303 EmitCharClass(assembler, |
| 3306 cc, | 3304 cc, |
| 3307 ascii, | 3305 ascii, |
| 3308 backtrack, | 3306 backtrack, |
| 3309 cp_offset, | 3307 cp_offset, |
| 3310 *checked_up_to < cp_offset, | 3308 *checked_up_to < cp_offset, |
| 3311 preloaded, | 3309 preloaded, |
| 3312 zone()); | 3310 zone()); |
| 3313 UpdateBoundsCheck(cp_offset, checked_up_to); | 3311 UpdateBoundsCheck(cp_offset, checked_up_to); |
| 3314 } | 3312 } |
| 3315 } | 3313 } |
| 3316 } | 3314 } |
| 3317 } | 3315 } |
| 3318 | 3316 |
| 3319 | 3317 |
| 3320 int TextNode::Length() { | 3318 int TextNode::Length() { |
| 3321 TextElement elm = elms_->last(); | 3319 TextElement elm = elms_->last(); |
| 3322 ASSERT(elm.cp_offset >= 0); | 3320 ASSERT(elm.cp_offset() >= 0); |
| 3323 if (elm.text_type == TextElement::ATOM) { | 3321 return elm.cp_offset() + elm.length(); |
| 3324 return elm.cp_offset + elm.data.u_atom->data().length(); | |
| 3325 } else { | |
| 3326 return elm.cp_offset + 1; | |
| 3327 } | |
| 3328 } | 3322 } |
| 3329 | 3323 |
| 3330 | 3324 |
| 3331 bool TextNode::SkipPass(int int_pass, bool ignore_case) { | 3325 bool TextNode::SkipPass(int int_pass, bool ignore_case) { |
| 3332 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); | 3326 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); |
| 3333 if (ignore_case) { | 3327 if (ignore_case) { |
| 3334 return pass == SIMPLE_CHARACTER_MATCH; | 3328 return pass == SIMPLE_CHARACTER_MATCH; |
| 3335 } else { | 3329 } else { |
| 3336 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; | 3330 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; |
| 3337 } | 3331 } |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3419 cp_offset_ = 0; | 3413 cp_offset_ = 0; |
| 3420 } | 3414 } |
| 3421 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); | 3415 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); |
| 3422 } | 3416 } |
| 3423 | 3417 |
| 3424 | 3418 |
| 3425 void TextNode::MakeCaseIndependent(bool is_ascii) { | 3419 void TextNode::MakeCaseIndependent(bool is_ascii) { |
| 3426 int element_count = elms_->length(); | 3420 int element_count = elms_->length(); |
| 3427 for (int i = 0; i < element_count; i++) { | 3421 for (int i = 0; i < element_count; i++) { |
| 3428 TextElement elm = elms_->at(i); | 3422 TextElement elm = elms_->at(i); |
| 3429 if (elm.text_type == TextElement::CHAR_CLASS) { | 3423 if (elm.text_type() == TextElement::CHAR_CLASS) { |
| 3430 RegExpCharacterClass* cc = elm.data.u_char_class; | 3424 RegExpCharacterClass* cc = elm.char_class(); |
| 3431 // None of the standard character classes is different in the case | 3425 // None of the standard character classes is different in the case |
| 3432 // independent case and it slows us down if we don't know that. | 3426 // independent case and it slows us down if we don't know that. |
| 3433 if (cc->is_standard(zone())) continue; | 3427 if (cc->is_standard(zone())) continue; |
| 3434 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 3428 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
| 3435 int range_count = ranges->length(); | 3429 int range_count = ranges->length(); |
| 3436 for (int j = 0; j < range_count; j++) { | 3430 for (int j = 0; j < range_count; j++) { |
| 3437 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone()); | 3431 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone()); |
| 3438 } | 3432 } |
| 3439 } | 3433 } |
| 3440 } | 3434 } |
| 3441 } | 3435 } |
| 3442 | 3436 |
| 3443 | 3437 |
| 3444 int TextNode::GreedyLoopTextLength() { | 3438 int TextNode::GreedyLoopTextLength() { |
| 3445 TextElement elm = elms_->at(elms_->length() - 1); | 3439 TextElement elm = elms_->at(elms_->length() - 1); |
| 3446 if (elm.text_type == TextElement::CHAR_CLASS) { | 3440 return elm.cp_offset() + elm.length(); |
| 3447 return elm.cp_offset + 1; | |
| 3448 } else { | |
| 3449 return elm.cp_offset + elm.data.u_atom->data().length(); | |
| 3450 } | |
| 3451 } | 3441 } |
| 3452 | 3442 |
| 3453 | 3443 |
| 3454 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( | 3444 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( |
| 3455 RegExpCompiler* compiler) { | 3445 RegExpCompiler* compiler) { |
| 3456 if (elms_->length() != 1) return NULL; | 3446 if (elms_->length() != 1) return NULL; |
| 3457 TextElement elm = elms_->at(0); | 3447 TextElement elm = elms_->at(0); |
| 3458 if (elm.text_type != TextElement::CHAR_CLASS) return NULL; | 3448 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; |
| 3459 RegExpCharacterClass* node = elm.data.u_char_class; | 3449 RegExpCharacterClass* node = elm.char_class(); |
| 3460 ZoneList<CharacterRange>* ranges = node->ranges(zone()); | 3450 ZoneList<CharacterRange>* ranges = node->ranges(zone()); |
| 3461 if (!CharacterRange::IsCanonical(ranges)) { | 3451 if (!CharacterRange::IsCanonical(ranges)) { |
| 3462 CharacterRange::Canonicalize(ranges); | 3452 CharacterRange::Canonicalize(ranges); |
| 3463 } | 3453 } |
| 3464 if (node->is_negated()) { | 3454 if (node->is_negated()) { |
| 3465 return ranges->length() == 0 ? on_success() : NULL; | 3455 return ranges->length() == 0 ? on_success() : NULL; |
| 3466 } | 3456 } |
| 3467 if (ranges->length() != 1) return NULL; | 3457 if (ranges->length() != 1) return NULL; |
| 3468 uint32_t max_char; | 3458 uint32_t max_char; |
| 3469 if (compiler->ascii()) { | 3459 if (compiler->ascii()) { |
| (...skipping 1053 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4523 } | 4513 } |
| 4524 } | 4514 } |
| 4525 | 4515 |
| 4526 | 4516 |
| 4527 void DotPrinter::VisitText(TextNode* that) { | 4517 void DotPrinter::VisitText(TextNode* that) { |
| 4528 Zone* zone = that->zone(); | 4518 Zone* zone = that->zone(); |
| 4529 stream()->Add(" n%p [label=\"", that); | 4519 stream()->Add(" n%p [label=\"", that); |
| 4530 for (int i = 0; i < that->elements()->length(); i++) { | 4520 for (int i = 0; i < that->elements()->length(); i++) { |
| 4531 if (i > 0) stream()->Add(" "); | 4521 if (i > 0) stream()->Add(" "); |
| 4532 TextElement elm = that->elements()->at(i); | 4522 TextElement elm = that->elements()->at(i); |
| 4533 switch (elm.text_type) { | 4523 switch (elm.text_type()) { |
| 4534 case TextElement::ATOM: { | 4524 case TextElement::ATOM: { |
| 4535 stream()->Add("'%w'", elm.data.u_atom->data()); | 4525 stream()->Add("'%w'", elm.atom()->data()); |
| 4536 break; | 4526 break; |
| 4537 } | 4527 } |
| 4538 case TextElement::CHAR_CLASS: { | 4528 case TextElement::CHAR_CLASS: { |
| 4539 RegExpCharacterClass* node = elm.data.u_char_class; | 4529 RegExpCharacterClass* node = elm.char_class(); |
| 4540 stream()->Add("["); | 4530 stream()->Add("["); |
| 4541 if (node->is_negated()) | 4531 if (node->is_negated()) |
| 4542 stream()->Add("^"); | 4532 stream()->Add("^"); |
| 4543 for (int j = 0; j < node->ranges(zone)->length(); j++) { | 4533 for (int j = 0; j < node->ranges(zone)->length(); j++) { |
| 4544 CharacterRange range = node->ranges(zone)->at(j); | 4534 CharacterRange range = node->ranges(zone)->at(j); |
| 4545 stream()->Add("%k-%k", range.from(), range.to()); | 4535 stream()->Add("%k-%k", range.from(), range.to()); |
| 4546 } | 4536 } |
| 4547 stream()->Add("]"); | 4537 stream()->Add("]"); |
| 4548 break; | 4538 break; |
| 4549 } | 4539 } |
| (...skipping 1161 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5711 } | 5701 } |
| 5712 | 5702 |
| 5713 | 5703 |
| 5714 void TextNode::CalculateOffsets() { | 5704 void TextNode::CalculateOffsets() { |
| 5715 int element_count = elements()->length(); | 5705 int element_count = elements()->length(); |
| 5716 // Set up the offsets of the elements relative to the start. This is a fixed | 5706 // Set up the offsets of the elements relative to the start. This is a fixed |
| 5717 // quantity since a TextNode can only contain fixed-width things. | 5707 // quantity since a TextNode can only contain fixed-width things. |
| 5718 int cp_offset = 0; | 5708 int cp_offset = 0; |
| 5719 for (int i = 0; i < element_count; i++) { | 5709 for (int i = 0; i < element_count; i++) { |
| 5720 TextElement& elm = elements()->at(i); | 5710 TextElement& elm = elements()->at(i); |
| 5721 elm.cp_offset = cp_offset; | 5711 elm.set_cp_offset(cp_offset); |
| 5722 if (elm.text_type == TextElement::ATOM) { | 5712 cp_offset += elm.length(); |
| 5723 cp_offset += elm.data.u_atom->data().length(); | |
| 5724 } else { | |
| 5725 cp_offset++; | |
| 5726 } | |
| 5727 } | 5713 } |
| 5728 } | 5714 } |
| 5729 | 5715 |
| 5730 | 5716 |
| 5731 void Analysis::VisitText(TextNode* that) { | 5717 void Analysis::VisitText(TextNode* that) { |
| 5732 if (ignore_case_) { | 5718 if (ignore_case_) { |
| 5733 that->MakeCaseIndependent(is_ascii_); | 5719 that->MakeCaseIndependent(is_ascii_); |
| 5734 } | 5720 } |
| 5735 EnsureAnalyzed(that->on_success()); | 5721 EnsureAnalyzed(that->on_success()); |
| 5736 if (!has_failed()) { | 5722 if (!has_failed()) { |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5832 bool not_at_start) { | 5818 bool not_at_start) { |
| 5833 if (initial_offset >= bm->length()) return; | 5819 if (initial_offset >= bm->length()) return; |
| 5834 int offset = initial_offset; | 5820 int offset = initial_offset; |
| 5835 int max_char = bm->max_char(); | 5821 int max_char = bm->max_char(); |
| 5836 for (int i = 0; i < elements()->length(); i++) { | 5822 for (int i = 0; i < elements()->length(); i++) { |
| 5837 if (offset >= bm->length()) { | 5823 if (offset >= bm->length()) { |
| 5838 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5824 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 5839 return; | 5825 return; |
| 5840 } | 5826 } |
| 5841 TextElement text = elements()->at(i); | 5827 TextElement text = elements()->at(i); |
| 5842 if (text.text_type == TextElement::ATOM) { | 5828 if (text.text_type() == TextElement::ATOM) { |
| 5843 RegExpAtom* atom = text.data.u_atom; | 5829 RegExpAtom* atom = text.atom(); |
| 5844 for (int j = 0; j < atom->length(); j++, offset++) { | 5830 for (int j = 0; j < atom->length(); j++, offset++) { |
| 5845 if (offset >= bm->length()) { | 5831 if (offset >= bm->length()) { |
| 5846 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5832 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 5847 return; | 5833 return; |
| 5848 } | 5834 } |
| 5849 uc16 character = atom->data()[j]; | 5835 uc16 character = atom->data()[j]; |
| 5850 if (bm->compiler()->ignore_case()) { | 5836 if (bm->compiler()->ignore_case()) { |
| 5851 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 5837 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 5852 int length = GetCaseIndependentLetters( | 5838 int length = GetCaseIndependentLetters( |
| 5853 ISOLATE, | 5839 ISOLATE, |
| 5854 character, | 5840 character, |
| 5855 bm->max_char() == String::kMaxOneByteCharCode, | 5841 bm->max_char() == String::kMaxOneByteCharCode, |
| 5856 chars); | 5842 chars); |
| 5857 for (int j = 0; j < length; j++) { | 5843 for (int j = 0; j < length; j++) { |
| 5858 bm->Set(offset, chars[j]); | 5844 bm->Set(offset, chars[j]); |
| 5859 } | 5845 } |
| 5860 } else { | 5846 } else { |
| 5861 if (character <= max_char) bm->Set(offset, character); | 5847 if (character <= max_char) bm->Set(offset, character); |
| 5862 } | 5848 } |
| 5863 } | 5849 } |
| 5864 } else { | 5850 } else { |
| 5865 ASSERT(text.text_type == TextElement::CHAR_CLASS); | 5851 ASSERT_EQ(TextElement::CHAR_CLASS, text.text_type()); |
| 5866 RegExpCharacterClass* char_class = text.data.u_char_class; | 5852 RegExpCharacterClass* char_class = text.char_class(); |
| 5867 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); | 5853 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); |
| 5868 if (char_class->is_negated()) { | 5854 if (char_class->is_negated()) { |
| 5869 bm->SetAll(offset); | 5855 bm->SetAll(offset); |
| 5870 } else { | 5856 } else { |
| 5871 for (int k = 0; k < ranges->length(); k++) { | 5857 for (int k = 0; k < ranges->length(); k++) { |
| 5872 CharacterRange& range = ranges->at(k); | 5858 CharacterRange& range = ranges->at(k); |
| 5873 if (range.from() > max_char) continue; | 5859 if (range.from() > max_char) continue; |
| 5874 int to = Min(max_char, static_cast<int>(range.to())); | 5860 int to = Min(max_char, static_cast<int>(range.to())); |
| 5875 bm->SetInterval(offset, Interval(range.from(), to)); | 5861 bm->SetInterval(offset, Interval(range.from(), to)); |
| 5876 } | 5862 } |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5968 last = range.to() + 1; | 5954 last = range.to() + 1; |
| 5969 } | 5955 } |
| 5970 } | 5956 } |
| 5971 } | 5957 } |
| 5972 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit)); | 5958 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit)); |
| 5973 } | 5959 } |
| 5974 | 5960 |
| 5975 | 5961 |
| 5976 void DispatchTableConstructor::VisitText(TextNode* that) { | 5962 void DispatchTableConstructor::VisitText(TextNode* that) { |
| 5977 TextElement elm = that->elements()->at(0); | 5963 TextElement elm = that->elements()->at(0); |
| 5978 switch (elm.text_type) { | 5964 switch (elm.text_type()) { |
| 5979 case TextElement::ATOM: { | 5965 case TextElement::ATOM: { |
| 5980 uc16 c = elm.data.u_atom->data()[0]; | 5966 uc16 c = elm.atom()->data()[0]; |
| 5981 AddRange(CharacterRange(c, c)); | 5967 AddRange(CharacterRange(c, c)); |
| 5982 break; | 5968 break; |
| 5983 } | 5969 } |
| 5984 case TextElement::CHAR_CLASS: { | 5970 case TextElement::CHAR_CLASS: { |
| 5985 RegExpCharacterClass* tree = elm.data.u_char_class; | 5971 RegExpCharacterClass* tree = elm.char_class(); |
| 5986 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone()); | 5972 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone()); |
| 5987 if (tree->is_negated()) { | 5973 if (tree->is_negated()) { |
| 5988 AddInverse(ranges); | 5974 AddInverse(ranges); |
| 5989 } else { | 5975 } else { |
| 5990 for (int i = 0; i < ranges->length(); i++) | 5976 for (int i = 0; i < ranges->length(); i++) |
| 5991 AddRange(ranges->at(i)); | 5977 AddRange(ranges->at(i)); |
| 5992 } | 5978 } |
| 5993 break; | 5979 break; |
| 5994 } | 5980 } |
| 5995 default: { | 5981 default: { |
| (...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6132 } | 6118 } |
| 6133 | 6119 |
| 6134 return compiler.Assemble(&macro_assembler, | 6120 return compiler.Assemble(&macro_assembler, |
| 6135 node, | 6121 node, |
| 6136 data->capture_count, | 6122 data->capture_count, |
| 6137 pattern); | 6123 pattern); |
| 6138 } | 6124 } |
| 6139 | 6125 |
| 6140 | 6126 |
| 6141 }} // namespace v8::internal | 6127 }} // namespace v8::internal |
| OLD | NEW |