OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 915 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
926 } | 926 } |
927 | 927 |
928 | 928 |
929 void RegExpText::AppendToText(RegExpText* text, Zone* zone) { | 929 void RegExpText::AppendToText(RegExpText* text, Zone* zone) { |
930 for (int i = 0; i < elements()->length(); i++) | 930 for (int i = 0; i < elements()->length(); i++) |
931 text->AddElement(elements()->at(i), zone); | 931 text->AddElement(elements()->at(i), zone); |
932 } | 932 } |
933 | 933 |
934 | 934 |
935 TextElement TextElement::Atom(RegExpAtom* atom) { | 935 TextElement TextElement::Atom(RegExpAtom* atom) { |
936 TextElement result = TextElement(ATOM); | 936 return TextElement(ATOM, atom); |
937 result.data.u_atom = atom; | |
938 return result; | |
939 } | 937 } |
940 | 938 |
941 | 939 |
942 TextElement TextElement::CharClass( | 940 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { |
943 RegExpCharacterClass* char_class) { | 941 return TextElement(CHAR_CLASS, char_class); |
944 TextElement result = TextElement(CHAR_CLASS); | |
945 result.data.u_char_class = char_class; | |
946 return result; | |
947 } | 942 } |
948 | 943 |
949 | 944 |
950 int TextElement::length() { | 945 int TextElement::length() const { |
951 if (text_type == ATOM) { | 946 switch (text_type()) { |
952 return data.u_atom->length(); | 947 case ATOM: |
953 } else { | 948 return atom()->length(); |
954 ASSERT(text_type == CHAR_CLASS); | 949 |
955 return 1; | 950 case CHAR_CLASS: |
| 951 return 1; |
956 } | 952 } |
| 953 UNREACHABLE(); |
| 954 return 0; |
957 } | 955 } |
958 | 956 |
959 | 957 |
960 DispatchTable* ChoiceNode::GetTable(bool ignore_case) { | 958 DispatchTable* ChoiceNode::GetTable(bool ignore_case) { |
961 if (table_ == NULL) { | 959 if (table_ == NULL) { |
962 table_ = new(zone()) DispatchTable(zone()); | 960 table_ = new(zone()) DispatchTable(zone()); |
963 DispatchTableConstructor cons(table_, ignore_case, zone()); | 961 DispatchTableConstructor cons(table_, ignore_case, zone()); |
964 cons.BuildTable(this); | 962 cons.BuildTable(this); |
965 } | 963 } |
966 return table_; | 964 return table_; |
(...skipping 1587 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2554 ASSERT(characters_filled_in < details->characters()); | 2552 ASSERT(characters_filled_in < details->characters()); |
2555 int characters = details->characters(); | 2553 int characters = details->characters(); |
2556 int char_mask; | 2554 int char_mask; |
2557 if (compiler->ascii()) { | 2555 if (compiler->ascii()) { |
2558 char_mask = String::kMaxOneByteCharCode; | 2556 char_mask = String::kMaxOneByteCharCode; |
2559 } else { | 2557 } else { |
2560 char_mask = String::kMaxUtf16CodeUnit; | 2558 char_mask = String::kMaxUtf16CodeUnit; |
2561 } | 2559 } |
2562 for (int k = 0; k < elms_->length(); k++) { | 2560 for (int k = 0; k < elms_->length(); k++) { |
2563 TextElement elm = elms_->at(k); | 2561 TextElement elm = elms_->at(k); |
2564 if (elm.text_type == TextElement::ATOM) { | 2562 if (elm.text_type() == TextElement::ATOM) { |
2565 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2563 Vector<const uc16> quarks = elm.atom()->data(); |
2566 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2564 for (int i = 0; i < characters && i < quarks.length(); i++) { |
2567 QuickCheckDetails::Position* pos = | 2565 QuickCheckDetails::Position* pos = |
2568 details->positions(characters_filled_in); | 2566 details->positions(characters_filled_in); |
2569 uc16 c = quarks[i]; | 2567 uc16 c = quarks[i]; |
2570 if (c > char_mask) { | 2568 if (c > char_mask) { |
2571 // If we expect a non-ASCII character from an ASCII string, | 2569 // If we expect a non-ASCII character from an ASCII string, |
2572 // there is no way we can match. Not even case independent | 2570 // there is no way we can match. Not even case independent |
2573 // matching can turn an ASCII character into non-ASCII or | 2571 // matching can turn an ASCII character into non-ASCII or |
2574 // vice versa. | 2572 // vice versa. |
2575 details->set_cannot_match(); | 2573 details->set_cannot_match(); |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2617 } | 2615 } |
2618 characters_filled_in++; | 2616 characters_filled_in++; |
2619 ASSERT(characters_filled_in <= details->characters()); | 2617 ASSERT(characters_filled_in <= details->characters()); |
2620 if (characters_filled_in == details->characters()) { | 2618 if (characters_filled_in == details->characters()) { |
2621 return; | 2619 return; |
2622 } | 2620 } |
2623 } | 2621 } |
2624 } else { | 2622 } else { |
2625 QuickCheckDetails::Position* pos = | 2623 QuickCheckDetails::Position* pos = |
2626 details->positions(characters_filled_in); | 2624 details->positions(characters_filled_in); |
2627 RegExpCharacterClass* tree = elm.data.u_char_class; | 2625 RegExpCharacterClass* tree = elm.char_class(); |
2628 ZoneList<CharacterRange>* ranges = tree->ranges(zone()); | 2626 ZoneList<CharacterRange>* ranges = tree->ranges(zone()); |
2629 if (tree->is_negated()) { | 2627 if (tree->is_negated()) { |
2630 // A quick check uses multi-character mask and compare. There is no | 2628 // A quick check uses multi-character mask and compare. There is no |
2631 // useful way to incorporate a negative char class into this scheme | 2629 // useful way to incorporate a negative char class into this scheme |
2632 // so we just conservatively create a mask and value that will always | 2630 // so we just conservatively create a mask and value that will always |
2633 // succeed. | 2631 // succeed. |
2634 pos->mask = 0; | 2632 pos->mask = 0; |
2635 pos->value = 0; | 2633 pos->value = 0; |
2636 } else { | 2634 } else { |
2637 int first_range = 0; | 2635 int first_range = 0; |
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2807 | 2805 |
2808 | 2806 |
2809 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { | 2807 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { |
2810 if (info()->replacement_calculated) return replacement(); | 2808 if (info()->replacement_calculated) return replacement(); |
2811 if (depth < 0) return this; | 2809 if (depth < 0) return this; |
2812 ASSERT(!info()->visited); | 2810 ASSERT(!info()->visited); |
2813 VisitMarker marker(info()); | 2811 VisitMarker marker(info()); |
2814 int element_count = elms_->length(); | 2812 int element_count = elms_->length(); |
2815 for (int i = 0; i < element_count; i++) { | 2813 for (int i = 0; i < element_count; i++) { |
2816 TextElement elm = elms_->at(i); | 2814 TextElement elm = elms_->at(i); |
2817 if (elm.text_type == TextElement::ATOM) { | 2815 if (elm.text_type() == TextElement::ATOM) { |
2818 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2816 Vector<const uc16> quarks = elm.atom()->data(); |
2819 for (int j = 0; j < quarks.length(); j++) { | 2817 for (int j = 0; j < quarks.length(); j++) { |
2820 uint16_t c = quarks[j]; | 2818 uint16_t c = quarks[j]; |
2821 if (c <= String::kMaxOneByteCharCode) continue; | 2819 if (c <= String::kMaxOneByteCharCode) continue; |
2822 if (!ignore_case) return set_replacement(NULL); | 2820 if (!ignore_case) return set_replacement(NULL); |
2823 // Here, we need to check for characters whose upper and lower cases | 2821 // Here, we need to check for characters whose upper and lower cases |
2824 // are outside the Latin-1 range. | 2822 // are outside the Latin-1 range. |
2825 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); | 2823 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
2826 // Character is outside Latin-1 completely | 2824 // Character is outside Latin-1 completely |
2827 if (converted == 0) return set_replacement(NULL); | 2825 if (converted == 0) return set_replacement(NULL); |
2828 // Convert quark to Latin-1 in place. | 2826 // Convert quark to Latin-1 in place. |
2829 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); | 2827 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); |
2830 copy[j] = converted; | 2828 copy[j] = converted; |
2831 } | 2829 } |
2832 } else { | 2830 } else { |
2833 ASSERT(elm.text_type == TextElement::CHAR_CLASS); | 2831 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); |
2834 RegExpCharacterClass* cc = elm.data.u_char_class; | 2832 RegExpCharacterClass* cc = elm.char_class(); |
2835 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2833 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
2836 if (!CharacterRange::IsCanonical(ranges)) { | 2834 if (!CharacterRange::IsCanonical(ranges)) { |
2837 CharacterRange::Canonicalize(ranges); | 2835 CharacterRange::Canonicalize(ranges); |
2838 } | 2836 } |
2839 // Now they are in order so we only need to look at the first. | 2837 // Now they are in order so we only need to look at the first. |
2840 int range_count = ranges->length(); | 2838 int range_count = ranges->length(); |
2841 if (cc->is_negated()) { | 2839 if (cc->is_negated()) { |
2842 if (range_count != 0 && | 2840 if (range_count != 0 && |
2843 ranges->at(0).from() == 0 && | 2841 ranges->at(0).from() == 0 && |
2844 ranges->at(0).to() >= String::kMaxOneByteCharCode) { | 2842 ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
(...skipping 404 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3249 bool first_element_checked, | 3247 bool first_element_checked, |
3250 int* checked_up_to) { | 3248 int* checked_up_to) { |
3251 Isolate* isolate = Isolate::Current(); | 3249 Isolate* isolate = Isolate::Current(); |
3252 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3250 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
3253 bool ascii = compiler->ascii(); | 3251 bool ascii = compiler->ascii(); |
3254 Label* backtrack = trace->backtrack(); | 3252 Label* backtrack = trace->backtrack(); |
3255 QuickCheckDetails* quick_check = trace->quick_check_performed(); | 3253 QuickCheckDetails* quick_check = trace->quick_check_performed(); |
3256 int element_count = elms_->length(); | 3254 int element_count = elms_->length(); |
3257 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 3255 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
3258 TextElement elm = elms_->at(i); | 3256 TextElement elm = elms_->at(i); |
3259 int cp_offset = trace->cp_offset() + elm.cp_offset; | 3257 int cp_offset = trace->cp_offset() + elm.cp_offset(); |
3260 if (elm.text_type == TextElement::ATOM) { | 3258 if (elm.text_type() == TextElement::ATOM) { |
3261 Vector<const uc16> quarks = elm.data.u_atom->data(); | 3259 Vector<const uc16> quarks = elm.atom()->data(); |
3262 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 3260 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
3263 if (first_element_checked && i == 0 && j == 0) continue; | 3261 if (first_element_checked && i == 0 && j == 0) continue; |
3264 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue; | 3262 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; |
3265 EmitCharacterFunction* emit_function = NULL; | 3263 EmitCharacterFunction* emit_function = NULL; |
3266 switch (pass) { | 3264 switch (pass) { |
3267 case NON_ASCII_MATCH: | 3265 case NON_ASCII_MATCH: |
3268 ASSERT(ascii); | 3266 ASSERT(ascii); |
3269 if (quarks[j] > String::kMaxOneByteCharCode) { | 3267 if (quarks[j] > String::kMaxOneByteCharCode) { |
3270 assembler->GoTo(backtrack); | 3268 assembler->GoTo(backtrack); |
3271 return; | 3269 return; |
3272 } | 3270 } |
3273 break; | 3271 break; |
3274 case NON_LETTER_CHARACTER_MATCH: | 3272 case NON_LETTER_CHARACTER_MATCH: |
(...skipping 13 matching lines...) Expand all Loading... |
3288 compiler, | 3286 compiler, |
3289 quarks[j], | 3287 quarks[j], |
3290 backtrack, | 3288 backtrack, |
3291 cp_offset + j, | 3289 cp_offset + j, |
3292 *checked_up_to < cp_offset + j, | 3290 *checked_up_to < cp_offset + j, |
3293 preloaded); | 3291 preloaded); |
3294 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 3292 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
3295 } | 3293 } |
3296 } | 3294 } |
3297 } else { | 3295 } else { |
3298 ASSERT_EQ(elm.text_type, TextElement::CHAR_CLASS); | 3296 ASSERT_EQ(TextElement::CHAR_CLASS, elm.text_type()); |
3299 if (pass == CHARACTER_CLASS_MATCH) { | 3297 if (pass == CHARACTER_CLASS_MATCH) { |
3300 if (first_element_checked && i == 0) continue; | 3298 if (first_element_checked && i == 0) continue; |
3301 if (DeterminedAlready(quick_check, elm.cp_offset)) continue; | 3299 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
3302 RegExpCharacterClass* cc = elm.data.u_char_class; | 3300 RegExpCharacterClass* cc = elm.char_class(); |
3303 EmitCharClass(assembler, | 3301 EmitCharClass(assembler, |
3304 cc, | 3302 cc, |
3305 ascii, | 3303 ascii, |
3306 backtrack, | 3304 backtrack, |
3307 cp_offset, | 3305 cp_offset, |
3308 *checked_up_to < cp_offset, | 3306 *checked_up_to < cp_offset, |
3309 preloaded, | 3307 preloaded, |
3310 zone()); | 3308 zone()); |
3311 UpdateBoundsCheck(cp_offset, checked_up_to); | 3309 UpdateBoundsCheck(cp_offset, checked_up_to); |
3312 } | 3310 } |
3313 } | 3311 } |
3314 } | 3312 } |
3315 } | 3313 } |
3316 | 3314 |
3317 | 3315 |
3318 int TextNode::Length() { | 3316 int TextNode::Length() { |
3319 TextElement elm = elms_->last(); | 3317 TextElement elm = elms_->last(); |
3320 ASSERT(elm.cp_offset >= 0); | 3318 ASSERT(elm.cp_offset() >= 0); |
3321 if (elm.text_type == TextElement::ATOM) { | 3319 return elm.cp_offset() + elm.length(); |
3322 return elm.cp_offset + elm.data.u_atom->data().length(); | |
3323 } else { | |
3324 return elm.cp_offset + 1; | |
3325 } | |
3326 } | 3320 } |
3327 | 3321 |
3328 | 3322 |
3329 bool TextNode::SkipPass(int int_pass, bool ignore_case) { | 3323 bool TextNode::SkipPass(int int_pass, bool ignore_case) { |
3330 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); | 3324 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); |
3331 if (ignore_case) { | 3325 if (ignore_case) { |
3332 return pass == SIMPLE_CHARACTER_MATCH; | 3326 return pass == SIMPLE_CHARACTER_MATCH; |
3333 } else { | 3327 } else { |
3334 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; | 3328 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; |
3335 } | 3329 } |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3417 cp_offset_ = 0; | 3411 cp_offset_ = 0; |
3418 } | 3412 } |
3419 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); | 3413 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); |
3420 } | 3414 } |
3421 | 3415 |
3422 | 3416 |
3423 void TextNode::MakeCaseIndependent(bool is_ascii) { | 3417 void TextNode::MakeCaseIndependent(bool is_ascii) { |
3424 int element_count = elms_->length(); | 3418 int element_count = elms_->length(); |
3425 for (int i = 0; i < element_count; i++) { | 3419 for (int i = 0; i < element_count; i++) { |
3426 TextElement elm = elms_->at(i); | 3420 TextElement elm = elms_->at(i); |
3427 if (elm.text_type == TextElement::CHAR_CLASS) { | 3421 if (elm.text_type() == TextElement::CHAR_CLASS) { |
3428 RegExpCharacterClass* cc = elm.data.u_char_class; | 3422 RegExpCharacterClass* cc = elm.char_class(); |
3429 // None of the standard character classes is different in the case | 3423 // None of the standard character classes is different in the case |
3430 // independent case and it slows us down if we don't know that. | 3424 // independent case and it slows us down if we don't know that. |
3431 if (cc->is_standard(zone())) continue; | 3425 if (cc->is_standard(zone())) continue; |
3432 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 3426 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
3433 int range_count = ranges->length(); | 3427 int range_count = ranges->length(); |
3434 for (int j = 0; j < range_count; j++) { | 3428 for (int j = 0; j < range_count; j++) { |
3435 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone()); | 3429 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone()); |
3436 } | 3430 } |
3437 } | 3431 } |
3438 } | 3432 } |
3439 } | 3433 } |
3440 | 3434 |
3441 | 3435 |
3442 int TextNode::GreedyLoopTextLength() { | 3436 int TextNode::GreedyLoopTextLength() { |
3443 TextElement elm = elms_->at(elms_->length() - 1); | 3437 TextElement elm = elms_->at(elms_->length() - 1); |
3444 if (elm.text_type == TextElement::CHAR_CLASS) { | 3438 return elm.cp_offset() + elm.length(); |
3445 return elm.cp_offset + 1; | |
3446 } else { | |
3447 return elm.cp_offset + elm.data.u_atom->data().length(); | |
3448 } | |
3449 } | 3439 } |
3450 | 3440 |
3451 | 3441 |
3452 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( | 3442 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( |
3453 RegExpCompiler* compiler) { | 3443 RegExpCompiler* compiler) { |
3454 if (elms_->length() != 1) return NULL; | 3444 if (elms_->length() != 1) return NULL; |
3455 TextElement elm = elms_->at(0); | 3445 TextElement elm = elms_->at(0); |
3456 if (elm.text_type != TextElement::CHAR_CLASS) return NULL; | 3446 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; |
3457 RegExpCharacterClass* node = elm.data.u_char_class; | 3447 RegExpCharacterClass* node = elm.char_class(); |
3458 ZoneList<CharacterRange>* ranges = node->ranges(zone()); | 3448 ZoneList<CharacterRange>* ranges = node->ranges(zone()); |
3459 if (!CharacterRange::IsCanonical(ranges)) { | 3449 if (!CharacterRange::IsCanonical(ranges)) { |
3460 CharacterRange::Canonicalize(ranges); | 3450 CharacterRange::Canonicalize(ranges); |
3461 } | 3451 } |
3462 if (node->is_negated()) { | 3452 if (node->is_negated()) { |
3463 return ranges->length() == 0 ? on_success() : NULL; | 3453 return ranges->length() == 0 ? on_success() : NULL; |
3464 } | 3454 } |
3465 if (ranges->length() != 1) return NULL; | 3455 if (ranges->length() != 1) return NULL; |
3466 uint32_t max_char; | 3456 uint32_t max_char; |
3467 if (compiler->ascii()) { | 3457 if (compiler->ascii()) { |
(...skipping 1053 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4521 } | 4511 } |
4522 } | 4512 } |
4523 | 4513 |
4524 | 4514 |
4525 void DotPrinter::VisitText(TextNode* that) { | 4515 void DotPrinter::VisitText(TextNode* that) { |
4526 Zone* zone = that->zone(); | 4516 Zone* zone = that->zone(); |
4527 stream()->Add(" n%p [label=\"", that); | 4517 stream()->Add(" n%p [label=\"", that); |
4528 for (int i = 0; i < that->elements()->length(); i++) { | 4518 for (int i = 0; i < that->elements()->length(); i++) { |
4529 if (i > 0) stream()->Add(" "); | 4519 if (i > 0) stream()->Add(" "); |
4530 TextElement elm = that->elements()->at(i); | 4520 TextElement elm = that->elements()->at(i); |
4531 switch (elm.text_type) { | 4521 switch (elm.text_type()) { |
4532 case TextElement::ATOM: { | 4522 case TextElement::ATOM: { |
4533 stream()->Add("'%w'", elm.data.u_atom->data()); | 4523 stream()->Add("'%w'", elm.atom()->data()); |
4534 break; | 4524 break; |
4535 } | 4525 } |
4536 case TextElement::CHAR_CLASS: { | 4526 case TextElement::CHAR_CLASS: { |
4537 RegExpCharacterClass* node = elm.data.u_char_class; | 4527 RegExpCharacterClass* node = elm.char_class(); |
4538 stream()->Add("["); | 4528 stream()->Add("["); |
4539 if (node->is_negated()) | 4529 if (node->is_negated()) |
4540 stream()->Add("^"); | 4530 stream()->Add("^"); |
4541 for (int j = 0; j < node->ranges(zone)->length(); j++) { | 4531 for (int j = 0; j < node->ranges(zone)->length(); j++) { |
4542 CharacterRange range = node->ranges(zone)->at(j); | 4532 CharacterRange range = node->ranges(zone)->at(j); |
4543 stream()->Add("%k-%k", range.from(), range.to()); | 4533 stream()->Add("%k-%k", range.from(), range.to()); |
4544 } | 4534 } |
4545 stream()->Add("]"); | 4535 stream()->Add("]"); |
4546 break; | 4536 break; |
4547 } | 4537 } |
(...skipping 1161 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5709 } | 5699 } |
5710 | 5700 |
5711 | 5701 |
5712 void TextNode::CalculateOffsets() { | 5702 void TextNode::CalculateOffsets() { |
5713 int element_count = elements()->length(); | 5703 int element_count = elements()->length(); |
5714 // Set up the offsets of the elements relative to the start. This is a fixed | 5704 // Set up the offsets of the elements relative to the start. This is a fixed |
5715 // quantity since a TextNode can only contain fixed-width things. | 5705 // quantity since a TextNode can only contain fixed-width things. |
5716 int cp_offset = 0; | 5706 int cp_offset = 0; |
5717 for (int i = 0; i < element_count; i++) { | 5707 for (int i = 0; i < element_count; i++) { |
5718 TextElement& elm = elements()->at(i); | 5708 TextElement& elm = elements()->at(i); |
5719 elm.cp_offset = cp_offset; | 5709 elm.set_cp_offset(cp_offset); |
5720 if (elm.text_type == TextElement::ATOM) { | 5710 cp_offset += elm.length(); |
5721 cp_offset += elm.data.u_atom->data().length(); | |
5722 } else { | |
5723 cp_offset++; | |
5724 } | |
5725 } | 5711 } |
5726 } | 5712 } |
5727 | 5713 |
5728 | 5714 |
5729 void Analysis::VisitText(TextNode* that) { | 5715 void Analysis::VisitText(TextNode* that) { |
5730 if (ignore_case_) { | 5716 if (ignore_case_) { |
5731 that->MakeCaseIndependent(is_ascii_); | 5717 that->MakeCaseIndependent(is_ascii_); |
5732 } | 5718 } |
5733 EnsureAnalyzed(that->on_success()); | 5719 EnsureAnalyzed(that->on_success()); |
5734 if (!has_failed()) { | 5720 if (!has_failed()) { |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5830 bool not_at_start) { | 5816 bool not_at_start) { |
5831 if (initial_offset >= bm->length()) return; | 5817 if (initial_offset >= bm->length()) return; |
5832 int offset = initial_offset; | 5818 int offset = initial_offset; |
5833 int max_char = bm->max_char(); | 5819 int max_char = bm->max_char(); |
5834 for (int i = 0; i < elements()->length(); i++) { | 5820 for (int i = 0; i < elements()->length(); i++) { |
5835 if (offset >= bm->length()) { | 5821 if (offset >= bm->length()) { |
5836 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5822 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
5837 return; | 5823 return; |
5838 } | 5824 } |
5839 TextElement text = elements()->at(i); | 5825 TextElement text = elements()->at(i); |
5840 if (text.text_type == TextElement::ATOM) { | 5826 if (text.text_type() == TextElement::ATOM) { |
5841 RegExpAtom* atom = text.data.u_atom; | 5827 RegExpAtom* atom = text.atom(); |
5842 for (int j = 0; j < atom->length(); j++, offset++) { | 5828 for (int j = 0; j < atom->length(); j++, offset++) { |
5843 if (offset >= bm->length()) { | 5829 if (offset >= bm->length()) { |
5844 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5830 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
5845 return; | 5831 return; |
5846 } | 5832 } |
5847 uc16 character = atom->data()[j]; | 5833 uc16 character = atom->data()[j]; |
5848 if (bm->compiler()->ignore_case()) { | 5834 if (bm->compiler()->ignore_case()) { |
5849 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 5835 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
5850 int length = GetCaseIndependentLetters( | 5836 int length = GetCaseIndependentLetters( |
5851 ISOLATE, | 5837 ISOLATE, |
5852 character, | 5838 character, |
5853 bm->max_char() == String::kMaxOneByteCharCode, | 5839 bm->max_char() == String::kMaxOneByteCharCode, |
5854 chars); | 5840 chars); |
5855 for (int j = 0; j < length; j++) { | 5841 for (int j = 0; j < length; j++) { |
5856 bm->Set(offset, chars[j]); | 5842 bm->Set(offset, chars[j]); |
5857 } | 5843 } |
5858 } else { | 5844 } else { |
5859 if (character <= max_char) bm->Set(offset, character); | 5845 if (character <= max_char) bm->Set(offset, character); |
5860 } | 5846 } |
5861 } | 5847 } |
5862 } else { | 5848 } else { |
5863 ASSERT(text.text_type == TextElement::CHAR_CLASS); | 5849 ASSERT_EQ(TextElement::CHAR_CLASS, text.text_type()); |
5864 RegExpCharacterClass* char_class = text.data.u_char_class; | 5850 RegExpCharacterClass* char_class = text.char_class(); |
5865 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); | 5851 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); |
5866 if (char_class->is_negated()) { | 5852 if (char_class->is_negated()) { |
5867 bm->SetAll(offset); | 5853 bm->SetAll(offset); |
5868 } else { | 5854 } else { |
5869 for (int k = 0; k < ranges->length(); k++) { | 5855 for (int k = 0; k < ranges->length(); k++) { |
5870 CharacterRange& range = ranges->at(k); | 5856 CharacterRange& range = ranges->at(k); |
5871 if (range.from() > max_char) continue; | 5857 if (range.from() > max_char) continue; |
5872 int to = Min(max_char, static_cast<int>(range.to())); | 5858 int to = Min(max_char, static_cast<int>(range.to())); |
5873 bm->SetInterval(offset, Interval(range.from(), to)); | 5859 bm->SetInterval(offset, Interval(range.from(), to)); |
5874 } | 5860 } |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5966 last = range.to() + 1; | 5952 last = range.to() + 1; |
5967 } | 5953 } |
5968 } | 5954 } |
5969 } | 5955 } |
5970 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit)); | 5956 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit)); |
5971 } | 5957 } |
5972 | 5958 |
5973 | 5959 |
5974 void DispatchTableConstructor::VisitText(TextNode* that) { | 5960 void DispatchTableConstructor::VisitText(TextNode* that) { |
5975 TextElement elm = that->elements()->at(0); | 5961 TextElement elm = that->elements()->at(0); |
5976 switch (elm.text_type) { | 5962 switch (elm.text_type()) { |
5977 case TextElement::ATOM: { | 5963 case TextElement::ATOM: { |
5978 uc16 c = elm.data.u_atom->data()[0]; | 5964 uc16 c = elm.atom()->data()[0]; |
5979 AddRange(CharacterRange(c, c)); | 5965 AddRange(CharacterRange(c, c)); |
5980 break; | 5966 break; |
5981 } | 5967 } |
5982 case TextElement::CHAR_CLASS: { | 5968 case TextElement::CHAR_CLASS: { |
5983 RegExpCharacterClass* tree = elm.data.u_char_class; | 5969 RegExpCharacterClass* tree = elm.char_class(); |
5984 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone()); | 5970 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone()); |
5985 if (tree->is_negated()) { | 5971 if (tree->is_negated()) { |
5986 AddInverse(ranges); | 5972 AddInverse(ranges); |
5987 } else { | 5973 } else { |
5988 for (int i = 0; i < ranges->length(); i++) | 5974 for (int i = 0; i < ranges->length(); i++) |
5989 AddRange(ranges->at(i)); | 5975 AddRange(ranges->at(i)); |
5990 } | 5976 } |
5991 break; | 5977 break; |
5992 } | 5978 } |
5993 default: { | 5979 default: { |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6125 } | 6111 } |
6126 | 6112 |
6127 return compiler.Assemble(&macro_assembler, | 6113 return compiler.Assemble(&macro_assembler, |
6128 node, | 6114 node, |
6129 data->capture_count, | 6115 data->capture_count, |
6130 pattern); | 6116 pattern); |
6131 } | 6117 } |
6132 | 6118 |
6133 | 6119 |
6134 }} // namespace v8::internal | 6120 }} // namespace v8::internal |
OLD | NEW |