Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(12)

Side by Side Diff: src/jsregexp.cc

Issue 22815033: Fix crash due to RegExpAtom method called on RegExpCharacterClass object. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 915 matching lines...) Expand 10 before | Expand all | Expand 10 after
926 } 926 }
927 927
928 928
929 void RegExpText::AppendToText(RegExpText* text, Zone* zone) { 929 void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
930 for (int i = 0; i < elements()->length(); i++) 930 for (int i = 0; i < elements()->length(); i++)
931 text->AddElement(elements()->at(i), zone); 931 text->AddElement(elements()->at(i), zone);
932 } 932 }
933 933
934 934
935 TextElement TextElement::Atom(RegExpAtom* atom) { 935 TextElement TextElement::Atom(RegExpAtom* atom) {
936 TextElement result = TextElement(ATOM); 936 return TextElement(ATOM, atom);
937 result.data.u_atom = atom;
938 return result;
939 } 937 }
940 938
941 939
942 TextElement TextElement::CharClass( 940 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
943 RegExpCharacterClass* char_class) { 941 return TextElement(CHAR_CLASS, char_class);
944 TextElement result = TextElement(CHAR_CLASS);
945 result.data.u_char_class = char_class;
946 return result;
947 } 942 }
948 943
949 944
950 int TextElement::length() { 945 int TextElement::length() const {
951 if (text_type == ATOM) { 946 switch (text_type()) {
952 return data.u_atom->length(); 947 case ATOM:
953 } else { 948 return atom()->length();
954 ASSERT(text_type == CHAR_CLASS); 949
955 return 1; 950 case CHAR_CLASS:
951 return 1;
956 } 952 }
953 UNREACHABLE();
954 return 0;
957 } 955 }
958 956
959 957
960 DispatchTable* ChoiceNode::GetTable(bool ignore_case) { 958 DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
961 if (table_ == NULL) { 959 if (table_ == NULL) {
962 table_ = new(zone()) DispatchTable(zone()); 960 table_ = new(zone()) DispatchTable(zone());
963 DispatchTableConstructor cons(table_, ignore_case, zone()); 961 DispatchTableConstructor cons(table_, ignore_case, zone());
964 cons.BuildTable(this); 962 cons.BuildTable(this);
965 } 963 }
966 return table_; 964 return table_;
(...skipping 1587 matching lines...) Expand 10 before | Expand all | Expand 10 after
2554 ASSERT(characters_filled_in < details->characters()); 2552 ASSERT(characters_filled_in < details->characters());
2555 int characters = details->characters(); 2553 int characters = details->characters();
2556 int char_mask; 2554 int char_mask;
2557 if (compiler->ascii()) { 2555 if (compiler->ascii()) {
2558 char_mask = String::kMaxOneByteCharCode; 2556 char_mask = String::kMaxOneByteCharCode;
2559 } else { 2557 } else {
2560 char_mask = String::kMaxUtf16CodeUnit; 2558 char_mask = String::kMaxUtf16CodeUnit;
2561 } 2559 }
2562 for (int k = 0; k < elms_->length(); k++) { 2560 for (int k = 0; k < elms_->length(); k++) {
2563 TextElement elm = elms_->at(k); 2561 TextElement elm = elms_->at(k);
2564 if (elm.text_type == TextElement::ATOM) { 2562 if (elm.text_type() == TextElement::ATOM) {
2565 Vector<const uc16> quarks = elm.data.u_atom->data(); 2563 Vector<const uc16> quarks = elm.atom()->data();
2566 for (int i = 0; i < characters && i < quarks.length(); i++) { 2564 for (int i = 0; i < characters && i < quarks.length(); i++) {
2567 QuickCheckDetails::Position* pos = 2565 QuickCheckDetails::Position* pos =
2568 details->positions(characters_filled_in); 2566 details->positions(characters_filled_in);
2569 uc16 c = quarks[i]; 2567 uc16 c = quarks[i];
2570 if (c > char_mask) { 2568 if (c > char_mask) {
2571 // If we expect a non-ASCII character from an ASCII string, 2569 // If we expect a non-ASCII character from an ASCII string,
2572 // there is no way we can match. Not even case independent 2570 // there is no way we can match. Not even case independent
2573 // matching can turn an ASCII character into non-ASCII or 2571 // matching can turn an ASCII character into non-ASCII or
2574 // vice versa. 2572 // vice versa.
2575 details->set_cannot_match(); 2573 details->set_cannot_match();
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2617 } 2615 }
2618 characters_filled_in++; 2616 characters_filled_in++;
2619 ASSERT(characters_filled_in <= details->characters()); 2617 ASSERT(characters_filled_in <= details->characters());
2620 if (characters_filled_in == details->characters()) { 2618 if (characters_filled_in == details->characters()) {
2621 return; 2619 return;
2622 } 2620 }
2623 } 2621 }
2624 } else { 2622 } else {
2625 QuickCheckDetails::Position* pos = 2623 QuickCheckDetails::Position* pos =
2626 details->positions(characters_filled_in); 2624 details->positions(characters_filled_in);
2627 RegExpCharacterClass* tree = elm.data.u_char_class; 2625 RegExpCharacterClass* tree = elm.char_class();
2628 ZoneList<CharacterRange>* ranges = tree->ranges(zone()); 2626 ZoneList<CharacterRange>* ranges = tree->ranges(zone());
2629 if (tree->is_negated()) { 2627 if (tree->is_negated()) {
2630 // A quick check uses multi-character mask and compare. There is no 2628 // A quick check uses multi-character mask and compare. There is no
2631 // useful way to incorporate a negative char class into this scheme 2629 // useful way to incorporate a negative char class into this scheme
2632 // so we just conservatively create a mask and value that will always 2630 // so we just conservatively create a mask and value that will always
2633 // succeed. 2631 // succeed.
2634 pos->mask = 0; 2632 pos->mask = 0;
2635 pos->value = 0; 2633 pos->value = 0;
2636 } else { 2634 } else {
2637 int first_range = 0; 2635 int first_range = 0;
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after
2807 2805
2808 2806
2809 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { 2807 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
2810 if (info()->replacement_calculated) return replacement(); 2808 if (info()->replacement_calculated) return replacement();
2811 if (depth < 0) return this; 2809 if (depth < 0) return this;
2812 ASSERT(!info()->visited); 2810 ASSERT(!info()->visited);
2813 VisitMarker marker(info()); 2811 VisitMarker marker(info());
2814 int element_count = elms_->length(); 2812 int element_count = elms_->length();
2815 for (int i = 0; i < element_count; i++) { 2813 for (int i = 0; i < element_count; i++) {
2816 TextElement elm = elms_->at(i); 2814 TextElement elm = elms_->at(i);
2817 if (elm.text_type == TextElement::ATOM) { 2815 if (elm.text_type() == TextElement::ATOM) {
2818 Vector<const uc16> quarks = elm.data.u_atom->data(); 2816 Vector<const uc16> quarks = elm.atom()->data();
2819 for (int j = 0; j < quarks.length(); j++) { 2817 for (int j = 0; j < quarks.length(); j++) {
2820 uint16_t c = quarks[j]; 2818 uint16_t c = quarks[j];
2821 if (c <= String::kMaxOneByteCharCode) continue; 2819 if (c <= String::kMaxOneByteCharCode) continue;
2822 if (!ignore_case) return set_replacement(NULL); 2820 if (!ignore_case) return set_replacement(NULL);
2823 // Here, we need to check for characters whose upper and lower cases 2821 // Here, we need to check for characters whose upper and lower cases
2824 // are outside the Latin-1 range. 2822 // are outside the Latin-1 range.
2825 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); 2823 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
2826 // Character is outside Latin-1 completely 2824 // Character is outside Latin-1 completely
2827 if (converted == 0) return set_replacement(NULL); 2825 if (converted == 0) return set_replacement(NULL);
2828 // Convert quark to Latin-1 in place. 2826 // Convert quark to Latin-1 in place.
2829 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); 2827 uint16_t* copy = const_cast<uint16_t*>(quarks.start());
2830 copy[j] = converted; 2828 copy[j] = converted;
2831 } 2829 }
2832 } else { 2830 } else {
2833 ASSERT(elm.text_type == TextElement::CHAR_CLASS); 2831 ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
2834 RegExpCharacterClass* cc = elm.data.u_char_class; 2832 RegExpCharacterClass* cc = elm.char_class();
2835 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 2833 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2836 if (!CharacterRange::IsCanonical(ranges)) { 2834 if (!CharacterRange::IsCanonical(ranges)) {
2837 CharacterRange::Canonicalize(ranges); 2835 CharacterRange::Canonicalize(ranges);
2838 } 2836 }
2839 // Now they are in order so we only need to look at the first. 2837 // Now they are in order so we only need to look at the first.
2840 int range_count = ranges->length(); 2838 int range_count = ranges->length();
2841 if (cc->is_negated()) { 2839 if (cc->is_negated()) {
2842 if (range_count != 0 && 2840 if (range_count != 0 &&
2843 ranges->at(0).from() == 0 && 2841 ranges->at(0).from() == 0 &&
2844 ranges->at(0).to() >= String::kMaxOneByteCharCode) { 2842 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
(...skipping 404 matching lines...) Expand 10 before | Expand all | Expand 10 after
3249 bool first_element_checked, 3247 bool first_element_checked,
3250 int* checked_up_to) { 3248 int* checked_up_to) {
3251 Isolate* isolate = Isolate::Current(); 3249 Isolate* isolate = Isolate::Current();
3252 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3250 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3253 bool ascii = compiler->ascii(); 3251 bool ascii = compiler->ascii();
3254 Label* backtrack = trace->backtrack(); 3252 Label* backtrack = trace->backtrack();
3255 QuickCheckDetails* quick_check = trace->quick_check_performed(); 3253 QuickCheckDetails* quick_check = trace->quick_check_performed();
3256 int element_count = elms_->length(); 3254 int element_count = elms_->length();
3257 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { 3255 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
3258 TextElement elm = elms_->at(i); 3256 TextElement elm = elms_->at(i);
3259 int cp_offset = trace->cp_offset() + elm.cp_offset; 3257 int cp_offset = trace->cp_offset() + elm.cp_offset();
3260 if (elm.text_type == TextElement::ATOM) { 3258 if (elm.text_type() == TextElement::ATOM) {
3261 Vector<const uc16> quarks = elm.data.u_atom->data(); 3259 Vector<const uc16> quarks = elm.atom()->data();
3262 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { 3260 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
3263 if (first_element_checked && i == 0 && j == 0) continue; 3261 if (first_element_checked && i == 0 && j == 0) continue;
3264 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue; 3262 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
3265 EmitCharacterFunction* emit_function = NULL; 3263 EmitCharacterFunction* emit_function = NULL;
3266 switch (pass) { 3264 switch (pass) {
3267 case NON_ASCII_MATCH: 3265 case NON_ASCII_MATCH:
3268 ASSERT(ascii); 3266 ASSERT(ascii);
3269 if (quarks[j] > String::kMaxOneByteCharCode) { 3267 if (quarks[j] > String::kMaxOneByteCharCode) {
3270 assembler->GoTo(backtrack); 3268 assembler->GoTo(backtrack);
3271 return; 3269 return;
3272 } 3270 }
3273 break; 3271 break;
3274 case NON_LETTER_CHARACTER_MATCH: 3272 case NON_LETTER_CHARACTER_MATCH:
(...skipping 13 matching lines...) Expand all
3288 compiler, 3286 compiler,
3289 quarks[j], 3287 quarks[j],
3290 backtrack, 3288 backtrack,
3291 cp_offset + j, 3289 cp_offset + j,
3292 *checked_up_to < cp_offset + j, 3290 *checked_up_to < cp_offset + j,
3293 preloaded); 3291 preloaded);
3294 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); 3292 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
3295 } 3293 }
3296 } 3294 }
3297 } else { 3295 } else {
3298 ASSERT_EQ(elm.text_type, TextElement::CHAR_CLASS); 3296 ASSERT_EQ(TextElement::CHAR_CLASS, elm.text_type());
3299 if (pass == CHARACTER_CLASS_MATCH) { 3297 if (pass == CHARACTER_CLASS_MATCH) {
3300 if (first_element_checked && i == 0) continue; 3298 if (first_element_checked && i == 0) continue;
3301 if (DeterminedAlready(quick_check, elm.cp_offset)) continue; 3299 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
3302 RegExpCharacterClass* cc = elm.data.u_char_class; 3300 RegExpCharacterClass* cc = elm.char_class();
3303 EmitCharClass(assembler, 3301 EmitCharClass(assembler,
3304 cc, 3302 cc,
3305 ascii, 3303 ascii,
3306 backtrack, 3304 backtrack,
3307 cp_offset, 3305 cp_offset,
3308 *checked_up_to < cp_offset, 3306 *checked_up_to < cp_offset,
3309 preloaded, 3307 preloaded,
3310 zone()); 3308 zone());
3311 UpdateBoundsCheck(cp_offset, checked_up_to); 3309 UpdateBoundsCheck(cp_offset, checked_up_to);
3312 } 3310 }
3313 } 3311 }
3314 } 3312 }
3315 } 3313 }
3316 3314
3317 3315
3318 int TextNode::Length() { 3316 int TextNode::Length() {
3319 TextElement elm = elms_->last(); 3317 TextElement elm = elms_->last();
3320 ASSERT(elm.cp_offset >= 0); 3318 ASSERT(elm.cp_offset() >= 0);
3321 if (elm.text_type == TextElement::ATOM) { 3319 return elm.cp_offset() + elm.length();
3322 return elm.cp_offset + elm.data.u_atom->data().length();
3323 } else {
3324 return elm.cp_offset + 1;
3325 }
3326 } 3320 }
3327 3321
3328 3322
3329 bool TextNode::SkipPass(int int_pass, bool ignore_case) { 3323 bool TextNode::SkipPass(int int_pass, bool ignore_case) {
3330 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); 3324 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
3331 if (ignore_case) { 3325 if (ignore_case) {
3332 return pass == SIMPLE_CHARACTER_MATCH; 3326 return pass == SIMPLE_CHARACTER_MATCH;
3333 } else { 3327 } else {
3334 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; 3328 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
3335 } 3329 }
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
3417 cp_offset_ = 0; 3411 cp_offset_ = 0;
3418 } 3412 }
3419 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); 3413 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
3420 } 3414 }
3421 3415
3422 3416
3423 void TextNode::MakeCaseIndependent(bool is_ascii) { 3417 void TextNode::MakeCaseIndependent(bool is_ascii) {
3424 int element_count = elms_->length(); 3418 int element_count = elms_->length();
3425 for (int i = 0; i < element_count; i++) { 3419 for (int i = 0; i < element_count; i++) {
3426 TextElement elm = elms_->at(i); 3420 TextElement elm = elms_->at(i);
3427 if (elm.text_type == TextElement::CHAR_CLASS) { 3421 if (elm.text_type() == TextElement::CHAR_CLASS) {
3428 RegExpCharacterClass* cc = elm.data.u_char_class; 3422 RegExpCharacterClass* cc = elm.char_class();
3429 // None of the standard character classes is different in the case 3423 // None of the standard character classes is different in the case
3430 // independent case and it slows us down if we don't know that. 3424 // independent case and it slows us down if we don't know that.
3431 if (cc->is_standard(zone())) continue; 3425 if (cc->is_standard(zone())) continue;
3432 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 3426 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
3433 int range_count = ranges->length(); 3427 int range_count = ranges->length();
3434 for (int j = 0; j < range_count; j++) { 3428 for (int j = 0; j < range_count; j++) {
3435 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone()); 3429 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone());
3436 } 3430 }
3437 } 3431 }
3438 } 3432 }
3439 } 3433 }
3440 3434
3441 3435
3442 int TextNode::GreedyLoopTextLength() { 3436 int TextNode::GreedyLoopTextLength() {
3443 TextElement elm = elms_->at(elms_->length() - 1); 3437 TextElement elm = elms_->at(elms_->length() - 1);
3444 if (elm.text_type == TextElement::CHAR_CLASS) { 3438 return elm.cp_offset() + elm.length();
3445 return elm.cp_offset + 1;
3446 } else {
3447 return elm.cp_offset + elm.data.u_atom->data().length();
3448 }
3449 } 3439 }
3450 3440
3451 3441
3452 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( 3442 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
3453 RegExpCompiler* compiler) { 3443 RegExpCompiler* compiler) {
3454 if (elms_->length() != 1) return NULL; 3444 if (elms_->length() != 1) return NULL;
3455 TextElement elm = elms_->at(0); 3445 TextElement elm = elms_->at(0);
3456 if (elm.text_type != TextElement::CHAR_CLASS) return NULL; 3446 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
3457 RegExpCharacterClass* node = elm.data.u_char_class; 3447 RegExpCharacterClass* node = elm.char_class();
3458 ZoneList<CharacterRange>* ranges = node->ranges(zone()); 3448 ZoneList<CharacterRange>* ranges = node->ranges(zone());
3459 if (!CharacterRange::IsCanonical(ranges)) { 3449 if (!CharacterRange::IsCanonical(ranges)) {
3460 CharacterRange::Canonicalize(ranges); 3450 CharacterRange::Canonicalize(ranges);
3461 } 3451 }
3462 if (node->is_negated()) { 3452 if (node->is_negated()) {
3463 return ranges->length() == 0 ? on_success() : NULL; 3453 return ranges->length() == 0 ? on_success() : NULL;
3464 } 3454 }
3465 if (ranges->length() != 1) return NULL; 3455 if (ranges->length() != 1) return NULL;
3466 uint32_t max_char; 3456 uint32_t max_char;
3467 if (compiler->ascii()) { 3457 if (compiler->ascii()) {
(...skipping 1053 matching lines...) Expand 10 before | Expand all | Expand 10 after
4521 } 4511 }
4522 } 4512 }
4523 4513
4524 4514
4525 void DotPrinter::VisitText(TextNode* that) { 4515 void DotPrinter::VisitText(TextNode* that) {
4526 Zone* zone = that->zone(); 4516 Zone* zone = that->zone();
4527 stream()->Add(" n%p [label=\"", that); 4517 stream()->Add(" n%p [label=\"", that);
4528 for (int i = 0; i < that->elements()->length(); i++) { 4518 for (int i = 0; i < that->elements()->length(); i++) {
4529 if (i > 0) stream()->Add(" "); 4519 if (i > 0) stream()->Add(" ");
4530 TextElement elm = that->elements()->at(i); 4520 TextElement elm = that->elements()->at(i);
4531 switch (elm.text_type) { 4521 switch (elm.text_type()) {
4532 case TextElement::ATOM: { 4522 case TextElement::ATOM: {
4533 stream()->Add("'%w'", elm.data.u_atom->data()); 4523 stream()->Add("'%w'", elm.atom()->data());
4534 break; 4524 break;
4535 } 4525 }
4536 case TextElement::CHAR_CLASS: { 4526 case TextElement::CHAR_CLASS: {
4537 RegExpCharacterClass* node = elm.data.u_char_class; 4527 RegExpCharacterClass* node = elm.char_class();
4538 stream()->Add("["); 4528 stream()->Add("[");
4539 if (node->is_negated()) 4529 if (node->is_negated())
4540 stream()->Add("^"); 4530 stream()->Add("^");
4541 for (int j = 0; j < node->ranges(zone)->length(); j++) { 4531 for (int j = 0; j < node->ranges(zone)->length(); j++) {
4542 CharacterRange range = node->ranges(zone)->at(j); 4532 CharacterRange range = node->ranges(zone)->at(j);
4543 stream()->Add("%k-%k", range.from(), range.to()); 4533 stream()->Add("%k-%k", range.from(), range.to());
4544 } 4534 }
4545 stream()->Add("]"); 4535 stream()->Add("]");
4546 break; 4536 break;
4547 } 4537 }
(...skipping 1161 matching lines...) Expand 10 before | Expand all | Expand 10 after
5709 } 5699 }
5710 5700
5711 5701
5712 void TextNode::CalculateOffsets() { 5702 void TextNode::CalculateOffsets() {
5713 int element_count = elements()->length(); 5703 int element_count = elements()->length();
5714 // Set up the offsets of the elements relative to the start. This is a fixed 5704 // Set up the offsets of the elements relative to the start. This is a fixed
5715 // quantity since a TextNode can only contain fixed-width things. 5705 // quantity since a TextNode can only contain fixed-width things.
5716 int cp_offset = 0; 5706 int cp_offset = 0;
5717 for (int i = 0; i < element_count; i++) { 5707 for (int i = 0; i < element_count; i++) {
5718 TextElement& elm = elements()->at(i); 5708 TextElement& elm = elements()->at(i);
5719 elm.cp_offset = cp_offset; 5709 elm.set_cp_offset(cp_offset);
5720 if (elm.text_type == TextElement::ATOM) { 5710 cp_offset += elm.length();
5721 cp_offset += elm.data.u_atom->data().length();
5722 } else {
5723 cp_offset++;
5724 }
5725 } 5711 }
5726 } 5712 }
5727 5713
5728 5714
5729 void Analysis::VisitText(TextNode* that) { 5715 void Analysis::VisitText(TextNode* that) {
5730 if (ignore_case_) { 5716 if (ignore_case_) {
5731 that->MakeCaseIndependent(is_ascii_); 5717 that->MakeCaseIndependent(is_ascii_);
5732 } 5718 }
5733 EnsureAnalyzed(that->on_success()); 5719 EnsureAnalyzed(that->on_success());
5734 if (!has_failed()) { 5720 if (!has_failed()) {
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
5830 bool not_at_start) { 5816 bool not_at_start) {
5831 if (initial_offset >= bm->length()) return; 5817 if (initial_offset >= bm->length()) return;
5832 int offset = initial_offset; 5818 int offset = initial_offset;
5833 int max_char = bm->max_char(); 5819 int max_char = bm->max_char();
5834 for (int i = 0; i < elements()->length(); i++) { 5820 for (int i = 0; i < elements()->length(); i++) {
5835 if (offset >= bm->length()) { 5821 if (offset >= bm->length()) {
5836 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5822 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5837 return; 5823 return;
5838 } 5824 }
5839 TextElement text = elements()->at(i); 5825 TextElement text = elements()->at(i);
5840 if (text.text_type == TextElement::ATOM) { 5826 if (text.text_type() == TextElement::ATOM) {
5841 RegExpAtom* atom = text.data.u_atom; 5827 RegExpAtom* atom = text.atom();
5842 for (int j = 0; j < atom->length(); j++, offset++) { 5828 for (int j = 0; j < atom->length(); j++, offset++) {
5843 if (offset >= bm->length()) { 5829 if (offset >= bm->length()) {
5844 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5830 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5845 return; 5831 return;
5846 } 5832 }
5847 uc16 character = atom->data()[j]; 5833 uc16 character = atom->data()[j];
5848 if (bm->compiler()->ignore_case()) { 5834 if (bm->compiler()->ignore_case()) {
5849 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5835 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5850 int length = GetCaseIndependentLetters( 5836 int length = GetCaseIndependentLetters(
5851 ISOLATE, 5837 ISOLATE,
5852 character, 5838 character,
5853 bm->max_char() == String::kMaxOneByteCharCode, 5839 bm->max_char() == String::kMaxOneByteCharCode,
5854 chars); 5840 chars);
5855 for (int j = 0; j < length; j++) { 5841 for (int j = 0; j < length; j++) {
5856 bm->Set(offset, chars[j]); 5842 bm->Set(offset, chars[j]);
5857 } 5843 }
5858 } else { 5844 } else {
5859 if (character <= max_char) bm->Set(offset, character); 5845 if (character <= max_char) bm->Set(offset, character);
5860 } 5846 }
5861 } 5847 }
5862 } else { 5848 } else {
5863 ASSERT(text.text_type == TextElement::CHAR_CLASS); 5849 ASSERT_EQ(TextElement::CHAR_CLASS, text.text_type());
5864 RegExpCharacterClass* char_class = text.data.u_char_class; 5850 RegExpCharacterClass* char_class = text.char_class();
5865 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); 5851 ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
5866 if (char_class->is_negated()) { 5852 if (char_class->is_negated()) {
5867 bm->SetAll(offset); 5853 bm->SetAll(offset);
5868 } else { 5854 } else {
5869 for (int k = 0; k < ranges->length(); k++) { 5855 for (int k = 0; k < ranges->length(); k++) {
5870 CharacterRange& range = ranges->at(k); 5856 CharacterRange& range = ranges->at(k);
5871 if (range.from() > max_char) continue; 5857 if (range.from() > max_char) continue;
5872 int to = Min(max_char, static_cast<int>(range.to())); 5858 int to = Min(max_char, static_cast<int>(range.to()));
5873 bm->SetInterval(offset, Interval(range.from(), to)); 5859 bm->SetInterval(offset, Interval(range.from(), to));
5874 } 5860 }
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
5966 last = range.to() + 1; 5952 last = range.to() + 1;
5967 } 5953 }
5968 } 5954 }
5969 } 5955 }
5970 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit)); 5956 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit));
5971 } 5957 }
5972 5958
5973 5959
5974 void DispatchTableConstructor::VisitText(TextNode* that) { 5960 void DispatchTableConstructor::VisitText(TextNode* that) {
5975 TextElement elm = that->elements()->at(0); 5961 TextElement elm = that->elements()->at(0);
5976 switch (elm.text_type) { 5962 switch (elm.text_type()) {
5977 case TextElement::ATOM: { 5963 case TextElement::ATOM: {
5978 uc16 c = elm.data.u_atom->data()[0]; 5964 uc16 c = elm.atom()->data()[0];
5979 AddRange(CharacterRange(c, c)); 5965 AddRange(CharacterRange(c, c));
5980 break; 5966 break;
5981 } 5967 }
5982 case TextElement::CHAR_CLASS: { 5968 case TextElement::CHAR_CLASS: {
5983 RegExpCharacterClass* tree = elm.data.u_char_class; 5969 RegExpCharacterClass* tree = elm.char_class();
5984 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone()); 5970 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());
5985 if (tree->is_negated()) { 5971 if (tree->is_negated()) {
5986 AddInverse(ranges); 5972 AddInverse(ranges);
5987 } else { 5973 } else {
5988 for (int i = 0; i < ranges->length(); i++) 5974 for (int i = 0; i < ranges->length(); i++)
5989 AddRange(ranges->at(i)); 5975 AddRange(ranges->at(i));
5990 } 5976 }
5991 break; 5977 break;
5992 } 5978 }
5993 default: { 5979 default: {
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
6125 } 6111 }
6126 6112
6127 return compiler.Assemble(&macro_assembler, 6113 return compiler.Assemble(&macro_assembler,
6128 node, 6114 node,
6129 data->capture_count, 6115 data->capture_count,
6130 pattern); 6116 pattern);
6131 } 6117 }
6132 6118
6133 6119
6134 }} // namespace v8::internal 6120 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698