src/jsregexp.cc - Issue 22815033: Fix crash due to RegExpAtom method called on RegExpCharacterClass object.

Side by Side Diff: src/jsregexp.cc

Issue 22815033: Fix crash due to RegExpAtom method called on RegExpCharacterClass object. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 7 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 915 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
926 }	926 }

927	927

928	928

929 void RegExpText::AppendToText(RegExpText* text, Zone* zone) {	929 void RegExpText::AppendToText(RegExpText* text, Zone* zone) {

930 for (int i = 0; i < elements()->length(); i++)	930 for (int i = 0; i < elements()->length(); i++)

931 text->AddElement(elements()->at(i), zone);	931 text->AddElement(elements()->at(i), zone);

932 }	932 }

933	933

934	934

935 TextElement TextElement::Atom(RegExpAtom* atom) {	935 TextElement TextElement::Atom(RegExpAtom* atom) {

936 TextElement result = TextElement(ATOM);	936 return TextElement(ATOM, atom);

937 result.data.u_atom = atom;

938 return result;

939 }	937 }

940	938

941	939

942 TextElement TextElement::CharClass(	940 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {

943 RegExpCharacterClass* char_class) {	941 return TextElement(CHAR_CLASS, char_class);

944 TextElement result = TextElement(CHAR_CLASS);

945 result.data.u_char_class = char_class;

946 return result;

947 }	942 }

948	943

949	944

950 int TextElement::length() {	945 int TextElement::length() const {

951 if (text_type == ATOM) {	946 switch (text_type()) {

952 return data.u_atom->length();	947 case ATOM:

953 } else {	948 return atom()->length();

954 ASSERT(text_type == CHAR_CLASS);	949

955 return 1;	950 case CHAR_CLASS:

	951 return 1;

956 }	952 }

	953 UNREACHABLE();

	954 return 0;

957 }	955 }

958	956

959	957

960 DispatchTable* ChoiceNode::GetTable(bool ignore_case) {	958 DispatchTable* ChoiceNode::GetTable(bool ignore_case) {

961 if (table_ == NULL) {	959 if (table_ == NULL) {

962 table_ = new(zone()) DispatchTable(zone());	960 table_ = new(zone()) DispatchTable(zone());

963 DispatchTableConstructor cons(table_, ignore_case, zone());	961 DispatchTableConstructor cons(table_, ignore_case, zone());

964 cons.BuildTable(this);	962 cons.BuildTable(this);

965 }	963 }

966 return table_;	964 return table_;

(...skipping 1587 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2554 ASSERT(characters_filled_in < details->characters());	2552 ASSERT(characters_filled_in < details->characters());

2555 int characters = details->characters();	2553 int characters = details->characters();

2556 int char_mask;	2554 int char_mask;

2557 if (compiler->ascii()) {	2555 if (compiler->ascii()) {

2558 char_mask = String::kMaxOneByteCharCode;	2556 char_mask = String::kMaxOneByteCharCode;

2559 } else {	2557 } else {

2560 char_mask = String::kMaxUtf16CodeUnit;	2558 char_mask = String::kMaxUtf16CodeUnit;

2561 }	2559 }

2562 for (int k = 0; k < elms_->length(); k++) {	2560 for (int k = 0; k < elms_->length(); k++) {

2563 TextElement elm = elms_->at(k);	2561 TextElement elm = elms_->at(k);

2564 if (elm.text_type == TextElement::ATOM) {	2562 if (elm.text_type() == TextElement::ATOM) {

2565 Vector<const uc16> quarks = elm.data.u_atom->data();	2563 Vector<const uc16> quarks = elm.atom()->data();

2566 for (int i = 0; i < characters && i < quarks.length(); i++) {	2564 for (int i = 0; i < characters && i < quarks.length(); i++) {

2567 QuickCheckDetails::Position* pos =	2565 QuickCheckDetails::Position* pos =

2568 details->positions(characters_filled_in);	2566 details->positions(characters_filled_in);

2569 uc16 c = quarks[i];	2567 uc16 c = quarks[i];

2570 if (c > char_mask) {	2568 if (c > char_mask) {

2571 // If we expect a non-ASCII character from an ASCII string,	2569 // If we expect a non-ASCII character from an ASCII string,

2572 // there is no way we can match. Not even case independent	2570 // there is no way we can match. Not even case independent

2573 // matching can turn an ASCII character into non-ASCII or	2571 // matching can turn an ASCII character into non-ASCII or

2574 // vice versa.	2572 // vice versa.

2575 details->set_cannot_match();	2573 details->set_cannot_match();

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2617 }	2615 }

2618 characters_filled_in++;	2616 characters_filled_in++;

2619 ASSERT(characters_filled_in <= details->characters());	2617 ASSERT(characters_filled_in <= details->characters());

2620 if (characters_filled_in == details->characters()) {	2618 if (characters_filled_in == details->characters()) {

2621 return;	2619 return;

2622 }	2620 }

2623 }	2621 }

2624 } else {	2622 } else {

2625 QuickCheckDetails::Position* pos =	2623 QuickCheckDetails::Position* pos =

2626 details->positions(characters_filled_in);	2624 details->positions(characters_filled_in);

2627 RegExpCharacterClass* tree = elm.data.u_char_class;	2625 RegExpCharacterClass* tree = elm.char_class();

2628 ZoneList<CharacterRange>* ranges = tree->ranges(zone());	2626 ZoneList<CharacterRange>* ranges = tree->ranges(zone());

2629 if (tree->is_negated()) {	2627 if (tree->is_negated()) {

2630 // A quick check uses multi-character mask and compare. There is no	2628 // A quick check uses multi-character mask and compare. There is no

2631 // useful way to incorporate a negative char class into this scheme	2629 // useful way to incorporate a negative char class into this scheme

2632 // so we just conservatively create a mask and value that will always	2630 // so we just conservatively create a mask and value that will always

2633 // succeed.	2631 // succeed.

2634 pos->mask = 0;	2632 pos->mask = 0;

2635 pos->value = 0;	2633 pos->value = 0;

2636 } else {	2634 } else {

2637 int first_range = 0;	2635 int first_range = 0;

(...skipping 169 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2807	2805

2808	2806

2809 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {	2807 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {

2810 if (info()->replacement_calculated) return replacement();	2808 if (info()->replacement_calculated) return replacement();

2811 if (depth < 0) return this;	2809 if (depth < 0) return this;

2812 ASSERT(!info()->visited);	2810 ASSERT(!info()->visited);

2813 VisitMarker marker(info());	2811 VisitMarker marker(info());

2814 int element_count = elms_->length();	2812 int element_count = elms_->length();

2815 for (int i = 0; i < element_count; i++) {	2813 for (int i = 0; i < element_count; i++) {

2816 TextElement elm = elms_->at(i);	2814 TextElement elm = elms_->at(i);

2817 if (elm.text_type == TextElement::ATOM) {	2815 if (elm.text_type() == TextElement::ATOM) {

2818 Vector<const uc16> quarks = elm.data.u_atom->data();	2816 Vector<const uc16> quarks = elm.atom()->data();

2819 for (int j = 0; j < quarks.length(); j++) {	2817 for (int j = 0; j < quarks.length(); j++) {

2820 uint16_t c = quarks[j];	2818 uint16_t c = quarks[j];

2821 if (c <= String::kMaxOneByteCharCode) continue;	2819 if (c <= String::kMaxOneByteCharCode) continue;

2822 if (!ignore_case) return set_replacement(NULL);	2820 if (!ignore_case) return set_replacement(NULL);

2823 // Here, we need to check for characters whose upper and lower cases	2821 // Here, we need to check for characters whose upper and lower cases

2824 // are outside the Latin-1 range.	2822 // are outside the Latin-1 range.

2825 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);	2823 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);

2826 // Character is outside Latin-1 completely	2824 // Character is outside Latin-1 completely

2827 if (converted == 0) return set_replacement(NULL);	2825 if (converted == 0) return set_replacement(NULL);

2828 // Convert quark to Latin-1 in place.	2826 // Convert quark to Latin-1 in place.

2829 uint16_t* copy = const_cast<uint16_t*>(quarks.start());	2827 uint16_t* copy = const_cast<uint16_t*>(quarks.start());

2830 copy[j] = converted;	2828 copy[j] = converted;

2831 }	2829 }

2832 } else {	2830 } else {

2833 ASSERT(elm.text_type == TextElement::CHAR_CLASS);	2831 ASSERT(elm.text_type() == TextElement::CHAR_CLASS);

2834 RegExpCharacterClass* cc = elm.data.u_char_class;	2832 RegExpCharacterClass* cc = elm.char_class();

2835 ZoneList<CharacterRange>* ranges = cc->ranges(zone());	2833 ZoneList<CharacterRange>* ranges = cc->ranges(zone());

2836 if (!CharacterRange::IsCanonical(ranges)) {	2834 if (!CharacterRange::IsCanonical(ranges)) {

2837 CharacterRange::Canonicalize(ranges);	2835 CharacterRange::Canonicalize(ranges);

2838 }	2836 }

2839 // Now they are in order so we only need to look at the first.	2837 // Now they are in order so we only need to look at the first.

2840 int range_count = ranges->length();	2838 int range_count = ranges->length();

2841 if (cc->is_negated()) {	2839 if (cc->is_negated()) {

2842 if (range_count != 0 &&	2840 if (range_count != 0 &&

2843 ranges->at(0).from() == 0 &&	2841 ranges->at(0).from() == 0 &&

2844 ranges->at(0).to() >= String::kMaxOneByteCharCode) {	2842 ranges->at(0).to() >= String::kMaxOneByteCharCode) {

(...skipping 404 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3249 bool first_element_checked,	3247 bool first_element_checked,

3250 int* checked_up_to) {	3248 int* checked_up_to) {

3251 Isolate* isolate = Isolate::Current();	3249 Isolate* isolate = Isolate::Current();

3252 RegExpMacroAssembler* assembler = compiler->macro_assembler();	3250 RegExpMacroAssembler* assembler = compiler->macro_assembler();

3253 bool ascii = compiler->ascii();	3251 bool ascii = compiler->ascii();

3254 Label* backtrack = trace->backtrack();	3252 Label* backtrack = trace->backtrack();

3255 QuickCheckDetails* quick_check = trace->quick_check_performed();	3253 QuickCheckDetails* quick_check = trace->quick_check_performed();

3256 int element_count = elms_->length();	3254 int element_count = elms_->length();

3257 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {	3255 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {

3258 TextElement elm = elms_->at(i);	3256 TextElement elm = elms_->at(i);

3259 int cp_offset = trace->cp_offset() + elm.cp_offset;	3257 int cp_offset = trace->cp_offset() + elm.cp_offset();

3260 if (elm.text_type == TextElement::ATOM) {	3258 if (elm.text_type() == TextElement::ATOM) {

3261 Vector<const uc16> quarks = elm.data.u_atom->data();	3259 Vector<const uc16> quarks = elm.atom()->data();

3262 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {	3260 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {

3263 if (first_element_checked && i == 0 && j == 0) continue;	3261 if (first_element_checked && i == 0 && j == 0) continue;

3264 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue;	3262 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;

3265 EmitCharacterFunction* emit_function = NULL;	3263 EmitCharacterFunction* emit_function = NULL;

3266 switch (pass) {	3264 switch (pass) {

3267 case NON_ASCII_MATCH:	3265 case NON_ASCII_MATCH:

3268 ASSERT(ascii);	3266 ASSERT(ascii);

3269 if (quarks[j] > String::kMaxOneByteCharCode) {	3267 if (quarks[j] > String::kMaxOneByteCharCode) {

3270 assembler->GoTo(backtrack);	3268 assembler->GoTo(backtrack);

3271 return;	3269 return;

3272 }	3270 }

3273 break;	3271 break;

3274 case NON_LETTER_CHARACTER_MATCH:	3272 case NON_LETTER_CHARACTER_MATCH:

(...skipping 13 matching lines...) Expand all Loading...
3288 compiler,	3286 compiler,

3289 quarks[j],	3287 quarks[j],

3290 backtrack,	3288 backtrack,

3291 cp_offset + j,	3289 cp_offset + j,

3292 *checked_up_to < cp_offset + j,	3290 *checked_up_to < cp_offset + j,

3293 preloaded);	3291 preloaded);

3294 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);	3292 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);

3295 }	3293 }

3296 }	3294 }

3297 } else {	3295 } else {

3298 ASSERT_EQ(elm.text_type, TextElement::CHAR_CLASS);	3296 ASSERT_EQ(TextElement::CHAR_CLASS, elm.text_type());

3299 if (pass == CHARACTER_CLASS_MATCH) {	3297 if (pass == CHARACTER_CLASS_MATCH) {

3300 if (first_element_checked && i == 0) continue;	3298 if (first_element_checked && i == 0) continue;

3301 if (DeterminedAlready(quick_check, elm.cp_offset)) continue;	3299 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;

3302 RegExpCharacterClass* cc = elm.data.u_char_class;	3300 RegExpCharacterClass* cc = elm.char_class();

3303 EmitCharClass(assembler,	3301 EmitCharClass(assembler,

3304 cc,	3302 cc,

3305 ascii,	3303 ascii,

3306 backtrack,	3304 backtrack,

3307 cp_offset,	3305 cp_offset,

3308 *checked_up_to < cp_offset,	3306 *checked_up_to < cp_offset,

3309 preloaded,	3307 preloaded,

3310 zone());	3308 zone());

3311 UpdateBoundsCheck(cp_offset, checked_up_to);	3309 UpdateBoundsCheck(cp_offset, checked_up_to);

3312 }	3310 }

3313 }	3311 }

3314 }	3312 }

3315 }	3313 }

3316	3314

3317	3315

3318 int TextNode::Length() {	3316 int TextNode::Length() {

3319 TextElement elm = elms_->last();	3317 TextElement elm = elms_->last();

3320 ASSERT(elm.cp_offset >= 0);	3318 ASSERT(elm.cp_offset() >= 0);

3321 if (elm.text_type == TextElement::ATOM) {	3319 return elm.cp_offset() + elm.length();

3322 return elm.cp_offset + elm.data.u_atom->data().length();

3323 } else {

3324 return elm.cp_offset + 1;

3325 }

3326 }	3320 }

3327	3321

3328	3322

3329 bool TextNode::SkipPass(int int_pass, bool ignore_case) {	3323 bool TextNode::SkipPass(int int_pass, bool ignore_case) {

3330 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);	3324 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);

3331 if (ignore_case) {	3325 if (ignore_case) {

3332 return pass == SIMPLE_CHARACTER_MATCH;	3326 return pass == SIMPLE_CHARACTER_MATCH;

3333 } else {	3327 } else {

3334 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;	3328 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;

3335 }	3329 }

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3417 cp_offset_ = 0;	3411 cp_offset_ = 0;

3418 }	3412 }

3419 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);	3413 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);

3420 }	3414 }

3421	3415

3422	3416

3423 void TextNode::MakeCaseIndependent(bool is_ascii) {	3417 void TextNode::MakeCaseIndependent(bool is_ascii) {

3424 int element_count = elms_->length();	3418 int element_count = elms_->length();

3425 for (int i = 0; i < element_count; i++) {	3419 for (int i = 0; i < element_count; i++) {

3426 TextElement elm = elms_->at(i);	3420 TextElement elm = elms_->at(i);

3427 if (elm.text_type == TextElement::CHAR_CLASS) {	3421 if (elm.text_type() == TextElement::CHAR_CLASS) {

3428 RegExpCharacterClass* cc = elm.data.u_char_class;	3422 RegExpCharacterClass* cc = elm.char_class();

3429 // None of the standard character classes is different in the case	3423 // None of the standard character classes is different in the case

3430 // independent case and it slows us down if we don't know that.	3424 // independent case and it slows us down if we don't know that.

3431 if (cc->is_standard(zone())) continue;	3425 if (cc->is_standard(zone())) continue;

3432 ZoneList<CharacterRange>* ranges = cc->ranges(zone());	3426 ZoneList<CharacterRange>* ranges = cc->ranges(zone());

3433 int range_count = ranges->length();	3427 int range_count = ranges->length();

3434 for (int j = 0; j < range_count; j++) {	3428 for (int j = 0; j < range_count; j++) {

3435 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone());	3429 ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone());

3436 }	3430 }

3437 }	3431 }

3438 }	3432 }

3439 }	3433 }

3440	3434

3441	3435

3442 int TextNode::GreedyLoopTextLength() {	3436 int TextNode::GreedyLoopTextLength() {

3443 TextElement elm = elms_->at(elms_->length() - 1);	3437 TextElement elm = elms_->at(elms_->length() - 1);

3444 if (elm.text_type == TextElement::CHAR_CLASS) {	3438 return elm.cp_offset() + elm.length();

3445 return elm.cp_offset + 1;

3446 } else {

3447 return elm.cp_offset + elm.data.u_atom->data().length();

3448 }

3449 }	3439 }

3450	3440

3451	3441

3452 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(	3442 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(

3453 RegExpCompiler* compiler) {	3443 RegExpCompiler* compiler) {

3454 if (elms_->length() != 1) return NULL;	3444 if (elms_->length() != 1) return NULL;

3455 TextElement elm = elms_->at(0);	3445 TextElement elm = elms_->at(0);

3456 if (elm.text_type != TextElement::CHAR_CLASS) return NULL;	3446 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;

3457 RegExpCharacterClass* node = elm.data.u_char_class;	3447 RegExpCharacterClass* node = elm.char_class();

3458 ZoneList<CharacterRange>* ranges = node->ranges(zone());	3448 ZoneList<CharacterRange>* ranges = node->ranges(zone());

3459 if (!CharacterRange::IsCanonical(ranges)) {	3449 if (!CharacterRange::IsCanonical(ranges)) {

3460 CharacterRange::Canonicalize(ranges);	3450 CharacterRange::Canonicalize(ranges);

3461 }	3451 }

3462 if (node->is_negated()) {	3452 if (node->is_negated()) {

3463 return ranges->length() == 0 ? on_success() : NULL;	3453 return ranges->length() == 0 ? on_success() : NULL;

3464 }	3454 }

3465 if (ranges->length() != 1) return NULL;	3455 if (ranges->length() != 1) return NULL;

3466 uint32_t max_char;	3456 uint32_t max_char;

3467 if (compiler->ascii()) {	3457 if (compiler->ascii()) {

(...skipping 1053 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4521 }	4511 }

4522 }	4512 }

4523	4513

4524	4514

4525 void DotPrinter::VisitText(TextNode* that) {	4515 void DotPrinter::VisitText(TextNode* that) {

4526 Zone* zone = that->zone();	4516 Zone* zone = that->zone();

4527 stream()->Add(" n%p [label=\"", that);	4517 stream()->Add(" n%p [label=\"", that);

4528 for (int i = 0; i < that->elements()->length(); i++) {	4518 for (int i = 0; i < that->elements()->length(); i++) {

4529 if (i > 0) stream()->Add(" ");	4519 if (i > 0) stream()->Add(" ");

4530 TextElement elm = that->elements()->at(i);	4520 TextElement elm = that->elements()->at(i);

4531 switch (elm.text_type) {	4521 switch (elm.text_type()) {

4532 case TextElement::ATOM: {	4522 case TextElement::ATOM: {

4533 stream()->Add("'%w'", elm.data.u_atom->data());	4523 stream()->Add("'%w'", elm.atom()->data());

4534 break;	4524 break;

4535 }	4525 }

4536 case TextElement::CHAR_CLASS: {	4526 case TextElement::CHAR_CLASS: {

4537 RegExpCharacterClass* node = elm.data.u_char_class;	4527 RegExpCharacterClass* node = elm.char_class();

4538 stream()->Add("[");	4528 stream()->Add("[");

4539 if (node->is_negated())	4529 if (node->is_negated())

4540 stream()->Add("^");	4530 stream()->Add("^");

4541 for (int j = 0; j < node->ranges(zone)->length(); j++) {	4531 for (int j = 0; j < node->ranges(zone)->length(); j++) {

4542 CharacterRange range = node->ranges(zone)->at(j);	4532 CharacterRange range = node->ranges(zone)->at(j);

4543 stream()->Add("%k-%k", range.from(), range.to());	4533 stream()->Add("%k-%k", range.from(), range.to());

4544 }	4534 }

4545 stream()->Add("]");	4535 stream()->Add("]");

4546 break;	4536 break;

4547 }	4537 }

(...skipping 1161 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5709 }	5699 }

5710	5700

5711	5701

5712 void TextNode::CalculateOffsets() {	5702 void TextNode::CalculateOffsets() {

5713 int element_count = elements()->length();	5703 int element_count = elements()->length();

5714 // Set up the offsets of the elements relative to the start. This is a fixed	5704 // Set up the offsets of the elements relative to the start. This is a fixed

5715 // quantity since a TextNode can only contain fixed-width things.	5705 // quantity since a TextNode can only contain fixed-width things.

5716 int cp_offset = 0;	5706 int cp_offset = 0;

5717 for (int i = 0; i < element_count; i++) {	5707 for (int i = 0; i < element_count; i++) {

5718 TextElement& elm = elements()->at(i);	5708 TextElement& elm = elements()->at(i);

5719 elm.cp_offset = cp_offset;	5709 elm.set_cp_offset(cp_offset);

5720 if (elm.text_type == TextElement::ATOM) {	5710 cp_offset += elm.length();

5721 cp_offset += elm.data.u_atom->data().length();

5722 } else {

5723 cp_offset++;

5724 }

5725 }	5711 }

5726 }	5712 }

5727	5713

5728	5714

5729 void Analysis::VisitText(TextNode* that) {	5715 void Analysis::VisitText(TextNode* that) {

5730 if (ignore_case_) {	5716 if (ignore_case_) {

5731 that->MakeCaseIndependent(is_ascii_);	5717 that->MakeCaseIndependent(is_ascii_);

5732 }	5718 }

5733 EnsureAnalyzed(that->on_success());	5719 EnsureAnalyzed(that->on_success());

5734 if (!has_failed()) {	5720 if (!has_failed()) {

(...skipping 95 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5830 bool not_at_start) {	5816 bool not_at_start) {

5831 if (initial_offset >= bm->length()) return;	5817 if (initial_offset >= bm->length()) return;

5832 int offset = initial_offset;	5818 int offset = initial_offset;

5833 int max_char = bm->max_char();	5819 int max_char = bm->max_char();

5834 for (int i = 0; i < elements()->length(); i++) {	5820 for (int i = 0; i < elements()->length(); i++) {

5835 if (offset >= bm->length()) {	5821 if (offset >= bm->length()) {

5836 if (initial_offset == 0) set_bm_info(not_at_start, bm);	5822 if (initial_offset == 0) set_bm_info(not_at_start, bm);

5837 return;	5823 return;

5838 }	5824 }

5839 TextElement text = elements()->at(i);	5825 TextElement text = elements()->at(i);

5840 if (text.text_type == TextElement::ATOM) {	5826 if (text.text_type() == TextElement::ATOM) {

5841 RegExpAtom* atom = text.data.u_atom;	5827 RegExpAtom* atom = text.atom();

5842 for (int j = 0; j < atom->length(); j++, offset++) {	5828 for (int j = 0; j < atom->length(); j++, offset++) {

5843 if (offset >= bm->length()) {	5829 if (offset >= bm->length()) {

5844 if (initial_offset == 0) set_bm_info(not_at_start, bm);	5830 if (initial_offset == 0) set_bm_info(not_at_start, bm);

5845 return;	5831 return;

5846 }	5832 }

5847 uc16 character = atom->data()[j];	5833 uc16 character = atom->data()[j];

5848 if (bm->compiler()->ignore_case()) {	5834 if (bm->compiler()->ignore_case()) {

5849 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	5835 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

5850 int length = GetCaseIndependentLetters(	5836 int length = GetCaseIndependentLetters(

5851 ISOLATE,	5837 ISOLATE,

5852 character,	5838 character,

5853 bm->max_char() == String::kMaxOneByteCharCode,	5839 bm->max_char() == String::kMaxOneByteCharCode,

5854 chars);	5840 chars);

5855 for (int j = 0; j < length; j++) {	5841 for (int j = 0; j < length; j++) {

5856 bm->Set(offset, chars[j]);	5842 bm->Set(offset, chars[j]);

5857 }	5843 }

5858 } else {	5844 } else {

5859 if (character <= max_char) bm->Set(offset, character);	5845 if (character <= max_char) bm->Set(offset, character);

5860 }	5846 }

5861 }	5847 }

5862 } else {	5848 } else {

5863 ASSERT(text.text_type == TextElement::CHAR_CLASS);	5849 ASSERT_EQ(TextElement::CHAR_CLASS, text.text_type());

5864 RegExpCharacterClass* char_class = text.data.u_char_class;	5850 RegExpCharacterClass* char_class = text.char_class();

5865 ZoneList<CharacterRange>* ranges = char_class->ranges(zone());	5851 ZoneList<CharacterRange>* ranges = char_class->ranges(zone());

5866 if (char_class->is_negated()) {	5852 if (char_class->is_negated()) {

5867 bm->SetAll(offset);	5853 bm->SetAll(offset);

5868 } else {	5854 } else {

5869 for (int k = 0; k < ranges->length(); k++) {	5855 for (int k = 0; k < ranges->length(); k++) {

5870 CharacterRange& range = ranges->at(k);	5856 CharacterRange& range = ranges->at(k);

5871 if (range.from() > max_char) continue;	5857 if (range.from() > max_char) continue;

5872 int to = Min(max_char, static_cast<int>(range.to()));	5858 int to = Min(max_char, static_cast<int>(range.to()));

5873 bm->SetInterval(offset, Interval(range.from(), to));	5859 bm->SetInterval(offset, Interval(range.from(), to));

5874 }	5860 }

(...skipping 91 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5966 last = range.to() + 1;	5952 last = range.to() + 1;

5967 }	5953 }

5968 }	5954 }

5969 }	5955 }

5970 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit));	5956 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit));

5971 }	5957 }

5972	5958

5973	5959

5974 void DispatchTableConstructor::VisitText(TextNode* that) {	5960 void DispatchTableConstructor::VisitText(TextNode* that) {

5975 TextElement elm = that->elements()->at(0);	5961 TextElement elm = that->elements()->at(0);

5976 switch (elm.text_type) {	5962 switch (elm.text_type()) {

5977 case TextElement::ATOM: {	5963 case TextElement::ATOM: {

5978 uc16 c = elm.data.u_atom->data()[0];	5964 uc16 c = elm.atom()->data()[0];

5979 AddRange(CharacterRange(c, c));	5965 AddRange(CharacterRange(c, c));

5980 break;	5966 break;

5981 }	5967 }

5982 case TextElement::CHAR_CLASS: {	5968 case TextElement::CHAR_CLASS: {

5983 RegExpCharacterClass* tree = elm.data.u_char_class;	5969 RegExpCharacterClass* tree = elm.char_class();

5984 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());	5970 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());

5985 if (tree->is_negated()) {	5971 if (tree->is_negated()) {

5986 AddInverse(ranges);	5972 AddInverse(ranges);

5987 } else {	5973 } else {

5988 for (int i = 0; i < ranges->length(); i++)	5974 for (int i = 0; i < ranges->length(); i++)

5989 AddRange(ranges->at(i));	5975 AddRange(ranges->at(i));

5990 }	5976 }

5991 break;	5977 break;

5992 }	5978 }

5993 default: {	5979 default: {

(...skipping 131 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6125 }	6111 }

6126	6112

6127 return compiler.Assemble(&macro_assembler,	6113 return compiler.Assemble(&macro_assembler,

6128 node,	6114 node,

6129 data->capture_count,	6115 data->capture_count,

6130 pattern);	6116 pattern);

6131 }	6117 }

6132	6118

6133	6119

6134 }} // namespace v8::internal	6120 }} // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/jsregexp.h ('k') | no next file » | no next file with comments »