| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 253 bool in_cache = !cached.is_null(); | 253 bool in_cache = !cached.is_null(); |
| 254 LOG(RegExpCompileEvent(re, in_cache)); | 254 LOG(RegExpCompileEvent(re, in_cache)); |
| 255 | 255 |
| 256 Handle<Object> result; | 256 Handle<Object> result; |
| 257 if (in_cache) { | 257 if (in_cache) { |
| 258 re->set_data(*cached); | 258 re->set_data(*cached); |
| 259 result = re; | 259 result = re; |
| 260 } else { | 260 } else { |
| 261 FlattenString(pattern); | 261 FlattenString(pattern); |
| 262 ZoneScope zone_scope(DELETE_ON_EXIT); | 262 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 263 RegExpParseResult parse_result; | 263 RegExpCompileData parse_result; |
| 264 FlatStringReader reader(pattern); | 264 FlatStringReader reader(pattern); |
| 265 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { | 265 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { |
| 266 // Throw an exception if we fail to parse the pattern. | 266 // Throw an exception if we fail to parse the pattern. |
| 267 ThrowRegExpException(re, | 267 ThrowRegExpException(re, |
| 268 pattern, | 268 pattern, |
| 269 parse_result.error, | 269 parse_result.error, |
| 270 "malformed_regexp"); | 270 "malformed_regexp"); |
| 271 return Handle<Object>::null(); | 271 return Handle<Object>::null(); |
| 272 } | 272 } |
| 273 RegExpAtom* atom = parse_result.tree->AsAtom(); | 273 |
| 274 if (atom != NULL && !flags.is_ignore_case()) { | 274 if (parse_result.simple && !flags.is_ignore_case()) { |
| 275 if (parse_result.has_character_escapes) { | 275 // Parse-tree is a single atom that is equal to the pattern. |
| 276 Vector<const uc16> atom_pattern = atom->data(); | 276 result = AtomCompile(re, pattern, flags, pattern); |
| 277 Handle<String> atom_string = | 277 } else if (parse_result.tree->IsAtom() && |
| 278 Factory::NewStringFromTwoByte(atom_pattern); | 278 !flags.is_ignore_case() && |
| 279 result = AtomCompile(re, pattern, flags, atom_string); | 279 parse_result.capture_count == 0) { |
| 280 } else { | 280 // TODO(lrn) Accept capture_count > 0 on atoms. |
| 281 result = AtomCompile(re, pattern, flags, pattern); | 281 RegExpAtom* atom = parse_result.tree->AsAtom(); |
| 282 } | 282 Vector<const uc16> atom_pattern = atom->data(); |
| 283 Handle<String> atom_string = |
| 284 Factory::NewStringFromTwoByte(atom_pattern); |
| 285 result = AtomCompile(re, pattern, flags, atom_string); |
| 286 } else if (FLAG_irregexp) { |
| 287 result = IrregexpPrepare(re, pattern, flags); |
| 283 } else { | 288 } else { |
| 284 if (FLAG_irregexp) { | 289 result = JscrePrepare(re, pattern, flags); |
| 285 result = IrregexpPrepare(re, pattern, flags); | |
| 286 } else { | |
| 287 result = JscrePrepare(re, pattern, flags); | |
| 288 } | |
| 289 } | 290 } |
| 290 Object* data = re->data(); | 291 Object* data = re->data(); |
| 291 if (data->IsFixedArray()) { | 292 if (data->IsFixedArray()) { |
| 292 // If compilation succeeded then the data is set on the regexp | 293 // If compilation succeeded then the data is set on the regexp |
| 293 // and we can store it in the cache. | 294 // and we can store it in the cache. |
| 294 Handle<FixedArray> data(FixedArray::cast(re->data())); | 295 Handle<FixedArray> data(FixedArray::cast(re->data())); |
| 295 CompilationCache::PutRegExp(pattern, flags, data); | 296 CompilationCache::PutRegExp(pattern, flags, data); |
| 296 } | 297 } |
| 297 } | 298 } |
| 298 | 299 |
| 299 return result; | 300 return result; |
| 300 } | 301 } |
| 301 | 302 |
| 302 | 303 |
| 303 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, | 304 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, |
| 304 Handle<String> subject, | 305 Handle<String> subject, |
| 305 Handle<Object> index) { | 306 Handle<Object> index) { |
| 306 switch (regexp->TypeTag()) { | 307 switch (regexp->TypeTag()) { |
| 307 case JSRegExp::ATOM: | 308 case JSRegExp::ATOM: |
| 308 return AtomExec(regexp, subject, index); | 309 return AtomExec(regexp, subject, index); |
| 309 case JSRegExp::IRREGEXP: { | 310 case JSRegExp::IRREGEXP: { |
| 310 Handle<Object> result = IrregexpExec(regexp, subject, index); | 311 Handle<Object> result = IrregexpExec(regexp, subject, index); |
| 311 if (!result.is_null()) { | 312 if (!result.is_null() || Top::has_pending_exception()) { |
| 312 return result; | 313 return result; |
| 313 } | 314 } |
| 314 // We couldn't handle the regexp using Irregexp, so fall back | 315 // We couldn't handle the regexp using Irregexp, so fall back |
| 315 // on JSCRE. | 316 // on JSCRE. |
| 316 // Reset the JSRegExp to use JSCRE. | 317 // Reset the JSRegExp to use JSCRE. |
| 317 JscrePrepare(regexp, | 318 JscrePrepare(regexp, |
| 318 Handle<String>(regexp->Pattern()), | 319 Handle<String>(regexp->Pattern()), |
| 319 regexp->GetFlags()); | 320 regexp->GetFlags()); |
| 320 // Fall-through to JSCRE. | 321 // Fall-through to JSCRE. |
| 321 } | 322 } |
| 322 case JSRegExp::JSCRE: | 323 case JSRegExp::JSCRE: |
| 323 if (FLAG_disable_jscre) { | 324 if (FLAG_disable_jscre) { |
| 324 UNIMPLEMENTED(); | 325 UNIMPLEMENTED(); |
| 325 } | 326 } |
| 326 return JscreExec(regexp, subject, index); | 327 return JscreExec(regexp, subject, index); |
| 327 default: | 328 default: |
| 328 UNREACHABLE(); | 329 UNREACHABLE(); |
| 329 return Handle<Object>::null(); | 330 return Handle<Object>::null(); |
| 330 } | 331 } |
| 331 } | 332 } |
| 332 | 333 |
| 333 | 334 |
| 334 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, | 335 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, |
| 335 Handle<String> subject) { | 336 Handle<String> subject) { |
| 336 switch (regexp->TypeTag()) { | 337 switch (regexp->TypeTag()) { |
| 337 case JSRegExp::ATOM: | 338 case JSRegExp::ATOM: |
| 338 return AtomExecGlobal(regexp, subject); | 339 return AtomExecGlobal(regexp, subject); |
| 339 case JSRegExp::IRREGEXP: { | 340 case JSRegExp::IRREGEXP: { |
| 340 Handle<Object> result = IrregexpExecGlobal(regexp, subject); | 341 Handle<Object> result = IrregexpExecGlobal(regexp, subject); |
| 341 if (!result.is_null()) { | 342 if (!result.is_null() || Top::has_pending_exception()) { |
| 342 return result; | 343 return result; |
| 343 } | 344 } |
| 344 // We couldn't handle the regexp using Irregexp, so fall back | 345 // Empty handle as result but no exception thrown means that |
| 345 // on JSCRE. | 346 // the regexp contains features not yet handled by the irregexp |
| 346 // Reset the JSRegExp to use JSCRE. | 347 // compiler. |
| 348 // We have to fall back on JSCRE. Reset the JSRegExp to use JSCRE. |
| 347 JscrePrepare(regexp, | 349 JscrePrepare(regexp, |
| 348 Handle<String>(regexp->Pattern()), | 350 Handle<String>(regexp->Pattern()), |
| 349 regexp->GetFlags()); | 351 regexp->GetFlags()); |
| 350 // Fall-through to JSCRE. | 352 // Fall-through to JSCRE. |
| 351 } | 353 } |
| 352 case JSRegExp::JSCRE: | 354 case JSRegExp::JSCRE: |
| 353 if (FLAG_disable_jscre) { | 355 if (FLAG_disable_jscre) { |
| 354 UNIMPLEMENTED(); | 356 UNIMPLEMENTED(); |
| 355 } | 357 } |
| 356 return JscreExecGlobal(regexp, subject); | 358 return JscreExecGlobal(regexp, subject); |
| (...skipping 318 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 675 } else { | 677 } else { |
| 676 // Exited loop with the exception in matches. | 678 // Exited loop with the exception in matches. |
| 677 return matches; | 679 return matches; |
| 678 } | 680 } |
| 679 } | 681 } |
| 680 | 682 |
| 681 | 683 |
| 682 // Irregexp implementation. | 684 // Irregexp implementation. |
| 683 | 685 |
| 684 | 686 |
| 687 // Retrieves a compiled version of the regexp for either ASCII or non-ASCII |
| 688 // strings. If the compiled version doesn't already exist, it is compiled |
| 689 // from the source pattern. |
| 690 // Irregexp is not feature complete yet. If there is something in the |
| 691 // regexp that the compiler cannot currently handle, an empty |
| 692 // handle is returned, but no exception is thrown. |
| 685 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, | 693 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, |
| 686 bool is_ascii) { | 694 bool is_ascii) { |
| 687 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); | 695 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); |
| 688 Handle<FixedArray> alternatives( | 696 Handle<FixedArray> alternatives( |
| 689 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); | 697 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); |
| 690 ASSERT_EQ(2, alternatives->length()); | 698 ASSERT_EQ(2, alternatives->length()); |
| 691 | 699 |
| 692 int index = is_ascii ? 0 : 1; | 700 int index = is_ascii ? 0 : 1; |
| 693 Object* entry = alternatives->get(index); | 701 Object* entry = alternatives->get(index); |
| 694 if (!entry->IsNull()) { | 702 if (!entry->IsNull()) { |
| 695 return Handle<FixedArray>(FixedArray::cast(entry)); | 703 return Handle<FixedArray>(FixedArray::cast(entry)); |
| 696 } | 704 } |
| 697 | 705 |
| 698 // Compile the RegExp. | 706 // Compile the RegExp. |
| 699 ZoneScope zone_scope(DELETE_ON_EXIT); | 707 ZoneScope zone_scope(DELETE_ON_EXIT); |
| 700 | 708 |
| 701 JSRegExp::Flags flags = re->GetFlags(); | 709 JSRegExp::Flags flags = re->GetFlags(); |
| 702 | 710 |
| 703 Handle<String> pattern(re->Pattern()); | 711 Handle<String> pattern(re->Pattern()); |
| 704 StringShape shape(*pattern); | 712 StringShape shape(*pattern); |
| 705 if (!pattern->IsFlat(shape)) { | 713 if (!pattern->IsFlat(shape)) { |
| 706 pattern->Flatten(shape); | 714 pattern->Flatten(shape); |
| 707 } | 715 } |
| 708 | 716 |
| 709 RegExpParseResult parse_result; | 717 RegExpCompileData compile_data; |
| 710 FlatStringReader reader(pattern); | 718 FlatStringReader reader(pattern); |
| 711 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { | 719 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { |
| 712 // Throw an exception if we fail to parse the pattern. | 720 // Throw an exception if we fail to parse the pattern. |
| 713 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. | 721 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. |
| 714 ThrowRegExpException(re, | 722 ThrowRegExpException(re, |
| 715 pattern, | 723 pattern, |
| 716 parse_result.error, | 724 compile_data.error, |
| 717 "malformed_regexp"); | 725 "malformed_regexp"); |
| 718 return Handle<FixedArray>::null(); | 726 return Handle<FixedArray>::null(); |
| 719 } | 727 } |
| 720 Handle<FixedArray> compiled_entry = | 728 Handle<FixedArray> compiled_entry = |
| 721 RegExpEngine::Compile(&parse_result, | 729 RegExpEngine::Compile(&compile_data, |
| 722 NULL, | |
| 723 flags.is_ignore_case(), | 730 flags.is_ignore_case(), |
| 724 flags.is_multiline(), | 731 flags.is_multiline(), |
| 725 pattern, | 732 pattern, |
| 726 is_ascii); | 733 is_ascii); |
| 727 if (!compiled_entry.is_null()) { | 734 if (!compiled_entry.is_null()) { |
| 728 alternatives->set(index, *compiled_entry); | 735 alternatives->set(index, *compiled_entry); |
| 729 } | 736 } |
| 730 return compiled_entry; | 737 return compiled_entry; |
| 731 } | 738 } |
| 732 | 739 |
| (...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 905 start_offset += slice->start(); | 912 start_offset += slice->start(); |
| 906 end_offset += slice->start(); | 913 end_offset += slice->start(); |
| 907 subject = Handle<String>(slice->buffer()); | 914 subject = Handle<String>(slice->buffer()); |
| 908 } | 915 } |
| 909 | 916 |
| 910 // String is now either Sequential or External | 917 // String is now either Sequential or External |
| 911 StringShape flatshape(*subject); | 918 StringShape flatshape(*subject); |
| 912 bool is_ascii = flatshape.IsAsciiRepresentation(); | 919 bool is_ascii = flatshape.IsAsciiRepresentation(); |
| 913 int char_size_shift = is_ascii ? 0 : 1; | 920 int char_size_shift = is_ascii ? 0 : 1; |
| 914 | 921 |
| 922 RegExpMacroAssemblerIA32::Result res; |
| 923 |
| 915 if (flatshape.IsExternal()) { | 924 if (flatshape.IsExternal()) { |
| 916 const byte* address; | 925 const byte* address; |
| 917 if (is_ascii) { | 926 if (is_ascii) { |
| 918 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); | 927 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); |
| 919 address = reinterpret_cast<const byte*>(ext->resource()->data()); | 928 address = reinterpret_cast<const byte*>(ext->resource()->data()); |
| 920 } else { | 929 } else { |
| 921 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); | 930 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); |
| 922 address = reinterpret_cast<const byte*>(ext->resource()->data()); | 931 address = reinterpret_cast<const byte*>(ext->resource()->data()); |
| 923 } | 932 } |
| 924 rc = RegExpMacroAssemblerIA32::Execute( | 933 res = RegExpMacroAssemblerIA32::Execute( |
| 925 *code, | 934 *code, |
| 926 &address, | 935 &address, |
| 927 start_offset << char_size_shift, | 936 start_offset << char_size_shift, |
| 928 end_offset << char_size_shift, | 937 end_offset << char_size_shift, |
| 929 offsets_vector, | 938 offsets_vector, |
| 930 previous_index == 0); | 939 previous_index == 0); |
| 931 } else { // Sequential string | 940 } else { // Sequential string |
| 932 Address char_address = | 941 Address char_address = |
| 933 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() | 942 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() |
| 934 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); | 943 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); |
| 935 int byte_offset = char_address - reinterpret_cast<Address>(*subject); | 944 int byte_offset = char_address - reinterpret_cast<Address>(*subject); |
| 936 rc = RegExpMacroAssemblerIA32::Execute( | 945 res = RegExpMacroAssemblerIA32::Execute( |
| 937 *code, | 946 *code, |
| 938 subject.location(), | 947 subject.location(), |
| 939 byte_offset + (start_offset << char_size_shift), | 948 byte_offset + (start_offset << char_size_shift), |
| 940 byte_offset + (end_offset << char_size_shift), | 949 byte_offset + (end_offset << char_size_shift), |
| 941 offsets_vector, | 950 offsets_vector, |
| 942 previous_index == 0); | 951 previous_index == 0); |
| 943 } | 952 } |
| 944 | 953 |
| 954 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { |
| 955 ASSERT(Top::has_pending_exception()); |
| 956 return Handle<Object>::null(); |
| 957 } |
| 958 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); |
| 959 |
| 945 if (rc) { | 960 if (rc) { |
| 946 // Capture values are relative to start_offset only. | 961 // Capture values are relative to start_offset only. |
| 947 for (int i = 0; i < offsets_vector_length; i++) { | 962 for (int i = 0; i < offsets_vector_length; i++) { |
| 948 if (offsets_vector[i] >= 0) { | 963 if (offsets_vector[i] >= 0) { |
| 949 offsets_vector[i] += previous_index; | 964 offsets_vector[i] += previous_index; |
| 950 } | 965 } |
| 951 } | 966 } |
| 952 } | 967 } |
| 953 break; | 968 break; |
| 954 #else | 969 #else |
| (...skipping 1641 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2596 | 2611 |
| 2597 | 2612 |
| 2598 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 2613 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
| 2599 RegExpNode* on_success) { | 2614 RegExpNode* on_success) { |
| 2600 return new TextNode(elements(), on_success); | 2615 return new TextNode(elements(), on_success); |
| 2601 } | 2616 } |
| 2602 | 2617 |
| 2603 | 2618 |
| 2604 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 2619 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| 2605 RegExpNode* on_success) { | 2620 RegExpNode* on_success) { |
| 2606 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); | 2621 return new TextNode(this, on_success); |
| 2607 elms->Add(TextElement::CharClass(this)); | |
| 2608 return new TextNode(elms, on_success); | |
| 2609 } | 2622 } |
| 2610 | 2623 |
| 2611 | 2624 |
| 2612 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 2625 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
| 2613 RegExpNode* on_success) { | 2626 RegExpNode* on_success) { |
| 2614 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | 2627 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
| 2615 int length = alternatives->length(); | 2628 int length = alternatives->length(); |
| 2616 ChoiceNode* result = new ChoiceNode(length); | 2629 ChoiceNode* result = new ChoiceNode(length); |
| 2617 for (int i = 0; i < length; i++) { | 2630 for (int i = 0; i < length; i++) { |
| 2618 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, | 2631 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, |
| (...skipping 639 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3258 return entry->out_set(); | 3271 return entry->out_set(); |
| 3259 else | 3272 else |
| 3260 return empty(); | 3273 return empty(); |
| 3261 } | 3274 } |
| 3262 | 3275 |
| 3263 | 3276 |
| 3264 // ------------------------------------------------------------------- | 3277 // ------------------------------------------------------------------- |
| 3265 // Analysis | 3278 // Analysis |
| 3266 | 3279 |
| 3267 | 3280 |
| 3268 void Analysis::EnsureAnalyzed(RegExpNode* that) { | 3281 void AssertionPropagation::EnsureAnalyzed(RegExpNode* that) { |
| 3269 if (that->info()->been_analyzed || that->info()->being_analyzed) | 3282 if (that->info()->been_analyzed || that->info()->being_analyzed) |
| 3270 return; | 3283 return; |
| 3271 that->info()->being_analyzed = true; | 3284 that->info()->being_analyzed = true; |
| 3272 that->Accept(this); | 3285 that->Accept(this); |
| 3273 that->info()->being_analyzed = false; | 3286 that->info()->being_analyzed = false; |
| 3274 that->info()->been_analyzed = true; | 3287 that->info()->been_analyzed = true; |
| 3275 } | 3288 } |
| 3276 | 3289 |
| 3277 | 3290 |
| 3278 void Analysis::VisitEnd(EndNode* that) { | 3291 void AssertionPropagation::VisitEnd(EndNode* that) { |
| 3279 // nothing to do | 3292 // nothing to do |
| 3280 } | 3293 } |
| 3281 | 3294 |
| 3282 | 3295 |
| 3283 void TextNode::CalculateOffsets() { | 3296 void TextNode::CalculateOffsets() { |
| 3284 int element_count = elements()->length(); | 3297 int element_count = elements()->length(); |
| 3285 // Set up the offsets of the elements relative to the start. This is a fixed | 3298 // Set up the offsets of the elements relative to the start. This is a fixed |
| 3286 // quantity since a TextNode can only contain fixed-width things. | 3299 // quantity since a TextNode can only contain fixed-width things. |
| 3287 int cp_offset = 0; | 3300 int cp_offset = 0; |
| 3288 for (int i = 0; i < element_count; i++) { | 3301 for (int i = 0; i < element_count; i++) { |
| 3289 TextElement& elm = elements()->at(i); | 3302 TextElement& elm = elements()->at(i); |
| 3290 elm.cp_offset = cp_offset; | 3303 elm.cp_offset = cp_offset; |
| 3291 if (elm.type == TextElement::ATOM) { | 3304 if (elm.type == TextElement::ATOM) { |
| 3292 cp_offset += elm.data.u_atom->data().length(); | 3305 cp_offset += elm.data.u_atom->data().length(); |
| 3293 } else { | 3306 } else { |
| 3294 cp_offset++; | 3307 cp_offset++; |
| 3295 Vector<const uc16> quarks = elm.data.u_atom->data(); | 3308 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 3296 } | 3309 } |
| 3297 } | 3310 } |
| 3298 } | 3311 } |
| 3299 | 3312 |
| 3300 | 3313 |
| 3301 void Analysis::VisitText(TextNode* that) { | 3314 void AssertionPropagation::VisitText(TextNode* that) { |
| 3302 if (ignore_case_) { | 3315 if (ignore_case_) { |
| 3303 that->MakeCaseIndependent(); | 3316 that->MakeCaseIndependent(); |
| 3304 } | 3317 } |
| 3305 EnsureAnalyzed(that->on_success()); | 3318 EnsureAnalyzed(that->on_success()); |
| 3306 NodeInfo* info = that->info(); | 3319 NodeInfo* info = that->info(); |
| 3307 NodeInfo* next_info = that->on_success()->info(); | 3320 NodeInfo* next_info = that->on_success()->info(); |
| 3308 // If the following node is interested in what it follows then this | 3321 // If the following node is interested in what it follows then this |
| 3309 // node must determine it. | 3322 // node must determine it. |
| 3310 info->determine_newline = next_info->follows_newline_interest; | 3323 info->determine_newline = next_info->follows_newline_interest; |
| 3311 info->determine_word = next_info->follows_word_interest; | 3324 info->determine_word = next_info->follows_word_interest; |
| 3312 info->determine_start = next_info->follows_start_interest; | 3325 info->determine_start = next_info->follows_start_interest; |
| 3313 that->CalculateOffsets(); | 3326 that->CalculateOffsets(); |
| 3314 } | 3327 } |
| 3315 | 3328 |
| 3316 | 3329 |
| 3317 void Analysis::VisitAction(ActionNode* that) { | 3330 void AssertionPropagation::VisitAction(ActionNode* that) { |
| 3318 RegExpNode* target = that->on_success(); | 3331 RegExpNode* target = that->on_success(); |
| 3319 EnsureAnalyzed(target); | 3332 EnsureAnalyzed(target); |
| 3320 // If the next node is interested in what it follows then this node | 3333 // If the next node is interested in what it follows then this node |
| 3321 // has to be interested too so it can pass the information on. | 3334 // has to be interested too so it can pass the information on. |
| 3322 that->info()->AddFromFollowing(target->info()); | 3335 that->info()->AddFromFollowing(target->info()); |
| 3323 } | 3336 } |
| 3324 | 3337 |
| 3325 | 3338 |
| 3326 void Analysis::VisitChoice(ChoiceNode* that) { | 3339 void AssertionPropagation::VisitChoice(ChoiceNode* that) { |
| 3327 NodeInfo* info = that->info(); | 3340 NodeInfo* info = that->info(); |
| 3328 for (int i = 0; i < that->alternatives()->length(); i++) { | 3341 for (int i = 0; i < that->alternatives()->length(); i++) { |
| 3329 RegExpNode* node = that->alternatives()->at(i).node(); | 3342 RegExpNode* node = that->alternatives()->at(i).node(); |
| 3330 EnsureAnalyzed(node); | 3343 EnsureAnalyzed(node); |
| 3331 // Anything the following nodes need to know has to be known by | 3344 // Anything the following nodes need to know has to be known by |
| 3332 // this node also, so it can pass it on. | 3345 // this node also, so it can pass it on. |
| 3333 info->AddFromFollowing(node->info()); | 3346 info->AddFromFollowing(node->info()); |
| 3334 } | 3347 } |
| 3335 } | 3348 } |
| 3336 | 3349 |
| 3337 | 3350 |
| 3338 void Analysis::VisitBackReference(BackReferenceNode* that) { | 3351 void AssertionPropagation::VisitBackReference(BackReferenceNode* that) { |
| 3339 EnsureAnalyzed(that->on_success()); | 3352 EnsureAnalyzed(that->on_success()); |
| 3340 } | 3353 } |
| 3341 | 3354 |
| 3342 | 3355 |
| 3343 // ------------------------------------------------------------------- | 3356 // ------------------------------------------------------------------- |
| 3344 // Assumption expansion | 3357 // Assumption expansion |
| 3345 | 3358 |
| 3346 | 3359 |
| 3347 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) { | 3360 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) { |
| 3348 siblings_.Ensure(this); | 3361 siblings_.Ensure(this); |
| (...skipping 294 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3643 } | 3656 } |
| 3644 } | 3657 } |
| 3645 | 3658 |
| 3646 | 3659 |
| 3647 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 3660 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
| 3648 RegExpNode* target = that->on_success(); | 3661 RegExpNode* target = that->on_success(); |
| 3649 target->Accept(this); | 3662 target->Accept(this); |
| 3650 } | 3663 } |
| 3651 | 3664 |
| 3652 | 3665 |
| 3653 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, | 3666 #ifdef DEBUG |
| 3654 RegExpNode** node_return, | 3667 |
| 3668 |
| 3669 class VisitNodeScope { |
| 3670 public: |
| 3671 explicit VisitNodeScope(RegExpNode* node) : node_(node) { |
| 3672 ASSERT(!node->info()->visited); |
| 3673 node->info()->visited = true; |
| 3674 } |
| 3675 ~VisitNodeScope() { |
| 3676 node_->info()->visited = false; |
| 3677 } |
| 3678 private: |
| 3679 RegExpNode* node_; |
| 3680 }; |
| 3681 |
| 3682 |
| 3683 class NodeValidator : public NodeVisitor { |
| 3684 public: |
| 3685 virtual void ValidateInfo(NodeInfo* info) = 0; |
| 3686 #define DECLARE_VISIT(Type) \ |
| 3687 virtual void Visit##Type(Type##Node* that); |
| 3688 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
| 3689 #undef DECLARE_VISIT |
| 3690 }; |
| 3691 |
| 3692 |
| 3693 class PostAnalysisNodeValidator : public NodeValidator { |
| 3694 public: |
| 3695 virtual void ValidateInfo(NodeInfo* info); |
| 3696 }; |
| 3697 |
| 3698 |
| 3699 class PostExpansionNodeValidator : public NodeValidator { |
| 3700 public: |
| 3701 virtual void ValidateInfo(NodeInfo* info); |
| 3702 }; |
| 3703 |
| 3704 |
| 3705 void PostAnalysisNodeValidator::ValidateInfo(NodeInfo* info) { |
| 3706 ASSERT(info->been_analyzed); |
| 3707 } |
| 3708 |
| 3709 |
| 3710 void PostExpansionNodeValidator::ValidateInfo(NodeInfo* info) { |
| 3711 ASSERT_EQ(info->determine_newline, info->does_determine_newline); |
| 3712 ASSERT_EQ(info->determine_start, info->does_determine_start); |
| 3713 ASSERT_EQ(info->determine_word, info->does_determine_word); |
| 3714 ASSERT_EQ(info->follows_word_interest, |
| 3715 (info->follows_word != NodeInfo::UNKNOWN)); |
| 3716 if (false) { |
| 3717 // These are still unimplemented. |
| 3718 ASSERT_EQ(info->follows_start_interest, |
| 3719 (info->follows_start != NodeInfo::UNKNOWN)); |
| 3720 ASSERT_EQ(info->follows_newline_interest, |
| 3721 (info->follows_newline != NodeInfo::UNKNOWN)); |
| 3722 } |
| 3723 } |
| 3724 |
| 3725 |
| 3726 void NodeValidator::VisitAction(ActionNode* that) { |
| 3727 if (that->info()->visited) return; |
| 3728 VisitNodeScope scope(that); |
| 3729 ValidateInfo(that->info()); |
| 3730 that->on_success()->Accept(this); |
| 3731 } |
| 3732 |
| 3733 |
| 3734 void NodeValidator::VisitBackReference(BackReferenceNode* that) { |
| 3735 if (that->info()->visited) return; |
| 3736 VisitNodeScope scope(that); |
| 3737 ValidateInfo(that->info()); |
| 3738 that->on_success()->Accept(this); |
| 3739 } |
| 3740 |
| 3741 |
| 3742 void NodeValidator::VisitChoice(ChoiceNode* that) { |
| 3743 if (that->info()->visited) return; |
| 3744 VisitNodeScope scope(that); |
| 3745 ValidateInfo(that->info()); |
| 3746 ZoneList<GuardedAlternative>* alts = that->alternatives(); |
| 3747 for (int i = 0; i < alts->length(); i++) |
| 3748 alts->at(i).node()->Accept(this); |
| 3749 } |
| 3750 |
| 3751 |
| 3752 void NodeValidator::VisitEnd(EndNode* that) { |
| 3753 if (that->info()->visited) return; |
| 3754 VisitNodeScope scope(that); |
| 3755 ValidateInfo(that->info()); |
| 3756 } |
| 3757 |
| 3758 |
| 3759 void NodeValidator::VisitText(TextNode* that) { |
| 3760 if (that->info()->visited) return; |
| 3761 VisitNodeScope scope(that); |
| 3762 ValidateInfo(that->info()); |
| 3763 that->on_success()->Accept(this); |
| 3764 } |
| 3765 |
| 3766 |
| 3767 #endif |
| 3768 |
| 3769 |
| 3770 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data, |
| 3655 bool ignore_case, | 3771 bool ignore_case, |
| 3656 bool is_multiline, | 3772 bool is_multiline, |
| 3657 Handle<String> pattern, | 3773 Handle<String> pattern, |
| 3658 bool is_ascii) { | 3774 bool is_ascii) { |
| 3659 RegExpCompiler compiler(input->capture_count, ignore_case, is_ascii); | 3775 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); |
| 3660 // Wrap the body of the regexp in capture #0. | 3776 // Wrap the body of the regexp in capture #0. |
| 3661 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, | 3777 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, |
| 3662 0, | 3778 0, |
| 3663 &compiler, | 3779 &compiler, |
| 3664 compiler.accept()); | 3780 compiler.accept()); |
| 3665 // Add a .*? at the beginning, outside the body capture. | 3781 // Add a .*? at the beginning, outside the body capture. |
| 3666 // Note: We could choose to not add this if the regexp is anchored at | 3782 // Note: We could choose to not add this if the regexp is anchored at |
| 3667 // the start of the input but I'm not sure how best to do that and | 3783 // the start of the input but I'm not sure how best to do that and |
| 3668 // since we don't even handle ^ yet I'm saving that optimization for | 3784 // since we don't even handle ^ yet I'm saving that optimization for |
| 3669 // later. | 3785 // later. |
| 3670 RegExpNode* node = RegExpQuantifier::ToNode(0, | 3786 RegExpNode* node = RegExpQuantifier::ToNode(0, |
| 3671 RegExpQuantifier::kInfinity, | 3787 RegExpQuantifier::kInfinity, |
| 3672 false, | 3788 false, |
| 3673 new RegExpCharacterClass('*'), | 3789 new RegExpCharacterClass('*'), |
| 3674 &compiler, | 3790 &compiler, |
| 3675 captured_body); | 3791 captured_body); |
| 3676 if (node_return != NULL) *node_return = node; | 3792 AssertionPropagation analysis(ignore_case); |
| 3677 Analysis analysis(ignore_case); | |
| 3678 analysis.EnsureAnalyzed(node); | 3793 analysis.EnsureAnalyzed(node); |
| 3679 | 3794 |
| 3680 NodeInfo info = *node->info(); | 3795 NodeInfo info = *node->info(); |
| 3796 data->has_lookbehind = info.HasLookbehind(); |
| 3797 if (data->has_lookbehind) { |
| 3798 // If this node needs information about the preceding text we let |
| 3799 // it start with a character class that consumes a single character |
| 3800 // and proceeds to wherever is appropriate. This means that if |
| 3801 // has_lookbehind is set the code generator must start one character |
| 3802 // before the start position. |
| 3803 node = new TextNode(new RegExpCharacterClass('*'), node); |
| 3804 analysis.EnsureAnalyzed(node); |
| 3805 } |
| 3806 |
| 3807 #ifdef DEBUG |
| 3808 PostAnalysisNodeValidator post_analysis_validator; |
| 3809 node->Accept(&post_analysis_validator); |
| 3810 #endif |
| 3811 |
| 3681 node = node->EnsureExpanded(&info); | 3812 node = node->EnsureExpanded(&info); |
| 3682 | 3813 |
| 3814 #ifdef DEBUG |
| 3815 PostExpansionNodeValidator post_expansion_validator; |
| 3816 node->Accept(&post_expansion_validator); |
| 3817 #endif |
| 3818 |
| 3819 data->node = node; |
| 3820 |
| 3683 if (is_multiline && !FLAG_attempt_multiline_irregexp) { | 3821 if (is_multiline && !FLAG_attempt_multiline_irregexp) { |
| 3684 return Handle<FixedArray>::null(); | 3822 return Handle<FixedArray>::null(); |
| 3685 } | 3823 } |
| 3686 | 3824 |
| 3825 if (data->has_lookbehind) { |
| 3826 return Handle<FixedArray>::null(); |
| 3827 } |
| 3828 |
| 3687 if (FLAG_irregexp_native) { | 3829 if (FLAG_irregexp_native) { |
| 3688 #ifdef ARM | 3830 #ifdef ARM |
| 3689 // Unimplemented, fall-through to bytecode implementation. | 3831 // Unimplemented, fall-through to bytecode implementation. |
| 3690 #else // IA32 | 3832 #else // IA32 |
| 3691 RegExpMacroAssemblerIA32::Mode mode; | 3833 RegExpMacroAssemblerIA32::Mode mode; |
| 3692 if (is_ascii) { | 3834 if (is_ascii) { |
| 3693 mode = RegExpMacroAssemblerIA32::ASCII; | 3835 mode = RegExpMacroAssemblerIA32::ASCII; |
| 3694 } else { | 3836 } else { |
| 3695 mode = RegExpMacroAssemblerIA32::UC16; | 3837 mode = RegExpMacroAssemblerIA32::UC16; |
| 3696 } | 3838 } |
| 3697 RegExpMacroAssemblerIA32 macro_assembler(mode, | 3839 RegExpMacroAssemblerIA32 macro_assembler(mode, |
| 3698 (input->capture_count + 1) * 2); | 3840 (data->capture_count + 1) * 2); |
| 3699 return compiler.Assemble(&macro_assembler, | 3841 return compiler.Assemble(&macro_assembler, |
| 3700 node, | 3842 node, |
| 3701 input->capture_count, | 3843 data->capture_count, |
| 3702 pattern); | 3844 pattern); |
| 3703 #endif | 3845 #endif |
| 3704 } | 3846 } |
| 3705 EmbeddedVector<byte, 1024> codes; | 3847 EmbeddedVector<byte, 1024> codes; |
| 3706 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 3848 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| 3707 return compiler.Assemble(&macro_assembler, | 3849 return compiler.Assemble(&macro_assembler, |
| 3708 node, | 3850 node, |
| 3709 input->capture_count, | 3851 data->capture_count, |
| 3710 pattern); | 3852 pattern); |
| 3711 } | 3853 } |
| 3712 | 3854 |
| 3713 | 3855 |
| 3714 }} // namespace v8::internal | 3856 }} // namespace v8::internal |
| OLD | NEW |