Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/jsregexp.cc

Issue 14886: Bring toiger up to date with bleeding edge 984. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/toiger/
Patch Set: Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/macro-assembler-arm.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after
253 bool in_cache = !cached.is_null(); 253 bool in_cache = !cached.is_null();
254 LOG(RegExpCompileEvent(re, in_cache)); 254 LOG(RegExpCompileEvent(re, in_cache));
255 255
256 Handle<Object> result; 256 Handle<Object> result;
257 if (in_cache) { 257 if (in_cache) {
258 re->set_data(*cached); 258 re->set_data(*cached);
259 result = re; 259 result = re;
260 } else { 260 } else {
261 FlattenString(pattern); 261 FlattenString(pattern);
262 ZoneScope zone_scope(DELETE_ON_EXIT); 262 ZoneScope zone_scope(DELETE_ON_EXIT);
263 RegExpParseResult parse_result; 263 RegExpCompileData parse_result;
264 FlatStringReader reader(pattern); 264 FlatStringReader reader(pattern);
265 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { 265 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
266 // Throw an exception if we fail to parse the pattern. 266 // Throw an exception if we fail to parse the pattern.
267 ThrowRegExpException(re, 267 ThrowRegExpException(re,
268 pattern, 268 pattern,
269 parse_result.error, 269 parse_result.error,
270 "malformed_regexp"); 270 "malformed_regexp");
271 return Handle<Object>::null(); 271 return Handle<Object>::null();
272 } 272 }
273 RegExpAtom* atom = parse_result.tree->AsAtom(); 273
274 if (atom != NULL && !flags.is_ignore_case()) { 274 if (parse_result.simple && !flags.is_ignore_case()) {
275 if (parse_result.has_character_escapes) { 275 // Parse-tree is a single atom that is equal to the pattern.
276 Vector<const uc16> atom_pattern = atom->data(); 276 result = AtomCompile(re, pattern, flags, pattern);
277 Handle<String> atom_string = 277 } else if (parse_result.tree->IsAtom() &&
278 Factory::NewStringFromTwoByte(atom_pattern); 278 !flags.is_ignore_case() &&
279 result = AtomCompile(re, pattern, flags, atom_string); 279 parse_result.capture_count == 0) {
280 } else { 280 // TODO(lrn) Accept capture_count > 0 on atoms.
281 result = AtomCompile(re, pattern, flags, pattern); 281 RegExpAtom* atom = parse_result.tree->AsAtom();
282 } 282 Vector<const uc16> atom_pattern = atom->data();
283 Handle<String> atom_string =
284 Factory::NewStringFromTwoByte(atom_pattern);
285 result = AtomCompile(re, pattern, flags, atom_string);
286 } else if (FLAG_irregexp) {
287 result = IrregexpPrepare(re, pattern, flags);
283 } else { 288 } else {
284 if (FLAG_irregexp) { 289 result = JscrePrepare(re, pattern, flags);
285 result = IrregexpPrepare(re, pattern, flags);
286 } else {
287 result = JscrePrepare(re, pattern, flags);
288 }
289 } 290 }
290 Object* data = re->data(); 291 Object* data = re->data();
291 if (data->IsFixedArray()) { 292 if (data->IsFixedArray()) {
292 // If compilation succeeded then the data is set on the regexp 293 // If compilation succeeded then the data is set on the regexp
293 // and we can store it in the cache. 294 // and we can store it in the cache.
294 Handle<FixedArray> data(FixedArray::cast(re->data())); 295 Handle<FixedArray> data(FixedArray::cast(re->data()));
295 CompilationCache::PutRegExp(pattern, flags, data); 296 CompilationCache::PutRegExp(pattern, flags, data);
296 } 297 }
297 } 298 }
298 299
299 return result; 300 return result;
300 } 301 }
301 302
302 303
303 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, 304 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
304 Handle<String> subject, 305 Handle<String> subject,
305 Handle<Object> index) { 306 Handle<Object> index) {
306 switch (regexp->TypeTag()) { 307 switch (regexp->TypeTag()) {
307 case JSRegExp::ATOM: 308 case JSRegExp::ATOM:
308 return AtomExec(regexp, subject, index); 309 return AtomExec(regexp, subject, index);
309 case JSRegExp::IRREGEXP: { 310 case JSRegExp::IRREGEXP: {
310 Handle<Object> result = IrregexpExec(regexp, subject, index); 311 Handle<Object> result = IrregexpExec(regexp, subject, index);
311 if (!result.is_null()) { 312 if (!result.is_null() || Top::has_pending_exception()) {
312 return result; 313 return result;
313 } 314 }
314 // We couldn't handle the regexp using Irregexp, so fall back 315 // We couldn't handle the regexp using Irregexp, so fall back
315 // on JSCRE. 316 // on JSCRE.
316 // Reset the JSRegExp to use JSCRE. 317 // Reset the JSRegExp to use JSCRE.
317 JscrePrepare(regexp, 318 JscrePrepare(regexp,
318 Handle<String>(regexp->Pattern()), 319 Handle<String>(regexp->Pattern()),
319 regexp->GetFlags()); 320 regexp->GetFlags());
320 // Fall-through to JSCRE. 321 // Fall-through to JSCRE.
321 } 322 }
322 case JSRegExp::JSCRE: 323 case JSRegExp::JSCRE:
323 if (FLAG_disable_jscre) { 324 if (FLAG_disable_jscre) {
324 UNIMPLEMENTED(); 325 UNIMPLEMENTED();
325 } 326 }
326 return JscreExec(regexp, subject, index); 327 return JscreExec(regexp, subject, index);
327 default: 328 default:
328 UNREACHABLE(); 329 UNREACHABLE();
329 return Handle<Object>::null(); 330 return Handle<Object>::null();
330 } 331 }
331 } 332 }
332 333
333 334
334 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, 335 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
335 Handle<String> subject) { 336 Handle<String> subject) {
336 switch (regexp->TypeTag()) { 337 switch (regexp->TypeTag()) {
337 case JSRegExp::ATOM: 338 case JSRegExp::ATOM:
338 return AtomExecGlobal(regexp, subject); 339 return AtomExecGlobal(regexp, subject);
339 case JSRegExp::IRREGEXP: { 340 case JSRegExp::IRREGEXP: {
340 Handle<Object> result = IrregexpExecGlobal(regexp, subject); 341 Handle<Object> result = IrregexpExecGlobal(regexp, subject);
341 if (!result.is_null()) { 342 if (!result.is_null() || Top::has_pending_exception()) {
342 return result; 343 return result;
343 } 344 }
344 // We couldn't handle the regexp using Irregexp, so fall back 345 // Empty handle as result but no exception thrown means that
345 // on JSCRE. 346 // the regexp contains features not yet handled by the irregexp
346 // Reset the JSRegExp to use JSCRE. 347 // compiler.
348 // We have to fall back on JSCRE. Reset the JSRegExp to use JSCRE.
347 JscrePrepare(regexp, 349 JscrePrepare(regexp,
348 Handle<String>(regexp->Pattern()), 350 Handle<String>(regexp->Pattern()),
349 regexp->GetFlags()); 351 regexp->GetFlags());
350 // Fall-through to JSCRE. 352 // Fall-through to JSCRE.
351 } 353 }
352 case JSRegExp::JSCRE: 354 case JSRegExp::JSCRE:
353 if (FLAG_disable_jscre) { 355 if (FLAG_disable_jscre) {
354 UNIMPLEMENTED(); 356 UNIMPLEMENTED();
355 } 357 }
356 return JscreExecGlobal(regexp, subject); 358 return JscreExecGlobal(regexp, subject);
(...skipping 318 matching lines...) Expand 10 before | Expand all | Expand 10 after
675 } else { 677 } else {
676 // Exited loop with the exception in matches. 678 // Exited loop with the exception in matches.
677 return matches; 679 return matches;
678 } 680 }
679 } 681 }
680 682
681 683
682 // Irregexp implementation. 684 // Irregexp implementation.
683 685
684 686
687 // Retrieves a compiled version of the regexp for either ASCII or non-ASCII
688 // strings. If the compiled version doesn't already exist, it is compiled
689 // from the source pattern.
690 // Irregexp is not feature complete yet. If there is something in the
691 // regexp that the compiler cannot currently handle, an empty
692 // handle is returned, but no exception is thrown.
685 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re, 693 static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re,
686 bool is_ascii) { 694 bool is_ascii) {
687 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); 695 ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
688 Handle<FixedArray> alternatives( 696 Handle<FixedArray> alternatives(
689 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); 697 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)));
690 ASSERT_EQ(2, alternatives->length()); 698 ASSERT_EQ(2, alternatives->length());
691 699
692 int index = is_ascii ? 0 : 1; 700 int index = is_ascii ? 0 : 1;
693 Object* entry = alternatives->get(index); 701 Object* entry = alternatives->get(index);
694 if (!entry->IsNull()) { 702 if (!entry->IsNull()) {
695 return Handle<FixedArray>(FixedArray::cast(entry)); 703 return Handle<FixedArray>(FixedArray::cast(entry));
696 } 704 }
697 705
698 // Compile the RegExp. 706 // Compile the RegExp.
699 ZoneScope zone_scope(DELETE_ON_EXIT); 707 ZoneScope zone_scope(DELETE_ON_EXIT);
700 708
701 JSRegExp::Flags flags = re->GetFlags(); 709 JSRegExp::Flags flags = re->GetFlags();
702 710
703 Handle<String> pattern(re->Pattern()); 711 Handle<String> pattern(re->Pattern());
704 StringShape shape(*pattern); 712 StringShape shape(*pattern);
705 if (!pattern->IsFlat(shape)) { 713 if (!pattern->IsFlat(shape)) {
706 pattern->Flatten(shape); 714 pattern->Flatten(shape);
707 } 715 }
708 716
709 RegExpParseResult parse_result; 717 RegExpCompileData compile_data;
710 FlatStringReader reader(pattern); 718 FlatStringReader reader(pattern);
711 if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { 719 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) {
712 // Throw an exception if we fail to parse the pattern. 720 // Throw an exception if we fail to parse the pattern.
713 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. 721 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
714 ThrowRegExpException(re, 722 ThrowRegExpException(re,
715 pattern, 723 pattern,
716 parse_result.error, 724 compile_data.error,
717 "malformed_regexp"); 725 "malformed_regexp");
718 return Handle<FixedArray>::null(); 726 return Handle<FixedArray>::null();
719 } 727 }
720 Handle<FixedArray> compiled_entry = 728 Handle<FixedArray> compiled_entry =
721 RegExpEngine::Compile(&parse_result, 729 RegExpEngine::Compile(&compile_data,
722 NULL,
723 flags.is_ignore_case(), 730 flags.is_ignore_case(),
724 flags.is_multiline(), 731 flags.is_multiline(),
725 pattern, 732 pattern,
726 is_ascii); 733 is_ascii);
727 if (!compiled_entry.is_null()) { 734 if (!compiled_entry.is_null()) {
728 alternatives->set(index, *compiled_entry); 735 alternatives->set(index, *compiled_entry);
729 } 736 }
730 return compiled_entry; 737 return compiled_entry;
731 } 738 }
732 739
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after
905 start_offset += slice->start(); 912 start_offset += slice->start();
906 end_offset += slice->start(); 913 end_offset += slice->start();
907 subject = Handle<String>(slice->buffer()); 914 subject = Handle<String>(slice->buffer());
908 } 915 }
909 916
910 // String is now either Sequential or External 917 // String is now either Sequential or External
911 StringShape flatshape(*subject); 918 StringShape flatshape(*subject);
912 bool is_ascii = flatshape.IsAsciiRepresentation(); 919 bool is_ascii = flatshape.IsAsciiRepresentation();
913 int char_size_shift = is_ascii ? 0 : 1; 920 int char_size_shift = is_ascii ? 0 : 1;
914 921
922 RegExpMacroAssemblerIA32::Result res;
923
915 if (flatshape.IsExternal()) { 924 if (flatshape.IsExternal()) {
916 const byte* address; 925 const byte* address;
917 if (is_ascii) { 926 if (is_ascii) {
918 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); 927 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
919 address = reinterpret_cast<const byte*>(ext->resource()->data()); 928 address = reinterpret_cast<const byte*>(ext->resource()->data());
920 } else { 929 } else {
921 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); 930 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
922 address = reinterpret_cast<const byte*>(ext->resource()->data()); 931 address = reinterpret_cast<const byte*>(ext->resource()->data());
923 } 932 }
924 rc = RegExpMacroAssemblerIA32::Execute( 933 res = RegExpMacroAssemblerIA32::Execute(
925 *code, 934 *code,
926 &address, 935 &address,
927 start_offset << char_size_shift, 936 start_offset << char_size_shift,
928 end_offset << char_size_shift, 937 end_offset << char_size_shift,
929 offsets_vector, 938 offsets_vector,
930 previous_index == 0); 939 previous_index == 0);
931 } else { // Sequential string 940 } else { // Sequential string
932 Address char_address = 941 Address char_address =
933 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() 942 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
934 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); 943 : SeqTwoByteString::cast(*subject)->GetCharsAddress();
935 int byte_offset = char_address - reinterpret_cast<Address>(*subject); 944 int byte_offset = char_address - reinterpret_cast<Address>(*subject);
936 rc = RegExpMacroAssemblerIA32::Execute( 945 res = RegExpMacroAssemblerIA32::Execute(
937 *code, 946 *code,
938 subject.location(), 947 subject.location(),
939 byte_offset + (start_offset << char_size_shift), 948 byte_offset + (start_offset << char_size_shift),
940 byte_offset + (end_offset << char_size_shift), 949 byte_offset + (end_offset << char_size_shift),
941 offsets_vector, 950 offsets_vector,
942 previous_index == 0); 951 previous_index == 0);
943 } 952 }
944 953
954 if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
955 ASSERT(Top::has_pending_exception());
956 return Handle<Object>::null();
957 }
958 rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
959
945 if (rc) { 960 if (rc) {
946 // Capture values are relative to start_offset only. 961 // Capture values are relative to start_offset only.
947 for (int i = 0; i < offsets_vector_length; i++) { 962 for (int i = 0; i < offsets_vector_length; i++) {
948 if (offsets_vector[i] >= 0) { 963 if (offsets_vector[i] >= 0) {
949 offsets_vector[i] += previous_index; 964 offsets_vector[i] += previous_index;
950 } 965 }
951 } 966 }
952 } 967 }
953 break; 968 break;
954 #else 969 #else
(...skipping 1641 matching lines...) Expand 10 before | Expand all | Expand 10 after
2596 2611
2597 2612
2598 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 2613 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
2599 RegExpNode* on_success) { 2614 RegExpNode* on_success) {
2600 return new TextNode(elements(), on_success); 2615 return new TextNode(elements(), on_success);
2601 } 2616 }
2602 2617
2603 2618
2604 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 2619 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
2605 RegExpNode* on_success) { 2620 RegExpNode* on_success) {
2606 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); 2621 return new TextNode(this, on_success);
2607 elms->Add(TextElement::CharClass(this));
2608 return new TextNode(elms, on_success);
2609 } 2622 }
2610 2623
2611 2624
2612 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, 2625 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
2613 RegExpNode* on_success) { 2626 RegExpNode* on_success) {
2614 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 2627 ZoneList<RegExpTree*>* alternatives = this->alternatives();
2615 int length = alternatives->length(); 2628 int length = alternatives->length();
2616 ChoiceNode* result = new ChoiceNode(length); 2629 ChoiceNode* result = new ChoiceNode(length);
2617 for (int i = 0; i < length; i++) { 2630 for (int i = 0; i < length; i++) {
2618 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, 2631 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
(...skipping 639 matching lines...) Expand 10 before | Expand all | Expand 10 after
3258 return entry->out_set(); 3271 return entry->out_set();
3259 else 3272 else
3260 return empty(); 3273 return empty();
3261 } 3274 }
3262 3275
3263 3276
3264 // ------------------------------------------------------------------- 3277 // -------------------------------------------------------------------
3265 // Analysis 3278 // Analysis
3266 3279
3267 3280
3268 void Analysis::EnsureAnalyzed(RegExpNode* that) { 3281 void AssertionPropagation::EnsureAnalyzed(RegExpNode* that) {
3269 if (that->info()->been_analyzed || that->info()->being_analyzed) 3282 if (that->info()->been_analyzed || that->info()->being_analyzed)
3270 return; 3283 return;
3271 that->info()->being_analyzed = true; 3284 that->info()->being_analyzed = true;
3272 that->Accept(this); 3285 that->Accept(this);
3273 that->info()->being_analyzed = false; 3286 that->info()->being_analyzed = false;
3274 that->info()->been_analyzed = true; 3287 that->info()->been_analyzed = true;
3275 } 3288 }
3276 3289
3277 3290
3278 void Analysis::VisitEnd(EndNode* that) { 3291 void AssertionPropagation::VisitEnd(EndNode* that) {
3279 // nothing to do 3292 // nothing to do
3280 } 3293 }
3281 3294
3282 3295
3283 void TextNode::CalculateOffsets() { 3296 void TextNode::CalculateOffsets() {
3284 int element_count = elements()->length(); 3297 int element_count = elements()->length();
3285 // Set up the offsets of the elements relative to the start. This is a fixed 3298 // Set up the offsets of the elements relative to the start. This is a fixed
3286 // quantity since a TextNode can only contain fixed-width things. 3299 // quantity since a TextNode can only contain fixed-width things.
3287 int cp_offset = 0; 3300 int cp_offset = 0;
3288 for (int i = 0; i < element_count; i++) { 3301 for (int i = 0; i < element_count; i++) {
3289 TextElement& elm = elements()->at(i); 3302 TextElement& elm = elements()->at(i);
3290 elm.cp_offset = cp_offset; 3303 elm.cp_offset = cp_offset;
3291 if (elm.type == TextElement::ATOM) { 3304 if (elm.type == TextElement::ATOM) {
3292 cp_offset += elm.data.u_atom->data().length(); 3305 cp_offset += elm.data.u_atom->data().length();
3293 } else { 3306 } else {
3294 cp_offset++; 3307 cp_offset++;
3295 Vector<const uc16> quarks = elm.data.u_atom->data(); 3308 Vector<const uc16> quarks = elm.data.u_atom->data();
3296 } 3309 }
3297 } 3310 }
3298 } 3311 }
3299 3312
3300 3313
3301 void Analysis::VisitText(TextNode* that) { 3314 void AssertionPropagation::VisitText(TextNode* that) {
3302 if (ignore_case_) { 3315 if (ignore_case_) {
3303 that->MakeCaseIndependent(); 3316 that->MakeCaseIndependent();
3304 } 3317 }
3305 EnsureAnalyzed(that->on_success()); 3318 EnsureAnalyzed(that->on_success());
3306 NodeInfo* info = that->info(); 3319 NodeInfo* info = that->info();
3307 NodeInfo* next_info = that->on_success()->info(); 3320 NodeInfo* next_info = that->on_success()->info();
3308 // If the following node is interested in what it follows then this 3321 // If the following node is interested in what it follows then this
3309 // node must determine it. 3322 // node must determine it.
3310 info->determine_newline = next_info->follows_newline_interest; 3323 info->determine_newline = next_info->follows_newline_interest;
3311 info->determine_word = next_info->follows_word_interest; 3324 info->determine_word = next_info->follows_word_interest;
3312 info->determine_start = next_info->follows_start_interest; 3325 info->determine_start = next_info->follows_start_interest;
3313 that->CalculateOffsets(); 3326 that->CalculateOffsets();
3314 } 3327 }
3315 3328
3316 3329
3317 void Analysis::VisitAction(ActionNode* that) { 3330 void AssertionPropagation::VisitAction(ActionNode* that) {
3318 RegExpNode* target = that->on_success(); 3331 RegExpNode* target = that->on_success();
3319 EnsureAnalyzed(target); 3332 EnsureAnalyzed(target);
3320 // If the next node is interested in what it follows then this node 3333 // If the next node is interested in what it follows then this node
3321 // has to be interested too so it can pass the information on. 3334 // has to be interested too so it can pass the information on.
3322 that->info()->AddFromFollowing(target->info()); 3335 that->info()->AddFromFollowing(target->info());
3323 } 3336 }
3324 3337
3325 3338
3326 void Analysis::VisitChoice(ChoiceNode* that) { 3339 void AssertionPropagation::VisitChoice(ChoiceNode* that) {
3327 NodeInfo* info = that->info(); 3340 NodeInfo* info = that->info();
3328 for (int i = 0; i < that->alternatives()->length(); i++) { 3341 for (int i = 0; i < that->alternatives()->length(); i++) {
3329 RegExpNode* node = that->alternatives()->at(i).node(); 3342 RegExpNode* node = that->alternatives()->at(i).node();
3330 EnsureAnalyzed(node); 3343 EnsureAnalyzed(node);
3331 // Anything the following nodes need to know has to be known by 3344 // Anything the following nodes need to know has to be known by
3332 // this node also, so it can pass it on. 3345 // this node also, so it can pass it on.
3333 info->AddFromFollowing(node->info()); 3346 info->AddFromFollowing(node->info());
3334 } 3347 }
3335 } 3348 }
3336 3349
3337 3350
3338 void Analysis::VisitBackReference(BackReferenceNode* that) { 3351 void AssertionPropagation::VisitBackReference(BackReferenceNode* that) {
3339 EnsureAnalyzed(that->on_success()); 3352 EnsureAnalyzed(that->on_success());
3340 } 3353 }
3341 3354
3342 3355
3343 // ------------------------------------------------------------------- 3356 // -------------------------------------------------------------------
3344 // Assumption expansion 3357 // Assumption expansion
3345 3358
3346 3359
3347 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) { 3360 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) {
3348 siblings_.Ensure(this); 3361 siblings_.Ensure(this);
(...skipping 294 matching lines...) Expand 10 before | Expand all | Expand 10 after
3643 } 3656 }
3644 } 3657 }
3645 3658
3646 3659
3647 void DispatchTableConstructor::VisitAction(ActionNode* that) { 3660 void DispatchTableConstructor::VisitAction(ActionNode* that) {
3648 RegExpNode* target = that->on_success(); 3661 RegExpNode* target = that->on_success();
3649 target->Accept(this); 3662 target->Accept(this);
3650 } 3663 }
3651 3664
3652 3665
3653 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, 3666 #ifdef DEBUG
3654 RegExpNode** node_return, 3667
3668
3669 class VisitNodeScope {
3670 public:
3671 explicit VisitNodeScope(RegExpNode* node) : node_(node) {
3672 ASSERT(!node->info()->visited);
3673 node->info()->visited = true;
3674 }
3675 ~VisitNodeScope() {
3676 node_->info()->visited = false;
3677 }
3678 private:
3679 RegExpNode* node_;
3680 };
3681
3682
3683 class NodeValidator : public NodeVisitor {
3684 public:
3685 virtual void ValidateInfo(NodeInfo* info) = 0;
3686 #define DECLARE_VISIT(Type) \
3687 virtual void Visit##Type(Type##Node* that);
3688 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
3689 #undef DECLARE_VISIT
3690 };
3691
3692
3693 class PostAnalysisNodeValidator : public NodeValidator {
3694 public:
3695 virtual void ValidateInfo(NodeInfo* info);
3696 };
3697
3698
3699 class PostExpansionNodeValidator : public NodeValidator {
3700 public:
3701 virtual void ValidateInfo(NodeInfo* info);
3702 };
3703
3704
3705 void PostAnalysisNodeValidator::ValidateInfo(NodeInfo* info) {
3706 ASSERT(info->been_analyzed);
3707 }
3708
3709
3710 void PostExpansionNodeValidator::ValidateInfo(NodeInfo* info) {
3711 ASSERT_EQ(info->determine_newline, info->does_determine_newline);
3712 ASSERT_EQ(info->determine_start, info->does_determine_start);
3713 ASSERT_EQ(info->determine_word, info->does_determine_word);
3714 ASSERT_EQ(info->follows_word_interest,
3715 (info->follows_word != NodeInfo::UNKNOWN));
3716 if (false) {
3717 // These are still unimplemented.
3718 ASSERT_EQ(info->follows_start_interest,
3719 (info->follows_start != NodeInfo::UNKNOWN));
3720 ASSERT_EQ(info->follows_newline_interest,
3721 (info->follows_newline != NodeInfo::UNKNOWN));
3722 }
3723 }
3724
3725
3726 void NodeValidator::VisitAction(ActionNode* that) {
3727 if (that->info()->visited) return;
3728 VisitNodeScope scope(that);
3729 ValidateInfo(that->info());
3730 that->on_success()->Accept(this);
3731 }
3732
3733
3734 void NodeValidator::VisitBackReference(BackReferenceNode* that) {
3735 if (that->info()->visited) return;
3736 VisitNodeScope scope(that);
3737 ValidateInfo(that->info());
3738 that->on_success()->Accept(this);
3739 }
3740
3741
3742 void NodeValidator::VisitChoice(ChoiceNode* that) {
3743 if (that->info()->visited) return;
3744 VisitNodeScope scope(that);
3745 ValidateInfo(that->info());
3746 ZoneList<GuardedAlternative>* alts = that->alternatives();
3747 for (int i = 0; i < alts->length(); i++)
3748 alts->at(i).node()->Accept(this);
3749 }
3750
3751
3752 void NodeValidator::VisitEnd(EndNode* that) {
3753 if (that->info()->visited) return;
3754 VisitNodeScope scope(that);
3755 ValidateInfo(that->info());
3756 }
3757
3758
3759 void NodeValidator::VisitText(TextNode* that) {
3760 if (that->info()->visited) return;
3761 VisitNodeScope scope(that);
3762 ValidateInfo(that->info());
3763 that->on_success()->Accept(this);
3764 }
3765
3766
3767 #endif
3768
3769
3770 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
3655 bool ignore_case, 3771 bool ignore_case,
3656 bool is_multiline, 3772 bool is_multiline,
3657 Handle<String> pattern, 3773 Handle<String> pattern,
3658 bool is_ascii) { 3774 bool is_ascii) {
3659 RegExpCompiler compiler(input->capture_count, ignore_case, is_ascii); 3775 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
3660 // Wrap the body of the regexp in capture #0. 3776 // Wrap the body of the regexp in capture #0.
3661 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, 3777 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
3662 0, 3778 0,
3663 &compiler, 3779 &compiler,
3664 compiler.accept()); 3780 compiler.accept());
3665 // Add a .*? at the beginning, outside the body capture. 3781 // Add a .*? at the beginning, outside the body capture.
3666 // Note: We could choose to not add this if the regexp is anchored at 3782 // Note: We could choose to not add this if the regexp is anchored at
3667 // the start of the input but I'm not sure how best to do that and 3783 // the start of the input but I'm not sure how best to do that and
3668 // since we don't even handle ^ yet I'm saving that optimization for 3784 // since we don't even handle ^ yet I'm saving that optimization for
3669 // later. 3785 // later.
3670 RegExpNode* node = RegExpQuantifier::ToNode(0, 3786 RegExpNode* node = RegExpQuantifier::ToNode(0,
3671 RegExpQuantifier::kInfinity, 3787 RegExpQuantifier::kInfinity,
3672 false, 3788 false,
3673 new RegExpCharacterClass('*'), 3789 new RegExpCharacterClass('*'),
3674 &compiler, 3790 &compiler,
3675 captured_body); 3791 captured_body);
3676 if (node_return != NULL) *node_return = node; 3792 AssertionPropagation analysis(ignore_case);
3677 Analysis analysis(ignore_case);
3678 analysis.EnsureAnalyzed(node); 3793 analysis.EnsureAnalyzed(node);
3679 3794
3680 NodeInfo info = *node->info(); 3795 NodeInfo info = *node->info();
3796 data->has_lookbehind = info.HasLookbehind();
3797 if (data->has_lookbehind) {
3798 // If this node needs information about the preceding text we let
3799 // it start with a character class that consumes a single character
3800 // and proceeds to wherever is appropriate. This means that if
3801 // has_lookbehind is set the code generator must start one character
3802 // before the start position.
3803 node = new TextNode(new RegExpCharacterClass('*'), node);
3804 analysis.EnsureAnalyzed(node);
3805 }
3806
3807 #ifdef DEBUG
3808 PostAnalysisNodeValidator post_analysis_validator;
3809 node->Accept(&post_analysis_validator);
3810 #endif
3811
3681 node = node->EnsureExpanded(&info); 3812 node = node->EnsureExpanded(&info);
3682 3813
3814 #ifdef DEBUG
3815 PostExpansionNodeValidator post_expansion_validator;
3816 node->Accept(&post_expansion_validator);
3817 #endif
3818
3819 data->node = node;
3820
3683 if (is_multiline && !FLAG_attempt_multiline_irregexp) { 3821 if (is_multiline && !FLAG_attempt_multiline_irregexp) {
3684 return Handle<FixedArray>::null(); 3822 return Handle<FixedArray>::null();
3685 } 3823 }
3686 3824
3825 if (data->has_lookbehind) {
3826 return Handle<FixedArray>::null();
3827 }
3828
3687 if (FLAG_irregexp_native) { 3829 if (FLAG_irregexp_native) {
3688 #ifdef ARM 3830 #ifdef ARM
3689 // Unimplemented, fall-through to bytecode implementation. 3831 // Unimplemented, fall-through to bytecode implementation.
3690 #else // IA32 3832 #else // IA32
3691 RegExpMacroAssemblerIA32::Mode mode; 3833 RegExpMacroAssemblerIA32::Mode mode;
3692 if (is_ascii) { 3834 if (is_ascii) {
3693 mode = RegExpMacroAssemblerIA32::ASCII; 3835 mode = RegExpMacroAssemblerIA32::ASCII;
3694 } else { 3836 } else {
3695 mode = RegExpMacroAssemblerIA32::UC16; 3837 mode = RegExpMacroAssemblerIA32::UC16;
3696 } 3838 }
3697 RegExpMacroAssemblerIA32 macro_assembler(mode, 3839 RegExpMacroAssemblerIA32 macro_assembler(mode,
3698 (input->capture_count + 1) * 2); 3840 (data->capture_count + 1) * 2);
3699 return compiler.Assemble(&macro_assembler, 3841 return compiler.Assemble(&macro_assembler,
3700 node, 3842 node,
3701 input->capture_count, 3843 data->capture_count,
3702 pattern); 3844 pattern);
3703 #endif 3845 #endif
3704 } 3846 }
3705 EmbeddedVector<byte, 1024> codes; 3847 EmbeddedVector<byte, 1024> codes;
3706 RegExpMacroAssemblerIrregexp macro_assembler(codes); 3848 RegExpMacroAssemblerIrregexp macro_assembler(codes);
3707 return compiler.Assemble(&macro_assembler, 3849 return compiler.Assemble(&macro_assembler,
3708 node, 3850 node,
3709 input->capture_count, 3851 data->capture_count,
3710 pattern); 3852 pattern);
3711 } 3853 }
3712 3854
3713 3855
3714 }} // namespace v8::internal 3856 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/macro-assembler-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698