OLD | NEW |
---|---|
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 455 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
466 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, | 466 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, |
467 int num_captures, | 467 int num_captures, |
468 Handle<String> two_byte_subject, | 468 Handle<String> two_byte_subject, |
469 int previous_index, | 469 int previous_index, |
470 int* offsets_vector, | 470 int* offsets_vector, |
471 int offsets_vector_length) { | 471 int offsets_vector_length) { |
472 #ifdef DEBUG | 472 #ifdef DEBUG |
473 if (FLAG_trace_regexp_bytecodes) { | 473 if (FLAG_trace_regexp_bytecodes) { |
474 String* pattern = regexp->Pattern(); | 474 String* pattern = regexp->Pattern(); |
475 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 475 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
476 PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString())); | 476 //PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString())); |
Christian Plesner Hansen
2008/11/27 06:49:39
Either comment it back in or remove it.
| |
477 } | 477 } |
478 #endif | 478 #endif |
479 ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation()); | 479 ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation()); |
480 ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject))); | 480 ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject))); |
481 bool rc; | 481 bool rc; |
482 | 482 |
483 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { | 483 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { |
484 offsets_vector[i] = -1; | 484 offsets_vector[i] = -1; |
485 } | 485 } |
486 | 486 |
(...skipping 437 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
924 is_case_independent_(ignore_case) { | 924 is_case_independent_(ignore_case) { |
925 accept_ = new EndNode(EndNode::ACCEPT); | 925 accept_ = new EndNode(EndNode::ACCEPT); |
926 backtrack_ = new EndNode(EndNode::BACKTRACK); | 926 backtrack_ = new EndNode(EndNode::BACKTRACK); |
927 } | 927 } |
928 | 928 |
929 | 929 |
930 Handle<FixedArray> RegExpCompiler::Assemble( | 930 Handle<FixedArray> RegExpCompiler::Assemble( |
931 RegExpMacroAssembler* macro_assembler, | 931 RegExpMacroAssembler* macro_assembler, |
932 RegExpNode* start, | 932 RegExpNode* start, |
933 int capture_count) { | 933 int capture_count) { |
934 if (!FLAG_attempt_case_independent && is_case_independent_) { | |
935 return Handle<FixedArray>::null(); | |
936 } | |
937 macro_assembler_ = macro_assembler; | 934 macro_assembler_ = macro_assembler; |
938 List <RegExpNode*> work_list(0); | 935 List <RegExpNode*> work_list(0); |
939 work_list_ = &work_list; | 936 work_list_ = &work_list; |
940 Label fail; | 937 Label fail; |
941 macro_assembler->PushBacktrack(&fail); | 938 macro_assembler->PushBacktrack(&fail); |
942 if (!start->GoTo(this)) { | 939 if (!start->GoTo(this)) { |
943 fail.Unuse(); | 940 fail.Unuse(); |
944 return Handle<FixedArray>::null(); | 941 return Handle<FixedArray>::null(); |
945 } | 942 } |
946 while (!work_list.is_empty()) { | 943 while (!work_list.is_empty()) { |
(...skipping 402 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1349 cp_offset += elm.data.u_atom->data().length(); | 1346 cp_offset += elm.data.u_atom->data().length(); |
1350 } | 1347 } |
1351 } | 1348 } |
1352 | 1349 |
1353 compiler->AddWork(on_failure_); | 1350 compiler->AddWork(on_failure_); |
1354 macro_assembler->AdvanceCurrentPosition(cp_offset); | 1351 macro_assembler->AdvanceCurrentPosition(cp_offset); |
1355 return on_success()->GoTo(compiler); | 1352 return on_success()->GoTo(compiler); |
1356 } | 1353 } |
1357 | 1354 |
1358 | 1355 |
1356 void TextNode::MakeCaseIndependent() { | |
1357 int element_count = elms_->length(); | |
1358 for (int i = 0; i < element_count; i++) { | |
1359 TextElement elm = elms_->at(i); | |
1360 if (elm.type == TextElement::CHAR_CLASS) { | |
1361 RegExpCharacterClass* cc = elm.data.u_char_class; | |
1362 ZoneList<CharacterRange>* ranges = cc->ranges(); | |
1363 int range_count = ranges->length(); | |
1364 for (int i = 0; i < range_count; i++) { | |
1365 ranges->at(i).AddCaseEquivalents(ranges); | |
1366 } | |
1367 } | |
1368 } | |
1369 } | |
1370 | |
1371 | |
1359 bool ChoiceNode::Emit(RegExpCompiler* compiler) { | 1372 bool ChoiceNode::Emit(RegExpCompiler* compiler) { |
1360 int choice_count = alternatives_->length(); | 1373 int choice_count = alternatives_->length(); |
1361 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1374 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
1362 Bind(macro_assembler); | 1375 Bind(macro_assembler); |
1363 // For now we just call all choices one after the other. The idea ultimately | 1376 // For now we just call all choices one after the other. The idea ultimately |
1364 // is to use the Dispatch table to try only the relevant ones. | 1377 // is to use the Dispatch table to try only the relevant ones. |
1365 for (int i = 0; i < choice_count - 1; i++) { | 1378 for (int i = 0; i < choice_count - 1; i++) { |
1366 GuardedAlternative alternative = alternatives_->at(i); | 1379 GuardedAlternative alternative = alternatives_->at(i); |
1367 Label after; | 1380 Label after; |
1368 Label after_no_pop_cp; | 1381 Label after_no_pop_cp; |
(...skipping 1021 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2390 that->info()->been_analyzed = true; | 2403 that->info()->been_analyzed = true; |
2391 } | 2404 } |
2392 | 2405 |
2393 | 2406 |
2394 void Analysis::VisitEnd(EndNode* that) { | 2407 void Analysis::VisitEnd(EndNode* that) { |
2395 // nothing to do | 2408 // nothing to do |
2396 } | 2409 } |
2397 | 2410 |
2398 | 2411 |
2399 void Analysis::VisitText(TextNode* that) { | 2412 void Analysis::VisitText(TextNode* that) { |
2413 if (case_independent_) { | |
2414 that->MakeCaseIndependent(); | |
2415 } | |
2400 EnsureAnalyzed(that->on_success()); | 2416 EnsureAnalyzed(that->on_success()); |
2401 EnsureAnalyzed(that->on_failure()); | 2417 EnsureAnalyzed(that->on_failure()); |
2402 } | 2418 } |
2403 | 2419 |
2404 | 2420 |
2405 void Analysis::VisitAction(ActionNode* that) { | 2421 void Analysis::VisitAction(ActionNode* that) { |
2406 RegExpNode* next = that->on_success(); | 2422 RegExpNode* next = that->on_success(); |
2407 EnsureAnalyzed(next); | 2423 EnsureAnalyzed(next); |
2408 that->info()->determine_newline = next->info()->prev_determine_newline(); | 2424 that->info()->determine_newline = next->info()->prev_determine_newline(); |
2409 that->info()->determine_word = next->info()->prev_determine_word(); | 2425 that->info()->determine_word = next->info()->prev_determine_word(); |
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2565 // since we don't even handle ^ yet I'm saving that optimization for | 2581 // since we don't even handle ^ yet I'm saving that optimization for |
2566 // later. | 2582 // later. |
2567 RegExpNode* node = RegExpQuantifier::ToNode(0, | 2583 RegExpNode* node = RegExpQuantifier::ToNode(0, |
2568 RegExpQuantifier::kInfinity, | 2584 RegExpQuantifier::kInfinity, |
2569 false, | 2585 false, |
2570 new RegExpCharacterClass('*'), | 2586 new RegExpCharacterClass('*'), |
2571 &compiler, | 2587 &compiler, |
2572 captured_body, | 2588 captured_body, |
2573 compiler.backtrack()); | 2589 compiler.backtrack()); |
2574 if (node_return != NULL) *node_return = node; | 2590 if (node_return != NULL) *node_return = node; |
2575 Analysis analysis; | 2591 Analysis analysis(ignore_case); |
2576 analysis.EnsureAnalyzed(node); | 2592 analysis.EnsureAnalyzed(node); |
2577 | 2593 |
2578 if (!FLAG_irregexp) { | 2594 if (!FLAG_irregexp) { |
2579 return Handle<FixedArray>::null(); | 2595 return Handle<FixedArray>::null(); |
2580 } | 2596 } |
2581 | 2597 |
2582 #if !(defined ARM || defined __arm__ || defined __thumb__) | 2598 #if !(defined ARM || defined __arm__ || defined __thumb__) |
2583 if (FLAG_irregexp_native) { // Flag only checked in IA32 mode. | 2599 if (FLAG_irregexp_native) { // Flag only checked in IA32 mode. |
2584 // TODO(lrn) Move compilation to a later point in the life-cycle | 2600 // TODO(lrn) Move compilation to a later point in the life-cycle |
2585 // of the RegExp. We don't know the type of input string yet. | 2601 // of the RegExp. We don't know the type of input string yet. |
2586 // For now, always assume two-byte strings. | 2602 // For now, always assume two-byte strings. |
2587 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, | 2603 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, |
2588 (input->capture_count + 1) * 2); | 2604 (input->capture_count + 1) * 2); |
2589 return compiler.Assemble(&macro_assembler, | 2605 return compiler.Assemble(&macro_assembler, |
2590 node, | 2606 node, |
2591 input->capture_count); | 2607 input->capture_count); |
2592 } | 2608 } |
2593 #endif | 2609 #endif |
2594 byte codes[1024]; | 2610 byte codes[1024]; |
2595 IrregexpAssembler assembler(Vector<byte>(codes, 1024)); | 2611 IrregexpAssembler assembler(Vector<byte>(codes, 1024)); |
2596 RegExpMacroAssemblerIrregexp macro_assembler(&assembler); | 2612 RegExpMacroAssemblerIrregexp macro_assembler(&assembler); |
2597 return compiler.Assemble(&macro_assembler, | 2613 return compiler.Assemble(&macro_assembler, |
2598 node, | 2614 node, |
2599 input->capture_count); | 2615 input->capture_count); |
2600 } | 2616 } |
2601 | 2617 |
2602 | 2618 |
2603 }} // namespace v8::internal | 2619 }} // namespace v8::internal |
OLD | NEW |