Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: src/jsregexp.cc

Issue 12473: * Complete case independent support in Irregexp. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« src/jsregexp.h ('K') | « src/jsregexp.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLD | NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 455 matching lines...) Expand 10 before | Expand all | Expand 10 after
466 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp, 466 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
467 int num_captures, 467 int num_captures,
468 Handle<String> two_byte_subject, 468 Handle<String> two_byte_subject,
469 int previous_index, 469 int previous_index,
470 int* offsets_vector, 470 int* offsets_vector,
471 int offsets_vector_length) { 471 int offsets_vector_length) {
472 #ifdef DEBUG 472 #ifdef DEBUG
473 if (FLAG_trace_regexp_bytecodes) { 473 if (FLAG_trace_regexp_bytecodes) {
474 String* pattern = regexp->Pattern(); 474 String* pattern = regexp->Pattern();
475 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 475 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
476 PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString())); 476 //PrintF("\n\nSubject string: '%s'\n\n", *(two_byte_subject->ToCString()));
Christian Plesner Hansen 2008/11/27 06:49:39 Either comment it back in or remove it.
477 } 477 }
478 #endif 478 #endif
479 ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation()); 479 ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());
480 ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject))); 480 ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));
481 bool rc; 481 bool rc;
482 482
483 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { 483 for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
484 offsets_vector[i] = -1; 484 offsets_vector[i] = -1;
485 } 485 }
486 486
(...skipping 437 matching lines...) Expand 10 before | Expand all | Expand 10 after
924 is_case_independent_(ignore_case) { 924 is_case_independent_(ignore_case) {
925 accept_ = new EndNode(EndNode::ACCEPT); 925 accept_ = new EndNode(EndNode::ACCEPT);
926 backtrack_ = new EndNode(EndNode::BACKTRACK); 926 backtrack_ = new EndNode(EndNode::BACKTRACK);
927 } 927 }
928 928
929 929
930 Handle<FixedArray> RegExpCompiler::Assemble( 930 Handle<FixedArray> RegExpCompiler::Assemble(
931 RegExpMacroAssembler* macro_assembler, 931 RegExpMacroAssembler* macro_assembler,
932 RegExpNode* start, 932 RegExpNode* start,
933 int capture_count) { 933 int capture_count) {
934 if (!FLAG_attempt_case_independent && is_case_independent_) {
935 return Handle<FixedArray>::null();
936 }
937 macro_assembler_ = macro_assembler; 934 macro_assembler_ = macro_assembler;
938 List <RegExpNode*> work_list(0); 935 List <RegExpNode*> work_list(0);
939 work_list_ = &work_list; 936 work_list_ = &work_list;
940 Label fail; 937 Label fail;
941 macro_assembler->PushBacktrack(&fail); 938 macro_assembler->PushBacktrack(&fail);
942 if (!start->GoTo(this)) { 939 if (!start->GoTo(this)) {
943 fail.Unuse(); 940 fail.Unuse();
944 return Handle<FixedArray>::null(); 941 return Handle<FixedArray>::null();
945 } 942 }
946 while (!work_list.is_empty()) { 943 while (!work_list.is_empty()) {
(...skipping 402 matching lines...) Expand 10 before | Expand all | Expand 10 after
1349 cp_offset += elm.data.u_atom->data().length(); 1346 cp_offset += elm.data.u_atom->data().length();
1350 } 1347 }
1351 } 1348 }
1352 1349
1353 compiler->AddWork(on_failure_); 1350 compiler->AddWork(on_failure_);
1354 macro_assembler->AdvanceCurrentPosition(cp_offset); 1351 macro_assembler->AdvanceCurrentPosition(cp_offset);
1355 return on_success()->GoTo(compiler); 1352 return on_success()->GoTo(compiler);
1356 } 1353 }
1357 1354
1358 1355
1356 void TextNode::MakeCaseIndependent() {
1357 int element_count = elms_->length();
1358 for (int i = 0; i < element_count; i++) {
1359 TextElement elm = elms_->at(i);
1360 if (elm.type == TextElement::CHAR_CLASS) {
1361 RegExpCharacterClass* cc = elm.data.u_char_class;
1362 ZoneList<CharacterRange>* ranges = cc->ranges();
1363 int range_count = ranges->length();
1364 for (int i = 0; i < range_count; i++) {
1365 ranges->at(i).AddCaseEquivalents(ranges);
1366 }
1367 }
1368 }
1369 }
1370
1371
1359 bool ChoiceNode::Emit(RegExpCompiler* compiler) { 1372 bool ChoiceNode::Emit(RegExpCompiler* compiler) {
1360 int choice_count = alternatives_->length(); 1373 int choice_count = alternatives_->length();
1361 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1374 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1362 Bind(macro_assembler); 1375 Bind(macro_assembler);
1363 // For now we just call all choices one after the other. The idea ultimately 1376 // For now we just call all choices one after the other. The idea ultimately
1364 // is to use the Dispatch table to try only the relevant ones. 1377 // is to use the Dispatch table to try only the relevant ones.
1365 for (int i = 0; i < choice_count - 1; i++) { 1378 for (int i = 0; i < choice_count - 1; i++) {
1366 GuardedAlternative alternative = alternatives_->at(i); 1379 GuardedAlternative alternative = alternatives_->at(i);
1367 Label after; 1380 Label after;
1368 Label after_no_pop_cp; 1381 Label after_no_pop_cp;
(...skipping 1021 matching lines...) Expand 10 before | Expand all | Expand 10 after
2390 that->info()->been_analyzed = true; 2403 that->info()->been_analyzed = true;
2391 } 2404 }
2392 2405
2393 2406
2394 void Analysis::VisitEnd(EndNode* that) { 2407 void Analysis::VisitEnd(EndNode* that) {
2395 // nothing to do 2408 // nothing to do
2396 } 2409 }
2397 2410
2398 2411
2399 void Analysis::VisitText(TextNode* that) { 2412 void Analysis::VisitText(TextNode* that) {
2413 if (case_independent_) {
2414 that->MakeCaseIndependent();
2415 }
2400 EnsureAnalyzed(that->on_success()); 2416 EnsureAnalyzed(that->on_success());
2401 EnsureAnalyzed(that->on_failure()); 2417 EnsureAnalyzed(that->on_failure());
2402 } 2418 }
2403 2419
2404 2420
2405 void Analysis::VisitAction(ActionNode* that) { 2421 void Analysis::VisitAction(ActionNode* that) {
2406 RegExpNode* next = that->on_success(); 2422 RegExpNode* next = that->on_success();
2407 EnsureAnalyzed(next); 2423 EnsureAnalyzed(next);
2408 that->info()->determine_newline = next->info()->prev_determine_newline(); 2424 that->info()->determine_newline = next->info()->prev_determine_newline();
2409 that->info()->determine_word = next->info()->prev_determine_word(); 2425 that->info()->determine_word = next->info()->prev_determine_word();
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
2565 // since we don't even handle ^ yet I'm saving that optimization for 2581 // since we don't even handle ^ yet I'm saving that optimization for
2566 // later. 2582 // later.
2567 RegExpNode* node = RegExpQuantifier::ToNode(0, 2583 RegExpNode* node = RegExpQuantifier::ToNode(0,
2568 RegExpQuantifier::kInfinity, 2584 RegExpQuantifier::kInfinity,
2569 false, 2585 false,
2570 new RegExpCharacterClass('*'), 2586 new RegExpCharacterClass('*'),
2571 &compiler, 2587 &compiler,
2572 captured_body, 2588 captured_body,
2573 compiler.backtrack()); 2589 compiler.backtrack());
2574 if (node_return != NULL) *node_return = node; 2590 if (node_return != NULL) *node_return = node;
2575 Analysis analysis; 2591 Analysis analysis(ignore_case);
2576 analysis.EnsureAnalyzed(node); 2592 analysis.EnsureAnalyzed(node);
2577 2593
2578 if (!FLAG_irregexp) { 2594 if (!FLAG_irregexp) {
2579 return Handle<FixedArray>::null(); 2595 return Handle<FixedArray>::null();
2580 } 2596 }
2581 2597
2582 #if !(defined ARM || defined __arm__ || defined __thumb__) 2598 #if !(defined ARM || defined __arm__ || defined __thumb__)
2583 if (FLAG_irregexp_native) { // Flag only checked in IA32 mode. 2599 if (FLAG_irregexp_native) { // Flag only checked in IA32 mode.
2584 // TODO(lrn) Move compilation to a later point in the life-cycle 2600 // TODO(lrn) Move compilation to a later point in the life-cycle
2585 // of the RegExp. We don't know the type of input string yet. 2601 // of the RegExp. We don't know the type of input string yet.
2586 // For now, always assume two-byte strings. 2602 // For now, always assume two-byte strings.
2587 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, 2603 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16,
2588 (input->capture_count + 1) * 2); 2604 (input->capture_count + 1) * 2);
2589 return compiler.Assemble(&macro_assembler, 2605 return compiler.Assemble(&macro_assembler,
2590 node, 2606 node,
2591 input->capture_count); 2607 input->capture_count);
2592 } 2608 }
2593 #endif 2609 #endif
2594 byte codes[1024]; 2610 byte codes[1024];
2595 IrregexpAssembler assembler(Vector<byte>(codes, 1024)); 2611 IrregexpAssembler assembler(Vector<byte>(codes, 1024));
2596 RegExpMacroAssemblerIrregexp macro_assembler(&assembler); 2612 RegExpMacroAssemblerIrregexp macro_assembler(&assembler);
2597 return compiler.Assemble(&macro_assembler, 2613 return compiler.Assemble(&macro_assembler,
2598 node, 2614 node,
2599 input->capture_count); 2615 input->capture_count);
2600 } 2616 }
2601 2617
2602 2618
2603 }} // namespace v8::internal 2619 }} // namespace v8::internal
OLD | NEW
« src/jsregexp.h ('K') | « src/jsregexp.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698