Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(189)

Side by Side Diff: src/jsregexp.cc

Issue 16410: Some irregexp optimizations around keeping track of when the current characte... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1405 matching lines...) Expand 10 before | Expand all | Expand 10 after
1416 // This is called as we come into a loop choice node and some other tricky 1416 // This is called as we come into a loop choice node and some other tricky
1417 // nodes. It normalises the state of the code generator to ensure we can 1417 // nodes. It normalises the state of the code generator to ensure we can
1418 // generate generic code. 1418 // generate generic code.
1419 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) { 1419 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
1420 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1420 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1421 1421
1422 ASSERT(actions_ != NULL || 1422 ASSERT(actions_ != NULL ||
1423 cp_offset_ != 0 || 1423 cp_offset_ != 0 ||
1424 backtrack() != NULL || 1424 backtrack() != NULL ||
1425 characters_preloaded_ != 0 || 1425 characters_preloaded_ != 0 ||
1426 quick_check_performed_.characters() != 0); 1426 quick_check_performed_.characters() != 0 ||
1427 bound_checked_up_to_ != 0);
1427 1428
1428 if (actions_ == NULL && backtrack() == NULL) { 1429 if (actions_ == NULL && backtrack() == NULL) {
1429 // Here we just have some deferred cp advances to fix and we are back to 1430 // Here we just have some deferred cp advances to fix and we are back to
1430 // a normal situation. We may also have to forget some information gained 1431 // a normal situation. We may also have to forget some information gained
1431 // through a quick check that was already performed. 1432 // through a quick check that was already performed.
1432 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); 1433 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
1433 // Create a new trivial state and generate the node with that. 1434 // Create a new trivial state and generate the node with that.
1434 GenerationVariant new_state; 1435 GenerationVariant new_state;
1435 return successor->Emit(compiler, &new_state); 1436 return successor->Emit(compiler, &new_state);
1436 } 1437 }
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
1640 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1641 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1641 checked = check; 1642 checked = check;
1642 } 1643 }
1643 macro_assembler->CheckNotCharacter(c, on_failure); 1644 macro_assembler->CheckNotCharacter(c, on_failure);
1644 } 1645 }
1645 return checked; 1646 return checked;
1646 } 1647 }
1647 1648
1648 1649
1649 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, 1650 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
1651 bool ascii,
1650 uc16 c1, 1652 uc16 c1,
1651 uc16 c2, 1653 uc16 c2,
1652 Label* on_failure) { 1654 Label* on_failure) {
1655 uc16 char_mask;
1656 if (ascii) {
1657 char_mask = String::kMaxAsciiCharCode;
1658 } else {
1659 char_mask = String::kMaxUC16CharCode;
1660 }
1653 uc16 exor = c1 ^ c2; 1661 uc16 exor = c1 ^ c2;
1654 // Check whether exor has only one bit set. 1662 // Check whether exor has only one bit set.
1655 if (((exor - 1) & exor) == 0) { 1663 if (((exor - 1) & exor) == 0) {
1656 // If c1 and c2 differ only by one bit. 1664 // If c1 and c2 differ only by one bit.
1657 // Ecma262UnCanonicalize always gives the highest number last. 1665 // Ecma262UnCanonicalize always gives the highest number last.
1658 ASSERT(c2 > c1); 1666 ASSERT(c2 > c1);
1659 uc16 mask = String::kMaxUC16CharCode ^ exor; 1667 uc16 mask = char_mask ^ exor;
1660 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); 1668 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
1661 return true; 1669 return true;
1662 } 1670 }
1663 ASSERT(c2 > c1); 1671 ASSERT(c2 > c1);
1664 uc16 diff = c2 - c1; 1672 uc16 diff = c2 - c1;
1665 if (((diff - 1) & diff) == 0 && c1 >= diff) { 1673 if (((diff - 1) & diff) == 0 && c1 >= diff) {
1666 // If the characters differ by 2^n but don't differ by one bit then 1674 // If the characters differ by 2^n but don't differ by one bit then
1667 // subtract the difference from the found character, then do the or 1675 // subtract the difference from the found character, then do the or
1668 // trick. We avoid the theoretical case where negative numbers are 1676 // trick. We avoid the theoretical case where negative numbers are
1669 // involved in order to simplify code generation. 1677 // involved in order to simplify code generation.
1670 uc16 mask = String::kMaxUC16CharCode ^ diff; 1678 uc16 mask = char_mask ^ diff;
1671 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, 1679 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
1672 diff, 1680 diff,
1673 mask, 1681 mask,
1674 on_failure); 1682 on_failure);
1675 return true; 1683 return true;
1676 } 1684 }
1677 return false; 1685 return false;
1678 } 1686 }
1679 1687
1680 1688
1681 // Only emits letters (things that have case). Only used for case independent 1689 // Only emits letters (things that have case). Only used for case independent
1682 // matches. 1690 // matches.
1683 static inline bool EmitAtomLetter( 1691 static inline bool EmitAtomLetter(
1684 RegExpMacroAssembler* macro_assembler, 1692 RegExpMacroAssembler* macro_assembler,
1693 bool ascii,
1685 uc16 c, 1694 uc16 c,
1686 Label* on_failure, 1695 Label* on_failure,
1687 int cp_offset, 1696 int cp_offset,
1688 bool check, 1697 bool check,
1689 bool preloaded) { 1698 bool preloaded) {
1690 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1699 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1691 int length = uncanonicalize.get(c, '\0', chars); 1700 int length = uncanonicalize.get(c, '\0', chars);
1692 if (length <= 1) return false; 1701 if (length <= 1) return false;
1693 // We may not need to check against the end of the input string 1702 // We may not need to check against the end of the input string
1694 // if this character lies before a character that matched. 1703 // if this character lies before a character that matched.
1695 if (!preloaded) { 1704 if (!preloaded) {
1696 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1705 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1697 } 1706 }
1698 Label ok; 1707 Label ok;
1699 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1708 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1700 switch (length) { 1709 switch (length) {
1701 case 2: { 1710 case 2: {
1702 if (ShortCutEmitCharacterPair(macro_assembler, 1711 if (ShortCutEmitCharacterPair(macro_assembler,
1712 ascii,
1703 chars[0], 1713 chars[0],
1704 chars[1], 1714 chars[1],
1705 on_failure)) { 1715 on_failure)) {
1706 } else { 1716 } else {
1707 macro_assembler->CheckCharacter(chars[0], &ok); 1717 macro_assembler->CheckCharacter(chars[0], &ok);
1708 macro_assembler->CheckNotCharacter(chars[1], on_failure); 1718 macro_assembler->CheckNotCharacter(chars[1], on_failure);
1709 macro_assembler->Bind(&ok); 1719 macro_assembler->Bind(&ok);
1710 } 1720 }
1711 break; 1721 break;
1712 } 1722 }
(...skipping 287 matching lines...) Expand 10 before | Expand all | Expand 10 after
2000 if (details->characters() == 1) { 2010 if (details->characters() == 1) {
2001 // If number of characters preloaded is 1 then we used a byte or 16 bit 2011 // If number of characters preloaded is 1 then we used a byte or 16 bit
2002 // load so the value is already masked down. 2012 // load so the value is already masked down.
2003 uint32_t char_mask; 2013 uint32_t char_mask;
2004 if (compiler->ascii()) { 2014 if (compiler->ascii()) {
2005 char_mask = String::kMaxAsciiCharCode; 2015 char_mask = String::kMaxAsciiCharCode;
2006 } else { 2016 } else {
2007 char_mask = String::kMaxUC16CharCode; 2017 char_mask = String::kMaxUC16CharCode;
2008 } 2018 }
2009 if ((mask & char_mask) == char_mask) need_mask = false; 2019 if ((mask & char_mask) == char_mask) need_mask = false;
2020 mask &= char_mask;
2010 } else { 2021 } else {
2011 // For 2-character preloads in ASCII mode we also use a 16 bit load with 2022 // For 2-character preloads in ASCII mode we also use a 16 bit load with
2012 // zero extend. 2023 // zero extend.
2013 if (details->characters() == 2 && compiler->ascii()) { 2024 if (details->characters() == 2 && compiler->ascii()) {
2014 if ((mask & 0xffff) == 0xffff) need_mask = false; 2025 if ((mask & 0xffff) == 0xffff) need_mask = false;
2015 } else { 2026 } else {
2016 if (mask == 0xffffffff) need_mask = false; 2027 if (mask == 0xffffffff) need_mask = false;
2017 } 2028 }
2018 } 2029 }
2019 2030
(...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after
2316 assembler->LoadCurrentCharacter(cp_offset + j, 2327 assembler->LoadCurrentCharacter(cp_offset + j,
2317 backtrack, 2328 backtrack,
2318 *checked_up_to < cp_offset + j); 2329 *checked_up_to < cp_offset + j);
2319 } 2330 }
2320 assembler->CheckNotCharacter(quarks[j], backtrack); 2331 assembler->CheckNotCharacter(quarks[j], backtrack);
2321 } 2332 }
2322 } else { 2333 } else {
2323 ASSERT_EQ(pass, CASE_CHARACTER_MATCH); 2334 ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
2324 ASSERT(compiler->ignore_case()); 2335 ASSERT(compiler->ignore_case());
2325 bound_checked = EmitAtomLetter(assembler, 2336 bound_checked = EmitAtomLetter(assembler,
2337 compiler->ascii(),
2326 quarks[j], 2338 quarks[j],
2327 backtrack, 2339 backtrack,
2328 cp_offset + j, 2340 cp_offset + j,
2329 *checked_up_to < cp_offset + j, 2341 *checked_up_to < cp_offset + j,
2330 preloaded); 2342 preloaded);
2331 } 2343 }
2332 if (pass != NON_ASCII_MATCH && bound_checked) { 2344 if (pass != NON_ASCII_MATCH && bound_checked) {
2333 if (cp_offset + j > *checked_up_to) { 2345 if (cp_offset + j > *checked_up_to) {
2334 *checked_up_to = cp_offset + j; 2346 *checked_up_to = cp_offset + j;
2335 } 2347 }
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
2396 return true; 2408 return true;
2397 } 2409 }
2398 2410
2399 if (compiler->ascii()) { 2411 if (compiler->ascii()) {
2400 int dummy = 0; 2412 int dummy = 0;
2401 TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy); 2413 TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy);
2402 } 2414 }
2403 2415
2404 bool first_elt_done = false; 2416 bool first_elt_done = false;
2405 int bound_checked_to = variant->cp_offset() - 1; 2417 int bound_checked_to = variant->cp_offset() - 1;
2406 QuickCheckDetails* quick_check = variant->quick_check_performed(); 2418 bound_checked_to += variant->bound_checked_up_to();
2407 bound_checked_to += Max(quick_check->characters(),
2408 variant->characters_preloaded());
2409 2419
2410 // If a character is preloaded into the current character register then 2420 // If a character is preloaded into the current character register then
2411 // check that now. 2421 // check that now.
2412 if (variant->characters_preloaded() == 1) { 2422 if (variant->characters_preloaded() == 1) {
2413 TextEmitPass(compiler, 2423 TextEmitPass(compiler,
2414 CHARACTER_MATCH, 2424 CHARACTER_MATCH,
2415 true, 2425 true,
2416 variant, 2426 variant,
2417 false, 2427 false,
2418 &bound_checked_to); 2428 &bound_checked_to);
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
2465 ASSERT(by > 0); 2475 ASSERT(by > 0);
2466 // We don't have an instruction for shifting the current character register 2476 // We don't have an instruction for shifting the current character register
2467 // down or for using a shifted value for anything so lets just forget that 2477 // down or for using a shifted value for anything so lets just forget that
2468 // we preloaded any characters into it. 2478 // we preloaded any characters into it.
2469 characters_preloaded_ = 0; 2479 characters_preloaded_ = 0;
2470 // Adjust the offsets of the quick check performed information. This 2480 // Adjust the offsets of the quick check performed information. This
2471 // information is used to find out what we already determined about the 2481 // information is used to find out what we already determined about the
2472 // characters by means of mask and compare. 2482 // characters by means of mask and compare.
2473 quick_check_performed_.Advance(by, ascii); 2483 quick_check_performed_.Advance(by, ascii);
2474 cp_offset_ += by; 2484 cp_offset_ += by;
2485 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
2475 } 2486 }
2476 2487
2477 2488
2478 void TextNode::MakeCaseIndependent() { 2489 void TextNode::MakeCaseIndependent() {
2479 int element_count = elms_->length(); 2490 int element_count = elms_->length();
2480 for (int i = 0; i < element_count; i++) { 2491 for (int i = 0; i < element_count; i++) {
2481 TextElement elm = elms_->at(i); 2492 TextElement elm = elms_->at(i);
2482 if (elm.type == TextElement::CHAR_CLASS) { 2493 if (elm.type == TextElement::CHAR_CLASS) {
2483 RegExpCharacterClass* cc = elm.data.u_char_class; 2494 RegExpCharacterClass* cc = elm.data.u_char_class;
2484 ZoneList<CharacterRange>* ranges = cc->ranges(); 2495 ZoneList<CharacterRange>* ranges = cc->ranges();
(...skipping 287 matching lines...) Expand 10 before | Expand all | Expand 10 after
2772 return false; 2783 return false;
2773 } 2784 }
2774 } 2785 }
2775 2786
2776 Label second_choice; // For use in greedy matches. 2787 Label second_choice; // For use in greedy matches.
2777 macro_assembler->Bind(&second_choice); 2788 macro_assembler->Bind(&second_choice);
2778 2789
2779 int first_normal_choice = greedy_loop ? 1 : 0; 2790 int first_normal_choice = greedy_loop ? 1 : 0;
2780 2791
2781 int preload_characters = CalculatePreloadCharacters(compiler); 2792 int preload_characters = CalculatePreloadCharacters(compiler);
2782 bool preload_is_current = false; 2793 bool preload_is_current =
2783 bool preload_has_checked_bounds = false; 2794 (current_variant->characters_preloaded() == preload_characters);
2795 bool preload_has_checked_bounds = preload_is_current;
2784 2796
2785 AlternativeGenerationList alt_gens(choice_count); 2797 AlternativeGenerationList alt_gens(choice_count);
2786 2798
2787 // For now we just call all choices one after the other. The idea ultimately 2799 // For now we just call all choices one after the other. The idea ultimately
2788 // is to use the Dispatch table to try only the relevant ones. 2800 // is to use the Dispatch table to try only the relevant ones.
2789 for (int i = first_normal_choice; i < choice_count; i++) { 2801 for (int i = first_normal_choice; i < choice_count; i++) {
2790 GuardedAlternative alternative = alternatives_->at(i); 2802 GuardedAlternative alternative = alternatives_->at(i);
2791 AlternativeGeneration* alt_gen(alt_gens.at(i)); 2803 AlternativeGeneration* alt_gen(alt_gens.at(i));
2792 alt_gen->quick_check_details.set_characters(preload_characters); 2804 alt_gen->quick_check_details.set_characters(preload_characters);
2793 ZoneList<Guard*>* guards = alternative.guards(); 2805 ZoneList<Guard*>* guards = alternative.guards();
2794 int guard_count = (guards == NULL) ? 0 : guards->length(); 2806 int guard_count = (guards == NULL) ? 0 : guards->length();
2795
2796 GenerationVariant new_variant(*current_variant); 2807 GenerationVariant new_variant(*current_variant);
2797 new_variant.set_characters_preloaded(preload_is_current ? 2808 new_variant.set_characters_preloaded(preload_is_current ?
2798 preload_characters : 2809 preload_characters :
2799 0); 2810 0);
2811 if (preload_has_checked_bounds) {
2812 new_variant.set_bound_checked_up_to(preload_characters);
2813 }
2800 new_variant.quick_check_performed()->Clear(); 2814 new_variant.quick_check_performed()->Clear();
2801 alt_gen->expects_preload = preload_is_current; 2815 alt_gen->expects_preload = preload_is_current;
2802 bool generate_full_check_inline = false; 2816 bool generate_full_check_inline = false;
2803 if (alternative.node()->EmitQuickCheck(compiler, 2817 if (alternative.node()->EmitQuickCheck(compiler,
2804 &new_variant, 2818 &new_variant,
2805 preload_has_checked_bounds, 2819 preload_has_checked_bounds,
2806 &alt_gen->possible_success, 2820 &alt_gen->possible_success,
2807 &alt_gen->quick_check_details, 2821 &alt_gen->quick_check_details,
2808 i < choice_count - 1)) { 2822 i < choice_count - 1)) {
2809 // Quick check was generated for this choice. 2823 // Quick check was generated for this choice.
2810 preload_is_current = true; 2824 preload_is_current = true;
2811 preload_has_checked_bounds = true; 2825 preload_has_checked_bounds = true;
2812 // On the last choice in the ChoiceNode we generated the quick 2826 // On the last choice in the ChoiceNode we generated the quick
2813 // check to fall through on possible success. So now we need to 2827 // check to fall through on possible success. So now we need to
2814 // generate the full check inline. 2828 // generate the full check inline.
2815 if (i == choice_count - 1) { 2829 if (i == choice_count - 1) {
2816 macro_assembler->Bind(&alt_gen->possible_success); 2830 macro_assembler->Bind(&alt_gen->possible_success);
2817 new_variant.set_quick_check_performed(&alt_gen->quick_check_details); 2831 new_variant.set_quick_check_performed(&alt_gen->quick_check_details);
2818 new_variant.set_characters_preloaded(preload_characters); 2832 new_variant.set_characters_preloaded(preload_characters);
2833 new_variant.set_bound_checked_up_to(preload_characters);
2819 generate_full_check_inline = true; 2834 generate_full_check_inline = true;
2820 } 2835 }
2821 } else { 2836 } else {
2822 // No quick check was generated. Put the full code here. 2837 // No quick check was generated. Put the full code here.
2838 // If this is not the first choice then there could be slow checks from
2839 // previous cases that go here when they fail. There's no reason to
2840 // insist that they preload characters since the slow check we are about
2841 // to generate probably can't use it.
2842 if (i != first_normal_choice) {
2843 alt_gen->expects_preload = false;
2844 new_variant.set_characters_preloaded(0);
2845 }
2823 if (i < choice_count - 1) { 2846 if (i < choice_count - 1) {
2824 new_variant.set_backtrack(&alt_gen->after); 2847 new_variant.set_backtrack(&alt_gen->after);
2825 } 2848 }
2826 generate_full_check_inline = true; 2849 generate_full_check_inline = true;
2827 } 2850 }
2828 if (generate_full_check_inline) { 2851 if (generate_full_check_inline) {
2829 if (preload_is_current) {
2830 new_variant.set_characters_preloaded(preload_characters);
2831 }
2832 for (int j = 0; j < guard_count; j++) { 2852 for (int j = 0; j < guard_count; j++) {
2833 GenerateGuard(macro_assembler, guards->at(j), &new_variant); 2853 GenerateGuard(macro_assembler, guards->at(j), &new_variant);
2834 } 2854 }
2835 if (!alternative.node()->Emit(compiler, &new_variant)) { 2855 if (!alternative.node()->Emit(compiler, &new_variant)) {
2836 greedy_loop_label.Unuse(); 2856 greedy_loop_label.Unuse();
2837 return false; 2857 return false;
2838 } 2858 }
2839 preload_is_current = false; 2859 preload_is_current = false;
2840 } 2860 }
2841 macro_assembler->Bind(&alt_gen->after); 2861 macro_assembler->Bind(&alt_gen->after);
(...skipping 1453 matching lines...) Expand 10 before | Expand all | Expand 10 after
4295 EmbeddedVector<byte, 1024> codes; 4315 EmbeddedVector<byte, 1024> codes;
4296 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4316 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4297 return compiler.Assemble(&macro_assembler, 4317 return compiler.Assemble(&macro_assembler,
4298 node, 4318 node,
4299 data->capture_count, 4319 data->capture_count,
4300 pattern); 4320 pattern);
4301 } 4321 }
4302 4322
4303 4323
4304 }} // namespace v8::internal 4324 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698