| Index: src/jsregexp.cc
|
| ===================================================================
|
| --- src/jsregexp.cc (revision 1004)
|
| +++ src/jsregexp.cc (working copy)
|
| @@ -214,25 +214,15 @@
|
| vector_ = static_offsets_vector_;
|
| }
|
| }
|
| -
|
| -
|
| inline ~OffsetsVector() {
|
| if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
|
| DeleteArray(vector_);
|
| vector_ = NULL;
|
| }
|
| }
|
| + inline int* vector() { return vector_; }
|
| + inline int length() { return offsets_vector_length_; }
|
|
|
| -
|
| - inline int* vector() {
|
| - return vector_;
|
| - }
|
| -
|
| -
|
| - inline int length() {
|
| - return offsets_vector_length_;
|
| - }
|
| -
|
| private:
|
| int* vector_;
|
| int offsets_vector_length_;
|
| @@ -803,6 +793,11 @@
|
| }
|
| #endif
|
| LOG(RegExpExecEvent(regexp, previous_index, subject));
|
| +
|
| + if (!subject->IsFlat(StringShape(*subject))) {
|
| + FlattenString(subject);
|
| + }
|
| +
|
| return IrregexpExecOnce(irregexp,
|
| num_captures,
|
| subject,
|
| @@ -837,11 +832,12 @@
|
| subject->Flatten(shape);
|
| }
|
|
|
| - do {
|
| + while (true) {
|
| if (previous_index > subject->length() || previous_index < 0) {
|
| // Per ECMA-262 15.10.6.2, if the previous index is greater than the
|
| // string length, there is no match.
|
| matches = Factory::null_value();
|
| + return result;
|
| } else {
|
| #ifdef DEBUG
|
| if (FLAG_trace_regexp_bytecodes) {
|
| @@ -865,17 +861,12 @@
|
| if (offsets.vector()[0] == offsets.vector()[1]) {
|
| previous_index++;
|
| }
|
| + } else if (matches->IsNull()) {
|
| + return result;
|
| + } else {
|
| + return matches;
|
| }
|
| }
|
| - } while (matches->IsJSArray());
|
| -
|
| - // If we exited the loop with an exception, throw it.
|
| - if (matches->IsNull()) {
|
| - // Exited loop normally.
|
| - return result;
|
| - } else {
|
| - // Exited loop with the exception in matches.
|
| - return matches;
|
| }
|
| }
|
|
|
| @@ -886,14 +877,11 @@
|
| int previous_index,
|
| int* offsets_vector,
|
| int offsets_vector_length) {
|
| + ASSERT(subject->IsFlat(StringShape(*subject)));
|
| bool rc;
|
|
|
| int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
|
|
|
| - if (!subject->IsFlat(StringShape(*subject))) {
|
| - FlattenString(subject);
|
| - }
|
| -
|
| switch (tag) {
|
| case RegExpMacroAssembler::kIA32Implementation: {
|
| #ifndef ARM
|
| @@ -997,9 +985,9 @@
|
|
|
| Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
|
| // The captures come in (start, end+1) pairs.
|
| - for (int i = 0; i < 2 * (num_captures+1); i += 2) {
|
| + for (int i = 0; i < 2 * (num_captures + 1); i += 2) {
|
| array->set(i, Smi::FromInt(offsets_vector[i]));
|
| - array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
|
| + array->set(i + 1, Smi::FromInt(offsets_vector[i + 1]));
|
| }
|
| return Factory::NewJSArrayWithElements(array);
|
| }
|
| @@ -1344,25 +1332,26 @@
|
| }
|
|
|
|
|
| -void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* macro,
|
| +void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* assembler,
|
| int max_register,
|
| OutSet& affected_registers) {
|
| for (int reg = 0; reg <= max_register; reg++) {
|
| - if (affected_registers.Get(reg)) macro->PushRegister(reg);
|
| + if (affected_registers.Get(reg)) assembler->PushRegister(reg);
|
| }
|
| }
|
|
|
|
|
| -void GenerationVariant::RestoreAffectedRegisters(RegExpMacroAssembler* macro,
|
| - int max_register,
|
| - OutSet& affected_registers) {
|
| +void GenerationVariant::RestoreAffectedRegisters(
|
| + RegExpMacroAssembler* assembler,
|
| + int max_register,
|
| + OutSet& affected_registers) {
|
| for (int reg = max_register; reg >= 0; reg--) {
|
| - if (affected_registers.Get(reg)) macro->PopRegister(reg);
|
| + if (affected_registers.Get(reg)) assembler->PopRegister(reg);
|
| }
|
| }
|
|
|
|
|
| -void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro,
|
| +void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* assembler,
|
| int max_register,
|
| OutSet& affected_registers) {
|
| for (int reg = 0; reg <= max_register; reg++) {
|
| @@ -1410,13 +1399,13 @@
|
| }
|
| }
|
| if (store_position != -1) {
|
| - macro->WriteCurrentPositionToRegister(reg, store_position);
|
| + assembler->WriteCurrentPositionToRegister(reg, store_position);
|
| } else {
|
| if (absolute) {
|
| - macro->SetRegister(reg, value);
|
| + assembler->SetRegister(reg, value);
|
| } else {
|
| if (value != 0) {
|
| - macro->AdvanceRegister(reg, value);
|
| + assembler->AdvanceRegister(reg, value);
|
| }
|
| }
|
| }
|
| @@ -1428,14 +1417,19 @@
|
| // nodes. It normalises the state of the code generator to ensure we can
|
| // generate generic code.
|
| bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
|
| - RegExpMacroAssembler* macro = compiler->macro_assembler();
|
| + RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
|
|
| - ASSERT(actions_ != NULL || cp_offset_ != 0 || backtrack() != NULL);
|
| + ASSERT(actions_ != NULL ||
|
| + cp_offset_ != 0 ||
|
| + backtrack() != NULL ||
|
| + characters_preloaded_ != 0 ||
|
| + quick_check_performed_.characters() != 0);
|
|
|
| if (actions_ == NULL && backtrack() == NULL) {
|
| // Here we just have some deferred cp advances to fix and we are back to
|
| - // a normal situation.
|
| - macro->AdvanceCurrentPosition(cp_offset_);
|
| + // a normal situation. We may also have to forget some information gained
|
| + // through a quick check that was already performed.
|
| + if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
|
| // Create a new trivial state and generate the node with that.
|
| GenerationVariant new_state;
|
| return successor->Emit(compiler, &new_state);
|
| @@ -1444,50 +1438,50 @@
|
| // Generate deferred actions here along with code to undo them again.
|
| OutSet affected_registers;
|
| int max_register = FindAffectedRegisters(&affected_registers);
|
| - PushAffectedRegisters(macro, max_register, affected_registers);
|
| - PerformDeferredActions(macro, max_register, affected_registers);
|
| + PushAffectedRegisters(assembler, max_register, affected_registers);
|
| + PerformDeferredActions(assembler, max_register, affected_registers);
|
| if (backtrack() != NULL) {
|
| // Here we have a concrete backtrack location. These are set up by choice
|
| // nodes and so they indicate that we have a deferred save of the current
|
| // position which we may need to emit here.
|
| - macro->PushCurrentPosition();
|
| + assembler->PushCurrentPosition();
|
| }
|
| if (cp_offset_ != 0) {
|
| - macro->AdvanceCurrentPosition(cp_offset_);
|
| + assembler->AdvanceCurrentPosition(cp_offset_);
|
| }
|
|
|
| // Create a new trivial state and generate the node with that.
|
| Label undo;
|
| - macro->PushBacktrack(&undo);
|
| + assembler->PushBacktrack(&undo);
|
| GenerationVariant new_state;
|
| bool ok = successor->Emit(compiler, &new_state);
|
|
|
| // On backtrack we need to restore state.
|
| - macro->Bind(&undo);
|
| + assembler->Bind(&undo);
|
| if (!ok) return false;
|
| if (backtrack() != NULL) {
|
| - macro->PopCurrentPosition();
|
| + assembler->PopCurrentPosition();
|
| }
|
| - RestoreAffectedRegisters(macro, max_register, affected_registers);
|
| + RestoreAffectedRegisters(assembler, max_register, affected_registers);
|
| if (backtrack() == NULL) {
|
| - macro->Backtrack();
|
| + assembler->Backtrack();
|
| } else {
|
| - macro->GoTo(backtrack());
|
| + assembler->GoTo(backtrack());
|
| }
|
|
|
| return true;
|
| }
|
|
|
|
|
| -void EndNode::EmitInfoChecks(RegExpMacroAssembler* macro,
|
| +void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler,
|
| GenerationVariant* variant) {
|
| if (info()->at_end) {
|
| Label succeed;
|
| // LoadCurrentCharacter will go to the label if we are at the end of the
|
| // input string.
|
| - macro->LoadCurrentCharacter(0, &succeed);
|
| - macro->GoTo(variant->backtrack());
|
| - macro->Bind(&succeed);
|
| + assembler->LoadCurrentCharacter(0, &succeed);
|
| + assembler->GoTo(variant->backtrack());
|
| + assembler->Bind(&succeed);
|
| }
|
| }
|
|
|
| @@ -1497,16 +1491,16 @@
|
| if (!variant->is_trivial()) {
|
| return variant->Flush(compiler, this);
|
| }
|
| - RegExpMacroAssembler* macro = compiler->macro_assembler();
|
| + RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
| if (!label()->is_bound()) {
|
| - macro->Bind(label());
|
| + assembler->Bind(label());
|
| }
|
| - EmitInfoChecks(macro, variant);
|
| - macro->ReadCurrentPositionFromRegister(current_position_register_);
|
| - macro->ReadStackPointerFromRegister(stack_pointer_register_);
|
| + EmitInfoChecks(assembler, variant);
|
| + assembler->ReadCurrentPositionFromRegister(current_position_register_);
|
| + assembler->ReadStackPointerFromRegister(stack_pointer_register_);
|
| // Now that we have unwound the stack we find at the top of the stack the
|
| // backtrack that the BeginSubmatch node got.
|
| - macro->Backtrack();
|
| + assembler->Backtrack();
|
| return true;
|
| }
|
|
|
| @@ -1515,18 +1509,18 @@
|
| if (!variant->is_trivial()) {
|
| return variant->Flush(compiler, this);
|
| }
|
| - RegExpMacroAssembler* macro = compiler->macro_assembler();
|
| + RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
| if (!label()->is_bound()) {
|
| - macro->Bind(label());
|
| + assembler->Bind(label());
|
| }
|
| switch (action_) {
|
| case ACCEPT:
|
| - EmitInfoChecks(macro, variant);
|
| - macro->Succeed();
|
| + EmitInfoChecks(assembler, variant);
|
| + assembler->Succeed();
|
| return true;
|
| case BACKTRACK:
|
| ASSERT(!info()->at_end);
|
| - macro->GoTo(variant->backtrack());
|
| + assembler->GoTo(variant->backtrack());
|
| return true;
|
| case NEGATIVE_SUBMATCH_SUCCESS:
|
| // This case is handled in a different virtual method.
|
| @@ -1629,30 +1623,26 @@
|
| static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
|
|
|
|
|
| -static inline void EmitAtomNonLetters(
|
| +// Only emits non-letters (things that don't have case). Only used for case
|
| +// independent matches.
|
| +static inline bool EmitAtomNonLetter(
|
| RegExpMacroAssembler* macro_assembler,
|
| - TextElement elm,
|
| - Vector<const uc16> quarks,
|
| + uc16 c,
|
| Label* on_failure,
|
| int cp_offset,
|
| - bool check_offset) {
|
| + bool check,
|
| + bool preloaded) {
|
| unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| - // It is vital that this loop is backwards due to the unchecked character
|
| - // load below.
|
| - for (int i = quarks.length() - 1; i >= 0; i--) {
|
| - uc16 c = quarks[i];
|
| - int length = uncanonicalize.get(c, '\0', chars);
|
| - if (length <= 1) {
|
| - if (check_offset && i == quarks.length() - 1) {
|
| - macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
|
| - } else {
|
| - // Here we don't need to check against the end of the input string
|
| - // since this character lies before a character that matched.
|
| - macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
|
| - }
|
| - macro_assembler->CheckNotCharacter(c, on_failure);
|
| + int length = uncanonicalize.get(c, '\0', chars);
|
| + bool checked = false;
|
| + if (length <= 1) {
|
| + if (!preloaded) {
|
| + macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
|
| + checked = check;
|
| }
|
| + macro_assembler->CheckNotCharacter(c, on_failure);
|
| }
|
| + return checked;
|
| }
|
|
|
|
|
| @@ -1666,7 +1656,8 @@
|
| // If c1 and c2 differ only by one bit.
|
| // Ecma262UnCanonicalize always gives the highest number last.
|
| ASSERT(c2 > c1);
|
| - macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure);
|
| + uc16 mask = String::kMaxUC16CharCode ^ exor;
|
| + macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
|
| return true;
|
| }
|
| ASSERT(c2 > c1);
|
| @@ -1676,65 +1667,63 @@
|
| // subtract the difference from the found character, then do the or
|
| // trick. We avoid the theoretical case where negative numbers are
|
| // involved in order to simplify code generation.
|
| - macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff,
|
| - diff,
|
| - on_failure);
|
| + uc16 mask = String::kMaxUC16CharCode ^ diff;
|
| + macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
|
| + diff,
|
| + mask,
|
| + on_failure);
|
| return true;
|
| }
|
| return false;
|
| }
|
|
|
|
|
| -static inline void EmitAtomLetters(
|
| +// Only emits letters (things that have case). Only used for case independent
|
| +// matches.
|
| +static inline bool EmitAtomLetter(
|
| RegExpMacroAssembler* macro_assembler,
|
| - TextElement elm,
|
| - Vector<const uc16> quarks,
|
| + uc16 c,
|
| Label* on_failure,
|
| int cp_offset,
|
| - bool check_offset) {
|
| + bool check,
|
| + bool preloaded) {
|
| unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| - // It is vital that this loop is backwards due to the unchecked character
|
| - // load below.
|
| - for (int i = quarks.length() - 1; i >= 0; i--) {
|
| - uc16 c = quarks[i];
|
| - int length = uncanonicalize.get(c, '\0', chars);
|
| - if (length <= 1) continue;
|
| - if (check_offset && i == quarks.length() - 1) {
|
| - macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
|
| - } else {
|
| - // Here we don't need to check against the end of the input string
|
| - // since this character lies before a character that matched.
|
| - macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
|
| - }
|
| - Label ok;
|
| - ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
|
| - switch (length) {
|
| - case 2: {
|
| - if (ShortCutEmitCharacterPair(macro_assembler,
|
| - chars[0],
|
| - chars[1],
|
| - on_failure)) {
|
| - } else {
|
| - macro_assembler->CheckCharacter(chars[0], &ok);
|
| - macro_assembler->CheckNotCharacter(chars[1], on_failure);
|
| - macro_assembler->Bind(&ok);
|
| - }
|
| - break;
|
| - }
|
| - case 4:
|
| - macro_assembler->CheckCharacter(chars[3], &ok);
|
| - // Fall through!
|
| - case 3:
|
| + int length = uncanonicalize.get(c, '\0', chars);
|
| + if (length <= 1) return false;
|
| + // We may not need to check against the end of the input string
|
| + // if this character lies before a character that matched.
|
| + if (!preloaded) {
|
| + macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
|
| + }
|
| + Label ok;
|
| + ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
|
| + switch (length) {
|
| + case 2: {
|
| + if (ShortCutEmitCharacterPair(macro_assembler,
|
| + chars[0],
|
| + chars[1],
|
| + on_failure)) {
|
| + } else {
|
| macro_assembler->CheckCharacter(chars[0], &ok);
|
| - macro_assembler->CheckCharacter(chars[1], &ok);
|
| - macro_assembler->CheckNotCharacter(chars[2], on_failure);
|
| + macro_assembler->CheckNotCharacter(chars[1], on_failure);
|
| macro_assembler->Bind(&ok);
|
| - break;
|
| - default:
|
| - UNREACHABLE();
|
| - break;
|
| + }
|
| + break;
|
| }
|
| + case 4:
|
| + macro_assembler->CheckCharacter(chars[3], &ok);
|
| + // Fall through!
|
| + case 3:
|
| + macro_assembler->CheckCharacter(chars[0], &ok);
|
| + macro_assembler->CheckCharacter(chars[1], &ok);
|
| + macro_assembler->CheckNotCharacter(chars[2], on_failure);
|
| + macro_assembler->Bind(&ok);
|
| + break;
|
| + default:
|
| + UNREACHABLE();
|
| + break;
|
| }
|
| + return true;
|
| }
|
|
|
|
|
| @@ -1743,7 +1732,8 @@
|
| int cp_offset,
|
| Label* on_failure,
|
| bool check_offset,
|
| - bool ascii) {
|
| + bool ascii,
|
| + bool preloaded) {
|
| ZoneList<CharacterRange>* ranges = cc->ranges();
|
| int max_char;
|
| if (ascii) {
|
| @@ -1789,15 +1779,11 @@
|
| return;
|
| }
|
|
|
| - if (check_offset) {
|
| - macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
|
| - } else {
|
| - // Here we don't need to check against the end of the input string
|
| - // since this character lies before a character that matched.
|
| - macro_assembler->LoadCurrentCharacterUnchecked(cp_offset);
|
| + if (!preloaded) {
|
| + macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
|
| }
|
|
|
| - for (int i = 0; i <= last_valid_range; i++) {
|
| + for (int i = 0; i < last_valid_range; i++) {
|
| CharacterRange& range = ranges->at(i);
|
| Label next_range;
|
| uc16 from = range.from();
|
| @@ -1858,6 +1844,10 @@
|
| }
|
|
|
|
|
| +RegExpNode::~RegExpNode() {
|
| +}
|
| +
|
| +
|
| RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
|
| GenerationVariant* variant) {
|
| // TODO(erikcorry): Implement support.
|
| @@ -1908,115 +1898,573 @@
|
| }
|
|
|
|
|
| -// This generates the code to match a text node. A text node can contain
|
| -// straight character sequences (possibly to be matched in a case-independent
|
| -// way) and character classes. In order to be most efficient we test for the
|
| -// simple things first and then move on to the more complicated things. The
|
| -// simplest thing is a non-letter or a letter if we are matching case. The
|
| -// next-most simple thing is a case-independent letter. The least simple is
|
| -// a character class. Another optimization is that we test the last one first.
|
| -// If that succeeds we don't need to test for the end of the string when we
|
| -// load other characters.
|
| -bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
|
| - RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
| - Label *backtrack = variant->backtrack();
|
| - LimitResult limit_result = LimitVersions(compiler, variant);
|
| - if (limit_result == FAIL) return false;
|
| - if (limit_result == DONE) return true;
|
| - ASSERT(limit_result == CONTINUE);
|
| +int ActionNode::EatsAtLeast(int recursion_depth) {
|
| + if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
|
| + if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
|
| + return on_success()->EatsAtLeast(recursion_depth + 1);
|
| +}
|
|
|
| - int element_count = elms_->length();
|
| - ASSERT(element_count != 0);
|
| - if (info()->at_end) {
|
| - macro_assembler->GoTo(backtrack);
|
| - return true;
|
| +
|
| +int TextNode::EatsAtLeast(int recursion_depth) {
|
| + int answer = Length();
|
| + if (answer >= 4) return answer;
|
| + if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
|
| + return answer + on_success()->EatsAtLeast(recursion_depth + 1);
|
| +}
|
| +
|
| +
|
| +
|
| +int ChoiceNode::EatsAtLeastHelper(int recursion_depth, int start_from_node) {
|
| + if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
|
| + int min = 100;
|
| + int choice_count = alternatives_->length();
|
| + for (int i = start_from_node; i < choice_count; i++) {
|
| + RegExpNode* node = alternatives_->at(i).node();
|
| + int node_eats_at_least = node->EatsAtLeast(recursion_depth + 1);
|
| + if (node_eats_at_least < min) min = node_eats_at_least;
|
| }
|
| - // First check for non-ASCII text.
|
| - // TODO(plesner): We should do this at node level.
|
| + return min;
|
| +}
|
| +
|
| +
|
| +int LoopChoiceNode::EatsAtLeast(int recursion_depth) {
|
| + return 0;
|
| +}
|
| +
|
| +
|
| +int ChoiceNode::EatsAtLeast(int recursion_depth) {
|
| + return EatsAtLeastHelper(recursion_depth, 0);
|
| +}
|
| +
|
| +
|
| +// Takes the left-most 1-bit and smears it out, setting all bits to its right.
|
| +static inline uint32_t SmearBitsRight(uint32_t v) {
|
| + v |= v >> 1;
|
| + v |= v >> 2;
|
| + v |= v >> 4;
|
| + v |= v >> 8;
|
| + v |= v >> 16;
|
| + return v;
|
| +}
|
| +
|
| +
|
| +bool QuickCheckDetails::Rationalize(bool asc) {
|
| + bool found_useful_op = false;
|
| + uint32_t char_mask;
|
| + if (asc) {
|
| + char_mask = String::kMaxAsciiCharCode;
|
| + } else {
|
| + char_mask = String::kMaxUC16CharCode;
|
| + }
|
| + mask_ = 0;
|
| + value_ = 0;
|
| + int char_shift = 0;
|
| + for (int i = 0; i < characters_; i++) {
|
| + Position* pos = &positions_[i];
|
| + if ((pos->mask & String::kMaxAsciiCharCode) != 0) {
|
| + found_useful_op = true;
|
| + }
|
| + mask_ |= (pos->mask & char_mask) << char_shift;
|
| + value_ |= (pos->value & char_mask) << char_shift;
|
| + char_shift += asc ? 8 : 16;
|
| + }
|
| + return found_useful_op;
|
| +}
|
| +
|
| +
|
| +bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
|
| + GenerationVariant* variant,
|
| + bool preload_has_checked_bounds,
|
| + Label* on_possible_success,
|
| + QuickCheckDetails* details,
|
| + bool fall_through_on_failure) {
|
| + if (details->characters() == 0) return false;
|
| + GetQuickCheckDetails(details, compiler, 0);
|
| + if (!details->Rationalize(compiler->ascii())) return false;
|
| + uint32_t mask = details->mask();
|
| + uint32_t value = details->value();
|
| +
|
| + RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
| +
|
| + if (variant->characters_preloaded() != details->characters()) {
|
| + assembler->LoadCurrentCharacter(variant->cp_offset(),
|
| + variant->backtrack(),
|
| + !preload_has_checked_bounds,
|
| + details->characters());
|
| + }
|
| +
|
| +
|
| + bool need_mask = true;
|
| +
|
| + if (details->characters() == 1) {
|
| + // If number of characters preloaded is 1 then we used a byte or 16 bit
|
| + // load so the value is already masked down.
|
| + uint32_t char_mask;
|
| + if (compiler->ascii()) {
|
| + char_mask = String::kMaxAsciiCharCode;
|
| + } else {
|
| + char_mask = String::kMaxUC16CharCode;
|
| + }
|
| + if ((mask & char_mask) == char_mask) need_mask = false;
|
| + } else {
|
| + // For 2-character preloads in ASCII mode we also use a 16 bit load with
|
| + // zero extend.
|
| + if (details->characters() == 2 && compiler->ascii()) {
|
| + if ((mask & 0xffff) == 0xffff) need_mask = false;
|
| + } else {
|
| + if (mask == 0xffffffff) need_mask = false;
|
| + }
|
| + }
|
| +
|
| + if (fall_through_on_failure) {
|
| + if (need_mask) {
|
| + assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
|
| + } else {
|
| + assembler->CheckCharacter(value, on_possible_success);
|
| + }
|
| + } else {
|
| + if (need_mask) {
|
| + assembler->CheckNotCharacterAfterAnd(value, mask, variant->backtrack());
|
| + } else {
|
| + assembler->CheckNotCharacter(value, variant->backtrack());
|
| + }
|
| + }
|
| + return true;
|
| +}
|
| +
|
| +
|
| +// Here is the meat of GetQuickCheckDetails (see also the comment on the
|
| +// super-class in the .h file).
|
| +//
|
| +// We iterate along the text object, building up for each character a
|
| +// mask and value that can be used to test for a quick failure to match.
|
| +// The masks and values for the positions will be combined into a single
|
| +// machine word for the current character width in order to be used in
|
| +// generating a quick check.
|
| +void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
| + RegExpCompiler* compiler,
|
| + int characters_filled_in) {
|
| + ASSERT(characters_filled_in < details->characters());
|
| + int characters = details->characters();
|
| + int char_mask;
|
| + int char_shift;
|
| if (compiler->ascii()) {
|
| - for (int i = element_count - 1; i >= 0; i--) {
|
| - TextElement elm = elms_->at(i);
|
| - if (elm.type == TextElement::ATOM) {
|
| - Vector<const uc16> quarks = elm.data.u_atom->data();
|
| - for (int j = quarks.length() - 1; j >= 0; j--) {
|
| - if (quarks[j] > String::kMaxAsciiCharCode) {
|
| - macro_assembler->GoTo(backtrack);
|
| - return true;
|
| + char_mask = String::kMaxAsciiCharCode;
|
| + char_shift = 8;
|
| + } else {
|
| + char_mask = String::kMaxUC16CharCode;
|
| + char_shift = 16;
|
| + }
|
| + for (int k = 0; k < elms_->length(); k++) {
|
| + TextElement elm = elms_->at(k);
|
| + if (elm.type == TextElement::ATOM) {
|
| + Vector<const uc16> quarks = elm.data.u_atom->data();
|
| + for (int i = 0; i < characters && i < quarks.length(); i++) {
|
| + QuickCheckDetails::Position* pos =
|
| + details->positions(characters_filled_in);
|
| + if (compiler->ignore_case()) {
|
| + unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| + uc16 c = quarks[i];
|
| + int length = uncanonicalize.get(c, '\0', chars);
|
| + if (length < 2) {
|
| + // This letter has no case equivalents, so it's nice and simple
|
| + // and the mask-compare will determine definitely whether we have
|
| + // a match at this character position.
|
| + pos->mask = char_mask;
|
| + pos->value = c;
|
| + pos->determines_perfectly = true;
|
| + } else {
|
| + uint32_t common_bits = char_mask;
|
| + uint32_t bits = chars[0];
|
| + for (int j = 1; j < length; j++) {
|
| + uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
|
| + common_bits ^= differing_bits;
|
| + bits &= common_bits;
|
| + }
|
| + // If length is 2 and common bits has only one zero in it then
|
| + // our mask and compare instruction will determine definitely
|
| + // whether we have a match at this character position. Otherwise
|
| + // it can only be an approximate check.
|
| + uint32_t one_zero = (common_bits | ~char_mask);
|
| + if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
|
| + pos->determines_perfectly = true;
|
| + }
|
| + pos->mask = common_bits;
|
| + pos->value = bits;
|
| }
|
| + } else {
|
| + // Don't ignore case. Nice simple case where the mask-compare will
|
| + // determine definitely whether we have a match at this character
|
| + // position.
|
| + pos->mask = char_mask;
|
| + pos->value = quarks[i];
|
| + pos->determines_perfectly = true;
|
| }
|
| + characters_filled_in++;
|
| + ASSERT(characters_filled_in <= details->characters());
|
| + if (characters_filled_in == details->characters()) {
|
| + return;
|
| + }
|
| + }
|
| + } else {
|
| + QuickCheckDetails::Position* pos =
|
| + details->positions(characters_filled_in);
|
| + RegExpCharacterClass* tree = elm.data.u_char_class;
|
| + ZoneList<CharacterRange>* ranges = tree->ranges();
|
| + CharacterRange range = ranges->at(0);
|
| + if (tree->is_negated()) {
|
| + // A quick check uses multi-character mask and compare. There is no
|
| + // useful way to incorporate a negative char class into this scheme
|
| + // so we just conservatively create a mask and value that will always
|
| + // succeed.
|
| + pos->mask = 0;
|
| + pos->value = 0;
|
| } else {
|
| - ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
|
| + uint32_t differing_bits = (range.from() ^ range.to());
|
| + // A mask and compare is only perfect if the differing bits form a
|
| + // number like 00011111 with one single block of trailing 1s.
|
| + if ((differing_bits & (differing_bits + 1)) == 0) {
|
| + pos->determines_perfectly = true;
|
| + }
|
| + uint32_t common_bits = ~SmearBitsRight(differing_bits);
|
| + uint32_t bits = (range.from() & common_bits);
|
| + for (int i = 1; i < ranges->length(); i++) {
|
| + // Here we are combining more ranges into the mask and compare
|
| + // value. With each new range the mask becomes more sparse and
|
| + // so the chances of a false positive rise. A character class
|
| + // with multiple ranges is assumed never to be equivalent to a
|
| + // mask and compare operation.
|
| + pos->determines_perfectly = false;
|
| + CharacterRange range = ranges->at(i);
|
| + uint32_t new_common_bits = (range.from() ^ range.to());
|
| + new_common_bits = ~SmearBitsRight(new_common_bits);
|
| + common_bits &= new_common_bits;
|
| + bits &= new_common_bits;
|
| + uint32_t differing_bits = (range.from() & common_bits) ^ bits;
|
| + common_bits ^= differing_bits;
|
| + bits &= common_bits;
|
| + }
|
| + pos->mask = common_bits;
|
| + pos->value = bits;
|
| }
|
| + characters_filled_in++;
|
| + ASSERT(characters_filled_in <= details->characters());
|
| + if (characters_filled_in == details->characters()) {
|
| + return;
|
| + }
|
| }
|
| }
|
| - // Second, handle straight character matches.
|
| - int checked_up_to = -1;
|
| - for (int i = element_count - 1; i >= 0; i--) {
|
| + ASSERT(characters_filled_in != details->characters());
|
| + on_success()-> GetQuickCheckDetails(details, compiler, characters_filled_in);
|
| +}
|
| +
|
| +
|
| +void QuickCheckDetails::Clear() {
|
| + for (int i = 0; i < characters_; i++) {
|
| + positions_[i].mask = 0;
|
| + positions_[i].value = 0;
|
| + positions_[i].determines_perfectly = false;
|
| + }
|
| + characters_ = 0;
|
| +}
|
| +
|
| +
|
| +void QuickCheckDetails::Advance(int by, bool ascii) {
|
| + ASSERT(by > 0);
|
| + if (by >= characters_) {
|
| + Clear();
|
| + return;
|
| + }
|
| + for (int i = 0; i < characters_ - by; i++) {
|
| + positions_[i] = positions_[by + i];
|
| + }
|
| + for (int i = characters_ - by; i < characters_; i++) {
|
| + positions_[i].mask = 0;
|
| + positions_[i].value = 0;
|
| + positions_[i].determines_perfectly = false;
|
| + }
|
| + characters_ -= by;
|
| + // We could change mask_ and value_ here but we would never advance unless
|
| + // they had already been used in a check and they won't be used again because
|
| + // it would gain us nothing. So there's no point.
|
| +}
|
| +
|
| +
|
| +void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
|
| + ASSERT(characters_ == other->characters_);
|
| + for (int i = from_index; i < characters_; i++) {
|
| + QuickCheckDetails::Position* pos = positions(i);
|
| + QuickCheckDetails::Position* other_pos = other->positions(i);
|
| + if (pos->mask != other_pos->mask ||
|
| + pos->value != other_pos->value ||
|
| + !other_pos->determines_perfectly) {
|
| + // Our mask-compare operation will be approximate unless we have the
|
| + // exact same operation on both sides of the alternation.
|
| + pos->determines_perfectly = false;
|
| + }
|
| + pos->mask &= other_pos->mask;
|
| + pos->value &= pos->mask;
|
| + other_pos->value &= pos->mask;
|
| + uc16 differing_bits = (pos->value ^ other_pos->value);
|
| + pos->mask &= ~differing_bits;
|
| + pos->value &= pos->mask;
|
| + }
|
| +}
|
| +
|
| +
|
| +void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
| + RegExpCompiler* compiler,
|
| + int characters_filled_in) {
|
| + int choice_count = alternatives_->length();
|
| + ASSERT(choice_count > 0);
|
| + alternatives_->at(0).node()->GetQuickCheckDetails(details,
|
| + compiler,
|
| + characters_filled_in);
|
| + for (int i = 1; i < choice_count; i++) {
|
| + QuickCheckDetails new_details(details->characters());
|
| + RegExpNode* node = alternatives_->at(i).node();
|
| + node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in);
|
| + // Here we merge the quick match details of the two branches.
|
| + details->Merge(&new_details, characters_filled_in);
|
| + }
|
| +}
|
| +
|
| +
|
| +// We call this repeatedly to generate code for each pass over the text node.
|
| +// The passes are in increasing order of difficulty because we hope one
|
| +// of the first passes will fail in which case we are saved the work of the
|
| +// later passes. for example for the case independent regexp /%[asdfghjkl]a/
|
| +// we will check the '%' in the first pass, the case independent 'a' in the
|
| +// second pass and the character class in the last pass.
|
| +//
|
| +// The passes are done from right to left, so for example to test for /bar/
|
| +// we will first test for an 'r' with offset 2, then an 'a' with offset 1
|
| +// and then a 'b' with offset 0. This means we can avoid the end-of-input
|
| +// bounds check most of the time. In the example we only need to check for
|
| +// end-of-input when loading the putative 'r'.
|
| +//
|
| +// A slight complication involves the fact that the first character may already
|
| +// be fetched into a register by the previous node. In this case we want to
|
| +// do the test for that character first. We do this in separate passes. The
|
| +// 'preloaded' argument indicates that we are doing such a 'pass'. If such a
|
| +// pass has been performed then subsequent passes will have true in
|
| +// first_element_checked to indicate that that character does not need to be
|
| +// checked again.
|
| +//
|
| +// In addition to all this we are passed a GenerationVariant, which can
|
| +// contain an AlternativeGeneration object. In this AlternativeGeneration
|
| +// object we can see details of any quick check that was already passed in
|
| +// order to get to the code we are now generating. The quick check can involve
|
| +// loading characters, which means we do not need to recheck the bounds
|
| +// up to the limit the quick check already checked. In addition the quick
|
| +// check can have involved a mask and compare operation which may simplify
|
| +// or obviate the need for further checks at some character positions.
|
| +void TextNode::TextEmitPass(RegExpCompiler* compiler,
|
| + TextEmitPassType pass,
|
| + bool preloaded,
|
| + GenerationVariant* variant,
|
| + bool first_element_checked,
|
| + int* checked_up_to) {
|
| + RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
| + bool ascii = compiler->ascii();
|
| + Label* backtrack = variant->backtrack();
|
| + QuickCheckDetails* quick_check = variant->quick_check_performed();
|
| + int element_count = elms_->length();
|
| + for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
|
| TextElement elm = elms_->at(i);
|
| - ASSERT(elm.cp_offset >= 0);
|
| int cp_offset = variant->cp_offset() + elm.cp_offset;
|
| if (elm.type == TextElement::ATOM) {
|
| - Vector<const uc16> quarks = elm.data.u_atom->data();
|
| - int last_cp_offset = cp_offset + quarks.length();
|
| - if (compiler->ignore_case()) {
|
| - EmitAtomNonLetters(macro_assembler,
|
| - elm,
|
| - quarks,
|
| - backtrack,
|
| - cp_offset,
|
| - checked_up_to < last_cp_offset);
|
| - } else {
|
| - macro_assembler->CheckCharacters(quarks,
|
| - cp_offset,
|
| - backtrack,
|
| - checked_up_to < last_cp_offset);
|
| + if (pass == NON_ASCII_MATCH ||
|
| + pass == CHARACTER_MATCH ||
|
| + pass == CASE_CHARACTER_MATCH) {
|
| + Vector<const uc16> quarks = elm.data.u_atom->data();
|
| + for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
|
| + bool bound_checked = true; // Most ops will check their bounds.
|
| + if (first_element_checked && i == 0 && j == 0) continue;
|
| + if (quick_check != NULL &&
|
| + elm.cp_offset + j < quick_check->characters() &&
|
| + quick_check->positions(elm.cp_offset + j)->determines_perfectly) {
|
| + continue;
|
| + }
|
| + if (pass == NON_ASCII_MATCH) {
|
| + ASSERT(ascii);
|
| + if (quarks[j] > String::kMaxAsciiCharCode) {
|
| + assembler->GoTo(backtrack);
|
| + return;
|
| + }
|
| + } else if (pass == CHARACTER_MATCH) {
|
| + if (compiler->ignore_case()) {
|
| + bound_checked = EmitAtomNonLetter(assembler,
|
| + quarks[j],
|
| + backtrack,
|
| + cp_offset + j,
|
| + *checked_up_to < cp_offset + j,
|
| + preloaded);
|
| + } else {
|
| + if (!preloaded) {
|
| + assembler->LoadCurrentCharacter(cp_offset + j,
|
| + backtrack,
|
| + *checked_up_to < cp_offset + j);
|
| + }
|
| + assembler->CheckNotCharacter(quarks[j], backtrack);
|
| + }
|
| + } else {
|
| + ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
|
| + ASSERT(compiler->ignore_case());
|
| + bound_checked = EmitAtomLetter(assembler,
|
| + quarks[j],
|
| + backtrack,
|
| + cp_offset + j,
|
| + *checked_up_to < cp_offset + j,
|
| + preloaded);
|
| + }
|
| + if (pass != NON_ASCII_MATCH && bound_checked) {
|
| + if (cp_offset + j > *checked_up_to) {
|
| + *checked_up_to = cp_offset + j;
|
| + }
|
| + }
|
| + }
|
| }
|
| - if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
|
| } else {
|
| ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
|
| - }
|
| - }
|
| - // Third, handle case independent letter matches if any.
|
| - if (compiler->ignore_case()) {
|
| - for (int i = element_count - 1; i >= 0; i--) {
|
| - TextElement elm = elms_->at(i);
|
| - int cp_offset = variant->cp_offset() + elm.cp_offset;
|
| - if (elm.type == TextElement::ATOM) {
|
| - Vector<const uc16> quarks = elm.data.u_atom->data();
|
| - int last_cp_offset = cp_offset + quarks.length();
|
| - EmitAtomLetters(macro_assembler,
|
| - elm,
|
| - quarks,
|
| - backtrack,
|
| - cp_offset,
|
| - checked_up_to < last_cp_offset);
|
| - if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
|
| + if (first_element_checked && i == 0) continue;
|
| + if (quick_check != NULL &&
|
| + elm.cp_offset < quick_check->characters() &&
|
| + quick_check->positions(elm.cp_offset)->determines_perfectly) {
|
| + continue;
|
| }
|
| + if (pass == CHARACTER_CLASS_MATCH) {
|
| + RegExpCharacterClass* cc = elm.data.u_char_class;
|
| + EmitCharClass(assembler,
|
| + cc,
|
| + cp_offset,
|
| + backtrack,
|
| + *checked_up_to < cp_offset,
|
| + ascii,
|
| + preloaded);
|
| + if (cp_offset > *checked_up_to) {
|
| + *checked_up_to = cp_offset;
|
| + }
|
| + }
|
| }
|
| }
|
| - // If the fast character matches passed then do the character classes.
|
| - for (int i = element_count - 1; i >= 0; i--) {
|
| - TextElement elm = elms_->at(i);
|
| - int cp_offset = variant->cp_offset() + elm.cp_offset;
|
| - if (elm.type == TextElement::CHAR_CLASS) {
|
| - RegExpCharacterClass* cc = elm.data.u_char_class;
|
| - EmitCharClass(macro_assembler,
|
| - cc,
|
| - cp_offset,
|
| - backtrack,
|
| - checked_up_to < cp_offset,
|
| - compiler->ascii());
|
| - if (cp_offset > checked_up_to) checked_up_to = cp_offset;
|
| +}
|
| +
|
| +
|
| +int TextNode::Length() {
|
| + TextElement elm = elms_->last();
|
| + ASSERT(elm.cp_offset >= 0);
|
| + if (elm.type == TextElement::ATOM) {
|
| + return elm.cp_offset + elm.data.u_atom->data().length();
|
| + } else {
|
| + return elm.cp_offset + 1;
|
| + }
|
| +}
|
| +
|
| +
|
| +// This generates the code to match a text node. A text node can contain
|
| +// straight character sequences (possibly to be matched in a case-independent
|
| +// way) and character classes. For efficiency we do not do this in a single
|
| +// pass from left to right. Instead we pass over the text node several times,
|
| +// emitting code for some character positions every time. See the comment on
|
| +// TextEmitPass for details.
|
| +bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
|
| + LimitResult limit_result = LimitVersions(compiler, variant);
|
| + if (limit_result == FAIL) return false;
|
| + if (limit_result == DONE) return true;
|
| + ASSERT(limit_result == CONTINUE);
|
| +
|
| + if (info()->follows_word_interest ||
|
| + info()->follows_newline_interest ||
|
| + info()->follows_start_interest) {
|
| + return false;
|
| + }
|
| +
|
| + if (info()->at_end) {
|
| + compiler->macro_assembler()->GoTo(variant->backtrack());
|
| + return true;
|
| + }
|
| +
|
| + if (compiler->ascii()) {
|
| + int dummy = 0;
|
| + TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy);
|
| + }
|
| +
|
| + bool first_elt_done = false;
|
| + int bound_checked_to = variant->cp_offset() - 1;
|
| + QuickCheckDetails* quick_check = variant->quick_check_performed();
|
| + if (quick_check != NULL) {
|
| + bound_checked_to += quick_check->characters();
|
| + }
|
| +
|
| + // If a character is preloaded into the current character register then
|
| + // check that now.
|
| + if (variant->characters_preloaded() == 1) {
|
| + TextEmitPass(compiler,
|
| + CHARACTER_MATCH,
|
| + true,
|
| + variant,
|
| + false,
|
| + &bound_checked_to);
|
| + if (compiler->ignore_case()) {
|
| + TextEmitPass(compiler,
|
| + CASE_CHARACTER_MATCH,
|
| + true,
|
| + variant,
|
| + false,
|
| + &bound_checked_to);
|
| }
|
| + TextEmitPass(compiler,
|
| + CHARACTER_CLASS_MATCH,
|
| + true,
|
| + variant,
|
| + false,
|
| + &bound_checked_to);
|
| + first_elt_done = true;
|
| }
|
|
|
| - GenerationVariant new_variant(*variant);
|
| - new_variant.set_cp_offset(checked_up_to + 1);
|
| + TextEmitPass(compiler,
|
| + CHARACTER_MATCH,
|
| + false,
|
| + variant,
|
| + first_elt_done,
|
| + &bound_checked_to);
|
| + if (compiler->ignore_case()) {
|
| + TextEmitPass(compiler,
|
| + CASE_CHARACTER_MATCH,
|
| + false,
|
| + variant,
|
| + first_elt_done,
|
| + &bound_checked_to);
|
| + }
|
| + TextEmitPass(compiler,
|
| + CHARACTER_CLASS_MATCH,
|
| + false,
|
| + variant,
|
| + first_elt_done,
|
| + &bound_checked_to);
|
| +
|
| + GenerationVariant successor_variant(*variant);
|
| + successor_variant.AdvanceVariant(Length(), compiler->ascii());
|
| RecursionCheck rc(compiler);
|
| - return on_success()->Emit(compiler, &new_variant);
|
| + return on_success()->Emit(compiler, &successor_variant);
|
| }
|
|
|
|
|
| +void GenerationVariant::AdvanceVariant(int by, bool ascii) {
|
| + ASSERT(by > 0);
|
| + // We don't have an instruction for shifting the current character register
|
| + // down or for using a shifted value for anything so lets just forget that
|
| + // we preloaded any characters into it.
|
| + characters_preloaded_ = 0;
|
| + // Adjust the offsets of the quick check performed information. This
|
| + // information is used to find out what we already determined about the
|
| + // characters by means of mask and compare.
|
| + quick_check_performed_.Advance(by, ascii);
|
| + cp_offset_ += by;
|
| +}
|
| +
|
| +
|
| void TextNode::MakeCaseIndependent() {
|
| int element_count = elms_->length();
|
| for (int i = 0; i < element_count; i++) {
|
| @@ -2110,6 +2558,156 @@
|
| }
|
|
|
|
|
| +int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
|
| + int start_from_node) {
|
| + bool ascii = compiler->ascii();
|
| + int preload_characters = ChoiceNode::EatsAtLeastHelper(0, start_from_node);
|
| +#ifdef CAN_READ_UNALIGNED
|
| + if (ascii) {
|
| + if (preload_characters > 4) preload_characters = 4;
|
| + // We can't preload 3 characters because there is no machine instruction
|
| + // to do that. We can't just load 4 because we could be reading
|
| + // beyond the end of the string, which could cause a memory fault.
|
| + if (preload_characters == 3) preload_characters = 2;
|
| + } else {
|
| + if (preload_characters > 2) preload_characters = 2;
|
| + }
|
| +#else
|
| + if (preload_characters > 1) preload_characters = 1;
|
| +#endif
|
| + return preload_characters;
|
| +}
|
| +
|
| +
|
| +// This class is used when generating the alternatives in a choice node. It
|
| +// records the way the alternative is being code generated.
|
| +class AlternativeGeneration: public Malloced {
|
| + public:
|
| + AlternativeGeneration()
|
| + : possible_success(),
|
| + expects_preload(false),
|
| + after(),
|
| + quick_check_details() { }
|
| + Label possible_success;
|
| + bool expects_preload;
|
| + Label after;
|
| + QuickCheckDetails quick_check_details;
|
| +};
|
| +
|
| +
|
| +// Creates a list of AlternativeGenerations. If the list has a reasonable
|
| +// size then it is on the stack, otherwise the excess is on the heap.
|
| +class AlternativeGenerationList {
|
| + public:
|
| + explicit AlternativeGenerationList(int count)
|
| + : alt_gens_(count) {
|
| + for (int i = 0; i < count && i < kAFew; i++) {
|
| + alt_gens_.Add(a_few_alt_gens_ + i);
|
| + }
|
| + for (int i = kAFew; i < count; i++) {
|
| + alt_gens_.Add(new AlternativeGeneration());
|
| + }
|
| + }
|
| + ~AlternativeGenerationList() {
|
| + for (int i = 0; i < alt_gens_.length(); i++) {
|
| + alt_gens_[i]->possible_success.Unuse();
|
| + alt_gens_[i]->after.Unuse();
|
| + }
|
| + for (int i = kAFew; i < alt_gens_.length(); i++) {
|
| + delete alt_gens_[i];
|
| + alt_gens_[i] = NULL;
|
| + }
|
| + }
|
| +
|
| + AlternativeGeneration* at(int i) {
|
| + return alt_gens_[i];
|
| + }
|
| + private:
|
| + static const int kAFew = 10;
|
| + ZoneList<AlternativeGeneration*> alt_gens_;
|
| + AlternativeGeneration a_few_alt_gens_[kAFew];
|
| +};
|
| +
|
| +
|
| +/* Code generation for choice nodes.
|
| + *
|
| + * We generate quick checks that do a mask and compare to eliminate a
|
| + * choice. If the quick check succeeds then it jumps to the continuation to
|
| + * do slow checks and check subsequent nodes. If it fails (the common case)
|
| + * it falls through to the next choice.
|
| + *
|
| + * Here is the desired flow graph. Nodes directly below each other imply
|
| + * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
|
| + * 3 doesn't have a quick check so we have to call the slow check.
|
| + * Nodes are marked Qn for quick checks and Sn for slow checks. The entire
|
| + * regexp continuation is generated directly after the Sn node, up to the
|
| + * next GoTo if we decide to reuse some already generated code. Some
|
| + * nodes expect preload_characters to be preloaded into the current
|
| + * character register. R nodes do this preloading. Vertices are marked
|
| + * F for failures and S for success (possible success in the case of quick
|
| + * nodes). L, V, < and > are used as arrow heads.
|
| + *
|
| + * ----------> R
|
| + * |
|
| + * V
|
| + * Q1 -----> S1
|
| + * | S /
|
| + * F| /
|
| + * | F/
|
| + * | /
|
| + * | R
|
| + * | /
|
| + * V L
|
| + * Q2 -----> S2
|
| + * | S /
|
| + * F| /
|
| + * | F/
|
| + * | /
|
| + * | R
|
| + * | /
|
| + * V L
|
| + * S3
|
| + * |
|
| + * F|
|
| + * |
|
| + * R
|
| + * |
|
| + * backtrack V
|
| + * <----------Q4
|
| + * \ F |
|
| + * \ |S
|
| + * \ F V
|
| + * \-----S4
|
| + *
|
| + * For greedy loops we reverse our expectation and expect to match rather
|
| + * than fail. Therefore we want the loop code to look like this (U is the
|
| + * unwind code that steps back in the greedy loop). The following alternatives
|
| + * look the same as above.
|
| + * _____
|
| + * / \
|
| + * V |
|
| + * ----------> S1 |
|
| + * /| |
|
| + * / |S |
|
| + * F/ \_____/
|
| + * /
|
| + * |<-----------
|
| + * | \
|
| + * V \
|
| + * Q2 ---> S2 \
|
| + * | S / |
|
| + * F| / |
|
| + * | F/ |
|
| + * | / |
|
| + * | R |
|
| + * | / |
|
| + * F VL |
|
| + * <------U |
|
| + * back |S |
|
| + * \______________/
|
| + */
|
| +
|
| +
|
| bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
|
| RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
| int choice_count = alternatives_->length();
|
| @@ -2136,7 +2734,8 @@
|
| int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
|
| bool greedy_loop = false;
|
| Label greedy_loop_label;
|
| - GenerationVariant counter_backtrack_variant(&greedy_loop_label);
|
| + GenerationVariant counter_backtrack_variant;
|
| + counter_backtrack_variant.set_backtrack(&greedy_loop_label);
|
| if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
|
| // Here we have special handling for greedy loops containing only text nodes
|
| // and other simple nodes. These are handled by pushing the current
|
| @@ -2150,7 +2749,8 @@
|
| macro_assembler->PushCurrentPosition();
|
| current_variant = &counter_backtrack_variant;
|
| Label greedy_match_failed;
|
| - GenerationVariant greedy_match_variant(&greedy_match_failed);
|
| + GenerationVariant greedy_match_variant;
|
| + greedy_match_variant.set_backtrack(&greedy_match_failed);
|
| Label loop_label;
|
| macro_assembler->Bind(&loop_label);
|
| greedy_match_variant.set_stop_node(this);
|
| @@ -2167,32 +2767,70 @@
|
| Label second_choice; // For use in greedy matches.
|
| macro_assembler->Bind(&second_choice);
|
|
|
| + int first_normal_choice = greedy_loop ? 1 : 0;
|
| +
|
| + int preload_characters = CalculatePreloadCharacters(compiler,
|
| + first_normal_choice);
|
| + bool preload_is_current = false;
|
| + bool preload_has_checked_bounds = false;
|
| +
|
| + AlternativeGenerationList alt_gens(choice_count);
|
| +
|
| // For now we just call all choices one after the other. The idea ultimately
|
| // is to use the Dispatch table to try only the relevant ones.
|
| - for (int i = greedy_loop ? 1 : 0; i < choice_count - 1; i++) {
|
| + for (int i = first_normal_choice; i < choice_count; i++) {
|
| GuardedAlternative alternative = alternatives_->at(i);
|
| - Label after;
|
| + AlternativeGeneration* alt_gen(alt_gens.at(i));
|
| + alt_gen->quick_check_details.set_characters(preload_characters);
|
| ZoneList<Guard*>* guards = alternative.guards();
|
| int guard_count = (guards == NULL) ? 0 : guards->length();
|
| +
|
| GenerationVariant new_variant(*current_variant);
|
| - new_variant.set_backtrack(&after);
|
| - for (int j = 0; j < guard_count; j++) {
|
| - GenerateGuard(macro_assembler, guards->at(j), &new_variant);
|
| + new_variant.set_characters_preloaded(preload_is_current ?
|
| + preload_characters :
|
| + 0);
|
| + new_variant.quick_check_performed()->Clear();
|
| + alt_gen->expects_preload = preload_is_current;
|
| + bool generate_full_check_inline = false;
|
| + if (alternative.node()->EmitQuickCheck(compiler,
|
| + &new_variant,
|
| + preload_has_checked_bounds,
|
| + &alt_gen->possible_success,
|
| + &alt_gen->quick_check_details,
|
| + i < choice_count - 1)) {
|
| + // Quick check was generated for this choice.
|
| + preload_is_current = true;
|
| + preload_has_checked_bounds = true;
|
| + // On the last choice in the ChoiceNode we generated the quick
|
| + // check to fall through on possible success. So now we need to
|
| + // generate the full check inline.
|
| + if (i == choice_count - 1) {
|
| + macro_assembler->Bind(&alt_gen->possible_success);
|
| + new_variant.set_quick_check_performed(&alt_gen->quick_check_details);
|
| + generate_full_check_inline = true;
|
| + }
|
| + } else {
|
| + // No quick check was generated. Put the full code here.
|
| + if (i < choice_count - 1) {
|
| + new_variant.set_backtrack(&alt_gen->after);
|
| + }
|
| + generate_full_check_inline = true;
|
| }
|
| - if (!alternative.node()->Emit(compiler, &new_variant)) {
|
| - after.Unuse();
|
| - return false;
|
| + if (generate_full_check_inline) {
|
| + if (preload_is_current) {
|
| + new_variant.set_characters_preloaded(preload_characters);
|
| + }
|
| + for (int j = 0; j < guard_count; j++) {
|
| + GenerateGuard(macro_assembler, guards->at(j), &new_variant);
|
| + }
|
| + if (!alternative.node()->Emit(compiler, &new_variant)) {
|
| + greedy_loop_label.Unuse();
|
| + return false;
|
| + }
|
| + preload_is_current = false;
|
| }
|
| - macro_assembler->Bind(&after);
|
| + macro_assembler->Bind(&alt_gen->after);
|
| }
|
| - GuardedAlternative alternative = alternatives_->at(choice_count - 1);
|
| - ZoneList<Guard*>* guards = alternative.guards();
|
| - int guard_count = (guards == NULL) ? 0 : guards->length();
|
| - for (int j = 0; j < guard_count; j++) {
|
| - GenerateGuard(macro_assembler, guards->at(j), current_variant);
|
| - }
|
| - bool ok = alternative.node()->Emit(compiler, current_variant);
|
| - if (!ok) return false;
|
| if (greedy_loop) {
|
| macro_assembler->Bind(&greedy_loop_label);
|
| // If we have unwound to the bottom then backtrack.
|
| @@ -2201,12 +2839,68 @@
|
| macro_assembler->AdvanceCurrentPosition(-text_length);
|
| macro_assembler->GoTo(&second_choice);
|
| }
|
| + // At this point we need to generate slow checks for the alternatives where
|
| + // the quick check was inlined. We can recognize these because the associated
|
| + // label was bound.
|
| + for (int i = first_normal_choice; i < choice_count - 1; i++) {
|
| + AlternativeGeneration* alt_gen = alt_gens.at(i);
|
| + if (!EmitOutOfLineContinuation(compiler,
|
| + current_variant,
|
| + alternatives_->at(i),
|
| + alt_gen,
|
| + preload_characters,
|
| + alt_gens.at(i + 1)->expects_preload)) {
|
| + return false;
|
| + }
|
| + }
|
| return true;
|
| }
|
|
|
|
|
| +bool ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
|
| + GenerationVariant* variant,
|
| + GuardedAlternative alternative,
|
| + AlternativeGeneration* alt_gen,
|
| + int preload_characters,
|
| + bool next_expects_preload) {
|
| + if (!alt_gen->possible_success.is_linked()) return true;
|
| +
|
| + RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
| + macro_assembler->Bind(&alt_gen->possible_success);
|
| + GenerationVariant out_of_line_variant(*variant);
|
| + out_of_line_variant.set_characters_preloaded(preload_characters);
|
| + out_of_line_variant.set_quick_check_performed(&alt_gen->quick_check_details);
|
| + ZoneList<Guard*>* guards = alternative.guards();
|
| + int guard_count = (guards == NULL) ? 0 : guards->length();
|
| + if (next_expects_preload) {
|
| + Label reload_current_char;
|
| + out_of_line_variant.set_backtrack(&reload_current_char);
|
| + for (int j = 0; j < guard_count; j++) {
|
| + GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant);
|
| + }
|
| + bool ok = alternative.node()->Emit(compiler, &out_of_line_variant);
|
| + macro_assembler->Bind(&reload_current_char);
|
| + // Reload the current character, since the next quick check expects that.
|
| + // We don't need to check bounds here because we only get into this
|
| + // code through a quick check which already did the checked load.
|
| + macro_assembler->LoadCurrentCharacter(variant->cp_offset(),
|
| + NULL,
|
| + false,
|
| + preload_characters);
|
| + macro_assembler->GoTo(&(alt_gen->after));
|
| + return ok;
|
| + } else {
|
| + out_of_line_variant.set_backtrack(&(alt_gen->after));
|
| + for (int j = 0; j < guard_count; j++) {
|
| + GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant);
|
| + }
|
| + return alternative.node()->Emit(compiler, &out_of_line_variant);
|
| + }
|
| +}
|
| +
|
| +
|
| bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
|
| - RegExpMacroAssembler* macro = compiler->macro_assembler();
|
| + RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
| LimitResult limit_result = LimitVersions(compiler, variant);
|
| if (limit_result == DONE) return true;
|
| if (limit_result == FAIL) return false;
|
| @@ -2238,9 +2932,9 @@
|
| }
|
| case BEGIN_SUBMATCH:
|
| if (!variant->is_trivial()) return variant->Flush(compiler, this);
|
| - macro->WriteCurrentPositionToRegister(
|
| + assembler->WriteCurrentPositionToRegister(
|
| data_.u_submatch.current_position_register, 0);
|
| - macro->WriteStackPointerToRegister(
|
| + assembler->WriteStackPointerToRegister(
|
| data_.u_submatch.stack_pointer_register);
|
| return on_success()->Emit(compiler, variant);
|
| case POSITIVE_SUBMATCH_SUCCESS:
|
| @@ -2255,13 +2949,13 @@
|
| Label at_end;
|
| // Load current character jumps to the label if we are beyond the string
|
| // end.
|
| - macro->LoadCurrentCharacter(0, &at_end);
|
| - macro->GoTo(variant->backtrack());
|
| - macro->Bind(&at_end);
|
| + assembler->LoadCurrentCharacter(0, &at_end);
|
| + assembler->GoTo(variant->backtrack());
|
| + assembler->Bind(&at_end);
|
| }
|
| - macro->ReadCurrentPositionFromRegister(
|
| + assembler->ReadCurrentPositionFromRegister(
|
| data_.u_submatch.current_position_register);
|
| - macro->ReadStackPointerFromRegister(
|
| + assembler->ReadStackPointerFromRegister(
|
| data_.u_submatch.stack_pointer_register);
|
| return on_success()->Emit(compiler, variant);
|
| default:
|
| @@ -2273,7 +2967,7 @@
|
|
|
| bool BackReferenceNode::Emit(RegExpCompiler* compiler,
|
| GenerationVariant* variant) {
|
| - RegExpMacroAssembler* macro = compiler->macro_assembler();
|
| + RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
| if (!variant->is_trivial()) {
|
| return variant->Flush(compiler, this);
|
| }
|
| @@ -2289,12 +2983,15 @@
|
| if (info()->at_end) {
|
| // If we are constrained to match at the end of the input then succeed
|
| // iff the back reference is empty.
|
| - macro->CheckNotRegistersEqual(start_reg_, end_reg_, variant->backtrack());
|
| + assembler->CheckNotRegistersEqual(start_reg_,
|
| + end_reg_,
|
| + variant->backtrack());
|
| } else {
|
| if (compiler->ignore_case()) {
|
| - macro->CheckNotBackReferenceIgnoreCase(start_reg_, variant->backtrack());
|
| + assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
|
| + variant->backtrack());
|
| } else {
|
| - macro->CheckNotBackReference(start_reg_, variant->backtrack());
|
| + assembler->CheckNotBackReference(start_reg_, variant->backtrack());
|
| }
|
| }
|
| return on_success()->Emit(compiler, variant);
|
|
|