src/jsregexp.cc - Issue 17203: Periodic merge from bleeding_edge to experimental code generator...

Unified Diff: src/jsregexp.cc

Issue 17203: Periodic merge from bleeding_edge to experimental code generator... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/toiger/

Patch Set: Created 11 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/jsregexp.cc

===================================================================

--- src/jsregexp.cc (revision 1032)

+++ src/jsregexp.cc (working copy)

@@ -267,11 +267,9 @@

} else if (parse_result.tree->IsAtom() &&

!flags.is_ignore_case() &&

parse_result.capture_count == 0) {

- // TODO(lrn) Accept capture_count > 0 on atoms.

RegExpAtom* atom = parse_result.tree->AsAtom();

Vector<const uc16> atom_pattern = atom->data();

- Handle<String> atom_string =

- Factory::NewStringFromTwoByte(atom_pattern);

+ Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);

result = AtomCompile(re, pattern, flags, atom_string);

} else if (FLAG_irregexp) {

result = IrregexpPrepare(re, pattern, flags);

@@ -375,7 +373,6 @@

return Handle<Smi>(Smi::FromInt(-1));

}

- LOG(RegExpExecEvent(re, start_index, subject));

int value = Runtime::StringMatch(subject, needle, start_index);

if (value == -1) return Factory::null_value();

@@ -395,7 +392,6 @@

int subject_length = subject->length();

int needle_length = needle->length();

while (true) {

- LOG(RegExpExecEvent(re, index, subject));

int value = -1;

if (index + needle_length <= subject_length) {

value = Runtime::StringMatch(subject, needle, index);

@@ -512,8 +508,9 @@

// Throw an exception.

Handle<JSArray> array = Factory::NewJSArray(2);

SetElement(array, 0, pattern);

- SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(

- (error_message == NULL) ? "Unknown regexp error" : error_message)));

+ const char* message =

+ (error_message == NULL) ? "Unknown regexp error" : error_message;

+ SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));

Handle<Object> regexp_err =

Factory::NewSyntaxError("malformed_regexp", array);

Top::Throw(*regexp_err);

@@ -576,8 +573,6 @@

reinterpret_cast<v8::jscre::JscreRegExp*>(

internal->GetDataStartAddress());

- LOG(RegExpExecEvent(regexp, previous_index, subject));

rc = v8::jscre::jsRegExpExecute(js_regexp,

two_byte_subject,

subject->length(),

@@ -792,7 +787,6 @@

PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));

}

#endif

- LOG(RegExpExecEvent(regexp, previous_index, subject));

if (!subject->IsFlat(StringShape(*subject))) {

FlattenString(subject);

@@ -846,7 +840,6 @@

PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));

}

#endif

- LOG(RegExpExecEvent(regexp, previous_index, subject));

matches = IrregexpExecOnce(irregexp,

IrregexpNumberOfCaptures(irregexp),

subject,

@@ -1423,7 +1416,8 @@

cp_offset_ != 0 ||

backtrack() != NULL ||

characters_preloaded_ != 0 ||

- quick_check_performed_.characters() != 0);

+ quick_check_performed_.characters() != 0 ||

+ bound_checked_up_to_ != 0);

if (actions_ == NULL && backtrack() == NULL) {

// Here we just have some deferred cp advances to fix and we are back to

@@ -1647,16 +1641,23 @@

static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,

+ bool ascii,

uc16 c1,

uc16 c2,

Label* on_failure) {

+ uc16 char_mask;

+ if (ascii) {

+ char_mask = String::kMaxAsciiCharCode;

+ } else {

+ char_mask = String::kMaxUC16CharCode;

+ }

uc16 exor = c1 ^ c2;

// Check whether exor has only one bit set.

if (((exor - 1) & exor) == 0) {

// If c1 and c2 differ only by one bit.

// Ecma262UnCanonicalize always gives the highest number last.

ASSERT(c2 > c1);

- uc16 mask = String::kMaxUC16CharCode ^ exor;

+ uc16 mask = char_mask ^ exor;

macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);

return true;

}

@@ -1667,7 +1668,7 @@

// subtract the difference from the found character, then do the or

// trick. We avoid the theoretical case where negative numbers are

// involved in order to simplify code generation.

- uc16 mask = String::kMaxUC16CharCode ^ diff;

+ uc16 mask = char_mask ^ diff;

macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,

diff,

mask,

@@ -1682,6 +1683,7 @@

// matches.

static inline bool EmitAtomLetter(

RegExpMacroAssembler* macro_assembler,

+ bool ascii,

uc16 c,

Label* on_failure,

int cp_offset,

@@ -1700,6 +1702,7 @@

switch (length) {

case 2: {

if (ShortCutEmitCharacterPair(macro_assembler,

+ ascii,

chars[0],

chars[1],

on_failure)) {

@@ -1734,6 +1737,14 @@

bool check_offset,

bool ascii,

bool preloaded) {

+ if (cc->is_standard() &&

+ macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),

+ cp_offset,

+ check_offset,

+ on_failure)) {

+ return;

+ }

ZoneList<CharacterRange>* ranges = cc->ranges();

int max_char;

if (ascii) {

@@ -2007,6 +2018,7 @@

char_mask = String::kMaxUC16CharCode;

}

if ((mask & char_mask) == char_mask) need_mask = false;

+ mask &= char_mask;

} else {

// For 2-character preloads in ASCII mode we also use a 16 bit load with

// zero extend.

@@ -2323,6 +2335,7 @@

ASSERT_EQ(pass, CASE_CHARACTER_MATCH);

ASSERT(compiler->ignore_case());

bound_checked = EmitAtomLetter(assembler,

+ compiler->ascii(),

quarks[j],

backtrack,

cp_offset + j,

@@ -2403,9 +2416,7 @@

bool first_elt_done = false;

int bound_checked_to = variant->cp_offset() - 1;

- QuickCheckDetails* quick_check = variant->quick_check_performed();

- bound_checked_to += Max(quick_check->characters(),

- variant->characters_preloaded());

+ bound_checked_to += variant->bound_checked_up_to();

// If a character is preloaded into the current character register then

// check that now.

@@ -2472,6 +2483,7 @@

// characters by means of mask and compare.

quick_check_performed_.Advance(by, ascii);

cp_offset_ += by;

+ bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);

}

@@ -2779,8 +2791,9 @@

int first_normal_choice = greedy_loop ? 1 : 0;

int preload_characters = CalculatePreloadCharacters(compiler);

- bool preload_is_current = false;

- bool preload_has_checked_bounds = false;

+ bool preload_is_current =

+ (current_variant->characters_preloaded() == preload_characters);

+ bool preload_has_checked_bounds = preload_is_current;

AlternativeGenerationList alt_gens(choice_count);

@@ -2792,11 +2805,13 @@

alt_gen->quick_check_details.set_characters(preload_characters);

ZoneList<Guard*>* guards = alternative.guards();

int guard_count = (guards == NULL) ? 0 : guards->length();

GenerationVariant new_variant(*current_variant);

new_variant.set_characters_preloaded(preload_is_current ?

preload_characters :

0);

+ if (preload_has_checked_bounds) {

+ new_variant.set_bound_checked_up_to(preload_characters);

+ }

new_variant.quick_check_performed()->Clear();

alt_gen->expects_preload = preload_is_current;

bool generate_full_check_inline = false;

@@ -2816,19 +2831,25 @@

macro_assembler->Bind(&alt_gen->possible_success);

new_variant.set_quick_check_performed(&alt_gen->quick_check_details);

new_variant.set_characters_preloaded(preload_characters);

+ new_variant.set_bound_checked_up_to(preload_characters);

generate_full_check_inline = true;

}

} else {

// No quick check was generated. Put the full code here.

+ // If this is not the first choice then there could be slow checks from

+ // previous cases that go here when they fail. There's no reason to

+ // insist that they preload characters since the slow check we are about

+ // to generate probably can't use it.

+ if (i != first_normal_choice) {

+ alt_gen->expects_preload = false;

+ new_variant.set_characters_preloaded(0);

+ }

if (i < choice_count - 1) {

new_variant.set_backtrack(&alt_gen->after);

}

generate_full_check_inline = true;

}

if (generate_full_check_inline) {

- if (preload_is_current) {

- new_variant.set_characters_preloaded(preload_characters);

- }

for (int j = 0; j < guard_count; j++) {

GenerateGuard(macro_assembler, guards->at(j), &new_variant);

}

@@ -3325,7 +3346,23 @@

// -------------------------------------------------------------------

// Tree to graph conversion

+static const int kSpaceRangeCount = 20;

+static const int kSpaceRangeAsciiCount = 4;

+static const uc16 kSpaceRanges[kSpaceRangeCount] = { 0x0009, 0x000D, 0x0020,

+ 0x0020, 0x00A0, 0x00A0, 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A,

+ 0x2028, 0x2029, 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 };

+static const int kWordRangeCount = 8;

+static const uc16 kWordRanges[kWordRangeCount] = { '0', '9', 'A', 'Z', '_',

+ '_', 'a', 'z' };

+static const int kDigitRangeCount = 2;

+static const uc16 kDigitRanges[kDigitRangeCount] = { '0', '9' };

+static const int kLineTerminatorRangeCount = 6;

+static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = { 0x000A,

+ 0x000A, 0x000D, 0x000D, 0x2028, 0x2029 };

RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,

RegExpNode* on_success) {

ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);

@@ -3339,7 +3376,78 @@

return new TextNode(elements(), on_success);

}

+static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,

+ const uc16* special_class,

+ int length) {

+ ASSERT(ranges->length() != 0);

+ ASSERT(length != 0);

+ ASSERT(special_class[0] != 0);

+ if (ranges->length() != (length >> 1) + 1) {

+ return false;

+ }

+ CharacterRange range = ranges->at(0);

+ if (range.from() != 0) {

+ return false;

+ }

+ for (int i = 0; i < length; i += 2) {

+ if (special_class[i] != (range.to() + 1)) {

+ return false;

+ }

+ range = ranges->at((i >> 1) + 1);

+ if (special_class[i+1] != range.from() - 1) {

+ return false;

+ }

+ if (range.to() != 0xffff) {

+ return false;

+ }

+ return true;

+static bool CompareRanges(ZoneList<CharacterRange>* ranges,

+ const uc16* special_class,

+ int length) {

+ if (ranges->length() * 2 != length) {

+ return false;

+ }

+ for (int i = 0; i < length; i += 2) {

+ CharacterRange range = ranges->at(i >> 1);

+ if (range.from() != special_class[i] || range.to() != special_class[i+1]) {

+ return false;

+ }

+ return true;

+bool RegExpCharacterClass::is_standard() {

+ // TODO(lrn): Remove need for this function, by not throwing away information

+ // along the way.

+ if (is_negated_) {

+ return false;

+ }

+ if (set_.is_standard()) {

+ return true;

+ }

+ if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {

+ set_.set_standard_set_type('s');

+ return true;

+ }

+ if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {

+ set_.set_standard_set_type('S');

+ return true;

+ }

+ if (CompareInverseRanges(set_.ranges(),

+ kLineTerminatorRanges,

+ kLineTerminatorRangeCount)) {

+ set_.set_standard_set_type('.');

+ return true;

+ }

+ return false;

RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,

RegExpNode* on_success) {

return new TextNode(this, on_success);

@@ -3580,32 +3688,6 @@

}

-static const int kSpaceRangeCount = 20;

-static const uc16 kSpaceRanges[kSpaceRangeCount] = {

- 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680,

- 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,

- 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000

-};

-static const int kWordRangeCount = 8;

-static const uc16 kWordRanges[kWordRangeCount] = {

- '0', '9', 'A', 'Z', '_', '_', 'a', 'z'

-};

-static const int kDigitRangeCount = 2;

-static const uc16 kDigitRanges[kDigitRangeCount] = {

- '0', '9'

-};

-static const int kLineTerminatorRangeCount = 6;

-static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = {

- 0x000A, 0x000A, 0x000D, 0x000D, 0x2028, 0x2029

-};

static void AddClass(const uc16* elmv,

int elmc,

ZoneList<CharacterRange>* ranges) {

@@ -3801,6 +3883,16 @@

}

+ZoneList<CharacterRange>* CharacterSet::ranges() {

+ if (ranges_ == NULL) {

+ ranges_ = new ZoneList<CharacterRange>(2);

+ CharacterRange::AddClassEscape(standard_set_type_, ranges_);

+ }

+ return ranges_;

// -------------------------------------------------------------------

// Interest propagation

« no previous file with comments | « src/jsregexp.h ('k') | src/log.h » ('j') | no next file with comments »