Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(70)

Unified Diff: src/jsregexp.cc

Issue 17203: Periodic merge from bleeding_edge to experimental code generator... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/toiger/
Patch Set: Created 11 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/log.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.cc
===================================================================
--- src/jsregexp.cc (revision 1032)
+++ src/jsregexp.cc (working copy)
@@ -267,11 +267,9 @@
} else if (parse_result.tree->IsAtom() &&
!flags.is_ignore_case() &&
parse_result.capture_count == 0) {
- // TODO(lrn) Accept capture_count > 0 on atoms.
RegExpAtom* atom = parse_result.tree->AsAtom();
Vector<const uc16> atom_pattern = atom->data();
- Handle<String> atom_string =
- Factory::NewStringFromTwoByte(atom_pattern);
+ Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
result = AtomCompile(re, pattern, flags, atom_string);
} else if (FLAG_irregexp) {
result = IrregexpPrepare(re, pattern, flags);
@@ -375,7 +373,6 @@
return Handle<Smi>(Smi::FromInt(-1));
}
- LOG(RegExpExecEvent(re, start_index, subject));
int value = Runtime::StringMatch(subject, needle, start_index);
if (value == -1) return Factory::null_value();
@@ -395,7 +392,6 @@
int subject_length = subject->length();
int needle_length = needle->length();
while (true) {
- LOG(RegExpExecEvent(re, index, subject));
int value = -1;
if (index + needle_length <= subject_length) {
value = Runtime::StringMatch(subject, needle, index);
@@ -512,8 +508,9 @@
// Throw an exception.
Handle<JSArray> array = Factory::NewJSArray(2);
SetElement(array, 0, pattern);
- SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(
- (error_message == NULL) ? "Unknown regexp error" : error_message)));
+ const char* message =
+ (error_message == NULL) ? "Unknown regexp error" : error_message;
+ SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
Handle<Object> regexp_err =
Factory::NewSyntaxError("malformed_regexp", array);
Top::Throw(*regexp_err);
@@ -576,8 +573,6 @@
reinterpret_cast<v8::jscre::JscreRegExp*>(
internal->GetDataStartAddress());
- LOG(RegExpExecEvent(regexp, previous_index, subject));
-
rc = v8::jscre::jsRegExpExecute(js_regexp,
two_byte_subject,
subject->length(),
@@ -792,7 +787,6 @@
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
}
#endif
- LOG(RegExpExecEvent(regexp, previous_index, subject));
if (!subject->IsFlat(StringShape(*subject))) {
FlattenString(subject);
@@ -846,7 +840,6 @@
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
}
#endif
- LOG(RegExpExecEvent(regexp, previous_index, subject));
matches = IrregexpExecOnce(irregexp,
IrregexpNumberOfCaptures(irregexp),
subject,
@@ -1423,7 +1416,8 @@
cp_offset_ != 0 ||
backtrack() != NULL ||
characters_preloaded_ != 0 ||
- quick_check_performed_.characters() != 0);
+ quick_check_performed_.characters() != 0 ||
+ bound_checked_up_to_ != 0);
if (actions_ == NULL && backtrack() == NULL) {
// Here we just have some deferred cp advances to fix and we are back to
@@ -1647,16 +1641,23 @@
static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
+ bool ascii,
uc16 c1,
uc16 c2,
Label* on_failure) {
+ uc16 char_mask;
+ if (ascii) {
+ char_mask = String::kMaxAsciiCharCode;
+ } else {
+ char_mask = String::kMaxUC16CharCode;
+ }
uc16 exor = c1 ^ c2;
// Check whether exor has only one bit set.
if (((exor - 1) & exor) == 0) {
// If c1 and c2 differ only by one bit.
// Ecma262UnCanonicalize always gives the highest number last.
ASSERT(c2 > c1);
- uc16 mask = String::kMaxUC16CharCode ^ exor;
+ uc16 mask = char_mask ^ exor;
macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
return true;
}
@@ -1667,7 +1668,7 @@
// subtract the difference from the found character, then do the or
// trick. We avoid the theoretical case where negative numbers are
// involved in order to simplify code generation.
- uc16 mask = String::kMaxUC16CharCode ^ diff;
+ uc16 mask = char_mask ^ diff;
macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
diff,
mask,
@@ -1682,6 +1683,7 @@
// matches.
static inline bool EmitAtomLetter(
RegExpMacroAssembler* macro_assembler,
+ bool ascii,
uc16 c,
Label* on_failure,
int cp_offset,
@@ -1700,6 +1702,7 @@
switch (length) {
case 2: {
if (ShortCutEmitCharacterPair(macro_assembler,
+ ascii,
chars[0],
chars[1],
on_failure)) {
@@ -1734,6 +1737,14 @@
bool check_offset,
bool ascii,
bool preloaded) {
+ if (cc->is_standard() &&
+ macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
+ cp_offset,
+ check_offset,
+ on_failure)) {
+ return;
+ }
+
ZoneList<CharacterRange>* ranges = cc->ranges();
int max_char;
if (ascii) {
@@ -2007,6 +2018,7 @@
char_mask = String::kMaxUC16CharCode;
}
if ((mask & char_mask) == char_mask) need_mask = false;
+ mask &= char_mask;
} else {
// For 2-character preloads in ASCII mode we also use a 16 bit load with
// zero extend.
@@ -2323,6 +2335,7 @@
ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
ASSERT(compiler->ignore_case());
bound_checked = EmitAtomLetter(assembler,
+ compiler->ascii(),
quarks[j],
backtrack,
cp_offset + j,
@@ -2403,9 +2416,7 @@
bool first_elt_done = false;
int bound_checked_to = variant->cp_offset() - 1;
- QuickCheckDetails* quick_check = variant->quick_check_performed();
- bound_checked_to += Max(quick_check->characters(),
- variant->characters_preloaded());
+ bound_checked_to += variant->bound_checked_up_to();
// If a character is preloaded into the current character register then
// check that now.
@@ -2472,6 +2483,7 @@
// characters by means of mask and compare.
quick_check_performed_.Advance(by, ascii);
cp_offset_ += by;
+ bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
}
@@ -2779,8 +2791,9 @@
int first_normal_choice = greedy_loop ? 1 : 0;
int preload_characters = CalculatePreloadCharacters(compiler);
- bool preload_is_current = false;
- bool preload_has_checked_bounds = false;
+ bool preload_is_current =
+ (current_variant->characters_preloaded() == preload_characters);
+ bool preload_has_checked_bounds = preload_is_current;
AlternativeGenerationList alt_gens(choice_count);
@@ -2792,11 +2805,13 @@
alt_gen->quick_check_details.set_characters(preload_characters);
ZoneList<Guard*>* guards = alternative.guards();
int guard_count = (guards == NULL) ? 0 : guards->length();
-
GenerationVariant new_variant(*current_variant);
new_variant.set_characters_preloaded(preload_is_current ?
preload_characters :
0);
+ if (preload_has_checked_bounds) {
+ new_variant.set_bound_checked_up_to(preload_characters);
+ }
new_variant.quick_check_performed()->Clear();
alt_gen->expects_preload = preload_is_current;
bool generate_full_check_inline = false;
@@ -2816,19 +2831,25 @@
macro_assembler->Bind(&alt_gen->possible_success);
new_variant.set_quick_check_performed(&alt_gen->quick_check_details);
new_variant.set_characters_preloaded(preload_characters);
+ new_variant.set_bound_checked_up_to(preload_characters);
generate_full_check_inline = true;
}
} else {
// No quick check was generated. Put the full code here.
+ // If this is not the first choice then there could be slow checks from
+ // previous cases that go here when they fail. There's no reason to
+ // insist that they preload characters since the slow check we are about
+ // to generate probably can't use it.
+ if (i != first_normal_choice) {
+ alt_gen->expects_preload = false;
+ new_variant.set_characters_preloaded(0);
+ }
if (i < choice_count - 1) {
new_variant.set_backtrack(&alt_gen->after);
}
generate_full_check_inline = true;
}
if (generate_full_check_inline) {
- if (preload_is_current) {
- new_variant.set_characters_preloaded(preload_characters);
- }
for (int j = 0; j < guard_count; j++) {
GenerateGuard(macro_assembler, guards->at(j), &new_variant);
}
@@ -3325,7 +3346,23 @@
// -------------------------------------------------------------------
// Tree to graph conversion
+static const int kSpaceRangeCount = 20;
+static const int kSpaceRangeAsciiCount = 4;
+static const uc16 kSpaceRanges[kSpaceRangeCount] = { 0x0009, 0x000D, 0x0020,
+ 0x0020, 0x00A0, 0x00A0, 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A,
+ 0x2028, 0x2029, 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 };
+static const int kWordRangeCount = 8;
+static const uc16 kWordRanges[kWordRangeCount] = { '0', '9', 'A', 'Z', '_',
+ '_', 'a', 'z' };
+
+static const int kDigitRangeCount = 2;
+static const uc16 kDigitRanges[kDigitRangeCount] = { '0', '9' };
+
+static const int kLineTerminatorRangeCount = 6;
+static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = { 0x000A,
+ 0x000A, 0x000D, 0x000D, 0x2028, 0x2029 };
+
RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
@@ -3339,7 +3376,78 @@
return new TextNode(elements(), on_success);
}
+static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
+ const uc16* special_class,
+ int length) {
+ ASSERT(ranges->length() != 0);
+ ASSERT(length != 0);
+ ASSERT(special_class[0] != 0);
+ if (ranges->length() != (length >> 1) + 1) {
+ return false;
+ }
+ CharacterRange range = ranges->at(0);
+ if (range.from() != 0) {
+ return false;
+ }
+ for (int i = 0; i < length; i += 2) {
+ if (special_class[i] != (range.to() + 1)) {
+ return false;
+ }
+ range = ranges->at((i >> 1) + 1);
+ if (special_class[i+1] != range.from() - 1) {
+ return false;
+ }
+ }
+ if (range.to() != 0xffff) {
+ return false;
+ }
+ return true;
+}
+
+static bool CompareRanges(ZoneList<CharacterRange>* ranges,
+ const uc16* special_class,
+ int length) {
+ if (ranges->length() * 2 != length) {
+ return false;
+ }
+ for (int i = 0; i < length; i += 2) {
+ CharacterRange range = ranges->at(i >> 1);
+ if (range.from() != special_class[i] || range.to() != special_class[i+1]) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+bool RegExpCharacterClass::is_standard() {
+ // TODO(lrn): Remove need for this function, by not throwing away information
+ // along the way.
+ if (is_negated_) {
+ return false;
+ }
+ if (set_.is_standard()) {
+ return true;
+ }
+ if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {
+ set_.set_standard_set_type('s');
+ return true;
+ }
+ if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {
+ set_.set_standard_set_type('S');
+ return true;
+ }
+ if (CompareInverseRanges(set_.ranges(),
+ kLineTerminatorRanges,
+ kLineTerminatorRangeCount)) {
+ set_.set_standard_set_type('.');
+ return true;
+ }
+ return false;
+}
+
+
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
return new TextNode(this, on_success);
@@ -3580,32 +3688,6 @@
}
-static const int kSpaceRangeCount = 20;
-static const uc16 kSpaceRanges[kSpaceRangeCount] = {
- 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680,
- 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,
- 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000
-};
-
-
-static const int kWordRangeCount = 8;
-static const uc16 kWordRanges[kWordRangeCount] = {
- '0', '9', 'A', 'Z', '_', '_', 'a', 'z'
-};
-
-
-static const int kDigitRangeCount = 2;
-static const uc16 kDigitRanges[kDigitRangeCount] = {
- '0', '9'
-};
-
-
-static const int kLineTerminatorRangeCount = 6;
-static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = {
- 0x000A, 0x000A, 0x000D, 0x000D, 0x2028, 0x2029
-};
-
-
static void AddClass(const uc16* elmv,
int elmc,
ZoneList<CharacterRange>* ranges) {
@@ -3801,6 +3883,16 @@
}
+ZoneList<CharacterRange>* CharacterSet::ranges() {
+ if (ranges_ == NULL) {
+ ranges_ = new ZoneList<CharacterRange>(2);
+ CharacterRange::AddClassEscape(standard_set_type_, ranges_);
+ }
+ return ranges_;
+}
+
+
+
// -------------------------------------------------------------------
// Interest propagation
« no previous file with comments | « src/jsregexp.h ('k') | src/log.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698