Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(165)

Unified Diff: src/jsregexp.cc

Issue 559913002: Rename ascii to one-byte where applicable. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/liveedit.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.cc
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index 8a94e819e2c1cbbc04e79e904b80b2e6fef4acdf..0d9f83af37ba48b7ee7b2cf24e81e3ed34d151ab 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -290,25 +290,18 @@ int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,
DCHECK(needle_content.IsFlat());
DCHECK(subject_content.IsFlat());
// dispatch on type of strings
- index = (needle_content.IsAscii()
- ? (subject_content.IsAscii()
- ? SearchString(isolate,
- subject_content.ToOneByteVector(),
- needle_content.ToOneByteVector(),
- index)
- : SearchString(isolate,
- subject_content.ToUC16Vector(),
- needle_content.ToOneByteVector(),
- index))
- : (subject_content.IsAscii()
- ? SearchString(isolate,
- subject_content.ToOneByteVector(),
- needle_content.ToUC16Vector(),
- index)
- : SearchString(isolate,
- subject_content.ToUC16Vector(),
- needle_content.ToUC16Vector(),
- index)));
+ index =
+ (needle_content.IsOneByte()
+ ? (subject_content.IsOneByte()
+ ? SearchString(isolate, subject_content.ToOneByteVector(),
+ needle_content.ToOneByteVector(), index)
+ : SearchString(isolate, subject_content.ToUC16Vector(),
+ needle_content.ToOneByteVector(), index))
+ : (subject_content.IsOneByte()
+ ? SearchString(isolate, subject_content.ToOneByteVector(),
+ needle_content.ToUC16Vector(), index)
+ : SearchString(isolate, subject_content.ToUC16Vector(),
+ needle_content.ToUC16Vector(), index)));
if (index == -1) {
return i / 2; // Return number of matches.
} else {
@@ -346,14 +339,15 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
// Irregexp implementation.
// Ensures that the regexp object contains a compiled version of the
-// source for either ASCII or non-ASCII strings.
+// source for either one-byte or two-byte subject strings.
// If the compiled version doesn't already exist, it is compiled
// from the source pattern.
// If compilation fails, an exception is thrown and this function
// returns false.
-bool RegExpImpl::EnsureCompiledIrregexp(
- Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii) {
- Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));
+bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
+ Handle<String> sample_subject,
+ bool is_one_byte) {
+ Object* compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte));
#ifdef V8_INTERPRETED_REGEXP
if (compiled_code->IsByteArray()) return true;
#else // V8_INTERPRETED_REGEXP (RegExp native code)
@@ -361,18 +355,18 @@ bool RegExpImpl::EnsureCompiledIrregexp(
#endif
// We could potentially have marked this as flushable, but have kept
// a saved version if we did not flush it yet.
- Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));
+ Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
if (saved_code->IsCode()) {
// Reinstate the code in the original place.
- re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);
+ re->SetDataAt(JSRegExp::code_index(is_one_byte), saved_code);
DCHECK(compiled_code->IsSmi());
return true;
}
- return CompileIrregexp(re, sample_subject, is_ascii);
+ return CompileIrregexp(re, sample_subject, is_one_byte);
}
-static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re, bool is_ascii,
+static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
Handle<String> error_message,
Isolate* isolate) {
Factory* factory = isolate->factory();
@@ -389,14 +383,14 @@ static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re, bool is_ascii,
bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
Handle<String> sample_subject,
- bool is_ascii) {
+ bool is_one_byte) {
// Compile the RegExp.
Isolate* isolate = re->GetIsolate();
Zone zone(isolate);
PostponeInterruptsScope postpone(isolate);
// If we had a compilation error the last time this is saved at the
// saved code index.
- Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
+ Object* entry = re->DataAt(JSRegExp::code_index(is_one_byte));
// When arriving here entry can only be a smi, either representing an
// uncompiled regexp, a previous compilation error, or code that has
// been flushed.
@@ -410,10 +404,10 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
// A previous compilation failed and threw an error which we store in
// the saved code index (we store the error message, not the actual
// error). Recreate the error object and throw it.
- Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
+ Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
DCHECK(error_string->IsString());
Handle<String> error_message(String::cast(error_string));
- CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
+ CreateRegExpErrorObjectAndThrow(re, error_message, isolate);
return false;
}
@@ -434,25 +428,19 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
"malformed_regexp"));
return false;
}
- RegExpEngine::CompilationResult result =
- RegExpEngine::Compile(&compile_data,
- flags.is_ignore_case(),
- flags.is_global(),
- flags.is_multiline(),
- pattern,
- sample_subject,
- is_ascii,
- &zone);
+ RegExpEngine::CompilationResult result = RegExpEngine::Compile(
+ &compile_data, flags.is_ignore_case(), flags.is_global(),
+ flags.is_multiline(), pattern, sample_subject, is_one_byte, &zone);
if (result.error_message != NULL) {
// Unable to compile regexp.
Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
CStrVector(result.error_message)).ToHandleChecked();
- CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
+ CreateRegExpErrorObjectAndThrow(re, error_message, isolate);
return false;
}
Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
- data->set(JSRegExp::code_index(is_ascii), result.code);
+ data->set(JSRegExp::code_index(is_one_byte), result.code);
int register_max = IrregexpMaxRegisterCount(*data);
if (result.num_registers > register_max) {
SetIrregexpMaxRegisterCount(*data, result.num_registers);
@@ -483,13 +471,13 @@ int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {
}
-ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {
- return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));
+ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_one_byte) {
+ return ByteArray::cast(re->get(JSRegExp::code_index(is_one_byte)));
}
-Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
- return Code::cast(re->get(JSRegExp::code_index(is_ascii)));
+Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_one_byte) {
+ return Code::cast(re->get(JSRegExp::code_index(is_one_byte)));
}
@@ -510,9 +498,9 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
Handle<String> subject) {
subject = String::Flatten(subject);
- // Check the asciiness of the underlying storage.
- bool is_ascii = subject->IsOneByteRepresentationUnderneath();
- if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1;
+ // Check representation of the underlying storage.
+ bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
+ if (!EnsureCompiledIrregexp(regexp, subject, is_one_byte)) return -1;
#ifdef V8_INTERPRETED_REGEXP
// Byte-code regexp needs space allocated for all its registers.
@@ -542,13 +530,13 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
DCHECK(index <= subject->length());
DCHECK(subject->IsFlat());
- bool is_ascii = subject->IsOneByteRepresentationUnderneath();
+ bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
#ifndef V8_INTERPRETED_REGEXP
DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
do {
- EnsureCompiledIrregexp(regexp, subject, is_ascii);
- Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
+ EnsureCompiledIrregexp(regexp, subject, is_one_byte);
+ Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);
// The stack is used to allocate registers for the compiled regexp code.
// This means that in case of failure, the output registers array is left
// untouched and contains the capture results from the previous successful
@@ -575,10 +563,10 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
// must restart from scratch.
// In this case, it means we must make sure we are prepared to handle
// the, potentially, different subject (the string can switch between
- // being internal and external, and even between being ASCII and UC16,
+ // being internal and external, and even between being Latin1 and UC16,
// but the characters are always the same).
IrregexpPrepare(regexp, subject);
- is_ascii = subject->IsOneByteRepresentationUnderneath();
+ is_one_byte = subject->IsOneByteRepresentationUnderneath();
} while (true);
UNREACHABLE();
return RE_EXCEPTION;
@@ -596,7 +584,8 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
raw_output[i] = -1;
}
- Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
+ Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
+ isolate);
IrregexpResult result = IrregexpInterpreter::Match(isolate,
byte_codes,
@@ -997,7 +986,7 @@ class FrequencyCollator {
class RegExpCompiler {
public:
- RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii,
+ RegExpCompiler(int capture_count, bool ignore_case, bool is_one_byte,
Zone* zone);
int AllocateRegister() {
@@ -1030,7 +1019,7 @@ class RegExpCompiler {
void SetRegExpTooBig() { reg_exp_too_big_ = true; }
inline bool ignore_case() { return ignore_case_; }
- inline bool ascii() { return ascii_; }
+ inline bool one_byte() { return one_byte_; }
FrequencyCollator* frequency_collator() { return &frequency_collator_; }
int current_expansion_factor() { return current_expansion_factor_; }
@@ -1049,7 +1038,7 @@ class RegExpCompiler {
int recursion_depth_;
RegExpMacroAssembler* macro_assembler_;
bool ignore_case_;
- bool ascii_;
+ bool one_byte_;
bool reg_exp_too_big_;
int current_expansion_factor_;
FrequencyCollator frequency_collator_;
@@ -1075,13 +1064,13 @@ static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {
// Attempts to compile the regexp using an Irregexp code generator. Returns
// a fixed array or a null handle depending on whether it succeeded.
-RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii,
- Zone* zone)
+RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case,
+ bool one_byte, Zone* zone)
: next_register_(2 * (capture_count + 1)),
work_list_(NULL),
recursion_depth_(0),
ignore_case_(ignore_case),
- ascii_(ascii),
+ one_byte_(one_byte),
reg_exp_too_big_(false),
current_expansion_factor_(1),
frequency_collator_(),
@@ -1592,9 +1581,8 @@ void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
// Returns the number of characters in the equivalence class, omitting those
// that cannot occur in the source string because it is ASCII.
-static int GetCaseIndependentLetters(Isolate* isolate,
- uc16 character,
- bool ascii_subject,
+static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
+ bool one_byte_subject,
unibrow::uchar* letters) {
int length =
isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
@@ -1604,11 +1592,14 @@ static int GetCaseIndependentLetters(Isolate* isolate,
letters[0] = character;
length = 1;
}
- if (!ascii_subject || character <= String::kMaxOneByteCharCode) {
+ if (!one_byte_subject || character <= String::kMaxOneByteCharCode) {
return length;
}
+
// The standard requires that non-ASCII characters cannot have ASCII
// character codes in their equivalence class.
+ // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore,
+ // is it? For example, \u00C5 is equivalent to \u212B.
Yang 2014/09/10 08:26:36 This is one of the TODOs I mentioned.
dcarney 2014/09/10 09:35:12 I checked other browsers I think originally, and w
return 0;
}
@@ -1644,18 +1635,19 @@ static inline bool EmitAtomNonLetter(Isolate* isolate,
bool check,
bool preloaded) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- bool ascii = compiler->ascii();
+ bool one_byte = compiler->one_byte();
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
+ int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
if (length < 1) {
- // This can't match. Must be an ASCII subject and a non-ASCII character.
- // We do not need to do anything since the ASCII pass already handled this.
+ // This can't match. Must be an one-byte subject and a non-one-byte
+ // character. We do not need to do anything since the one-byte pass
+ // already handled this.
return false; // Bounds not checked.
}
bool checked = false;
// We handle the length > 1 case in a later pass.
if (length == 1) {
- if (ascii && c > String::kMaxOneByteCharCodeU) {
+ if (one_byte && c > String::kMaxOneByteCharCodeU) {
// Can't match - see above.
return false; // Bounds not checked.
}
@@ -1670,12 +1662,10 @@ static inline bool EmitAtomNonLetter(Isolate* isolate,
static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
- bool ascii,
- uc16 c1,
- uc16 c2,
+ bool one_byte, uc16 c1, uc16 c2,
Label* on_failure) {
uc16 char_mask;
- if (ascii) {
+ if (one_byte) {
char_mask = String::kMaxOneByteCharCode;
} else {
char_mask = String::kMaxUtf16CodeUnit;
@@ -1726,9 +1716,9 @@ static inline bool EmitAtomLetter(Isolate* isolate,
bool check,
bool preloaded) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- bool ascii = compiler->ascii();
+ bool one_byte = compiler->one_byte();
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
+ int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
if (length <= 1) return false;
// We may not need to check against the end of the input string
// if this character lies before a character that matched.
@@ -1739,11 +1729,8 @@ static inline bool EmitAtomLetter(Isolate* isolate,
DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
switch (length) {
case 2: {
- if (ShortCutEmitCharacterPair(macro_assembler,
- ascii,
- chars[0],
- chars[1],
- on_failure)) {
+ if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
+ chars[1], on_failure)) {
} else {
macro_assembler->CheckCharacter(chars[0], &ok);
macro_assembler->CheckNotCharacter(chars[1], on_failure);
@@ -1918,7 +1905,7 @@ static void SplitSearchSpace(ZoneList<int>* ranges,
// new_start_index is the index of the first edge that is beyond the
// current kSize space.
- // For very large search spaces we do a binary chop search of the non-ASCII
+ // For very large search spaces we do a binary chop search of the non-Latin1
// space instead of just going to the end of the current kSize space. The
// heuristics are complicated a little by the fact that any 128-character
// encoding space can be quickly tested with a table lookup, so we don't
@@ -1927,14 +1914,13 @@ static void SplitSearchSpace(ZoneList<int>* ranges,
// for example, we only want to match every second character (eg. the lower
// case characters on some Unicode pages).
int binary_chop_index = (end_index + start_index) / 2;
- // The first test ensures that we get to the code that handles the ASCII
+ // The first test ensures that we get to the code that handles the Latin1
// range with a single not-taken branch, speeding up this important
- // character range (even non-ASCII charset-based text has spaces and
+ // character range (even non-Latin1 charset-based text has spaces and
// punctuation).
- if (*border - 1 > String::kMaxOneByteCharCode && // ASCII case.
+ if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case.
end_index - start_index > (*new_start_index - start_index) * 2 &&
- last - first > kSize * 2 &&
- binary_chop_index > *new_start_index &&
+ last - first > kSize * 2 && binary_chop_index > *new_start_index &&
ranges->at(binary_chop_index) >= first + 2 * kSize) {
int scan_forward_for_section_border = binary_chop_index;;
int new_border = (ranges->at(binary_chop_index) | kMask) + 1;
@@ -2121,20 +2107,16 @@ static void GenerateBranches(RegExpMacroAssembler* masm,
static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
- RegExpCharacterClass* cc,
- bool ascii,
- Label* on_failure,
- int cp_offset,
- bool check_offset,
- bool preloaded,
- Zone* zone) {
+ RegExpCharacterClass* cc, bool one_byte,
+ Label* on_failure, int cp_offset, bool check_offset,
+ bool preloaded, Zone* zone) {
ZoneList<CharacterRange>* ranges = cc->ranges(zone);
if (!CharacterRange::IsCanonical(ranges)) {
CharacterRange::Canonicalize(ranges);
}
int max_char;
- if (ascii) {
+ if (one_byte) {
max_char = String::kMaxOneByteCharCode;
} else {
max_char = String::kMaxUtf16CodeUnit;
@@ -2464,7 +2446,7 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
GetQuickCheckDetails(
details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
if (details->cannot_match()) return false;
- if (!details->Rationalize(compiler->ascii())) return false;
+ if (!details->Rationalize(compiler->one_byte())) return false;
DCHECK(details->characters() == 1 ||
compiler->macro_assembler()->CanReadUnaligned());
uint32_t mask = details->mask();
@@ -2486,7 +2468,7 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
// If number of characters preloaded is 1 then we used a byte or 16 bit
// load so the value is already masked down.
uint32_t char_mask;
- if (compiler->ascii()) {
+ if (compiler->one_byte()) {
char_mask = String::kMaxOneByteCharCode;
} else {
char_mask = String::kMaxUtf16CodeUnit;
@@ -2494,11 +2476,11 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
if ((mask & char_mask) == char_mask) need_mask = false;
mask &= char_mask;
} else {
- // For 2-character preloads in ASCII mode or 1-character preloads in
- // TWO_BYTE mode we also use a 16 bit load with zero extend.
- if (details->characters() == 2 && compiler->ascii()) {
+ // For 2-character preloads in one-byte mode or 1-character preloads in
+ // two-byte mode we also use a 16 bit load with zero extend.
+ if (details->characters() == 2 && compiler->one_byte()) {
if ((mask & 0xffff) == 0xffff) need_mask = false;
- } else if (details->characters() == 1 && !compiler->ascii()) {
+ } else if (details->characters() == 1 && !compiler->one_byte()) {
if ((mask & 0xffff) == 0xffff) need_mask = false;
} else {
if (mask == 0xffffffff) need_mask = false;
@@ -2538,7 +2520,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
DCHECK(characters_filled_in < details->characters());
int characters = details->characters();
int char_mask;
- if (compiler->ascii()) {
+ if (compiler->one_byte()) {
char_mask = String::kMaxOneByteCharCode;
} else {
char_mask = String::kMaxUtf16CodeUnit;
@@ -2552,18 +2534,20 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
details->positions(characters_filled_in);
uc16 c = quarks[i];
if (c > char_mask) {
- // If we expect a non-ASCII character from an ASCII string,
- // there is no way we can match. Not even case independent
- // matching can turn an ASCII character into non-ASCII or
+ // If we expect a non-Latin1 character from an one-byte string,
+ // there is no way we can match. Not even case-independent
+ // matching can turn an Latin1 character into non-Latin1 or
// vice versa.
+ // TODO(dcarney): issue 3550. Verify that this works as expected.
+ // For example, \u0178 is uppercase of \u00ff (y-umlaut).
Yang 2014/09/10 08:26:36 This is the other.
details->set_cannot_match();
pos->determines_perfectly = false;
return;
}
if (compiler->ignore_case()) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
- chars);
+ int length = GetCaseIndependentLetters(isolate, c,
+ compiler->one_byte(), chars);
DCHECK(length != 0); // Can only happen if c > char_mask (see above).
if (length == 1) {
// This letter has no case equivalents, so it's nice and simple
@@ -2692,7 +2676,7 @@ void QuickCheckDetails::Clear() {
}
-void QuickCheckDetails::Advance(int by, bool ascii) {
+void QuickCheckDetails::Advance(int by, bool one_byte) {
DCHECK(by >= 0);
if (by >= characters_) {
Clear();
@@ -2756,7 +2740,7 @@ class VisitMarker {
};
-RegExpNode* SeqRegExpNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
@@ -2766,7 +2750,7 @@ RegExpNode* SeqRegExpNode::FilterASCII(int depth, bool ignore_case) {
RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
- RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);
+ RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
if (next == NULL) return set_replacement(NULL);
on_success_ = next;
return set_replacement(this);
@@ -2790,7 +2774,7 @@ static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
}
-RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
@@ -2844,7 +2828,7 @@ RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
}
-RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -2852,17 +2836,17 @@ RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {
VisitMarker marker(info());
RegExpNode* continue_replacement =
- continue_node_->FilterASCII(depth - 1, ignore_case);
+ continue_node_->FilterOneByte(depth - 1, ignore_case);
// If we can't continue after the loop then there is no sense in doing the
// loop.
if (continue_replacement == NULL) return set_replacement(NULL);
}
- return ChoiceNode::FilterASCII(depth - 1, ignore_case);
+ return ChoiceNode::FilterOneByte(depth - 1, ignore_case);
}
-RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -2882,7 +2866,7 @@ RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {
for (int i = 0; i < choice_count; i++) {
GuardedAlternative alternative = alternatives_->at(i);
RegExpNode* replacement =
- alternative.node()->FilterASCII(depth - 1, ignore_case);
+ alternative.node()->FilterOneByte(depth - 1, ignore_case);
DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.
if (replacement != NULL) {
alternatives_->at(i).set_node(replacement);
@@ -2902,7 +2886,7 @@ RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {
new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
for (int i = 0; i < choice_count; i++) {
RegExpNode* replacement =
- alternatives_->at(i).node()->FilterASCII(depth - 1, ignore_case);
+ alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case);
if (replacement != NULL) {
alternatives_->at(i).set_node(replacement);
new_alternatives->Add(alternatives_->at(i), zone());
@@ -2913,8 +2897,8 @@ RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {
}
-RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth,
- bool ignore_case) {
+RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(int depth,
+ bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -2922,12 +2906,12 @@ RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth,
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
RegExpNode* node = alternatives_->at(1).node();
- RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case);
+ RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);
if (replacement == NULL) return set_replacement(NULL);
alternatives_->at(1).set_node(replacement);
RegExpNode* neg_node = alternatives_->at(0).node();
- RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case);
+ RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);
// If the negative lookahead is always going to fail then
// we don't need to check it.
if (neg_replacement == NULL) return set_replacement(replacement);
@@ -3036,7 +3020,7 @@ static void EmitHat(RegExpCompiler* compiler,
if (!assembler->CheckSpecialCharacterClass('n',
new_trace.backtrack())) {
// Newline means \n, \r, 0x2028 or 0x2029.
- if (!compiler->ascii()) {
+ if (!compiler->one_byte()) {
assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
}
assembler->CheckCharacter('\n', &ok);
@@ -3234,7 +3218,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
int* checked_up_to) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
Isolate* isolate = assembler->zone()->isolate();
- bool ascii = compiler->ascii();
+ bool one_byte = compiler->one_byte();
Label* backtrack = trace->backtrack();
QuickCheckDetails* quick_check = trace->quick_check_performed();
int element_count = elms_->length();
@@ -3248,8 +3232,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
EmitCharacterFunction* emit_function = NULL;
switch (pass) {
- case NON_ASCII_MATCH:
- DCHECK(ascii);
+ case NON_LATIN1_MATCH:
+ DCHECK(one_byte);
if (quarks[j] > String::kMaxOneByteCharCode) {
assembler->GoTo(backtrack);
return;
@@ -3284,14 +3268,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
if (first_element_checked && i == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
RegExpCharacterClass* cc = elm.char_class();
- EmitCharClass(assembler,
- cc,
- ascii,
- backtrack,
- cp_offset,
- *checked_up_to < cp_offset,
- preloaded,
- zone());
+ EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
+ *checked_up_to < cp_offset, preloaded, zone());
UpdateBoundsCheck(cp_offset, checked_up_to);
}
}
@@ -3332,9 +3310,9 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
return;
}
- if (compiler->ascii()) {
+ if (compiler->one_byte()) {
int dummy = 0;
- TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
+ TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
}
bool first_elt_done = false;
@@ -3390,7 +3368,7 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
// Adjust the offsets of the quick check performed information. This
// information is used to find out what we already determined about the
// characters by means of mask and compare.
- quick_check_performed_.Advance(by, compiler->ascii());
+ quick_check_performed_.Advance(by, compiler->one_byte());
cp_offset_ += by;
if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
compiler->SetRegExpTooBig();
@@ -3400,7 +3378,7 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
}
-void TextNode::MakeCaseIndependent(bool is_ascii) {
+void TextNode::MakeCaseIndependent(bool is_one_byte) {
int element_count = elms_->length();
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i);
@@ -3412,7 +3390,7 @@ void TextNode::MakeCaseIndependent(bool is_ascii) {
ZoneList<CharacterRange>* ranges = cc->ranges(zone());
int range_count = ranges->length();
for (int j = 0; j < range_count; j++) {
- ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone());
+ ranges->at(j).AddCaseEquivalents(ranges, is_one_byte, zone());
}
}
}
@@ -3440,7 +3418,7 @@ RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
}
if (ranges->length() != 1) return NULL;
uint32_t max_char;
- if (compiler->ascii()) {
+ if (compiler->one_byte()) {
max_char = String::kMaxOneByteCharCode;
} else {
max_char = String::kMaxUtf16CodeUnit;
@@ -3517,8 +3495,8 @@ int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
int eats_at_least) {
int preload_characters = Min(4, eats_at_least);
if (compiler->macro_assembler()->CanReadUnaligned()) {
- bool ascii = compiler->ascii();
- if (ascii) {
+ bool one_byte = compiler->one_byte();
+ if (one_byte) {
if (preload_characters > 4) preload_characters = 4;
// We can't preload 3 characters because there is no machine instruction
// to do that. We can't just load 4 because we could be reading
@@ -3644,7 +3622,7 @@ BoyerMooreLookahead::BoyerMooreLookahead(
int length, RegExpCompiler* compiler, Zone* zone)
: length_(length),
compiler_(compiler) {
- if (compiler->ascii()) {
+ if (compiler->one_byte()) {
max_char_ = String::kMaxOneByteCharCode;
} else {
max_char_ = String::kMaxUtf16CodeUnit;
@@ -3712,8 +3690,9 @@ int BoyerMooreLookahead::FindBestInterval(
// dividing by 2 we switch off the skipping if the probability of skipping
// is less than 50%. This is because the multibyte mask-and-compare
// skipping in quickcheck is more likely to do well on this case.
- bool in_quickcheck_range = ((i - remembered_from < 4) ||
- (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));
+ bool in_quickcheck_range =
+ ((i - remembered_from < 4) ||
+ (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
// Called 'probability' but it is only a rough estimate and can actually
// be outside the 0-kSize range.
int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
@@ -3931,8 +3910,7 @@ void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
// Save some time by looking at most one machine word ahead.
state->eats_at_least_ =
- EatsAtLeast(compiler->ascii() ? 4 : 2,
- kRecursionBudget,
+ EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,
current_trace->at_start() == Trace::FALSE_VALUE);
}
state->preload_characters_ =
@@ -5347,12 +5325,11 @@ void CharacterRange::Split(ZoneList<CharacterRange>* base,
void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
- bool is_ascii,
- Zone* zone) {
+ bool is_one_byte, Zone* zone) {
Isolate* isolate = zone->isolate();
uc16 bottom = from();
uc16 top = to();
- if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {
+ if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
if (bottom > String::kMaxOneByteCharCode) return;
if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
}
@@ -5771,7 +5748,7 @@ void TextNode::CalculateOffsets() {
void Analysis::VisitText(TextNode* that) {
if (ignore_case_) {
- that->MakeCaseIndependent(is_ascii_);
+ that->MakeCaseIndependent(is_one_byte_);
}
EnsureAnalyzed(that->on_success());
if (!has_failed()) {
@@ -6047,18 +6024,13 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) {
RegExpEngine::CompilationResult RegExpEngine::Compile(
- RegExpCompileData* data,
- bool ignore_case,
- bool is_global,
- bool is_multiline,
- Handle<String> pattern,
- Handle<String> sample_subject,
- bool is_ascii,
- Zone* zone) {
+ RegExpCompileData* data, bool ignore_case, bool is_global,
+ bool is_multiline, Handle<String> pattern, Handle<String> sample_subject,
+ bool is_one_byte, Zone* zone) {
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
return IrregexpRegExpTooBig(zone->isolate());
}
- RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii, zone);
+ RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);
// Sample some characters from the middle of the string.
static const int kSampleSize = 128;
@@ -6105,18 +6077,18 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
node = loop_node;
}
}
- if (is_ascii) {
- node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);
+ if (is_one_byte) {
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
// Do it again to propagate the new nodes to places where they were not
// put because they had not been calculated yet.
if (node != NULL) {
- node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
}
}
if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
data->node = node;
- Analysis analysis(ignore_case, is_ascii);
+ Analysis analysis(ignore_case, is_one_byte);
analysis.EnsureAnalyzed(node);
if (analysis.has_failed()) {
const char* error_message = analysis.error_message();
@@ -6128,8 +6100,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
// Native regexp implementation.
NativeRegExpMacroAssembler::Mode mode =
- is_ascii ? NativeRegExpMacroAssembler::ASCII
- : NativeRegExpMacroAssembler::UC16;
+ is_one_byte ? NativeRegExpMacroAssembler::LATIN1
+ : NativeRegExpMacroAssembler::UC16;
#if V8_TARGET_ARCH_IA32
RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,
« no previous file with comments | « src/jsregexp.h ('k') | src/liveedit.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698