src/jsregexp.cc - Issue 559913002: Rename ascii to one-byte where applicable.

Unified Diff: src/jsregexp.cc

Issue 559913002: Rename ascii to one-byte where applicable. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/jsregexp.cc

diff --git a/src/jsregexp.cc b/src/jsregexp.cc

index 8a94e819e2c1cbbc04e79e904b80b2e6fef4acdf..0d9f83af37ba48b7ee7b2cf24e81e3ed34d151ab 100644

--- a/src/jsregexp.cc

+++ b/src/jsregexp.cc

@@ -290,25 +290,18 @@ int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,

DCHECK(needle_content.IsFlat());

DCHECK(subject_content.IsFlat());

// dispatch on type of strings

- index = (needle_content.IsAscii()

- ? (subject_content.IsAscii()

- ? SearchString(isolate,

- subject_content.ToOneByteVector(),

- needle_content.ToOneByteVector(),

- index)

- : SearchString(isolate,

- subject_content.ToUC16Vector(),

- needle_content.ToOneByteVector(),

- index))

- : (subject_content.IsAscii()

- ? SearchString(isolate,

- subject_content.ToOneByteVector(),

- needle_content.ToUC16Vector(),

- index)

- : SearchString(isolate,

- subject_content.ToUC16Vector(),

- needle_content.ToUC16Vector(),

- index)));

+ index =

+ (needle_content.IsOneByte()

+ ? (subject_content.IsOneByte()

+ ? SearchString(isolate, subject_content.ToOneByteVector(),

+ needle_content.ToOneByteVector(), index)

+ : SearchString(isolate, subject_content.ToUC16Vector(),

+ needle_content.ToOneByteVector(), index))

+ : (subject_content.IsOneByte()

+ ? SearchString(isolate, subject_content.ToOneByteVector(),

+ needle_content.ToUC16Vector(), index)

+ : SearchString(isolate, subject_content.ToUC16Vector(),

+ needle_content.ToUC16Vector(), index)));

if (index == -1) {

return i / 2; // Return number of matches.

} else {

@@ -346,14 +339,15 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,

// Irregexp implementation.

// Ensures that the regexp object contains a compiled version of the

-// source for either ASCII or non-ASCII strings.

+// source for either one-byte or two-byte subject strings.

// If the compiled version doesn't already exist, it is compiled

// from the source pattern.

// If compilation fails, an exception is thrown and this function

// returns false.

-bool RegExpImpl::EnsureCompiledIrregexp(

- Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii) {

- Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));

+bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,

+ Handle<String> sample_subject,

+ bool is_one_byte) {

+ Object* compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte));

#ifdef V8_INTERPRETED_REGEXP

if (compiled_code->IsByteArray()) return true;

#else // V8_INTERPRETED_REGEXP (RegExp native code)

@@ -361,18 +355,18 @@ bool RegExpImpl::EnsureCompiledIrregexp(

#endif

// We could potentially have marked this as flushable, but have kept

// a saved version if we did not flush it yet.

- Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));

+ Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_one_byte));

if (saved_code->IsCode()) {

// Reinstate the code in the original place.

- re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);

+ re->SetDataAt(JSRegExp::code_index(is_one_byte), saved_code);

DCHECK(compiled_code->IsSmi());

return true;

}

- return CompileIrregexp(re, sample_subject, is_ascii);

+ return CompileIrregexp(re, sample_subject, is_one_byte);

}

-static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re, bool is_ascii,

+static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,

Handle<String> error_message,

Isolate* isolate) {

Factory* factory = isolate->factory();

@@ -389,14 +383,14 @@ static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re, bool is_ascii,

bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,

Handle<String> sample_subject,

- bool is_ascii) {

+ bool is_one_byte) {

// Compile the RegExp.

Isolate* isolate = re->GetIsolate();

Zone zone(isolate);

PostponeInterruptsScope postpone(isolate);

// If we had a compilation error the last time this is saved at the

// saved code index.

- Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));

+ Object* entry = re->DataAt(JSRegExp::code_index(is_one_byte));

// When arriving here entry can only be a smi, either representing an

// uncompiled regexp, a previous compilation error, or code that has

// been flushed.

@@ -410,10 +404,10 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,

// A previous compilation failed and threw an error which we store in

// the saved code index (we store the error message, not the actual

// error). Recreate the error object and throw it.

- Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));

+ Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_one_byte));

DCHECK(error_string->IsString());

Handle<String> error_message(String::cast(error_string));

- CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);

+ CreateRegExpErrorObjectAndThrow(re, error_message, isolate);

return false;

}

@@ -434,25 +428,19 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,

"malformed_regexp"));

return false;

}

- RegExpEngine::CompilationResult result =

- RegExpEngine::Compile(&compile_data,

- flags.is_ignore_case(),

- flags.is_global(),

- flags.is_multiline(),

- pattern,

- sample_subject,

- is_ascii,

- &zone);

+ RegExpEngine::CompilationResult result = RegExpEngine::Compile(

+ &compile_data, flags.is_ignore_case(), flags.is_global(),

+ flags.is_multiline(), pattern, sample_subject, is_one_byte, &zone);

if (result.error_message != NULL) {

// Unable to compile regexp.

Handle<String> error_message = isolate->factory()->NewStringFromUtf8(

CStrVector(result.error_message)).ToHandleChecked();

- CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);

+ CreateRegExpErrorObjectAndThrow(re, error_message, isolate);

return false;

}

Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));

- data->set(JSRegExp::code_index(is_ascii), result.code);

+ data->set(JSRegExp::code_index(is_one_byte), result.code);

int register_max = IrregexpMaxRegisterCount(*data);

if (result.num_registers > register_max) {

SetIrregexpMaxRegisterCount(*data, result.num_registers);

@@ -483,13 +471,13 @@ int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {

}

-ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {

- return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));

+ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_one_byte) {

+ return ByteArray::cast(re->get(JSRegExp::code_index(is_one_byte)));

}

-Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {

- return Code::cast(re->get(JSRegExp::code_index(is_ascii)));

+Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_one_byte) {

+ return Code::cast(re->get(JSRegExp::code_index(is_one_byte)));

}

@@ -510,9 +498,9 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,

Handle<String> subject) {

subject = String::Flatten(subject);

- // Check the asciiness of the underlying storage.

- bool is_ascii = subject->IsOneByteRepresentationUnderneath();

- if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1;

+ // Check representation of the underlying storage.

+ bool is_one_byte = subject->IsOneByteRepresentationUnderneath();

+ if (!EnsureCompiledIrregexp(regexp, subject, is_one_byte)) return -1;

#ifdef V8_INTERPRETED_REGEXP

// Byte-code regexp needs space allocated for all its registers.

@@ -542,13 +530,13 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,

DCHECK(index <= subject->length());

DCHECK(subject->IsFlat());

- bool is_ascii = subject->IsOneByteRepresentationUnderneath();

+ bool is_one_byte = subject->IsOneByteRepresentationUnderneath();

#ifndef V8_INTERPRETED_REGEXP

DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);

do {

- EnsureCompiledIrregexp(regexp, subject, is_ascii);

- Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);

+ EnsureCompiledIrregexp(regexp, subject, is_one_byte);

+ Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);

// The stack is used to allocate registers for the compiled regexp code.

// This means that in case of failure, the output registers array is left

// untouched and contains the capture results from the previous successful

@@ -575,10 +563,10 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,

// must restart from scratch.

// In this case, it means we must make sure we are prepared to handle

// the, potentially, different subject (the string can switch between

- // being internal and external, and even between being ASCII and UC16,

+ // being internal and external, and even between being Latin1 and UC16,

// but the characters are always the same).

IrregexpPrepare(regexp, subject);

- is_ascii = subject->IsOneByteRepresentationUnderneath();

+ is_one_byte = subject->IsOneByteRepresentationUnderneath();

} while (true);

UNREACHABLE();

return RE_EXCEPTION;

@@ -596,7 +584,8 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,

for (int i = number_of_capture_registers - 1; i >= 0; i--) {

raw_output[i] = -1;

}

- Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);

+ Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),

+ isolate);

IrregexpResult result = IrregexpInterpreter::Match(isolate,

byte_codes,

@@ -997,7 +986,7 @@ class FrequencyCollator {

class RegExpCompiler {

public:

- RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii,

+ RegExpCompiler(int capture_count, bool ignore_case, bool is_one_byte,

Zone* zone);

int AllocateRegister() {

@@ -1030,7 +1019,7 @@ class RegExpCompiler {

void SetRegExpTooBig() { reg_exp_too_big_ = true; }

inline bool ignore_case() { return ignore_case_; }

- inline bool ascii() { return ascii_; }

+ inline bool one_byte() { return one_byte_; }

FrequencyCollator* frequency_collator() { return &frequency_collator_; }

int current_expansion_factor() { return current_expansion_factor_; }

@@ -1049,7 +1038,7 @@ class RegExpCompiler {

int recursion_depth_;

RegExpMacroAssembler* macro_assembler_;

bool ignore_case_;

- bool ascii_;

+ bool one_byte_;

bool reg_exp_too_big_;

int current_expansion_factor_;

FrequencyCollator frequency_collator_;

@@ -1075,13 +1064,13 @@ static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {

// Attempts to compile the regexp using an Irregexp code generator. Returns

// a fixed array or a null handle depending on whether it succeeded.

-RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii,

- Zone* zone)

+RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case,

+ bool one_byte, Zone* zone)

: next_register_(2 * (capture_count + 1)),

work_list_(NULL),

recursion_depth_(0),

ignore_case_(ignore_case),

- ascii_(ascii),

+ one_byte_(one_byte),

reg_exp_too_big_(false),

current_expansion_factor_(1),

frequency_collator_(),

@@ -1592,9 +1581,8 @@ void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,

// Returns the number of characters in the equivalence class, omitting those

// that cannot occur in the source string because it is ASCII.

-static int GetCaseIndependentLetters(Isolate* isolate,

- uc16 character,

- bool ascii_subject,

+static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,

+ bool one_byte_subject,

unibrow::uchar* letters) {

int length =

isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);

@@ -1604,11 +1592,14 @@ static int GetCaseIndependentLetters(Isolate* isolate,

letters[0] = character;

length = 1;

}

- if (!ascii_subject || character <= String::kMaxOneByteCharCode) {

+ if (!one_byte_subject || character <= String::kMaxOneByteCharCode) {

return length;

}

// The standard requires that non-ASCII characters cannot have ASCII

// character codes in their equivalence class.

+ // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore,

+ // is it? For example, \u00C5 is equivalent to \u212B.

Yang 2014/09/10 08:26:36 This is one of the TODOs I mentioned.

dcarney 2014/09/10 09:35:12 I checked other browsers I think originally, and w

return 0;

}

@@ -1644,18 +1635,19 @@ static inline bool EmitAtomNonLetter(Isolate* isolate,

bool check,

bool preloaded) {

RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

- bool ascii = compiler->ascii();

+ bool one_byte = compiler->one_byte();

unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

- int length = GetCaseIndependentLetters(isolate, c, ascii, chars);

+ int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);

if (length < 1) {

- // This can't match. Must be an ASCII subject and a non-ASCII character.

- // We do not need to do anything since the ASCII pass already handled this.

+ // This can't match. Must be an one-byte subject and a non-one-byte

+ // character. We do not need to do anything since the one-byte pass

+ // already handled this.

return false; // Bounds not checked.

}

bool checked = false;

// We handle the length > 1 case in a later pass.

if (length == 1) {

- if (ascii && c > String::kMaxOneByteCharCodeU) {

+ if (one_byte && c > String::kMaxOneByteCharCodeU) {

// Can't match - see above.

return false; // Bounds not checked.

}

@@ -1670,12 +1662,10 @@ static inline bool EmitAtomNonLetter(Isolate* isolate,

static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,

- bool ascii,

- uc16 c1,

- uc16 c2,

+ bool one_byte, uc16 c1, uc16 c2,

Label* on_failure) {

uc16 char_mask;

- if (ascii) {

+ if (one_byte) {

char_mask = String::kMaxOneByteCharCode;

} else {

char_mask = String::kMaxUtf16CodeUnit;

@@ -1726,9 +1716,9 @@ static inline bool EmitAtomLetter(Isolate* isolate,

bool check,

bool preloaded) {

RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

- bool ascii = compiler->ascii();

+ bool one_byte = compiler->one_byte();

unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

- int length = GetCaseIndependentLetters(isolate, c, ascii, chars);

+ int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);

if (length <= 1) return false;

// We may not need to check against the end of the input string

// if this character lies before a character that matched.

@@ -1739,11 +1729,8 @@ static inline bool EmitAtomLetter(Isolate* isolate,

DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);

switch (length) {

case 2: {

- if (ShortCutEmitCharacterPair(macro_assembler,

- ascii,

- chars[0],

- chars[1],

- on_failure)) {

+ if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],

+ chars[1], on_failure)) {

} else {

macro_assembler->CheckCharacter(chars[0], &ok);

macro_assembler->CheckNotCharacter(chars[1], on_failure);

@@ -1918,7 +1905,7 @@ static void SplitSearchSpace(ZoneList<int>* ranges,

// new_start_index is the index of the first edge that is beyond the

// current kSize space.

- // For very large search spaces we do a binary chop search of the non-ASCII

+ // For very large search spaces we do a binary chop search of the non-Latin1

// space instead of just going to the end of the current kSize space. The

// heuristics are complicated a little by the fact that any 128-character

// encoding space can be quickly tested with a table lookup, so we don't

@@ -1927,14 +1914,13 @@ static void SplitSearchSpace(ZoneList<int>* ranges,

// for example, we only want to match every second character (eg. the lower

// case characters on some Unicode pages).

int binary_chop_index = (end_index + start_index) / 2;

- // The first test ensures that we get to the code that handles the ASCII

+ // The first test ensures that we get to the code that handles the Latin1

// range with a single not-taken branch, speeding up this important

- // character range (even non-ASCII charset-based text has spaces and

+ // character range (even non-Latin1 charset-based text has spaces and

// punctuation).

- if (*border - 1 > String::kMaxOneByteCharCode && // ASCII case.

+ if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case.

end_index - start_index > (*new_start_index - start_index) * 2 &&

- last - first > kSize * 2 &&

- binary_chop_index > *new_start_index &&

+ last - first > kSize * 2 && binary_chop_index > *new_start_index &&

ranges->at(binary_chop_index) >= first + 2 * kSize) {

int scan_forward_for_section_border = binary_chop_index;;

int new_border = (ranges->at(binary_chop_index) | kMask) + 1;

@@ -2121,20 +2107,16 @@ static void GenerateBranches(RegExpMacroAssembler* masm,

static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

- RegExpCharacterClass* cc,

- bool ascii,

- Label* on_failure,

- int cp_offset,

- bool check_offset,

- bool preloaded,

- Zone* zone) {

+ RegExpCharacterClass* cc, bool one_byte,

+ Label* on_failure, int cp_offset, bool check_offset,

+ bool preloaded, Zone* zone) {

ZoneList<CharacterRange>* ranges = cc->ranges(zone);

if (!CharacterRange::IsCanonical(ranges)) {

CharacterRange::Canonicalize(ranges);

}

int max_char;

- if (ascii) {

+ if (one_byte) {

max_char = String::kMaxOneByteCharCode;

} else {

max_char = String::kMaxUtf16CodeUnit;

@@ -2464,7 +2446,7 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,

GetQuickCheckDetails(

details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);

if (details->cannot_match()) return false;

- if (!details->Rationalize(compiler->ascii())) return false;

+ if (!details->Rationalize(compiler->one_byte())) return false;

DCHECK(details->characters() == 1 ||

compiler->macro_assembler()->CanReadUnaligned());

uint32_t mask = details->mask();

@@ -2486,7 +2468,7 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,

// If number of characters preloaded is 1 then we used a byte or 16 bit

// load so the value is already masked down.

uint32_t char_mask;

- if (compiler->ascii()) {

+ if (compiler->one_byte()) {

char_mask = String::kMaxOneByteCharCode;

} else {

char_mask = String::kMaxUtf16CodeUnit;

@@ -2494,11 +2476,11 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,

if ((mask & char_mask) == char_mask) need_mask = false;

mask &= char_mask;

} else {

- // For 2-character preloads in ASCII mode or 1-character preloads in

- // TWO_BYTE mode we also use a 16 bit load with zero extend.

- if (details->characters() == 2 && compiler->ascii()) {

+ // For 2-character preloads in one-byte mode or 1-character preloads in

+ // two-byte mode we also use a 16 bit load with zero extend.

+ if (details->characters() == 2 && compiler->one_byte()) {

if ((mask & 0xffff) == 0xffff) need_mask = false;

- } else if (details->characters() == 1 && !compiler->ascii()) {

+ } else if (details->characters() == 1 && !compiler->one_byte()) {

if ((mask & 0xffff) == 0xffff) need_mask = false;

} else {

if (mask == 0xffffffff) need_mask = false;

@@ -2538,7 +2520,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,

DCHECK(characters_filled_in < details->characters());

int characters = details->characters();

int char_mask;

- if (compiler->ascii()) {

+ if (compiler->one_byte()) {

char_mask = String::kMaxOneByteCharCode;

} else {

char_mask = String::kMaxUtf16CodeUnit;

@@ -2552,18 +2534,20 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,

details->positions(characters_filled_in);

uc16 c = quarks[i];

if (c > char_mask) {

- // If we expect a non-ASCII character from an ASCII string,

- // there is no way we can match. Not even case independent

- // matching can turn an ASCII character into non-ASCII or

+ // If we expect a non-Latin1 character from an one-byte string,

+ // there is no way we can match. Not even case-independent

+ // matching can turn an Latin1 character into non-Latin1 or

// vice versa.

+ // TODO(dcarney): issue 3550. Verify that this works as expected.

+ // For example, \u0178 is uppercase of \u00ff (y-umlaut).

Yang 2014/09/10 08:26:36 This is the other.

details->set_cannot_match();

pos->determines_perfectly = false;

return;

}

if (compiler->ignore_case()) {

unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

- int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),

- chars);

+ int length = GetCaseIndependentLetters(isolate, c,

+ compiler->one_byte(), chars);

DCHECK(length != 0); // Can only happen if c > char_mask (see above).

if (length == 1) {

// This letter has no case equivalents, so it's nice and simple

@@ -2692,7 +2676,7 @@ void QuickCheckDetails::Clear() {

}

-void QuickCheckDetails::Advance(int by, bool ascii) {

+void QuickCheckDetails::Advance(int by, bool one_byte) {

DCHECK(by >= 0);

if (by >= characters_) {

Clear();

@@ -2756,7 +2740,7 @@ class VisitMarker {

};

-RegExpNode* SeqRegExpNode::FilterASCII(int depth, bool ignore_case) {

+RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) {

if (info()->replacement_calculated) return replacement();

if (depth < 0) return this;

DCHECK(!info()->visited);

@@ -2766,7 +2750,7 @@ RegExpNode* SeqRegExpNode::FilterASCII(int depth, bool ignore_case) {

RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {

- RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);

+ RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);

if (next == NULL) return set_replacement(NULL);

on_success_ = next;

return set_replacement(this);

@@ -2790,7 +2774,7 @@ static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {

}

-RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {

+RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {

if (info()->replacement_calculated) return replacement();

if (depth < 0) return this;

DCHECK(!info()->visited);

@@ -2844,7 +2828,7 @@ RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {

}

-RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {

+RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {

if (info()->replacement_calculated) return replacement();

if (depth < 0) return this;

if (info()->visited) return this;

@@ -2852,17 +2836,17 @@ RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {

VisitMarker marker(info());

RegExpNode* continue_replacement =

- continue_node_->FilterASCII(depth - 1, ignore_case);

+ continue_node_->FilterOneByte(depth - 1, ignore_case);

// If we can't continue after the loop then there is no sense in doing the

// loop.

if (continue_replacement == NULL) return set_replacement(NULL);

}

- return ChoiceNode::FilterASCII(depth - 1, ignore_case);

+ return ChoiceNode::FilterOneByte(depth - 1, ignore_case);

}

-RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {

+RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {

if (info()->replacement_calculated) return replacement();

if (depth < 0) return this;

if (info()->visited) return this;

@@ -2882,7 +2866,7 @@ RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {

for (int i = 0; i < choice_count; i++) {

GuardedAlternative alternative = alternatives_->at(i);

RegExpNode* replacement =

- alternative.node()->FilterASCII(depth - 1, ignore_case);

+ alternative.node()->FilterOneByte(depth - 1, ignore_case);

DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.

if (replacement != NULL) {

alternatives_->at(i).set_node(replacement);

@@ -2902,7 +2886,7 @@ RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {

new(zone()) ZoneList<GuardedAlternative>(surviving, zone());

for (int i = 0; i < choice_count; i++) {

RegExpNode* replacement =

- alternatives_->at(i).node()->FilterASCII(depth - 1, ignore_case);

+ alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case);

if (replacement != NULL) {

alternatives_->at(i).set_node(replacement);

new_alternatives->Add(alternatives_->at(i), zone());

@@ -2913,8 +2897,8 @@ RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {

}

-RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth,

- bool ignore_case) {

+RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(int depth,

+ bool ignore_case) {

if (info()->replacement_calculated) return replacement();

if (depth < 0) return this;

if (info()->visited) return this;

@@ -2922,12 +2906,12 @@ RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth,

// Alternative 0 is the negative lookahead, alternative 1 is what comes

// afterwards.

RegExpNode* node = alternatives_->at(1).node();

- RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case);

+ RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);

if (replacement == NULL) return set_replacement(NULL);

alternatives_->at(1).set_node(replacement);

RegExpNode* neg_node = alternatives_->at(0).node();

- RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case);

+ RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);

// If the negative lookahead is always going to fail then

// we don't need to check it.

if (neg_replacement == NULL) return set_replacement(replacement);

@@ -3036,7 +3020,7 @@ static void EmitHat(RegExpCompiler* compiler,

if (!assembler->CheckSpecialCharacterClass('n',

new_trace.backtrack())) {

// Newline means \n, \r, 0x2028 or 0x2029.

- if (!compiler->ascii()) {

+ if (!compiler->one_byte()) {

assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);

}

assembler->CheckCharacter('\n', &ok);

@@ -3234,7 +3218,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,

int* checked_up_to) {

RegExpMacroAssembler* assembler = compiler->macro_assembler();

Isolate* isolate = assembler->zone()->isolate();

- bool ascii = compiler->ascii();

+ bool one_byte = compiler->one_byte();

Label* backtrack = trace->backtrack();

QuickCheckDetails* quick_check = trace->quick_check_performed();

int element_count = elms_->length();

@@ -3248,8 +3232,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,

if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;

EmitCharacterFunction* emit_function = NULL;

switch (pass) {

- case NON_ASCII_MATCH:

- DCHECK(ascii);

+ case NON_LATIN1_MATCH:

+ DCHECK(one_byte);

if (quarks[j] > String::kMaxOneByteCharCode) {

assembler->GoTo(backtrack);

return;

@@ -3284,14 +3268,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,

if (first_element_checked && i == 0) continue;

if (DeterminedAlready(quick_check, elm.cp_offset())) continue;

RegExpCharacterClass* cc = elm.char_class();

- EmitCharClass(assembler,

- cc,

- ascii,

- backtrack,

- cp_offset,

- *checked_up_to < cp_offset,

- preloaded,

- zone());

+ EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,

+ *checked_up_to < cp_offset, preloaded, zone());

UpdateBoundsCheck(cp_offset, checked_up_to);

}

@@ -3332,9 +3310,9 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {

return;

}

- if (compiler->ascii()) {

+ if (compiler->one_byte()) {

int dummy = 0;

- TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);

+ TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);

}

bool first_elt_done = false;

@@ -3390,7 +3368,7 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {

// Adjust the offsets of the quick check performed information. This

// information is used to find out what we already determined about the

// characters by means of mask and compare.

- quick_check_performed_.Advance(by, compiler->ascii());

+ quick_check_performed_.Advance(by, compiler->one_byte());

cp_offset_ += by;

if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {

compiler->SetRegExpTooBig();

@@ -3400,7 +3378,7 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {

}

-void TextNode::MakeCaseIndependent(bool is_ascii) {

+void TextNode::MakeCaseIndependent(bool is_one_byte) {

int element_count = elms_->length();

for (int i = 0; i < element_count; i++) {

TextElement elm = elms_->at(i);

@@ -3412,7 +3390,7 @@ void TextNode::MakeCaseIndependent(bool is_ascii) {

ZoneList<CharacterRange>* ranges = cc->ranges(zone());

int range_count = ranges->length();

for (int j = 0; j < range_count; j++) {

- ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone());

+ ranges->at(j).AddCaseEquivalents(ranges, is_one_byte, zone());

}

@@ -3440,7 +3418,7 @@ RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(

}

if (ranges->length() != 1) return NULL;

uint32_t max_char;

- if (compiler->ascii()) {

+ if (compiler->one_byte()) {

max_char = String::kMaxOneByteCharCode;

} else {

max_char = String::kMaxUtf16CodeUnit;

@@ -3517,8 +3495,8 @@ int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,

int eats_at_least) {

int preload_characters = Min(4, eats_at_least);

if (compiler->macro_assembler()->CanReadUnaligned()) {

- bool ascii = compiler->ascii();

- if (ascii) {

+ bool one_byte = compiler->one_byte();

+ if (one_byte) {

if (preload_characters > 4) preload_characters = 4;

// We can't preload 3 characters because there is no machine instruction

// to do that. We can't just load 4 because we could be reading

@@ -3644,7 +3622,7 @@ BoyerMooreLookahead::BoyerMooreLookahead(

int length, RegExpCompiler* compiler, Zone* zone)

: length_(length),

compiler_(compiler) {

- if (compiler->ascii()) {

+ if (compiler->one_byte()) {

max_char_ = String::kMaxOneByteCharCode;

} else {

max_char_ = String::kMaxUtf16CodeUnit;

@@ -3712,8 +3690,9 @@ int BoyerMooreLookahead::FindBestInterval(

// dividing by 2 we switch off the skipping if the probability of skipping

// is less than 50%. This is because the multibyte mask-and-compare

// skipping in quickcheck is more likely to do well on this case.

- bool in_quickcheck_range = ((i - remembered_from < 4) ||

- (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));

+ bool in_quickcheck_range =

+ ((i - remembered_from < 4) ||

+ (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));

// Called 'probability' but it is only a rough estimate and can actually

// be outside the 0-kSize range.

int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;

@@ -3931,8 +3910,7 @@ void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,

if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {

// Save some time by looking at most one machine word ahead.

state->eats_at_least_ =

- EatsAtLeast(compiler->ascii() ? 4 : 2,

- kRecursionBudget,

+ EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,

current_trace->at_start() == Trace::FALSE_VALUE);

}

state->preload_characters_ =

@@ -5347,12 +5325,11 @@ void CharacterRange::Split(ZoneList<CharacterRange>* base,

void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,

- bool is_ascii,

- Zone* zone) {

+ bool is_one_byte, Zone* zone) {

Isolate* isolate = zone->isolate();

uc16 bottom = from();

uc16 top = to();

- if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {

+ if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {

if (bottom > String::kMaxOneByteCharCode) return;

if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;

}

@@ -5771,7 +5748,7 @@ void TextNode::CalculateOffsets() {

void Analysis::VisitText(TextNode* that) {

if (ignore_case_) {

- that->MakeCaseIndependent(is_ascii_);

+ that->MakeCaseIndependent(is_one_byte_);

}

EnsureAnalyzed(that->on_success());

if (!has_failed()) {

@@ -6047,18 +6024,13 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) {

RegExpEngine::CompilationResult RegExpEngine::Compile(

- RegExpCompileData* data,

- bool ignore_case,

- bool is_global,

- bool is_multiline,

- Handle<String> pattern,

- Handle<String> sample_subject,

- bool is_ascii,

- Zone* zone) {

+ RegExpCompileData* data, bool ignore_case, bool is_global,

+ bool is_multiline, Handle<String> pattern, Handle<String> sample_subject,

+ bool is_one_byte, Zone* zone) {

if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {

return IrregexpRegExpTooBig(zone->isolate());

}

- RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii, zone);

+ RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);

// Sample some characters from the middle of the string.

static const int kSampleSize = 128;

@@ -6105,18 +6077,18 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(

node = loop_node;

}

- if (is_ascii) {

- node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);

+ if (is_one_byte) {

+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);

// Do it again to propagate the new nodes to places where they were not

// put because they had not been calculated yet.

if (node != NULL) {

- node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);

+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);

}

if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);

data->node = node;

- Analysis analysis(ignore_case, is_ascii);

+ Analysis analysis(ignore_case, is_one_byte);

analysis.EnsureAnalyzed(node);

if (analysis.has_failed()) {

const char* error_message = analysis.error_message();

@@ -6128,8 +6100,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(

// Native regexp implementation.

NativeRegExpMacroAssembler::Mode mode =

- is_ascii ? NativeRegExpMacroAssembler::ASCII

- : NativeRegExpMacroAssembler::UC16;

+ is_one_byte ? NativeRegExpMacroAssembler::LATIN1

+ : NativeRegExpMacroAssembler::UC16;

#if V8_TARGET_ARCH_IA32

RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,

« no previous file with comments | « src/jsregexp.h ('k') | src/liveedit.cc » ('j') | no next file with comments »