src/jsregexp.cc - Issue 2811033: [Isolates] Remove even more statics.

Side by Side Diff: src/jsregexp.cc

Issue 2811033: [Isolates] Remove even more statics. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/isolates/

Patch Set: rebase and address comments Created 10 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved.	1 // Copyright 2006-2009 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 1230 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1241 case Guard::GEQ:	1241 case Guard::GEQ:

1242 ASSERT(!trace->mentions_reg(guard->reg()));	1242 ASSERT(!trace->mentions_reg(guard->reg()));

1243 macro_assembler->IfRegisterLT(guard->reg(),	1243 macro_assembler->IfRegisterLT(guard->reg(),

1244 guard->value(),	1244 guard->value(),

1245 trace->backtrack());	1245 trace->backtrack());

1246 break;	1246 break;

1247 }	1247 }

1248 }	1248 }

1249	1249

1250	1250

1251 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;

1252 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;

1253

1254

1255 // Returns the number of characters in the equivalence class, omitting those	1251 // Returns the number of characters in the equivalence class, omitting those

1256 // that cannot occur in the source string because it is ASCII.	1252 // that cannot occur in the source string because it is ASCII.

1257 static int GetCaseIndependentLetters(uc16 character,	1253 static int GetCaseIndependentLetters(Isolate* isolate,

	1254 uc16 character,

1258 bool ascii_subject,	1255 bool ascii_subject,

1259 unibrow::uchar* letters) {	1256 unibrow::uchar* letters) {

1260 int length = uncanonicalize.get(character, '\0', letters);	1257 int length =

	1258 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);

1261 // Unibrow returns 0 or 1 for characters where case independence is	1259 // Unibrow returns 0 or 1 for characters where case independence is

1262 // trivial.	1260 // trivial.

1263 if (length == 0) {	1261 if (length == 0) {

1264 letters[0] = character;	1262 letters[0] = character;

1265 length = 1;	1263 length = 1;

1266 }	1264 }

1267 if (!ascii_subject || character <= String::kMaxAsciiCharCode) {	1265 if (!ascii_subject || character <= String::kMaxAsciiCharCode) {

1268 return length;	1266 return length;

1269 }	1267 }

1270 // The standard requires that non-ASCII characters cannot have ASCII	1268 // The standard requires that non-ASCII characters cannot have ASCII

1271 // character codes in their equivalence class.	1269 // character codes in their equivalence class.

1272 return 0;	1270 return 0;

1273 }	1271 }

1274	1272

1275	1273

1276 static inline bool EmitSimpleCharacter(RegExpCompiler* compiler,	1274 static inline bool EmitSimpleCharacter(Isolate* isolate,

	1275 RegExpCompiler* compiler,

1277 uc16 c,	1276 uc16 c,

1278 Label* on_failure,	1277 Label* on_failure,

1279 int cp_offset,	1278 int cp_offset,

1280 bool check,	1279 bool check,

1281 bool preloaded) {	1280 bool preloaded) {

1282 RegExpMacroAssembler* assembler = compiler->macro_assembler();	1281 RegExpMacroAssembler* assembler = compiler->macro_assembler();

1283 bool bound_checked = false;	1282 bool bound_checked = false;

1284 if (!preloaded) {	1283 if (!preloaded) {

1285 assembler->LoadCurrentCharacter(	1284 assembler->LoadCurrentCharacter(

1286 cp_offset,	1285 cp_offset,

1287 on_failure,	1286 on_failure,

1288 check);	1287 check);

1289 bound_checked = true;	1288 bound_checked = true;

1290 }	1289 }

1291 assembler->CheckNotCharacter(c, on_failure);	1290 assembler->CheckNotCharacter(c, on_failure);

1292 return bound_checked;	1291 return bound_checked;

1293 }	1292 }

1294	1293

1295	1294

1296 // Only emits non-letters (things that don't have case). Only used for case	1295 // Only emits non-letters (things that don't have case). Only used for case

1297 // independent matches.	1296 // independent matches.

1298 static inline bool EmitAtomNonLetter(RegExpCompiler* compiler,	1297 static inline bool EmitAtomNonLetter(Isolate* isolate,

	1298 RegExpCompiler* compiler,

1299 uc16 c,	1299 uc16 c,

1300 Label* on_failure,	1300 Label* on_failure,

1301 int cp_offset,	1301 int cp_offset,

1302 bool check,	1302 bool check,

1303 bool preloaded) {	1303 bool preloaded) {

1304 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();	1304 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

1305 bool ascii = compiler->ascii();	1305 bool ascii = compiler->ascii();

1306 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	1306 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1307 int length = GetCaseIndependentLetters(c, ascii, chars);	1307 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);

1308 if (length < 1) {	1308 if (length < 1) {

1309 // This can't match. Must be an ASCII subject and a non-ASCII character.	1309 // This can't match. Must be an ASCII subject and a non-ASCII character.

1310 // We do not need to do anything since the ASCII pass already handled this.	1310 // We do not need to do anything since the ASCII pass already handled this.

1311 return false; // Bounds not checked.	1311 return false; // Bounds not checked.

1312 }	1312 }

1313 bool checked = false;	1313 bool checked = false;

1314 // We handle the length > 1 case in a later pass.	1314 // We handle the length > 1 case in a later pass.

1315 if (length == 1) {	1315 if (length == 1) {

1316 if (ascii && c > String::kMaxAsciiCharCodeU) {	1316 if (ascii && c > String::kMaxAsciiCharCodeU) {

1317 // Can't match - see above.	1317 // Can't match - see above.

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1359 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,	1359 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,

1360 diff,	1360 diff,

1361 mask,	1361 mask,

1362 on_failure);	1362 on_failure);

1363 return true;	1363 return true;

1364 }	1364 }

1365 return false;	1365 return false;

1366 }	1366 }

1367	1367

1368	1368

1369 typedef bool EmitCharacterFunction(RegExpCompiler* compiler,	1369 typedef bool EmitCharacterFunction(Isolate* isolate,

	1370 RegExpCompiler* compiler,

1370 uc16 c,	1371 uc16 c,

1371 Label* on_failure,	1372 Label* on_failure,

1372 int cp_offset,	1373 int cp_offset,

1373 bool check,	1374 bool check,

1374 bool preloaded);	1375 bool preloaded);

1375	1376

1376 // Only emits letters (things that have case). Only used for case independent	1377 // Only emits letters (things that have case). Only used for case independent

1377 // matches.	1378 // matches.

1378 static inline bool EmitAtomLetter(RegExpCompiler* compiler,	1379 static inline bool EmitAtomLetter(Isolate* isolate,

	1380 RegExpCompiler* compiler,

1379 uc16 c,	1381 uc16 c,

1380 Label* on_failure,	1382 Label* on_failure,

1381 int cp_offset,	1383 int cp_offset,

1382 bool check,	1384 bool check,

1383 bool preloaded) {	1385 bool preloaded) {

1384 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();	1386 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

1385 bool ascii = compiler->ascii();	1387 bool ascii = compiler->ascii();

1386 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	1388 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1387 int length = GetCaseIndependentLetters(c, ascii, chars);	1389 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);

1388 if (length <= 1) return false;	1390 if (length <= 1) return false;

1389 // We may not need to check against the end of the input string	1391 // We may not need to check against the end of the input string

1390 // if this character lies before a character that matched.	1392 // if this character lies before a character that matched.

1391 if (!preloaded) {	1393 if (!preloaded) {

1392 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);	1394 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

1393 }	1395 }

1394 Label ok;	1396 Label ok;

1395 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);	1397 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);

1396 switch (length) {	1398 switch (length) {

1397 case 2: {	1399 case 2: {

(...skipping 380 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1778 //	1780 //

1779 // We iterate along the text object, building up for each character a	1781 // We iterate along the text object, building up for each character a

1780 // mask and value that can be used to test for a quick failure to match.	1782 // mask and value that can be used to test for a quick failure to match.

1781 // The masks and values for the positions will be combined into a single	1783 // The masks and values for the positions will be combined into a single

1782 // machine word for the current character width in order to be used in	1784 // machine word for the current character width in order to be used in

1783 // generating a quick check.	1785 // generating a quick check.

1784 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,	1786 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,

1785 RegExpCompiler* compiler,	1787 RegExpCompiler* compiler,

1786 int characters_filled_in,	1788 int characters_filled_in,

1787 bool not_at_start) {	1789 bool not_at_start) {

	1790 Isolate* isolate = Isolate::Current();

1788 ASSERT(characters_filled_in < details->characters());	1791 ASSERT(characters_filled_in < details->characters());

1789 int characters = details->characters();	1792 int characters = details->characters();

1790 int char_mask;	1793 int char_mask;

1791 int char_shift;	1794 int char_shift;

1792 if (compiler->ascii()) {	1795 if (compiler->ascii()) {

1793 char_mask = String::kMaxAsciiCharCode;	1796 char_mask = String::kMaxAsciiCharCode;

1794 char_shift = 8;	1797 char_shift = 8;

1795 } else {	1798 } else {

1796 char_mask = String::kMaxUC16CharCode;	1799 char_mask = String::kMaxUC16CharCode;

1797 char_shift = 16;	1800 char_shift = 16;

(...skipping 10 matching lines...) Expand all Loading...
1808 // If we expect a non-ASCII character from an ASCII string,	1811 // If we expect a non-ASCII character from an ASCII string,

1809 // there is no way we can match. Not even case independent	1812 // there is no way we can match. Not even case independent

1810 // matching can turn an ASCII character into non-ASCII or	1813 // matching can turn an ASCII character into non-ASCII or

1811 // vice versa.	1814 // vice versa.

1812 details->set_cannot_match();	1815 details->set_cannot_match();

1813 pos->determines_perfectly = false;	1816 pos->determines_perfectly = false;

1814 return;	1817 return;

1815 }	1818 }

1816 if (compiler->ignore_case()) {	1819 if (compiler->ignore_case()) {

1817 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	1820 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1818 int length = GetCaseIndependentLetters(c, compiler->ascii(), chars);	1821 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),

	1822 chars);

1819 ASSERT(length != 0); // Can only happen if c > char_mask (see above).	1823 ASSERT(length != 0); // Can only happen if c > char_mask (see above).

1820 if (length == 1) {	1824 if (length == 1) {

1821 // This letter has no case equivalents, so it's nice and simple	1825 // This letter has no case equivalents, so it's nice and simple

1822 // and the mask-compare will determine definitely whether we have	1826 // and the mask-compare will determine definitely whether we have

1823 // a match at this character position.	1827 // a match at this character position.

1824 pos->mask = char_mask;	1828 pos->mask = char_mask;

1825 pos->value = c;	1829 pos->value = c;

1826 pos->determines_perfectly = true;	1830 pos->determines_perfectly = true;

1827 } else {	1831 } else {

1828 uint32_t common_bits = char_mask;	1832 uint32_t common_bits = char_mask;

(...skipping 479 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2308 // loading characters, which means we do not need to recheck the bounds	2312 // loading characters, which means we do not need to recheck the bounds

2309 // up to the limit the quick check already checked. In addition the quick	2313 // up to the limit the quick check already checked. In addition the quick

2310 // check can have involved a mask and compare operation which may simplify	2314 // check can have involved a mask and compare operation which may simplify

2311 // or obviate the need for further checks at some character positions.	2315 // or obviate the need for further checks at some character positions.

2312 void TextNode::TextEmitPass(RegExpCompiler* compiler,	2316 void TextNode::TextEmitPass(RegExpCompiler* compiler,

2313 TextEmitPassType pass,	2317 TextEmitPassType pass,

2314 bool preloaded,	2318 bool preloaded,

2315 Trace* trace,	2319 Trace* trace,

2316 bool first_element_checked,	2320 bool first_element_checked,

2317 int* checked_up_to) {	2321 int* checked_up_to) {

	2322 Isolate* isolate = Isolate::Current();

2318 RegExpMacroAssembler* assembler = compiler->macro_assembler();	2323 RegExpMacroAssembler* assembler = compiler->macro_assembler();

2319 bool ascii = compiler->ascii();	2324 bool ascii = compiler->ascii();

2320 Label* backtrack = trace->backtrack();	2325 Label* backtrack = trace->backtrack();

2321 QuickCheckDetails* quick_check = trace->quick_check_performed();	2326 QuickCheckDetails* quick_check = trace->quick_check_performed();

2322 int element_count = elms_->length();	2327 int element_count = elms_->length();

2323 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {	2328 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {

2324 TextElement elm = elms_->at(i);	2329 TextElement elm = elms_->at(i);

2325 int cp_offset = trace->cp_offset() + elm.cp_offset;	2330 int cp_offset = trace->cp_offset() + elm.cp_offset;

2326 if (elm.type == TextElement::ATOM) {	2331 if (elm.type == TextElement::ATOM) {

2327 Vector<const uc16> quarks = elm.data.u_atom->data();	2332 Vector<const uc16> quarks = elm.data.u_atom->data();

(...skipping 15 matching lines...) Expand all Loading...
2343 case SIMPLE_CHARACTER_MATCH:	2348 case SIMPLE_CHARACTER_MATCH:

2344 emit_function = &EmitSimpleCharacter;	2349 emit_function = &EmitSimpleCharacter;

2345 break;	2350 break;

2346 case CASE_CHARACTER_MATCH:	2351 case CASE_CHARACTER_MATCH:

2347 emit_function = &EmitAtomLetter;	2352 emit_function = &EmitAtomLetter;

2348 break;	2353 break;

2349 default:	2354 default:

2350 break;	2355 break;

2351 }	2356 }

2352 if (emit_function != NULL) {	2357 if (emit_function != NULL) {

2353 bool bound_checked = emit_function(compiler,	2358 bool bound_checked = emit_function(isolate,

	2359 compiler,

2354 quarks[j],	2360 quarks[j],

2355 backtrack,	2361 backtrack,

2356 cp_offset + j,	2362 cp_offset + j,

2357 *checked_up_to < cp_offset + j,	2363 *checked_up_to < cp_offset + j,

2358 preloaded);	2364 preloaded);

2359 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);	2365 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);

2360 }	2366 }

2361 }	2367 }

2362 } else {	2368 } else {

2363 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);	2369 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);

(...skipping 1616 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3980 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);	3986 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);

3981 for (int i = 0; i < overlay.length(); i += 2) {	3987 for (int i = 0; i < overlay.length(); i += 2) {

3982 table.AddRange(CharacterRange(overlay[i], overlay[i+1]),	3988 table.AddRange(CharacterRange(overlay[i], overlay[i+1]),

3983 CharacterRangeSplitter::kInOverlay);	3989 CharacterRangeSplitter::kInOverlay);

3984 }	3990 }

3985 CharacterRangeSplitter callback(included, excluded);	3991 CharacterRangeSplitter callback(included, excluded);

3986 table.ForEach(&callback);	3992 table.ForEach(&callback);

3987 }	3993 }

3988	3994

3989	3995

3990 static void AddUncanonicals(ZoneList<CharacterRange>* ranges,	3996 static void AddUncanonicals(Isolate* isolate,

	3997 ZoneList<CharacterRange>* ranges,

3991 int bottom,	3998 int bottom,

3992 int top);	3999 int top);

3993	4000

3994	4001

3995 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,	4002 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,

3996 bool is_ascii) {	4003 bool is_ascii) {

	4004 Isolate* isolate = Isolate::Current();

3997 uc16 bottom = from();	4005 uc16 bottom = from();

3998 uc16 top = to();	4006 uc16 top = to();

3999 if (is_ascii) {	4007 if (is_ascii) {

4000 if (bottom > String::kMaxAsciiCharCode) return;	4008 if (bottom > String::kMaxAsciiCharCode) return;

4001 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;	4009 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;

4002 }	4010 }

4003 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	4011 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

4004 if (top == bottom) {	4012 if (top == bottom) {

4005 // If this is a singleton we just expand the one character.	4013 // If this is a singleton we just expand the one character.

4006 int length = uncanonicalize.get(bottom, '\0', chars);	4014 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);

4007 for (int i = 0; i < length; i++) {	4015 for (int i = 0; i < length; i++) {

4008 uc32 chr = chars[i];	4016 uc32 chr = chars[i];

4009 if (chr != bottom) {	4017 if (chr != bottom) {

4010 ranges->Add(CharacterRange::Singleton(chars[i]));	4018 ranges->Add(CharacterRange::Singleton(chars[i]));

4011 }	4019 }

4012 }	4020 }

4013 } else if (bottom <= kRangeCanonicalizeMax &&	4021 } else if (bottom <= kRangeCanonicalizeMax &&

4014 top <= kRangeCanonicalizeMax) {	4022 top <= kRangeCanonicalizeMax) {

4015 // If this is a range we expand the characters block by block,	4023 // If this is a range we expand the characters block by block,

4016 // expanding contiguous subranges (blocks) one at a time.	4024 // expanding contiguous subranges (blocks) one at a time.

4017 // The approach is as follows. For a given start character we	4025 // The approach is as follows. For a given start character we

4018 // look up the block that contains it, for instance 'a' if the	4026 // look up the block that contains it, for instance 'a' if the

4019 // start character is 'c'. A block is characterized by the property	4027 // start character is 'c'. A block is characterized by the property

4020 // that all characters uncanonicalize in the same way as the first	4028 // that all characters uncanonicalize in the same way as the first

4021 // element, except that each entry in the result is incremented	4029 // element, except that each entry in the result is incremented

4022 // by the distance from the first element. So a-z is a block	4030 // by the distance from the first element. So a-z is a block

4023 // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter	4031 // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter

4024 // uncanonicalizes to ['a' + k, 'A' + k].	4032 // uncanonicalizes to ['a' + k, 'A' + k].

4025 // Once we've found the start point we look up its uncanonicalization	4033 // Once we've found the start point we look up its uncanonicalization

4026 // and produce a range for each element. For instance for [c-f]	4034 // and produce a range for each element. For instance for [c-f]

4027 // we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only	4035 // we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only

4028 // add a range if it is not already contained in the input, so [c-f]	4036 // add a range if it is not already contained in the input, so [c-f]

4029 // will be skipped but [C-F] will be added. If this range is not	4037 // will be skipped but [C-F] will be added. If this range is not

4030 // completely contained in a block we do this for all the blocks	4038 // completely contained in a block we do this for all the blocks

4031 // covered by the range.	4039 // covered by the range.

4032 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];	4040 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];

4033 // First, look up the block that contains the 'bottom' character.	4041 // First, look up the block that contains the 'bottom' character.

4034 int length = canonrange.get(bottom, '\0', range);	4042 int length = isolate->jsregexp_canonrange()->get(bottom, '\0', range);

4035 if (length == 0) {	4043 if (length == 0) {

4036 range[0] = bottom;	4044 range[0] = bottom;

4037 } else {	4045 } else {

4038 ASSERT_EQ(1, length);	4046 ASSERT_EQ(1, length);

4039 }	4047 }

4040 int pos = bottom;	4048 int pos = bottom;

4041 // The start of the current block. Note that except for the first	4049 // The start of the current block. Note that except for the first

4042 // iteration 'start' is always equal to 'pos'.	4050 // iteration 'start' is always equal to 'pos'.

4043 int start;	4051 int start;

4044 // If it is not the start point of a block the entry contains the	4052 // If it is not the start point of a block the entry contains the

4045 // offset of the character from the start point.	4053 // offset of the character from the start point.

4046 if ((range[0] & kStartMarker) == 0) {	4054 if ((range[0] & kStartMarker) == 0) {

4047 start = pos - range[0];	4055 start = pos - range[0];

4048 } else {	4056 } else {

4049 start = pos;	4057 start = pos;

4050 }	4058 }

4051 // Then we add the ranges one at a time, incrementing the current	4059 // Then we add the ranges one at a time, incrementing the current

4052 // position to be after the last block each time. The position	4060 // position to be after the last block each time. The position

4053 // always points to the start of a block.	4061 // always points to the start of a block.

4054 while (pos < top) {	4062 while (pos < top) {

4055 length = canonrange.get(start, '\0', range);	4063 length = isolate->jsregexp_canonrange()->get(start, '\0', range);

4056 if (length == 0) {	4064 if (length == 0) {

4057 range[0] = start;	4065 range[0] = start;

4058 } else {	4066 } else {

4059 ASSERT_EQ(1, length);	4067 ASSERT_EQ(1, length);

4060 }	4068 }

4061 ASSERT((range[0] & kStartMarker) != 0);	4069 ASSERT((range[0] & kStartMarker) != 0);

4062 // The start point of a block contains the distance to the end	4070 // The start point of a block contains the distance to the end

4063 // of the range.	4071 // of the range.

4064 int block_end = start + (range[0] & kPayloadMask) - 1;	4072 int block_end = start + (range[0] & kPayloadMask) - 1;

4065 int end = (block_end > top) ? top : block_end;	4073 int end = (block_end > top) ? top : block_end;

4066 length = uncanonicalize.get(start, '\0', range);	4074 length = isolate->jsregexp_uncanonicalize()->get(start, '\0', range);

4067 for (int i = 0; i < length; i++) {	4075 for (int i = 0; i < length; i++) {

4068 uc32 c = range[i];	4076 uc32 c = range[i];

4069 uc16 range_from = c + (pos - start);	4077 uc16 range_from = c + (pos - start);

4070 uc16 range_to = c + (end - start);	4078 uc16 range_to = c + (end - start);

4071 if (!(bottom <= range_from && range_to <= top)) {	4079 if (!(bottom <= range_from && range_to <= top)) {

4072 ranges->Add(CharacterRange(range_from, range_to));	4080 ranges->Add(CharacterRange(range_from, range_to));

4073 }	4081 }

4074 }	4082 }

4075 start = pos = block_end + 1;	4083 start = pos = block_end + 1;

4076 }	4084 }

4077 } else {	4085 } else {

4078 // Unibrow ranges don't work for high characters due to the "2^11 bug".	4086 // Unibrow ranges don't work for high characters due to the "2^11 bug".

4079 // Therefore we do something dumber for these ranges.	4087 // Therefore we do something dumber for these ranges.

4080 AddUncanonicals(ranges, bottom, top);	4088 AddUncanonicals(isolate, ranges, bottom, top);

4081 }	4089 }

4082 }	4090 }

4083	4091

4084	4092

4085 bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {	4093 bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {

4086 ASSERT_NOT_NULL(ranges);	4094 ASSERT_NOT_NULL(ranges);

4087 int n = ranges->length();	4095 int n = ranges->length();

4088 if (n <= 1) return true;	4096 if (n <= 1) return true;

4089 int max = ranges->at(0).to();	4097 int max = ranges->at(0).to();

4090 for (int i = 1; i < n; i++) {	4098 for (int i = 1; i < n; i++) {

(...skipping 79 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4170 result.SetElementsInSecondSet();	4178 result.SetElementsInSecondSet();

4171 } else if (j < range->length()) {	4179 } else if (j < range->length()) {

4172 // Argument range contains something not in word range.	4180 // Argument range contains something not in word range.

4173 result.SetElementsInFirstSet();	4181 result.SetElementsInFirstSet();

4174 }	4182 }

4175	4183

4176 return result;	4184 return result;

4177 }	4185 }

4178	4186

4179	4187

4180 static void AddUncanonicals(ZoneList<CharacterRange>* ranges,	4188 static void AddUncanonicals(Isolate* isolate,

	4189 ZoneList<CharacterRange>* ranges,

4181 int bottom,	4190 int bottom,

4182 int top) {	4191 int top) {

4183 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	4192 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

4184 // Zones with no case mappings. There is a DEBUG-mode loop to assert that	4193 // Zones with no case mappings. There is a DEBUG-mode loop to assert that

4185 // this table is correct.	4194 // this table is correct.

4186 // 0x0600 - 0x0fff	4195 // 0x0600 - 0x0fff

4187 // 0x1100 - 0x1cff	4196 // 0x1100 - 0x1cff

4188 // 0x2000 - 0x20ff	4197 // 0x2000 - 0x20ff

4189 // 0x2200 - 0x23ff	4198 // 0x2200 - 0x23ff

4190 // 0x2500 - 0x2bff	4199 // 0x2500 - 0x2bff

(...skipping 17 matching lines...) Expand all Loading...
4208 if (top <= CharacterRange::kRangeCanonicalizeMax) {	4217 if (top <= CharacterRange::kRangeCanonicalizeMax) {

4209 CharacterRange range(bottom, top);	4218 CharacterRange range(bottom, top);

4210 range.AddCaseEquivalents(ranges, false);	4219 range.AddCaseEquivalents(ranges, false);

4211 return;	4220 return;

4212 }	4221 }

4213	4222

4214 // Split up very large ranges. This helps remove ranges where there are no	4223 // Split up very large ranges. This helps remove ranges where there are no

4215 // case mappings.	4224 // case mappings.

4216 for (int i = 0; i < boundary_count; i++) {	4225 for (int i = 0; i < boundary_count; i++) {

4217 if (bottom < boundaries[i] && top >= boundaries[i]) {	4226 if (bottom < boundaries[i] && top >= boundaries[i]) {

4218 AddUncanonicals(ranges, bottom, boundaries[i] - 1);	4227 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);

4219 AddUncanonicals(ranges, boundaries[i], top);	4228 AddUncanonicals(isolate, ranges, boundaries[i], top);

4220 return;	4229 return;

4221 }	4230 }

4222 }	4231 }

4223	4232

4224 // If we are completely in a zone with no case mappings then we are done.	4233 // If we are completely in a zone with no case mappings then we are done.

4225 // We start at 2 so as not to except the ASCII range from mappings.	4234 // We start at 2 so as not to except the ASCII range from mappings.

4226 for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) {	4235 for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) {

4227 if (bottom >= boundaries[i] && top < boundaries[i + 1]) {	4236 if (bottom >= boundaries[i] && top < boundaries[i + 1]) {

4228 #ifdef DEBUG	4237 #ifdef DEBUG

4229 for (int j = bottom; j <= top; j++) {	4238 for (int j = bottom; j <= top; j++) {

4230 unsigned current_char = j;	4239 unsigned current_char = j;

4231 int length = uncanonicalize.get(current_char, '\0', chars);	4240 int length = isolate->jsregexp_uncanonicalize()->get(current_char,

	4241 '\0', chars);

4232 for (int k = 0; k < length; k++) {	4242 for (int k = 0; k < length; k++) {

4233 ASSERT(chars[k] == current_char);	4243 ASSERT(chars[k] == current_char);

4234 }	4244 }

4235 }	4245 }

4236 #endif	4246 #endif

4237 return;	4247 return;

4238 }	4248 }

4239 }	4249 }

4240	4250

4241 // Step through the range finding equivalent characters.	4251 // Step through the range finding equivalent characters.

4242 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);	4252 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);

4243 for (int i = bottom; i <= top; i++) {	4253 for (int i = bottom; i <= top; i++) {

4244 int length = uncanonicalize.get(i, '\0', chars);	4254 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);

4245 for (int j = 0; j < length; j++) {	4255 for (int j = 0; j < length; j++) {

4246 uc32 chr = chars[j];	4256 uc32 chr = chars[j];

4247 if (chr != i && (chr < bottom || chr > top)) {	4257 if (chr != i && (chr < bottom || chr > top)) {

4248 characters->Add(chr);	4258 characters->Add(chr);

4249 }	4259 }

4250 }	4260 }

4251 }	4261 }

4252	4262

4253 // Step through the equivalent characters finding simple ranges and	4263 // Step through the equivalent characters finding simple ranges and

4254 // adding ranges to the character class.	4264 // adding ranges to the character class.

(...skipping 999 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5254 RegExpMacroAssemblerIrregexp macro_assembler(codes);	5264 RegExpMacroAssemblerIrregexp macro_assembler(codes);

5255 #endif // V8_INTERPRETED_REGEXP	5265 #endif // V8_INTERPRETED_REGEXP

5256	5266

5257 return compiler.Assemble(&macro_assembler,	5267 return compiler.Assemble(&macro_assembler,

5258 node,	5268 node,

5259 data->capture_count,	5269 data->capture_count,

5260 pattern);	5270 pattern);

5261 }	5271 }

5262	5272

5263	5273

5264 int OffsetsVector::static_offsets_vector_[

5265 OffsetsVector::kStaticOffsetsVectorSize];

5266

5267 }} // namespace v8::internal	5274 }} // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »