Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(81)

Side by Side Diff: src/jsregexp.cc

Issue 2811033: [Isolates] Remove even more statics. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/isolates/
Patch Set: rebase and address comments Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1230 matching lines...) Expand 10 before | Expand all | Expand 10 after
1241 case Guard::GEQ: 1241 case Guard::GEQ:
1242 ASSERT(!trace->mentions_reg(guard->reg())); 1242 ASSERT(!trace->mentions_reg(guard->reg()));
1243 macro_assembler->IfRegisterLT(guard->reg(), 1243 macro_assembler->IfRegisterLT(guard->reg(),
1244 guard->value(), 1244 guard->value(),
1245 trace->backtrack()); 1245 trace->backtrack());
1246 break; 1246 break;
1247 } 1247 }
1248 } 1248 }
1249 1249
1250 1250
1251 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
1252 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
1253
1254
1255 // Returns the number of characters in the equivalence class, omitting those 1251 // Returns the number of characters in the equivalence class, omitting those
1256 // that cannot occur in the source string because it is ASCII. 1252 // that cannot occur in the source string because it is ASCII.
1257 static int GetCaseIndependentLetters(uc16 character, 1253 static int GetCaseIndependentLetters(Isolate* isolate,
1254 uc16 character,
1258 bool ascii_subject, 1255 bool ascii_subject,
1259 unibrow::uchar* letters) { 1256 unibrow::uchar* letters) {
1260 int length = uncanonicalize.get(character, '\0', letters); 1257 int length =
1258 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
1261 // Unibrow returns 0 or 1 for characters where case independence is 1259 // Unibrow returns 0 or 1 for characters where case independence is
1262 // trivial. 1260 // trivial.
1263 if (length == 0) { 1261 if (length == 0) {
1264 letters[0] = character; 1262 letters[0] = character;
1265 length = 1; 1263 length = 1;
1266 } 1264 }
1267 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { 1265 if (!ascii_subject || character <= String::kMaxAsciiCharCode) {
1268 return length; 1266 return length;
1269 } 1267 }
1270 // The standard requires that non-ASCII characters cannot have ASCII 1268 // The standard requires that non-ASCII characters cannot have ASCII
1271 // character codes in their equivalence class. 1269 // character codes in their equivalence class.
1272 return 0; 1270 return 0;
1273 } 1271 }
1274 1272
1275 1273
1276 static inline bool EmitSimpleCharacter(RegExpCompiler* compiler, 1274 static inline bool EmitSimpleCharacter(Isolate* isolate,
1275 RegExpCompiler* compiler,
1277 uc16 c, 1276 uc16 c,
1278 Label* on_failure, 1277 Label* on_failure,
1279 int cp_offset, 1278 int cp_offset,
1280 bool check, 1279 bool check,
1281 bool preloaded) { 1280 bool preloaded) {
1282 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1281 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1283 bool bound_checked = false; 1282 bool bound_checked = false;
1284 if (!preloaded) { 1283 if (!preloaded) {
1285 assembler->LoadCurrentCharacter( 1284 assembler->LoadCurrentCharacter(
1286 cp_offset, 1285 cp_offset,
1287 on_failure, 1286 on_failure,
1288 check); 1287 check);
1289 bound_checked = true; 1288 bound_checked = true;
1290 } 1289 }
1291 assembler->CheckNotCharacter(c, on_failure); 1290 assembler->CheckNotCharacter(c, on_failure);
1292 return bound_checked; 1291 return bound_checked;
1293 } 1292 }
1294 1293
1295 1294
1296 // Only emits non-letters (things that don't have case). Only used for case 1295 // Only emits non-letters (things that don't have case). Only used for case
1297 // independent matches. 1296 // independent matches.
1298 static inline bool EmitAtomNonLetter(RegExpCompiler* compiler, 1297 static inline bool EmitAtomNonLetter(Isolate* isolate,
1298 RegExpCompiler* compiler,
1299 uc16 c, 1299 uc16 c,
1300 Label* on_failure, 1300 Label* on_failure,
1301 int cp_offset, 1301 int cp_offset,
1302 bool check, 1302 bool check,
1303 bool preloaded) { 1303 bool preloaded) {
1304 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1304 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1305 bool ascii = compiler->ascii(); 1305 bool ascii = compiler->ascii();
1306 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1306 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1307 int length = GetCaseIndependentLetters(c, ascii, chars); 1307 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
1308 if (length < 1) { 1308 if (length < 1) {
1309 // This can't match. Must be an ASCII subject and a non-ASCII character. 1309 // This can't match. Must be an ASCII subject and a non-ASCII character.
1310 // We do not need to do anything since the ASCII pass already handled this. 1310 // We do not need to do anything since the ASCII pass already handled this.
1311 return false; // Bounds not checked. 1311 return false; // Bounds not checked.
1312 } 1312 }
1313 bool checked = false; 1313 bool checked = false;
1314 // We handle the length > 1 case in a later pass. 1314 // We handle the length > 1 case in a later pass.
1315 if (length == 1) { 1315 if (length == 1) {
1316 if (ascii && c > String::kMaxAsciiCharCodeU) { 1316 if (ascii && c > String::kMaxAsciiCharCodeU) {
1317 // Can't match - see above. 1317 // Can't match - see above.
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
1359 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, 1359 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
1360 diff, 1360 diff,
1361 mask, 1361 mask,
1362 on_failure); 1362 on_failure);
1363 return true; 1363 return true;
1364 } 1364 }
1365 return false; 1365 return false;
1366 } 1366 }
1367 1367
1368 1368
1369 typedef bool EmitCharacterFunction(RegExpCompiler* compiler, 1369 typedef bool EmitCharacterFunction(Isolate* isolate,
1370 RegExpCompiler* compiler,
1370 uc16 c, 1371 uc16 c,
1371 Label* on_failure, 1372 Label* on_failure,
1372 int cp_offset, 1373 int cp_offset,
1373 bool check, 1374 bool check,
1374 bool preloaded); 1375 bool preloaded);
1375 1376
1376 // Only emits letters (things that have case). Only used for case independent 1377 // Only emits letters (things that have case). Only used for case independent
1377 // matches. 1378 // matches.
1378 static inline bool EmitAtomLetter(RegExpCompiler* compiler, 1379 static inline bool EmitAtomLetter(Isolate* isolate,
1380 RegExpCompiler* compiler,
1379 uc16 c, 1381 uc16 c,
1380 Label* on_failure, 1382 Label* on_failure,
1381 int cp_offset, 1383 int cp_offset,
1382 bool check, 1384 bool check,
1383 bool preloaded) { 1385 bool preloaded) {
1384 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1386 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1385 bool ascii = compiler->ascii(); 1387 bool ascii = compiler->ascii();
1386 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1388 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1387 int length = GetCaseIndependentLetters(c, ascii, chars); 1389 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
1388 if (length <= 1) return false; 1390 if (length <= 1) return false;
1389 // We may not need to check against the end of the input string 1391 // We may not need to check against the end of the input string
1390 // if this character lies before a character that matched. 1392 // if this character lies before a character that matched.
1391 if (!preloaded) { 1393 if (!preloaded) {
1392 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1394 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1393 } 1395 }
1394 Label ok; 1396 Label ok;
1395 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1397 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1396 switch (length) { 1398 switch (length) {
1397 case 2: { 1399 case 2: {
(...skipping 380 matching lines...) Expand 10 before | Expand all | Expand 10 after
1778 // 1780 //
1779 // We iterate along the text object, building up for each character a 1781 // We iterate along the text object, building up for each character a
1780 // mask and value that can be used to test for a quick failure to match. 1782 // mask and value that can be used to test for a quick failure to match.
1781 // The masks and values for the positions will be combined into a single 1783 // The masks and values for the positions will be combined into a single
1782 // machine word for the current character width in order to be used in 1784 // machine word for the current character width in order to be used in
1783 // generating a quick check. 1785 // generating a quick check.
1784 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, 1786 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
1785 RegExpCompiler* compiler, 1787 RegExpCompiler* compiler,
1786 int characters_filled_in, 1788 int characters_filled_in,
1787 bool not_at_start) { 1789 bool not_at_start) {
1790 Isolate* isolate = Isolate::Current();
1788 ASSERT(characters_filled_in < details->characters()); 1791 ASSERT(characters_filled_in < details->characters());
1789 int characters = details->characters(); 1792 int characters = details->characters();
1790 int char_mask; 1793 int char_mask;
1791 int char_shift; 1794 int char_shift;
1792 if (compiler->ascii()) { 1795 if (compiler->ascii()) {
1793 char_mask = String::kMaxAsciiCharCode; 1796 char_mask = String::kMaxAsciiCharCode;
1794 char_shift = 8; 1797 char_shift = 8;
1795 } else { 1798 } else {
1796 char_mask = String::kMaxUC16CharCode; 1799 char_mask = String::kMaxUC16CharCode;
1797 char_shift = 16; 1800 char_shift = 16;
(...skipping 10 matching lines...) Expand all
1808 // If we expect a non-ASCII character from an ASCII string, 1811 // If we expect a non-ASCII character from an ASCII string,
1809 // there is no way we can match. Not even case independent 1812 // there is no way we can match. Not even case independent
1810 // matching can turn an ASCII character into non-ASCII or 1813 // matching can turn an ASCII character into non-ASCII or
1811 // vice versa. 1814 // vice versa.
1812 details->set_cannot_match(); 1815 details->set_cannot_match();
1813 pos->determines_perfectly = false; 1816 pos->determines_perfectly = false;
1814 return; 1817 return;
1815 } 1818 }
1816 if (compiler->ignore_case()) { 1819 if (compiler->ignore_case()) {
1817 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1820 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1818 int length = GetCaseIndependentLetters(c, compiler->ascii(), chars); 1821 int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
1822 chars);
1819 ASSERT(length != 0); // Can only happen if c > char_mask (see above). 1823 ASSERT(length != 0); // Can only happen if c > char_mask (see above).
1820 if (length == 1) { 1824 if (length == 1) {
1821 // This letter has no case equivalents, so it's nice and simple 1825 // This letter has no case equivalents, so it's nice and simple
1822 // and the mask-compare will determine definitely whether we have 1826 // and the mask-compare will determine definitely whether we have
1823 // a match at this character position. 1827 // a match at this character position.
1824 pos->mask = char_mask; 1828 pos->mask = char_mask;
1825 pos->value = c; 1829 pos->value = c;
1826 pos->determines_perfectly = true; 1830 pos->determines_perfectly = true;
1827 } else { 1831 } else {
1828 uint32_t common_bits = char_mask; 1832 uint32_t common_bits = char_mask;
(...skipping 479 matching lines...) Expand 10 before | Expand all | Expand 10 after
2308 // loading characters, which means we do not need to recheck the bounds 2312 // loading characters, which means we do not need to recheck the bounds
2309 // up to the limit the quick check already checked. In addition the quick 2313 // up to the limit the quick check already checked. In addition the quick
2310 // check can have involved a mask and compare operation which may simplify 2314 // check can have involved a mask and compare operation which may simplify
2311 // or obviate the need for further checks at some character positions. 2315 // or obviate the need for further checks at some character positions.
2312 void TextNode::TextEmitPass(RegExpCompiler* compiler, 2316 void TextNode::TextEmitPass(RegExpCompiler* compiler,
2313 TextEmitPassType pass, 2317 TextEmitPassType pass,
2314 bool preloaded, 2318 bool preloaded,
2315 Trace* trace, 2319 Trace* trace,
2316 bool first_element_checked, 2320 bool first_element_checked,
2317 int* checked_up_to) { 2321 int* checked_up_to) {
2322 Isolate* isolate = Isolate::Current();
2318 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2323 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2319 bool ascii = compiler->ascii(); 2324 bool ascii = compiler->ascii();
2320 Label* backtrack = trace->backtrack(); 2325 Label* backtrack = trace->backtrack();
2321 QuickCheckDetails* quick_check = trace->quick_check_performed(); 2326 QuickCheckDetails* quick_check = trace->quick_check_performed();
2322 int element_count = elms_->length(); 2327 int element_count = elms_->length();
2323 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { 2328 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
2324 TextElement elm = elms_->at(i); 2329 TextElement elm = elms_->at(i);
2325 int cp_offset = trace->cp_offset() + elm.cp_offset; 2330 int cp_offset = trace->cp_offset() + elm.cp_offset;
2326 if (elm.type == TextElement::ATOM) { 2331 if (elm.type == TextElement::ATOM) {
2327 Vector<const uc16> quarks = elm.data.u_atom->data(); 2332 Vector<const uc16> quarks = elm.data.u_atom->data();
(...skipping 15 matching lines...) Expand all
2343 case SIMPLE_CHARACTER_MATCH: 2348 case SIMPLE_CHARACTER_MATCH:
2344 emit_function = &EmitSimpleCharacter; 2349 emit_function = &EmitSimpleCharacter;
2345 break; 2350 break;
2346 case CASE_CHARACTER_MATCH: 2351 case CASE_CHARACTER_MATCH:
2347 emit_function = &EmitAtomLetter; 2352 emit_function = &EmitAtomLetter;
2348 break; 2353 break;
2349 default: 2354 default:
2350 break; 2355 break;
2351 } 2356 }
2352 if (emit_function != NULL) { 2357 if (emit_function != NULL) {
2353 bool bound_checked = emit_function(compiler, 2358 bool bound_checked = emit_function(isolate,
2359 compiler,
2354 quarks[j], 2360 quarks[j],
2355 backtrack, 2361 backtrack,
2356 cp_offset + j, 2362 cp_offset + j,
2357 *checked_up_to < cp_offset + j, 2363 *checked_up_to < cp_offset + j,
2358 preloaded); 2364 preloaded);
2359 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); 2365 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
2360 } 2366 }
2361 } 2367 }
2362 } else { 2368 } else {
2363 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); 2369 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
(...skipping 1616 matching lines...) Expand 10 before | Expand all | Expand 10 after
3980 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase); 3986 table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);
3981 for (int i = 0; i < overlay.length(); i += 2) { 3987 for (int i = 0; i < overlay.length(); i += 2) {
3982 table.AddRange(CharacterRange(overlay[i], overlay[i+1]), 3988 table.AddRange(CharacterRange(overlay[i], overlay[i+1]),
3983 CharacterRangeSplitter::kInOverlay); 3989 CharacterRangeSplitter::kInOverlay);
3984 } 3990 }
3985 CharacterRangeSplitter callback(included, excluded); 3991 CharacterRangeSplitter callback(included, excluded);
3986 table.ForEach(&callback); 3992 table.ForEach(&callback);
3987 } 3993 }
3988 3994
3989 3995
3990 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, 3996 static void AddUncanonicals(Isolate* isolate,
3997 ZoneList<CharacterRange>* ranges,
3991 int bottom, 3998 int bottom,
3992 int top); 3999 int top);
3993 4000
3994 4001
3995 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, 4002 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
3996 bool is_ascii) { 4003 bool is_ascii) {
4004 Isolate* isolate = Isolate::Current();
3997 uc16 bottom = from(); 4005 uc16 bottom = from();
3998 uc16 top = to(); 4006 uc16 top = to();
3999 if (is_ascii) { 4007 if (is_ascii) {
4000 if (bottom > String::kMaxAsciiCharCode) return; 4008 if (bottom > String::kMaxAsciiCharCode) return;
4001 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; 4009 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;
4002 } 4010 }
4003 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 4011 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4004 if (top == bottom) { 4012 if (top == bottom) {
4005 // If this is a singleton we just expand the one character. 4013 // If this is a singleton we just expand the one character.
4006 int length = uncanonicalize.get(bottom, '\0', chars); 4014 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
4007 for (int i = 0; i < length; i++) { 4015 for (int i = 0; i < length; i++) {
4008 uc32 chr = chars[i]; 4016 uc32 chr = chars[i];
4009 if (chr != bottom) { 4017 if (chr != bottom) {
4010 ranges->Add(CharacterRange::Singleton(chars[i])); 4018 ranges->Add(CharacterRange::Singleton(chars[i]));
4011 } 4019 }
4012 } 4020 }
4013 } else if (bottom <= kRangeCanonicalizeMax && 4021 } else if (bottom <= kRangeCanonicalizeMax &&
4014 top <= kRangeCanonicalizeMax) { 4022 top <= kRangeCanonicalizeMax) {
4015 // If this is a range we expand the characters block by block, 4023 // If this is a range we expand the characters block by block,
4016 // expanding contiguous subranges (blocks) one at a time. 4024 // expanding contiguous subranges (blocks) one at a time.
4017 // The approach is as follows. For a given start character we 4025 // The approach is as follows. For a given start character we
4018 // look up the block that contains it, for instance 'a' if the 4026 // look up the block that contains it, for instance 'a' if the
4019 // start character is 'c'. A block is characterized by the property 4027 // start character is 'c'. A block is characterized by the property
4020 // that all characters uncanonicalize in the same way as the first 4028 // that all characters uncanonicalize in the same way as the first
4021 // element, except that each entry in the result is incremented 4029 // element, except that each entry in the result is incremented
4022 // by the distance from the first element. So a-z is a block 4030 // by the distance from the first element. So a-z is a block
4023 // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter 4031 // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter
4024 // uncanonicalizes to ['a' + k, 'A' + k]. 4032 // uncanonicalizes to ['a' + k, 'A' + k].
4025 // Once we've found the start point we look up its uncanonicalization 4033 // Once we've found the start point we look up its uncanonicalization
4026 // and produce a range for each element. For instance for [c-f] 4034 // and produce a range for each element. For instance for [c-f]
4027 // we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only 4035 // we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only
4028 // add a range if it is not already contained in the input, so [c-f] 4036 // add a range if it is not already contained in the input, so [c-f]
4029 // will be skipped but [C-F] will be added. If this range is not 4037 // will be skipped but [C-F] will be added. If this range is not
4030 // completely contained in a block we do this for all the blocks 4038 // completely contained in a block we do this for all the blocks
4031 // covered by the range. 4039 // covered by the range.
4032 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 4040 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4033 // First, look up the block that contains the 'bottom' character. 4041 // First, look up the block that contains the 'bottom' character.
4034 int length = canonrange.get(bottom, '\0', range); 4042 int length = isolate->jsregexp_canonrange()->get(bottom, '\0', range);
4035 if (length == 0) { 4043 if (length == 0) {
4036 range[0] = bottom; 4044 range[0] = bottom;
4037 } else { 4045 } else {
4038 ASSERT_EQ(1, length); 4046 ASSERT_EQ(1, length);
4039 } 4047 }
4040 int pos = bottom; 4048 int pos = bottom;
4041 // The start of the current block. Note that except for the first 4049 // The start of the current block. Note that except for the first
4042 // iteration 'start' is always equal to 'pos'. 4050 // iteration 'start' is always equal to 'pos'.
4043 int start; 4051 int start;
4044 // If it is not the start point of a block the entry contains the 4052 // If it is not the start point of a block the entry contains the
4045 // offset of the character from the start point. 4053 // offset of the character from the start point.
4046 if ((range[0] & kStartMarker) == 0) { 4054 if ((range[0] & kStartMarker) == 0) {
4047 start = pos - range[0]; 4055 start = pos - range[0];
4048 } else { 4056 } else {
4049 start = pos; 4057 start = pos;
4050 } 4058 }
4051 // Then we add the ranges one at a time, incrementing the current 4059 // Then we add the ranges one at a time, incrementing the current
4052 // position to be after the last block each time. The position 4060 // position to be after the last block each time. The position
4053 // always points to the start of a block. 4061 // always points to the start of a block.
4054 while (pos < top) { 4062 while (pos < top) {
4055 length = canonrange.get(start, '\0', range); 4063 length = isolate->jsregexp_canonrange()->get(start, '\0', range);
4056 if (length == 0) { 4064 if (length == 0) {
4057 range[0] = start; 4065 range[0] = start;
4058 } else { 4066 } else {
4059 ASSERT_EQ(1, length); 4067 ASSERT_EQ(1, length);
4060 } 4068 }
4061 ASSERT((range[0] & kStartMarker) != 0); 4069 ASSERT((range[0] & kStartMarker) != 0);
4062 // The start point of a block contains the distance to the end 4070 // The start point of a block contains the distance to the end
4063 // of the range. 4071 // of the range.
4064 int block_end = start + (range[0] & kPayloadMask) - 1; 4072 int block_end = start + (range[0] & kPayloadMask) - 1;
4065 int end = (block_end > top) ? top : block_end; 4073 int end = (block_end > top) ? top : block_end;
4066 length = uncanonicalize.get(start, '\0', range); 4074 length = isolate->jsregexp_uncanonicalize()->get(start, '\0', range);
4067 for (int i = 0; i < length; i++) { 4075 for (int i = 0; i < length; i++) {
4068 uc32 c = range[i]; 4076 uc32 c = range[i];
4069 uc16 range_from = c + (pos - start); 4077 uc16 range_from = c + (pos - start);
4070 uc16 range_to = c + (end - start); 4078 uc16 range_to = c + (end - start);
4071 if (!(bottom <= range_from && range_to <= top)) { 4079 if (!(bottom <= range_from && range_to <= top)) {
4072 ranges->Add(CharacterRange(range_from, range_to)); 4080 ranges->Add(CharacterRange(range_from, range_to));
4073 } 4081 }
4074 } 4082 }
4075 start = pos = block_end + 1; 4083 start = pos = block_end + 1;
4076 } 4084 }
4077 } else { 4085 } else {
4078 // Unibrow ranges don't work for high characters due to the "2^11 bug". 4086 // Unibrow ranges don't work for high characters due to the "2^11 bug".
4079 // Therefore we do something dumber for these ranges. 4087 // Therefore we do something dumber for these ranges.
4080 AddUncanonicals(ranges, bottom, top); 4088 AddUncanonicals(isolate, ranges, bottom, top);
4081 } 4089 }
4082 } 4090 }
4083 4091
4084 4092
4085 bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) { 4093 bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
4086 ASSERT_NOT_NULL(ranges); 4094 ASSERT_NOT_NULL(ranges);
4087 int n = ranges->length(); 4095 int n = ranges->length();
4088 if (n <= 1) return true; 4096 if (n <= 1) return true;
4089 int max = ranges->at(0).to(); 4097 int max = ranges->at(0).to();
4090 for (int i = 1; i < n; i++) { 4098 for (int i = 1; i < n; i++) {
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
4170 result.SetElementsInSecondSet(); 4178 result.SetElementsInSecondSet();
4171 } else if (j < range->length()) { 4179 } else if (j < range->length()) {
4172 // Argument range contains something not in word range. 4180 // Argument range contains something not in word range.
4173 result.SetElementsInFirstSet(); 4181 result.SetElementsInFirstSet();
4174 } 4182 }
4175 4183
4176 return result; 4184 return result;
4177 } 4185 }
4178 4186
4179 4187
4180 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, 4188 static void AddUncanonicals(Isolate* isolate,
4189 ZoneList<CharacterRange>* ranges,
4181 int bottom, 4190 int bottom,
4182 int top) { 4191 int top) {
4183 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 4192 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4184 // Zones with no case mappings. There is a DEBUG-mode loop to assert that 4193 // Zones with no case mappings. There is a DEBUG-mode loop to assert that
4185 // this table is correct. 4194 // this table is correct.
4186 // 0x0600 - 0x0fff 4195 // 0x0600 - 0x0fff
4187 // 0x1100 - 0x1cff 4196 // 0x1100 - 0x1cff
4188 // 0x2000 - 0x20ff 4197 // 0x2000 - 0x20ff
4189 // 0x2200 - 0x23ff 4198 // 0x2200 - 0x23ff
4190 // 0x2500 - 0x2bff 4199 // 0x2500 - 0x2bff
(...skipping 17 matching lines...) Expand all
4208 if (top <= CharacterRange::kRangeCanonicalizeMax) { 4217 if (top <= CharacterRange::kRangeCanonicalizeMax) {
4209 CharacterRange range(bottom, top); 4218 CharacterRange range(bottom, top);
4210 range.AddCaseEquivalents(ranges, false); 4219 range.AddCaseEquivalents(ranges, false);
4211 return; 4220 return;
4212 } 4221 }
4213 4222
4214 // Split up very large ranges. This helps remove ranges where there are no 4223 // Split up very large ranges. This helps remove ranges where there are no
4215 // case mappings. 4224 // case mappings.
4216 for (int i = 0; i < boundary_count; i++) { 4225 for (int i = 0; i < boundary_count; i++) {
4217 if (bottom < boundaries[i] && top >= boundaries[i]) { 4226 if (bottom < boundaries[i] && top >= boundaries[i]) {
4218 AddUncanonicals(ranges, bottom, boundaries[i] - 1); 4227 AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);
4219 AddUncanonicals(ranges, boundaries[i], top); 4228 AddUncanonicals(isolate, ranges, boundaries[i], top);
4220 return; 4229 return;
4221 } 4230 }
4222 } 4231 }
4223 4232
4224 // If we are completely in a zone with no case mappings then we are done. 4233 // If we are completely in a zone with no case mappings then we are done.
4225 // We start at 2 so as not to except the ASCII range from mappings. 4234 // We start at 2 so as not to except the ASCII range from mappings.
4226 for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) { 4235 for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) {
4227 if (bottom >= boundaries[i] && top < boundaries[i + 1]) { 4236 if (bottom >= boundaries[i] && top < boundaries[i + 1]) {
4228 #ifdef DEBUG 4237 #ifdef DEBUG
4229 for (int j = bottom; j <= top; j++) { 4238 for (int j = bottom; j <= top; j++) {
4230 unsigned current_char = j; 4239 unsigned current_char = j;
4231 int length = uncanonicalize.get(current_char, '\0', chars); 4240 int length = isolate->jsregexp_uncanonicalize()->get(current_char,
4241 '\0', chars);
4232 for (int k = 0; k < length; k++) { 4242 for (int k = 0; k < length; k++) {
4233 ASSERT(chars[k] == current_char); 4243 ASSERT(chars[k] == current_char);
4234 } 4244 }
4235 } 4245 }
4236 #endif 4246 #endif
4237 return; 4247 return;
4238 } 4248 }
4239 } 4249 }
4240 4250
4241 // Step through the range finding equivalent characters. 4251 // Step through the range finding equivalent characters.
4242 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); 4252 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
4243 for (int i = bottom; i <= top; i++) { 4253 for (int i = bottom; i <= top; i++) {
4244 int length = uncanonicalize.get(i, '\0', chars); 4254 int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);
4245 for (int j = 0; j < length; j++) { 4255 for (int j = 0; j < length; j++) {
4246 uc32 chr = chars[j]; 4256 uc32 chr = chars[j];
4247 if (chr != i && (chr < bottom || chr > top)) { 4257 if (chr != i && (chr < bottom || chr > top)) {
4248 characters->Add(chr); 4258 characters->Add(chr);
4249 } 4259 }
4250 } 4260 }
4251 } 4261 }
4252 4262
4253 // Step through the equivalent characters finding simple ranges and 4263 // Step through the equivalent characters finding simple ranges and
4254 // adding ranges to the character class. 4264 // adding ranges to the character class.
(...skipping 999 matching lines...) Expand 10 before | Expand all | Expand 10 after
5254 RegExpMacroAssemblerIrregexp macro_assembler(codes); 5264 RegExpMacroAssemblerIrregexp macro_assembler(codes);
5255 #endif // V8_INTERPRETED_REGEXP 5265 #endif // V8_INTERPRETED_REGEXP
5256 5266
5257 return compiler.Assemble(&macro_assembler, 5267 return compiler.Assemble(&macro_assembler,
5258 node, 5268 node,
5259 data->capture_count, 5269 data->capture_count,
5260 pattern); 5270 pattern);
5261 } 5271 }
5262 5272
5263 5273
5264 int OffsetsVector::static_offsets_vector_[
5265 OffsetsVector::kStaticOffsetsVectorSize];
5266
5267 }} // namespace v8::internal 5274 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698