Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 260 "malformed_regexp"); | 260 "malformed_regexp"); |
| 261 return Handle<Object>::null(); | 261 return Handle<Object>::null(); |
| 262 } | 262 } |
| 263 | 263 |
| 264 if (parse_result.simple && !flags.is_ignore_case()) { | 264 if (parse_result.simple && !flags.is_ignore_case()) { |
| 265 // Parse-tree is a single atom that is equal to the pattern. | 265 // Parse-tree is a single atom that is equal to the pattern. |
| 266 result = AtomCompile(re, pattern, flags, pattern); | 266 result = AtomCompile(re, pattern, flags, pattern); |
| 267 } else if (parse_result.tree->IsAtom() && | 267 } else if (parse_result.tree->IsAtom() && |
| 268 !flags.is_ignore_case() && | 268 !flags.is_ignore_case() && |
| 269 parse_result.capture_count == 0) { | 269 parse_result.capture_count == 0) { |
| 270 // TODO(lrn) Accept capture_count > 0 on atoms. | |
| 271 RegExpAtom* atom = parse_result.tree->AsAtom(); | 270 RegExpAtom* atom = parse_result.tree->AsAtom(); |
| 272 Vector<const uc16> atom_pattern = atom->data(); | 271 Vector<const uc16> atom_pattern = atom->data(); |
| 273 Handle<String> atom_string = | 272 Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern); |
| 274 Factory::NewStringFromTwoByte(atom_pattern); | |
| 275 result = AtomCompile(re, pattern, flags, atom_string); | 273 result = AtomCompile(re, pattern, flags, atom_string); |
| 276 } else if (FLAG_irregexp) { | 274 } else if (FLAG_irregexp) { |
| 277 result = IrregexpPrepare(re, pattern, flags); | 275 result = IrregexpPrepare(re, pattern, flags); |
| 278 } else { | 276 } else { |
| 279 result = JscrePrepare(re, pattern, flags); | 277 result = JscrePrepare(re, pattern, flags); |
| 280 } | 278 } |
| 281 Object* data = re->data(); | 279 Object* data = re->data(); |
| 282 if (data->IsFixedArray()) { | 280 if (data->IsFixedArray()) { |
| 283 // If compilation succeeded then the data is set on the regexp | 281 // If compilation succeeded then the data is set on the regexp |
| 284 // and we can store it in the cache. | 282 // and we can store it in the cache. |
| (...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 505 JscreCompileWithRetryAfterGC(two_byte_pattern, | 503 JscreCompileWithRetryAfterGC(two_byte_pattern, |
| 506 flags, | 504 flags, |
| 507 &number_of_captures, | 505 &number_of_captures, |
| 508 &error_message, | 506 &error_message, |
| 509 &code); | 507 &code); |
| 510 | 508 |
| 511 if (code == NULL) { | 509 if (code == NULL) { |
| 512 // Throw an exception. | 510 // Throw an exception. |
| 513 Handle<JSArray> array = Factory::NewJSArray(2); | 511 Handle<JSArray> array = Factory::NewJSArray(2); |
| 514 SetElement(array, 0, pattern); | 512 SetElement(array, 0, pattern); |
| 515 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector( | 513 const char* message = |
| 516 (error_message == NULL) ? "Unknown regexp error" : error_message))); | 514 (error_message == NULL) ? "Unknown regexp error" : error_message; |
| 515 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message))); | |
| 517 Handle<Object> regexp_err = | 516 Handle<Object> regexp_err = |
| 518 Factory::NewSyntaxError("malformed_regexp", array); | 517 Factory::NewSyntaxError("malformed_regexp", array); |
| 519 Top::Throw(*regexp_err); | 518 Top::Throw(*regexp_err); |
| 520 return Handle<Object>(); | 519 return Handle<Object>(); |
| 521 } | 520 } |
| 522 | 521 |
| 523 // Convert the return address to a ByteArray pointer. | 522 // Convert the return address to a ByteArray pointer. |
| 524 Handle<ByteArray> internal( | 523 Handle<ByteArray> internal( |
| 525 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code))); | 524 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code))); |
| 526 | 525 |
| (...skipping 1210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1737 } | 1736 } |
| 1738 | 1737 |
| 1739 | 1738 |
| 1740 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1739 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
| 1741 RegExpCharacterClass* cc, | 1740 RegExpCharacterClass* cc, |
| 1742 int cp_offset, | 1741 int cp_offset, |
| 1743 Label* on_failure, | 1742 Label* on_failure, |
| 1744 bool check_offset, | 1743 bool check_offset, |
| 1745 bool ascii, | 1744 bool ascii, |
| 1746 bool preloaded) { | 1745 bool preloaded) { |
| 1746 if (cc->is_standard() && | |
| 1747 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), | |
| 1748 cp_offset, check_offset, | |
|
Mads Ager (chromium)
2009/01/02 10:45:16
I would put one argument per line here.
| |
| 1749 on_failure)) { | |
| 1750 return; | |
| 1751 } | |
| 1752 | |
| 1747 ZoneList<CharacterRange>* ranges = cc->ranges(); | 1753 ZoneList<CharacterRange>* ranges = cc->ranges(); |
| 1748 int max_char; | 1754 int max_char; |
| 1749 if (ascii) { | 1755 if (ascii) { |
| 1750 max_char = String::kMaxAsciiCharCode; | 1756 max_char = String::kMaxAsciiCharCode; |
| 1751 } else { | 1757 } else { |
| 1752 max_char = String::kMaxUC16CharCode; | 1758 max_char = String::kMaxUC16CharCode; |
| 1753 } | 1759 } |
| 1754 | 1760 |
| 1755 Label success; | 1761 Label success; |
| 1756 | 1762 |
| (...skipping 1581 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3338 printer.PrintNode(label, node); | 3344 printer.PrintNode(label, node); |
| 3339 } | 3345 } |
| 3340 | 3346 |
| 3341 | 3347 |
| 3342 #endif // DEBUG | 3348 #endif // DEBUG |
| 3343 | 3349 |
| 3344 | 3350 |
| 3345 // ------------------------------------------------------------------- | 3351 // ------------------------------------------------------------------- |
| 3346 // Tree to graph conversion | 3352 // Tree to graph conversion |
| 3347 | 3353 |
| 3354 static const int kSpaceRangeCount = 20; | |
| 3355 static const int kSpaceRangeAsciiCount = 4; | |
| 3356 static const uc16 kSpaceRanges[kSpaceRangeCount] = { 0x0009, 0x000D, 0x0020, | |
| 3357 0x0020, 0x00A0, 0x00A0, 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, | |
| 3358 0x2028, 0x2029, 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 }; | |
| 3359 | |
| 3360 static const int kWordRangeCount = 8; | |
| 3361 static const uc16 kWordRanges[kWordRangeCount] = { '0', '9', 'A', 'Z', '_', | |
| 3362 '_', 'a', 'z' }; | |
| 3363 | |
| 3364 static const int kDigitRangeCount = 2; | |
| 3365 static const uc16 kDigitRanges[kDigitRangeCount] = { '0', '9' }; | |
| 3366 | |
| 3367 static const int kLineTerminatorRangeCount = 6; | |
| 3368 static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = { 0x000A, | |
| 3369 0x000A, 0x000D, 0x000D, 0x2028, 0x2029 }; | |
| 3348 | 3370 |
| 3349 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, | 3371 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
| 3350 RegExpNode* on_success) { | 3372 RegExpNode* on_success) { |
| 3351 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); | 3373 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); |
| 3352 elms->Add(TextElement::Atom(this)); | 3374 elms->Add(TextElement::Atom(this)); |
| 3353 return new TextNode(elms, on_success); | 3375 return new TextNode(elms, on_success); |
| 3354 } | 3376 } |
| 3355 | 3377 |
| 3356 | 3378 |
| 3357 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 3379 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
| 3358 RegExpNode* on_success) { | 3380 RegExpNode* on_success) { |
| 3359 return new TextNode(elements(), on_success); | 3381 return new TextNode(elements(), on_success); |
| 3360 } | 3382 } |
| 3361 | 3383 |
| 3384 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, | |
| 3385 const uc16* special_class, | |
| 3386 int length) { | |
| 3387 ASSERT(ranges->length() != 0); | |
| 3388 ASSERT(length != 0); | |
| 3389 ASSERT(special_class[0] != 0); | |
| 3390 if (ranges->length() != (length>>1)+1) { | |
|
Mads Ager (chromium)
2009/01/02 10:45:16
Space around the binary operators?
| |
| 3391 return false; | |
| 3392 } | |
| 3393 CharacterRange range = ranges->at(0); | |
| 3394 if (range.from() != 0) { | |
| 3395 return false; | |
| 3396 } | |
| 3397 for (int i = 0; i < length; i += 2) { | |
| 3398 if (special_class[i] != (range.to() + 1)) { | |
| 3399 return false; | |
| 3400 } | |
| 3401 range = ranges->at((i>>1)+1); | |
|
Mads Ager (chromium)
2009/01/02 10:45:16
Space around the binary operators?
| |
| 3402 if (special_class[i+1] != range.from() - 1) { | |
|
Mads Ager (chromium)
2009/01/02 10:45:16
Space around the binary operator?
| |
| 3403 return false; | |
| 3404 } | |
| 3405 } | |
| 3406 if (range.to() != 0xffff) { | |
| 3407 return false; | |
| 3408 } | |
| 3409 return true; | |
| 3410 } | |
| 3411 | |
| 3412 | |
| 3413 static bool CompareRanges(ZoneList<CharacterRange>* ranges, | |
| 3414 const uc16* special_class, | |
| 3415 int length) { | |
| 3416 if (ranges->length() * 2 != length) { | |
| 3417 return false; | |
| 3418 } | |
| 3419 for (int i = 0; i < length; i+=2) { | |
|
Mads Ager (chromium)
2009/01/02 10:45:16
Space around the '+=' for consistency?
| |
| 3420 CharacterRange range = ranges->at(i >> 1); | |
| 3421 if (range.from() != special_class[i] || range.to() != special_class[i+1]) { | |
| 3422 return false; | |
| 3423 } | |
| 3424 } | |
| 3425 return true; | |
| 3426 } | |
| 3427 | |
| 3428 | |
| 3429 bool RegExpCharacterClass::is_standard() { | |
| 3430 // TODO(lrn): Remove need for this function, by not throwing away information | |
| 3431 // along the way. | |
| 3432 if (is_negated_) { | |
| 3433 return false; | |
| 3434 } | |
| 3435 if (set_.is_standard()) { | |
| 3436 return true; | |
| 3437 } | |
| 3438 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { | |
| 3439 set_.set_standard_set_type('s'); | |
| 3440 return true; | |
| 3441 } | |
| 3442 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { | |
| 3443 set_.set_standard_set_type('S'); | |
| 3444 return true; | |
| 3445 } | |
| 3446 if (CompareInverseRanges(set_.ranges(), | |
| 3447 kLineTerminatorRanges, | |
| 3448 kLineTerminatorRangeCount)) { | |
| 3449 set_.set_standard_set_type('.'); | |
| 3450 return true; | |
| 3451 } | |
| 3452 return false; | |
| 3453 } | |
| 3454 | |
| 3362 | 3455 |
| 3363 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 3456 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| 3364 RegExpNode* on_success) { | 3457 RegExpNode* on_success) { |
| 3365 return new TextNode(this, on_success); | 3458 return new TextNode(this, on_success); |
| 3366 } | 3459 } |
| 3367 | 3460 |
| 3368 | 3461 |
| 3369 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 3462 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
| 3370 RegExpNode* on_success) { | 3463 RegExpNode* on_success) { |
| 3371 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | 3464 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
| (...skipping 221 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3593 RegExpNode* on_success) { | 3686 RegExpNode* on_success) { |
| 3594 ZoneList<RegExpTree*>* children = nodes(); | 3687 ZoneList<RegExpTree*>* children = nodes(); |
| 3595 RegExpNode* current = on_success; | 3688 RegExpNode* current = on_success; |
| 3596 for (int i = children->length() - 1; i >= 0; i--) { | 3689 for (int i = children->length() - 1; i >= 0; i--) { |
| 3597 current = children->at(i)->ToNode(compiler, current); | 3690 current = children->at(i)->ToNode(compiler, current); |
| 3598 } | 3691 } |
| 3599 return current; | 3692 return current; |
| 3600 } | 3693 } |
| 3601 | 3694 |
| 3602 | 3695 |
| 3603 static const int kSpaceRangeCount = 20; | |
| 3604 static const uc16 kSpaceRanges[kSpaceRangeCount] = { | |
| 3605 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680, | |
| 3606 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029, | |
| 3607 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 | |
| 3608 }; | |
| 3609 | |
| 3610 | |
| 3611 static const int kWordRangeCount = 8; | |
| 3612 static const uc16 kWordRanges[kWordRangeCount] = { | |
| 3613 '0', '9', 'A', 'Z', '_', '_', 'a', 'z' | |
| 3614 }; | |
| 3615 | |
| 3616 | |
| 3617 static const int kDigitRangeCount = 2; | |
| 3618 static const uc16 kDigitRanges[kDigitRangeCount] = { | |
| 3619 '0', '9' | |
| 3620 }; | |
| 3621 | |
| 3622 | |
| 3623 static const int kLineTerminatorRangeCount = 6; | |
| 3624 static const uc16 kLineTerminatorRanges[kLineTerminatorRangeCount] = { | |
| 3625 0x000A, 0x000A, 0x000D, 0x000D, 0x2028, 0x2029 | |
| 3626 }; | |
| 3627 | |
| 3628 | |
| 3629 static void AddClass(const uc16* elmv, | 3696 static void AddClass(const uc16* elmv, |
| 3630 int elmc, | 3697 int elmc, |
| 3631 ZoneList<CharacterRange>* ranges) { | 3698 ZoneList<CharacterRange>* ranges) { |
| 3632 for (int i = 0; i < elmc; i += 2) { | 3699 for (int i = 0; i < elmc; i += 2) { |
| 3633 ASSERT(elmv[i] <= elmv[i + 1]); | 3700 ASSERT(elmv[i] <= elmv[i + 1]); |
| 3634 ranges->Add(CharacterRange(elmv[i], elmv[i + 1])); | 3701 ranges->Add(CharacterRange(elmv[i], elmv[i + 1])); |
| 3635 } | 3702 } |
| 3636 } | 3703 } |
| 3637 | 3704 |
| 3638 | 3705 |
| (...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3814 } | 3881 } |
| 3815 } | 3882 } |
| 3816 start = pos = block_end + 1; | 3883 start = pos = block_end + 1; |
| 3817 } | 3884 } |
| 3818 } else { | 3885 } else { |
| 3819 // TODO(plesner) when we've fixed the 2^11 bug in unibrow. | 3886 // TODO(plesner) when we've fixed the 2^11 bug in unibrow. |
| 3820 } | 3887 } |
| 3821 } | 3888 } |
| 3822 | 3889 |
| 3823 | 3890 |
| 3891 ZoneList<CharacterRange>* CharacterSet::ranges() { | |
| 3892 if (ranges_ == NULL) { | |
| 3893 ranges_ = new ZoneList<CharacterRange>(2); | |
| 3894 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | |
| 3895 } | |
| 3896 return ranges_; | |
| 3897 } | |
| 3898 | |
| 3899 | |
| 3900 | |
| 3824 // ------------------------------------------------------------------- | 3901 // ------------------------------------------------------------------- |
| 3825 // Interest propagation | 3902 // Interest propagation |
| 3826 | 3903 |
| 3827 | 3904 |
| 3828 RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) { | 3905 RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) { |
| 3829 for (int i = 0; i < siblings_.length(); i++) { | 3906 for (int i = 0; i < siblings_.length(); i++) { |
| 3830 RegExpNode* sibling = siblings_.Get(i); | 3907 RegExpNode* sibling = siblings_.Get(i); |
| 3831 if (sibling->info()->Matches(info)) | 3908 if (sibling->info()->Matches(info)) |
| 3832 return sibling; | 3909 return sibling; |
| 3833 } | 3910 } |
| (...skipping 481 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4315 EmbeddedVector<byte, 1024> codes; | 4392 EmbeddedVector<byte, 1024> codes; |
| 4316 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4393 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| 4317 return compiler.Assemble(¯o_assembler, | 4394 return compiler.Assemble(¯o_assembler, |
| 4318 node, | 4395 node, |
| 4319 data->capture_count, | 4396 data->capture_count, |
| 4320 pattern); | 4397 pattern); |
| 4321 } | 4398 } |
| 4322 | 4399 |
| 4323 | 4400 |
| 4324 }} // namespace v8::internal | 4401 }} // namespace v8::internal |
| OLD | NEW |