Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(47)

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1801573002: Version 5.0.71.17 (cherry-pick) (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@5.0
Patch Set: Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/regexp-ast.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/jsregexp.h" 5 #include "src/regexp/jsregexp.h"
6 6
7 #include "src/ast/ast.h" 7 #include "src/ast/ast.h"
8 #include "src/base/platform/platform.h" 8 #include "src/base/platform/platform.h"
9 #include "src/compilation-cache.h" 9 #include "src/compilation-cache.h"
10 #include "src/compiler.h" 10 #include "src/compiler.h"
(...skipping 4881 matching lines...) Expand 10 before | Expand all | Expand 10 after
4892 // - Lone trail surrogates. 4892 // - Lone trail surrogates.
4893 // Lone surrogates are valid code points, even though no actual characters. 4893 // Lone surrogates are valid code points, even though no actual characters.
4894 // They require special matching to make sure we do not split surrogate pairs. 4894 // They require special matching to make sure we do not split surrogate pairs.
4895 // We use the dispatch table to accomplish this. The base range is split up 4895 // We use the dispatch table to accomplish this. The base range is split up
4896 // by the table by the overlay ranges, and the Call callback is used to 4896 // by the table by the overlay ranges, and the Call callback is used to
4897 // filter and collect ranges for each category. 4897 // filter and collect ranges for each category.
4898 for (int i = 0; i < base->length(); i++) { 4898 for (int i = 0; i < base->length(); i++) {
4899 table_.AddRange(base->at(i), kBase, zone_); 4899 table_.AddRange(base->at(i), kBase, zone_);
4900 } 4900 }
4901 // Add overlay ranges. 4901 // Add overlay ranges.
4902 table_.AddRange(CharacterRange(0, kLeadSurrogateStart - 1), kBmpCodePoints, 4902 table_.AddRange(CharacterRange::Range(0, kLeadSurrogateStart - 1),
4903 zone_); 4903 kBmpCodePoints, zone_);
4904 table_.AddRange(CharacterRange(kLeadSurrogateStart, kLeadSurrogateEnd), 4904 table_.AddRange(CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd),
4905 kLeadSurrogates, zone_); 4905 kLeadSurrogates, zone_);
4906 table_.AddRange(CharacterRange(kTrailSurrogateStart, kTrailSurrogateEnd), 4906 table_.AddRange(
4907 kTrailSurrogates, zone_); 4907 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
4908 table_.AddRange(CharacterRange(kTrailSurrogateEnd + 1, kNonBmpStart - 1), 4908 kTrailSurrogates, zone_);
4909 kBmpCodePoints, zone_); 4909 table_.AddRange(
4910 table_.AddRange(CharacterRange(kNonBmpStart, kNonBmpEnd), kNonBmpCodePoints, 4910 CharacterRange::Range(kTrailSurrogateEnd + 1, kNonBmpStart - 1),
4911 zone_); 4911 kBmpCodePoints, zone_);
4912 table_.AddRange(CharacterRange::Range(kNonBmpStart, kNonBmpEnd),
4913 kNonBmpCodePoints, zone_);
4912 table_.ForEach(this); 4914 table_.ForEach(this);
4913 } 4915 }
4914 4916
4915 4917
4916 void UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) { 4918 void UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) {
4917 OutSet* outset = entry.out_set(); 4919 OutSet* outset = entry.out_set();
4918 if (!outset->Get(kBase)) return; 4920 if (!outset->Get(kBase)) return;
4919 ZoneList<CharacterRange>** target = NULL; 4921 ZoneList<CharacterRange>** target = NULL;
4920 if (outset->Get(kBmpCodePoints)) { 4922 if (outset->Get(kBmpCodePoints)) {
4921 target = &bmp_; 4923 target = &bmp_;
(...skipping 866 matching lines...) Expand 10 before | Expand all | Expand 10 after
5788 5790
5789 5791
5790 static void AddClass(const int* elmv, 5792 static void AddClass(const int* elmv,
5791 int elmc, 5793 int elmc,
5792 ZoneList<CharacterRange>* ranges, 5794 ZoneList<CharacterRange>* ranges,
5793 Zone* zone) { 5795 Zone* zone) {
5794 elmc--; 5796 elmc--;
5795 DCHECK(elmv[elmc] == kRangeEndMarker); 5797 DCHECK(elmv[elmc] == kRangeEndMarker);
5796 for (int i = 0; i < elmc; i += 2) { 5798 for (int i = 0; i < elmc; i += 2) {
5797 DCHECK(elmv[i] < elmv[i + 1]); 5799 DCHECK(elmv[i] < elmv[i + 1]);
5798 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1), zone); 5800 ranges->Add(CharacterRange::Range(elmv[i], elmv[i + 1] - 1), zone);
5799 } 5801 }
5800 } 5802 }
5801 5803
5802 5804
5803 static void AddClassNegated(const int *elmv, 5805 static void AddClassNegated(const int *elmv,
5804 int elmc, 5806 int elmc,
5805 ZoneList<CharacterRange>* ranges, 5807 ZoneList<CharacterRange>* ranges,
5806 Zone* zone) { 5808 Zone* zone) {
5807 elmc--; 5809 elmc--;
5808 DCHECK(elmv[elmc] == kRangeEndMarker); 5810 DCHECK(elmv[elmc] == kRangeEndMarker);
5809 DCHECK(elmv[0] != 0x0000); 5811 DCHECK(elmv[0] != 0x0000);
5810 DCHECK(elmv[elmc - 1] != String::kMaxCodePoint); 5812 DCHECK(elmv[elmc - 1] != String::kMaxCodePoint);
5811 uc16 last = 0x0000; 5813 uc16 last = 0x0000;
5812 for (int i = 0; i < elmc; i += 2) { 5814 for (int i = 0; i < elmc; i += 2) {
5813 DCHECK(last <= elmv[i] - 1); 5815 DCHECK(last <= elmv[i] - 1);
5814 DCHECK(elmv[i] < elmv[i + 1]); 5816 DCHECK(elmv[i] < elmv[i + 1]);
5815 ranges->Add(CharacterRange(last, elmv[i] - 1), zone); 5817 ranges->Add(CharacterRange::Range(last, elmv[i] - 1), zone);
5816 last = elmv[i + 1]; 5818 last = elmv[i + 1];
5817 } 5819 }
5818 ranges->Add(CharacterRange(last, String::kMaxCodePoint), zone); 5820 ranges->Add(CharacterRange::Range(last, String::kMaxCodePoint), zone);
5819 } 5821 }
5820 5822
5821 5823
5822 void CharacterRange::AddClassEscape(uc16 type, 5824 void CharacterRange::AddClassEscape(uc16 type,
5823 ZoneList<CharacterRange>* ranges, 5825 ZoneList<CharacterRange>* ranges,
5824 Zone* zone) { 5826 Zone* zone) {
5825 switch (type) { 5827 switch (type) {
5826 case 's': 5828 case 's':
5827 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone); 5829 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5828 break; 5830 break;
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
5926 block_end = equivalents[0]; 5928 block_end = equivalents[0];
5927 } 5929 }
5928 int end = (block_end > top) ? top : block_end; 5930 int end = (block_end > top) ? top : block_end;
5929 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', 5931 length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0',
5930 equivalents); 5932 equivalents);
5931 for (int i = 0; i < length; i++) { 5933 for (int i = 0; i < length; i++) {
5932 uc32 c = equivalents[i]; 5934 uc32 c = equivalents[i];
5933 uc32 range_from = c - (block_end - pos); 5935 uc32 range_from = c - (block_end - pos);
5934 uc32 range_to = c - (block_end - end); 5936 uc32 range_to = c - (block_end - end);
5935 if (!(bottom <= range_from && range_to <= top)) { 5937 if (!(bottom <= range_from && range_to <= top)) {
5936 ranges->Add(CharacterRange(range_from, range_to), zone); 5938 ranges->Add(CharacterRange::Range(range_from, range_to), zone);
5937 } 5939 }
5938 } 5940 }
5939 pos = end + 1; 5941 pos = end + 1;
5940 } 5942 }
5941 } 5943 }
5942 } 5944 }
5943 } 5945 }
5944 5946
5945 5947
5946 bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) { 5948 bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
6020 MoveRanges(list, start_pos, start_pos + 1, count - start_pos); 6022 MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
6021 } 6023 }
6022 list->at(start_pos) = insert; 6024 list->at(start_pos) = insert;
6023 return count + 1; 6025 return count + 1;
6024 } 6026 }
6025 if (start_pos + 1 == end_pos) { 6027 if (start_pos + 1 == end_pos) {
6026 // Replace single existing range at position start_pos. 6028 // Replace single existing range at position start_pos.
6027 CharacterRange to_replace = list->at(start_pos); 6029 CharacterRange to_replace = list->at(start_pos);
6028 int new_from = Min(to_replace.from(), from); 6030 int new_from = Min(to_replace.from(), from);
6029 int new_to = Max(to_replace.to(), to); 6031 int new_to = Max(to_replace.to(), to);
6030 list->at(start_pos) = CharacterRange(new_from, new_to); 6032 list->at(start_pos) = CharacterRange::Range(new_from, new_to);
6031 return count; 6033 return count;
6032 } 6034 }
6033 // Replace a number of existing ranges from start_pos to end_pos - 1. 6035 // Replace a number of existing ranges from start_pos to end_pos - 1.
6034 // Move the remaining ranges down. 6036 // Move the remaining ranges down.
6035 6037
6036 int new_from = Min(list->at(start_pos).from(), from); 6038 int new_from = Min(list->at(start_pos).from(), from);
6037 int new_to = Max(list->at(end_pos - 1).to(), to); 6039 int new_to = Max(list->at(end_pos - 1).to(), to);
6038 if (end_pos < count) { 6040 if (end_pos < count) {
6039 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); 6041 MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
6040 } 6042 }
6041 list->at(start_pos) = CharacterRange(new_from, new_to); 6043 list->at(start_pos) = CharacterRange::Range(new_from, new_to);
6042 return count - (end_pos - start_pos) + 1; 6044 return count - (end_pos - start_pos) + 1;
6043 } 6045 }
6044 6046
6045 6047
6046 void CharacterSet::Canonicalize() { 6048 void CharacterSet::Canonicalize() {
6047 // Special/default classes are always considered canonical. The result 6049 // Special/default classes are always considered canonical. The result
6048 // of calling ranges() will be sorted. 6050 // of calling ranges() will be sorted.
6049 if (ranges_ == NULL) return; 6051 if (ranges_ == NULL) return;
6050 CharacterRange::Canonicalize(ranges_); 6052 CharacterRange::Canonicalize(ranges_);
6051 } 6053 }
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
6090 6092
6091 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges, 6093 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
6092 ZoneList<CharacterRange>* negated_ranges, 6094 ZoneList<CharacterRange>* negated_ranges,
6093 Zone* zone) { 6095 Zone* zone) {
6094 DCHECK(CharacterRange::IsCanonical(ranges)); 6096 DCHECK(CharacterRange::IsCanonical(ranges));
6095 DCHECK_EQ(0, negated_ranges->length()); 6097 DCHECK_EQ(0, negated_ranges->length());
6096 int range_count = ranges->length(); 6098 int range_count = ranges->length();
6097 uc32 from = 0; 6099 uc32 from = 0;
6098 int i = 0; 6100 int i = 0;
6099 if (range_count > 0 && ranges->at(0).from() == 0) { 6101 if (range_count > 0 && ranges->at(0).from() == 0) {
6100 from = ranges->at(0).to(); 6102 from = ranges->at(0).to() + 1;
6101 i = 1; 6103 i = 1;
6102 } 6104 }
6103 while (i < range_count) { 6105 while (i < range_count) {
6104 CharacterRange range = ranges->at(i); 6106 CharacterRange range = ranges->at(i);
6105 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1), zone); 6107 negated_ranges->Add(CharacterRange::Range(from, range.from() - 1), zone);
6106 from = range.to(); 6108 from = range.to() + 1;
6107 i++; 6109 i++;
6108 } 6110 }
6109 if (from < String::kMaxCodePoint) { 6111 if (from < String::kMaxCodePoint) {
6110 negated_ranges->Add(CharacterRange(from + 1, String::kMaxCodePoint), zone); 6112 negated_ranges->Add(CharacterRange::Range(from, String::kMaxCodePoint),
6113 zone);
6111 } 6114 }
6112 } 6115 }
6113 6116
6114 6117
6115 // ------------------------------------------------------------------- 6118 // -------------------------------------------------------------------
6116 // Splay tree 6119 // Splay tree
6117 6120
6118 6121
6119 OutSet* OutSet::Extend(unsigned value, Zone* zone) { 6122 OutSet* OutSet::Extend(unsigned value, Zone* zone) {
6120 if (Get(value)) 6123 if (Get(value))
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
6179 ZoneSplayTree<Config>::Locator loc; 6182 ZoneSplayTree<Config>::Locator loc;
6180 if (tree()->FindGreatestLessThan(current.from(), &loc)) { 6183 if (tree()->FindGreatestLessThan(current.from(), &loc)) {
6181 Entry* entry = &loc.value(); 6184 Entry* entry = &loc.value();
6182 // If we've found a range that overlaps with this one, and it 6185 // If we've found a range that overlaps with this one, and it
6183 // starts strictly to the left of this one, we have to fix it 6186 // starts strictly to the left of this one, we have to fix it
6184 // because the following code only handles ranges that start on 6187 // because the following code only handles ranges that start on
6185 // or after the start point of the range we're adding. 6188 // or after the start point of the range we're adding.
6186 if (entry->from() < current.from() && entry->to() >= current.from()) { 6189 if (entry->from() < current.from() && entry->to() >= current.from()) {
6187 // Snap the overlapping range in half around the start point of 6190 // Snap the overlapping range in half around the start point of
6188 // the range we're adding. 6191 // the range we're adding.
6189 CharacterRange left(entry->from(), current.from() - 1); 6192 CharacterRange left =
6190 CharacterRange right(current.from(), entry->to()); 6193 CharacterRange::Range(entry->from(), current.from() - 1);
6194 CharacterRange right = CharacterRange::Range(current.from(), entry->to());
6191 // The left part of the overlapping range doesn't overlap. 6195 // The left part of the overlapping range doesn't overlap.
6192 // Truncate the whole entry to be just the left part. 6196 // Truncate the whole entry to be just the left part.
6193 entry->set_to(left.to()); 6197 entry->set_to(left.to());
6194 // The right part is the one that overlaps. We add this part 6198 // The right part is the one that overlaps. We add this part
6195 // to the map and let the next step deal with merging it with 6199 // to the map and let the next step deal with merging it with
6196 // the range we're adding. 6200 // the range we're adding.
6197 ZoneSplayTree<Config>::Locator loc; 6201 ZoneSplayTree<Config>::Locator loc;
6198 bool inserted = tree()->Insert(right.from(), &loc); 6202 bool inserted = tree()->Insert(right.from(), &loc);
6199 DCHECK(inserted); 6203 DCHECK(inserted);
6200 USE(inserted); 6204 USE(inserted);
(...skipping 285 matching lines...) Expand 10 before | Expand all | Expand 10 after
6486 public: 6490 public:
6487 explicit AddDispatchRange(DispatchTableConstructor* constructor) 6491 explicit AddDispatchRange(DispatchTableConstructor* constructor)
6488 : constructor_(constructor) { } 6492 : constructor_(constructor) { }
6489 void Call(uc32 from, DispatchTable::Entry entry); 6493 void Call(uc32 from, DispatchTable::Entry entry);
6490 private: 6494 private:
6491 DispatchTableConstructor* constructor_; 6495 DispatchTableConstructor* constructor_;
6492 }; 6496 };
6493 6497
6494 6498
6495 void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) { 6499 void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) {
6496 CharacterRange range(from, entry.to()); 6500 constructor_->AddRange(CharacterRange::Range(from, entry.to()));
6497 constructor_->AddRange(range);
6498 } 6501 }
6499 6502
6500 6503
6501 void DispatchTableConstructor::VisitChoice(ChoiceNode* node) { 6504 void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {
6502 if (node->being_calculated()) 6505 if (node->being_calculated())
6503 return; 6506 return;
6504 DispatchTable* table = node->GetTable(ignore_case_); 6507 DispatchTable* table = node->GetTable(ignore_case_);
6505 AddDispatchRange adder(this); 6508 AddDispatchRange adder(this);
6506 table->ForEach(&adder); 6509 table->ForEach(&adder);
6507 } 6510 }
(...skipping 17 matching lines...) Expand all
6525 return Compare<uc16>(a->from(), b->from()); 6528 return Compare<uc16>(a->from(), b->from());
6526 } 6529 }
6527 6530
6528 6531
6529 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { 6532 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
6530 ranges->Sort(CompareRangeByFrom); 6533 ranges->Sort(CompareRangeByFrom);
6531 uc16 last = 0; 6534 uc16 last = 0;
6532 for (int i = 0; i < ranges->length(); i++) { 6535 for (int i = 0; i < ranges->length(); i++) {
6533 CharacterRange range = ranges->at(i); 6536 CharacterRange range = ranges->at(i);
6534 if (last < range.from()) 6537 if (last < range.from())
6535 AddRange(CharacterRange(last, range.from() - 1)); 6538 AddRange(CharacterRange::Range(last, range.from() - 1));
6536 if (range.to() >= last) { 6539 if (range.to() >= last) {
6537 if (range.to() == String::kMaxUtf16CodeUnit) { 6540 if (range.to() == String::kMaxUtf16CodeUnit) {
6538 return; 6541 return;
6539 } else { 6542 } else {
6540 last = range.to() + 1; 6543 last = range.to() + 1;
6541 } 6544 }
6542 } 6545 }
6543 } 6546 }
6544 AddRange(CharacterRange(last, String::kMaxUtf16CodeUnit)); 6547 AddRange(CharacterRange::Range(last, String::kMaxUtf16CodeUnit));
6545 } 6548 }
6546 6549
6547 6550
6548 void DispatchTableConstructor::VisitText(TextNode* that) { 6551 void DispatchTableConstructor::VisitText(TextNode* that) {
6549 TextElement elm = that->elements()->at(0); 6552 TextElement elm = that->elements()->at(0);
6550 switch (elm.text_type()) { 6553 switch (elm.text_type()) {
6551 case TextElement::ATOM: { 6554 case TextElement::ATOM: {
6552 uc16 c = elm.atom()->data()[0]; 6555 uc16 c = elm.atom()->data()[0];
6553 AddRange(CharacterRange(c, c)); 6556 AddRange(CharacterRange::Range(c, c));
6554 break; 6557 break;
6555 } 6558 }
6556 case TextElement::CHAR_CLASS: { 6559 case TextElement::CHAR_CLASS: {
6557 RegExpCharacterClass* tree = elm.char_class(); 6560 RegExpCharacterClass* tree = elm.char_class();
6558 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone()); 6561 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());
6559 if (tree->is_negated()) { 6562 if (tree->is_negated()) {
6560 AddInverse(ranges); 6563 AddInverse(ranges);
6561 } else { 6564 } else {
6562 for (int i = 0; i < ranges->length(); i++) 6565 for (int i = 0; i < ranges->length(); i++)
6563 AddRange(ranges->at(i)); 6566 AddRange(ranges->at(i));
(...skipping 295 matching lines...) Expand 10 before | Expand all | Expand 10 after
6859 6862
6860 6863
6861 void RegExpResultsCache::Clear(FixedArray* cache) { 6864 void RegExpResultsCache::Clear(FixedArray* cache) {
6862 for (int i = 0; i < kRegExpResultsCacheSize; i++) { 6865 for (int i = 0; i < kRegExpResultsCacheSize; i++) {
6863 cache->set(i, Smi::FromInt(0)); 6866 cache->set(i, Smi::FromInt(0));
6864 } 6867 }
6865 } 6868 }
6866 6869
6867 } // namespace internal 6870 } // namespace internal
6868 } // namespace v8 6871 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/regexp-ast.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698