| OLD | NEW |
| --- | --- |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
| 6 | 6 |
| 7 #include <memory> | 7 #include <memory> |
| 8 | 8 |
| 9 #include "src/base/platform/platform.h" | 9 #include "src/base/platform/platform.h" |
| 10 #include "src/compilation-cache.h" | 10 #include "src/compilation-cache.h" |
| (...skipping 3309 matching lines...) | (...skipping 3309 matching lines...) |
| 3320 } | 3320 } |
| 3321 } | 3321 } |
| 3322 | 3322 |
| 3323 | 3323 |
| 3324 TextNode* TextNode::CreateForCharacterRanges(Zone* zone, | 3324 TextNode* TextNode::CreateForCharacterRanges(Zone* zone, |
| 3325 ZoneList<CharacterRange>* ranges, | 3325 ZoneList<CharacterRange>* ranges, |
| 3326 bool read_backward, | 3326 bool read_backward, |
| 3327 RegExpNode* on_success) { | 3327 RegExpNode* on_success) { |
| 3328 DCHECK_NOT_NULL(ranges); | 3328 DCHECK_NOT_NULL(ranges); |
| 3329 ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone); | 3329 ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone); |
| 3330 elms->Add( | 3330 elms->Add(TextElement::CharClass(new (zone) RegExpCharacterClass(ranges)), |
| 3331 TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, false)), | 3331 zone); |
| 3332 zone); | |
| 3333 return new (zone) TextNode(elms, read_backward, on_success); | 3332 return new (zone) TextNode(elms, read_backward, on_success); |
| 3334 } | 3333 } |
| 3335 | 3334 |
| 3336 | 3335 |
| 3337 TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead, | 3336 TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead, |
| 3338 CharacterRange trail, | 3337 CharacterRange trail, |
| 3339 bool read_backward, | 3338 bool read_backward, |
| 3340 RegExpNode* on_success) { | 3339 RegExpNode* on_success) { |
| 3341 ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead); | 3340 ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead); |
| 3342 ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail); | 3341 ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail); |
| 3343 ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone); | 3342 ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone); |
| 3344 elms->Add(TextElement::CharClass( | 3343 elms->Add( |
| 3345 new (zone) RegExpCharacterClass(lead_ranges, false)), | 3344 TextElement::CharClass(new (zone) RegExpCharacterClass(lead_ranges)), |
| 3346 zone); | 3345 zone); |
| 3347 elms->Add(TextElement::CharClass( | 3346 elms->Add( |
| 3348 new (zone) RegExpCharacterClass(trail_ranges, false)), | 3347 TextElement::CharClass(new (zone) RegExpCharacterClass(trail_ranges)), |
| 3349 zone); | 3348 zone); |
| 3350 return new (zone) TextNode(elms, read_backward, on_success); | 3349 return new (zone) TextNode(elms, read_backward, on_success); |
| 3351 } | 3350 } |
| 3352 | 3351 |
| 3353 | 3352 |
| 3354 // This generates the code to match a text node. A text node can contain | 3353 // This generates the code to match a text node. A text node can contain |
| 3355 // straight character sequences (possibly to be matched in a case-independent | 3354 // straight character sequences (possibly to be matched in a case-independent |
| 3356 // way) and character classes. For efficiency we do not do this in a single | 3355 // way) and character classes. For efficiency we do not do this in a single |
| 3357 // pass from left to right. Instead we pass over the text node several times, | 3356 // pass from left to right. Instead we pass over the text node several times, |
| 3358 // emitting code for some character positions every time. See the comment on | 3357 // emitting code for some character positions every time. See the comment on |
| 3359 // TextEmitPass for details. | 3358 // TextEmitPass for details. |
| (...skipping 1484 matching lines...) | (...skipping 1484 matching lines...) |
| 4844 return false; | 4843 return false; |
| 4845 } | 4844 } |
| 4846 } | 4845 } |
| 4847 return true; | 4846 return true; |
| 4848 } | 4847 } |
| 4849 | 4848 |
| 4850 | 4849 |
| 4851 bool RegExpCharacterClass::is_standard(Zone* zone) { | 4850 bool RegExpCharacterClass::is_standard(Zone* zone) { |
| 4852 // TODO(lrn): Remove need for this function, by not throwing away information | 4851 // TODO(lrn): Remove need for this function, by not throwing away information |
| 4853 // along the way. | 4852 // along the way. |
| 4854 if (is_negated_) { | 4853 if (is_negated()) { |
| 4855 return false; | 4854 return false; |
| 4856 } | 4855 } |
| 4857 if (set_.is_standard()) { | 4856 if (set_.is_standard()) { |
| 4858 return true; | 4857 return true; |
| 4859 } | 4858 } |
| 4860 if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) { | 4859 if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) { |
| 4861 set_.set_standard_set_type('s'); | 4860 set_.set_standard_set_type('s'); |
| 4862 return true; | 4861 return true; |
| 4863 } | 4862 } |
| 4864 if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) { | 4863 if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) { |
| (...skipping 272 matching lines...) | (...skipping 272 matching lines...) |
| 5137 | 5136 |
| 5138 | 5137 |
| 5139 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 5138 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| 5140 RegExpNode* on_success) { | 5139 RegExpNode* on_success) { |
| 5141 set_.Canonicalize(); | 5140 set_.Canonicalize(); |
| 5142 Zone* zone = compiler->zone(); | 5141 Zone* zone = compiler->zone(); |
| 5143 ZoneList<CharacterRange>* ranges = this->ranges(zone); | 5142 ZoneList<CharacterRange>* ranges = this->ranges(zone); |
| 5144 if (compiler->needs_unicode_case_equivalents()) { | 5143 if (compiler->needs_unicode_case_equivalents()) { |
| 5145 AddUnicodeCaseEquivalents(ranges, zone); | 5144 AddUnicodeCaseEquivalents(ranges, zone); |
| 5146 } | 5145 } |
| 5147 if (compiler->unicode() && !compiler->one_byte()) { | 5146 if (compiler->unicode() && !compiler->one_byte() && |
| | 5147 !contains_split_surrogate()) { |
| 5148 if (is_negated()) { | 5148 if (is_negated()) { |
| 5149 ZoneList<CharacterRange>* negated = | 5149 ZoneList<CharacterRange>* negated = |
| 5150 new (zone) ZoneList<CharacterRange>(2, zone); | 5150 new (zone) ZoneList<CharacterRange>(2, zone); |
| 5151 CharacterRange::Negate(ranges, negated, zone); | 5151 CharacterRange::Negate(ranges, negated, zone); |
| 5152 ranges = negated; | 5152 ranges = negated; |
| 5153 } | 5153 } |
| 5154 if (ranges->length() == 0) { | 5154 if (ranges->length() == 0) { |
| 5155 ranges->Add(CharacterRange::Everything(), zone); | 5155 ranges->Add(CharacterRange::Everything(), zone); |
| 5156 RegExpCharacterClass* fail = | 5156 RegExpCharacterClass* fail = |
| 5157 new (zone) RegExpCharacterClass(ranges, true); | 5157 new (zone) RegExpCharacterClass(ranges, NEGATED); |
| 5158 return new (zone) TextNode(fail, compiler->read_backward(), on_success); | 5158 return new (zone) TextNode(fail, compiler->read_backward(), on_success); |
| 5159 } | 5159 } |
| 5160 if (standard_type() == '*') { | 5160 if (standard_type() == '*') { |
| 5161 return UnanchoredAdvance(compiler, on_success); | 5161 return UnanchoredAdvance(compiler, on_success); |
| 5162 } else { | 5162 } else { |
| 5163 ChoiceNode* result = new (zone) ChoiceNode(2, zone); | 5163 ChoiceNode* result = new (zone) ChoiceNode(2, zone); |
| 5164 UnicodeRangeSplitter splitter(zone, ranges); | 5164 UnicodeRangeSplitter splitter(zone, ranges); |
| 5165 AddBmpCharacters(compiler, result, on_success, &splitter); | 5165 AddBmpCharacters(compiler, result, on_success, &splitter); |
| 5166 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); | 5166 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); |
| 5167 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); | 5167 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); |
| (...skipping 193 matching lines...) | (...skipping 193 matching lines...) |
| 5361 alternatives->at(write_posn++) = alternatives->at(i); | 5361 alternatives->at(write_posn++) = alternatives->at(i); |
| 5362 i++; | 5362 i++; |
| 5363 continue; | 5363 continue; |
| 5364 } | 5364 } |
| 5365 RegExpAtom* atom = alternative->AsAtom(); | 5365 RegExpAtom* atom = alternative->AsAtom(); |
| 5366 if (atom->length() != 1) { | 5366 if (atom->length() != 1) { |
| 5367 alternatives->at(write_posn++) = alternatives->at(i); | 5367 alternatives->at(write_posn++) = alternatives->at(i); |
| 5368 i++; | 5368 i++; |
| 5369 continue; | 5369 continue; |
| 5370 } | 5370 } |
| | 5371 DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); |
| | 5372 bool contains_trail_surrogate = |
| | 5373 unibrow::Utf16::IsTrailSurrogate(atom->data().at(0)); |
| 5371 int first_in_run = i; | 5374 int first_in_run = i; |
| 5372 i++; | 5375 i++; |
| 5373 while (i < length) { | 5376 while (i < length) { |
| 5374 alternative = alternatives->at(i); | 5377 alternative = alternatives->at(i); |
| 5375 if (!alternative->IsAtom()) break; | 5378 if (!alternative->IsAtom()) break; |
| 5376 atom = alternative->AsAtom(); | 5379 atom = alternative->AsAtom(); |
| 5377 if (atom->length() != 1) break; | 5380 if (atom->length() != 1) break; |
| | 5381 DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); |
| | 5382 contains_trail_surrogate \|= |
| | 5383 unibrow::Utf16::IsTrailSurrogate(atom->data().at(0)); |
| 5378 i++; | 5384 i++; |
| 5379 } | 5385 } |
| 5380 if (i > first_in_run + 1) { | 5386 if (i > first_in_run + 1) { |
| 5381 // Found non-trivial run of single-character alternatives. | 5387 // Found non-trivial run of single-character alternatives. |
| 5382 int run_length = i - first_in_run; | 5388 int run_length = i - first_in_run; |
| 5383 ZoneList<CharacterRange>* ranges = | 5389 ZoneList<CharacterRange>* ranges = |
| 5384 new (zone) ZoneList<CharacterRange>(2, zone); | 5390 new (zone) ZoneList<CharacterRange>(2, zone); |
| 5385 for (int j = 0; j < run_length; j++) { | 5391 for (int j = 0; j < run_length; j++) { |
| 5386 RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom(); | 5392 RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom(); |
| 5387 DCHECK_EQ(old_atom->length(), 1); | 5393 DCHECK_EQ(old_atom->length(), 1); |
| 5388 ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); | 5394 ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); |
| 5389 } | 5395 } |
| | 5396 RegExpCharacterClass::Flags flags; |
| | 5397 if (compiler->unicode() && contains_trail_surrogate) { |
| | 5398 flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE; |
| | 5399 } |
| 5390 alternatives->at(write_posn++) = | 5400 alternatives->at(write_posn++) = |
| 5391 new (zone) RegExpCharacterClass(ranges, false); | 5401 new (zone) RegExpCharacterClass(ranges, flags); |
| 5392 } else { | 5402 } else { |
| 5393 // Just copy any trivial alternatives. | 5403 // Just copy any trivial alternatives. |
| 5394 for (int j = first_in_run; j < i; j++) { | 5404 for (int j = first_in_run; j < i; j++) { |
| 5395 alternatives->at(write_posn++) = alternatives->at(j); | 5405 alternatives->at(write_posn++) = alternatives->at(j); |
| 5396 } | 5406 } |
| 5397 } | 5407 } |
| 5398 } | 5408 } |
| 5399 alternatives->Rewind(write_posn); // Trim end of array. | 5409 alternatives->Rewind(write_posn); // Trim end of array. |
| 5400 } | 5410 } |
| 5401 | 5411 |
| (...skipping 1520 matching lines...) | (...skipping 1520 matching lines...) |
| 6922 | 6932 |
| 6923 | 6933 |
| 6924 void RegExpResultsCache::Clear(FixedArray* cache) { | 6934 void RegExpResultsCache::Clear(FixedArray* cache) { |
| 6925 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6935 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
| 6926 cache->set(i, Smi::kZero); | 6936 cache->set(i, Smi::kZero); |
| 6927 } | 6937 } |
| 6928 } | 6938 } |
| 6929 | 6939 |
| 6930 } // namespace internal | 6940 } // namespace internal |
| 6931 } // namespace v8 | 6941 } // namespace v8 |
| OLD | NEW |