| OLD | NEW |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/factory.h" | 8 #include "src/factory.h" |
| 9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
| 10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
| 11 #include "src/regexp/jsregexp.h" | 11 #include "src/regexp/jsregexp.h" |
| 12 #include "src/utils.h" | 12 #include "src/utils.h" |
| 13 | 13 |
| 14 #ifdef V8_I18N_SUPPORT | 14 #ifdef V8_I18N_SUPPORT |
| 15 #include "unicode/uset.h" | 15 #include "unicode/uset.h" |
| 16 #endif // V8_I18N_SUPPORT | 16 #endif // V8_I18N_SUPPORT |
| 17 | 17 |
| 18 namespace v8 { | 18 namespace v8 { |
| 19 namespace internal { | 19 namespace internal { |
| 20 | 20 |
| 21 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 21 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
| 22 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) | 22 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) |
| 23 : isolate_(isolate), | 23 : isolate_(isolate), |
| 24 zone_(zone), | 24 zone_(zone), |
| 25 error_(error), | 25 error_(error), |
| 26 captures_(NULL), | 26 captures_(NULL), |
| 27 in_(in), | 27 in_(in), |
| 28 current_(kEndMarker), | 28 current_(kEndMarker), |
| 29 flags_(flags), | 29 ignore_case_(flags & JSRegExp::kIgnoreCase), |
| 30 multiline_(flags & JSRegExp::kMultiline), |
| 31 unicode_(flags & JSRegExp::kUnicode), |
| 30 next_pos_(0), | 32 next_pos_(0), |
| 31 captures_started_(0), | 33 captures_started_(0), |
| 32 capture_count_(0), | 34 capture_count_(0), |
| 33 has_more_(true), | 35 has_more_(true), |
| 34 simple_(false), | 36 simple_(false), |
| 35 contains_anchor_(false), | 37 contains_anchor_(false), |
| 36 is_scanned_for_captures_(false), | 38 is_scanned_for_captures_(false), |
| 37 failed_(false) { | 39 failed_(false) { |
| 38 Advance(); | 40 Advance(); |
| 39 } | 41 } |
| 40 | 42 |
| 41 | |
| 42 template <bool update_position> | 43 template <bool update_position> |
| 43 uc32 RegExpParser::ReadNext() { | 44 inline uc32 RegExpParser::ReadNext() { |
| 44 int position = next_pos_; | 45 int position = next_pos_; |
| 45 uc32 c0 = in()->Get(position); | 46 uc32 c0 = in()->Get(position); |
| 46 position++; | 47 position++; |
| 47 // Read the whole surrogate pair in case of unicode flag, if possible. | 48 // Read the whole surrogate pair in case of unicode flag, if possible. |
| 48 if (unicode() && position < in()->length() && | 49 if (unicode() && position < in()->length() && |
| 49 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) { | 50 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) { |
| 50 uc16 c1 = in()->Get(position); | 51 uc16 c1 = in()->Get(position); |
| 51 if (unibrow::Utf16::IsTrailSurrogate(c1)) { | 52 if (unibrow::Utf16::IsTrailSurrogate(c1)) { |
| 52 c0 = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(c0), c1); | 53 c0 = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(c0), c1); |
| 53 position++; | 54 position++; |
| (...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 162 // Alternative :: | 163 // Alternative :: |
| 163 // [empty] | 164 // [empty] |
| 164 // Term Alternative | 165 // Term Alternative |
| 165 // Term :: | 166 // Term :: |
| 166 // Assertion | 167 // Assertion |
| 167 // Atom | 168 // Atom |
| 168 // Atom Quantifier | 169 // Atom Quantifier |
| 169 RegExpTree* RegExpParser::ParseDisjunction() { | 170 RegExpTree* RegExpParser::ParseDisjunction() { |
| 170 // Used to store current state while parsing subexpressions. | 171 // Used to store current state while parsing subexpressions. |
| 171 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, | 172 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, |
| 172 flags_, zone()); | 173 ignore_case(), unicode(), zone()); |
| 173 RegExpParserState* state = &initial_state; | 174 RegExpParserState* state = &initial_state; |
| 174 // Cache the builder in a local variable for quick access. | 175 // Cache the builder in a local variable for quick access. |
| 175 RegExpBuilder* builder = initial_state.builder(); | 176 RegExpBuilder* builder = initial_state.builder(); |
| 176 while (true) { | 177 while (true) { |
| 177 switch (current()) { | 178 switch (current()) { |
| 178 case kEndMarker: | 179 case kEndMarker: |
| 179 if (state->IsSubexpression()) { | 180 if (state->IsSubexpression()) { |
| 180 // Inside a parenthesized group when hitting end of input. | 181 // Inside a parenthesized group when hitting end of input. |
| 181 return ReportError(CStrVector("Unterminated group")); | 182 return ReportError(CStrVector("Unterminated group")); |
| 182 } | 183 } |
| (...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 296 return ReportError(CStrVector("Invalid group")); | 297 return ReportError(CStrVector("Invalid group")); |
| 297 } | 298 } |
| 298 Advance(2); | 299 Advance(2); |
| 299 } else { | 300 } else { |
| 300 if (captures_started_ >= kMaxCaptures) { | 301 if (captures_started_ >= kMaxCaptures) { |
| 301 return ReportError(CStrVector("Too many captures")); | 302 return ReportError(CStrVector("Too many captures")); |
| 302 } | 303 } |
| 303 captures_started_++; | 304 captures_started_++; |
| 304 } | 305 } |
| 305 // Store current state and begin new disjunction parsing. | 306 // Store current state and begin new disjunction parsing. |
| 306 state = | 307 state = new (zone()) RegExpParserState( |
| 307 new (zone()) RegExpParserState(state, subexpr_type, lookaround_type, | 308 state, subexpr_type, lookaround_type, captures_started_, |
| 308 captures_started_, flags_, zone()); | 309 ignore_case(), unicode(), zone()); |
| 309 builder = state->builder(); | 310 builder = state->builder(); |
| 310 continue; | 311 continue; |
| 311 } | 312 } |
| 312 case '[': { | 313 case '[': { |
| 313 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); | 314 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); |
| 314 builder->AddCharacterClass(cc->AsCharacterClass()); | 315 builder->AddCharacterClass(cc->AsCharacterClass()); |
| 315 break; | 316 break; |
| 316 } | 317 } |
| 317 // Atom :: | 318 // Atom :: |
| 318 // \ AtomEscape | 319 // \ AtomEscape |
| (...skipping 754 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1073 } | 1074 } |
| 1074 result->tree = tree; | 1075 result->tree = tree; |
| 1075 int capture_count = parser.captures_started(); | 1076 int capture_count = parser.captures_started(); |
| 1076 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; | 1077 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; |
| 1077 result->contains_anchor = parser.contains_anchor(); | 1078 result->contains_anchor = parser.contains_anchor(); |
| 1078 result->capture_count = capture_count; | 1079 result->capture_count = capture_count; |
| 1079 } | 1080 } |
| 1080 return !parser.failed(); | 1081 return !parser.failed(); |
| 1081 } | 1082 } |
| 1082 | 1083 |
| 1083 | 1084 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode) |
| 1084 RegExpBuilder::RegExpBuilder(Zone* zone, JSRegExp::Flags flags) | |
| 1085 : zone_(zone), | 1085 : zone_(zone), |
| 1086 pending_empty_(false), | 1086 pending_empty_(false), |
| 1087 flags_(flags), | 1087 ignore_case_(ignore_case), |
| 1088 unicode_(unicode), |
| 1088 characters_(NULL), | 1089 characters_(NULL), |
| 1089 pending_surrogate_(kNoPendingSurrogate), | 1090 pending_surrogate_(kNoPendingSurrogate), |
| 1090 terms_(), | 1091 terms_(), |
| 1091 alternatives_() | 1092 alternatives_() |
| 1092 #ifdef DEBUG | 1093 #ifdef DEBUG |
| 1093 , | 1094 , |
| 1094 last_added_(ADD_NONE) | 1095 last_added_(ADD_NONE) |
| 1095 #endif | 1096 #endif |
| 1096 { | 1097 { |
| 1097 } | 1098 } |
| (...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1361 return false; | 1362 return false; |
| 1362 } | 1363 } |
| 1363 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1364 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
| 1364 zone()); | 1365 zone()); |
| 1365 LAST(ADD_TERM); | 1366 LAST(ADD_TERM); |
| 1366 return true; | 1367 return true; |
| 1367 } | 1368 } |
| 1368 | 1369 |
| 1369 } // namespace internal | 1370 } // namespace internal |
| 1370 } // namespace v8 | 1371 } // namespace v8 |
| OLD | NEW |