Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(475)

Side by Side Diff: src/parser.cc

Issue 13016: Allow [a-\d] in RegExp parser. (Closed)
Patch Set: Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 503 matching lines...) Expand 10 before | Expand all | Expand 10 after
514 514
515 uc32 ParseControlLetterEscape(bool* ok); 515 uc32 ParseControlLetterEscape(bool* ok);
516 uc32 ParseOctalLiteral(); 516 uc32 ParseOctalLiteral();
517 517
518 // Tries to parse the input as a back reference. If successful it 518 // Tries to parse the input as a back reference. If successful it
519 // stores the result in the output parameter and returns true. If 519 // stores the result in the output parameter and returns true. If
520 // it fails it will push back the characters read so the same characters 520 // it fails it will push back the characters read so the same characters
521 // can be reparsed. 521 // can be reparsed.
522 bool ParseBackReferenceIndex(int* index_out); 522 bool ParseBackReferenceIndex(int* index_out);
523 523
524 CharacterRange ParseClassAtom(bool* is_char_class, 524 CharacterRange ParseClassAtom(uc16* is_char_class,
525 ZoneList<CharacterRange>* ranges,
526 bool* ok); 525 bool* ok);
527 RegExpTree* ReportError(Vector<const char> message, bool* ok); 526 RegExpTree* ReportError(Vector<const char> message, bool* ok);
528 void Advance(); 527 void Advance();
529 void Advance(int dist); 528 void Advance(int dist);
530 void Reset(int pos); 529 void Reset(int pos);
531 530
532 bool HasCharacterEscapes(); 531 bool HasCharacterEscapes();
533 532
534 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } 533 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
535 int position() { return next_pos_ - 1; } 534 int position() { return next_pos_ - 1; }
(...skipping 3615 matching lines...) Expand 10 before | Expand all | Expand 10 after
4151 } else if (type == ':') { 4150 } else if (type == ':') {
4152 return body; 4151 return body;
4153 } else { 4152 } else {
4154 ASSERT(type == '=' || type == '!'); 4153 ASSERT(type == '=' || type == '!');
4155 bool is_positive = (type == '='); 4154 bool is_positive = (type == '=');
4156 return new RegExpLookahead(body, is_positive); 4155 return new RegExpLookahead(body, is_positive);
4157 } 4156 }
4158 } 4157 }
4159 4158
4160 4159
4161 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class, 4160 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class, bool* ok) {
4162 ZoneList<CharacterRange>* ranges, 4161 ASSERT_EQ(0, *char_class);
4163 bool* ok) {
4164 ASSERT_EQ(false, *is_char_class);
4165 uc32 first = current(); 4162 uc32 first = current();
4166 if (first == '\\') { 4163 if (first == '\\') {
4167 switch (Next()) { 4164 switch (Next()) {
4168 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { 4165 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {
4169 *is_char_class = true; 4166 *char_class = Next();
4170 uc32 c = Next();
4171 CharacterRange::AddClassEscape(c, ranges);
4172 Advance(2); 4167 Advance(2);
4173 return NULL; 4168 return CharacterRange::Singleton(0);
4174 } 4169 }
4175 default: 4170 default:
4176 uc32 c = ParseClassCharacterEscape(CHECK_OK); 4171 uc32 c = ParseClassCharacterEscape(CHECK_OK);
4177 return CharacterRange::Singleton(c); 4172 return CharacterRange::Singleton(c);
4178 } 4173 }
4179 } else { 4174 } else {
4180 Advance(); 4175 Advance();
4181 return CharacterRange::Singleton(first); 4176 return CharacterRange::Singleton(first);
4182 } 4177 }
4183 } 4178 }
4184 4179
4185 4180
4186 RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { 4181 RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {
4187 static const char* kUnterminated = "Unterminated character class"; 4182 static const char* kUnterminated = "Unterminated character class";
4188 static const char* kIllegal = "Illegal character class";
4189 static const char* kRangeOutOfOrder = "Range out of order in character class"; 4183 static const char* kRangeOutOfOrder = "Range out of order in character class";
4190 4184
4191 ASSERT_EQ(current(), '['); 4185 ASSERT_EQ(current(), '[');
4192 Advance(); 4186 Advance();
4193 bool is_negated = false; 4187 bool is_negated = false;
4194 if (current() == '^') { 4188 if (current() == '^') {
4195 is_negated = true; 4189 is_negated = true;
4196 Advance(); 4190 Advance();
4197 } 4191 }
4198 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); 4192 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
4199 while (has_more() && current() != ']') { 4193 while (has_more() && current() != ']') {
4200 bool is_char_class = false; 4194 uc16 char_class = 0;
4201 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK); 4195 CharacterRange first = ParseClassAtom(&char_class, CHECK_OK);
4202 if (!is_char_class) { 4196 if (char_class) {
4203 if (current() == '-') { 4197 CharacterRange::AddClassEscape(char_class, ranges);
4204 Advance(); 4198 continue;
4205 if (current() == kEndMarker) { 4199 }
4206 // If we reach the end we break out of the loop and let the 4200 if (current() == '-') {
4207 // following code report an error. 4201 Advance();
4208 break; 4202 if (current() == kEndMarker) {
4209 } else if (current() == ']') { 4203 // If we reach the end we break out of the loop and let the
4210 ranges->Add(first); 4204 // following code report an error.
4211 ranges->Add(CharacterRange::Singleton('-')); 4205 break;
4212 break; 4206 } else if (current() == ']') {
4213 }
4214 CharacterRange next =
4215 ParseClassAtom(&is_char_class, ranges, CHECK_OK);
4216 if (is_char_class) {
4217 return ReportError(CStrVector(kIllegal), CHECK_OK);
4218 }
4219 if (first.from() > next.to()) {
4220 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);
4221 }
4222 ranges->Add(CharacterRange::Range(first.from(), next.to()));
4223 } else {
4224 ranges->Add(first); 4207 ranges->Add(first);
4208 ranges->Add(CharacterRange::Singleton('-'));
4209 break;
4225 } 4210 }
4211 CharacterRange next = ParseClassAtom(&char_class, CHECK_OK);
4212 if (char_class) {
4213 ranges->Add(first);
4214 ranges->Add(CharacterRange::Singleton('-'));
4215 CharacterRange::AddClassEscape(char_class, ranges);
4216 continue;
4217 }
4218 if (first.from() > next.to()) {
4219 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);
4220 }
4221 ranges->Add(CharacterRange::Range(first.from(), next.to()));
4222 } else {
4223 ranges->Add(first);
4226 } 4224 }
4227 } 4225 }
4228 if (!has_more()) { 4226 if (!has_more()) {
4229 return ReportError(CStrVector(kUnterminated), CHECK_OK); 4227 return ReportError(CStrVector(kUnterminated), CHECK_OK);
4230 } 4228 }
4231 Advance(); 4229 Advance();
4232 if (ranges->length() == 0) { 4230 if (ranges->length() == 0) {
4233 ranges->Add(CharacterRange::Range(0, 0xffff)); 4231 ranges->Add(CharacterRange::Range(0, 0xffff));
4234 is_negated = !is_negated; 4232 is_negated = !is_negated;
4235 } 4233 }
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
4352 start_position, 4350 start_position,
4353 is_expression); 4351 is_expression);
4354 return result; 4352 return result;
4355 } 4353 }
4356 4354
4357 4355
4358 #undef NEW 4356 #undef NEW
4359 4357
4360 4358
4361 } } // namespace v8::internal 4359 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698