Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/parser.cc

Issue 10759: Introduce text nodes (Closed)
Patch Set: Fixed repeated RemoveLast issue Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/jsregexp.cc ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after
249 } 249 }
250 250
251 T* last() { 251 T* last() {
252 ASSERT(last_ != NULL); 252 ASSERT(last_ != NULL);
253 return last_; 253 return last_;
254 } 254 }
255 255
256 T* RemoveLast() { 256 T* RemoveLast() {
257 ASSERT(last_ != NULL); 257 ASSERT(last_ != NULL);
258 T* result = last_; 258 T* result = last_;
259 last_ = NULL; 259 if (list_ != NULL && list_->length() > 0)
260 last_ = list_->RemoveLast();
Lasse Reichstein 2008/11/15 23:58:06 This allows removing more than the last added elem
Christian Plesner Hansen 2008/11/16 09:05:06 Not as such, no, but it's not a good property that
Lasse Reichstein 2008/11/16 10:53:15 Ah, so there was a need :)
261 else
262 last_ = NULL;
260 return result; 263 return result;
261 } 264 }
262 265
266 T* Get(int i) {
267 ASSERT(0 <= i && i < length());
268 if (list_ == NULL) {
269 ASSERT_EQ(0, i);
270 return last_;
271 } else {
272 if (i == list_->length()) {
273 ASSERT(last_ != NULL);
274 return last_;
275 } else {
276 return list_->at(i);
277 }
278 }
279 }
280
263 void Clear() { 281 void Clear() {
264 list_ = NULL; 282 list_ = NULL;
265 last_ = NULL; 283 last_ = NULL;
266 } 284 }
267 285
268 int length() { 286 int length() {
269 int length = (list_ == NULL) ? 0 : list_->length(); 287 int length = (list_ == NULL) ? 0 : list_->length();
270 return length + ((last_ == NULL) ? 0 : 1); 288 return length + ((last_ == NULL) ? 0 : 1);
271 } 289 }
272 290
(...skipping 14 matching lines...) Expand all
287 }; 305 };
288 306
289 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. 307 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
290 class RegExpBuilder { 308 class RegExpBuilder {
291 public: 309 public:
292 RegExpBuilder(); 310 RegExpBuilder();
293 void AddCharacter(uc16 character); 311 void AddCharacter(uc16 character);
294 // "Adds" an empty expression. Does nothing except consume a 312 // "Adds" an empty expression. Does nothing except consume a
295 // following quantifier 313 // following quantifier
296 void AddEmpty(); 314 void AddEmpty();
297 void AddAtom(RegExpTree* tree); 315 void AddTerm(RegExpTree* tree);
Lasse Reichstein 2008/11/15 23:58:06 I would prefer if this is still called AddAtom (se
Christian Plesner Hansen 2008/11/16 09:05:06 You're right -- I've been using the term 'atom' in
298 void AddAssertion(RegExpTree* tree); 316 void AddAssertion(RegExpTree* tree);
299 void NewAlternative(); // '|' 317 void NewAlternative(); // '|'
300 void AddQuantifierToAtom(int min, int max, bool is_greedy); 318 void AddQuantifierToAtom(int min, int max, bool is_greedy);
301 RegExpTree* ToRegExp(); 319 RegExpTree* ToRegExp();
302 private: 320 private:
303 void FlushCharacters(); 321 void FlushCharacters();
322 void FlushText();
304 bool FlushTerms(); 323 bool FlushTerms();
305 bool pending_empty_; 324 bool pending_empty_;
306 ZoneList<uc16>* characters_; 325 ZoneList<uc16>* characters_;
307 BufferedZoneList<RegExpTree, 2> terms_; 326 BufferedZoneList<RegExpTree, 2> terms_;
327 BufferedZoneList<RegExpTree, 2> text_;
308 BufferedZoneList<RegExpTree, 2> alternatives_; 328 BufferedZoneList<RegExpTree, 2> alternatives_;
309 #ifdef DEBUG 329 #ifdef DEBUG
310 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; 330 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
311 #define LAST(x) last_added_ = x; 331 #define LAST(x) last_added_ = x;
312 #else 332 #else
313 #define LAST(x) 333 #define LAST(x)
314 #endif 334 #endif
315 }; 335 };
316 336
317 337
318 RegExpBuilder::RegExpBuilder() 338 RegExpBuilder::RegExpBuilder()
319 : pending_empty_(false), characters_(NULL), terms_(), alternatives_() 339 : pending_empty_(false), characters_(NULL), terms_(), alternatives_()
320 #ifdef DEBUG 340 #ifdef DEBUG
321 , last_added_(ADD_NONE) 341 , last_added_(ADD_NONE)
322 #endif 342 #endif
323 {} 343 {}
324 344
325 345
326 void RegExpBuilder::FlushCharacters() { 346 void RegExpBuilder::FlushCharacters() {
327 pending_empty_ = false; 347 pending_empty_ = false;
328 if (characters_ != NULL) { 348 if (characters_ != NULL) {
329 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); 349 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector());
330 characters_ = NULL; 350 characters_ = NULL;
331 terms_.Add(atom); 351 text_.Add(atom);
332 LAST(ADD_ATOM); 352 LAST(ADD_ATOM);
333 } 353 }
334 } 354 }
335 355
336 356
357 void RegExpBuilder::FlushText() {
358 FlushCharacters();
359 int num_text = text_.length();
360 if (num_text == 0) {
361 return;
362 } else if (num_text == 1) {
363 terms_.Add(text_.last());
364 } else {
365 RegExpText* text = new RegExpText();
366 for (int i = 0; i < num_text; i++)
367 text_.Get(i)->AppendToText(text);
368 terms_.Add(text);
Lasse Reichstein 2008/11/15 23:58:06 The other places where a buffer is used, the conte
Christian Plesner Hansen 2008/11/16 09:05:06 If we're always using GetList anyway then why even
Lasse Reichstein 2008/11/16 10:53:15 The current uses all have special cases for one el
369 }
370 text_.Clear();
371 }
372
373
337 void RegExpBuilder::AddCharacter(uc16 c) { 374 void RegExpBuilder::AddCharacter(uc16 c) {
338 pending_empty_ = false; 375 pending_empty_ = false;
339 if (characters_ == NULL) { 376 if (characters_ == NULL) {
340 characters_ = new ZoneList<uc16>(4); 377 characters_ = new ZoneList<uc16>(4);
341 } 378 }
342 characters_->Add(c); 379 characters_->Add(c);
343 LAST(ADD_CHAR); 380 LAST(ADD_CHAR);
344 } 381 }
345 382
346 383
347 void RegExpBuilder::AddEmpty() { 384 void RegExpBuilder::AddEmpty() {
348 pending_empty_ = true; 385 pending_empty_ = true;
349 } 386 }
350 387
351 388
352 void RegExpBuilder::AddAtom(RegExpTree* atom) { 389 void RegExpBuilder::AddTerm(RegExpTree* term) {
Lasse Reichstein 2008/11/15 23:58:06 The original method was called AddAtom to match th
Christian Plesner Hansen 2008/11/16 09:05:06 You don't always know that you're adding a charact
Lasse Reichstein 2008/11/16 10:53:15 I can see the point. If the result of a sub-parse
353 FlushCharacters(); 390 if (term->IsTextElement()) {
354 terms_.Add(atom); 391 FlushCharacters();
392 text_.Add(term);
393 } else {
394 FlushText();
395 terms_.Add(term);
396 }
355 LAST(ADD_ATOM); 397 LAST(ADD_ATOM);
356 } 398 }
357 399
358 400
359 void RegExpBuilder::AddAssertion(RegExpTree* assert) { 401 void RegExpBuilder::AddAssertion(RegExpTree* assert) {
360 FlushCharacters(); 402 FlushText();
361 terms_.Add(assert); 403 terms_.Add(assert);
362 LAST(ADD_ASSERT); 404 LAST(ADD_ASSERT);
363 } 405 }
364 406
365 407
366 void RegExpBuilder::NewAlternative() { 408 void RegExpBuilder::NewAlternative() {
367 if (!FlushTerms()) { 409 if (!FlushTerms()) {
368 alternatives_.Add(RegExpEmpty::GetInstance()); 410 alternatives_.Add(RegExpEmpty::GetInstance());
369 } 411 }
370 } 412 }
371 413
372 414
373 bool RegExpBuilder::FlushTerms() { 415 bool RegExpBuilder::FlushTerms() {
374 FlushCharacters(); 416 FlushText();
375 int num_terms = terms_.length(); 417 int num_terms = terms_.length();
376 if (num_terms == 0) { 418 if (num_terms == 0) {
377 return false; 419 return false;
378 } 420 }
379 RegExpTree* alternative; 421 RegExpTree* alternative;
380 if (num_terms == 1) { 422 if (num_terms == 1) {
381 alternative = terms_.last(); 423 alternative = terms_.last();
382 } else { 424 } else {
383 alternative = new RegExpAlternative(terms_.GetList()); 425 alternative = new RegExpAlternative(terms_.GetList());
384 } 426 }
(...skipping 23 matching lines...) Expand all
408 return; 450 return;
409 } 451 }
410 RegExpTree* atom; 452 RegExpTree* atom;
411 if (characters_ != NULL) { 453 if (characters_ != NULL) {
412 ASSERT(last_added_ == ADD_CHAR); 454 ASSERT(last_added_ == ADD_CHAR);
413 // Last atom was character. 455 // Last atom was character.
414 Vector<const uc16> char_vector = characters_->ToConstVector(); 456 Vector<const uc16> char_vector = characters_->ToConstVector();
415 int num_chars = char_vector.length(); 457 int num_chars = char_vector.length();
416 if (num_chars > 1) { 458 if (num_chars > 1) {
417 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); 459 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
418 terms_.Add(new RegExpAtom(prefix)); 460 text_.Add(new RegExpAtom(prefix));
419 char_vector = char_vector.SubVector(num_chars - 1, num_chars); 461 char_vector = char_vector.SubVector(num_chars - 1, num_chars);
420 } 462 }
421 characters_ = NULL; 463 characters_ = NULL;
422 atom = new RegExpAtom(char_vector); 464 atom = new RegExpAtom(char_vector);
465 FlushText();
466 } else if (text_.length() > 0) {
467 ASSERT(last_added_ == ADD_ATOM);
Lasse Reichstein 2008/11/15 23:58:06 The last thing added was a character class. The l
Christian Plesner Hansen 2008/11/16 09:05:06 What about "foo (?:bar) baz"?
Lasse Reichstein 2008/11/16 10:53:15 Indeed. It would have to be ADD_TEXT to capture th
468 atom = text_.RemoveLast();
469 FlushText();
423 } else if (terms_.length() > 0) { 470 } else if (terms_.length() > 0) {
424 ASSERT(last_added_ == ADD_ATOM); 471 ASSERT(last_added_ == ADD_ATOM);
425 atom = terms_.RemoveLast(); 472 atom = terms_.RemoveLast();
426 } else { 473 } else {
427 // Only call immediately after adding an atom or character! 474 // Only call immediately after adding an atom or character!
428 UNREACHABLE(); 475 UNREACHABLE();
429 return; 476 return;
430 } 477 }
431 terms_.Add(new RegExpQuantifier(min, max, is_greedy, atom)); 478 terms_.Add(new RegExpQuantifier(min, max, is_greedy, atom));
432 LAST(ADD_TERM); 479 LAST(ADD_TERM);
(...skipping 3145 matching lines...) Expand 10 before | Expand all | Expand 10 after
3578 RegExpAssertion::END_OF_INPUT; 3625 RegExpAssertion::END_OF_INPUT;
3579 builder.AddAssertion(new RegExpAssertion(type)); 3626 builder.AddAssertion(new RegExpAssertion(type));
3580 continue; 3627 continue;
3581 } 3628 }
3582 case '.': { 3629 case '.': {
3583 Advance(); 3630 Advance();
3584 // everything except \x0a, \x0d, \u2028 and \u2029 3631 // everything except \x0a, \x0d, \u2028 and \u2029
3585 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); 3632 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
3586 CharacterRange::AddClassEscape('.', ranges); 3633 CharacterRange::AddClassEscape('.', ranges);
3587 RegExpTree* atom = new RegExpCharacterClass(ranges, false); 3634 RegExpTree* atom = new RegExpCharacterClass(ranges, false);
3588 builder.AddAtom(atom); 3635 builder.AddTerm(atom);
3589 break; 3636 break;
3590 } 3637 }
3591 case '(': { 3638 case '(': {
3592 RegExpTree* atom = ParseGroup(CHECK_OK); 3639 RegExpTree* atom = ParseGroup(CHECK_OK);
3593 builder.AddAtom(atom); 3640 builder.AddTerm(atom);
Lasse Reichstein 2008/11/16 10:53:15 At this point we could check whether the atom is a
3594 break; 3641 break;
3595 } 3642 }
3596 case '[': { 3643 case '[': {
3597 RegExpTree* atom = ParseCharacterClass(CHECK_OK); 3644 RegExpTree* atom = ParseCharacterClass(CHECK_OK);
3598 builder.AddAtom(atom); 3645 builder.AddTerm(atom);
3599 break; 3646 break;
3600 } 3647 }
3601 // Atom :: 3648 // Atom ::
3602 // \ AtomEscape 3649 // \ AtomEscape
3603 case '\\': 3650 case '\\':
3604 switch (next()) { 3651 switch (next()) {
3605 case kEndMarker: 3652 case kEndMarker:
3606 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); 3653 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK);
3607 case 'b': 3654 case 'b':
3608 Advance(2); 3655 Advance(2);
3609 builder.AddAssertion( 3656 builder.AddAssertion(
3610 new RegExpAssertion(RegExpAssertion::BOUNDARY)); 3657 new RegExpAssertion(RegExpAssertion::BOUNDARY));
3611 continue; 3658 continue;
3612 case 'B': 3659 case 'B':
3613 Advance(2); 3660 Advance(2);
3614 builder.AddAssertion( 3661 builder.AddAssertion(
3615 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); 3662 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY));
3616 continue; 3663 continue;
3617 // AtomEscape :: 3664 // AtomEscape ::
3618 // CharacterClassEscape 3665 // CharacterClassEscape
3619 // 3666 //
3620 // CharacterClassEscape :: one of 3667 // CharacterClassEscape :: one of
3621 // d D s S w W 3668 // d D s S w W
3622 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { 3669 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {
3623 uc32 c = next(); 3670 uc32 c = next();
3624 Advance(2); 3671 Advance(2);
3625 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); 3672 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
3626 CharacterRange::AddClassEscape(c, ranges); 3673 CharacterRange::AddClassEscape(c, ranges);
3627 RegExpTree* atom = new RegExpCharacterClass(ranges, false); 3674 RegExpTree* atom = new RegExpCharacterClass(ranges, false);
3628 builder.AddAtom(atom); 3675 builder.AddTerm(atom);
3629 goto has_read_atom; // Avoid setting has_character_escapes_. 3676 goto has_read_atom; // Avoid setting has_character_escapes_.
3630 } 3677 }
3631 case '1': case '2': case '3': case '4': case '5': case '6': 3678 case '1': case '2': case '3': case '4': case '5': case '6':
3632 case '7': case '8': case '9': { 3679 case '7': case '8': case '9': {
3633 int index = 0; 3680 int index = 0;
3634 if (ParseBackreferenceIndex(&index)) { 3681 if (ParseBackreferenceIndex(&index)) {
3635 RegExpCapture* capture = captures_->at(index - 1); 3682 RegExpCapture* capture = captures_->at(index - 1);
3636 if (capture == NULL || capture->available() != CAPTURE_AVAILABLE) { 3683 if (capture == NULL || capture->available() != CAPTURE_AVAILABLE) {
3637 // Prepare to ignore a following quantifier 3684 // Prepare to ignore a following quantifier
3638 builder.AddEmpty(); 3685 builder.AddEmpty();
3639 goto has_read_atom; 3686 goto has_read_atom;
3640 } 3687 }
3641 RegExpTree* atom = new RegExpBackreference(capture); 3688 RegExpTree* atom = new RegExpBackreference(capture);
3642 builder.AddAtom(atom); 3689 builder.AddTerm(atom);
3643 goto has_read_atom; // Avoid setting has_character_escapes_. 3690 goto has_read_atom; // Avoid setting has_character_escapes_.
3644 } 3691 }
3645 uc32 first_digit = next(); 3692 uc32 first_digit = next();
3646 if (first_digit == '8' || first_digit == '9') { 3693 if (first_digit == '8' || first_digit == '9') {
3647 // Treat as identity escape 3694 // Treat as identity escape
3648 builder.AddCharacter(first_digit); 3695 builder.AddCharacter(first_digit);
3649 Advance(2); 3696 Advance(2);
3650 break; 3697 break;
3651 } 3698 }
3652 } 3699 }
(...skipping 619 matching lines...) Expand 10 before | Expand all | Expand 10 after
4272 start_position, 4319 start_position,
4273 is_expression); 4320 is_expression);
4274 return result; 4321 return result;
4275 } 4322 }
4276 4323
4277 4324
4278 #undef NEW 4325 #undef NEW
4279 4326
4280 4327
4281 } } // namespace v8::internal 4328 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.cc ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698