Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 249 } | 249 } |
| 250 | 250 |
| 251 T* last() { | 251 T* last() { |
| 252 ASSERT(last_ != NULL); | 252 ASSERT(last_ != NULL); |
| 253 return last_; | 253 return last_; |
| 254 } | 254 } |
| 255 | 255 |
| 256 T* RemoveLast() { | 256 T* RemoveLast() { |
| 257 ASSERT(last_ != NULL); | 257 ASSERT(last_ != NULL); |
| 258 T* result = last_; | 258 T* result = last_; |
| 259 last_ = NULL; | 259 if (list_ != NULL && list_->length() > 0) |
| 260 last_ = list_->RemoveLast(); | |
|
Lasse Reichstein
2008/11/15 23:58:06
This allows removing more than the last added elem
Christian Plesner Hansen
2008/11/16 09:05:06
Not as such, no, but it's not a good property that
Lasse Reichstein
2008/11/16 10:53:15
Ah, so there was a need :)
| |
| 261 else | |
| 262 last_ = NULL; | |
| 260 return result; | 263 return result; |
| 261 } | 264 } |
| 262 | 265 |
| 266 T* Get(int i) { | |
| 267 ASSERT(0 <= i && i < length()); | |
| 268 if (list_ == NULL) { | |
| 269 ASSERT_EQ(0, i); | |
| 270 return last_; | |
| 271 } else { | |
| 272 if (i == list_->length()) { | |
| 273 ASSERT(last_ != NULL); | |
| 274 return last_; | |
| 275 } else { | |
| 276 return list_->at(i); | |
| 277 } | |
| 278 } | |
| 279 } | |
| 280 | |
| 263 void Clear() { | 281 void Clear() { |
| 264 list_ = NULL; | 282 list_ = NULL; |
| 265 last_ = NULL; | 283 last_ = NULL; |
| 266 } | 284 } |
| 267 | 285 |
| 268 int length() { | 286 int length() { |
| 269 int length = (list_ == NULL) ? 0 : list_->length(); | 287 int length = (list_ == NULL) ? 0 : list_->length(); |
| 270 return length + ((last_ == NULL) ? 0 : 1); | 288 return length + ((last_ == NULL) ? 0 : 1); |
| 271 } | 289 } |
| 272 | 290 |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 287 }; | 305 }; |
| 288 | 306 |
| 289 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 307 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
| 290 class RegExpBuilder { | 308 class RegExpBuilder { |
| 291 public: | 309 public: |
| 292 RegExpBuilder(); | 310 RegExpBuilder(); |
| 293 void AddCharacter(uc16 character); | 311 void AddCharacter(uc16 character); |
| 294 // "Adds" an empty expression. Does nothing except consume a | 312 // "Adds" an empty expression. Does nothing except consume a |
| 295 // following quantifier | 313 // following quantifier |
| 296 void AddEmpty(); | 314 void AddEmpty(); |
| 297 void AddAtom(RegExpTree* tree); | 315 void AddTerm(RegExpTree* tree); |
|
Lasse Reichstein
2008/11/15 23:58:06
I would prefer if this is still called AddAtom (se
Christian Plesner Hansen
2008/11/16 09:05:06
You're right -- I've been using the term 'atom' in
| |
| 298 void AddAssertion(RegExpTree* tree); | 316 void AddAssertion(RegExpTree* tree); |
| 299 void NewAlternative(); // '|' | 317 void NewAlternative(); // '|' |
| 300 void AddQuantifierToAtom(int min, int max, bool is_greedy); | 318 void AddQuantifierToAtom(int min, int max, bool is_greedy); |
| 301 RegExpTree* ToRegExp(); | 319 RegExpTree* ToRegExp(); |
| 302 private: | 320 private: |
| 303 void FlushCharacters(); | 321 void FlushCharacters(); |
| 322 void FlushText(); | |
| 304 bool FlushTerms(); | 323 bool FlushTerms(); |
| 305 bool pending_empty_; | 324 bool pending_empty_; |
| 306 ZoneList<uc16>* characters_; | 325 ZoneList<uc16>* characters_; |
| 307 BufferedZoneList<RegExpTree, 2> terms_; | 326 BufferedZoneList<RegExpTree, 2> terms_; |
| 327 BufferedZoneList<RegExpTree, 2> text_; | |
| 308 BufferedZoneList<RegExpTree, 2> alternatives_; | 328 BufferedZoneList<RegExpTree, 2> alternatives_; |
| 309 #ifdef DEBUG | 329 #ifdef DEBUG |
| 310 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; | 330 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
| 311 #define LAST(x) last_added_ = x; | 331 #define LAST(x) last_added_ = x; |
| 312 #else | 332 #else |
| 313 #define LAST(x) | 333 #define LAST(x) |
| 314 #endif | 334 #endif |
| 315 }; | 335 }; |
| 316 | 336 |
| 317 | 337 |
| 318 RegExpBuilder::RegExpBuilder() | 338 RegExpBuilder::RegExpBuilder() |
| 319 : pending_empty_(false), characters_(NULL), terms_(), alternatives_() | 339 : pending_empty_(false), characters_(NULL), terms_(), alternatives_() |
| 320 #ifdef DEBUG | 340 #ifdef DEBUG |
| 321 , last_added_(ADD_NONE) | 341 , last_added_(ADD_NONE) |
| 322 #endif | 342 #endif |
| 323 {} | 343 {} |
| 324 | 344 |
| 325 | 345 |
| 326 void RegExpBuilder::FlushCharacters() { | 346 void RegExpBuilder::FlushCharacters() { |
| 327 pending_empty_ = false; | 347 pending_empty_ = false; |
| 328 if (characters_ != NULL) { | 348 if (characters_ != NULL) { |
| 329 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); | 349 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); |
| 330 characters_ = NULL; | 350 characters_ = NULL; |
| 331 terms_.Add(atom); | 351 text_.Add(atom); |
| 332 LAST(ADD_ATOM); | 352 LAST(ADD_ATOM); |
| 333 } | 353 } |
| 334 } | 354 } |
| 335 | 355 |
| 336 | 356 |
| 357 void RegExpBuilder::FlushText() { | |
| 358 FlushCharacters(); | |
| 359 int num_text = text_.length(); | |
| 360 if (num_text == 0) { | |
| 361 return; | |
| 362 } else if (num_text == 1) { | |
| 363 terms_.Add(text_.last()); | |
| 364 } else { | |
| 365 RegExpText* text = new RegExpText(); | |
| 366 for (int i = 0; i < num_text; i++) | |
| 367 text_.Get(i)->AppendToText(text); | |
| 368 terms_.Add(text); | |
|
Lasse Reichstein
2008/11/15 23:58:06
The other places where a buffer is used, the conte
Christian Plesner Hansen
2008/11/16 09:05:06
If we're always using GetList anyway then why even
Lasse Reichstein
2008/11/16 10:53:15
The current uses all have special cases for one el
| |
| 369 } | |
| 370 text_.Clear(); | |
| 371 } | |
| 372 | |
| 373 | |
| 337 void RegExpBuilder::AddCharacter(uc16 c) { | 374 void RegExpBuilder::AddCharacter(uc16 c) { |
| 338 pending_empty_ = false; | 375 pending_empty_ = false; |
| 339 if (characters_ == NULL) { | 376 if (characters_ == NULL) { |
| 340 characters_ = new ZoneList<uc16>(4); | 377 characters_ = new ZoneList<uc16>(4); |
| 341 } | 378 } |
| 342 characters_->Add(c); | 379 characters_->Add(c); |
| 343 LAST(ADD_CHAR); | 380 LAST(ADD_CHAR); |
| 344 } | 381 } |
| 345 | 382 |
| 346 | 383 |
| 347 void RegExpBuilder::AddEmpty() { | 384 void RegExpBuilder::AddEmpty() { |
| 348 pending_empty_ = true; | 385 pending_empty_ = true; |
| 349 } | 386 } |
| 350 | 387 |
| 351 | 388 |
| 352 void RegExpBuilder::AddAtom(RegExpTree* atom) { | 389 void RegExpBuilder::AddTerm(RegExpTree* term) { |
|
Lasse Reichstein
2008/11/15 23:58:06
The original method was called AddAtom to match th
Christian Plesner Hansen
2008/11/16 09:05:06
You don't always know that you're adding a charact
Lasse Reichstein
2008/11/16 10:53:15
I can see the point. If the result of a sub-parse
| |
| 353 FlushCharacters(); | 390 if (term->IsTextElement()) { |
| 354 terms_.Add(atom); | 391 FlushCharacters(); |
| 392 text_.Add(term); | |
| 393 } else { | |
| 394 FlushText(); | |
| 395 terms_.Add(term); | |
| 396 } | |
| 355 LAST(ADD_ATOM); | 397 LAST(ADD_ATOM); |
| 356 } | 398 } |
| 357 | 399 |
| 358 | 400 |
| 359 void RegExpBuilder::AddAssertion(RegExpTree* assert) { | 401 void RegExpBuilder::AddAssertion(RegExpTree* assert) { |
| 360 FlushCharacters(); | 402 FlushText(); |
| 361 terms_.Add(assert); | 403 terms_.Add(assert); |
| 362 LAST(ADD_ASSERT); | 404 LAST(ADD_ASSERT); |
| 363 } | 405 } |
| 364 | 406 |
| 365 | 407 |
| 366 void RegExpBuilder::NewAlternative() { | 408 void RegExpBuilder::NewAlternative() { |
| 367 if (!FlushTerms()) { | 409 if (!FlushTerms()) { |
| 368 alternatives_.Add(RegExpEmpty::GetInstance()); | 410 alternatives_.Add(RegExpEmpty::GetInstance()); |
| 369 } | 411 } |
| 370 } | 412 } |
| 371 | 413 |
| 372 | 414 |
| 373 bool RegExpBuilder::FlushTerms() { | 415 bool RegExpBuilder::FlushTerms() { |
| 374 FlushCharacters(); | 416 FlushText(); |
| 375 int num_terms = terms_.length(); | 417 int num_terms = terms_.length(); |
| 376 if (num_terms == 0) { | 418 if (num_terms == 0) { |
| 377 return false; | 419 return false; |
| 378 } | 420 } |
| 379 RegExpTree* alternative; | 421 RegExpTree* alternative; |
| 380 if (num_terms == 1) { | 422 if (num_terms == 1) { |
| 381 alternative = terms_.last(); | 423 alternative = terms_.last(); |
| 382 } else { | 424 } else { |
| 383 alternative = new RegExpAlternative(terms_.GetList()); | 425 alternative = new RegExpAlternative(terms_.GetList()); |
| 384 } | 426 } |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 408 return; | 450 return; |
| 409 } | 451 } |
| 410 RegExpTree* atom; | 452 RegExpTree* atom; |
| 411 if (characters_ != NULL) { | 453 if (characters_ != NULL) { |
| 412 ASSERT(last_added_ == ADD_CHAR); | 454 ASSERT(last_added_ == ADD_CHAR); |
| 413 // Last atom was character. | 455 // Last atom was character. |
| 414 Vector<const uc16> char_vector = characters_->ToConstVector(); | 456 Vector<const uc16> char_vector = characters_->ToConstVector(); |
| 415 int num_chars = char_vector.length(); | 457 int num_chars = char_vector.length(); |
| 416 if (num_chars > 1) { | 458 if (num_chars > 1) { |
| 417 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); | 459 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); |
| 418 terms_.Add(new RegExpAtom(prefix)); | 460 text_.Add(new RegExpAtom(prefix)); |
| 419 char_vector = char_vector.SubVector(num_chars - 1, num_chars); | 461 char_vector = char_vector.SubVector(num_chars - 1, num_chars); |
| 420 } | 462 } |
| 421 characters_ = NULL; | 463 characters_ = NULL; |
| 422 atom = new RegExpAtom(char_vector); | 464 atom = new RegExpAtom(char_vector); |
| 465 FlushText(); | |
| 466 } else if (text_.length() > 0) { | |
| 467 ASSERT(last_added_ == ADD_ATOM); | |
|
Lasse Reichstein
2008/11/15 23:58:06
The last thing added was a character class.
The l
Christian Plesner Hansen
2008/11/16 09:05:06
What about "foo (?:bar) baz"?
Lasse Reichstein
2008/11/16 10:53:15
Indeed. It would have to be ADD_TEXT to capture th
| |
| 468 atom = text_.RemoveLast(); | |
| 469 FlushText(); | |
| 423 } else if (terms_.length() > 0) { | 470 } else if (terms_.length() > 0) { |
| 424 ASSERT(last_added_ == ADD_ATOM); | 471 ASSERT(last_added_ == ADD_ATOM); |
| 425 atom = terms_.RemoveLast(); | 472 atom = terms_.RemoveLast(); |
| 426 } else { | 473 } else { |
| 427 // Only call immediately after adding an atom or character! | 474 // Only call immediately after adding an atom or character! |
| 428 UNREACHABLE(); | 475 UNREACHABLE(); |
| 429 return; | 476 return; |
| 430 } | 477 } |
| 431 terms_.Add(new RegExpQuantifier(min, max, is_greedy, atom)); | 478 terms_.Add(new RegExpQuantifier(min, max, is_greedy, atom)); |
| 432 LAST(ADD_TERM); | 479 LAST(ADD_TERM); |
| (...skipping 3145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3578 RegExpAssertion::END_OF_INPUT; | 3625 RegExpAssertion::END_OF_INPUT; |
| 3579 builder.AddAssertion(new RegExpAssertion(type)); | 3626 builder.AddAssertion(new RegExpAssertion(type)); |
| 3580 continue; | 3627 continue; |
| 3581 } | 3628 } |
| 3582 case '.': { | 3629 case '.': { |
| 3583 Advance(); | 3630 Advance(); |
| 3584 // everything except \x0a, \x0d, \u2028 and \u2029 | 3631 // everything except \x0a, \x0d, \u2028 and \u2029 |
| 3585 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | 3632 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3586 CharacterRange::AddClassEscape('.', ranges); | 3633 CharacterRange::AddClassEscape('.', ranges); |
| 3587 RegExpTree* atom = new RegExpCharacterClass(ranges, false); | 3634 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| 3588 builder.AddAtom(atom); | 3635 builder.AddTerm(atom); |
| 3589 break; | 3636 break; |
| 3590 } | 3637 } |
| 3591 case '(': { | 3638 case '(': { |
| 3592 RegExpTree* atom = ParseGroup(CHECK_OK); | 3639 RegExpTree* atom = ParseGroup(CHECK_OK); |
| 3593 builder.AddAtom(atom); | 3640 builder.AddTerm(atom); |
|
Lasse Reichstein
2008/11/16 10:53:15
At this point we could check whether the atom is a
| |
| 3594 break; | 3641 break; |
| 3595 } | 3642 } |
| 3596 case '[': { | 3643 case '[': { |
| 3597 RegExpTree* atom = ParseCharacterClass(CHECK_OK); | 3644 RegExpTree* atom = ParseCharacterClass(CHECK_OK); |
| 3598 builder.AddAtom(atom); | 3645 builder.AddTerm(atom); |
| 3599 break; | 3646 break; |
| 3600 } | 3647 } |
| 3601 // Atom :: | 3648 // Atom :: |
| 3602 // \ AtomEscape | 3649 // \ AtomEscape |
| 3603 case '\\': | 3650 case '\\': |
| 3604 switch (next()) { | 3651 switch (next()) { |
| 3605 case kEndMarker: | 3652 case kEndMarker: |
| 3606 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); | 3653 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); |
| 3607 case 'b': | 3654 case 'b': |
| 3608 Advance(2); | 3655 Advance(2); |
| 3609 builder.AddAssertion( | 3656 builder.AddAssertion( |
| 3610 new RegExpAssertion(RegExpAssertion::BOUNDARY)); | 3657 new RegExpAssertion(RegExpAssertion::BOUNDARY)); |
| 3611 continue; | 3658 continue; |
| 3612 case 'B': | 3659 case 'B': |
| 3613 Advance(2); | 3660 Advance(2); |
| 3614 builder.AddAssertion( | 3661 builder.AddAssertion( |
| 3615 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); | 3662 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); |
| 3616 continue; | 3663 continue; |
| 3617 // AtomEscape :: | 3664 // AtomEscape :: |
| 3618 // CharacterClassEscape | 3665 // CharacterClassEscape |
| 3619 // | 3666 // |
| 3620 // CharacterClassEscape :: one of | 3667 // CharacterClassEscape :: one of |
| 3621 // d D s S w W | 3668 // d D s S w W |
| 3622 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { | 3669 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { |
| 3623 uc32 c = next(); | 3670 uc32 c = next(); |
| 3624 Advance(2); | 3671 Advance(2); |
| 3625 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | 3672 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3626 CharacterRange::AddClassEscape(c, ranges); | 3673 CharacterRange::AddClassEscape(c, ranges); |
| 3627 RegExpTree* atom = new RegExpCharacterClass(ranges, false); | 3674 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| 3628 builder.AddAtom(atom); | 3675 builder.AddTerm(atom); |
| 3629 goto has_read_atom; // Avoid setting has_character_escapes_. | 3676 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3630 } | 3677 } |
| 3631 case '1': case '2': case '3': case '4': case '5': case '6': | 3678 case '1': case '2': case '3': case '4': case '5': case '6': |
| 3632 case '7': case '8': case '9': { | 3679 case '7': case '8': case '9': { |
| 3633 int index = 0; | 3680 int index = 0; |
| 3634 if (ParseBackreferenceIndex(&index)) { | 3681 if (ParseBackreferenceIndex(&index)) { |
| 3635 RegExpCapture* capture = captures_->at(index - 1); | 3682 RegExpCapture* capture = captures_->at(index - 1); |
| 3636 if (capture == NULL || capture->available() != CAPTURE_AVAILABLE) { | 3683 if (capture == NULL || capture->available() != CAPTURE_AVAILABLE) { |
| 3637 // Prepare to ignore a following quantifier | 3684 // Prepare to ignore a following quantifier |
| 3638 builder.AddEmpty(); | 3685 builder.AddEmpty(); |
| 3639 goto has_read_atom; | 3686 goto has_read_atom; |
| 3640 } | 3687 } |
| 3641 RegExpTree* atom = new RegExpBackreference(capture); | 3688 RegExpTree* atom = new RegExpBackreference(capture); |
| 3642 builder.AddAtom(atom); | 3689 builder.AddTerm(atom); |
| 3643 goto has_read_atom; // Avoid setting has_character_escapes_. | 3690 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3644 } | 3691 } |
| 3645 uc32 first_digit = next(); | 3692 uc32 first_digit = next(); |
| 3646 if (first_digit == '8' || first_digit == '9') { | 3693 if (first_digit == '8' || first_digit == '9') { |
| 3647 // Treat as identity escape | 3694 // Treat as identity escape |
| 3648 builder.AddCharacter(first_digit); | 3695 builder.AddCharacter(first_digit); |
| 3649 Advance(2); | 3696 Advance(2); |
| 3650 break; | 3697 break; |
| 3651 } | 3698 } |
| 3652 } | 3699 } |
| (...skipping 619 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4272 start_position, | 4319 start_position, |
| 4273 is_expression); | 4320 is_expression); |
| 4274 return result; | 4321 return result; |
| 4275 } | 4322 } |
| 4276 | 4323 |
| 4277 | 4324 |
| 4278 #undef NEW | 4325 #undef NEW |
| 4279 | 4326 |
| 4280 | 4327 |
| 4281 } } // namespace v8::internal | 4328 } } // namespace v8::internal |
| OLD | NEW |