Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(101)

Side by Side Diff: third_party/re2/re2/re2.h

Issue 1516543002: Update re2 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Nits Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2003-2009 The RE2 Authors. All Rights Reserved. 1 // Copyright 2003-2009 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 #ifndef RE2_RE2_H 5 #ifndef RE2_RE2_H
6 #define RE2_RE2_H 6 #define RE2_RE2_H
7 7
8 // C++ interface to the re2 regular-expression library. 8 // C++ interface to the re2 regular-expression library.
9 // RE2 supports Perl-style regular expressions (with extensions like 9 // RE2 supports Perl-style regular expressions (with extensions like
10 // \d, \w, \s, ...). 10 // \d, \w, \s, ...).
11 // 11 //
12 // ----------------------------------------------------------------------- 12 // -----------------------------------------------------------------------
13 // REGEXP SYNTAX: 13 // REGEXP SYNTAX:
14 // 14 //
15 // This module uses the re2 library and hence supports 15 // This module uses the re2 library and hence supports
16 // its syntax for regular expressions, which is similar to Perl's with 16 // its syntax for regular expressions, which is similar to Perl's with
17 // some of the more complicated things thrown away. In particular, 17 // some of the more complicated things thrown away. In particular,
18 // backreferences and generalized assertions are not available, nor is \Z. 18 // backreferences and generalized assertions are not available, nor is \Z.
19 // 19 //
20 // See http://code.google.com/p/re2/wiki/Syntax for the syntax 20 // See https://github.com/google/re2/wiki/Syntax for the syntax
21 // supported by RE2, and a comparison with PCRE and PERL regexps. 21 // supported by RE2, and a comparison with PCRE and PERL regexps.
22 // 22 //
23 // For those not familiar with Perl's regular expressions, 23 // For those not familiar with Perl's regular expressions,
24 // here are some examples of the most commonly used extensions: 24 // here are some examples of the most commonly used extensions:
25 // 25 //
26 // "hello (\\w+) world" -- \w matches a "word" character 26 // "hello (\\w+) world" -- \w matches a "word" character
27 // "version (\\d+)" -- \d matches a digit 27 // "version (\\d+)" -- \d matches a digit
28 // "hello\\s+world" -- \s matches any whitespace character 28 // "hello\\s+world" -- \s matches any whitespace character
29 // "\\b(\\w+)\\b" -- \b matches the empty string at a word boundary 29 // "\\b(\\w+)\\b" -- \b matches the empty string at a word boundary
30 // "(?i)hello" -- (?i) turns on case-insensitive matching 30 // "(?i)hello" -- (?i) turns on case-insensitive matching
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
172 // Octal(), or CRadix() to interpret the text in another base. The 172 // Octal(), or CRadix() to interpret the text in another base. The
173 // CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) 173 // CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
174 // prefixes, but defaults to base-10. 174 // prefixes, but defaults to base-10.
175 // 175 //
176 // Example: 176 // Example:
177 // int a, b, c, d; 177 // int a, b, c, d;
178 // CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)", 178 // CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
179 // RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d)); 179 // RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d));
180 // will leave 64 in a, b, c, and d. 180 // will leave 64 in a, b, c, and d.
181 181
182
183 #include <stdint.h> 182 #include <stdint.h>
184 #include <map> 183 #include <map>
185 #include <string> 184 #include <string>
186 #include "re2/stringpiece.h" 185 #include "re2/stringpiece.h"
187 #include "re2/variadic_function.h" 186 #include "re2/variadic_function.h"
188 187
188 #ifndef RE2_HAVE_LONGLONG
189 #define RE2_HAVE_LONGLONG 1
190 #endif
191
189 namespace re2 { 192 namespace re2 {
190 193
191 using std::string; 194 using std::string;
192 using std::map; 195 using std::map;
193 class Mutex; 196 class Mutex;
194 class Prog; 197 class Prog;
195 class Regexp; 198 class Regexp;
196 199
197 // The following enum should be used only as a constructor argument to indicate 200 // The following enum should be used only as a constructor argument to indicate
198 // that the variable has static storage class, and that the constructor should 201 // that the variable has static storage class, and that the constructor should
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
233 ErrorBadCharRange, // bad character class range 236 ErrorBadCharRange, // bad character class range
234 ErrorMissingBracket, // missing closing ] 237 ErrorMissingBracket, // missing closing ]
235 ErrorMissingParen, // missing closing ) 238 ErrorMissingParen, // missing closing )
236 ErrorTrailingBackslash, // trailing \ at end of regexp 239 ErrorTrailingBackslash, // trailing \ at end of regexp
237 ErrorRepeatArgument, // repeat argument missing, e.g. "*" 240 ErrorRepeatArgument, // repeat argument missing, e.g. "*"
238 ErrorRepeatSize, // bad repetition argument 241 ErrorRepeatSize, // bad repetition argument
239 ErrorRepeatOp, // bad repetition operator 242 ErrorRepeatOp, // bad repetition operator
240 ErrorBadPerlOp, // bad perl operator 243 ErrorBadPerlOp, // bad perl operator
241 ErrorBadUTF8, // invalid UTF-8 in regexp 244 ErrorBadUTF8, // invalid UTF-8 in regexp
242 ErrorBadNamedCapture, // bad named capture group 245 ErrorBadNamedCapture, // bad named capture group
243 ErrorPatternTooLarge, // pattern too large (compile failed) 246 ErrorPatternTooLarge // pattern too large (compile failed)
244 }; 247 };
245 248
246 // Predefined common options. 249 // Predefined common options.
247 // If you need more complicated things, instantiate 250 // If you need more complicated things, instantiate
248 // an Option class, possibly passing one of these to 251 // an Option class, possibly passing one of these to
249 // the Option constructor, change the settings, and pass that 252 // the Option constructor, change the settings, and pass that
250 // Option class to the RE2 constructor. 253 // Option class to the RE2 constructor.
251 enum CannedOptions { 254 enum CannedOptions {
252 DefaultOptions = 0, 255 DefaultOptions = 0,
253 Latin1, // treat input as Latin-1 (default UTF-8) 256 Latin1, // treat input as Latin-1 (default UTF-8)
(...skipping 29 matching lines...) Expand all
283 ErrorCode error_code() const { return error_code_; } 286 ErrorCode error_code() const { return error_code_; }
284 287
285 // If RE2 could not be created properly, returns the offending 288 // If RE2 could not be created properly, returns the offending
286 // portion of the regexp. 289 // portion of the regexp.
287 const string& error_arg() const { return error_arg_; } 290 const string& error_arg() const { return error_arg_; }
288 291
289 // Returns the program size, a very approximate measure of a regexp's "cost". 292 // Returns the program size, a very approximate measure of a regexp's "cost".
290 // Larger numbers are more expensive than smaller numbers. 293 // Larger numbers are more expensive than smaller numbers.
291 int ProgramSize() const; 294 int ProgramSize() const;
292 295
296 // EXPERIMENTAL! SUBJECT TO CHANGE!
297 // Outputs the program fanout as a histogram bucketed by powers of 2.
298 // Returns the number of the largest non-empty bucket.
299 int ProgramFanout(map<int, int>* histogram) const;
300
293 // Returns the underlying Regexp; not for general use. 301 // Returns the underlying Regexp; not for general use.
294 // Returns entire_regexp_ so that callers don't need 302 // Returns entire_regexp_ so that callers don't need
295 // to know about prefix_ and prefix_foldcase_. 303 // to know about prefix_ and prefix_foldcase_.
296 re2::Regexp* Regexp() const { return entire_regexp_; } 304 re2::Regexp* Regexp() const { return entire_regexp_; }
297 305
298 /***** The useful part: the matching interface *****/ 306 /***** The useful part: the matching interface *****/
299 307
300 // Matches "text" against "pattern". If pointer arguments are 308 // Matches "text" against "pattern". If pointer arguments are
301 // supplied, copies matched sub-patterns into them. 309 // supplied, copies matched sub-patterns into them.
302 // 310 //
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
387 static int GlobalReplace(string *str, 395 static int GlobalReplace(string *str,
388 const RE2& pattern, 396 const RE2& pattern,
389 const StringPiece& rewrite); 397 const StringPiece& rewrite);
390 398
391 // Like Replace, except that if the pattern matches, "rewrite" 399 // Like Replace, except that if the pattern matches, "rewrite"
392 // is copied into "out" with substitutions. The non-matching 400 // is copied into "out" with substitutions. The non-matching
393 // portions of "text" are ignored. 401 // portions of "text" are ignored.
394 // 402 //
395 // Returns true iff a match occurred and the extraction happened 403 // Returns true iff a match occurred and the extraction happened
396 // successfully; if no match occurs, the string is left unaffected. 404 // successfully; if no match occurs, the string is left unaffected.
405 //
406 // REQUIRES: "text" must not alias any part of "*out".
397 static bool Extract(const StringPiece &text, 407 static bool Extract(const StringPiece &text,
398 const RE2& pattern, 408 const RE2& pattern,
399 const StringPiece &rewrite, 409 const StringPiece &rewrite,
400 string *out); 410 string *out);
401 411
402 // Escapes all potentially meaningful regexp characters in 412 // Escapes all potentially meaningful regexp characters in
403 // 'unquoted'. The returned string, used as a regular expression, 413 // 'unquoted'. The returned string, used as a regular expression,
404 // will exactly match the original string. For example, 414 // will exactly match the original string. For example,
405 // 1.5-2.0? 415 // 1.5-2.0?
406 // may become: 416 // may become:
(...skipping 15 matching lines...) Expand all
422 // 432 //
423 // Returns true on success, false on error. 433 // Returns true on success, false on error.
424 bool PossibleMatchRange(string* min, string* max, int maxlen) const; 434 bool PossibleMatchRange(string* min, string* max, int maxlen) const;
425 435
426 // Generic matching interface 436 // Generic matching interface
427 437
428 // Type of match. 438 // Type of match.
429 enum Anchor { 439 enum Anchor {
430 UNANCHORED, // No anchoring 440 UNANCHORED, // No anchoring
431 ANCHOR_START, // Anchor at start only 441 ANCHOR_START, // Anchor at start only
432 ANCHOR_BOTH, // Anchor at start and end 442 ANCHOR_BOTH // Anchor at start and end
433 }; 443 };
434 444
435 // Return the number of capturing subpatterns, or -1 if the 445 // Return the number of capturing subpatterns, or -1 if the
436 // regexp wasn't valid on construction. The overall match ($0) 446 // regexp wasn't valid on construction. The overall match ($0)
437 // does not count: if the regexp is "(a)(b)", returns 2. 447 // does not count: if the regexp is "(a)(b)", returns 2.
438 int NumberOfCapturingGroups() const; 448 int NumberOfCapturingGroups() const;
439 449
440
441 // Return a map from names to capturing indices. 450 // Return a map from names to capturing indices.
442 // The map records the index of the leftmost group 451 // The map records the index of the leftmost group
443 // with the given name. 452 // with the given name.
444 // Only valid until the re is deleted. 453 // Only valid until the re is deleted.
445 const map<string, int>& NamedCapturingGroups() const; 454 const map<string, int>& NamedCapturingGroups() const;
446 455
447 // Return a map from capturing indices to names. 456 // Return a map from capturing indices to names.
448 // The map has no entries for unnamed groups. 457 // The map has no entries for unnamed groups.
449 // Only valid until the re is deleted. 458 // Only valid until the re is deleted.
450 const map<int, string>& CapturingGroupNames() const; 459 const map<int, string>& CapturingGroupNames() const;
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
505 public: 514 public:
506 // The options are (defaults in parentheses): 515 // The options are (defaults in parentheses):
507 // 516 //
508 // utf8 (true) text and pattern are UTF-8; otherwise Latin-1 517 // utf8 (true) text and pattern are UTF-8; otherwise Latin-1
509 // posix_syntax (false) restrict regexps to POSIX egrep syntax 518 // posix_syntax (false) restrict regexps to POSIX egrep syntax
510 // longest_match (false) search for longest match, not first match 519 // longest_match (false) search for longest match, not first match
511 // log_errors (true) log syntax and execution errors to ERROR 520 // log_errors (true) log syntax and execution errors to ERROR
512 // max_mem (see below) approx. max memory footprint of RE2 521 // max_mem (see below) approx. max memory footprint of RE2
513 // literal (false) interpret string as literal, not regexp 522 // literal (false) interpret string as literal, not regexp
514 // never_nl (false) never match \n, even if it is in regexp 523 // never_nl (false) never match \n, even if it is in regexp
524 // dot_nl (false) dot matches everything, including newline
515 // never_capture (false) parse all parens as non-capturing 525 // never_capture (false) parse all parens as non-capturing
516 // case_sensitive (true) match is case-sensitive (regexp can override 526 // case_sensitive (true) match is case-sensitive (regexp can override
517 // with (?i) unless in posix_syntax mode) 527 // with (?i) unless in posix_syntax mode)
518 // 528 //
519 // The following options are only consulted when posix_syntax == true. 529 // The following options are only consulted when posix_syntax == true.
520 // (When posix_syntax == false these features are always enabled and 530 // (When posix_syntax == false these features are always enabled and
521 // cannot be turned off.) 531 // cannot be turned off.)
522 // perl_classes (false) allow Perl's \d \s \w \D \S \W 532 // perl_classes (false) allow Perl's \d \s \w \D \S \W
523 // word_boundary (false) allow Perl's \b \B (word boundary and not) 533 // word_boundary (false) allow Perl's \b \B (word boundary and not)
524 // one_line (false) ^ and $ only match beginning and end of text 534 // one_line (false) ^ and $ only match beginning and end of text
(...skipping 20 matching lines...) Expand all
545 // The RE2 memory budget is statically divided between the two 555 // The RE2 memory budget is statically divided between the two
546 // Progs and then the DFAs: two thirds to the forward Prog 556 // Progs and then the DFAs: two thirds to the forward Prog
547 // and one third to the reverse Prog. The forward Prog gives half 557 // and one third to the reverse Prog. The forward Prog gives half
548 // of what it has left over to each of its DFAs. The reverse Prog 558 // of what it has left over to each of its DFAs. The reverse Prog
549 // gives it all to its longest-match DFA. 559 // gives it all to its longest-match DFA.
550 // 560 //
551 // Once a DFA fills its budget, it flushes its cache and starts over. 561 // Once a DFA fills its budget, it flushes its cache and starts over.
552 // If this happens too often, RE2 falls back on the NFA implementation. 562 // If this happens too often, RE2 falls back on the NFA implementation.
553 563
554 // For now, make the default budget something close to Code Search. 564 // For now, make the default budget something close to Code Search.
555 #ifndef WIN32
556 static const int kDefaultMaxMem = 8<<20; 565 static const int kDefaultMaxMem = 8<<20;
557 #endif
558 566
559 enum Encoding { 567 enum Encoding {
560 EncodingUTF8 = 1, 568 EncodingUTF8 = 1,
561 EncodingLatin1 569 EncodingLatin1
562 }; 570 };
563 571
564 Options(); 572 Options() :
573 encoding_(EncodingUTF8),
574 posix_syntax_(false),
575 longest_match_(false),
576 log_errors_(true),
577 max_mem_(kDefaultMaxMem),
578 literal_(false),
579 never_nl_(false),
580 dot_nl_(false),
581 never_capture_(false),
582 case_sensitive_(true),
583 perl_classes_(false),
584 word_boundary_(false),
585 one_line_(false) {
586 }
587
565 /*implicit*/ Options(CannedOptions); 588 /*implicit*/ Options(CannedOptions);
566 589
567 Encoding encoding() const { return encoding_; } 590 Encoding encoding() const { return encoding_; }
568 void set_encoding(Encoding encoding) { encoding_ = encoding; } 591 void set_encoding(Encoding encoding) { encoding_ = encoding; }
569 592
570 // Legacy interface to encoding. 593 // Legacy interface to encoding.
571 // TODO(rsc): Remove once clients have been converted. 594 // TODO(rsc): Remove once clients have been converted.
572 bool utf8() const { return encoding_ == EncodingUTF8; } 595 bool utf8() const { return encoding_ == EncodingUTF8; }
573 void set_utf8(bool b) { 596 void set_utf8(bool b) {
574 if (b) { 597 if (b) {
575 encoding_ = EncodingUTF8; 598 encoding_ = EncodingUTF8;
576 } else { 599 } else {
577 encoding_ = EncodingLatin1; 600 encoding_ = EncodingLatin1;
578 } 601 }
579 } 602 }
580 603
581 bool posix_syntax() const { return posix_syntax_; } 604 bool posix_syntax() const { return posix_syntax_; }
582 void set_posix_syntax(bool b) { posix_syntax_ = b; } 605 void set_posix_syntax(bool b) { posix_syntax_ = b; }
583 606
584 bool longest_match() const { return longest_match_; } 607 bool longest_match() const { return longest_match_; }
585 void set_longest_match(bool b) { longest_match_ = b; } 608 void set_longest_match(bool b) { longest_match_ = b; }
586 609
587 bool log_errors() const { return log_errors_; } 610 bool log_errors() const { return log_errors_; }
588 void set_log_errors(bool b) { log_errors_ = b; } 611 void set_log_errors(bool b) { log_errors_ = b; }
589 612
590 int max_mem() const { return max_mem_; } 613 int64_t max_mem() const { return max_mem_; }
591 void set_max_mem(int m) { max_mem_ = m; } 614 void set_max_mem(int64_t m) { max_mem_ = m; }
592 615
593 bool literal() const { return literal_; } 616 bool literal() const { return literal_; }
594 void set_literal(bool b) { literal_ = b; } 617 void set_literal(bool b) { literal_ = b; }
595 618
596 bool never_nl() const { return never_nl_; } 619 bool never_nl() const { return never_nl_; }
597 void set_never_nl(bool b) { never_nl_ = b; } 620 void set_never_nl(bool b) { never_nl_ = b; }
598 621
622 bool dot_nl() const { return dot_nl_; }
623 void set_dot_nl(bool b) { dot_nl_ = b; }
624
599 bool never_capture() const { return never_capture_; } 625 bool never_capture() const { return never_capture_; }
600 void set_never_capture(bool b) { never_capture_ = b; } 626 void set_never_capture(bool b) { never_capture_ = b; }
601 627
602 bool case_sensitive() const { return case_sensitive_; } 628 bool case_sensitive() const { return case_sensitive_; }
603 void set_case_sensitive(bool b) { case_sensitive_ = b; } 629 void set_case_sensitive(bool b) { case_sensitive_ = b; }
604 630
605 bool perl_classes() const { return perl_classes_; } 631 bool perl_classes() const { return perl_classes_; }
606 void set_perl_classes(bool b) { perl_classes_ = b; } 632 void set_perl_classes(bool b) { perl_classes_ = b; }
607 633
608 bool word_boundary() const { return word_boundary_; } 634 bool word_boundary() const { return word_boundary_; }
609 void set_word_boundary(bool b) { word_boundary_ = b; } 635 void set_word_boundary(bool b) { word_boundary_ = b; }
610 636
611 bool one_line() const { return one_line_; } 637 bool one_line() const { return one_line_; }
612 void set_one_line(bool b) { one_line_ = b; } 638 void set_one_line(bool b) { one_line_ = b; }
613 639
614 void Copy(const Options& src) { 640 void Copy(const Options& src) {
615 encoding_ = src.encoding_; 641 encoding_ = src.encoding_;
616 posix_syntax_ = src.posix_syntax_; 642 posix_syntax_ = src.posix_syntax_;
617 longest_match_ = src.longest_match_; 643 longest_match_ = src.longest_match_;
618 log_errors_ = src.log_errors_; 644 log_errors_ = src.log_errors_;
619 max_mem_ = src.max_mem_; 645 max_mem_ = src.max_mem_;
620 literal_ = src.literal_; 646 literal_ = src.literal_;
621 never_nl_ = src.never_nl_; 647 never_nl_ = src.never_nl_;
648 dot_nl_ = src.dot_nl_;
622 never_capture_ = src.never_capture_; 649 never_capture_ = src.never_capture_;
623 case_sensitive_ = src.case_sensitive_; 650 case_sensitive_ = src.case_sensitive_;
624 perl_classes_ = src.perl_classes_; 651 perl_classes_ = src.perl_classes_;
625 word_boundary_ = src.word_boundary_; 652 word_boundary_ = src.word_boundary_;
626 one_line_ = src.one_line_; 653 one_line_ = src.one_line_;
627 } 654 }
628 655
629 int ParseFlags() const; 656 int ParseFlags() const;
630 657
631 private: 658 private:
632 Encoding encoding_; 659 Encoding encoding_;
633 bool posix_syntax_; 660 bool posix_syntax_;
634 bool longest_match_; 661 bool longest_match_;
635 bool log_errors_; 662 bool log_errors_;
636 int64_t max_mem_; 663 int64_t max_mem_;
637 bool literal_; 664 bool literal_;
638 bool never_nl_; 665 bool never_nl_;
666 bool dot_nl_;
639 bool never_capture_; 667 bool never_capture_;
640 bool case_sensitive_; 668 bool case_sensitive_;
641 bool perl_classes_; 669 bool perl_classes_;
642 bool word_boundary_; 670 bool word_boundary_;
643 bool one_line_; 671 bool one_line_;
644 672
645 //DISALLOW_EVIL_CONSTRUCTORS(Options); 673 //DISALLOW_COPY_AND_ASSIGN(Options);
646 Options(const Options&); 674 Options(const Options&);
647 void operator=(const Options&); 675 void operator=(const Options&);
648 }; 676 };
649 677
650 // Returns the options set in the constructor. 678 // Returns the options set in the constructor.
651 const Options& options() const { return options_; }; 679 const Options& options() const { return options_; };
652 680
653 // Argument converters; see below. 681 // Argument converters; see below.
654 static inline Arg CRadix(short* x); 682 static inline Arg CRadix(short* x);
655 static inline Arg CRadix(unsigned short* x); 683 static inline Arg CRadix(unsigned short* x);
656 static inline Arg CRadix(int* x); 684 static inline Arg CRadix(int* x);
657 static inline Arg CRadix(unsigned int* x); 685 static inline Arg CRadix(unsigned int* x);
658 static inline Arg CRadix(long* x); 686 static inline Arg CRadix(long* x);
659 static inline Arg CRadix(unsigned long* x); 687 static inline Arg CRadix(unsigned long* x);
688 #if RE2_HAVE_LONGLONG
660 static inline Arg CRadix(long long* x); 689 static inline Arg CRadix(long long* x);
661 static inline Arg CRadix(unsigned long long* x); 690 static inline Arg CRadix(unsigned long long* x);
691 #endif
662 692
663 static inline Arg Hex(short* x); 693 static inline Arg Hex(short* x);
664 static inline Arg Hex(unsigned short* x); 694 static inline Arg Hex(unsigned short* x);
665 static inline Arg Hex(int* x); 695 static inline Arg Hex(int* x);
666 static inline Arg Hex(unsigned int* x); 696 static inline Arg Hex(unsigned int* x);
667 static inline Arg Hex(long* x); 697 static inline Arg Hex(long* x);
668 static inline Arg Hex(unsigned long* x); 698 static inline Arg Hex(unsigned long* x);
699 #if RE2_HAVE_LONGLONG
669 static inline Arg Hex(long long* x); 700 static inline Arg Hex(long long* x);
670 static inline Arg Hex(unsigned long long* x); 701 static inline Arg Hex(unsigned long long* x);
702 #endif
671 703
672 static inline Arg Octal(short* x); 704 static inline Arg Octal(short* x);
673 static inline Arg Octal(unsigned short* x); 705 static inline Arg Octal(unsigned short* x);
674 static inline Arg Octal(int* x); 706 static inline Arg Octal(int* x);
675 static inline Arg Octal(unsigned int* x); 707 static inline Arg Octal(unsigned int* x);
676 static inline Arg Octal(long* x); 708 static inline Arg Octal(long* x);
677 static inline Arg Octal(unsigned long* x); 709 static inline Arg Octal(unsigned long* x);
710 #if RE2_HAVE_LONGLONG
678 static inline Arg Octal(long long* x); 711 static inline Arg Octal(long long* x);
679 static inline Arg Octal(unsigned long long* x); 712 static inline Arg Octal(unsigned long long* x);
713 #endif
680 714
681 private: 715 private:
682 void Init(const StringPiece& pattern, const Options& options); 716 void Init(const StringPiece& pattern, const Options& options);
683 717
684 bool DoMatch(const StringPiece& text, 718 bool DoMatch(const StringPiece& text,
685 Anchor anchor, 719 Anchor anchor,
686 int* consumed, 720 int* consumed,
687 const Arg* const args[], 721 const Arg* const args[],
688 int n) const; 722 int n) const;
689 723
(...skipping 14 matching lines...) Expand all
704 mutable ErrorCode error_code_; // Error code 738 mutable ErrorCode error_code_; // Error code
705 mutable string error_arg_; // Fragment of regexp showing error 739 mutable string error_arg_; // Fragment of regexp showing error
706 mutable int num_captures_; // Number of capturing groups 740 mutable int num_captures_; // Number of capturing groups
707 741
708 // Map from capture names to indices 742 // Map from capture names to indices
709 mutable const map<string, int>* named_groups_; 743 mutable const map<string, int>* named_groups_;
710 744
711 // Map from capture indices to names 745 // Map from capture indices to names
712 mutable const map<int, string>* group_names_; 746 mutable const map<int, string>* group_names_;
713 747
714 //DISALLOW_EVIL_CONSTRUCTORS(RE2); 748 //DISALLOW_COPY_AND_ASSIGN(RE2);
715 RE2(const RE2&); 749 RE2(const RE2&);
716 void operator=(const RE2&); 750 void operator=(const RE2&);
717 }; 751 };
718 752
719 /***** Implementation details *****/ 753 /***** Implementation details *****/
720 754
721 // Hex/Octal/Binary? 755 // Hex/Octal/Binary?
722 756
723 // Special class for parsing into objects that define a ParseFrom() method 757 // Special class for parsing into objects that define a ParseFrom() method
724 template <class T> 758 template <class T>
(...skipping 24 matching lines...) Expand all
749 783
750 MAKE_PARSER(char, parse_char); 784 MAKE_PARSER(char, parse_char);
751 MAKE_PARSER(signed char, parse_char); 785 MAKE_PARSER(signed char, parse_char);
752 MAKE_PARSER(unsigned char, parse_uchar); 786 MAKE_PARSER(unsigned char, parse_uchar);
753 MAKE_PARSER(short, parse_short); 787 MAKE_PARSER(short, parse_short);
754 MAKE_PARSER(unsigned short, parse_ushort); 788 MAKE_PARSER(unsigned short, parse_ushort);
755 MAKE_PARSER(int, parse_int); 789 MAKE_PARSER(int, parse_int);
756 MAKE_PARSER(unsigned int, parse_uint); 790 MAKE_PARSER(unsigned int, parse_uint);
757 MAKE_PARSER(long, parse_long); 791 MAKE_PARSER(long, parse_long);
758 MAKE_PARSER(unsigned long, parse_ulong); 792 MAKE_PARSER(unsigned long, parse_ulong);
793 #if RE2_HAVE_LONGLONG
759 MAKE_PARSER(long long, parse_longlong); 794 MAKE_PARSER(long long, parse_longlong);
760 MAKE_PARSER(unsigned long long, parse_ulonglong); 795 MAKE_PARSER(unsigned long long, parse_ulonglong);
796 #endif
761 MAKE_PARSER(float, parse_float); 797 MAKE_PARSER(float, parse_float);
762 MAKE_PARSER(double, parse_double); 798 MAKE_PARSER(double, parse_double);
763 MAKE_PARSER(string, parse_string); 799 MAKE_PARSER(string, parse_string);
764 MAKE_PARSER(StringPiece, parse_stringpiece); 800 MAKE_PARSER(StringPiece, parse_stringpiece);
765 801
766 #undef MAKE_PARSER 802 #undef MAKE_PARSER
767 803
768 // Generic constructor 804 // Generic constructor templates
769 template <class T> Arg(T*, Parser parser);
770 // Generic constructor template
771 template <class T> Arg(T* p) 805 template <class T> Arg(T* p)
772 : arg_(p), parser_(_RE2_MatchObject<T>::Parse) { 806 : arg_(p), parser_(_RE2_MatchObject<T>::Parse) { }
773 } 807 template <class T> Arg(T* p, Parser parser)
808 : arg_(p), parser_(parser) { }
774 809
775 // Parse the data 810 // Parse the data
776 bool Parse(const char* str, int n) const; 811 bool Parse(const char* str, int n) const;
777 812
778 private: 813 private:
779 void* arg_; 814 void* arg_;
780 Parser parser_; 815 Parser parser_;
781 816
782 static bool parse_null (const char* str, int n, void* dest); 817 static bool parse_null (const char* str, int n, void* dest);
783 static bool parse_char (const char* str, int n, void* dest); 818 static bool parse_char (const char* str, int n, void* dest);
(...skipping 12 matching lines...) Expand all
796 static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \ 831 static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \
797 static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \ 832 static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \
798 static bool parse_ ## name ## _cradix(const char* str, int n, void* dest) 833 static bool parse_ ## name ## _cradix(const char* str, int n, void* dest)
799 834
800 DECLARE_INTEGER_PARSER(short); 835 DECLARE_INTEGER_PARSER(short);
801 DECLARE_INTEGER_PARSER(ushort); 836 DECLARE_INTEGER_PARSER(ushort);
802 DECLARE_INTEGER_PARSER(int); 837 DECLARE_INTEGER_PARSER(int);
803 DECLARE_INTEGER_PARSER(uint); 838 DECLARE_INTEGER_PARSER(uint);
804 DECLARE_INTEGER_PARSER(long); 839 DECLARE_INTEGER_PARSER(long);
805 DECLARE_INTEGER_PARSER(ulong); 840 DECLARE_INTEGER_PARSER(ulong);
841 #if RE2_HAVE_LONGLONG
806 DECLARE_INTEGER_PARSER(longlong); 842 DECLARE_INTEGER_PARSER(longlong);
807 DECLARE_INTEGER_PARSER(ulonglong); 843 DECLARE_INTEGER_PARSER(ulonglong);
844 #endif
808 845
809 #undef DECLARE_INTEGER_PARSER 846 #undef DECLARE_INTEGER_PARSER
810 }; 847 };
811 848
812 inline RE2::Arg::Arg() : arg_(NULL), parser_(parse_null) { } 849 inline RE2::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
813 inline RE2::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } 850 inline RE2::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
814 851
815 inline bool RE2::Arg::Parse(const char* str, int n) const { 852 inline bool RE2::Arg::Parse(const char* str, int n) const {
816 return (*parser_)(str, n, arg_); 853 return (*parser_)(str, n, arg_);
817 } 854 }
818 855
819 // This part of the parser, appropriate only for ints, deals with bases 856 // This part of the parser, appropriate only for ints, deals with bases
820 #define MAKE_INTEGER_PARSER(type, name) \ 857 #define MAKE_INTEGER_PARSER(type, name) \
821 inline RE2::Arg RE2::Hex(type* ptr) { \ 858 inline RE2::Arg RE2::Hex(type* ptr) { \
822 return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _hex); } \ 859 return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _hex); } \
823 inline RE2::Arg RE2::Octal(type* ptr) { \ 860 inline RE2::Arg RE2::Octal(type* ptr) { \
824 return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _octal); } \ 861 return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _octal); } \
825 inline RE2::Arg RE2::CRadix(type* ptr) { \ 862 inline RE2::Arg RE2::CRadix(type* ptr) { \
826 return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _cradix); } 863 return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _cradix); }
827 864
828 MAKE_INTEGER_PARSER(short, short); 865 MAKE_INTEGER_PARSER(short, short)
829 MAKE_INTEGER_PARSER(unsigned short, ushort); 866 MAKE_INTEGER_PARSER(unsigned short, ushort)
830 MAKE_INTEGER_PARSER(int, int); 867 MAKE_INTEGER_PARSER(int, int)
831 MAKE_INTEGER_PARSER(unsigned int, uint); 868 MAKE_INTEGER_PARSER(unsigned int, uint)
832 MAKE_INTEGER_PARSER(long, long); 869 MAKE_INTEGER_PARSER(long, long)
833 MAKE_INTEGER_PARSER(unsigned long, ulong); 870 MAKE_INTEGER_PARSER(unsigned long, ulong)
834 MAKE_INTEGER_PARSER(long long, longlong); 871 #if RE2_HAVE_LONGLONG
835 MAKE_INTEGER_PARSER(unsigned long long, ulonglong); 872 MAKE_INTEGER_PARSER(long long, longlong)
873 MAKE_INTEGER_PARSER(unsigned long long, ulonglong)
874 #endif
836 875
837 #undef MAKE_INTEGER_PARSER 876 #undef MAKE_INTEGER_PARSER
838 877
839 } // namespace re2 878 } // namespace re2
840 879
841 using re2::RE2; 880 using re2::RE2;
842 881
843 #endif /* RE2_RE2_H */ 882 #endif /* RE2_RE2_H */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698