Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(38)

Side by Side Diff: third_party/re2/re2/re2.cc

Issue 1516543002: Update re2 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Update instructions Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2003-2009 The RE2 Authors. All Rights Reserved. 1 // Copyright 2003-2009 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 // Regular expression interface RE2. 5 // Regular expression interface RE2.
6 // 6 //
7 // Originally the PCRE C++ wrapper, but adapted to use 7 // Originally the PCRE C++ wrapper, but adapted to use
8 // the new automata-based regular expression engines. 8 // the new automata-based regular expression engines.
9 9
10 #include "re2/re2.h" 10 #include "re2/re2.h"
11 11
12 #include <stdio.h> 12 #include <stdio.h>
13 #include <string> 13 #include <string>
14 #ifdef WIN32
15 #define strtoll _strtoi64
16 #define strtoull _strtoui64
17 #define strtof strtod
18 #else
19 #include <pthread.h>
20 #endif
21 #include <errno.h> 14 #include <errno.h>
22 #include "util/util.h" 15 #include "util/util.h"
23 #include "util/flags.h" 16 #include "util/flags.h"
17 #include "util/sparse_array.h"
24 #include "re2/prog.h" 18 #include "re2/prog.h"
25 #include "re2/regexp.h" 19 #include "re2/regexp.h"
26 20
27 DEFINE_bool(trace_re2, false, "trace RE2 execution"); 21 DEFINE_bool(trace_re2, false, "trace RE2 execution");
28 22
29 namespace re2 { 23 namespace re2 {
30 24
31 // Maximum number of args we can set 25 // Maximum number of args we can set
32 static const int kMaxArgs = 16; 26 static const int kMaxArgs = 16;
33 static const int kVecSize = 1+kMaxArgs; 27 static const int kVecSize = 1+kMaxArgs;
34 28
35 const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::Ful lMatchN> RE2::FullMatch = {}; 29 const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::Ful lMatchN> RE2::FullMatch = {};
36 const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::Par tialMatchN> RE2::PartialMatch = {}; 30 const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::Par tialMatchN> RE2::PartialMatch = {};
37 const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume = {}; 31 const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume = {};
38 const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndCo nsumeN> RE2::FindAndConsume = {}; 32 const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndCo nsumeN> RE2::FindAndConsume = {};
39 33
40 #define kDefaultMaxMem (8<<20) 34 // This will trigger LNK2005 error in MSVC.
41 35 #ifndef _MSC_VER
42 RE2::Options::Options() 36 const int RE2::Options::kDefaultMaxMem; // initialized in re2.h
43 : encoding_(EncodingUTF8), 37 #endif
44 posix_syntax_(false),
45 longest_match_(false),
46 log_errors_(true),
47 max_mem_(kDefaultMaxMem),
48 literal_(false),
49 never_nl_(false),
50 never_capture_(false),
51 case_sensitive_(true),
52 perl_classes_(false),
53 word_boundary_(false),
54 one_line_(false) {
55 }
56 38
57 RE2::Options::Options(RE2::CannedOptions opt) 39 RE2::Options::Options(RE2::CannedOptions opt)
58 : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8), 40 : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
59 posix_syntax_(opt == RE2::POSIX), 41 posix_syntax_(opt == RE2::POSIX),
60 longest_match_(opt == RE2::POSIX), 42 longest_match_(opt == RE2::POSIX),
61 log_errors_(opt != RE2::Quiet), 43 log_errors_(opt != RE2::Quiet),
62 max_mem_(kDefaultMaxMem), 44 max_mem_(kDefaultMaxMem),
63 literal_(false), 45 literal_(false),
64 never_nl_(false), 46 never_nl_(false),
47 dot_nl_(false),
65 never_capture_(false), 48 never_capture_(false),
66 case_sensitive_(true), 49 case_sensitive_(true),
67 perl_classes_(false), 50 perl_classes_(false),
68 word_boundary_(false), 51 word_boundary_(false),
69 one_line_(false) { 52 one_line_(false) {
70 } 53 }
71 54
72 // static empty things for use as const references. 55 // static empty things for use as const references.
73 // To avoid global constructors, initialized on demand. 56 // To avoid global constructors, initialized on demand.
74 GLOBAL_MUTEX(empty_mutex); 57 GLOBAL_MUTEX(empty_mutex);
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
162 145
163 if (!posix_syntax()) 146 if (!posix_syntax())
164 flags |= Regexp::LikePerl; 147 flags |= Regexp::LikePerl;
165 148
166 if (literal()) 149 if (literal())
167 flags |= Regexp::Literal; 150 flags |= Regexp::Literal;
168 151
169 if (never_nl()) 152 if (never_nl())
170 flags |= Regexp::NeverNL; 153 flags |= Regexp::NeverNL;
171 154
155 if (dot_nl())
156 flags |= Regexp::DotNL;
157
172 if (never_capture()) 158 if (never_capture())
173 flags |= Regexp::NeverCapture; 159 flags |= Regexp::NeverCapture;
174 160
175 if (!case_sensitive()) 161 if (!case_sensitive())
176 flags |= Regexp::FoldCase; 162 flags |= Regexp::FoldCase;
177 163
178 if (perl_classes()) 164 if (perl_classes())
179 flags |= Regexp::PerlClasses; 165 flags |= Regexp::PerlClasses;
180 166
181 if (word_boundary()) 167 if (word_boundary())
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
278 if (group_names_ != NULL && group_names_ != empty_group_names) 264 if (group_names_ != NULL && group_names_ != empty_group_names)
279 delete group_names_; 265 delete group_names_;
280 } 266 }
281 267
282 int RE2::ProgramSize() const { 268 int RE2::ProgramSize() const {
283 if (prog_ == NULL) 269 if (prog_ == NULL)
284 return -1; 270 return -1;
285 return prog_->size(); 271 return prog_->size();
286 } 272 }
287 273
274 int RE2::ProgramFanout(map<int, int>* histogram) const {
275 if (prog_ == NULL)
276 return -1;
277 SparseArray<int> fanout(prog_->size());
278 prog_->Fanout(&fanout);
279 histogram->clear();
280 for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
281 // TODO(junyer): Optimise this?
282 int bucket = 0;
283 while (1 << bucket < i->second) {
284 bucket++;
285 }
286 (*histogram)[bucket]++;
287 }
288 return histogram->rbegin()->first;
289 }
290
291 // Returns num_captures_, computing it if needed, or -1 if the
292 // regexp wasn't valid on construction.
293 int RE2::NumberOfCapturingGroups() const {
294 MutexLock l(mutex_);
295 if (suffix_regexp_ == NULL)
296 return -1;
297 if (num_captures_ == -1)
298 num_captures_ = suffix_regexp_->NumCaptures();
299 return num_captures_;
300 }
301
288 // Returns named_groups_, computing it if needed. 302 // Returns named_groups_, computing it if needed.
289 const map<string, int>& RE2::NamedCapturingGroups() const { 303 const map<string, int>& RE2::NamedCapturingGroups() const {
290 MutexLock l(mutex_); 304 MutexLock l(mutex_);
291 if (!ok()) 305 if (!ok())
292 return *empty_named_groups; 306 return *empty_named_groups;
293 if (named_groups_ == NULL) { 307 if (named_groups_ == NULL) {
294 named_groups_ = suffix_regexp_->NamedCaptures(); 308 named_groups_ = suffix_regexp_->NamedCaptures();
295 if (named_groups_ == NULL) 309 if (named_groups_ == NULL)
296 named_groups_ = empty_named_groups; 310 named_groups_ = empty_named_groups;
297 } 311 }
298 return *named_groups_; 312 return *named_groups_;
299 } 313 }
300 314
301 // Returns group_names_, computing it if needed. 315 // Returns group_names_, computing it if needed.
302 const map<int, string>& RE2::CapturingGroupNames() const { 316 const map<int, string>& RE2::CapturingGroupNames() const {
303 MutexLock l(mutex_); 317 MutexLock l(mutex_);
304 if (!ok()) 318 if (!ok())
305 return *empty_group_names; 319 return *empty_group_names;
306 if (group_names_ == NULL) { 320 if (group_names_ == NULL) {
307 group_names_ = suffix_regexp_->CaptureNames(); 321 group_names_ = suffix_regexp_->CaptureNames();
308 if (group_names_ == NULL) 322 if (group_names_ == NULL)
309 group_names_ = empty_group_names; 323 group_names_ = empty_group_names;
310 } 324 }
311 return *group_names_; 325 return *group_names_;
312 } 326 }
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
364 return max; 378 return max;
365 } 379 }
366 380
367 bool RE2::Replace(string *str, 381 bool RE2::Replace(string *str,
368 const RE2& re, 382 const RE2& re,
369 const StringPiece& rewrite) { 383 const StringPiece& rewrite) {
370 StringPiece vec[kVecSize]; 384 StringPiece vec[kVecSize];
371 int nvec = 1 + MaxSubmatch(rewrite); 385 int nvec = 1 + MaxSubmatch(rewrite);
372 if (nvec > arraysize(vec)) 386 if (nvec > arraysize(vec))
373 return false; 387 return false;
374 if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec)) 388 if (!re.Match(*str, 0, static_cast<int>(str->size()), UNANCHORED, vec, nvec))
375 return false; 389 return false;
376 390
377 string s; 391 string s;
378 if (!re.Rewrite(&s, rewrite, vec, nvec)) 392 if (!re.Rewrite(&s, rewrite, vec, nvec))
379 return false; 393 return false;
380 394
381 assert(vec[0].begin() >= str->data()); 395 assert(vec[0].begin() >= str->data());
382 assert(vec[0].end() <= str->data()+str->size()); 396 assert(vec[0].end() <= str->data()+str->size());
383 str->replace(vec[0].data() - str->data(), vec[0].size(), s); 397 str->replace(vec[0].data() - str->data(), vec[0].size(), s);
384 return true; 398 return true;
385 } 399 }
386 400
387 int RE2::GlobalReplace(string *str, 401 int RE2::GlobalReplace(string *str,
388 const RE2& re, 402 const RE2& re,
389 const StringPiece& rewrite) { 403 const StringPiece& rewrite) {
390 StringPiece vec[kVecSize]; 404 StringPiece vec[kVecSize];
391 int nvec = 1 + MaxSubmatch(rewrite); 405 int nvec = 1 + MaxSubmatch(rewrite);
392 if (nvec > arraysize(vec)) 406 if (nvec > arraysize(vec))
393 return false; 407 return false;
394 408
395 const char* p = str->data(); 409 const char* p = str->data();
396 const char* ep = p + str->size(); 410 const char* ep = p + str->size();
397 const char* lastend = NULL; 411 const char* lastend = NULL;
398 string out; 412 string out;
399 int count = 0; 413 int count = 0;
400 while (p <= ep) { 414 while (p <= ep) {
401 if (!re.Match(*str, p - str->data(), str->size(), UNANCHORED, vec, nvec)) 415 if (!re.Match(*str, static_cast<int>(p - str->data()),
416 static_cast<int>(str->size()), UNANCHORED, vec, nvec))
402 break; 417 break;
403 if (p < vec[0].begin()) 418 if (p < vec[0].begin())
404 out.append(p, vec[0].begin() - p); 419 out.append(p, vec[0].begin() - p);
405 if (vec[0].begin() == lastend && vec[0].size() == 0) { 420 if (vec[0].begin() == lastend && vec[0].size() == 0) {
406 // Disallow empty match at end of last match: skip ahead. 421 // Disallow empty match at end of last match: skip ahead.
407 if (p < ep) 422 if (p < ep)
408 out.append(p, 1); 423 out.append(p, 1);
409 p++; 424 p++;
410 continue; 425 continue;
411 } 426 }
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
475 result += unquoted[ii]; 490 result += unquoted[ii];
476 } 491 }
477 492
478 return result; 493 return result;
479 } 494 }
480 495
481 bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const { 496 bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const {
482 if (prog_ == NULL) 497 if (prog_ == NULL)
483 return false; 498 return false;
484 499
485 int n = prefix_.size(); 500 int n = static_cast<int>(prefix_.size());
486 if (n > maxlen) 501 if (n > maxlen)
487 n = maxlen; 502 n = maxlen;
488 503
489 // Determine initial min max from prefix_ literal. 504 // Determine initial min max from prefix_ literal.
490 string pmin, pmax; 505 string pmin, pmax;
491 pmin = prefix_.substr(0, n); 506 pmin = prefix_.substr(0, n);
492 pmax = prefix_.substr(0, n); 507 pmax = prefix_.substr(0, n);
493 if (prefix_foldcase_) { 508 if (prefix_foldcase_) {
494 // prefix is ASCII lowercase; change pmin to uppercase. 509 // prefix is ASCII lowercase; change pmin to uppercase.
495 for (int i = 0; i < n; i++) { 510 for (int i = 0; i < n; i++) {
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
547 StringPiece* submatch, 562 StringPiece* submatch,
548 int nsubmatch) const { 563 int nsubmatch) const {
549 if (!ok() || suffix_regexp_ == NULL) { 564 if (!ok() || suffix_regexp_ == NULL) {
550 if (options_.log_errors()) 565 if (options_.log_errors())
551 LOG(ERROR) << "Invalid RE2: " << *error_; 566 LOG(ERROR) << "Invalid RE2: " << *error_;
552 return false; 567 return false;
553 } 568 }
554 569
555 if (startpos < 0 || startpos > endpos || endpos > text.size()) { 570 if (startpos < 0 || startpos > endpos || endpos > text.size()) {
556 if (options_.log_errors()) 571 if (options_.log_errors())
557 LOG(ERROR) << "RE2: invalid startpos, endpos pair."; 572 LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
573 << "startpos: " << startpos << ", "
574 << "endpos: " << endpos << ", "
575 << "text size: " << text.size() << "]";
558 return false; 576 return false;
559 } 577 }
560 578
561 StringPiece subtext = text; 579 StringPiece subtext = text;
562 subtext.remove_prefix(startpos); 580 subtext.remove_prefix(startpos);
563 subtext.remove_suffix(text.size() - endpos); 581 subtext.remove_suffix(text.size() - endpos);
564 582
565 // Use DFAs to find exact location of match, filter out non-matches. 583 // Use DFAs to find exact location of match, filter out non-matches.
566 584
567 // Don't ask for the location if we won't use it. 585 // Don't ask for the location if we won't use it.
(...skipping 16 matching lines...) Expand all
584 if (prog_->anchor_start() && prog_->anchor_end()) 602 if (prog_->anchor_start() && prog_->anchor_end())
585 re_anchor = ANCHOR_BOTH; 603 re_anchor = ANCHOR_BOTH;
586 else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH) 604 else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
587 re_anchor = ANCHOR_START; 605 re_anchor = ANCHOR_START;
588 606
589 // Check for the required prefix, if any. 607 // Check for the required prefix, if any.
590 int prefixlen = 0; 608 int prefixlen = 0;
591 if (!prefix_.empty()) { 609 if (!prefix_.empty()) {
592 if (startpos != 0) 610 if (startpos != 0)
593 return false; 611 return false;
594 prefixlen = prefix_.size(); 612 prefixlen = static_cast<int>(prefix_.size());
595 if (prefixlen > subtext.size()) 613 if (prefixlen > subtext.size())
596 return false; 614 return false;
597 if (prefix_foldcase_) { 615 if (prefix_foldcase_) {
598 if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0) 616 if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
599 return false; 617 return false;
600 } else { 618 } else {
601 if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0) 619 if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
602 return false; 620 return false;
603 } 621 }
604 subtext.remove_prefix(prefixlen); 622 subtext.remove_prefix(prefixlen);
(...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after
825 } else { 843 } else {
826 vec = new StringPiece[nvec]; 844 vec = new StringPiece[nvec];
827 heapvec = vec; 845 heapvec = vec;
828 } 846 }
829 847
830 if (!Match(text, 0, text.size(), anchor, vec, nvec)) { 848 if (!Match(text, 0, text.size(), anchor, vec, nvec)) {
831 delete[] heapvec; 849 delete[] heapvec;
832 return false; 850 return false;
833 } 851 }
834 852
835 if(consumed != NULL) 853 if (consumed != NULL)
836 *consumed = vec[0].end() - text.begin(); 854 *consumed = static_cast<int>(vec[0].end() - text.begin());
837 855
838 if (n == 0 || args == NULL) { 856 if (n == 0 || args == NULL) {
839 // We are not interested in results 857 // We are not interested in results
840 delete[] heapvec; 858 delete[] heapvec;
841 return true; 859 return true;
842 } 860 }
843 861
844 int ncap = NumberOfCapturingGroups(); 862 int ncap = NumberOfCapturingGroups();
845 if (ncap < n) { 863 if (ncap < n) {
846 // RE has fewer capturing groups than number of arg pointers passed in 864 // RE has fewer capturing groups than number of arg pointers passed in
847 VLOG(1) << "Asked for " << n << " but only have " << ncap; 865 VLOG(1) << "Asked for " << n << " but only have " << ncap;
848 delete[] heapvec; 866 delete[] heapvec;
849 return false; 867 return false;
850 } 868 }
851 869
852 // If we got here, we must have matched the whole pattern. 870 // If we got here, we must have matched the whole pattern.
853 for (int i = 0; i < n; i++) { 871 for (int i = 0; i < n; i++) {
854 const StringPiece& s = vec[i+1]; 872 const StringPiece& s = vec[i+1];
855 if (!args[i]->Parse(s.data(), s.size())) { 873 if (!args[i]->Parse(s.data(), s.size())) {
856 // TODO: Should we indicate what the error was? 874 // TODO: Should we indicate what the error was?
857 VLOG(1) << "Parse error on #" << i << " " << s << " " 875 VLOG(1) << "Parse error on #" << i << " " << s << " "
858 » << (void*)s.data() << "/" << s.size(); 876 << (void*)s.data() << "/" << s.size();
859 delete[] heapvec; 877 delete[] heapvec;
860 return false; 878 return false;
861 } 879 }
862 } 880 }
863 881
864 delete[] heapvec; 882 delete[] heapvec;
865 return true; 883 return true;
866 } 884 }
867 885
868 // Append the "rewrite" string, with backslash subsitutions from "vec", 886 // Append the "rewrite" string, with backslash subsitutions from "vec",
869 // to string "out". 887 // to string "out".
870 bool RE2::Rewrite(string *out, const StringPiece &rewrite, 888 bool RE2::Rewrite(string *out, const StringPiece &rewrite,
871 const StringPiece *vec, int veclen) const { 889 const StringPiece *vec, int veclen) const {
872 for (const char *s = rewrite.data(), *end = s + rewrite.size(); 890 for (const char *s = rewrite.data(), *end = s + rewrite.size();
873 s < end; s++) { 891 s < end; s++) {
874 int c = *s; 892 if (*s != '\\') {
875 if (c == '\\') { 893 out->push_back(*s);
876 s++; 894 continue;
877 c = (s < end) ? *s : -1; 895 }
878 if (isdigit(c)) { 896 s++;
879 int n = (c - '0'); 897 int c = (s < end) ? *s : -1;
880 if (n >= veclen) { 898 if (isdigit(c)) {
881 if (options_.log_errors()) { 899 int n = (c - '0');
882 LOG(ERROR) << "requested group " << n 900 if (n >= veclen) {
883 << " in regexp " << rewrite.data(); 901 if (options_.log_errors()) {
884 } 902 LOG(ERROR) << "requested group " << n
885 return false; 903 << " in regexp " << rewrite.data();
886 } 904 }
887 StringPiece snip = vec[n];
888 if (snip.size() > 0)
889 out->append(snip.data(), snip.size());
890 } else if (c == '\\') {
891 out->push_back('\\');
892 } else {
893 if (options_.log_errors())
894 LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
895 return false; 905 return false;
896 } 906 }
907 StringPiece snip = vec[n];
908 if (snip.size() > 0)
909 out->append(snip.data(), snip.size());
910 } else if (c == '\\') {
911 out->push_back('\\');
897 } else { 912 } else {
898 out->push_back(c); 913 if (options_.log_errors())
914 LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
915 return false;
899 } 916 }
900 } 917 }
901 return true; 918 return true;
902 } 919 }
903 920
904 // Return the number of capturing subpatterns, or -1 if the
905 // regexp wasn't valid on construction.
906 int RE2::NumberOfCapturingGroups() const {
907 if (suffix_regexp_ == NULL)
908 return -1;
909 ANNOTATE_BENIGN_RACE(&num_captures_, "benign race: in the worst case"
910 " multiple threads end up doing the same work in parallel.");
911 if (num_captures_ == -1)
912 num_captures_ = suffix_regexp_->NumCaptures();
913 return num_captures_;
914 }
915
916 // Checks that the rewrite string is well-formed with respect to this 921 // Checks that the rewrite string is well-formed with respect to this
917 // regular expression. 922 // regular expression.
918 bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const { 923 bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const {
919 int max_token = -1; 924 int max_token = -1;
920 for (const char *s = rewrite.data(), *end = s + rewrite.size(); 925 for (const char *s = rewrite.data(), *end = s + rewrite.size();
921 s < end; s++) { 926 s < end; s++) {
922 int c = *s; 927 int c = *s;
923 if (c != '\\') { 928 if (c != '\\') {
924 continue; 929 continue;
925 } 930 }
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
980 bool RE2::Arg::parse_uchar(const char* str, int n, void* dest) { 985 bool RE2::Arg::parse_uchar(const char* str, int n, void* dest) {
981 if (n != 1) return false; 986 if (n != 1) return false;
982 if (dest == NULL) return true; 987 if (dest == NULL) return true;
983 *(reinterpret_cast<unsigned char*>(dest)) = str[0]; 988 *(reinterpret_cast<unsigned char*>(dest)) = str[0];
984 return true; 989 return true;
985 } 990 }
986 991
987 // Largest number spec that we are willing to parse 992 // Largest number spec that we are willing to parse
988 static const int kMaxNumberLength = 32; 993 static const int kMaxNumberLength = 32;
989 994
990 // REQUIRES "buf" must have length at least kMaxNumberLength+1 995 // REQUIRES "buf" must have length at least nbuf.
991 // Copies "str" into "buf" and null-terminates. 996 // Copies "str" into "buf" and null-terminates.
992 // Overwrites *np with the new length. 997 // Overwrites *np with the new length.
993 static const char* TerminateNumber(char* buf, const char* str, int* np) { 998 static const char* TerminateNumber(char* buf, int nbuf, const char* str, int* np ,
999 bool accept_spaces) {
994 int n = *np; 1000 int n = *np;
995 if (n <= 0) return ""; 1001 if (n <= 0) return "";
996 if (n > 0 && isspace(*str)) { 1002 if (n > 0 && isspace(*str)) {
997 // We are less forgiving than the strtoxxx() routines and do not 1003 // We are less forgiving than the strtoxxx() routines and do not
998 // allow leading spaces. 1004 // allow leading spaces. We do allow leading spaces for floats.
999 return ""; 1005 if (!accept_spaces) {
1006 return "";
1007 }
1008 while (n > 0 && isspace(*str)) {
1009 n--;
1010 str++;
1011 }
1000 } 1012 }
1001 1013
1002 // Although buf has a fixed maximum size, we can still handle 1014 // Although buf has a fixed maximum size, we can still handle
1003 // arbitrarily large integers correctly by omitting leading zeros. 1015 // arbitrarily large integers correctly by omitting leading zeros.
1004 // (Numbers that are still too long will be out of range.) 1016 // (Numbers that are still too long will be out of range.)
1005 // Before deciding whether str is too long, 1017 // Before deciding whether str is too long,
1006 // remove leading zeros with s/000+/00/. 1018 // remove leading zeros with s/000+/00/.
1007 // Leaving the leading two zeros in place means that 1019 // Leaving the leading two zeros in place means that
1008 // we don't change 0000x123 (invalid) into 0x123 (valid). 1020 // we don't change 0000x123 (invalid) into 0x123 (valid).
1009 // Skip over leading - before replacing. 1021 // Skip over leading - before replacing.
1010 bool neg = false; 1022 bool neg = false;
1011 if (n >= 1 && str[0] == '-') { 1023 if (n >= 1 && str[0] == '-') {
1012 neg = true; 1024 neg = true;
1013 n--; 1025 n--;
1014 str++; 1026 str++;
1015 } 1027 }
1016 1028
1017 if (n >= 3 && str[0] == '0' && str[1] == '0') { 1029 if (n >= 3 && str[0] == '0' && str[1] == '0') {
1018 while (n >= 3 && str[2] == '0') { 1030 while (n >= 3 && str[2] == '0') {
1019 n--; 1031 n--;
1020 str++; 1032 str++;
1021 } 1033 }
1022 } 1034 }
1023 1035
1024 if (neg) { // make room in buf for - 1036 if (neg) { // make room in buf for -
1025 n++; 1037 n++;
1026 str--; 1038 str--;
1027 } 1039 }
1028 1040
1029 if (n > kMaxNumberLength) return ""; 1041 if (n > nbuf-1) return "";
1030 1042
1031 memmove(buf, str, n); 1043 memmove(buf, str, n);
1032 if (neg) { 1044 if (neg) {
1033 buf[0] = '-'; 1045 buf[0] = '-';
1034 } 1046 }
1035 buf[n] = '\0'; 1047 buf[n] = '\0';
1036 *np = n; 1048 *np = n;
1037 return buf; 1049 return buf;
1038 } 1050 }
1039 1051
1040 bool RE2::Arg::parse_long_radix(const char* str, 1052 bool RE2::Arg::parse_long_radix(const char* str,
1041 int n, 1053 int n,
1042 void* dest, 1054 void* dest,
1043 int radix) { 1055 int radix) {
1044 if (n == 0) return false; 1056 if (n == 0) return false;
1045 char buf[kMaxNumberLength+1]; 1057 char buf[kMaxNumberLength+1];
1046 str = TerminateNumber(buf, str, &n); 1058 str = TerminateNumber(buf, sizeof buf, str, &n, false);
1047 char* end; 1059 char* end;
1048 errno = 0; 1060 errno = 0;
1049 long r = strtol(str, &end, radix); 1061 long r = strtol(str, &end, radix);
1050 if (end != str + n) return false; // Leftover junk 1062 if (end != str + n) return false; // Leftover junk
1051 if (errno) return false; 1063 if (errno) return false;
1052 if (dest == NULL) return true; 1064 if (dest == NULL) return true;
1053 *(reinterpret_cast<long*>(dest)) = r; 1065 *(reinterpret_cast<long*>(dest)) = r;
1054 return true; 1066 return true;
1055 } 1067 }
1056 1068
1057 bool RE2::Arg::parse_ulong_radix(const char* str, 1069 bool RE2::Arg::parse_ulong_radix(const char* str,
1058 int n, 1070 int n,
1059 void* dest, 1071 void* dest,
1060 int radix) { 1072 int radix) {
1061 if (n == 0) return false; 1073 if (n == 0) return false;
1062 char buf[kMaxNumberLength+1]; 1074 char buf[kMaxNumberLength+1];
1063 str = TerminateNumber(buf, str, &n); 1075 str = TerminateNumber(buf, sizeof buf, str, &n, false);
1064 if (str[0] == '-') { 1076 if (str[0] == '-') {
1065 // strtoul() will silently accept negative numbers and parse 1077 // strtoul() will silently accept negative numbers and parse
1066 // them. This module is more strict and treats them as errors. 1078 // them. This module is more strict and treats them as errors.
1067 return false; 1079 return false;
1068 } 1080 }
1069 1081
1070 char* end; 1082 char* end;
1071 errno = 0; 1083 errno = 0;
1072 unsigned long r = strtoul(str, &end, radix); 1084 unsigned long r = strtoul(str, &end, radix);
1073 if (end != str + n) return false; // Leftover junk 1085 if (end != str + n) return false; // Leftover junk
1074 if (errno) return false; 1086 if (errno) return false;
1075 if (dest == NULL) return true; 1087 if (dest == NULL) return true;
1076 *(reinterpret_cast<unsigned long*>(dest)) = r; 1088 *(reinterpret_cast<unsigned long*>(dest)) = r;
1077 return true; 1089 return true;
1078 } 1090 }
1079 1091
1080 bool RE2::Arg::parse_short_radix(const char* str, 1092 bool RE2::Arg::parse_short_radix(const char* str,
1081 int n, 1093 int n,
1082 void* dest, 1094 void* dest,
1083 int radix) { 1095 int radix) {
1084 long r; 1096 long r;
1085 if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse 1097 if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
1086 if ((short)r != r) return false; // Out of range 1098 if ((short)r != r) return false; // Out of range
1087 if (dest == NULL) return true; 1099 if (dest == NULL) return true;
1088 *(reinterpret_cast<short*>(dest)) = r; 1100 *(reinterpret_cast<short*>(dest)) = (short)r;
1089 return true; 1101 return true;
1090 } 1102 }
1091 1103
1092 bool RE2::Arg::parse_ushort_radix(const char* str, 1104 bool RE2::Arg::parse_ushort_radix(const char* str,
1093 int n, 1105 int n,
1094 void* dest, 1106 void* dest,
1095 int radix) { 1107 int radix) {
1096 unsigned long r; 1108 unsigned long r;
1097 if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse 1109 if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
1098 if ((ushort)r != r) return false; // Out of range 1110 if ((ushort)r != r) return false; // Out of range
1099 if (dest == NULL) return true; 1111 if (dest == NULL) return true;
1100 *(reinterpret_cast<unsigned short*>(dest)) = r; 1112 *(reinterpret_cast<unsigned short*>(dest)) = (ushort)r;
1101 return true; 1113 return true;
1102 } 1114 }
1103 1115
1104 bool RE2::Arg::parse_int_radix(const char* str, 1116 bool RE2::Arg::parse_int_radix(const char* str,
1105 int n, 1117 int n,
1106 void* dest, 1118 void* dest,
1107 int radix) { 1119 int radix) {
1108 long r; 1120 long r;
1109 if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse 1121 if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
1110 if ((int)r != r) return false; // Out of range 1122 if ((int)r != r) return false; // Out of range
1111 if (dest == NULL) return true; 1123 if (dest == NULL) return true;
1112 *(reinterpret_cast<int*>(dest)) = r; 1124 *(reinterpret_cast<int*>(dest)) = r;
1113 return true; 1125 return true;
1114 } 1126 }
1115 1127
1116 bool RE2::Arg::parse_uint_radix(const char* str, 1128 bool RE2::Arg::parse_uint_radix(const char* str,
1117 int n, 1129 int n,
1118 void* dest, 1130 void* dest,
1119 int radix) { 1131 int radix) {
1120 unsigned long r; 1132 unsigned long r;
1121 if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse 1133 if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
1122 if ((uint)r != r) return false; // Out of range 1134 if ((uint)r != r) return false; // Out of range
1123 if (dest == NULL) return true; 1135 if (dest == NULL) return true;
1124 *(reinterpret_cast<unsigned int*>(dest)) = r; 1136 *(reinterpret_cast<unsigned int*>(dest)) = r;
1125 return true; 1137 return true;
1126 } 1138 }
1127 1139
1140 #if RE2_HAVE_LONGLONG
1128 bool RE2::Arg::parse_longlong_radix(const char* str, 1141 bool RE2::Arg::parse_longlong_radix(const char* str,
1129 int n, 1142 int n,
1130 void* dest, 1143 void* dest,
1131 int radix) { 1144 int radix) {
1132 if (n == 0) return false; 1145 if (n == 0) return false;
1133 char buf[kMaxNumberLength+1]; 1146 char buf[kMaxNumberLength+1];
1134 str = TerminateNumber(buf, str, &n); 1147 str = TerminateNumber(buf, sizeof buf, str, &n, false);
1135 char* end; 1148 char* end;
1136 errno = 0; 1149 errno = 0;
1137 int64 r = strtoll(str, &end, radix); 1150 int64 r = strtoll(str, &end, radix);
1138 if (end != str + n) return false; // Leftover junk 1151 if (end != str + n) return false; // Leftover junk
1139 if (errno) return false; 1152 if (errno) return false;
1140 if (dest == NULL) return true; 1153 if (dest == NULL) return true;
1141 *(reinterpret_cast<int64*>(dest)) = r; 1154 *(reinterpret_cast<int64*>(dest)) = r;
1142 return true; 1155 return true;
1143 } 1156 }
1144 1157
1145 bool RE2::Arg::parse_ulonglong_radix(const char* str, 1158 bool RE2::Arg::parse_ulonglong_radix(const char* str,
1146 int n, 1159 int n,
1147 void* dest, 1160 void* dest,
1148 int radix) { 1161 int radix) {
1149 if (n == 0) return false; 1162 if (n == 0) return false;
1150 char buf[kMaxNumberLength+1]; 1163 char buf[kMaxNumberLength+1];
1151 str = TerminateNumber(buf, str, &n); 1164 str = TerminateNumber(buf, sizeof buf, str, &n, false);
1152 if (str[0] == '-') { 1165 if (str[0] == '-') {
1153 // strtoull() will silently accept negative numbers and parse 1166 // strtoull() will silently accept negative numbers and parse
1154 // them. This module is more strict and treats them as errors. 1167 // them. This module is more strict and treats them as errors.
1155 return false; 1168 return false;
1156 } 1169 }
1157 char* end; 1170 char* end;
1158 errno = 0; 1171 errno = 0;
1159 uint64 r = strtoull(str, &end, radix); 1172 uint64 r = strtoull(str, &end, radix);
1160 if (end != str + n) return false; // Leftover junk 1173 if (end != str + n) return false; // Leftover junk
1161 if (errno) return false; 1174 if (errno) return false;
1162 if (dest == NULL) return true; 1175 if (dest == NULL) return true;
1163 *(reinterpret_cast<uint64*>(dest)) = r; 1176 *(reinterpret_cast<uint64*>(dest)) = r;
1164 return true; 1177 return true;
1165 } 1178 }
1179 #endif
1166 1180
1167 static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) { 1181 static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) {
1168 if (n == 0) return false; 1182 if (n == 0) return false;
1169 static const int kMaxLength = 200; 1183 static const int kMaxLength = 200;
1170 char buf[kMaxLength]; 1184 char buf[kMaxLength+1];
1171 if (n >= kMaxLength) return false; 1185 str = TerminateNumber(buf, sizeof buf, str, &n, true);
1172 memcpy(buf, str, n); 1186 char* end;
1173 buf[n] = '\0';
1174 errno = 0; 1187 errno = 0;
1175 char* end;
1176 double r; 1188 double r;
1177 if (isfloat) { 1189 if (isfloat) {
1178 r = strtof(buf, &end); 1190 r = strtof(str, &end);
1179 } else { 1191 } else {
1180 r = strtod(buf, &end); 1192 r = strtod(str, &end);
1181 } 1193 }
1182 if (end != buf + n) return false; // Leftover junk 1194 if (end != str + n) return false; // Leftover junk
1183 if (errno) return false; 1195 if (errno) return false;
1184 if (dest == NULL) return true; 1196 if (dest == NULL) return true;
1185 if (isfloat) { 1197 if (isfloat) {
1186 *(reinterpret_cast<float*>(dest)) = r; 1198 *(reinterpret_cast<float*>(dest)) = (float)r;
1187 } else { 1199 } else {
1188 *(reinterpret_cast<double*>(dest)) = r; 1200 *(reinterpret_cast<double*>(dest)) = r;
1189 } 1201 }
1190 return true; 1202 return true;
1191 } 1203 }
1192 1204
1193 bool RE2::Arg::parse_double(const char* str, int n, void* dest) { 1205 bool RE2::Arg::parse_double(const char* str, int n, void* dest) {
1194 return parse_double_float(str, n, false, dest); 1206 return parse_double_float(str, n, false, dest);
1195 } 1207 }
1196 1208
(...skipping 21 matching lines...) Expand all
1218 DEFINE_INTEGER_PARSERS(int); 1230 DEFINE_INTEGER_PARSERS(int);
1219 DEFINE_INTEGER_PARSERS(uint); 1231 DEFINE_INTEGER_PARSERS(uint);
1220 DEFINE_INTEGER_PARSERS(long); 1232 DEFINE_INTEGER_PARSERS(long);
1221 DEFINE_INTEGER_PARSERS(ulong); 1233 DEFINE_INTEGER_PARSERS(ulong);
1222 DEFINE_INTEGER_PARSERS(longlong); 1234 DEFINE_INTEGER_PARSERS(longlong);
1223 DEFINE_INTEGER_PARSERS(ulonglong); 1235 DEFINE_INTEGER_PARSERS(ulonglong);
1224 1236
1225 #undef DEFINE_INTEGER_PARSERS 1237 #undef DEFINE_INTEGER_PARSERS
1226 1238
1227 } // namespace re2 1239 } // namespace re2
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698