| Index: third_party/re2/re2/re2.cc
|
| diff --git a/third_party/re2/re2/re2.cc b/third_party/re2/re2/re2.cc
|
| index b9e44fcdef04db5cc4f31cd441818562f2d37243..b3e582f0580e97a80a69287d12a9e58e8183fd04 100644
|
| --- a/third_party/re2/re2/re2.cc
|
| +++ b/third_party/re2/re2/re2.cc
|
| @@ -11,16 +11,10 @@
|
|
|
| #include <stdio.h>
|
| #include <string>
|
| -#ifdef WIN32
|
| -#define strtoll _strtoi64
|
| -#define strtoull _strtoui64
|
| -#define strtof strtod
|
| -#else
|
| -#include <pthread.h>
|
| -#endif
|
| #include <errno.h>
|
| #include "util/util.h"
|
| #include "util/flags.h"
|
| +#include "util/sparse_array.h"
|
| #include "re2/prog.h"
|
| #include "re2/regexp.h"
|
|
|
| @@ -37,22 +31,10 @@ const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::Par
|
| const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume = {};
|
| const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndConsumeN> RE2::FindAndConsume = {};
|
|
|
| -#define kDefaultMaxMem (8<<20)
|
| -
|
| -RE2::Options::Options()
|
| - : encoding_(EncodingUTF8),
|
| - posix_syntax_(false),
|
| - longest_match_(false),
|
| - log_errors_(true),
|
| - max_mem_(kDefaultMaxMem),
|
| - literal_(false),
|
| - never_nl_(false),
|
| - never_capture_(false),
|
| - case_sensitive_(true),
|
| - perl_classes_(false),
|
| - word_boundary_(false),
|
| - one_line_(false) {
|
| -}
|
| +// This will trigger LNK2005 error in MSVC.
|
| +#ifndef _MSC_VER
|
| +const int RE2::Options::kDefaultMaxMem; // initialized in re2.h
|
| +#endif
|
|
|
| RE2::Options::Options(RE2::CannedOptions opt)
|
| : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
|
| @@ -62,6 +44,7 @@ RE2::Options::Options(RE2::CannedOptions opt)
|
| max_mem_(kDefaultMaxMem),
|
| literal_(false),
|
| never_nl_(false),
|
| + dot_nl_(false),
|
| never_capture_(false),
|
| case_sensitive_(true),
|
| perl_classes_(false),
|
| @@ -169,6 +152,9 @@ int RE2::Options::ParseFlags() const {
|
| if (never_nl())
|
| flags |= Regexp::NeverNL;
|
|
|
| + if (dot_nl())
|
| + flags |= Regexp::DotNL;
|
| +
|
| if (never_capture())
|
| flags |= Regexp::NeverCapture;
|
|
|
| @@ -285,8 +271,36 @@ int RE2::ProgramSize() const {
|
| return prog_->size();
|
| }
|
|
|
| +int RE2::ProgramFanout(map<int, int>* histogram) const {
|
| + if (prog_ == NULL)
|
| + return -1;
|
| + SparseArray<int> fanout(prog_->size());
|
| + prog_->Fanout(&fanout);
|
| + histogram->clear();
|
| + for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
|
| + // TODO(junyer): Optimise this?
|
| + int bucket = 0;
|
| + while (1 << bucket < i->second) {
|
| + bucket++;
|
| + }
|
| + (*histogram)[bucket]++;
|
| + }
|
| + return histogram->rbegin()->first;
|
| +}
|
| +
|
| +// Returns num_captures_, computing it if needed, or -1 if the
|
| +// regexp wasn't valid on construction.
|
| +int RE2::NumberOfCapturingGroups() const {
|
| + MutexLock l(mutex_);
|
| + if (suffix_regexp_ == NULL)
|
| + return -1;
|
| + if (num_captures_ == -1)
|
| + num_captures_ = suffix_regexp_->NumCaptures();
|
| + return num_captures_;
|
| +}
|
| +
|
| // Returns named_groups_, computing it if needed.
|
| -const map<string, int>& RE2::NamedCapturingGroups() const {
|
| +const map<string, int>& RE2::NamedCapturingGroups() const {
|
| MutexLock l(mutex_);
|
| if (!ok())
|
| return *empty_named_groups;
|
| @@ -299,7 +313,7 @@ const map<string, int>& RE2::NamedCapturingGroups() const {
|
| }
|
|
|
| // Returns group_names_, computing it if needed.
|
| -const map<int, string>& RE2::CapturingGroupNames() const {
|
| +const map<int, string>& RE2::CapturingGroupNames() const {
|
| MutexLock l(mutex_);
|
| if (!ok())
|
| return *empty_group_names;
|
| @@ -371,7 +385,7 @@ bool RE2::Replace(string *str,
|
| int nvec = 1 + MaxSubmatch(rewrite);
|
| if (nvec > arraysize(vec))
|
| return false;
|
| - if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
|
| + if (!re.Match(*str, 0, static_cast<int>(str->size()), UNANCHORED, vec, nvec))
|
| return false;
|
|
|
| string s;
|
| @@ -398,7 +412,8 @@ int RE2::GlobalReplace(string *str,
|
| string out;
|
| int count = 0;
|
| while (p <= ep) {
|
| - if (!re.Match(*str, p - str->data(), str->size(), UNANCHORED, vec, nvec))
|
| + if (!re.Match(*str, static_cast<int>(p - str->data()),
|
| + static_cast<int>(str->size()), UNANCHORED, vec, nvec))
|
| break;
|
| if (p < vec[0].begin())
|
| out.append(p, vec[0].begin() - p);
|
| @@ -482,7 +497,7 @@ bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const {
|
| if (prog_ == NULL)
|
| return false;
|
|
|
| - int n = prefix_.size();
|
| + int n = static_cast<int>(prefix_.size());
|
| if (n > maxlen)
|
| n = maxlen;
|
|
|
| @@ -554,7 +569,10 @@ bool RE2::Match(const StringPiece& text,
|
|
|
| if (startpos < 0 || startpos > endpos || endpos > text.size()) {
|
| if (options_.log_errors())
|
| - LOG(ERROR) << "RE2: invalid startpos, endpos pair.";
|
| + LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
|
| + << "startpos: " << startpos << ", "
|
| + << "endpos: " << endpos << ", "
|
| + << "text size: " << text.size() << "]";
|
| return false;
|
| }
|
|
|
| @@ -591,7 +609,7 @@ bool RE2::Match(const StringPiece& text,
|
| if (!prefix_.empty()) {
|
| if (startpos != 0)
|
| return false;
|
| - prefixlen = prefix_.size();
|
| + prefixlen = static_cast<int>(prefix_.size());
|
| if (prefixlen > subtext.size())
|
| return false;
|
| if (prefix_foldcase_) {
|
| @@ -832,8 +850,8 @@ bool RE2::DoMatch(const StringPiece& text,
|
| return false;
|
| }
|
|
|
| - if(consumed != NULL)
|
| - *consumed = vec[0].end() - text.begin();
|
| + if (consumed != NULL)
|
| + *consumed = static_cast<int>(vec[0].end() - text.begin());
|
|
|
| if (n == 0 || args == NULL) {
|
| // We are not interested in results
|
| @@ -855,7 +873,7 @@ bool RE2::DoMatch(const StringPiece& text,
|
| if (!args[i]->Parse(s.data(), s.size())) {
|
| // TODO: Should we indicate what the error was?
|
| VLOG(1) << "Parse error on #" << i << " " << s << " "
|
| - << (void*)s.data() << "/" << s.size();
|
| + << (void*)s.data() << "/" << s.size();
|
| delete[] heapvec;
|
| return false;
|
| }
|
| @@ -871,48 +889,35 @@ bool RE2::Rewrite(string *out, const StringPiece &rewrite,
|
| const StringPiece *vec, int veclen) const {
|
| for (const char *s = rewrite.data(), *end = s + rewrite.size();
|
| s < end; s++) {
|
| - int c = *s;
|
| - if (c == '\\') {
|
| - s++;
|
| - c = (s < end) ? *s : -1;
|
| - if (isdigit(c)) {
|
| - int n = (c - '0');
|
| - if (n >= veclen) {
|
| - if (options_.log_errors()) {
|
| - LOG(ERROR) << "requested group " << n
|
| - << " in regexp " << rewrite.data();
|
| - }
|
| - return false;
|
| + if (*s != '\\') {
|
| + out->push_back(*s);
|
| + continue;
|
| + }
|
| + s++;
|
| + int c = (s < end) ? *s : -1;
|
| + if (isdigit(c)) {
|
| + int n = (c - '0');
|
| + if (n >= veclen) {
|
| + if (options_.log_errors()) {
|
| + LOG(ERROR) << "requested group " << n
|
| + << " in regexp " << rewrite.data();
|
| }
|
| - StringPiece snip = vec[n];
|
| - if (snip.size() > 0)
|
| - out->append(snip.data(), snip.size());
|
| - } else if (c == '\\') {
|
| - out->push_back('\\');
|
| - } else {
|
| - if (options_.log_errors())
|
| - LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
|
| return false;
|
| }
|
| + StringPiece snip = vec[n];
|
| + if (snip.size() > 0)
|
| + out->append(snip.data(), snip.size());
|
| + } else if (c == '\\') {
|
| + out->push_back('\\');
|
| } else {
|
| - out->push_back(c);
|
| + if (options_.log_errors())
|
| + LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
|
| + return false;
|
| }
|
| }
|
| return true;
|
| }
|
|
|
| -// Return the number of capturing subpatterns, or -1 if the
|
| -// regexp wasn't valid on construction.
|
| -int RE2::NumberOfCapturingGroups() const {
|
| - if (suffix_regexp_ == NULL)
|
| - return -1;
|
| - ANNOTATE_BENIGN_RACE(&num_captures_, "benign race: in the worst case"
|
| - " multiple threads end up doing the same work in parallel.");
|
| - if (num_captures_ == -1)
|
| - num_captures_ = suffix_regexp_->NumCaptures();
|
| - return num_captures_;
|
| -}
|
| -
|
| // Checks that the rewrite string is well-formed with respect to this
|
| // regular expression.
|
| bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const {
|
| @@ -987,16 +992,23 @@ bool RE2::Arg::parse_uchar(const char* str, int n, void* dest) {
|
| // Largest number spec that we are willing to parse
|
| static const int kMaxNumberLength = 32;
|
|
|
| -// REQUIRES "buf" must have length at least kMaxNumberLength+1
|
| +// REQUIRES "buf" must have length at least nbuf.
|
| // Copies "str" into "buf" and null-terminates.
|
| // Overwrites *np with the new length.
|
| -static const char* TerminateNumber(char* buf, const char* str, int* np) {
|
| +static const char* TerminateNumber(char* buf, int nbuf, const char* str, int* np,
|
| + bool accept_spaces) {
|
| int n = *np;
|
| if (n <= 0) return "";
|
| if (n > 0 && isspace(*str)) {
|
| // We are less forgiving than the strtoxxx() routines and do not
|
| - // allow leading spaces.
|
| - return "";
|
| + // allow leading spaces. We do allow leading spaces for floats.
|
| + if (!accept_spaces) {
|
| + return "";
|
| + }
|
| + while (n > 0 && isspace(*str)) {
|
| + n--;
|
| + str++;
|
| + }
|
| }
|
|
|
| // Although buf has a fixed maximum size, we can still handle
|
| @@ -1026,7 +1038,7 @@ static const char* TerminateNumber(char* buf, const char* str, int* np) {
|
| str--;
|
| }
|
|
|
| - if (n > kMaxNumberLength) return "";
|
| + if (n > nbuf-1) return "";
|
|
|
| memmove(buf, str, n);
|
| if (neg) {
|
| @@ -1043,7 +1055,7 @@ bool RE2::Arg::parse_long_radix(const char* str,
|
| int radix) {
|
| if (n == 0) return false;
|
| char buf[kMaxNumberLength+1];
|
| - str = TerminateNumber(buf, str, &n);
|
| + str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
| char* end;
|
| errno = 0;
|
| long r = strtol(str, &end, radix);
|
| @@ -1060,7 +1072,7 @@ bool RE2::Arg::parse_ulong_radix(const char* str,
|
| int radix) {
|
| if (n == 0) return false;
|
| char buf[kMaxNumberLength+1];
|
| - str = TerminateNumber(buf, str, &n);
|
| + str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
| if (str[0] == '-') {
|
| // strtoul() will silently accept negative numbers and parse
|
| // them. This module is more strict and treats them as errors.
|
| @@ -1085,7 +1097,7 @@ bool RE2::Arg::parse_short_radix(const char* str,
|
| if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
|
| if ((short)r != r) return false; // Out of range
|
| if (dest == NULL) return true;
|
| - *(reinterpret_cast<short*>(dest)) = r;
|
| + *(reinterpret_cast<short*>(dest)) = (short)r;
|
| return true;
|
| }
|
|
|
| @@ -1097,7 +1109,7 @@ bool RE2::Arg::parse_ushort_radix(const char* str,
|
| if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
|
| if ((ushort)r != r) return false; // Out of range
|
| if (dest == NULL) return true;
|
| - *(reinterpret_cast<unsigned short*>(dest)) = r;
|
| + *(reinterpret_cast<unsigned short*>(dest)) = (ushort)r;
|
| return true;
|
| }
|
|
|
| @@ -1125,13 +1137,14 @@ bool RE2::Arg::parse_uint_radix(const char* str,
|
| return true;
|
| }
|
|
|
| +#if RE2_HAVE_LONGLONG
|
| bool RE2::Arg::parse_longlong_radix(const char* str,
|
| int n,
|
| void* dest,
|
| int radix) {
|
| if (n == 0) return false;
|
| char buf[kMaxNumberLength+1];
|
| - str = TerminateNumber(buf, str, &n);
|
| + str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
| char* end;
|
| errno = 0;
|
| int64 r = strtoll(str, &end, radix);
|
| @@ -1148,7 +1161,7 @@ bool RE2::Arg::parse_ulonglong_radix(const char* str,
|
| int radix) {
|
| if (n == 0) return false;
|
| char buf[kMaxNumberLength+1];
|
| - str = TerminateNumber(buf, str, &n);
|
| + str = TerminateNumber(buf, sizeof buf, str, &n, false);
|
| if (str[0] == '-') {
|
| // strtoull() will silently accept negative numbers and parse
|
| // them. This module is more strict and treats them as errors.
|
| @@ -1163,27 +1176,26 @@ bool RE2::Arg::parse_ulonglong_radix(const char* str,
|
| *(reinterpret_cast<uint64*>(dest)) = r;
|
| return true;
|
| }
|
| +#endif
|
|
|
| static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) {
|
| if (n == 0) return false;
|
| static const int kMaxLength = 200;
|
| - char buf[kMaxLength];
|
| - if (n >= kMaxLength) return false;
|
| - memcpy(buf, str, n);
|
| - buf[n] = '\0';
|
| - errno = 0;
|
| + char buf[kMaxLength+1];
|
| + str = TerminateNumber(buf, sizeof buf, str, &n, true);
|
| char* end;
|
| + errno = 0;
|
| double r;
|
| if (isfloat) {
|
| - r = strtof(buf, &end);
|
| + r = strtof(str, &end);
|
| } else {
|
| - r = strtod(buf, &end);
|
| + r = strtod(str, &end);
|
| }
|
| - if (end != buf + n) return false; // Leftover junk
|
| + if (end != str + n) return false; // Leftover junk
|
| if (errno) return false;
|
| if (dest == NULL) return true;
|
| if (isfloat) {
|
| - *(reinterpret_cast<float*>(dest)) = r;
|
| + *(reinterpret_cast<float*>(dest)) = (float)r;
|
| } else {
|
| *(reinterpret_cast<double*>(dest)) = r;
|
| }
|
|
|