Index: third_party/re2/re2/re2.cc |
diff --git a/third_party/re2/re2/re2.cc b/third_party/re2/re2/re2.cc |
index b9e44fcdef04db5cc4f31cd441818562f2d37243..b3e582f0580e97a80a69287d12a9e58e8183fd04 100644 |
--- a/third_party/re2/re2/re2.cc |
+++ b/third_party/re2/re2/re2.cc |
@@ -11,16 +11,10 @@ |
#include <stdio.h> |
#include <string> |
-#ifdef WIN32 |
-#define strtoll _strtoi64 |
-#define strtoull _strtoui64 |
-#define strtof strtod |
-#else |
-#include <pthread.h> |
-#endif |
#include <errno.h> |
#include "util/util.h" |
#include "util/flags.h" |
+#include "util/sparse_array.h" |
#include "re2/prog.h" |
#include "re2/regexp.h" |
@@ -37,22 +31,10 @@ const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::Par |
const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume = {}; |
const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndConsumeN> RE2::FindAndConsume = {}; |
-#define kDefaultMaxMem (8<<20) |
- |
-RE2::Options::Options() |
- : encoding_(EncodingUTF8), |
- posix_syntax_(false), |
- longest_match_(false), |
- log_errors_(true), |
- max_mem_(kDefaultMaxMem), |
- literal_(false), |
- never_nl_(false), |
- never_capture_(false), |
- case_sensitive_(true), |
- perl_classes_(false), |
- word_boundary_(false), |
- one_line_(false) { |
-} |
+// Skipped for MSVC, where this definition triggers linker error LNK2005. |
+#ifndef _MSC_VER |
+const int RE2::Options::kDefaultMaxMem; // initialized in re2.h |
+#endif |
RE2::Options::Options(RE2::CannedOptions opt) |
: encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8), |
@@ -62,6 +44,7 @@ RE2::Options::Options(RE2::CannedOptions opt) |
max_mem_(kDefaultMaxMem), |
literal_(false), |
never_nl_(false), |
+ dot_nl_(false), |
never_capture_(false), |
case_sensitive_(true), |
perl_classes_(false), |
@@ -169,6 +152,9 @@ int RE2::Options::ParseFlags() const { |
if (never_nl()) |
flags |= Regexp::NeverNL; |
+ if (dot_nl()) |
+ flags |= Regexp::DotNL; |
+ |
if (never_capture()) |
flags |= Regexp::NeverCapture; |
@@ -285,8 +271,36 @@ int RE2::ProgramSize() const { |
return prog_->size(); |
} |
+// Fills *histogram with a power-of-two bucketing of the values that |
+// prog_->Fanout() reports: a value v is counted in bucket b, where b is |
+// the smallest non-negative integer such that (1 << b) >= v. |
+// Any previous contents of *histogram are discarded. |
+// Returns the highest non-empty bucket, or -1 if there is no program |
+// (prog_ == NULL) or nothing was recorded. |
+int RE2::ProgramFanout(map<int, int>* histogram) const { |
+  if (prog_ == NULL) |
+    return -1; |
+  SparseArray<int> fanout(prog_->size()); |
+  prog_->Fanout(&fanout); |
+  histogram->clear(); |
+  for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) { |
+    // TODO(junyer): Optimise this? |
+    int bucket = 0; |
+    while (1 << bucket < i->second) { |
+      bucket++; |
+    } |
+    (*histogram)[bucket]++; |
+  } |
+  // rbegin() of an empty map equals rend() and must not be dereferenced. |
+  if (histogram->empty()) |
+    return -1; |
+  return histogram->rbegin()->first; |
+} |
+ |
+// Reports the number of capturing groups, caching it in num_captures_ |
+// on first use while holding mutex_.  Yields -1 when the regexp was not |
+// valid on construction (suffix_regexp_ is NULL). |
+int RE2::NumberOfCapturingGroups() const { |
+  MutexLock lock(mutex_); |
+  if (suffix_regexp_ != NULL) { |
+    // -1 marks "not computed yet"; fill the cache exactly once. |
+    if (num_captures_ == -1) |
+      num_captures_ = suffix_regexp_->NumCaptures(); |
+    return num_captures_; |
+  } |
+  return -1; |
+} |
+ |
// Returns named_groups_, computing it if needed. |
-const map<string, int>& RE2::NamedCapturingGroups() const { |
+const map<string, int>& RE2::NamedCapturingGroups() const { |
MutexLock l(mutex_); |
if (!ok()) |
return *empty_named_groups; |
@@ -299,7 +313,7 @@ const map<string, int>& RE2::NamedCapturingGroups() const { |
} |
// Returns group_names_, computing it if needed. |
-const map<int, string>& RE2::CapturingGroupNames() const { |
+const map<int, string>& RE2::CapturingGroupNames() const { |
MutexLock l(mutex_); |
if (!ok()) |
return *empty_group_names; |
@@ -371,7 +385,7 @@ bool RE2::Replace(string *str, |
int nvec = 1 + MaxSubmatch(rewrite); |
if (nvec > arraysize(vec)) |
return false; |
- if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec)) |
+ if (!re.Match(*str, 0, static_cast<int>(str->size()), UNANCHORED, vec, nvec)) |
return false; |
string s; |
@@ -398,7 +412,8 @@ int RE2::GlobalReplace(string *str, |
string out; |
int count = 0; |
while (p <= ep) { |
- if (!re.Match(*str, p - str->data(), str->size(), UNANCHORED, vec, nvec)) |
+ if (!re.Match(*str, static_cast<int>(p - str->data()), |
+ static_cast<int>(str->size()), UNANCHORED, vec, nvec)) |
break; |
if (p < vec[0].begin()) |
out.append(p, vec[0].begin() - p); |
@@ -482,7 +497,7 @@ bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const { |
if (prog_ == NULL) |
return false; |
- int n = prefix_.size(); |
+ int n = static_cast<int>(prefix_.size()); |
if (n > maxlen) |
n = maxlen; |
@@ -554,7 +569,10 @@ bool RE2::Match(const StringPiece& text, |
if (startpos < 0 || startpos > endpos || endpos > text.size()) { |
if (options_.log_errors()) |
- LOG(ERROR) << "RE2: invalid startpos, endpos pair."; |
+ LOG(ERROR) << "RE2: invalid startpos, endpos pair. [" |
+ << "startpos: " << startpos << ", " |
+ << "endpos: " << endpos << ", " |
+ << "text size: " << text.size() << "]"; |
return false; |
} |
@@ -591,7 +609,7 @@ bool RE2::Match(const StringPiece& text, |
if (!prefix_.empty()) { |
if (startpos != 0) |
return false; |
- prefixlen = prefix_.size(); |
+ prefixlen = static_cast<int>(prefix_.size()); |
if (prefixlen > subtext.size()) |
return false; |
if (prefix_foldcase_) { |
@@ -832,8 +850,8 @@ bool RE2::DoMatch(const StringPiece& text, |
return false; |
} |
- if(consumed != NULL) |
- *consumed = vec[0].end() - text.begin(); |
+ if (consumed != NULL) |
+ *consumed = static_cast<int>(vec[0].end() - text.begin()); |
if (n == 0 || args == NULL) { |
// We are not interested in results |
@@ -855,7 +873,7 @@ bool RE2::DoMatch(const StringPiece& text, |
if (!args[i]->Parse(s.data(), s.size())) { |
// TODO: Should we indicate what the error was? |
VLOG(1) << "Parse error on #" << i << " " << s << " " |
- << (void*)s.data() << "/" << s.size(); |
+ << (void*)s.data() << "/" << s.size(); |
delete[] heapvec; |
return false; |
} |
@@ -871,48 +889,35 @@ bool RE2::Rewrite(string *out, const StringPiece &rewrite, |
const StringPiece *vec, int veclen) const { |
for (const char *s = rewrite.data(), *end = s + rewrite.size(); |
s < end; s++) { |
- int c = *s; |
- if (c == '\\') { |
- s++; |
- c = (s < end) ? *s : -1; |
- if (isdigit(c)) { |
- int n = (c - '0'); |
- if (n >= veclen) { |
- if (options_.log_errors()) { |
- LOG(ERROR) << "requested group " << n |
- << " in regexp " << rewrite.data(); |
- } |
- return false; |
+ if (*s != '\\') { |
+ out->push_back(*s); |
+ continue; |
+ } |
+ s++; |
+ int c = (s < end) ? *s : -1; |
+ if (isdigit(c)) { |
+ int n = (c - '0'); |
+ if (n >= veclen) { |
+ if (options_.log_errors()) { |
+ LOG(ERROR) << "requested group " << n |
+ << " in regexp " << rewrite.data(); |
} |
- StringPiece snip = vec[n]; |
- if (snip.size() > 0) |
- out->append(snip.data(), snip.size()); |
- } else if (c == '\\') { |
- out->push_back('\\'); |
- } else { |
- if (options_.log_errors()) |
- LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data(); |
return false; |
} |
+ StringPiece snip = vec[n]; |
+ if (snip.size() > 0) |
+ out->append(snip.data(), snip.size()); |
+ } else if (c == '\\') { |
+ out->push_back('\\'); |
} else { |
- out->push_back(c); |
+ if (options_.log_errors()) |
+ LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data(); |
+ return false; |
} |
} |
return true; |
} |
-// Return the number of capturing subpatterns, or -1 if the |
-// regexp wasn't valid on construction. |
-int RE2::NumberOfCapturingGroups() const { |
- if (suffix_regexp_ == NULL) |
- return -1; |
- ANNOTATE_BENIGN_RACE(&num_captures_, "benign race: in the worst case" |
- " multiple threads end up doing the same work in parallel."); |
- if (num_captures_ == -1) |
- num_captures_ = suffix_regexp_->NumCaptures(); |
- return num_captures_; |
-} |
- |
// Checks that the rewrite string is well-formed with respect to this |
// regular expression. |
bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const { |
@@ -987,16 +992,23 @@ bool RE2::Arg::parse_uchar(const char* str, int n, void* dest) { |
// Largest number spec that we are willing to parse |
static const int kMaxNumberLength = 32; |
-// REQUIRES "buf" must have length at least kMaxNumberLength+1 |
+// REQUIRES "buf" must have length at least nbuf. |
// Copies "str" into "buf" and null-terminates. |
// Overwrites *np with the new length. |
-static const char* TerminateNumber(char* buf, const char* str, int* np) { |
+static const char* TerminateNumber(char* buf, int nbuf, const char* str, int* np, |
+ bool accept_spaces) { |
int n = *np; |
if (n <= 0) return ""; |
if (n > 0 && isspace(*str)) { |
// We are less forgiving than the strtoxxx() routines and do not |
- // allow leading spaces. |
- return ""; |
+ // allow leading spaces, unless accept_spaces is set (as it is for floats). |
+ if (!accept_spaces) { |
+ return ""; |
+ } |
+ while (n > 0 && isspace(*str)) { |
+ n--; |
+ str++; |
+ } |
} |
// Although buf has a fixed maximum size, we can still handle |
@@ -1026,7 +1038,7 @@ static const char* TerminateNumber(char* buf, const char* str, int* np) { |
str--; |
} |
- if (n > kMaxNumberLength) return ""; |
+ if (n > nbuf-1) return ""; |
memmove(buf, str, n); |
if (neg) { |
@@ -1043,7 +1055,7 @@ bool RE2::Arg::parse_long_radix(const char* str, |
int radix) { |
if (n == 0) return false; |
char buf[kMaxNumberLength+1]; |
- str = TerminateNumber(buf, str, &n); |
+ str = TerminateNumber(buf, sizeof buf, str, &n, false); |
char* end; |
errno = 0; |
long r = strtol(str, &end, radix); |
@@ -1060,7 +1072,7 @@ bool RE2::Arg::parse_ulong_radix(const char* str, |
int radix) { |
if (n == 0) return false; |
char buf[kMaxNumberLength+1]; |
- str = TerminateNumber(buf, str, &n); |
+ str = TerminateNumber(buf, sizeof buf, str, &n, false); |
if (str[0] == '-') { |
// strtoul() will silently accept negative numbers and parse |
// them. This module is more strict and treats them as errors. |
@@ -1085,7 +1097,7 @@ bool RE2::Arg::parse_short_radix(const char* str, |
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse |
if ((short)r != r) return false; // Out of range |
if (dest == NULL) return true; |
- *(reinterpret_cast<short*>(dest)) = r; |
+ *(reinterpret_cast<short*>(dest)) = (short)r; |
return true; |
} |
@@ -1097,7 +1109,7 @@ bool RE2::Arg::parse_ushort_radix(const char* str, |
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse |
if ((ushort)r != r) return false; // Out of range |
if (dest == NULL) return true; |
- *(reinterpret_cast<unsigned short*>(dest)) = r; |
+ *(reinterpret_cast<unsigned short*>(dest)) = (ushort)r; |
return true; |
} |
@@ -1125,13 +1137,14 @@ bool RE2::Arg::parse_uint_radix(const char* str, |
return true; |
} |
+#if RE2_HAVE_LONGLONG |
bool RE2::Arg::parse_longlong_radix(const char* str, |
int n, |
void* dest, |
int radix) { |
if (n == 0) return false; |
char buf[kMaxNumberLength+1]; |
- str = TerminateNumber(buf, str, &n); |
+ str = TerminateNumber(buf, sizeof buf, str, &n, false); |
char* end; |
errno = 0; |
int64 r = strtoll(str, &end, radix); |
@@ -1148,7 +1161,7 @@ bool RE2::Arg::parse_ulonglong_radix(const char* str, |
int radix) { |
if (n == 0) return false; |
char buf[kMaxNumberLength+1]; |
- str = TerminateNumber(buf, str, &n); |
+ str = TerminateNumber(buf, sizeof buf, str, &n, false); |
if (str[0] == '-') { |
// strtoull() will silently accept negative numbers and parse |
// them. This module is more strict and treats them as errors. |
@@ -1163,27 +1176,26 @@ bool RE2::Arg::parse_ulonglong_radix(const char* str, |
*(reinterpret_cast<uint64*>(dest)) = r; |
return true; |
} |
+#endif |
static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) { |
if (n == 0) return false; |
static const int kMaxLength = 200; |
- char buf[kMaxLength]; |
- if (n >= kMaxLength) return false; |
- memcpy(buf, str, n); |
- buf[n] = '\0'; |
- errno = 0; |
+ char buf[kMaxLength+1]; |
+ str = TerminateNumber(buf, sizeof buf, str, &n, true); |
char* end; |
+ errno = 0; |
double r; |
if (isfloat) { |
- r = strtof(buf, &end); |
+ r = strtof(str, &end); |
} else { |
- r = strtod(buf, &end); |
+ r = strtod(str, &end); |
} |
- if (end != buf + n) return false; // Leftover junk |
+ if (end != str + n) return false; // Leftover junk |
if (errno) return false; |
if (dest == NULL) return true; |
if (isfloat) { |
- *(reinterpret_cast<float*>(dest)) = r; |
+ *(reinterpret_cast<float*>(dest)) = (float)r; |
} else { |
*(reinterpret_cast<double*>(dest)) = r; |
} |