Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(161)

Unified Diff: third_party/re2/re2/re2.cc

Issue 1516543002: Update re2 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: updated update instructions Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/re2/re2/re2.h ('k') | third_party/re2/re2/regexp.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/re2/re2/re2.cc
diff --git a/third_party/re2/re2/re2.cc b/third_party/re2/re2/re2.cc
index b9e44fcdef04db5cc4f31cd441818562f2d37243..b3e582f0580e97a80a69287d12a9e58e8183fd04 100644
--- a/third_party/re2/re2/re2.cc
+++ b/third_party/re2/re2/re2.cc
@@ -11,16 +11,10 @@
#include <stdio.h>
#include <string>
-#ifdef WIN32
-#define strtoll _strtoi64
-#define strtoull _strtoui64
-#define strtof strtod
-#else
-#include <pthread.h>
-#endif
#include <errno.h>
#include "util/util.h"
#include "util/flags.h"
+#include "util/sparse_array.h"
#include "re2/prog.h"
#include "re2/regexp.h"
@@ -37,22 +31,10 @@ const VariadicFunction2<bool, const StringPiece&, const RE2&, RE2::Arg, RE2::Par
const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::ConsumeN> RE2::Consume = {};
const VariadicFunction2<bool, StringPiece*, const RE2&, RE2::Arg, RE2::FindAndConsumeN> RE2::FindAndConsume = {};
-#define kDefaultMaxMem (8<<20)
-
-RE2::Options::Options()
- : encoding_(EncodingUTF8),
- posix_syntax_(false),
- longest_match_(false),
- log_errors_(true),
- max_mem_(kDefaultMaxMem),
- literal_(false),
- never_nl_(false),
- never_capture_(false),
- case_sensitive_(true),
- perl_classes_(false),
- word_boundary_(false),
- one_line_(false) {
-}
+// This will trigger LNK2005 error in MSVC.
+#ifndef _MSC_VER
+const int RE2::Options::kDefaultMaxMem; // initialized in re2.h
+#endif
RE2::Options::Options(RE2::CannedOptions opt)
: encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
@@ -62,6 +44,7 @@ RE2::Options::Options(RE2::CannedOptions opt)
max_mem_(kDefaultMaxMem),
literal_(false),
never_nl_(false),
+ dot_nl_(false),
never_capture_(false),
case_sensitive_(true),
perl_classes_(false),
@@ -169,6 +152,9 @@ int RE2::Options::ParseFlags() const {
if (never_nl())
flags |= Regexp::NeverNL;
+ if (dot_nl())
+ flags |= Regexp::DotNL;
+
if (never_capture())
flags |= Regexp::NeverCapture;
@@ -285,8 +271,36 @@ int RE2::ProgramSize() const {
return prog_->size();
}
+int RE2::ProgramFanout(map<int, int>* histogram) const {
+ if (prog_ == NULL)
+ return -1;
+ SparseArray<int> fanout(prog_->size());
+ prog_->Fanout(&fanout);
+ histogram->clear();
+ for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
+ // TODO(junyer): Optimise this?
+ int bucket = 0;
+ while (1 << bucket < i->second) {
+ bucket++;
+ }
+ (*histogram)[bucket]++;
+ }
+ return histogram->rbegin()->first;
+}
+
+// Returns num_captures_, computing it if needed, or -1 if the
+// regexp wasn't valid on construction.
+int RE2::NumberOfCapturingGroups() const {
+ MutexLock l(mutex_);
+ if (suffix_regexp_ == NULL)
+ return -1;
+ if (num_captures_ == -1)
+ num_captures_ = suffix_regexp_->NumCaptures();
+ return num_captures_;
+}
+
// Returns named_groups_, computing it if needed.
-const map<string, int>& RE2::NamedCapturingGroups() const {
+const map<string, int>& RE2::NamedCapturingGroups() const {
MutexLock l(mutex_);
if (!ok())
return *empty_named_groups;
@@ -299,7 +313,7 @@ const map<string, int>& RE2::NamedCapturingGroups() const {
}
// Returns group_names_, computing it if needed.
-const map<int, string>& RE2::CapturingGroupNames() const {
+const map<int, string>& RE2::CapturingGroupNames() const {
MutexLock l(mutex_);
if (!ok())
return *empty_group_names;
@@ -371,7 +385,7 @@ bool RE2::Replace(string *str,
int nvec = 1 + MaxSubmatch(rewrite);
if (nvec > arraysize(vec))
return false;
- if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
+ if (!re.Match(*str, 0, static_cast<int>(str->size()), UNANCHORED, vec, nvec))
return false;
string s;
@@ -398,7 +412,8 @@ int RE2::GlobalReplace(string *str,
string out;
int count = 0;
while (p <= ep) {
- if (!re.Match(*str, p - str->data(), str->size(), UNANCHORED, vec, nvec))
+ if (!re.Match(*str, static_cast<int>(p - str->data()),
+ static_cast<int>(str->size()), UNANCHORED, vec, nvec))
break;
if (p < vec[0].begin())
out.append(p, vec[0].begin() - p);
@@ -482,7 +497,7 @@ bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const {
if (prog_ == NULL)
return false;
- int n = prefix_.size();
+ int n = static_cast<int>(prefix_.size());
if (n > maxlen)
n = maxlen;
@@ -554,7 +569,10 @@ bool RE2::Match(const StringPiece& text,
if (startpos < 0 || startpos > endpos || endpos > text.size()) {
if (options_.log_errors())
- LOG(ERROR) << "RE2: invalid startpos, endpos pair.";
+ LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
+ << "startpos: " << startpos << ", "
+ << "endpos: " << endpos << ", "
+ << "text size: " << text.size() << "]";
return false;
}
@@ -591,7 +609,7 @@ bool RE2::Match(const StringPiece& text,
if (!prefix_.empty()) {
if (startpos != 0)
return false;
- prefixlen = prefix_.size();
+ prefixlen = static_cast<int>(prefix_.size());
if (prefixlen > subtext.size())
return false;
if (prefix_foldcase_) {
@@ -832,8 +850,8 @@ bool RE2::DoMatch(const StringPiece& text,
return false;
}
- if(consumed != NULL)
- *consumed = vec[0].end() - text.begin();
+ if (consumed != NULL)
+ *consumed = static_cast<int>(vec[0].end() - text.begin());
if (n == 0 || args == NULL) {
// We are not interested in results
@@ -855,7 +873,7 @@ bool RE2::DoMatch(const StringPiece& text,
if (!args[i]->Parse(s.data(), s.size())) {
// TODO: Should we indicate what the error was?
VLOG(1) << "Parse error on #" << i << " " << s << " "
- << (void*)s.data() << "/" << s.size();
+ << (void*)s.data() << "/" << s.size();
delete[] heapvec;
return false;
}
@@ -871,48 +889,35 @@ bool RE2::Rewrite(string *out, const StringPiece &rewrite,
const StringPiece *vec, int veclen) const {
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end; s++) {
- int c = *s;
- if (c == '\\') {
- s++;
- c = (s < end) ? *s : -1;
- if (isdigit(c)) {
- int n = (c - '0');
- if (n >= veclen) {
- if (options_.log_errors()) {
- LOG(ERROR) << "requested group " << n
- << " in regexp " << rewrite.data();
- }
- return false;
+ if (*s != '\\') {
+ out->push_back(*s);
+ continue;
+ }
+ s++;
+ int c = (s < end) ? *s : -1;
+ if (isdigit(c)) {
+ int n = (c - '0');
+ if (n >= veclen) {
+ if (options_.log_errors()) {
+ LOG(ERROR) << "requested group " << n
+ << " in regexp " << rewrite.data();
}
- StringPiece snip = vec[n];
- if (snip.size() > 0)
- out->append(snip.data(), snip.size());
- } else if (c == '\\') {
- out->push_back('\\');
- } else {
- if (options_.log_errors())
- LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
return false;
}
+ StringPiece snip = vec[n];
+ if (snip.size() > 0)
+ out->append(snip.data(), snip.size());
+ } else if (c == '\\') {
+ out->push_back('\\');
} else {
- out->push_back(c);
+ if (options_.log_errors())
+ LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
+ return false;
}
}
return true;
}
-// Return the number of capturing subpatterns, or -1 if the
-// regexp wasn't valid on construction.
-int RE2::NumberOfCapturingGroups() const {
- if (suffix_regexp_ == NULL)
- return -1;
- ANNOTATE_BENIGN_RACE(&num_captures_, "benign race: in the worst case"
- " multiple threads end up doing the same work in parallel.");
- if (num_captures_ == -1)
- num_captures_ = suffix_regexp_->NumCaptures();
- return num_captures_;
-}
-
// Checks that the rewrite string is well-formed with respect to this
// regular expression.
bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const {
@@ -987,16 +992,23 @@ bool RE2::Arg::parse_uchar(const char* str, int n, void* dest) {
// Largest number spec that we are willing to parse
static const int kMaxNumberLength = 32;
-// REQUIRES "buf" must have length at least kMaxNumberLength+1
+// REQUIRES "buf" must have length at least nbuf.
// Copies "str" into "buf" and null-terminates.
// Overwrites *np with the new length.
-static const char* TerminateNumber(char* buf, const char* str, int* np) {
+static const char* TerminateNumber(char* buf, int nbuf, const char* str, int* np,
+ bool accept_spaces) {
int n = *np;
if (n <= 0) return "";
if (n > 0 && isspace(*str)) {
// We are less forgiving than the strtoxxx() routines and do not
- // allow leading spaces.
- return "";
+ // allow leading spaces. We do allow leading spaces for floats.
+ if (!accept_spaces) {
+ return "";
+ }
+ while (n > 0 && isspace(*str)) {
+ n--;
+ str++;
+ }
}
// Although buf has a fixed maximum size, we can still handle
@@ -1026,7 +1038,7 @@ static const char* TerminateNumber(char* buf, const char* str, int* np) {
str--;
}
- if (n > kMaxNumberLength) return "";
+ if (n > nbuf-1) return "";
memmove(buf, str, n);
if (neg) {
@@ -1043,7 +1055,7 @@ bool RE2::Arg::parse_long_radix(const char* str,
int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, &n);
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
char* end;
errno = 0;
long r = strtol(str, &end, radix);
@@ -1060,7 +1072,7 @@ bool RE2::Arg::parse_ulong_radix(const char* str,
int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, &n);
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
if (str[0] == '-') {
// strtoul() will silently accept negative numbers and parse
// them. This module is more strict and treats them as errors.
@@ -1085,7 +1097,7 @@ bool RE2::Arg::parse_short_radix(const char* str,
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
if ((short)r != r) return false; // Out of range
if (dest == NULL) return true;
- *(reinterpret_cast<short*>(dest)) = r;
+ *(reinterpret_cast<short*>(dest)) = (short)r;
return true;
}
@@ -1097,7 +1109,7 @@ bool RE2::Arg::parse_ushort_radix(const char* str,
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
if ((ushort)r != r) return false; // Out of range
if (dest == NULL) return true;
- *(reinterpret_cast<unsigned short*>(dest)) = r;
+ *(reinterpret_cast<unsigned short*>(dest)) = (ushort)r;
return true;
}
@@ -1125,13 +1137,14 @@ bool RE2::Arg::parse_uint_radix(const char* str,
return true;
}
+#if RE2_HAVE_LONGLONG
bool RE2::Arg::parse_longlong_radix(const char* str,
int n,
void* dest,
int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, &n);
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
char* end;
errno = 0;
int64 r = strtoll(str, &end, radix);
@@ -1148,7 +1161,7 @@ bool RE2::Arg::parse_ulonglong_radix(const char* str,
int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, str, &n);
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
if (str[0] == '-') {
// strtoull() will silently accept negative numbers and parse
// them. This module is more strict and treats them as errors.
@@ -1163,27 +1176,26 @@ bool RE2::Arg::parse_ulonglong_radix(const char* str,
*(reinterpret_cast<uint64*>(dest)) = r;
return true;
}
+#endif
static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) {
if (n == 0) return false;
static const int kMaxLength = 200;
- char buf[kMaxLength];
- if (n >= kMaxLength) return false;
- memcpy(buf, str, n);
- buf[n] = '\0';
- errno = 0;
+ char buf[kMaxLength+1];
+ str = TerminateNumber(buf, sizeof buf, str, &n, true);
char* end;
+ errno = 0;
double r;
if (isfloat) {
- r = strtof(buf, &end);
+ r = strtof(str, &end);
} else {
- r = strtod(buf, &end);
+ r = strtod(str, &end);
}
- if (end != buf + n) return false; // Leftover junk
+ if (end != str + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
if (isfloat) {
- *(reinterpret_cast<float*>(dest)) = r;
+ *(reinterpret_cast<float*>(dest)) = (float)r;
} else {
*(reinterpret_cast<double*>(dest)) = r;
}
« no previous file with comments | « third_party/re2/re2/re2.h ('k') | third_party/re2/re2/regexp.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698