| Index: third_party/re2/re2/dfa.cc
|
| diff --git a/third_party/re2/re2/dfa.cc b/third_party/re2/re2/dfa.cc
|
| index f1fc7b0caf1b57570b2a81f1cfd6ee6b766bd36b..1f54b9f942eb4804cc89a9ce3e6dadb75c3caf3f 100644
|
| --- a/third_party/re2/re2/dfa.cc
|
| +++ b/third_party/re2/re2/dfa.cc
|
| @@ -21,13 +21,11 @@
|
| //
|
| // See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent.
|
|
|
| -#include "re2/prog.h"
|
| -#include "re2/stringpiece.h"
|
| #include "util/atomicops.h"
|
| #include "util/flags.h"
|
| #include "util/sparse_set.h"
|
| -
|
| -#define NO_THREAD_SAFETY_ANALYSIS
|
| +#include "re2/prog.h"
|
| +#include "re2/stringpiece.h"
|
|
|
| DEFINE_bool(re2_dfa_bail_when_slow, true,
|
| "Whether the RE2 DFA should bail out early "
|
| @@ -96,7 +94,7 @@ class DFA {
|
| // States, linked by the next_ pointers. If in state s and reading
|
| // byte c, the next state should be s->next_[c].
|
| struct State {
|
| - inline bool IsMatch() const { return flag_ & kFlagMatch; }
|
| + inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; }
|
| void SaveMatch(vector<int>* v);
|
|
|
| int* inst_; // Instruction pointers in the state.
|
| @@ -145,7 +143,7 @@ class DFA {
|
| if (sizeof(size_t) == sizeof(uint32))
|
| return Hash32StringWithSeed(s, len, a->flag_);
|
| else
|
| - return Hash64StringWithSeed(s, len, a->flag_);
|
| + return static_cast<size_t>(Hash64StringWithSeed(s, len, a->flag_));
|
| }
|
| #ifdef STL_MSVC
|
| // Less than operator.
|
| @@ -230,9 +228,8 @@ class DFA {
|
| // sets *ismatch to true.
|
| // L >= mutex_
|
| void RunWorkqOnByte(Workq* q, Workq* nq,
|
| - int c, uint flag, bool* ismatch,
|
| - Prog::MatchKind kind,
|
| - int new_byte_loop);
|
| + int c, uint flag, bool* ismatch,
|
| + Prog::MatchKind kind);
|
|
|
| // Runs a Workq on a set of empty-string flags, producing a new Workq in nq.
|
| // L >= mutex_
|
| @@ -277,7 +274,7 @@ class DFA {
|
| vector<int>* matches;
|
|
|
| private:
|
| - DISALLOW_EVIL_CONSTRUCTORS(SearchParams);
|
| + DISALLOW_COPY_AND_ASSIGN(SearchParams);
|
| };
|
|
|
| // Before each search, the parameters to Search are analyzed by
|
| @@ -342,7 +339,6 @@ class DFA {
|
| // Constant after initialization.
|
| Prog* prog_; // The regular expression program to run.
|
| Prog::MatchKind kind_; // The kind of DFA.
|
| - int start_unanchored_; // start of unanchored program
|
| bool init_failed_; // initialization failed (out of memory)
|
|
|
| Mutex mutex_; // mutex_ >= cache_mutex_.r
|
| @@ -430,7 +426,7 @@ class DFA::Workq : public SparseSet {
|
| int maxmark_; // maximum number of marks
|
| int nextmark_; // id of next mark
|
| bool last_was_mark_; // last inserted was mark
|
| - DISALLOW_EVIL_CONSTRUCTORS(Workq);
|
| + DISALLOW_COPY_AND_ASSIGN(Workq);
|
| };
|
|
|
| DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem)
|
| @@ -445,11 +441,8 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem)
|
| if (DebugDFA)
|
| fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str());
|
| int nmark = 0;
|
| - start_unanchored_ = 0;
|
| - if (kind_ == Prog::kLongestMatch) {
|
| + if (kind_ == Prog::kLongestMatch)
|
| nmark = prog->size();
|
| - start_unanchored_ = prog->start_unanchored();
|
| - }
|
| nastack_ = 2 * prog->size() + nmark;
|
|
|
| // Account for space needed for DFA, q0, q1, astack.
|
| @@ -458,7 +451,7 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem)
|
| (sizeof(int)+sizeof(int)) * 2; // q0, q1
|
| mem_budget_ -= nastack_ * sizeof(int); // astack
|
| if (mem_budget_ < 0) {
|
| - LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld",
|
| + LOG(INFO) << StringPrintf("DFA out of memory: prog size %d mem %lld",
|
| prog_->size(), max_mem);
|
| init_failed_ = true;
|
| return;
|
| @@ -473,7 +466,7 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem)
|
| int64 one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(int) +
|
| (prog_->bytemap_range()+1)*sizeof(State*);
|
| if (state_budget_ < 20*one_state) {
|
| - LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld",
|
| + LOG(INFO) << StringPrintf("DFA out of memory: prog size %d mem %lld",
|
| prog_->size(), max_mem);
|
| init_failed_ = true;
|
| return;
|
| @@ -789,7 +782,7 @@ void DFA::ClearCache() {
|
| it != state_cache_.end(); ++it)
|
| v.push_back(*it);
|
| state_cache_.clear();
|
| - for (int i = 0; i < v.size(); i++)
|
| + for (size_t i = 0; i < v.size(); i++)
|
| delete[] reinterpret_cast<const char*>(v[i]);
|
| }
|
|
|
| @@ -871,8 +864,10 @@ void DFA::AddToQueue(Workq* q, int id, uint flag) {
|
| break;
|
|
|
| case kInstEmptyWidth:
|
| - if ((ip->empty() & flag) == ip->empty())
|
| - stk[nstk++] = ip->out();
|
| + // Continue on if we have all the right flag bits.
|
| + if (ip->empty() & ~flag)
|
| + break;
|
| + stk[nstk++] = ip->out();
|
| break;
|
| }
|
| }
|
| @@ -910,8 +905,7 @@ void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint flag) {
|
| // regular expression program has been reached (the regexp has matched).
|
| void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
|
| int c, uint flag, bool* ismatch,
|
| - Prog::MatchKind kind,
|
| - int new_byte_loop) {
|
| + Prog::MatchKind kind) {
|
| if (DEBUG_MODE)
|
| mutex_.AssertHeld();
|
|
|
| @@ -990,9 +984,8 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
|
| }
|
|
|
| // If someone else already computed this, return it.
|
| - MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
|
| - State* ns = state->next_[ByteMap(c)];
|
| - ANNOTATE_HAPPENS_AFTER(ns);
|
| + State* ns;
|
| + ATOMIC_LOAD_CONSUME(ns, &state->next_[ByteMap(c)]);
|
| if (ns != NULL)
|
| return ns;
|
|
|
| @@ -1022,8 +1015,8 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
|
| // The state flag kFlagLastWord says whether the last
|
| // byte processed was a word character. Use that info to
|
| // insert empty-width (non-)word boundaries.
|
| - bool islastword = state->flag_ & kFlagLastWord;
|
| - bool isword = (c != kByteEndText && Prog::IsWordChar(c));
|
| + bool islastword = (state->flag_ & kFlagLastWord) != 0;
|
| + bool isword = (c != kByteEndText && Prog::IsWordChar(static_cast<uint8>(c)));
|
| if (isword == islastword)
|
| beforeflag |= kEmptyNonWordBoundary;
|
| else
|
| @@ -1036,8 +1029,8 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
|
| swap(q0_, q1_);
|
| }
|
| bool ismatch = false;
|
| - RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch, kind_, start_unanchored_);
|
| -
|
| + RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch, kind_);
|
| +
|
| // Most of the time, we build the state from the output of
|
| // RunWorkqOnByte, so swap q0_ and q1_ here. However, so that
|
| // RE2::Set can tell exactly which match instructions
|
| @@ -1058,18 +1051,11 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
|
|
|
| ns = WorkqToCachedState(q0_, flag);
|
|
|
| + // Flush ns before linking to it.
|
| // Write barrier before updating state->next_ so that the
|
| // main search loop can proceed without any locking, for speed.
|
| // (Otherwise it would need one mutex operation per input byte.)
|
| - // The annotations below tell race detectors that:
|
| - // a) the access to next_ should be ignored,
|
| - // b) 'ns' is properly published.
|
| - WriteMemoryBarrier(); // Flush ns before linking to it.
|
| -
|
| - ANNOTATE_IGNORE_WRITES_BEGIN();
|
| - ANNOTATE_HAPPENS_BEFORE(ns);
|
| - state->next_[ByteMap(c)] = ns;
|
| - ANNOTATE_IGNORE_WRITES_END();
|
| + ATOMIC_STORE_RELEASE(&state->next_[ByteMap(c)], ns);
|
| return ns;
|
| }
|
|
|
| @@ -1112,7 +1098,7 @@ class DFA::RWLocker {
|
| Mutex* mu_;
|
| bool writing_;
|
|
|
| - DISALLOW_EVIL_CONSTRUCTORS(RWLocker);
|
| + DISALLOW_COPY_AND_ASSIGN(RWLocker);
|
| };
|
|
|
| DFA::RWLocker::RWLocker(Mutex* mu)
|
| @@ -1212,7 +1198,7 @@ class DFA::StateSaver {
|
| bool is_special_; // whether original state was special
|
| State* special_; // if is_special_, the original state
|
|
|
| - DISALLOW_EVIL_CONSTRUCTORS(StateSaver);
|
| + DISALLOW_COPY_AND_ASSIGN(StateSaver);
|
| };
|
|
|
| DFA::StateSaver::StateSaver(DFA* dfa, State* state) {
|
| @@ -1390,9 +1376,8 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
|
| // Okay to use bytemap[] not ByteMap() here, because
|
| // c is known to be an actual byte and not kByteEndText.
|
|
|
| - MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
|
| - State* ns = s->next_[bytemap[c]];
|
| - ANNOTATE_HAPPENS_AFTER(ns);
|
| + State* ns;
|
| + ATOMIC_LOAD_CONSUME(ns, &s->next_[bytemap[c]]);
|
| if (ns == NULL) {
|
| ns = RunStateOnByteUnlocked(s, c);
|
| if (ns == NULL) {
|
| @@ -1405,7 +1390,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
|
| // of 10 bytes per state computation, fail so that RE2 can
|
| // fall back to the NFA.
|
| if (FLAGS_re2_dfa_bail_when_slow && resetp != NULL &&
|
| - (p - resetp) < 10*state_cache_.size()) {
|
| + static_cast<unsigned long>(p - resetp) < 10*state_cache_.size()) {
|
| params->failed = true;
|
| return false;
|
| }
|
| @@ -1479,9 +1464,8 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
|
| lastbyte = params->text.begin()[-1] & 0xFF;
|
| }
|
|
|
| - MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
|
| - State* ns = s->next_[ByteMap(lastbyte)];
|
| - ANNOTATE_HAPPENS_AFTER(ns);
|
| + State* ns;
|
| + ATOMIC_LOAD_CONSUME(ns, &s->next_[ByteMap(lastbyte)]);
|
| if (ns == NULL) {
|
| ns = RunStateOnByteUnlocked(s, lastbyte);
|
| if (ns == NULL) {
|
| @@ -1669,13 +1653,16 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
|
| }
|
| }
|
|
|
| - if (DebugDFA)
|
| + if (DebugDFA) {
|
| + int fb;
|
| + ATOMIC_LOAD_RELAXED(fb, &info->firstbyte);
|
| fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s firstbyte=%d\n",
|
| params->anchored, params->run_forward, flags,
|
| - DumpState(info->start).c_str(), info->firstbyte);
|
| + DumpState(info->start).c_str(), fb);
|
| + }
|
|
|
| params->start = info->start;
|
| - params->firstbyte = ANNOTATE_UNPROTECTED_READ(info->firstbyte);
|
| + ATOMIC_LOAD_ACQUIRE(params->firstbyte, &info->firstbyte);
|
|
|
| return true;
|
| }
|
| @@ -1683,17 +1670,15 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
|
| // Fills in info if needed. Returns true on success, false on failure.
|
| bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
|
| uint flags) {
|
| - // Quick check; okay because of memory barriers below.
|
| - if (ANNOTATE_UNPROTECTED_READ(info->firstbyte) != kFbUnknown) {
|
| - ANNOTATE_HAPPENS_AFTER(&info->firstbyte);
|
| + // Quick check.
|
| + int fb;
|
| + ATOMIC_LOAD_ACQUIRE(fb, &info->firstbyte);
|
| + if (fb != kFbUnknown)
|
| return true;
|
| - }
|
|
|
| MutexLock l(&mutex_);
|
| - if (info->firstbyte != kFbUnknown) {
|
| - ANNOTATE_HAPPENS_AFTER(&info->firstbyte);
|
| + if (info->firstbyte != kFbUnknown)
|
| return true;
|
| - }
|
|
|
| q0_->clear();
|
| AddToQueue(q0_,
|
| @@ -1704,16 +1689,14 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
|
| return false;
|
|
|
| if (info->start == DeadState) {
|
| - ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
|
| - WriteMemoryBarrier(); // Synchronize with "quick check" above.
|
| - info->firstbyte = kFbNone;
|
| + // Synchronize with "quick check" above.
|
| + ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone);
|
| return true;
|
| }
|
|
|
| if (info->start == FullMatchState) {
|
| - ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
|
| - WriteMemoryBarrier(); // Synchronize with "quick check" above.
|
| - info->firstbyte = kFbNone; // will be ignored
|
| + // Synchronize with "quick check" above.
|
| + ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone); // will be ignored
|
| return true;
|
| }
|
|
|
| @@ -1724,9 +1707,8 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
|
| for (int i = 0; i < 256; i++) {
|
| State* s = RunStateOnByte(info->start, i);
|
| if (s == NULL) {
|
| - ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
|
| - WriteMemoryBarrier(); // Synchronize with "quick check" above.
|
| - info->firstbyte = firstbyte;
|
| + // Synchronize with "quick check" above.
|
| + ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte);
|
| return false;
|
| }
|
| if (s == info->start)
|
| @@ -1739,9 +1721,8 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
|
| break;
|
| }
|
| }
|
| - ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
|
| - WriteMemoryBarrier(); // Synchronize with "quick check" above.
|
| - info->firstbyte = firstbyte;
|
| + // Synchronize with "quick check" above.
|
| + ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte);
|
| return true;
|
| }
|
|
|
| @@ -1821,19 +1802,16 @@ DFA* Prog::GetDFA(MatchKind kind) {
|
| pdfa = &dfa_longest_;
|
| }
|
|
|
| - // Quick check; okay because of memory barrier below.
|
| - DFA *dfa = ANNOTATE_UNPROTECTED_READ(*pdfa);
|
| - if (dfa != NULL) {
|
| - ANNOTATE_HAPPENS_AFTER(dfa);
|
| + // Quick check.
|
| + DFA *dfa;
|
| + ATOMIC_LOAD_ACQUIRE(dfa, pdfa);
|
| + if (dfa != NULL)
|
| return dfa;
|
| - }
|
|
|
| MutexLock l(&dfa_mutex_);
|
| dfa = *pdfa;
|
| - if (dfa != NULL) {
|
| - ANNOTATE_HAPPENS_AFTER(dfa);
|
| + if (dfa != NULL)
|
| return dfa;
|
| - }
|
|
|
| // For a forward DFA, half the memory goes to each DFA.
|
| // For a reverse DFA, all the memory goes to the
|
| @@ -1850,9 +1828,7 @@ DFA* Prog::GetDFA(MatchKind kind) {
|
| delete_dfa_ = DeleteDFA;
|
|
|
| // Synchronize with "quick check" above.
|
| - ANNOTATE_HAPPENS_BEFORE(dfa);
|
| - WriteMemoryBarrier();
|
| - *pdfa = dfa;
|
| + ATOMIC_STORE_RELEASE(pdfa, dfa);
|
|
|
| return dfa;
|
| }
|
| @@ -1925,9 +1901,9 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
|
| // as the beginning.
|
| if (match0) {
|
| if (reversed_)
|
| - *match0 = StringPiece(ep, text.end() - ep);
|
| + match0->set(ep, static_cast<int>(text.end() - ep));
|
| else
|
| - *match0 = StringPiece(text.begin(), ep - text.begin());
|
| + match0->set(text.begin(), static_cast<int>(ep - text.begin()));
|
| }
|
| return true;
|
| }
|
| @@ -1952,7 +1928,7 @@ int DFA::BuildAllStates() {
|
| q.push_back(params.start);
|
|
|
| // Flood to expand every state.
|
| - for (int i = 0; i < q.size(); i++) {
|
| + for (size_t i = 0; i < q.size(); i++) {
|
| State* s = q[i];
|
| for (int c = 0; c < 257; c++) {
|
| State* ns = RunStateOnByteUnlocked(s, c);
|
| @@ -1963,7 +1939,7 @@ int DFA::BuildAllStates() {
|
| }
|
| }
|
|
|
| - return q.size();
|
| + return static_cast<int>(q.size());
|
| }
|
|
|
| // Build out all states in DFA for kind. Returns number of states.
|
| @@ -2035,6 +2011,7 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
|
| // Build minimum prefix.
|
| State* s = params.start;
|
| min->clear();
|
| + MutexLock lock(&mutex_);
|
| for (int i = 0; i < maxlen; i++) {
|
| if (previously_visited_states[s] > kMaxEltRepetitions) {
|
| VLOG(2) << "Hit kMaxEltRepetitions=" << kMaxEltRepetitions
|
| @@ -2044,7 +2021,7 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
|
| previously_visited_states[s]++;
|
|
|
| // Stop if min is a match.
|
| - State* ns = RunStateOnByteUnlocked(s, kByteEndText);
|
| + State* ns = RunStateOnByte(s, kByteEndText);
|
| if (ns == NULL) // DFA out of memory
|
| return false;
|
| if (ns != DeadState && (ns == FullMatchState || ns->IsMatch()))
|
| @@ -2053,13 +2030,13 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
|
| // Try to extend the string with low bytes.
|
| bool extended = false;
|
| for (int j = 0; j < 256; j++) {
|
| - ns = RunStateOnByteUnlocked(s, j);
|
| + ns = RunStateOnByte(s, j);
|
| if (ns == NULL) // DFA out of memory
|
| return false;
|
| if (ns == FullMatchState ||
|
| (ns > SpecialStateMax && ns->ninst_ > 0)) {
|
| extended = true;
|
| - min->append(1, j);
|
| + min->append(1, static_cast<char>(j));
|
| s = ns;
|
| break;
|
| }
|
| @@ -2083,13 +2060,13 @@ bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) {
|
| // Try to extend the string with high bytes.
|
| bool extended = false;
|
| for (int j = 255; j >= 0; j--) {
|
| - State* ns = RunStateOnByteUnlocked(s, j);
|
| + State* ns = RunStateOnByte(s, j);
|
| if (ns == NULL)
|
| return false;
|
| if (ns == FullMatchState ||
|
| (ns > SpecialStateMax && ns->ninst_ > 0)) {
|
| extended = true;
|
| - max->append(1, j);
|
| + max->append(1, static_cast<char>(j));
|
| s = ns;
|
| break;
|
| }
|
| @@ -2122,11 +2099,12 @@ bool Prog::PossibleMatchRange(string* min, string* max, int maxlen) {
|
| MutexLock l(&dfa_mutex_);
|
| // Have to use dfa_longest_ to get all strings for full matches.
|
| // For example, (a|aa) never matches aa in first-match mode.
|
| - if (dfa_longest_ == NULL) {
|
| - dfa_longest_ = new DFA(this, Prog::kLongestMatch, dfa_mem_/2);
|
| + dfa = dfa_longest_;
|
| + if (dfa == NULL) {
|
| + dfa = new DFA(this, Prog::kLongestMatch, dfa_mem_/2);
|
| + ATOMIC_STORE_RELEASE(&dfa_longest_, dfa);
|
| delete_dfa_ = DeleteDFA;
|
| }
|
| - dfa = dfa_longest_;
|
| }
|
| return dfa->PossibleMatchRange(min, max, maxlen);
|
| }
|
|
|