| Index: third_party/re2/re2/testing/regexp_generator.cc
|
| diff --git a/third_party/re2/re2/testing/regexp_generator.cc b/third_party/re2/re2/testing/regexp_generator.cc
|
| deleted file mode 100644
|
| index fd085dbfc13b9447d4d43e56f390fd737b550648..0000000000000000000000000000000000000000
|
| --- a/third_party/re2/re2/testing/regexp_generator.cc
|
| +++ /dev/null
|
| @@ -1,264 +0,0 @@
|
| -// Copyright 2008 The RE2 Authors. All Rights Reserved.
|
| -// Use of this source code is governed by a BSD-style
|
| -// license that can be found in the LICENSE file.
|
| -
|
| -// Regular expression generator: generates all possible
|
| -// regular expressions within parameters (see regexp_generator.h for details).
|
| -
|
| -// The regexp generator first generates a sequence of commands in a simple
|
| -// postfix language. Each command in the language is a string,
|
| -// like "a" or "%s*" or "%s|%s".
|
| -//
|
| -// To evaluate a command, enough arguments are popped from the value stack to
|
| -// plug into the %s slots. Then the result is pushed onto the stack.
|
| -// For example, the command sequence
|
| -// a b %s%s c
|
| -// results in the stack
|
| -// ab c
|
| -//
|
| -// GeneratePostfix generates all possible command sequences.
|
| -// Then RunPostfix turns each sequence into a regular expression
|
| -// and passes the regexp to HandleRegexp.
|
| -
|
| -#include <string.h>
|
| -#include <string>
|
| -#include <stack>
|
| -#include <vector>
|
| -#include "util/test.h"
|
| -#include "re2/testing/regexp_generator.h"
|
| -
|
| -namespace re2 {
|
| -
|
| -// Returns a vector of the egrep regexp operators.
|
| -const vector<string>& RegexpGenerator::EgrepOps() {
|
| - static const char *ops[] = {
|
| - "%s%s",
|
| - "%s|%s",
|
| - "%s*",
|
| - "%s+",
|
| - "%s?",
|
| - "%s\\C*",
|
| - };
|
| - static vector<string> v(ops, ops + arraysize(ops));
|
| - return v;
|
| -}
|
| -
|
| -RegexpGenerator::RegexpGenerator(int maxatoms, int maxops,
|
| - const vector<string>& atoms,
|
| - const vector<string>& ops)
|
| - : maxatoms_(maxatoms), maxops_(maxops), atoms_(atoms), ops_(ops) {
|
| - // Degenerate case.
|
| - if (atoms_.size() == 0)
|
| - maxatoms_ = 0;
|
| - if (ops_.size() == 0)
|
| - maxops_ = 0;
|
| -}
|
| -
|
| -// Generates all possible regular expressions (within the parameters),
|
| -// calling HandleRegexp for each one.
|
| -void RegexpGenerator::Generate() {
|
| - vector<string> postfix;
|
| - GeneratePostfix(&postfix, 0, 0, 0);
|
| -}
|
| -
|
| -// Generates random regular expressions, calling HandleRegexp for each one.
|
| -void RegexpGenerator::GenerateRandom(int32 seed, int n) {
|
| - ACMRandom acm(seed);
|
| - acm_ = &acm;
|
| -
|
| - for (int i = 0; i < n; i++) {
|
| - vector<string> postfix;
|
| - GenerateRandomPostfix(&postfix, 0, 0, 0);
|
| - }
|
| -
|
| - acm_ = NULL;
|
| -}
|
| -
|
| -// Counts and returns the number of occurrences of "%s" in s.
|
| -static int CountArgs(const string& s) {
|
| - const char *p = s.c_str();
|
| - int n = 0;
|
| - while ((p = strstr(p, "%s")) != NULL) {
|
| - p += 2;
|
| - n++;
|
| - }
|
| - return n;
|
| -}
|
| -
|
| -// Generates all possible postfix command sequences.
|
| -// Each sequence is handed off to RunPostfix to generate a regular expression.
|
| -// The arguments are:
|
| -// post: the current postfix sequence
|
| -// nstk: the number of elements that would be on the stack after executing
|
| -// the sequence
|
| -// ops: the number of operators used in the sequence
|
| -// atoms: the number of atoms used in the sequence
|
| -// For example, if post were ["a", "b", "%s%s", "c"],
|
| -// then nstk = 2, ops = 1, atoms = 3.
|
| -//
|
| -// The initial call should be GeneratePostfix([empty vector], 0, 0, 0).
|
| -//
|
| -void RegexpGenerator::GeneratePostfix(vector<string>* post, int nstk,
|
| - int ops, int atoms) {
|
| - if (nstk == 1)
|
| - RunPostfix(*post);
|
| -
|
| - // Early out: if used too many operators or can't
|
| - // get back down to a single expression on the stack
|
| - // using binary operators, give up.
|
| - if (ops + nstk - 1 > maxops_)
|
| - return;
|
| -
|
| - // Add atoms if there is room.
|
| - if (atoms < maxatoms_) {
|
| - for (size_t i = 0; i < atoms_.size(); i++) {
|
| - post->push_back(atoms_[i]);
|
| - GeneratePostfix(post, nstk + 1, ops, atoms + 1);
|
| - post->pop_back();
|
| - }
|
| - }
|
| -
|
| - // Add operators if there are enough arguments.
|
| - if (ops < maxops_) {
|
| - for (size_t i = 0; i < ops_.size(); i++) {
|
| - const string& fmt = ops_[i];
|
| - int nargs = CountArgs(fmt);
|
| - if (nargs <= nstk) {
|
| - post->push_back(fmt);
|
| - GeneratePostfix(post, nstk - nargs + 1, ops + 1, atoms);
|
| - post->pop_back();
|
| - }
|
| - }
|
| - }
|
| -}
|
| -
|
| -// Generates a random postfix command sequence.
|
| -// Stops and returns true once a single sequence has been generated.
|
| -bool RegexpGenerator::GenerateRandomPostfix(vector<string>* post, int nstk,
|
| - int ops, int atoms) {
|
| - for (;;) {
|
| - // Stop if we get to a single element, but only sometimes.
|
| - if (nstk == 1 && acm_->Uniform(maxatoms_ + 1 - atoms) == 0) {
|
| - RunPostfix(*post);
|
| - return true;
|
| - }
|
| -
|
| - // Early out: if used too many operators or can't
|
| - // get back down to a single expression on the stack
|
| - // using binary operators, give up.
|
| - if (ops + nstk - 1 > maxops_)
|
| - return false;
|
| -
|
| - // Add operators if there are enough arguments.
|
| - if (ops < maxops_ && acm_->Uniform(2) == 0) {
|
| - const string& fmt = ops_[acm_->Uniform(static_cast<int32>(ops_.size()))];
|
| - int nargs = CountArgs(fmt);
|
| - if (nargs <= nstk) {
|
| - post->push_back(fmt);
|
| - bool ret = GenerateRandomPostfix(post, nstk - nargs + 1,
|
| - ops + 1, atoms);
|
| - post->pop_back();
|
| - if (ret)
|
| - return true;
|
| - }
|
| - }
|
| -
|
| - // Add atoms if there is room.
|
| - if (atoms < maxatoms_ && acm_->Uniform(2) == 0) {
|
| - post->push_back(atoms_[acm_->Uniform(static_cast<int32>(atoms_.size()))]);
|
| - bool ret = GenerateRandomPostfix(post, nstk + 1, ops, atoms + 1);
|
| - post->pop_back();
|
| - if (ret)
|
| - return true;
|
| - }
|
| - }
|
| -}
|
| -
|
| -// Interprets the postfix command sequence to create a regular expression
|
| -// passed to HandleRegexp. The results of operators like %s|%s are wrapped
|
| -// in (?: ) to avoid needing to maintain a precedence table.
|
| -void RegexpGenerator::RunPostfix(const vector<string>& post) {
|
| - stack<string> regexps;
|
| - for (size_t i = 0; i < post.size(); i++) {
|
| - switch (CountArgs(post[i])) {
|
| - default:
|
| - LOG(FATAL) << "Bad operator: " << post[i];
|
| - case 0:
|
| - regexps.push(post[i]);
|
| - break;
|
| - case 1: {
|
| - string a = regexps.top();
|
| - regexps.pop();
|
| - regexps.push("(?:" + StringPrintf(post[i].c_str(), a.c_str()) + ")");
|
| - break;
|
| - }
|
| - case 2: {
|
| - string b = regexps.top();
|
| - regexps.pop();
|
| - string a = regexps.top();
|
| - regexps.pop();
|
| - regexps.push("(?:" +
|
| - StringPrintf(post[i].c_str(), a.c_str(), b.c_str()) +
|
| - ")");
|
| - break;
|
| - }
|
| - }
|
| - }
|
| -
|
| - if (regexps.size() != 1) {
|
| - // Internal error - should never happen.
|
| - printf("Bad regexp program:\n");
|
| - for (size_t i = 0; i < post.size(); i++) {
|
| - printf(" %s\n", CEscape(post[i]).c_str());
|
| - }
|
| - printf("Stack after running program:\n");
|
| - while (!regexps.empty()) {
|
| - printf(" %s\n", CEscape(regexps.top()).c_str());
|
| - regexps.pop();
|
| - }
|
| - LOG(FATAL) << "Bad regexp program.";
|
| - }
|
| -
|
| - HandleRegexp(regexps.top());
|
| - HandleRegexp("^(?:" + regexps.top() + ")$");
|
| - HandleRegexp("^(?:" + regexps.top() + ")");
|
| - HandleRegexp("(?:" + regexps.top() + ")$");
|
| -}
|
| -
|
| -// Split s into an vector of strings, one for each UTF-8 character.
|
| -vector<string> Explode(const StringPiece& s) {
|
| - vector<string> v;
|
| -
|
| - for (const char *q = s.begin(); q < s.end(); ) {
|
| - const char* p = q;
|
| - Rune r;
|
| - q += chartorune(&r, q);
|
| - v.push_back(string(p, q - p));
|
| - }
|
| -
|
| - return v;
|
| -}
|
| -
|
| -// Split string everywhere a substring is found, returning
|
| -// vector of pieces.
|
| -vector<string> Split(const StringPiece& sep, const StringPiece& s) {
|
| - vector<string> v;
|
| -
|
| - if (sep.size() == 0)
|
| - return Explode(s);
|
| -
|
| - const char *p = s.begin();
|
| - for (const char *q = s.begin(); q + sep.size() <= s.end(); q++) {
|
| - if (StringPiece(q, sep.size()) == sep) {
|
| - v.push_back(string(p, q - p));
|
| - p = q + sep.size();
|
| - q = p - 1; // -1 for ++ in loop
|
| - continue;
|
| - }
|
| - }
|
| - if (p < s.end())
|
| - v.push_back(string(p, s.end() - p));
|
| - return v;
|
| -}
|
| -
|
| -} // namespace re2
|
|
|