| Index: third_party/re2/re2/testing/compile_test.cc
|
| diff --git a/third_party/re2/re2/testing/compile_test.cc b/third_party/re2/re2/testing/compile_test.cc
|
| deleted file mode 100644
|
| index d438b1933f0fbba3ea474abf394e93aa812427ff..0000000000000000000000000000000000000000
|
| --- a/third_party/re2/re2/testing/compile_test.cc
|
| +++ /dev/null
|
| @@ -1,175 +0,0 @@
|
| -// Copyright 2007 The RE2 Authors. All Rights Reserved.
|
| -// Use of this source code is governed by a BSD-style
|
| -// license that can be found in the LICENSE file.
|
| -
|
| -// Test prog.cc, compile.cc
|
| -
|
| -#include <string>
|
| -#include <vector>
|
| -#include "util/test.h"
|
| -#include "re2/regexp.h"
|
| -#include "re2/prog.h"
|
| -
|
| -DEFINE_string(show, "", "regular expression to compile and dump");
|
| -
|
| -namespace re2 {
|
| -
|
| -// Simple input/output tests checking that
|
| -// the regexp compiles to the expected code.
|
| -// These are just to sanity check the basic implementation.
|
| -// The real confidence tests happen by testing the NFA/DFA
|
| -// that run the compiled code.
|
| -
|
| -struct Test {
|
| - const char* regexp;
|
| - const char* code;
|
| -};
|
| -
|
| -static Test tests[] = {
|
| - { "a",
|
| - "1. byte [61-61] -> 2\n"
|
| - "2. match! 0\n" },
|
| - { "ab",
|
| - "1. byte [61-61] -> 2\n"
|
| - "2. byte [62-62] -> 3\n"
|
| - "3. match! 0\n" },
|
| - { "a|c",
|
| - "3. alt -> 1 | 2\n"
|
| - "1. byte [61-61] -> 4\n"
|
| - "2. byte [63-63] -> 4\n"
|
| - "4. match! 0\n" },
|
| - { "a|b",
|
| - "1. byte [61-62] -> 2\n"
|
| - "2. match! 0\n" },
|
| - { "[ab]",
|
| - "1. byte [61-62] -> 2\n"
|
| - "2. match! 0\n" },
|
| - { "a+",
|
| - "1. byte [61-61] -> 2\n"
|
| - "2. alt -> 1 | 3\n"
|
| - "3. match! 0\n" },
|
| - { "a+?",
|
| - "1. byte [61-61] -> 2\n"
|
| - "2. alt -> 3 | 1\n"
|
| - "3. match! 0\n" },
|
| - { "a*",
|
| - "2. alt -> 1 | 3\n"
|
| - "1. byte [61-61] -> 2\n"
|
| - "3. match! 0\n" },
|
| - { "a*?",
|
| - "2. alt -> 3 | 1\n"
|
| - "3. match! 0\n"
|
| - "1. byte [61-61] -> 2\n" },
|
| - { "a?",
|
| - "2. alt -> 1 | 3\n"
|
| - "1. byte [61-61] -> 3\n"
|
| - "3. match! 0\n" },
|
| - { "a??",
|
| - "2. alt -> 3 | 1\n"
|
| - "3. match! 0\n"
|
| - "1. byte [61-61] -> 3\n" },
|
| - { "a{4}",
|
| - "1. byte [61-61] -> 2\n"
|
| - "2. byte [61-61] -> 3\n"
|
| - "3. byte [61-61] -> 4\n"
|
| - "4. byte [61-61] -> 5\n"
|
| - "5. match! 0\n" },
|
| - { "(a)",
|
| - "2. capture 2 -> 1\n"
|
| - "1. byte [61-61] -> 3\n"
|
| - "3. capture 3 -> 4\n"
|
| - "4. match! 0\n" },
|
| - { "(?:a)",
|
| - "1. byte [61-61] -> 2\n"
|
| - "2. match! 0\n" },
|
| - { "",
|
| - "2. match! 0\n" },
|
| - { ".",
|
| - "3. alt -> 1 | 2\n"
|
| - "1. byte [00-09] -> 4\n"
|
| - "2. byte [0b-ff] -> 4\n"
|
| - "4. match! 0\n" },
|
| - { "[^ab]",
|
| - "5. alt -> 3 | 4\n"
|
| - "3. alt -> 1 | 2\n"
|
| - "4. byte [63-ff] -> 6\n"
|
| - "1. byte [00-09] -> 6\n"
|
| - "2. byte [0b-60] -> 6\n"
|
| - "6. match! 0\n" },
|
| - { "[Aa]",
|
| - "1. byte/i [61-61] -> 2\n"
|
| - "2. match! 0\n" },
|
| - // Issue 20992936
|
| - { "[[-`]",
|
| - "1. byte [5b-60] -> 2\n"
|
| - "2. match! 0\n" },
|
| -};
|
| -
|
| -TEST(TestRegexpCompileToProg, Simple) {
|
| - int failed = 0;
|
| - for (int i = 0; i < arraysize(tests); i++) {
|
| - const re2::Test& t = tests[i];
|
| - Regexp* re = Regexp::Parse(t.regexp, Regexp::PerlX|Regexp::Latin1, NULL);
|
| - if (re == NULL) {
|
| - LOG(ERROR) << "Cannot parse: " << t.regexp;
|
| - failed++;
|
| - continue;
|
| - }
|
| - Prog* prog = re->CompileToProg(0);
|
| - if (prog == NULL) {
|
| - LOG(ERROR) << "Cannot compile: " << t.regexp;
|
| - re->Decref();
|
| - failed++;
|
| - continue;
|
| - }
|
| - CHECK(re->CompileToProg(1) == NULL);
|
| - string s = prog->Dump();
|
| - if (s != t.code) {
|
| - LOG(ERROR) << "Incorrect compiled code for: " << t.regexp;
|
| - LOG(ERROR) << "Want:\n" << t.code;
|
| - LOG(ERROR) << "Got:\n" << s;
|
| - failed++;
|
| - }
|
| - delete prog;
|
| - re->Decref();
|
| - }
|
| - EXPECT_EQ(failed, 0);
|
| -}
|
| -
|
| -// The distinct byte ranges involved in the UTF-8 dot ([^\n]).
|
| -// Once, erroneously split between 0x3f and 0x40 because it is
|
| -// a 6-bit boundary.
|
| -static struct UTF8ByteRange {
|
| - int lo;
|
| - int hi;
|
| -} utf8ranges[] = {
|
| - { 0x00, 0x09 },
|
| - { 0x0A, 0x0A },
|
| - { 0x10, 0x7F },
|
| - { 0x80, 0x8F },
|
| - { 0x90, 0x9F },
|
| - { 0xA0, 0xBF },
|
| - { 0xC0, 0xC1 },
|
| - { 0xC2, 0xDF },
|
| - { 0xE0, 0xE0 },
|
| - { 0xE1, 0xEF },
|
| - { 0xF0, 0xF0 },
|
| - { 0xF1, 0xF3 },
|
| - { 0xF4, 0xF4 },
|
| - { 0xF5, 0xFF },
|
| -};
|
| -
|
| -TEST(TestCompile, ByteRanges) {
|
| - Regexp* re = Regexp::Parse(".", Regexp::PerlX, NULL);
|
| - EXPECT_TRUE(re != NULL);
|
| - Prog* prog = re->CompileToProg(0);
|
| - EXPECT_TRUE(prog != NULL);
|
| - EXPECT_EQ(prog->bytemap_range(), arraysize(utf8ranges));
|
| - for (int i = 0; i < arraysize(utf8ranges); i++)
|
| - for (int j = utf8ranges[i].lo; j <= utf8ranges[i].hi; j++)
|
| - EXPECT_EQ(prog->bytemap()[j], i) << " byte " << j;
|
| - delete prog;
|
| - re->Decref();
|
| -}
|
| -
|
| -} // namespace re2
|
|
|