third_party/re2/re2/testing/search_test.cc - Issue 1544433002: Replace RE2 import with a dependency

Side by Side Diff: third_party/re2/re2/testing/search_test.cc

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Re-Added LICENSE and OWNERS file Created 4 years, 12 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright 2006-2007 The RE2 Authors. All Rights Reserved.

2 // Use of this source code is governed by a BSD-style

3 // license that can be found in the LICENSE file.

4

5 #include <stdlib.h>

6 #include <vector>

7 #include "util/test.h"

8 #include "re2/prog.h"

9 #include "re2/regexp.h"

10 #include "re2/testing/tester.h"

11 #include "re2/testing/exhaustive_tester.h"

12

13 namespace re2 {

14

// One search test case: a regular expression paired with the text it is
// run against.  Both fields point at string literals in the test table.
struct RegexpTest {
  const char* regexp;  // Pattern source, passed to TestRegexpOnText.
  const char* text;    // Subject text; may be NULL (see the "a*" entry).
};

19

20 RegexpTest simple_tests[] = {

21 { "a", "a" },

22 { "a", "zyzzyva" },

23 { "a+", "aa" },

24 { "(a+\|b)+", "ab" },

25 { "ab\|cd", "xabcdx" },

26 { "h.*od?", "hello\ngoodbye\n" },

27 { "h.*o", "hello\ngoodbye\n" },

28 { "h.*o", "goodbye\nhello\n" },

29 { "h.*o", "hello world" },

30 { "h.*o", "othello, world" },

31 { "[^\\s\\S]", "aaaaaaa" },

32 { "a", "aaaaaaa" },

33 { "a*", "aaaaaaa" },

34 { "a*", "" },

35 { "a*", NULL },

36 { "ab\|cd", "xabcdx" },

37 { "a", "cab" },

38 { "a*b", "cab" },

39 { "((((((((((((((((((((x))))))))))))))))))))", "x" },

40 { "[abcd]", "xxxabcdxxx" },

41 { "[^x]", "xxxabcdxxx" },

42 { "[abcd]+", "xxxabcdxxx" },

43 { "[^x]+", "xxxabcdxxx" },

44 { "(fo\|foo)", "fo" },

45 { "(foo\|fo)", "foo" },

46

47 { "aa", "aA" },

48 { "a", "Aa" },

49 { "a", "A" },

50 { "ABC", "abc" },

51 { "abc", "XABCY" },

52 { "ABC", "xabcy" },

53

54 // Make sure ^ and $ work.

55 // The pathological cases didn't work

56 // in the original grep code.

57 { "foo\|bar\|[A-Z]", "foo" },

58 { "^(foo\|bar\|[A-Z])", "foo" },

59 { "(foo\|bar\|[A-Z])$", "foo\n" },

60 { "(foo\|bar\|[A-Z])$", "foo" },

61 { "^(foo\|bar\|[A-Z])$", "foo\n" },

62 { "^(foo\|bar\|[A-Z])$", "foo" },

63 { "^(foo\|bar\|[A-Z])$", "bar" },

64 { "^(foo\|bar\|[A-Z])$", "X" },

65 { "^(foo\|bar\|[A-Z])$", "XY" },

66 { "^(fo\|foo)$", "fo" },

67 { "^(fo\|foo)$", "foo" },

68 { "^^(fo\|foo)$", "fo" },

69 { "^^(fo\|foo)$", "foo" },

70 { "^$", "" },

71 { "^$", "x" },

72 { "^^$", "" },

73 { "^$$", "" },

74 { "^^$", "x" },

75 { "^$$", "x" },

76 { "^^$$", "" },

77 { "^^$$", "x" },

78 { "^^^^^^^^$$$$$$$$", "" },

79 { "^", "x" },

80 { "$", "x" },

81

82 // Word boundaries.

83 { "\\bfoo\\b", "nofoo foo that" },

84 { "a\\b", "faoa x" },

85 { "\\bbar", "bar x" },

86 { "\\bbar", "foo\nbar x" },

87 { "bar\\b", "foobar" },

88 { "bar\\b", "foobar\nxxx" },

89 { "(foo\|bar\|[A-Z])\\b", "foo" },

90 { "(foo\|bar\|[A-Z])\\b", "foo\n" },

91 { "\\b", "" },

92 { "\\b", "x" },

93 { "\\b(foo\|bar\|[A-Z])", "foo" },

94 { "\\b(foo\|bar\|[A-Z])\\b", "X" },

95 { "\\b(foo\|bar\|[A-Z])\\b", "XY" },

96 { "\\b(foo\|bar\|[A-Z])\\b", "bar" },

97 { "\\b(foo\|bar\|[A-Z])\\b", "foo" },

98 { "\\b(foo\|bar\|[A-Z])\\b", "foo\n" },

99 { "\\b(foo\|bar\|[A-Z])\\b", "ffoo bbar N x" },

100 { "\\b(fo\|foo)\\b", "fo" },

101 { "\\b(fo\|foo)\\b", "foo" },

102 { "\\b\\b", "" },

103 { "\\b\\b", "x" },

104 { "\\b$", "" },

105 { "\\b$", "x" },

106 { "\\b$", "y x" },

107 { "\\b.$", "x" },

108 { "^\\b(fo\|foo)\\b", "fo" },

109 { "^\\b(fo\|foo)\\b", "foo" },

110 { "^\\b", "" },

111 { "^\\b", "x" },

112 { "^\\b\\b", "" },

113 { "^\\b\\b", "x" },

114 { "^\\b$", "" },

115 { "^\\b$", "x" },

116 { "^\\b.$", "x" },

117 { "^\\b.\\b$", "x" },

118 { "^^^^^^^^\\b$$$$$$$", "" },

119 { "^^^^^^^^\\b.$$$$$$", "x" },

120 { "^^^^^^^^\\b$$$$$$$", "x" },

121

122 // Non-word boundaries.

123 { "\\Bfoo\\B", "n foo xfoox that" },

124 { "a\\B", "faoa x" },

125 { "\\Bbar", "bar x" },

126 { "\\Bbar", "foo\nbar x" },

127 { "bar\\B", "foobar" },

128 { "bar\\B", "foobar\nxxx" },

129 { "(foo\|bar\|[A-Z])\\B", "foox" },

130 { "(foo\|bar\|[A-Z])\\B", "foo\n" },

131 { "\\B", "" },

132 { "\\B", "x" },

133 { "\\B(foo\|bar\|[A-Z])", "foo" },

134 { "\\B(foo\|bar\|[A-Z])\\B", "xXy" },

135 { "\\B(foo\|bar\|[A-Z])\\B", "XY" },

136 { "\\B(foo\|bar\|[A-Z])\\B", "XYZ" },

137 { "\\B(foo\|bar\|[A-Z])\\B", "abara" },

138 { "\\B(foo\|bar\|[A-Z])\\B", "xfoo_" },

139 { "\\B(foo\|bar\|[A-Z])\\B", "xfoo\n" },

140 { "\\B(foo\|bar\|[A-Z])\\B", "foo bar vNx" },

141 { "\\B(fo\|foo)\\B", "xfoo" },

142 { "\\B(foo\|fo)\\B", "xfooo" },

143 { "\\B\\B", "" },

144 { "\\B\\B", "x" },

145 { "\\B$", "" },

146 { "\\B$", "x" },

147 { "\\B$", "y x" },

148 { "\\B.$", "x" },

149 { "^\\B(fo\|foo)\\B", "fo" },

150 { "^\\B(fo\|foo)\\B", "foo" },

151 { "^\\B", "" },

152 { "^\\B", "x" },

153 { "^\\B\\B", "" },

154 { "^\\B\\B", "x" },

155 { "^\\B$", "" },

156 { "^\\B$", "x" },

157 { "^\\B.$", "x" },

158 { "^\\B.\\B$", "x" },

159 { "^^^^^^^^\\B$$$$$$$", "" },

160 { "^^^^^^^^\\B.$$$$$$", "x" },

161 { "^^^^^^^^\\B$$$$$$$", "x" },

162

163 // PCRE uses only ASCII for \b computation.

164 // All non-ASCII are not word characters.

165 { "\\bx\\b", "x" },

166 { "\\bx\\b", "x>" },

167 { "\\bx\\b", "<x" },

168 { "\\bx\\b", "<x>" },

169 { "\\bx\\b", "ax" },

170 { "\\bx\\b", "xb" },

171 { "\\bx\\b", "axb" },

172 { "\\bx\\b", "«x" },

173 { "\\bx\\b", "x»" },

174 { "\\bx\\b", "«x»" },

175 { "\\bx\\b", "axb" },

176 { "\\bx\\b", "áxβ" },

177 { "\\Bx\\B", "axb" },

178 { "\\Bx\\B", "áxβ" },

179

180 // Weird boundary cases.

181 { "^$^$", "" },

182 { "^$^", "" },

183 { "$^$", "" },

184

185 { "^$^$", "x" },

186 { "^$^", "x" },

187 { "$^$", "x" },

188

189 { "^$^$", "x\ny" },

190 { "^$^", "x\ny" },

191 { "$^$", "x\ny" },

192

193 { "^$^$", "x\n\ny" },

194 { "^$^", "x\n\ny" },

195 { "$^$", "x\n\ny" },

196

197 { "^(foo\\$)$", "foo$bar" },

198 { "(foo\\$)", "foo$bar" },

199 { "^...$", "abc" },

200

201 // UTF-8

202 { "^\xe6\x9c\xac$", "\xe6\x9c\xac" },

203 { "^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },

204 { "^...$", ".\xe6\x9c\xac." },

205

206 { "^\\C\\C\\C$", "\xe6\x9c\xac" },

207 { "^\\C$", "\xe6\x9c\xac" },

208 { "^\\C\\C\\C$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },

209

210 // Latin1

211 { "^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },

212 { "^.........$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e" },

213 { "^...$", ".\xe6\x9c\xac." },

214 { "^.....$", ".\xe6\x9c\xac." },

215

216 // Perl v Posix

217 { "\\B(fo\|foo)\\B", "xfooo" },

218 { "(fo\|foo)", "foo" },

219

220 // Octal escapes.

221 { "\\141", "a" },

222 { "\\060", "0" },

223 { "\\0600", "00" },

224 { "\\608", "08" },

225 { "\\01", "\01" },

226 { "\\018", "\01" "8" },

227

228 // Hexadecimal escapes

229 { "\\x{61}", "a" },

230 { "\\x61", "a" },

231 { "\\x{00000061}", "a" },

232

233 // Unicode scripts.

234 { "\\p{Greek}+", "aαβb" },

235 { "\\P{Greek}+", "aαβb" },

236 { "\\p{^Greek}+", "aαβb" },

237 { "\\P{^Greek}+", "aαβb" },

238

239 // Unicode properties. Nd is decimal number. N is any number.

240 { "[^0-9]+", "abc123" },

241 { "\\p{Nd}+", "abc123²³¼½¾₀₉" },

242 { "\\p{^Nd}+", "abc123²³¼½¾₀₉" },

243 { "\\P{Nd}+", "abc123²³¼½¾₀₉" },

244 { "\\P{^Nd}+", "abc123²³¼½¾₀₉" },

245 { "\\pN+", "abc123²³¼½¾₀₉" },

246 { "\\p{N}+", "abc123²³¼½¾₀₉" },

247 { "\\p{^N}+", "abc123²³¼½¾₀₉" },

248

249 { "\\p{Any}+", "abc123" },

250

251 // Character classes & case folding.

252 { "(?i)[@-A]+", "@AaB" }, // matches @Aa but not B

253 { "(?i)[A-Z]+", "aAzZ" },

254 { "(?i)[^\\\\]+", "Aa\\" }, // \\ is between A-Z and a-z -

255 // splits the ranges in an interesting way.

256

257 // would like to use, but PCRE mishandles in full-match, non-greedy mode

258 // { "(?i)[\\\\]+", "Aa" },

259

260 { "(?i)[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },

261

262 // Character classes & case folding.

263 { "[@-A]+", "@AaB" },

264 { "[A-Z]+", "aAzZ" },

265 { "[^\\\\]+", "Aa\\" },

266 { "[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },

267

268 // Anchoring. (^abc in aabcdef was a former bug)

269 // The tester checks for a match in the text and

270 // subpieces of the text with a byte removed on either side.

271 { "^abc", "abcdef" },

272 { "^abc", "aabcdef" },

273 { "^[ay]*[bx]+c", "abcdef" },

274 { "^[ay]*[bx]+c", "aabcdef" },

275 { "def$", "abcdef" },

276 { "def$", "abcdeff" },

277 { "d[ex][fy]$", "abcdef" },

278 { "d[ex][fy]$", "abcdeff" },

279 { "[dz][ex][fy]$", "abcdef" },

280 { "[dz][ex][fy]$", "abcdeff" },

281 { "(?m)^abc", "abcdef" },

282 { "(?m)^abc", "aabcdef" },

283 { "(?m)^[ay]*[bx]+c", "abcdef" },

284 { "(?m)^[ay]*[bx]+c", "aabcdef" },

285 { "(?m)def$", "abcdef" },

286 { "(?m)def$", "abcdeff" },

287 { "(?m)d[ex][fy]$", "abcdef" },

288 { "(?m)d[ex][fy]$", "abcdeff" },

289 { "(?m)[dz][ex][fy]$", "abcdef" },

290 { "(?m)[dz][ex][fy]$", "abcdeff" },

291 { "^", "a" },

292 { "^^", "a" },

293

294 // Context.

295 // The tester checks for a match in the text and

296 // subpieces of the text with a byte removed on either side.

297 { "a", "a" },

298 { "ab*", "a" },

299 { "a\\C*", "a" },

300

301 // Former bugs.

302 { "a\\C*\|ba\\C", "baba" },

303 };

304

305 TEST(Regexp, SearchTests) {

306 int failures = 0;

307 for (int i = 0; i < arraysize(simple_tests); i++) {

308 const RegexpTest& t = simple_tests[i];

309 if (!TestRegexpOnText(t.regexp, t.text))

310 failures++;

311

312 #ifdef LOGGING

313 // Build a dummy ExhaustiveTest call that will trigger just

314 // this one test, so that we log the test case.

315 vector<string> atom, alpha, ops;

316 atom.push_back(StringPiece(t.regexp).as_string());

317 alpha.push_back(StringPiece(t.text).as_string());

318 ExhaustiveTest(1, 0, atom, ops, 1, alpha, "", "");

319 #endif

320

321 }

322 EXPECT_EQ(failures, 0);

323 }

324

325 } // namespace re2

OLD	NEW

« no previous file with comments | « third_party/re2/re2/testing/required_prefix_test.cc ('k') | third_party/re2/re2/testing/set_test.cc » ('j') | no next file with comments »