Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(618)

Side by Side Diff: appengine/monorail/search/query2ast.py

Issue 1941853002: [Monorail] Deflake the unit tests for parsing date queries. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | appengine/monorail/search/test/query2ast_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style 2 # Use of this source code is governed by a BSD-style
3 # license that can be found in the LICENSE file or at 3 # license that can be found in the LICENSE file or at
4 # https://developers.google.com/open-source/licenses/bsd 4 # https://developers.google.com/open-source/licenses/bsd
5 5
6 """A set of functions that integrate the GAE search index with Monorail.""" 6 """A set of functions that integrate the GAE search index with Monorail."""
7 7
8 import collections 8 import collections
9 import datetime 9 import datetime
10 import logging 10 import logging
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
131 '-after': '>', 131 '-after': '>',
132 '-before': '<', 132 '-before': '<',
133 } 133 }
134 134
135 BUILTIN_ISSUE_FIELDS = { 135 BUILTIN_ISSUE_FIELDS = {
136 f_name: tracker_pb2.FieldDef(field_name=f_name, field_type=f_type) 136 f_name: tracker_pb2.FieldDef(field_name=f_name, field_type=f_type)
137 for f_name, f_type in _ISSUE_FIELDS_LIST} 137 for f_name, f_type in _ISSUE_FIELDS_LIST}
138 138
139 139
140 def ParseUserQuery( 140 def ParseUserQuery(
141 query, scope, builtin_fields, harmonized_config, warnings=None): 141 query, scope, builtin_fields, harmonized_config, warnings=None,
142 now=None):
142 """Parse a user query and return a set of structure terms. 143 """Parse a user query and return a set of structure terms.
143 144
144 Args: 145 Args:
145 query: string with user's query. E.g., 'Priority=High'. 146 query: string with user's query. E.g., 'Priority=High'.
146 scope: string search terms that define the scope in which the 147 scope: string search terms that define the scope in which the
147 query should be executed. They are expressed in the same 148 query should be executed. They are expressed in the same
148 user query language. E.g., adding the canned query. 149 user query language. E.g., adding the canned query.
149 builtin_fields: dict {field_name: FieldDef(field_name, type)} 150 builtin_fields: dict {field_name: FieldDef(field_name, type)}
150 mapping field names to FieldDef objects for built-in fields. 151 mapping field names to FieldDef objects for built-in fields.
151 harmonized_config: config for all the projects being searched. 152 harmonized_config: config for all the projects being searched.
152 @@@ custom field name is not unique in cross project search. 153 @@@ custom field name is not unique in cross project search.
153 - custom_fields = {field_name: [fd, ...]} 154 - custom_fields = {field_name: [fd, ...]}
154 - query build needs to OR each possible interpretation 155 - query build needs to OR each possible interpretation
155 - could be label in one project and field in another project. 156 - could be label in one project and field in another project.
156 @@@ what about searching across all projects? 157 @@@ what about searching across all projects?
157 warnings: optional list to accumulate warning messages. 158 warnings: optional list to accumulate warning messages.
159 now: optional timestamp for tests, otherwise time.time() is used.
158 160
159 Returns: 161 Returns:
160 A QueryAST with conjunctions (usually just one), where each has a list of 162 A QueryAST with conjunctions (usually just one), where each has a list of
161 Condition PBs with op, fields, str_values and int_values. E.g., the query 163 Condition PBs with op, fields, str_values and int_values. E.g., the query
162 [priority=high leak OR stars>100] over open issues would return 164 [priority=high leak OR stars>100] over open issues would return
163 QueryAST( 165 QueryAST(
164 Conjunction(Condition(EQ, [open_fd], [], [1]), 166 Conjunction(Condition(EQ, [open_fd], [], [1]),
165 Condition(EQ, [label_fd], ['priority-high'], []), 167 Condition(EQ, [label_fd], ['priority-high'], []),
166 Condition(TEXT_HAS, any_field_fd, ['leak'], [])), 168 Condition(TEXT_HAS, any_field_fd, ['leak'], [])),
167 Conjunction(Condition(EQ, [open_fd], [], [1]), 169 Conjunction(Condition(EQ, [open_fd], [], [1]),
(...skipping 19 matching lines...) Expand all
187 # Make a dictionary of all fields: built-in + custom in each project. 189 # Make a dictionary of all fields: built-in + custom in each project.
188 combined_fields = collections.defaultdict( 190 combined_fields = collections.defaultdict(
189 list, {field_name: [field_def] 191 list, {field_name: [field_def]
190 for field_name, field_def in builtin_fields.iteritems()}) 192 for field_name, field_def in builtin_fields.iteritems()})
191 for fd in harmonized_config.field_defs: 193 for fd in harmonized_config.field_defs:
192 if fd.field_type != tracker_pb2.FieldTypes.ENUM_TYPE: 194 if fd.field_type != tracker_pb2.FieldTypes.ENUM_TYPE:
193 # Only do non-enum fields because enums are stored as labels 195 # Only do non-enum fields because enums are stored as labels
194 combined_fields[fd.field_name.lower()].append(fd) 196 combined_fields[fd.field_name.lower()].append(fd)
195 197
196 conjunctions = [ 198 conjunctions = [
197 _ParseConjunction(sq, scope, combined_fields, warnings) 199 _ParseConjunction(sq, scope, combined_fields, warnings, now=now)
198 for sq in subqueries] 200 for sq in subqueries]
199 logging.info('search warnings: %r', warnings) 201 logging.info('search warnings: %r', warnings)
200 return ast_pb2.QueryAST(conjunctions=conjunctions) 202 return ast_pb2.QueryAST(conjunctions=conjunctions)
201 203
202 204
203 def _HasParens(s): 205 def _HasParens(s):
204 """Return True if there are parentheses in the given string.""" 206 """Return True if there are parentheses in the given string."""
205 # Monorail cannot handle parenthesized expressions, so we tell the 207 # Monorail cannot handle parenthesized expressions, so we tell the
206 # user that immediately. Even inside a quoted string, the GAE search 208 # user that immediately. Even inside a quoted string, the GAE search
207 # engine will not handle parens in TEXT-type fields. 209 # engine will not handle parens in TEXT-type fields.
208 return '(' in s or ')' in s 210 return '(' in s or ')' in s
209 211
210 212
211 def _ParseConjunction(subquery, scope, fields, warnings): 213 def _ParseConjunction(subquery, scope, fields, warnings, now=None):
212 """Parse part of a user query into a Conjunction PB.""" 214 """Parse part of a user query into a Conjunction PB."""
213 logging.info('Parsing sub query: %r in scope %r', subquery, scope) 215 logging.info('Parsing sub query: %r in scope %r', subquery, scope)
214 scoped_query = ('%s %s' % (scope, subquery)).lower() 216 scoped_query = ('%s %s' % (scope, subquery)).lower()
215 cond_strs = _ExtractConds(scoped_query) 217 cond_strs = _ExtractConds(scoped_query)
216 conds = [_ParseCond(cond_str, fields, warnings) for cond_str in cond_strs] 218 conds = [_ParseCond(cond_str, fields, warnings, now=now)
219 for cond_str in cond_strs]
217 conds = [cond for cond in conds if cond] 220 conds = [cond for cond in conds if cond]
218 return ast_pb2.Conjunction(conds=conds) 221 return ast_pb2.Conjunction(conds=conds)
219 222
220 223
221 def _ParseCond(cond_str, fields, warnings): 224 def _ParseCond(cond_str, fields, warnings, now=None):
222 """Parse one user query condition string into a Condition PB.""" 225 """Parse one user query condition string into a Condition PB."""
223 op_match = OP_RE.match(cond_str) 226 op_match = OP_RE.match(cond_str)
224 # Do not treat as key:value search terms if any of the special prefixes match. 227 # Do not treat as key:value search terms if any of the special prefixes match.
225 special_prefixes_match = any( 228 special_prefixes_match = any(
226 cond_str.startswith(p) for p in fulltext_helpers.NON_OP_PREFIXES) 229 cond_str.startswith(p) for p in fulltext_helpers.NON_OP_PREFIXES)
227 if op_match and not special_prefixes_match: 230 if op_match and not special_prefixes_match:
228 prefix = op_match.group('prefix') 231 prefix = op_match.group('prefix')
229 op = op_match.group('op') 232 op = op_match.group('op')
230 val = op_match.group('value') 233 val = op_match.group('value')
231 # Special case handling to continue to support old date query terms from 234 # Special case handling to continue to support old date query terms from
232 # codesite. See monorail:151 for more details. 235 # codesite. See monorail:151 for more details.
233 if prefix.startswith(_DATE_FIELDS): 236 if prefix.startswith(_DATE_FIELDS):
234 for date_suffix in _DATE_FIELD_SUFFIX_TO_OP: 237 for date_suffix in _DATE_FIELD_SUFFIX_TO_OP:
235 if prefix.endswith(date_suffix): 238 if prefix.endswith(date_suffix):
236 prefix = prefix.rstrip(date_suffix) 239 prefix = prefix.rstrip(date_suffix)
237 op = _DATE_FIELD_SUFFIX_TO_OP[date_suffix] 240 op = _DATE_FIELD_SUFFIX_TO_OP[date_suffix]
238 return _ParseStructuredTerm(prefix, op, val, fields) 241 return _ParseStructuredTerm(prefix, op, val, fields, now=now)
239 242
240 # Treat the cond as a full-text search term, which might be negated. 243 # Treat the cond as a full-text search term, which might be negated.
241 if cond_str.startswith('-'): 244 if cond_str.startswith('-'):
242 op = NOT_TEXT_HAS 245 op = NOT_TEXT_HAS
243 cond_str = cond_str[1:] 246 cond_str = cond_str[1:]
244 else: 247 else:
245 op = TEXT_HAS 248 op = TEXT_HAS
246 249
247 # Construct a full-text Query object as a dry-run to validate that 250 # Construct a full-text Query object as a dry-run to validate that
248 # the syntax is acceptable. 251 # the syntax is acceptable.
249 try: 252 try:
250 _fts_query = search.Query(cond_str) 253 _fts_query = search.Query(cond_str)
251 except search.QueryError: 254 except search.QueryError:
252 warnings.append('Ignoring full-text term: %s' % cond_str) 255 warnings.append('Ignoring full-text term: %s' % cond_str)
253 return None 256 return None
254 257
255 # Flag a potential user misunderstanding. 258 # Flag a potential user misunderstanding.
256 if cond_str.lower() in ('and', 'or', 'not'): 259 if cond_str.lower() in ('and', 'or', 'not'):
257 warnings.append( 260 warnings.append(
258 'The only supported boolean operator is OR (all capitals).') 261 'The only supported boolean operator is OR (all capitals).')
259 262
260 return ast_pb2.MakeCond( 263 return ast_pb2.MakeCond(
261 op, [BUILTIN_ISSUE_FIELDS[ast_pb2.ANY_FIELD]], [cond_str], []) 264 op, [BUILTIN_ISSUE_FIELDS[ast_pb2.ANY_FIELD]], [cond_str], [])
262 265
263 266
264 def _ParseStructuredTerm(prefix, op_str, value, fields): 267 def _ParseStructuredTerm(prefix, op_str, value, fields, now=None):
265 """Parse one user structured query term into an internal representation. 268 """Parse one user structured query term into an internal representation.
266 269
267 Args: 270 Args:
268 prefix: The query operator, usually a field name. E.g., summary. It can 271 prefix: The query operator, usually a field name. E.g., summary. It can
269 also be special operators like "is" to test boolean fields. 272 also be special operators like "is" to test boolean fields.
270 op_str: the comparison operator. Usually ":" or "=", but can be any OPS. 273 op_str: the comparison operator. Usually ":" or "=", but can be any OPS.
271 value: the value to compare against, e.g., term to find in that field. 274 value: the value to compare against, e.g., term to find in that field.
272 fields: dict {name_lower: [FieldDef, ...]} for built-in and custom fields. 275 fields: dict {name_lower: [FieldDef, ...]} for built-in and custom fields.
276 now: optional timestamp for tests, otherwise time.time() is used.
273 277
274 Returns: 278 Returns:
275 A Condition PB. 279 A Condition PB.
276 """ 280 """
277 unquoted_value = value.strip('"') 281 unquoted_value = value.strip('"')
278 # Quick-OR is a convenient way to write one condition that matches any one of 282 # Quick-OR is a convenient way to write one condition that matches any one of
279 # multiple values, like set membership. E.g., [Priority=High,Critical]. 283 # multiple values, like set membership. E.g., [Priority=High,Critical].
280 quick_or_vals = [v.strip() for v in unquoted_value.split(',')] 284 quick_or_vals = [v.strip() for v in unquoted_value.split(',')]
281 285
282 if ((prefix == 'is' or prefix == '-is') and 286 if ((prefix == 'is' or prefix == '-is') and
(...skipping 18 matching lines...) Expand all
301 return ast_pb2.MakeCond(op, fields[unquoted_value], [], []) 305 return ast_pb2.MakeCond(op, fields[unquoted_value], [], [])
302 else: # Look for any label with that prefix. 306 else: # Look for any label with that prefix.
303 return ast_pb2.MakeCond(op, fields['label'], [unquoted_value], []) 307 return ast_pb2.MakeCond(op, fields['label'], [unquoted_value], [])
304 308
305 if prefix in fields: # search built-in and custom fields. E.g., summary. 309 if prefix in fields: # search built-in and custom fields. E.g., summary.
306 # Note: if first matching field is date-type, we assume they all are. 310 # Note: if first matching field is date-type, we assume they all are.
307 # TODO(jrobbins): better handling for rare case where multiple projects 311 # TODO(jrobbins): better handling for rare case where multiple projects
308 # define the same custom field name, and one is a date and another is not. 312 # define the same custom field name, and one is a date and another is not.
309 first_field = fields[prefix][0] 313 first_field = fields[prefix][0]
310 if first_field.field_type == DATE: 314 if first_field.field_type == DATE:
311 date_value = _ParseDateValue(unquoted_value) 315 date_value = _ParseDateValue(unquoted_value, now=now)
312 return ast_pb2.MakeCond(op, fields[prefix], [], [date_value]) 316 return ast_pb2.MakeCond(op, fields[prefix], [], [date_value])
313 else: 317 else:
314 quick_or_ints = [] 318 quick_or_ints = []
315 for qov in quick_or_vals: 319 for qov in quick_or_vals:
316 try: 320 try:
317 quick_or_ints.append(int(qov)) 321 quick_or_ints.append(int(qov))
318 except ValueError: 322 except ValueError:
319 pass 323 pass
320 return ast_pb2.MakeCond(op, fields[prefix], quick_or_vals, quick_or_ints) 324 return ast_pb2.MakeCond(op, fields[prefix], quick_or_vals, quick_or_ints)
321 325
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
366 # as fulltext search. It is probably a tiny bit of source code. 370 # as fulltext search. It is probably a tiny bit of source code.
367 terms.append('"%s"' % word_label) 371 terms.append('"%s"' % word_label)
368 372
369 # Case 3: Simple words. 373 # Case 3: Simple words.
370 elif word: 374 elif word:
371 terms.append(word) 375 terms.append(word)
372 376
373 return terms 377 return terms
374 378
375 379
376 def _ParseDateValue(val): 380 def _ParseDateValue(val, now=None):
377 """Convert the user-entered date into timestamp.""" 381 """Convert the user-entered date into timestamp."""
378 # Support timestamp value such as opened>1437671476 382 # Support timestamp value such as opened>1437671476
379 try: 383 try:
380 return int(val) 384 return int(val)
381 except ValueError: 385 except ValueError:
382 pass 386 pass
383 387
384 # TODO(jrobbins): future: take timezones into account. 388 # TODO(jrobbins): future: take timezones into account.
385 # TODO(jrobbins): for now, explain to users that "today" is 389 # TODO(jrobbins): for now, explain to users that "today" is
386 # actually now: the current time, not 12:01am in their timezone. 390 # actually now: the current time, not 12:01am in their timezone.
387 # In fact, it is not very useful because everything in the system 391 # In fact, it is not very useful because everything in the system
388 # happened before the current time. 392 # happened before the current time.
389 if val == 'today': 393 if val == 'today':
390 return _CalculatePastDate(0) 394 return _CalculatePastDate(0, now=now)
391 elif val.startswith('today-'): 395 elif val.startswith('today-'):
392 try: 396 try:
393 days_ago = int(val.split('-')[1]) 397 days_ago = int(val.split('-')[1])
394 except ValueError: 398 except ValueError:
395 raise InvalidQueryError('Could not parse date: ' + val) 399 raise InvalidQueryError('Could not parse date: ' + val)
396 return _CalculatePastDate(days_ago) 400 return _CalculatePastDate(days_ago, now=now)
397 401
398 try: 402 try:
399 if '/' in val: 403 if '/' in val:
400 year, month, day = [int(x) for x in val.split('/')] 404 year, month, day = [int(x) for x in val.split('/')]
401 elif '-' in val: 405 elif '-' in val:
402 year, month, day = [int(x) for x in val.split('-')] 406 year, month, day = [int(x) for x in val.split('-')]
403 else: 407 else:
404 raise InvalidQueryError('Could not parse date: ' + val) 408 raise InvalidQueryError('Could not parse date: ' + val)
405 except ValueError: 409 except ValueError:
406 raise InvalidQueryError('Could not parse date: ' + val) 410 raise InvalidQueryError('Could not parse date: ' + val)
(...skipping 24 matching lines...) Expand all
431 435
432 436
433 class Error(Exception): 437 class Error(Exception):
434 """Base exception class for this package.""" 438 """Base exception class for this package."""
435 pass 439 pass
436 440
437 441
438 class InvalidQueryError(Error): 442 class InvalidQueryError(Error):
439 """Error raised when an invalid query is requested.""" 443 """Error raised when an invalid query is requested."""
440 pass 444 pass
OLDNEW
« no previous file with comments | « no previous file | appengine/monorail/search/test/query2ast_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698