Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(618)

Side by Side Diff: appengine/monorail/search/query2ast.py

Issue 1941853002: [Monorail] Deflake the unit tests for parsing date queries. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | appengine/monorail/search/test/query2ast_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style 2 # Use of this source code is governed by a BSD-style
3 # license that can be found in the LICENSE file or at 3 # license that can be found in the LICENSE file or at
4 # https://developers.google.com/open-source/licenses/bsd 4 # https://developers.google.com/open-source/licenses/bsd
5 5
6 """A set of functions that integrate the GAE search index with Monorail.""" 6 """A set of functions that integrate the GAE search index with Monorail."""
7 7
8 import collections 8 import collections
9 import datetime 9 import datetime
10 import logging 10 import logging
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
131 '-after': '>', 131 '-after': '>',
132 '-before': '<', 132 '-before': '<',
133 } 133 }
134 134
135 BUILTIN_ISSUE_FIELDS = { 135 BUILTIN_ISSUE_FIELDS = {
136 f_name: tracker_pb2.FieldDef(field_name=f_name, field_type=f_type) 136 f_name: tracker_pb2.FieldDef(field_name=f_name, field_type=f_type)
137 for f_name, f_type in _ISSUE_FIELDS_LIST} 137 for f_name, f_type in _ISSUE_FIELDS_LIST}
138 138
139 139
140 def ParseUserQuery( 140 def ParseUserQuery(
141 query, scope, builtin_fields, harmonized_config, warnings=None): 141 query, scope, builtin_fields, harmonized_config, warnings=None,
142 now=None):
142 """Parse a user query and return a set of structure terms. 143 """Parse a user query and return a set of structure terms.
143 144
144 Args: 145 Args:
145 query: string with user's query. E.g., 'Priority=High'. 146 query: string with user's query. E.g., 'Priority=High'.
146 scope: string search terms that define the scope in which the 147 scope: string search terms that define the scope in which the
147 query should be executed. They are expressed in the same 148 query should be executed. They are expressed in the same
148 user query language. E.g., adding the canned query. 149 user query language. E.g., adding the canned query.
149 builtin_fields: dict {field_name: FieldDef(field_name, type)} 150 builtin_fields: dict {field_name: FieldDef(field_name, type)}
150 mapping field names to FieldDef objects for built-in fields. 151 mapping field names to FieldDef objects for built-in fields.
151 harmonized_config: config for all the projects being searched. 152 harmonized_config: config for all the projects being searched.
152 @@@ custom field name is not unique in cross project search. 153 @@@ custom field name is not unique in cross project search.
153 - custom_fields = {field_name: [fd, ...]} 154 - custom_fields = {field_name: [fd, ...]}
154 - query build needs to OR each possible interpretation 155 - query build needs to OR each possible interpretation
155 - could be label in one project and field in another project. 156 - could be label in one project and field in another project.
156 @@@ what about searching across all projects? 157 @@@ what about searching across all projects?
157 warnings: optional list to accumulate warning messages. 158 warnings: optional list to accumulate warning messages.
159 now: optional timestamp for tests, otherwise time.time() is used.
158 160
159 Returns: 161 Returns:
160 A QueryAST with conjunctions (usually just one), where each has a list of 162 A QueryAST with conjunctions (usually just one), where each has a list of
161 Condition PBs with op, fields, str_values and int_values. E.g., the query 163 Condition PBs with op, fields, str_values and int_values. E.g., the query
162 [priority=high leak OR stars>100] over open issues would return 164 [priority=high leak OR stars>100] over open issues would return
163 QueryAST( 165 QueryAST(
164 Conjunction(Condition(EQ, [open_fd], [], [1]), 166 Conjunction(Condition(EQ, [open_fd], [], [1]),
165 Condition(EQ, [label_fd], ['priority-high'], []), 167 Condition(EQ, [label_fd], ['priority-high'], []),
166 Condition(TEXT_HAS, any_field_fd, ['leak'], [])), 168 Condition(TEXT_HAS, any_field_fd, ['leak'], [])),
167 Conjunction(Condition(EQ, [open_fd], [], [1]), 169 Conjunction(Condition(EQ, [open_fd], [], [1]),
(...skipping 19 matching lines...) Expand all
187 # Make a dictionary of all fields: built-in + custom in each project. 189 # Make a dictionary of all fields: built-in + custom in each project.
188 combined_fields = collections.defaultdict( 190 combined_fields = collections.defaultdict(
189 list, {field_name: [field_def] 191 list, {field_name: [field_def]
190 for field_name, field_def in builtin_fields.iteritems()}) 192 for field_name, field_def in builtin_fields.iteritems()})
191 for fd in harmonized_config.field_defs: 193 for fd in harmonized_config.field_defs:
192 if fd.field_type != tracker_pb2.FieldTypes.ENUM_TYPE: 194 if fd.field_type != tracker_pb2.FieldTypes.ENUM_TYPE:
193 # Only do non-enum fields because enums are stored as labels 195 # Only do non-enum fields because enums are stored as labels
194 combined_fields[fd.field_name.lower()].append(fd) 196 combined_fields[fd.field_name.lower()].append(fd)
195 197
196 conjunctions = [ 198 conjunctions = [
197 _ParseConjunction(sq, scope, combined_fields, warnings) 199 _ParseConjunction(sq, scope, combined_fields, warnings, now=now)
198 for sq in subqueries] 200 for sq in subqueries]
199 logging.info('search warnings: %r', warnings) 201 logging.info('search warnings: %r', warnings)
200 return ast_pb2.QueryAST(conjunctions=conjunctions) 202 return ast_pb2.QueryAST(conjunctions=conjunctions)
201 203
202 204
203 def _HasParens(s): 205 def _HasParens(s):
204 """Return True if there are parentheses in the given string.""" 206 """Return True if there are parentheses in the given string."""
205 # Monorail cannot handle parenthesized expressions, so we tell the 207 # Monorail cannot handle parenthesized expressions, so we tell the
206 # user that immediately. Even inside a quoted string, the GAE search 208 # user that immediately. Even inside a quoted string, the GAE search
207 # engine will not handle parens in TEXT-type fields. 209 # engine will not handle parens in TEXT-type fields.
208 return '(' in s or ')' in s 210 return '(' in s or ')' in s
209 211
210 212
211 def _ParseConjunction(subquery, scope, fields, warnings): 213 def _ParseConjunction(subquery, scope, fields, warnings, now=None):
212 """Parse part of a user query into a Conjunction PB.""" 214 """Parse part of a user query into a Conjunction PB."""
213 logging.info('Parsing sub query: %r in scope %r', subquery, scope) 215 logging.info('Parsing sub query: %r in scope %r', subquery, scope)
214 scoped_query = ('%s %s' % (scope, subquery)).lower() 216 scoped_query = ('%s %s' % (scope, subquery)).lower()
215 cond_strs = _ExtractConds(scoped_query) 217 cond_strs = _ExtractConds(scoped_query)
216 conds = [_ParseCond(cond_str, fields, warnings) for cond_str in cond_strs] 218 conds = [_ParseCond(cond_str, fields, warnings, now=now)
219 for cond_str in cond_strs]
217 conds = [cond for cond in conds if cond] 220 conds = [cond for cond in conds if cond]
218 return ast_pb2.Conjunction(conds=conds) 221 return ast_pb2.Conjunction(conds=conds)
219 222
220 223
221 def _ParseCond(cond_str, fields, warnings): 224 def _ParseCond(cond_str, fields, warnings, now=None):
222 """Parse one user query condition string into a Condition PB.""" 225 """Parse one user query condition string into a Condition PB."""
223 op_match = OP_RE.match(cond_str) 226 op_match = OP_RE.match(cond_str)
224 # Do not treat as key:value search terms if any of the special prefixes match. 227 # Do not treat as key:value search terms if any of the special prefixes match.
225 special_prefixes_match = any( 228 special_prefixes_match = any(
226 cond_str.startswith(p) for p in fulltext_helpers.NON_OP_PREFIXES) 229 cond_str.startswith(p) for p in fulltext_helpers.NON_OP_PREFIXES)
227 if op_match and not special_prefixes_match: 230 if op_match and not special_prefixes_match:
228 prefix = op_match.group('prefix') 231 prefix = op_match.group('prefix')
229 op = op_match.group('op') 232 op = op_match.group('op')
230 val = op_match.group('value') 233 val = op_match.group('value')
231 # Special case handling to continue to support old date query terms from 234 # Special case handling to continue to support old date query terms from
232 # codesite. See monorail:151 for more details. 235 # codesite. See monorail:151 for more details.
233 if prefix.startswith(_DATE_FIELDS): 236 if prefix.startswith(_DATE_FIELDS):
234 for date_suffix in _DATE_FIELD_SUFFIX_TO_OP: 237 for date_suffix in _DATE_FIELD_SUFFIX_TO_OP:
235 if prefix.endswith(date_suffix): 238 if prefix.endswith(date_suffix):
236 prefix = prefix.rstrip(date_suffix) 239 prefix = prefix.rstrip(date_suffix)
237 op = _DATE_FIELD_SUFFIX_TO_OP[date_suffix] 240 op = _DATE_FIELD_SUFFIX_TO_OP[date_suffix]
238 return _ParseStructuredTerm(prefix, op, val, fields) 241 return _ParseStructuredTerm(prefix, op, val, fields, now=now)
239 242
240 # Treat the cond as a full-text search term, which might be negated. 243 # Treat the cond as a full-text search term, which might be negated.
241 if cond_str.startswith('-'): 244 if cond_str.startswith('-'):
242 op = NOT_TEXT_HAS 245 op = NOT_TEXT_HAS
243 cond_str = cond_str[1:] 246 cond_str = cond_str[1:]
244 else: 247 else:
245 op = TEXT_HAS 248 op = TEXT_HAS
246 249
247 # Construct a full-text Query object as a dry-run to validate that 250 # Construct a full-text Query object as a dry-run to validate that
248 # the syntax is acceptable. 251 # the syntax is acceptable.
249 try: 252 try:
250 _fts_query = search.Query(cond_str) 253 _fts_query = search.Query(cond_str)
251 except search.QueryError: 254 except search.QueryError:
252 warnings.append('Ignoring full-text term: %s' % cond_str) 255 warnings.append('Ignoring full-text term: %s' % cond_str)
253 return None 256 return None
254 257
255 # Flag a potential user misunderstanding. 258 # Flag a potential user misunderstanding.
256 if cond_str.lower() in ('and', 'or', 'not'): 259 if cond_str.lower() in ('and', 'or', 'not'):
257 warnings.append( 260 warnings.append(
258 'The only supported boolean operator is OR (all capitals).') 261 'The only supported boolean operator is OR (all capitals).')
259 262
260 return ast_pb2.MakeCond( 263 return ast_pb2.MakeCond(
261 op, [BUILTIN_ISSUE_FIELDS[ast_pb2.ANY_FIELD]], [cond_str], []) 264 op, [BUILTIN_ISSUE_FIELDS[ast_pb2.ANY_FIELD]], [cond_str], [])
262 265
263 266
264 def _ParseStructuredTerm(prefix, op_str, value, fields): 267 def _ParseStructuredTerm(prefix, op_str, value, fields, now=None):
265 """Parse one user structured query term into an internal representation. 268 """Parse one user structured query term into an internal representation.
266 269
267 Args: 270 Args:
268 prefix: The query operator, usually a field name. E.g., summary. It can 271 prefix: The query operator, usually a field name. E.g., summary. It can
269 also be special operators like "is" to test boolean fields. 272 also be special operators like "is" to test boolean fields.
270 op_str: the comparison operator. Usually ":" or "=", but can be any OPS. 273 op_str: the comparison operator. Usually ":" or "=", but can be any OPS.
271 value: the value to compare against, e.g., term to find in that field. 274 value: the value to compare against, e.g., term to find in that field.
272 fields: dict {name_lower: [FieldDef, ...]} for built-in and custom fields. 275 fields: dict {name_lower: [FieldDef, ...]} for built-in and custom fields.
276 now: optional timestamp for tests, otherwise time.time() is used.
273 277
274 Returns: 278 Returns:
275 A Condition PB. 279 A Condition PB.
276 """ 280 """
277 unquoted_value = value.strip('"') 281 unquoted_value = value.strip('"')
278 # Quick-OR is a convenient way to write one condition that matches any one of 282 # Quick-OR is a convenient way to write one condition that matches any one of
279 # multiple values, like set membership. E.g., [Priority=High,Critical]. 283 # multiple values, like set membership. E.g., [Priority=High,Critical].
280 quick_or_vals = [v.strip() for v in unquoted_value.split(',')] 284 quick_or_vals = [v.strip() for v in unquoted_value.split(',')]
281 285
282 if ((prefix == 'is' or prefix == '-is') and 286 if ((prefix == 'is' or prefix == '-is') and
(...skipping 18 matching lines...) Expand all
301 return ast_pb2.MakeCond(op, fields[unquoted_value], [], []) 305 return ast_pb2.MakeCond(op, fields[unquoted_value], [], [])
302 else: # Look for any label with that prefix. 306 else: # Look for any label with that prefix.
303 return ast_pb2.MakeCond(op, fields['label'], [unquoted_value], []) 307 return ast_pb2.MakeCond(op, fields['label'], [unquoted_value], [])
304 308
305 if prefix in fields: # search built-in and custom fields. E.g., summary. 309 if prefix in fields: # search built-in and custom fields. E.g., summary.
306 # Note: if first matching field is date-type, we assume they all are. 310 # Note: if first matching field is date-type, we assume they all are.
307 # TODO(jrobbins): better handling for rare case where multiple projects 311 # TODO(jrobbins): better handling for rare case where multiple projects
308 # define the same custom field name, and one is a date and another is not. 312 # define the same custom field name, and one is a date and another is not.
309 first_field = fields[prefix][0] 313 first_field = fields[prefix][0]
310 if first_field.field_type == DATE: 314 if first_field.field_type == DATE:
311 date_value = _ParseDateValue(unquoted_value) 315 date_value = _ParseDateValue(unquoted_value, now=now)
312 return ast_pb2.MakeCond(op, fields[prefix], [], [date_value]) 316 return ast_pb2.MakeCond(op, fields[prefix], [], [date_value])
313 else: 317 else:
314 quick_or_ints = [] 318 quick_or_ints = []
315 for qov in quick_or_vals: 319 for qov in quick_or_vals:
316 try: 320 try:
317 quick_or_ints.append(int(qov)) 321 quick_or_ints.append(int(qov))
318 except ValueError: 322 except ValueError:
319 pass 323 pass
320 return ast_pb2.MakeCond(op, fields[prefix], quick_or_vals, quick_or_ints) 324 return ast_pb2.MakeCond(op, fields[prefix], quick_or_vals, quick_or_ints)
321 325
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
366 # as fulltext search. It is probably a tiny bit of source code. 370 # as fulltext search. It is probably a tiny bit of source code.
367 terms.append('"%s"' % word_label) 371 terms.append('"%s"' % word_label)
368 372
369 # Case 3: Simple words. 373 # Case 3: Simple words.
370 elif word: 374 elif word:
371 terms.append(word) 375 terms.append(word)
372 376
373 return terms 377 return terms
374 378
375 379
376 def _ParseDateValue(val): 380 def _ParseDateValue(val, now=None):
377 """Convert the user-entered date into timestamp.""" 381 """Convert the user-entered date into timestamp."""
378 # Support timestamp value such as opened>1437671476 382 # Support timestamp value such as opened>1437671476
379 try: 383 try:
380 return int(val) 384 return int(val)
381 except ValueError: 385 except ValueError:
382 pass 386 pass
383 387
384 # TODO(jrobbins): future: take timezones into account. 388 # TODO(jrobbins): future: take timezones into account.
385 # TODO(jrobbins): for now, explain to users that "today" is 389 # TODO(jrobbins): for now, explain to users that "today" is
386 # actually now: the current time, not 12:01am in their timezone. 390 # actually now: the current time, not 12:01am in their timezone.
387 # In fact, it is not very useful because everything in the system 391 # In fact, it is not very useful because everything in the system
388 # happened before the current time. 392 # happened before the current time.
389 if val == 'today': 393 if val == 'today':
390 return _CalculatePastDate(0) 394 return _CalculatePastDate(0, now=now)
391 elif val.startswith('today-'): 395 elif val.startswith('today-'):
392 try: 396 try:
393 days_ago = int(val.split('-')[1]) 397 days_ago = int(val.split('-')[1])
394 except ValueError: 398 except ValueError:
395 raise InvalidQueryError('Could not parse date: ' + val) 399 raise InvalidQueryError('Could not parse date: ' + val)
396 return _CalculatePastDate(days_ago) 400 return _CalculatePastDate(days_ago, now=now)
397 401
398 try: 402 try:
399 if '/' in val: 403 if '/' in val:
400 year, month, day = [int(x) for x in val.split('/')] 404 year, month, day = [int(x) for x in val.split('/')]
401 elif '-' in val: 405 elif '-' in val:
402 year, month, day = [int(x) for x in val.split('-')] 406 year, month, day = [int(x) for x in val.split('-')]
403 else: 407 else:
404 raise InvalidQueryError('Could not parse date: ' + val) 408 raise InvalidQueryError('Could not parse date: ' + val)
405 except ValueError: 409 except ValueError:
406 raise InvalidQueryError('Could not parse date: ' + val) 410 raise InvalidQueryError('Could not parse date: ' + val)
(...skipping 24 matching lines...) Expand all
431 435
432 436
433 class Error(Exception): 437 class Error(Exception):
434 """Base exception class for this package.""" 438 """Base exception class for this package."""
435 pass 439 pass
436 440
437 441
438 class InvalidQueryError(Error): 442 class InvalidQueryError(Error):
439 """Error raised when an invalid query is requested.""" 443 """Error raised when an invalid query is requested."""
440 pass 444 pass
OLDNEW
« no previous file with comments | « no previous file | appengine/monorail/search/test/query2ast_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698