| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style | 2 # Use of this source code is governed by a BSD-style |
| 3 # license that can be found in the LICENSE file or at | 3 # license that can be found in the LICENSE file or at |
| 4 # https://developers.google.com/open-source/licenses/bsd | 4 # https://developers.google.com/open-source/licenses/bsd |
| 5 | 5 |
| 6 """A set of functions that integrate the GAE search index with Monorail.""" | 6 """A set of functions that integrate the GAE search index with Monorail.""" |
| 7 | 7 |
| 8 import collections | 8 import collections |
| 9 import datetime | 9 import datetime |
| 10 import logging | 10 import logging |
| (...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 131 '-after': '>', | 131 '-after': '>', |
| 132 '-before': '<', | 132 '-before': '<', |
| 133 } | 133 } |
| 134 | 134 |
| 135 BUILTIN_ISSUE_FIELDS = { | 135 BUILTIN_ISSUE_FIELDS = { |
| 136 f_name: tracker_pb2.FieldDef(field_name=f_name, field_type=f_type) | 136 f_name: tracker_pb2.FieldDef(field_name=f_name, field_type=f_type) |
| 137 for f_name, f_type in _ISSUE_FIELDS_LIST} | 137 for f_name, f_type in _ISSUE_FIELDS_LIST} |
| 138 | 138 |
| 139 | 139 |
| 140 def ParseUserQuery( | 140 def ParseUserQuery( |
| 141 query, scope, builtin_fields, harmonized_config, warnings=None): | 141 query, scope, builtin_fields, harmonized_config, warnings=None, |
| 142 now=None): |
| 142 """Parse a user query and return a set of structure terms. | 143 """Parse a user query and return a set of structure terms. |
| 143 | 144 |
| 144 Args: | 145 Args: |
| 145 query: string with user's query. E.g., 'Priority=High'. | 146 query: string with user's query. E.g., 'Priority=High'. |
| 146 scope: string search terms that define the scope in which the | 147 scope: string search terms that define the scope in which the |
| 147 query should be executed. They are expressed in the same | 148 query should be executed. They are expressed in the same |
| 148 user query language. E.g., adding the canned query. | 149 user query language. E.g., adding the canned query. |
| 149 builtin_fields: dict {field_name: FieldDef(field_name, type)} | 150 builtin_fields: dict {field_name: FieldDef(field_name, type)} |
| 150 mapping field names to FieldDef objects for built-in fields. | 151 mapping field names to FieldDef objects for built-in fields. |
| 151 harmonized_config: config for all the projects being searched. | 152 harmonized_config: config for all the projects being searched. |
| 152 @@@ custom field name is not unique in cross project search. | 153 @@@ custom field name is not unique in cross project search. |
| 153 - custom_fields = {field_name: [fd, ...]} | 154 - custom_fields = {field_name: [fd, ...]} |
| 154 - query build needs to OR each possible interpretation | 155 - query build needs to OR each possible interpretation |
| 155 - could be label in one project and field in another project. | 156 - could be label in one project and field in another project. |
| 156 @@@ what about searching across all projects? | 157 @@@ what about searching across all projects? |
| 157 warnings: optional list to accumulate warning messages. | 158 warnings: optional list to accumulate warning messages. |
| 159 now: optional timestamp for tests, otherwise time.time() is used. |
| 158 | 160 |
| 159 Returns: | 161 Returns: |
| 160 A QueryAST with conjunctions (usually just one), where each has a list of | 162 A QueryAST with conjunctions (usually just one), where each has a list of |
| 161 Condition PBs with op, fields, str_values and int_values. E.g., the query | 163 Condition PBs with op, fields, str_values and int_values. E.g., the query |
| 162 [priority=high leak OR stars>100] over open issues would return | 164 [priority=high leak OR stars>100] over open issues would return |
| 163 QueryAST( | 165 QueryAST( |
| 164 Conjunction(Condition(EQ, [open_fd], [], [1]), | 166 Conjunction(Condition(EQ, [open_fd], [], [1]), |
| 165 Condition(EQ, [label_fd], ['priority-high'], []), | 167 Condition(EQ, [label_fd], ['priority-high'], []), |
| 166 Condition(TEXT_HAS, any_field_fd, ['leak'], [])), | 168 Condition(TEXT_HAS, any_field_fd, ['leak'], [])), |
| 167 Conjunction(Condition(EQ, [open_fd], [], [1]), | 169 Conjunction(Condition(EQ, [open_fd], [], [1]), |
| (...skipping 19 matching lines...) Expand all Loading... |
| 187 # Make a dictionary of all fields: built-in + custom in each project. | 189 # Make a dictionary of all fields: built-in + custom in each project. |
| 188 combined_fields = collections.defaultdict( | 190 combined_fields = collections.defaultdict( |
| 189 list, {field_name: [field_def] | 191 list, {field_name: [field_def] |
| 190 for field_name, field_def in builtin_fields.iteritems()}) | 192 for field_name, field_def in builtin_fields.iteritems()}) |
| 191 for fd in harmonized_config.field_defs: | 193 for fd in harmonized_config.field_defs: |
| 192 if fd.field_type != tracker_pb2.FieldTypes.ENUM_TYPE: | 194 if fd.field_type != tracker_pb2.FieldTypes.ENUM_TYPE: |
| 193 # Only do non-enum fields because enums are stored as labels | 195 # Only do non-enum fields because enums are stored as labels |
| 194 combined_fields[fd.field_name.lower()].append(fd) | 196 combined_fields[fd.field_name.lower()].append(fd) |
| 195 | 197 |
| 196 conjunctions = [ | 198 conjunctions = [ |
| 197 _ParseConjunction(sq, scope, combined_fields, warnings) | 199 _ParseConjunction(sq, scope, combined_fields, warnings, now=now) |
| 198 for sq in subqueries] | 200 for sq in subqueries] |
| 199 logging.info('search warnings: %r', warnings) | 201 logging.info('search warnings: %r', warnings) |
| 200 return ast_pb2.QueryAST(conjunctions=conjunctions) | 202 return ast_pb2.QueryAST(conjunctions=conjunctions) |
| 201 | 203 |
| 202 | 204 |
| 203 def _HasParens(s): | 205 def _HasParens(s): |
| 204 """Return True if there are parentheses in the given string.""" | 206 """Return True if there are parentheses in the given string.""" |
| 205 # Monorail cannot handle parenthesized expressions, so we tell the | 207 # Monorail cannot handle parenthesized expressions, so we tell the |
| 206 # user that immediately. Even inside a quoted string, the GAE search | 208 # user that immediately. Even inside a quoted string, the GAE search |
| 207 # engine will not handle parens in TEXT-type fields. | 209 # engine will not handle parens in TEXT-type fields. |
| 208 return '(' in s or ')' in s | 210 return '(' in s or ')' in s |
| 209 | 211 |
| 210 | 212 |
| 211 def _ParseConjunction(subquery, scope, fields, warnings): | 213 def _ParseConjunction(subquery, scope, fields, warnings, now=None): |
| 212 """Parse part of a user query into a Conjunction PB.""" | 214 """Parse part of a user query into a Conjunction PB.""" |
| 213 logging.info('Parsing sub query: %r in scope %r', subquery, scope) | 215 logging.info('Parsing sub query: %r in scope %r', subquery, scope) |
| 214 scoped_query = ('%s %s' % (scope, subquery)).lower() | 216 scoped_query = ('%s %s' % (scope, subquery)).lower() |
| 215 cond_strs = _ExtractConds(scoped_query) | 217 cond_strs = _ExtractConds(scoped_query) |
| 216 conds = [_ParseCond(cond_str, fields, warnings) for cond_str in cond_strs] | 218 conds = [_ParseCond(cond_str, fields, warnings, now=now) |
| 219 for cond_str in cond_strs] |
| 217 conds = [cond for cond in conds if cond] | 220 conds = [cond for cond in conds if cond] |
| 218 return ast_pb2.Conjunction(conds=conds) | 221 return ast_pb2.Conjunction(conds=conds) |
| 219 | 222 |
| 220 | 223 |
| 221 def _ParseCond(cond_str, fields, warnings): | 224 def _ParseCond(cond_str, fields, warnings, now=None): |
| 222 """Parse one user query condition string into a Condition PB.""" | 225 """Parse one user query condition string into a Condition PB.""" |
| 223 op_match = OP_RE.match(cond_str) | 226 op_match = OP_RE.match(cond_str) |
| 224 # Do not treat as key:value search terms if any of the special prefixes match. | 227 # Do not treat as key:value search terms if any of the special prefixes match. |
| 225 special_prefixes_match = any( | 228 special_prefixes_match = any( |
| 226 cond_str.startswith(p) for p in fulltext_helpers.NON_OP_PREFIXES) | 229 cond_str.startswith(p) for p in fulltext_helpers.NON_OP_PREFIXES) |
| 227 if op_match and not special_prefixes_match: | 230 if op_match and not special_prefixes_match: |
| 228 prefix = op_match.group('prefix') | 231 prefix = op_match.group('prefix') |
| 229 op = op_match.group('op') | 232 op = op_match.group('op') |
| 230 val = op_match.group('value') | 233 val = op_match.group('value') |
| 231 # Special case handling to continue to support old date query terms from | 234 # Special case handling to continue to support old date query terms from |
| 232 # codesite. See monorail:151 for more details. | 235 # codesite. See monorail:151 for more details. |
| 233 if prefix.startswith(_DATE_FIELDS): | 236 if prefix.startswith(_DATE_FIELDS): |
| 234 for date_suffix in _DATE_FIELD_SUFFIX_TO_OP: | 237 for date_suffix in _DATE_FIELD_SUFFIX_TO_OP: |
| 235 if prefix.endswith(date_suffix): | 238 if prefix.endswith(date_suffix): |
| 236 prefix = prefix.rstrip(date_suffix) | 239 prefix = prefix.rstrip(date_suffix) |
| 237 op = _DATE_FIELD_SUFFIX_TO_OP[date_suffix] | 240 op = _DATE_FIELD_SUFFIX_TO_OP[date_suffix] |
| 238 return _ParseStructuredTerm(prefix, op, val, fields) | 241 return _ParseStructuredTerm(prefix, op, val, fields, now=now) |
| 239 | 242 |
| 240 # Treat the cond as a full-text search term, which might be negated. | 243 # Treat the cond as a full-text search term, which might be negated. |
| 241 if cond_str.startswith('-'): | 244 if cond_str.startswith('-'): |
| 242 op = NOT_TEXT_HAS | 245 op = NOT_TEXT_HAS |
| 243 cond_str = cond_str[1:] | 246 cond_str = cond_str[1:] |
| 244 else: | 247 else: |
| 245 op = TEXT_HAS | 248 op = TEXT_HAS |
| 246 | 249 |
| 247 # Construct a full-text Query object as a dry-run to validate that | 250 # Construct a full-text Query object as a dry-run to validate that |
| 248 # the syntax is acceptable. | 251 # the syntax is acceptable. |
| 249 try: | 252 try: |
| 250 _fts_query = search.Query(cond_str) | 253 _fts_query = search.Query(cond_str) |
| 251 except search.QueryError: | 254 except search.QueryError: |
| 252 warnings.append('Ignoring full-text term: %s' % cond_str) | 255 warnings.append('Ignoring full-text term: %s' % cond_str) |
| 253 return None | 256 return None |
| 254 | 257 |
| 255 # Flag a potential user misunderstanding. | 258 # Flag a potential user misunderstanding. |
| 256 if cond_str.lower() in ('and', 'or', 'not'): | 259 if cond_str.lower() in ('and', 'or', 'not'): |
| 257 warnings.append( | 260 warnings.append( |
| 258 'The only supported boolean operator is OR (all capitals).') | 261 'The only supported boolean operator is OR (all capitals).') |
| 259 | 262 |
| 260 return ast_pb2.MakeCond( | 263 return ast_pb2.MakeCond( |
| 261 op, [BUILTIN_ISSUE_FIELDS[ast_pb2.ANY_FIELD]], [cond_str], []) | 264 op, [BUILTIN_ISSUE_FIELDS[ast_pb2.ANY_FIELD]], [cond_str], []) |
| 262 | 265 |
| 263 | 266 |
| 264 def _ParseStructuredTerm(prefix, op_str, value, fields): | 267 def _ParseStructuredTerm(prefix, op_str, value, fields, now=None): |
| 265 """Parse one user structured query term into an internal representation. | 268 """Parse one user structured query term into an internal representation. |
| 266 | 269 |
| 267 Args: | 270 Args: |
| 268 prefix: The query operator, usually a field name. E.g., summary. It can | 271 prefix: The query operator, usually a field name. E.g., summary. It can |
| 269 also be special operators like "is" to test boolean fields. | 272 also be special operators like "is" to test boolean fields. |
| 270 op_str: the comparison operator. Usually ":" or "=", but can be any OPS. | 273 op_str: the comparison operator. Usually ":" or "=", but can be any OPS. |
| 271 value: the value to compare against, e.g., term to find in that field. | 274 value: the value to compare against, e.g., term to find in that field. |
| 272 fields: dict {name_lower: [FieldDef, ...]} for built-in and custom fields. | 275 fields: dict {name_lower: [FieldDef, ...]} for built-in and custom fields. |
| 276 now: optional timestamp for tests, otherwise time.time() is used. |
| 273 | 277 |
| 274 Returns: | 278 Returns: |
| 275 A Condition PB. | 279 A Condition PB. |
| 276 """ | 280 """ |
| 277 unquoted_value = value.strip('"') | 281 unquoted_value = value.strip('"') |
| 278 # Quick-OR is a convenient way to write one condition that matches any one of | 282 # Quick-OR is a convenient way to write one condition that matches any one of |
| 279 # multiple values, like set membership. E.g., [Priority=High,Critical]. | 283 # multiple values, like set membership. E.g., [Priority=High,Critical]. |
| 280 quick_or_vals = [v.strip() for v in unquoted_value.split(',')] | 284 quick_or_vals = [v.strip() for v in unquoted_value.split(',')] |
| 281 | 285 |
| 282 if ((prefix == 'is' or prefix == '-is') and | 286 if ((prefix == 'is' or prefix == '-is') and |
| (...skipping 18 matching lines...) Expand all Loading... |
| 301 return ast_pb2.MakeCond(op, fields[unquoted_value], [], []) | 305 return ast_pb2.MakeCond(op, fields[unquoted_value], [], []) |
| 302 else: # Look for any label with that prefix. | 306 else: # Look for any label with that prefix. |
| 303 return ast_pb2.MakeCond(op, fields['label'], [unquoted_value], []) | 307 return ast_pb2.MakeCond(op, fields['label'], [unquoted_value], []) |
| 304 | 308 |
| 305 if prefix in fields: # search built-in and custom fields. E.g., summary. | 309 if prefix in fields: # search built-in and custom fields. E.g., summary. |
| 306 # Note: if first matching field is date-type, we assume they all are. | 310 # Note: if first matching field is date-type, we assume they all are. |
| 307 # TODO(jrobbins): better handling for rare case where multiple projects | 311 # TODO(jrobbins): better handling for rare case where multiple projects |
| 308 # define the same custom field name, and one is a date and another is not. | 312 # define the same custom field name, and one is a date and another is not. |
| 309 first_field = fields[prefix][0] | 313 first_field = fields[prefix][0] |
| 310 if first_field.field_type == DATE: | 314 if first_field.field_type == DATE: |
| 311 date_value = _ParseDateValue(unquoted_value) | 315 date_value = _ParseDateValue(unquoted_value, now=now) |
| 312 return ast_pb2.MakeCond(op, fields[prefix], [], [date_value]) | 316 return ast_pb2.MakeCond(op, fields[prefix], [], [date_value]) |
| 313 else: | 317 else: |
| 314 quick_or_ints = [] | 318 quick_or_ints = [] |
| 315 for qov in quick_or_vals: | 319 for qov in quick_or_vals: |
| 316 try: | 320 try: |
| 317 quick_or_ints.append(int(qov)) | 321 quick_or_ints.append(int(qov)) |
| 318 except ValueError: | 322 except ValueError: |
| 319 pass | 323 pass |
| 320 return ast_pb2.MakeCond(op, fields[prefix], quick_or_vals, quick_or_ints) | 324 return ast_pb2.MakeCond(op, fields[prefix], quick_or_vals, quick_or_ints) |
| 321 | 325 |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 366 # as fulltext search. It is probably a tiny bit of source code. | 370 # as fulltext search. It is probably a tiny bit of source code. |
| 367 terms.append('"%s"' % word_label) | 371 terms.append('"%s"' % word_label) |
| 368 | 372 |
| 369 # Case 3: Simple words. | 373 # Case 3: Simple words. |
| 370 elif word: | 374 elif word: |
| 371 terms.append(word) | 375 terms.append(word) |
| 372 | 376 |
| 373 return terms | 377 return terms |
| 374 | 378 |
| 375 | 379 |
| 376 def _ParseDateValue(val): | 380 def _ParseDateValue(val, now=None): |
| 377 """Convert the user-entered date into timestamp.""" | 381 """Convert the user-entered date into timestamp.""" |
| 378 # Support timestamp value such as opened>1437671476 | 382 # Support timestamp value such as opened>1437671476 |
| 379 try: | 383 try: |
| 380 return int(val) | 384 return int(val) |
| 381 except ValueError: | 385 except ValueError: |
| 382 pass | 386 pass |
| 383 | 387 |
| 384 # TODO(jrobbins): future: take timezones into account. | 388 # TODO(jrobbins): future: take timezones into account. |
| 385 # TODO(jrobbins): for now, explain to users that "today" is | 389 # TODO(jrobbins): for now, explain to users that "today" is |
| 386 # actually now: the current time, not 12:01am in their timezone. | 390 # actually now: the current time, not 12:01am in their timezone. |
| 387 # In fact, it is not very useful because everything in the system | 391 # In fact, it is not very useful because everything in the system |
| 388 # happened before the current time. | 392 # happened before the current time. |
| 389 if val == 'today': | 393 if val == 'today': |
| 390 return _CalculatePastDate(0) | 394 return _CalculatePastDate(0, now=now) |
| 391 elif val.startswith('today-'): | 395 elif val.startswith('today-'): |
| 392 try: | 396 try: |
| 393 days_ago = int(val.split('-')[1]) | 397 days_ago = int(val.split('-')[1]) |
| 394 except ValueError: | 398 except ValueError: |
| 395 raise InvalidQueryError('Could not parse date: ' + val) | 399 raise InvalidQueryError('Could not parse date: ' + val) |
| 396 return _CalculatePastDate(days_ago) | 400 return _CalculatePastDate(days_ago, now=now) |
| 397 | 401 |
| 398 try: | 402 try: |
| 399 if '/' in val: | 403 if '/' in val: |
| 400 year, month, day = [int(x) for x in val.split('/')] | 404 year, month, day = [int(x) for x in val.split('/')] |
| 401 elif '-' in val: | 405 elif '-' in val: |
| 402 year, month, day = [int(x) for x in val.split('-')] | 406 year, month, day = [int(x) for x in val.split('-')] |
| 403 else: | 407 else: |
| 404 raise InvalidQueryError('Could not parse date: ' + val) | 408 raise InvalidQueryError('Could not parse date: ' + val) |
| 405 except ValueError: | 409 except ValueError: |
| 406 raise InvalidQueryError('Could not parse date: ' + val) | 410 raise InvalidQueryError('Could not parse date: ' + val) |
| (...skipping 24 matching lines...) Expand all Loading... |
| 431 | 435 |
| 432 | 436 |
| 433 class Error(Exception): | 437 class Error(Exception): |
| 434 """Base exception class for this package.""" | 438 """Base exception class for this package.""" |
| 435 pass | 439 pass |
| 436 | 440 |
| 437 | 441 |
| 438 class InvalidQueryError(Error): | 442 class InvalidQueryError(Error): |
| 439 """Error raised when an invalid query is requested.""" | 443 """Error raised when an invalid query is requested.""" |
| 440 pass | 444 pass |
| OLD | NEW |