| OLD | NEW | 
|---|
| (Empty) |  | 
|  | 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 
|  | 2 # Use of this source code is governed by a BSD-style | 
|  | 3 # license that can be found in the LICENSE file or at | 
|  | 4 # https://developers.google.com/open-source/licenses/bsd | 
|  | 5 | 
|  | 6 """A set of helper functions for fulltext search.""" | 
|  | 7 | 
|  | 8 import logging | 
|  | 9 | 
|  | 10 from google.appengine.api import search | 
|  | 11 | 
|  | 12 import settings | 
|  | 13 from proto import ast_pb2 | 
|  | 14 from proto import tracker_pb2 | 
|  | 15 | 
# The GAE search API can only respond with 500 results per call, so larger
# result sets are fetched in chunks of this size.
_SEARCH_RESULT_CHUNK_SIZE = 500

# Search values starting with one of these prefixes are not treated as
# key:value search terms (their colons are left alone).
# See bugs.chromium.org/p/monorail/issues/detail?id=419 for more detail.
NON_OP_PREFIXES = ('http:', 'https:')
|  | 25 | 
|  | 26 | 
def BuildFTSQuery(query_ast_conj, fulltext_fields):
  """Turn a Monorail query AST conjunction into a GAE search query string.

  Args:
    query_ast_conj: a Conjunction PB whose conds are Comparison PBs, each
        with an operator, field definitions, string values, and int values.
        All of the conditions are AND'd together.
    fulltext_fields: a list of string names of fields that may exist in the
        fulltext documents.  E.g., issue fulltext documents have a "summary"
        field.

  Returns:
    A string that can be passed to AppEngine's search API, or None when none
    of the conditions is a fulltext condition, in which case no fulltext
    search should be done.
  """
  condition_strs = []
  for cond in query_ast_conj.conds:
    condition_strs.append(_BuildFTSCondition(cond, fulltext_fields))

  # Conditions that are not fulltext conditions come back as ''.  If every
  # entry is empty (or there are no conditions), there is nothing to search.
  if not any(condition_strs):
    return None

  # Space-separated terms; empty entries are kept in the join as-is.
  return ' '.join(condition_strs)
|  | 49 | 
|  | 50 | 
def _BuildFTSCondition(cond, fulltext_fields):
  """Convert one query AST condition into a GAE search query string.

  Args:
    cond: a condition PB with op, field_defs, and str_values.
    fulltext_fields: a list of string names of fields that may exist in the
        fulltext documents.

  Returns:
    A parenthesized string of OR'd quoted terms, prefixed with 'NOT ' for
    NOT_TEXT_HAS conditions.  Returns '' if the operator is not TEXT_HAS or
    NOT_TEXT_HAS, or if no term was produced for any of the fields.
  """
  # FTS only looks at TEXT_HAS and NOT_TEXT_HAS.
  if cond.op == ast_pb2.QueryOp.TEXT_HAS:
    negation = ''
  elif cond.op == ast_pb2.QueryOp.NOT_TEXT_HAS:
    negation = 'NOT '
  else:
    return ''

  terms = []
  for field_def in cond.field_defs:
    name = field_def.field_name
    if name in fulltext_fields:
      template = name + ':"%s"'
    elif name == ast_pb2.ANY_FIELD:
      template = '"%s"'
    elif (field_def.field_id and
          field_def.field_type == tracker_pb2.FieldTypes.STR_TYPE):
      template = 'custom_' + str(field_def.field_id) + ':"%s"'
    else:
      # This issue field is searched via SQL.
      continue

    for raw_value in cond.str_values:
      # Strip out quotes around the value.
      text = raw_value.strip('"')
      # Values with a NON_OP_PREFIXES prefix keep their colons; in any other
      # value each colon is replaced with a space.
      if not text.startswith(NON_OP_PREFIXES):
        text = text.replace(':', ' ')
        assert ('"' not in text), 'Value %r has a quote in it' % text
      terms.append(template % text)

  if not terms:
    return ''  # None of the fields were fulltext fields.
  return negation + '(%s)' % ' OR '.join(terms)
|  | 85 | 
|  | 86 | 
def ComprehensiveSearch(fulltext_query, index_name):
  """Call the GAE search API, and keep calling it to get all results.

  Results are fetched in chunks of _SEARCH_RESULT_CHUNK_SIZE by following
  the cursor of each response.  Fetching stops when a response has no cursor
  or when enough chunks have been requested to cover
  settings.fulltext_limit_per_shard results.

  Args:
    fulltext_query: string in the GAE search API query language.
    index_name: string name of the GAE fulltext index to hit.

  Returns:
    A list of integer issue IIDs or project IDs.
  """
  search_index = search.Index(name=index_name)

  response = search_index.search(search.Query(
      fulltext_query,
      options=search.QueryOptions(
          limit=_SEARCH_RESULT_CHUNK_SIZE, returned_fields=[], ids_only=True,
          cursor=search.Cursor())))
  logging.info('got %d initial results', len(response.results))
  ids = [int(result.doc_id) for result in response]

  # One chunk has already been fetched above.  The number of additional
  # chunks needed to reach the per-shard limit is (limit - 1) // chunk_size.
  # The parentheses matter: without them the expression is
  # limit - (1 / chunk_size), which allows roughly `limit` extra calls.
  # Floor division keeps the result an integer count on Python 2 and 3.
  remaining_iterations = int(
      (settings.fulltext_limit_per_shard - 1) // _SEARCH_RESULT_CHUNK_SIZE)
  for _ in range(remaining_iterations):
    if not response.cursor:
      break  # No cursor on the last response, so stop paging.
    response = search_index.search(search.Query(
        fulltext_query,
        options=search.QueryOptions(
            limit=_SEARCH_RESULT_CHUNK_SIZE, returned_fields=[], ids_only=True,
            cursor=response.cursor)))
    logging.info(
        'got %d more results: %r', len(response.results), response.results)
    ids.extend(int(result.doc_id) for result in response)

  logging.info('FTS result ids %d', len(ids))
  return ids
| OLD | NEW | 
|---|