| Index: appengine/monorail/services/fulltext_helpers.py
|
| diff --git a/appengine/monorail/services/fulltext_helpers.py b/appengine/monorail/services/fulltext_helpers.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..99cd4b7047aa00a7d774023585ff735e714f7ea7
|
| --- /dev/null
|
| +++ b/appengine/monorail/services/fulltext_helpers.py
|
| @@ -0,0 +1,122 @@
|
| +# Copyright 2016 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is govered by a BSD-style
|
| +# license that can be found in the LICENSE file or at
|
| +# https://developers.google.com/open-source/licenses/bsd
|
| +
|
| +"""A set of helpers functions for fulltext search."""
|
| +
|
| +import logging
|
| +
|
| +from google.appengine.api import search
|
| +
|
| +import settings
|
| +from proto import ast_pb2
|
| +from proto import tracker_pb2
|
| +
|
| +# GAE search API can only respond with 500 results per call.
|
| +_SEARCH_RESULT_CHUNK_SIZE = 500
|
| +
|
| +# Do not treat strings that start with the below as key:value search terms.
|
| +# See bugs.chromium.org/p/monorail/issues/detail?id=419 for more detail.
|
| +NON_OP_PREFIXES = (
|
| + 'http:',
|
| + 'https:',
|
| +)
|
| +
|
| +
|
| +def BuildFTSQuery(query_ast_conj, fulltext_fields):
|
| + """Convert a Monorail query AST into a GAE search query string.
|
| +
|
| + Args:
|
| + query_ast_conj: a Conjunction PB with a list of Comparison PBs that each
|
| + have operator, field definitions, string values, and int values.
|
| + All Conditions should be AND'd together.
|
| + fulltext_fields: a list of string names of fields that may exist in the
|
| + fulltext documents. E.g., issue fulltext documents have a "summary"
|
| + field.
|
| +
|
| + Returns:
|
| + A string that can be passed to AppEngine's search API. Or, None if there
|
| + were no fulltext conditions, so no fulltext search should be done.
|
| + """
|
| + fulltext_parts = [
|
| + _BuildFTSCondition(cond, fulltext_fields)
|
| + for cond in query_ast_conj.conds]
|
| + if any(fulltext_parts):
|
| + return ' '.join(fulltext_parts)
|
| + else:
|
| + return None
|
| +
|
| +
|
| +def _BuildFTSCondition(cond, fulltext_fields):
|
| + """Convert one query AST condition into a GAE search query string."""
|
| + if cond.op == ast_pb2.QueryOp.NOT_TEXT_HAS:
|
| + neg = 'NOT '
|
| + elif cond.op == ast_pb2.QueryOp.TEXT_HAS:
|
| + neg = ''
|
| + else:
|
| + return '' # FTS only looks at TEXT_HAS and NOT_TEXT_HAS
|
| +
|
| + parts = []
|
| +
|
| + for fd in cond.field_defs:
|
| + if fd.field_name in fulltext_fields:
|
| + pattern = fd.field_name + ':"%s"'
|
| + elif fd.field_name == ast_pb2.ANY_FIELD:
|
| + pattern = '"%s"'
|
| + elif fd.field_id and fd.field_type == tracker_pb2.FieldTypes.STR_TYPE:
|
| + pattern = 'custom_' + str(fd.field_id) + ':"%s"'
|
| + else:
|
| + continue # This issue field is searched via SQL.
|
| +
|
| + for value in cond.str_values:
|
| + # Strip out quotes around the value.
|
| + value = value.strip('"')
|
| + special_prefixes_match = any(value.startswith(p) for p in NON_OP_PREFIXES)
|
| + if not special_prefixes_match:
|
| + value = value.replace(':', ' ')
|
| + assert ('"' not in value), 'Value %r has a quote in it' % value
|
| + parts.append(pattern % value)
|
| +
|
| + if parts:
|
| + return neg + '(%s)' % ' OR '.join(parts)
|
| + else:
|
| + return '' # None of the fields were fulltext fields.
|
| +
|
| +
|
| +def ComprehensiveSearch(fulltext_query, index_name):
|
| + """Call the GAE search API, and keep calling it to get all results.
|
| +
|
| + Args:
|
| + fulltext_query: string in the GAE search API query language.
|
| + index_name: string name of the GAE fulltext index to hit.
|
| +
|
| + Returns:
|
| + A list of integer issue IIDs or project IDs.
|
| + """
|
| + search_index = search.Index(name=index_name)
|
| +
|
| + response = search_index.search(search.Query(
|
| + fulltext_query,
|
| + options=search.QueryOptions(
|
| + limit=_SEARCH_RESULT_CHUNK_SIZE, returned_fields=[], ids_only=True,
|
| + cursor=search.Cursor())))
|
| + logging.info('got %d initial results', len(response.results))
|
| + ids = [int(result.doc_id) for result in response]
|
| +
|
| + remaining_iterations = int(
|
| + settings.fulltext_limit_per_shard - 1 / _SEARCH_RESULT_CHUNK_SIZE)
|
| + for _ in range(remaining_iterations):
|
| + if not response.cursor:
|
| + break
|
| + response = search_index.search(search.Query(
|
| + fulltext_query,
|
| + options=search.QueryOptions(
|
| + limit=_SEARCH_RESULT_CHUNK_SIZE, returned_fields=[], ids_only=True,
|
| + cursor=response.cursor)))
|
| + logging.info(
|
| + 'got %d more results: %r', len(response.results), response.results)
|
| + ids.extend(int(result.doc_id) for result in response)
|
| +
|
| + logging.info('FTS result ids %d', len(ids))
|
| + return ids
|
|
|