Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(122)

Unified Diff: appengine/monorail/services/fulltext_helpers.py

Issue 1868553004: Open Source Monorail (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Rebase Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « appengine/monorail/services/features_svc.py ('k') | appengine/monorail/services/issue_svc.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: appengine/monorail/services/fulltext_helpers.py
diff --git a/appengine/monorail/services/fulltext_helpers.py b/appengine/monorail/services/fulltext_helpers.py
new file mode 100644
index 0000000000000000000000000000000000000000..99cd4b7047aa00a7d774023585ff735e714f7ea7
--- /dev/null
+++ b/appengine/monorail/services/fulltext_helpers.py
@@ -0,0 +1,122 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is govered by a BSD-style
+# license that can be found in the LICENSE file or at
+# https://developers.google.com/open-source/licenses/bsd
+
+"""A set of helpers functions for fulltext search."""
+
+import logging
+
+from google.appengine.api import search
+
+import settings
+from proto import ast_pb2
+from proto import tracker_pb2
+
+# GAE search API can only respond with 500 results per call.
+_SEARCH_RESULT_CHUNK_SIZE = 500
+
+# Do not treat strings that start with the below as key:value search terms.
+# See bugs.chromium.org/p/monorail/issues/detail?id=419 for more detail.
+NON_OP_PREFIXES = (
+ 'http:',
+ 'https:',
+)
+
+
+def BuildFTSQuery(query_ast_conj, fulltext_fields):
+ """Convert a Monorail query AST into a GAE search query string.
+
+ Args:
+ query_ast_conj: a Conjunction PB with a list of Comparison PBs that each
+ have operator, field definitions, string values, and int values.
+ All Conditions should be AND'd together.
+ fulltext_fields: a list of string names of fields that may exist in the
+ fulltext documents. E.g., issue fulltext documents have a "summary"
+ field.
+
+ Returns:
+ A string that can be passed to AppEngine's search API. Or, None if there
+ were no fulltext conditions, so no fulltext search should be done.
+ """
+ fulltext_parts = [
+ _BuildFTSCondition(cond, fulltext_fields)
+ for cond in query_ast_conj.conds]
+ if any(fulltext_parts):
+ return ' '.join(fulltext_parts)
+ else:
+ return None
+
+
+def _BuildFTSCondition(cond, fulltext_fields):
+ """Convert one query AST condition into a GAE search query string."""
+ if cond.op == ast_pb2.QueryOp.NOT_TEXT_HAS:
+ neg = 'NOT '
+ elif cond.op == ast_pb2.QueryOp.TEXT_HAS:
+ neg = ''
+ else:
+ return '' # FTS only looks at TEXT_HAS and NOT_TEXT_HAS
+
+ parts = []
+
+ for fd in cond.field_defs:
+ if fd.field_name in fulltext_fields:
+ pattern = fd.field_name + ':"%s"'
+ elif fd.field_name == ast_pb2.ANY_FIELD:
+ pattern = '"%s"'
+ elif fd.field_id and fd.field_type == tracker_pb2.FieldTypes.STR_TYPE:
+ pattern = 'custom_' + str(fd.field_id) + ':"%s"'
+ else:
+ continue # This issue field is searched via SQL.
+
+ for value in cond.str_values:
+ # Strip out quotes around the value.
+ value = value.strip('"')
+ special_prefixes_match = any(value.startswith(p) for p in NON_OP_PREFIXES)
+ if not special_prefixes_match:
+ value = value.replace(':', ' ')
+ assert ('"' not in value), 'Value %r has a quote in it' % value
+ parts.append(pattern % value)
+
+ if parts:
+ return neg + '(%s)' % ' OR '.join(parts)
+ else:
+ return '' # None of the fields were fulltext fields.
+
+
+def ComprehensiveSearch(fulltext_query, index_name):
+ """Call the GAE search API, and keep calling it to get all results.
+
+ Args:
+ fulltext_query: string in the GAE search API query language.
+ index_name: string name of the GAE fulltext index to hit.
+
+ Returns:
+ A list of integer issue IIDs or project IDs.
+ """
+ search_index = search.Index(name=index_name)
+
+ response = search_index.search(search.Query(
+ fulltext_query,
+ options=search.QueryOptions(
+ limit=_SEARCH_RESULT_CHUNK_SIZE, returned_fields=[], ids_only=True,
+ cursor=search.Cursor())))
+ logging.info('got %d initial results', len(response.results))
+ ids = [int(result.doc_id) for result in response]
+
+ remaining_iterations = int(
+ settings.fulltext_limit_per_shard - 1 / _SEARCH_RESULT_CHUNK_SIZE)
+ for _ in range(remaining_iterations):
+ if not response.cursor:
+ break
+ response = search_index.search(search.Query(
+ fulltext_query,
+ options=search.QueryOptions(
+ limit=_SEARCH_RESULT_CHUNK_SIZE, returned_fields=[], ids_only=True,
+ cursor=response.cursor)))
+ logging.info(
+ 'got %d more results: %r', len(response.results), response.results)
+ ids.extend(int(result.doc_id) for result in response)
+
+ logging.info('FTS result ids %d', len(ids))
+ return ids
« no previous file with comments | « appengine/monorail/services/features_svc.py ('k') | appengine/monorail/services/issue_svc.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698