Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(162)

Unified Diff: appengine/monorail/features/autolink.py

Issue 1868553004: Open Source Monorail (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Rebase Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « appengine/monorail/features/activities.py ('k') | appengine/monorail/features/commands.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: appengine/monorail/features/autolink.py
diff --git a/appengine/monorail/features/autolink.py b/appengine/monorail/features/autolink.py
new file mode 100644
index 0000000000000000000000000000000000000000..a50b84886bd5510e465fd488d9affc8716291a9c
--- /dev/null
+++ b/appengine/monorail/features/autolink.py
@@ -0,0 +1,465 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file or at
+# https://developers.google.com/open-source/licenses/bsd
+
+"""Autolink helps auto-link references to artifacts in text.
+
+This class maintains a registry of artifact autolink syntax specs and
+callbacks. The structure of that registry is:
+ { component_name: (lookup_callback,
+ match_to_reference_callback,
+ { regex: substitution_callback, ...}),
+ ...
+ }
+
+For example:
+ { 'tracker':
+ (GetReferencedIssues,
+ ExtractProjectAndIssueIds,
+ {_ISSUE_REF_RE: ReplaceIssueRef}),
+ 'versioncontrol':
+ (GetReferencedRevisions,
+ ExtractRevNums,
+ {_GIT_HASH_RE: ReplaceRevisionRef}),
+ }
+
+The dictionary of regexes is used here because, in the future, we
+might add more regexes for each component rather than have one complex
+regex per component.
+"""
+
+import logging
+import re
+import urllib
+import urlparse
+
+import settings
+from framework import template_helpers
+from framework import validate
+from proto import project_pb2
+from tracker import tracker_helpers
+
+
# Matches an HTML closing tag such as "</b>" at the very end of a string.
_CLOSING_TAG_RE = re.compile('</[a-z0-9]+>$', re.IGNORECASE)

# URL schemes that mark the start of text that may be turned into a link.
_LINKIFY_SCHEMES = r'(https?://|ftp://|mailto:)'
# Also count a start-tag '<' as a url delimiter, since the autolinker
# is sometimes run against html fragments.
_IS_A_LINK_RE = re.compile(r'(%s)([^\s<]+)' % _LINKIFY_SCHEMES, re.UNICODE)

# (opening, closing) delimiter pairs.  These characters are allowed in
# links, but if a closing delimiter appears at the end of the link, and the
# opening one is not part of the link, then the closing delimiter is trimmed
# off.  A closing delimiter whose opening counterpart is None is always
# trimmed.
_LINK_TRAILING_CHARS = [
    (None, ':'),
    (None, '.'),
    (None, ','),
    ('<', '>'),
    ('"', '"'),
    ('(', ')'),
    ('[', ']'),
    ('{', '}'),
    ]
+
+
def Linkify(_mr, autolink_regex_match,
            _component_ref_artifacts):
  """Examine a textual reference and replace it with a hyperlink or not.

  This is a callback for use with the autolink feature.

  Args:
    _mr: common info parsed from the user HTTP request (unused).
    autolink_regex_match: regex match for the textual reference.
    _component_ref_artifacts: unused value

  Returns:
    A list of TextRuns.  If the matched text, after trailing delimiters
    have been trimmed, passes validate.IsValidURL or validate.IsValidEmail,
    the list holds a TextRun with tag=a for the link, followed by a plain
    TextRun holding any trimmed trailing text.  Otherwise the list holds a
    single plain TextRun with the entire matched text, unchanged.
  """
  original = autolink_regex_match.group(0)
  hyperlink = original

  # Peel closing delimiters off the end of the link.  A delimiter is only
  # peeled when its opening partner does not occur earlier in the link.
  trailing = ''
  for begin, end in _LINK_TRAILING_CHARS:
    if hyperlink.endswith(end):
      if not begin or begin not in hyperlink[:-len(end)]:
        trailing = end + trailing
        hyperlink = hyperlink[:-len(end)]

  # A closing HTML tag at the end is not part of the link either.
  tag_match = _CLOSING_TAG_RE.search(hyperlink)
  if tag_match:
    trailing = hyperlink[tag_match.start(0):] + trailing
    hyperlink = hyperlink[:tag_match.start(0)]

  if (not validate.IsValidURL(hyperlink) and
      not validate.IsValidEmail(hyperlink)):
    # Not linkable.  Emit everything that was matched, including the
    # characters trimmed above, so that no comment text is lost.
    return [template_helpers.TextRun(original)]

  result = [template_helpers.TextRun(hyperlink, tag='a', href=hyperlink)]
  if trailing:
    result.append(template_helpers.TextRun(trailing))

  return result
+
+
# Regular expression to detect git hashes.
# Used to auto-link to Git hashes on crrev.com when displaying issue details.
# Matches a hexadecimal string of exactly 40 chars that is not part of a
# larger word, optionally preceded by "r", "revision ", or "revision #".
_GIT_HASH_RE = re.compile(
    r'\b(?P<prefix>r(evision\s+#?)?)?(?P<revnum>([a-f0-9]{40}))\b',
    re.IGNORECASE | re.MULTILINE)

# This is for SVN revisions and Git commit positions.
# Matches "rN", "revision N", and "revision #N" when "rN" is not part of a
# larger word and N is a decimal number of 1 to 7 digits.  Unlike git
# hashes, the prefix is required here.
_SVN_REF_RE = re.compile(
    r'\b(?P<prefix>r(evision\s+#?)?)(?P<revnum>([0-9]{1,7}))\b',
    re.IGNORECASE | re.MULTILINE)
+
+
def GetReferencedRevisions(_mr, _refs):
  """Look up the referenced revision objects; currently a no-op.

  Args:
    _mr: unused.
    _refs: unused.

  Returns:
    Always None.
  """
  # Every revision reference is autolinked without checking that such a
  # revision actually exists, so there is nothing to load here.
  # TODO(jrobbins): Hit crrev.com and check that the revision exists
  # and show a rollover with revision info.
  return None
+
+
def ExtractRevNums(_mr, autolink_regex_match):
  """Return internal representation of a rev reference.

  Args:
    _mr: unused.
    autolink_regex_match: regex match that has a 'revnum' named group.

  Returns:
    A one-element list holding the text of the 'revnum' group.
  """
  revnum = autolink_regex_match.group('revnum')
  logging.debug('revision ref = %s', revnum)
  return [revnum]
+
+
def ReplaceRevisionRef(
    mr, autolink_regex_match, _component_ref_artifacts):
  """Return HTML markup for an autolinked revision reference.

  Args:
    mr: commonly used info parsed from the request; mr.project selects the
        revision URL format.
    autolink_regex_match: regex match with 'prefix' and 'revnum' groups.
    _component_ref_artifacts: unused value.

  Returns:
    A one-element list with a TextRun with tag=a.  Its text is the prefix
    (when one was matched) followed by the revision, and its href is the
    revision URL format filled in with the revision.
  """
  prefix = autolink_regex_match.group('prefix')
  revnum = autolink_regex_match.group('revnum')
  url_format = _GetRevisionURLFormat(mr.project)
  href = url_format.format(revnum=revnum)
  link_text = '%s%s' % (prefix, revnum) if prefix else revnum
  return [template_helpers.TextRun(link_text, tag='a', href=href)]
+
+
def _GetRevisionURLFormat(project):
  """Return the URL format string used to link to a revision.

  Args:
    project: object with a revision_url_format attribute.

  Returns:
    The project's own revision_url_format when it is set (truthy),
    otherwise settings.revision_url_format.
  """
  # TODO(jrobbins): Expose a UI to customize it to point to whatever site
  # hosts the source code. Also, site-wide default.
  project_format = project.revision_url_format
  if project_format:
    return project_format
  return settings.revision_url_format
+
+
# Regular expressions to detect issue references.
# Used to auto-link to other issues when displaying issue details.
# _ISSUE_REF_RE matches "issue", "issues", "bug", or "bugs" when that word
# is not part of a larger word, optionally followed by ":" or "=", and then
# one or more issue references separated by commas, "and", or "or".  Each
# reference is an issue number that may be preceded by "#" and by a
# "project:" or "project#" qualifier, e.g. "issues 1, #2 and chromium:3".
_ISSUE_REF_RE = re.compile(r"""
    (?P<prefix>\b(issues?|bugs?)[ \t]*(:|=)?)
    ([ \t]*(?P<project_name>\b[-a-z0-9]+[:\#])?
     (?P<number_sign>\#?)
     (?P<local_id>\d+)\b
     (,?[ \t]*(and|or)?)?)+""", re.IGNORECASE | re.VERBOSE)

# Matches one individual reference.  It is applied to text that
# _ISSUE_REF_RE has already matched, so the "issue" or "bug" prefix is
# optional here.
_SINGLE_ISSUE_REF_RE = re.compile(r"""
    (?P<prefix>\b(issue|bug)[ \t]*)?
    (?P<project_name>\b[-a-z0-9]+[:\#])?
    (?P<number_sign>\#?)
    (?P<local_id>\d+)\b""", re.IGNORECASE | re.VERBOSE)
+
+
def CurryGetReferencedIssues(services):
  """Return a function to get ref'd issues with these persist objects bound.

  Currying is a convenient way to give the callback access to the persist
  objects, but without requiring that all possible persist objects be passed
  through the autolink registry and functions.

  Args:
    services: connection to issue, config, and project persistence layers.

  Returns:
    A ready-to-use function that accepts the arguments that autolink
    expects to pass to it.
  """

  def GetReferencedIssues(mr, ref_tuples):
    """Look up the open and closed issues referenced by these comments.

    Args:
      mr: commonly used info parsed from the request.
      ref_tuples: list of (project_name, local_id) tuples for each issue
          that is mentioned in the comment text. The project_name may be
          None, in which case the issue is assumed to be in the current
          project.

    Returns:
      A pair (open_dict, closed_dict).  Each dict maps the key made by
      _IssueProjectKey for an issue to that issue.
    """
    # References without a project name fall back to the current project.
    project_names = [
        (ref_pn or mr.project_name) for ref_pn, _ in ref_tuples]
    ref_projects = services.project.GetProjectsByName(
        mr.cnxn, project_names)
    issue_ids = services.issue.ResolveIssueRefs(
        mr.cnxn, ref_projects, mr.project_name, ref_tuples)
    open_issues, closed_issues = (
        tracker_helpers.GetAllowedOpenedAndClosedIssues(
            mr, issue_ids, services))

    open_dict = {
        _IssueProjectKey(issue.project_name, issue.local_id): issue
        for issue in open_issues}
    closed_dict = {
        _IssueProjectKey(issue.project_name, issue.local_id): issue
        for issue in closed_issues}

    logging.info('autolinking dicts %r and %r', open_dict, closed_dict)

    return open_dict, closed_dict

  return GetReferencedIssues
+
+
def _ParseProjectNameMatch(project_name):
  """Reduce a regex-matched project name to its minimal representation.

  Args:
    project_name: a string with the project name matched in a regex, or
        None if no project name was matched.

  Returns:
    The project name with leading whitespace and any trailing '#', ':',
    space, tab, or newline characters removed.  None if project_name is
    None or empty.
  """
  if project_name:
    return project_name.lstrip().rstrip('#: \t\n')
  return None
+
+
def ExtractProjectAndIssueIds(_mr, autolink_regex_match):
  """Convert a regex match for a textual reference into our internal form.

  Args:
    _mr: unused.
    autolink_regex_match: regex match whose text holds one or more issue
        references.

  Returns:
    A list of (project_name, local_id) tuples, one per issue reference
    found in the matched text.  project_name is None when the reference
    did not name a project, and local_id is an int.
  """
  matched_text = autolink_regex_match.group(0)
  refs = []
  for single_ref in _SINGLE_ISSUE_REF_RE.finditer(matched_text):
    project_name = _ParseProjectNameMatch(single_ref.group('project_name'))
    local_id = int(single_ref.group('local_id'))
    ref = (project_name, local_id)
    refs.append(ref)
    logging.info('issue ref = %s', ref)

  return refs
+
+
# The key is built from the project name rather than the project ID so
# that no project lookup is needed in functions that have no services
# object.
def _IssueProjectKey(project_name, local_id):
  """Make a dictionary key to identify a referenced issue.

  Args:
    project_name: string name of the project that holds the issue.
    local_id: int ID of the issue within that project.

  Returns:
    A string of the form "project_name:local_id".
  """
  key_parts = (project_name, local_id)
  return '%s:%d' % key_parts
+
+
class IssueRefRun(object):
  """A text run that links to a referenced issue."""

  def __init__(self, issue, is_closed, project_name, prefix):
    """Set the attributes of the link.

    Args:
      issue: the referenced issue; its summary and local_id are used.
      is_closed: True if the referenced issue is closed.
      project_name: string project name to use in the link URL.
      prefix: text to show in front of the issue number, may be empty.
    """
    link_text = '%s%d' % (prefix, issue.local_id)
    if is_closed and not prefix:
      # A closed issue number shown on its own is padded with a space on
      # each side.
      link_text = ' %s ' % link_text

    if is_closed:
      css_class = 'closed_ref'
    else:
      css_class = None

    self.tag = 'a'
    self.css_class = css_class
    self.title = issue.summary
    self.href = '/p/%s/issues/detail?id=%d' % (project_name, issue.local_id)
    self.content = link_text
+
+
def ReplaceIssueRef(mr, autolink_regex_match, component_ref_artifacts):
  """Examine a textual reference and replace it with an autolink or not.

  Args:
    mr: commonly used info parsed from the request
    autolink_regex_match: regex match for the textual reference.
    component_ref_artifacts: result of earlier call to GetReferencedIssues,
        a pair (open_dict, closed_dict).

  Returns:
    A list of IssueRefRuns and TextRuns to replace the textual
    reference. Each individual issue reference in the matched text
    becomes the run produced by _ReplaceSingleIssueRef, and the text
    between and after the references is kept as plain TextRuns.
  """
  open_dict, closed_dict = component_ref_artifacts
  original = autolink_regex_match.group(0)
  logging.info('called ReplaceIssueRef on %r', original)

  result_runs = []
  consumed = 0  # Offset in original up to which runs have been emitted.
  for single_ref in _SINGLE_ISSUE_REF_RE.finditer(original):
    start = single_ref.start()
    if start < consumed:
      continue

    # Keep any plain text between the previous reference and this one.
    gap = original[consumed: start]
    if gap:
      result_runs.append(template_helpers.TextRun(gap))
    result_runs.append(
        _ReplaceSingleIssueRef(mr, single_ref, open_dict, closed_dict))
    consumed = single_ref.end()

  # Keep any plain text after the last reference.
  tail = original[consumed:]
  if tail:
    result_runs.append(template_helpers.TextRun(tail))

  return result_runs
+
+
def _ReplaceSingleIssueRef(mr, submatch, open_dict, closed_dict):
  """Replace one issue reference with a link, or the original text.

  Args:
    mr: commonly used info parsed from the request; mr.project_name is
        used when the reference does not name a project.
    submatch: _SINGLE_ISSUE_REF_RE match for one issue reference.
    open_dict: dict of referenced open issues, keyed by _IssueProjectKey.
    closed_dict: dict of referenced closed issues, keyed the same way.

  Returns:
    An IssueRefRun if the referenced issue is in open_dict or closed_dict,
    otherwise a plain TextRun with the reference text exactly as matched.
  """
  # The link text shown before the issue number is everything that was
  # matched in front of it: prefix word, project qualifier, and '#'.
  prefix = submatch.group('prefix') or ''
  project_name = submatch.group('project_name')
  if project_name:
    prefix += project_name
    project_name = project_name.lstrip().rstrip(':#')
  else:
    # We need project_name for the URL, even if it is not in the text.
    project_name = mr.project_name

  prefix += submatch.group('number_sign') or ''
  local_id = int(submatch.group('local_id'))
  issue_key = _IssueProjectKey(project_name, local_id)

  if issue_key in open_dict:
    return IssueRefRun(open_dict[issue_key], False, project_name, prefix)
  if issue_key in closed_dict:
    return IssueRefRun(closed_dict[issue_key], True, project_name, prefix)

  # Don't link to non-existent issues.  Echo the matched text verbatim
  # rather than rebuilding it from the parsed number, which would drop
  # leading zeros (e.g., "issue 007" would come out as "issue 7").
  return template_helpers.TextRun(submatch.group(0))
+
+
class Autolink(object):
  """Maintains a registry of autolink syntax and can apply it to comments."""

  def __init__(self):
    # {component_name: (artifact_lookup_function,
    #                   match_to_reference_function,
    #                   autolink_re_subst_dict)}
    self.registry = {}

  def RegisterComponent(self, component_name, artifact_lookup_function,
                        match_to_reference_function, autolink_re_subst_dict):
    """Register all the autolink info for a software component.

    Registering a component_name again replaces the earlier registration.

    Args:
      component_name: string name of software component, must be unique.
      artifact_lookup_function: function to batch lookup all artifacts that
          might have been referenced in a set of comments:
          function(mr, refs) -> referenced_artifacts
          where refs is the set of all references found.  The
          referenced_artifacts will be passed to each subst function.
      match_to_reference_function: convert a regex match object to
          some internal representation of the artifact reference:
          function(mr, match) -> iterable of hashable refs, or None
      autolink_re_subst_dict: dictionary of compiled regular expressions and
          the substitution function that should be called for each match:
          function(mr, match, referenced_artifacts) -> list of text runs
    """
    self.registry[component_name] = (artifact_lookup_function,
                                     match_to_reference_function,
                                     autolink_re_subst_dict)

  def GetAllReferencedArtifacts(self, mr, comment_text_list):
    """Call callbacks to lookup all artifacts possibly referenced.

    Args:
      mr: information parsed out of the user HTTP request.
      comment_text_list: list of comment content strings.

    Returns:
      Opaque object that can be passed to MarkupAutolinks. Its
      structure happens to be {component_name: artifact_list, ...}.
    """
    all_referenced_artifacts = {}
    # items() rather than iteritems() so that this also runs on Python 3.
    for comp, (lookup, match_to_refs, re_dict) in self.registry.items():
      refs = set()
      for comment_text in comment_text_list:
        for regex in re_dict:
          for match in regex.finditer(comment_text):
            additional_refs = match_to_refs(mr, match)
            if additional_refs:
              refs.update(additional_refs)

      # One batch lookup per component, over all of the comments.
      all_referenced_artifacts[comp] = lookup(mr, refs)

    return all_referenced_artifacts

  def MarkupAutolinks(self, mr, text_runs, all_referenced_artifacts):
    """Loop over components and regexes, applying all substitutions.

    Args:
      mr: info parsed from the user's HTTP request.
      text_runs: List of text runs for the user's comment.
      all_referenced_artifacts: result of previous call to
          GetAllReferencedArtifacts.

    Returns:
      List of text runs for the entire user comment, some of which may have
      attributes that cause them to render as links in render-rich-text.ezt.
    """
    # Process components in determinate alphabetical order.
    for component in sorted(self.registry):
      _lookup, _match_ref, re_subst_dict = self.registry[component]
      component_ref_artifacts = all_referenced_artifacts[component]
      for regex, subst_fun in re_subst_dict.items():
        text_runs = self._ApplySubstFunctionToRuns(
            text_runs, regex, subst_fun, mr, component_ref_artifacts)

    return text_runs

  def _ApplySubstFunctionToRuns(
      self, text_runs, regex, subst_fun, mr, component_ref_artifacts):
    """Apply autolink regex and substitution function to each text run.

    Args:
      text_runs: list of TextRun objects with parts of the original comment.
      regex: Regular expression for detecting textual references to artifacts.
      subst_fun: function to return autolink markup, or original text.
      mr: common info parsed from the user HTTP request.
      component_ref_artifacts: already-looked-up destination artifacts to use
          when computing substitution text.

    Returns:
      A new list with more and smaller runs, some of which may have tag
      and link attributes set.
    """
    result_runs = []
    for run in text_runs:
      if run.tag:
        # This chunk has already been substituted, don't allow nested
        # autolinking to mess up our output.
        result_runs.append(run)
        continue

      content = run.content
      pos = 0
      for match in regex.finditer(content):
        if match.start() > pos:
          # Keep the plain text that came before this match.
          result_runs.append(template_helpers.TextRun(
              content[pos: match.start()]))
        result_runs.extend(subst_fun(mr, match, component_ref_artifacts))
        pos = match.end()

      if content[pos:]:  # Keep any text that came after the last match
        result_runs.append(template_helpers.TextRun(content[pos:]))

    # TODO(jrobbins): ideally we would merge consecutive plain text runs
    # so that regexes can match across those run boundaries.

    return result_runs
+
+
def RegisterAutolink(services):
  """Register all the autolink hooks.

  Three components are registered on services.autolink: one that linkifies
  URLs, one for issue references, and one for revision references.

  Args:
    services: object whose autolink attribute provides RegisterComponent,
        and which is bound into the issue lookup callback.
  """
  register = services.autolink.RegisterComponent

  # Plain links need no artifact lookup, so both callbacks yield None.
  register(
      '01-linkify',
      lambda mr, refs: None,
      lambda mr, match: None,
      {_IS_A_LINK_RE: Linkify})

  register(
      '02-tracker',
      CurryGetReferencedIssues(services),
      ExtractProjectAndIssueIds,
      {_ISSUE_REF_RE: ReplaceIssueRef})

  # Git hashes and SVN revision numbers share one substitution function.
  register(
      '03-versioncontrol',
      GetReferencedRevisions,
      ExtractRevNums,
      {_GIT_HASH_RE: ReplaceRevisionRef,
       _SVN_REF_RE: ReplaceRevisionRef})
« no previous file with comments | « appengine/monorail/features/activities.py ('k') | appengine/monorail/features/commands.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698