| Index: appengine/monorail/features/autolink.py
|
| diff --git a/appengine/monorail/features/autolink.py b/appengine/monorail/features/autolink.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..a50b84886bd5510e465fd488d9affc8716291a9c
|
| --- /dev/null
|
| +++ b/appengine/monorail/features/autolink.py
|
| @@ -0,0 +1,465 @@
|
| +# Copyright 2016 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style
|
| +# license that can be found in the LICENSE file or at
|
| +# https://developers.google.com/open-source/licenses/bsd
|
| +
|
| +"""Autolink helps auto-link references to artifacts in text.
|
| +
|
| +This module maintains a registry of artifact autolink syntax specs and
|
| +callbacks. The structure of that registry is:
|
| + { component_name: (lookup_callback, match_to_reference_callback,
|
| + { regex: substitution_callback, ...}),
|
| + ...
|
| + }
|
| +
|
| +For example:
|
| + { 'tracker':
|
| + (GetReferencedIssues,
|
| + ExtractProjectAndIssueIds,
|
| + {_ISSUE_REF_RE: ReplaceIssueRef}),
|
| + 'versioncontrol':
|
| + (GetReferencedRevisions,
|
| + ExtractProjectAndRevNum,
|
| + {_GIT_HASH_RE: ReplaceRevisionRef}),
|
| + }
|
| +
|
| +The dictionary of regexes is used here because, in the future, we
|
| +might add more regexes for each component rather than have one complex
|
| +regex per component.
|
| +"""
|
| +
|
| +import logging
|
| +import re
|
| +import urllib
|
| +import urlparse
|
| +
|
| +import settings
|
| +from framework import template_helpers
|
| +from framework import validate
|
| +from proto import project_pb2
|
| +from tracker import tracker_helpers
|
| +
|
| +
|
| +_CLOSING_TAG_RE = re.compile('</[a-z0-9]+>$', re.IGNORECASE)
|
| +
|
| +_LINKIFY_SCHEMES = r'(https?://|ftp://|mailto:)'
|
| +# Also count a start-tag '<' as a url delimiter, since the autolinker
|
| +# is sometimes run against html fragments.
|
| +_IS_A_LINK_RE = re.compile(r'(%s)([^\s<]+)' % _LINKIFY_SCHEMES, re.UNICODE)
|
| +
|
| +# These are allowed in links, but if any of the closing delimiters appear
|
| +# at the end of the link, and the opening one is not part of the link,
|
| +# then trim off the closing delimiters.
|
| +_LINK_TRAILING_CHARS = [
|
| + (None, ':'),
|
| + (None, '.'),
|
| + (None, ','),
|
| + ('<', '>'),
|
| + ('"', '"'),
|
| + ('(', ')'),
|
| + ('[', ']'),
|
| + ('{', '}'),
|
| + ]
|
| +
|
| +
|
| +def Linkify(_mr, autolink_regex_match,
|
| + _component_ref_artifacts):
|
| + """Examine a textual reference and replace it with a hyperlink or not.
|
| +
|
| + This is a callback for use with the autolink feature.
|
| +
|
| + Args:
|
| + _mr: common info parsed from the user HTTP request.
|
| + autolink_regex_match: regex match for the textual reference.
|
| + _component_ref_artifacts: unused value
|
| +
|
| + Returns:
|
| + A list of TextRuns with tag=a for all matched ftp, http, https and mailto
|
| + links converted into HTML hyperlinks.
|
| + """
|
| + hyperlink = autolink_regex_match.group(0)
|
| +
|
| + trailing = ''
|
| + for begin, end in _LINK_TRAILING_CHARS:
|
| + if hyperlink.endswith(end):
|
| + if not begin or hyperlink[:-len(end)].find(begin) == -1:
|
| + trailing = end + trailing
|
| + hyperlink = hyperlink[:-len(end)]
|
| +
|
| + tag_match = _CLOSING_TAG_RE.search(hyperlink)
|
| + if tag_match:
|
| + trailing = hyperlink[tag_match.start(0):] + trailing
|
| + hyperlink = hyperlink[:tag_match.start(0)]
|
| +
|
| + if (not validate.IsValidURL(hyperlink) and
|
| + not validate.IsValidEmail(hyperlink)):
|
| + return [template_helpers.TextRun(hyperlink)]
|
| +
|
| + result = [template_helpers.TextRun(hyperlink, tag='a', href=hyperlink)]
|
| + if trailing:
|
| + result.append(template_helpers.TextRun(trailing))
|
| +
|
| + return result
|
| +
|
| +
|
| +# Regular expression to detect git hashes.
|
| +# Used to auto-link to Git hashes on crrev.com when displaying issue details.
|
| +# Matches "N", "rN", "revision N", and "revision #N" when the reference is not
|
| +# part of a larger word and N is a hexadecimal string of 40 chars.
|
| +_GIT_HASH_RE = re.compile(
|
| + r'\b(?P<prefix>r(evision\s+#?)?)?(?P<revnum>([a-f0-9]{40}))\b',
|
| + re.IGNORECASE | re.MULTILINE)
|
| +
|
| +# This is for SVN revisions and Git commit positions.
|
| +_SVN_REF_RE = re.compile(
|
| + r'\b(?P<prefix>r(evision\s+#?)?)(?P<revnum>([0-9]{1,7}))\b',
|
| + re.IGNORECASE | re.MULTILINE)
|
| +
|
| +
|
| +def GetReferencedRevisions(_mr, _refs):
|
| + """Load the referenced revision objects."""
|
| + # For now we just autolink any revision hash without actually
|
| + # checking that such a revision exists.
|
| + # TODO(jrobbins): Hit crrev.com and check that the revision exists
|
| + # and show a rollover with revision info.
|
| + return None
|
| +
|
| +
|
| +def ExtractRevNums(_mr, autolink_regex_match):
|
| + """Return internal representation of a rev reference."""
|
| + ref = autolink_regex_match.group('revnum')
|
| + logging.debug('revision ref = %s', ref)
|
| + return [ref]
|
| +
|
| +
|
| +def ReplaceRevisionRef(
|
| + mr, autolink_regex_match, _component_ref_artifacts):
|
| + """Return HTML markup for an autolink reference."""
|
| + prefix = autolink_regex_match.group('prefix')
|
| + revnum = autolink_regex_match.group('revnum')
|
| + url = _GetRevisionURLFormat(mr.project).format(revnum=revnum)
|
| + content = revnum
|
| + if prefix:
|
| + content = '%s%s' % (prefix, revnum)
|
| + return [template_helpers.TextRun(content, tag='a', href=url)]
|
| +
|
| +
|
| +def _GetRevisionURLFormat(project):
|
| + # TODO(jrobbins): Expose a UI to customize it to point to whatever site
|
| + # hosts the source code. Also, site-wide default.
|
| + return (project.revision_url_format or settings.revision_url_format)
|
| +
|
| +
|
| +# Regular expression to detect issue references.
|
| +# Used to auto-link to other issues when displaying issue details.
|
| +# Matches "issue " when "issue" is not part of a larger word, or
|
| +# "issue #", or just a "#" when it is preceded by a space.
|
| +_ISSUE_REF_RE = re.compile(r"""
|
| + (?P<prefix>\b(issues?|bugs?)[ \t]*(:|=)?)
|
| + ([ \t]*(?P<project_name>\b[-a-z0-9]+[:\#])?
|
| + (?P<number_sign>\#?)
|
| + (?P<local_id>\d+)\b
|
| + (,?[ \t]*(and|or)?)?)+""", re.IGNORECASE | re.VERBOSE)
|
| +
|
| +_SINGLE_ISSUE_REF_RE = re.compile(r"""
|
| + (?P<prefix>\b(issue|bug)[ \t]*)?
|
| + (?P<project_name>\b[-a-z0-9]+[:\#])?
|
| + (?P<number_sign>\#?)
|
| + (?P<local_id>\d+)\b""", re.IGNORECASE | re.VERBOSE)
|
| +
|
| +
|
| +def CurryGetReferencedIssues(services):
|
| + """Return a function to get ref'd issues with these persist objects bound.
|
| +
|
| + Currying is a convenient way to give the callback access to the persist
|
| + objects, but without requiring that all possible persist objects be passed
|
| + through the autolink registry and functions.
|
| +
|
| + Args:
|
| + services: connection to issue, config, and project persistence layers.
|
| +
|
| + Returns:
|
| + A ready-to-use function that accepts the arguments that autolink
|
| + expects to pass to it.
|
| + """
|
| +
|
| + def GetReferencedIssues(mr, ref_tuples):
|
| + """Return lists of open and closed issues referenced by these comments.
|
| +
|
| + Args:
|
| + mr: commonly used info parsed from the request.
|
| + ref_tuples: list of (project_name, local_id) tuples for each issue
|
| + that is mentioned in the comment text. The project_name may be None,
|
| + in which case the issue is assumed to be in the current project.
|
| +
|
| + Returns:
|
| + A pair of dicts, (open_dict, closed_dict), keyed by _IssueProjectKey.
|
| + """
|
| + ref_projects = services.project.GetProjectsByName(
|
| + mr.cnxn,
|
| + [(ref_pn or mr.project_name) for ref_pn, _ in ref_tuples])
|
| + issue_ids = services.issue.ResolveIssueRefs(
|
| + mr.cnxn, ref_projects, mr.project_name, ref_tuples)
|
| + open_issues, closed_issues = (
|
| + tracker_helpers.GetAllowedOpenedAndClosedIssues(
|
| + mr, issue_ids, services))
|
| +
|
| + open_dict = {}
|
| + for issue in open_issues:
|
| + open_dict[_IssueProjectKey(issue.project_name, issue.local_id)] = issue
|
| +
|
| + closed_dict = {}
|
| + for issue in closed_issues:
|
| + closed_dict[_IssueProjectKey(issue.project_name, issue.local_id)] = issue
|
| +
|
| + logging.info('autolinking dicts %r and %r', open_dict, closed_dict)
|
| +
|
| + return open_dict, closed_dict
|
| +
|
| + return GetReferencedIssues
|
| +
|
| +
|
| +def _ParseProjectNameMatch(project_name):
|
| + """Process the passed project name and determine the best representation.
|
| +
|
| + Args:
|
| + project_name: a string with the project name matched in a regex
|
| +
|
| + Returns:
|
| + A minimal representation of the project name, None if no valid content.
|
| + """
|
| + if not project_name:
|
| + return None
|
| + return project_name.lstrip().rstrip('#: \t\n')
|
| +
|
| +
|
| +def ExtractProjectAndIssueIds(_mr, autolink_regex_match):
|
| + """Convert a regex match for a textual reference into our internal form."""
|
| + whole_str = autolink_regex_match.group(0)
|
| + refs = []
|
| + for submatch in _SINGLE_ISSUE_REF_RE.finditer(whole_str):
|
| + ref = (_ParseProjectNameMatch(submatch.group('project_name')),
|
| + int(submatch.group('local_id')))
|
| + refs.append(ref)
|
| + logging.info('issue ref = %s', ref)
|
| +
|
| + return refs
|
| +
|
| +
|
| +# This uses project name to avoid a lookup on project ID in a function
|
| +# that has no services object.
|
| +def _IssueProjectKey(project_name, local_id):
|
| + """Make a dictionary key to identify a referenced issue."""
|
| + return '%s:%d' % (project_name, local_id)
|
| +
|
| +
|
| +class IssueRefRun(object):
|
| + """A text run that links to a referenced issue."""
|
| +
|
| + def __init__(self, issue, is_closed, project_name, prefix):
|
| + self.tag = 'a'
|
| + self.css_class = 'closed_ref' if is_closed else None
|
| + self.title = issue.summary
|
| + self.href = '/p/%s/issues/detail?id=%d' % (project_name, issue.local_id)
|
| +
|
| + self.content = '%s%d' % (prefix, issue.local_id)
|
| + if is_closed and not prefix:
|
| + self.content = ' %s ' % self.content
|
| +
|
| +
|
| +def ReplaceIssueRef(mr, autolink_regex_match, component_ref_artifacts):
|
| + """Examine a textual reference and replace it with an autolink or not.
|
| +
|
| + Args:
|
| + mr: commonly used info parsed from the request
|
| + autolink_regex_match: regex match for the textual reference.
|
| + component_ref_artifacts: result of earlier call to GetReferencedIssues.
|
| +
|
| + Returns:
|
| + A list of IssueRefRuns and TextRuns to replace the textual
|
| + reference. If there is an issue to autolink to, we return an HTML
|
| + hyperlink. Otherwise, the run will have the original plain
|
| + text.
|
| + """
|
| + open_dict, closed_dict = component_ref_artifacts
|
| + original = autolink_regex_match.group(0)
|
| + logging.info('called ReplaceIssueRef on %r', original)
|
| + result_runs = []
|
| + pos = 0
|
| + for submatch in _SINGLE_ISSUE_REF_RE.finditer(original):
|
| + if submatch.start() >= pos:
|
| + if original[pos: submatch.start()]:
|
| + result_runs.append(template_helpers.TextRun(
|
| + original[pos: submatch.start()]))
|
| + replacement_run = _ReplaceSingleIssueRef(
|
| + mr, submatch, open_dict, closed_dict)
|
| + result_runs.append(replacement_run)
|
| + pos = submatch.end()
|
| +
|
| + if original[pos:]:
|
| + result_runs.append(template_helpers.TextRun(original[pos:]))
|
| +
|
| + return result_runs
|
| +
|
| +
|
| +def _ReplaceSingleIssueRef(mr, submatch, open_dict, closed_dict):
|
| + """Replace one issue reference with a link, or the original text."""
|
| + prefix = submatch.group('prefix') or ''
|
| + project_name = submatch.group('project_name')
|
| + if project_name:
|
| + prefix += project_name
|
| + project_name = project_name.lstrip().rstrip(':#')
|
| + else:
|
| + # We need project_name for the URL, even if it is not in the text.
|
| + project_name = mr.project_name
|
| +
|
| + number_sign = submatch.group('number_sign')
|
| + if number_sign:
|
| + prefix += number_sign
|
| + local_id = int(submatch.group('local_id'))
|
| + issue_key = _IssueProjectKey(project_name or mr.project_name, local_id)
|
| +
|
| + if issue_key in open_dict:
|
| + return IssueRefRun(open_dict[issue_key], False, project_name, prefix)
|
| + elif issue_key in closed_dict:
|
| + return IssueRefRun(closed_dict[issue_key], True, project_name, prefix)
|
| + else: # Don't link to non-existent issues.
|
| + return template_helpers.TextRun('%s%d' % (prefix, local_id))
|
| +
|
| +
|
| +class Autolink(object):
|
| + """Maintains a registry of autolink syntax and can apply it to comments."""
|
| +
|
| + def __init__(self):
|
| + self.registry = {}
|
| +
|
| + def RegisterComponent(self, component_name, artifact_lookup_function,
|
| + match_to_reference_function, autolink_re_subst_dict):
|
| + """Register all the autolink info for a software component.
|
| +
|
| + Args:
|
| + component_name: string name of software component, must be unique.
|
| + artifact_lookup_function: function to batch lookup all artifacts that
|
| + might have been referenced in a set of comments:
|
| + function(mr, all_refs) -> referenced_artifacts
|
| + the referenced_artifacts will be passed to each subst function.
|
| + match_to_reference_function: convert a regex match object to
|
| + some internal representation of the artifact reference.
|
| + autolink_re_subst_dict: dictionary of regular expressions and
|
| + the substitution function that should be called for each match:
|
| + function(mr, match, referenced_artifacts) -> replacement_markup
|
| + """
|
| + self.registry[component_name] = (artifact_lookup_function,
|
| + match_to_reference_function,
|
| + autolink_re_subst_dict)
|
| +
|
| + def GetAllReferencedArtifacts(self, mr, comment_text_list):
|
| + """Call callbacks to lookup all artifacts possibly referenced.
|
| +
|
| + Args:
|
| + mr: information parsed out of the user HTTP request.
|
| + comment_text_list: list of comment content strings.
|
| +
|
| + Returns:
|
| + Opaque object that can be passed to MarkupAutolinks. Its
|
| + structure happens to be {component_name: artifact_list, ...}.
|
| + """
|
| + all_referenced_artifacts = {}
|
| + for comp, (lookup, match_to_refs, re_dict) in self.registry.iteritems():
|
| + refs = set()
|
| + for comment_text in comment_text_list:
|
| + for regex in re_dict:
|
| + for match in regex.finditer(comment_text):
|
| + additional_refs = match_to_refs(mr, match)
|
| + if additional_refs:
|
| + refs.update(additional_refs)
|
| +
|
| + all_referenced_artifacts[comp] = lookup(mr, refs)
|
| +
|
| + return all_referenced_artifacts
|
| +
|
| + def MarkupAutolinks(self, mr, text_runs, all_referenced_artifacts):
|
| + """Loop over components and regexes, applying all substitutions.
|
| +
|
| + Args:
|
| + mr: info parsed from the user's HTTP request.
|
| + text_runs: List of text runs for the user's comment.
|
| + all_referenced_artifacts: result of previous call to
|
| + GetAllReferencedArtifacts.
|
| +
|
| + Returns:
|
| + List of text runs for the entire user comment, some of which may have
|
| + attributes that cause them to render as links in render-rich-text.ezt.
|
| + """
|
| + items = self.registry.items()
|
| + items.sort() # Process components in determinate alphabetical order.
|
| + for component, (_lookup, _match_ref, re_subst_dict) in items:
|
| + component_ref_artifacts = all_referenced_artifacts[component]
|
| + for regex, subst_fun in re_subst_dict.iteritems():
|
| + text_runs = self._ApplySubstFunctionToRuns(
|
| + text_runs, regex, subst_fun, mr, component_ref_artifacts)
|
| +
|
| + return text_runs
|
| +
|
| + def _ApplySubstFunctionToRuns(
|
| + self, text_runs, regex, subst_fun, mr, component_ref_artifacts):
|
| + """Apply autolink regex and substitution function to each text run.
|
| +
|
| + Args:
|
| + text_runs: list of TextRun objects with parts of the original comment.
|
| + regex: Regular expression for detecting textual references to artifacts.
|
| + subst_fun: function to return autolink markup, or original text.
|
| + mr: common info parsed from the user HTTP request.
|
| + component_ref_artifacts: already-looked-up destination artifacts to use
|
| + when computing substitution text.
|
| +
|
| + Returns:
|
| + A new list with more and smaller runs, some of which may have tag
|
| + and link attributes set.
|
| + """
|
| + result_runs = []
|
| + for run in text_runs:
|
| + content = run.content
|
| + if run.tag:
|
| + # This chunk has already been substituted, don't allow nested
|
| + # autolinking to mess up our output.
|
| + result_runs.append(run)
|
| + else:
|
| + pos = 0
|
| + for match in regex.finditer(content):
|
| + if match.start() > pos:
|
| + result_runs.append(template_helpers.TextRun(
|
| + content[pos: match.start()]))
|
| + replacement_runs = subst_fun(mr, match, component_ref_artifacts)
|
| + result_runs.extend(replacement_runs)
|
| + pos = match.end()
|
| +
|
| + if run.content[pos:]: # Keep any text that came after the last match
|
| + result_runs.append(template_helpers.TextRun(run.content[pos:]))
|
| +
|
| + # TODO(jrobbins): ideally we would merge consecutive plain text runs
|
| + # so that regexes can match across those run boundaries.
|
| +
|
| + return result_runs
|
| +
|
| +
|
| +def RegisterAutolink(services):
|
| + """Register all the autolink hooks."""
|
| + services.autolink.RegisterComponent(
|
| + '01-linkify',
|
| + lambda request, mr: None,
|
| + lambda mr, match: None,
|
| + {_IS_A_LINK_RE: Linkify})
|
| +
|
| + services.autolink.RegisterComponent(
|
| + '02-tracker',
|
| + CurryGetReferencedIssues(services),
|
| + ExtractProjectAndIssueIds,
|
| + {_ISSUE_REF_RE: ReplaceIssueRef})
|
| +
|
| + services.autolink.RegisterComponent(
|
| + '03-versioncontrol',
|
| + GetReferencedRevisions,
|
| + ExtractRevNums,
|
| + {_GIT_HASH_RE: ReplaceRevisionRef,
|
| + _SVN_REF_RE: ReplaceRevisionRef})
|
|
|