OLD | NEW |
(Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style |
| 3 # license that can be found in the LICENSE file or at |
| 4 # https://developers.google.com/open-source/licenses/bsd |
| 5 |
| 6 """Autolink helps auto-link references to artifacts in text. |
| 7 |
| 8 This class maintains a registry of artifact autolink syntax specs and |
| 9 callbacks. The structure of that registry is: |
| 10 { component_name: (lookup_callback, match_to_reference_callback, |
| 11 { regex: substitution_callback, ...}), |
| 12 ... |
| 13 } |
| 14 |
| 15 For example: |
| 16 { 'tracker': |
| 17 (GetReferencedIssues, |
| 18 ExtractProjectAndIssueIds, |
| 19 {_ISSUE_REF_RE: ReplaceIssueRef}), |
| 20 'versioncontrol': |
| 21 (GetReferencedRevisions, |
| 22 ExtractRevNums, |
| 23 {_GIT_HASH_RE: ReplaceRevisionRef}), |
| 24 } |
| 25 |
| 26 The dictionary of regexes is used here because, in the future, we |
| 27 might add more regexes for each component rather than have one complex |
| 28 regex per component. |
| 29 """ |
| 30 |
| 31 import logging |
| 32 import re |
| 33 import urllib |
| 34 import urlparse |
| 35 |
| 36 import settings |
| 37 from framework import template_helpers |
| 38 from framework import validate |
| 39 from proto import project_pb2 |
| 40 from tracker import tracker_helpers |
| 41 |
| 42 |
# Matches an HTML closing tag (e.g. "</b>") at the very end of a string.
# Linkify() uses it to split such a tag off the end of a matched link.
_CLOSING_TAG_RE = re.compile('</[a-z0-9]+>$', re.IGNORECASE)

# Schemes that mark the start of text that may be turned into a link.
_LINKIFY_SCHEMES = r'(https?://|ftp://|mailto:)'
# Also count a start-tag '<' as a url delimiter, since the autolinker
# is sometimes run against html fragments.
_IS_A_LINK_RE = re.compile(r'(%s)([^\s<]+)' % _LINKIFY_SCHEMES, re.UNICODE)

# These are allowed in links, but if any of the closing delimiters appear
# at the end of the link, and the opening one is not part of the link,
# then trim off the closing delimiters.
# Each entry is an (opening, closing) pair.  An opening of None means the
# closing character is trimmed whenever it ends the link.
_LINK_TRAILING_CHARS = [
    (None, ':'),
    (None, '.'),
    (None, ','),
    ('<', '>'),
    ('"', '"'),
    ('(', ')'),
    ('[', ']'),
    ('{', '}'),
    ]
| 63 |
| 64 |
def Linkify(_mr, autolink_regex_match,
            _component_ref_artifacts):
  """Examine a textual reference and replace it with a hyperlink or not.

  This is a callback for use with the autolink feature.

  Args:
    _mr: common info parsed from the user HTTP request.
    autolink_regex_match: regex match for the textual reference.
    _component_ref_artifacts: unused value

  Returns:
    A list of TextRuns.  If the matched text, minus any trailing
    punctuation or closing HTML tag, is a valid URL or email address, the
    list holds a TextRun with tag=a for the link, followed by a plain
    TextRun for whatever was trimmed off (if anything).  Otherwise the list
    holds one plain TextRun with the complete matched text.
  """
  matched_text = autolink_regex_match.group(0)
  hyperlink = matched_text

  # Peel closing delimiters off the end of the link.  A paired delimiter is
  # only peeled when its opening partner does not occur earlier in the link.
  trailing = ''
  for begin, end in _LINK_TRAILING_CHARS:
    if hyperlink.endswith(end):
      if not begin or hyperlink[:-len(end)].find(begin) == -1:
        trailing = end + trailing
        hyperlink = hyperlink[:-len(end)]

  # A closing HTML tag stuck to the end of the link is not part of it.
  tag_match = _CLOSING_TAG_RE.search(hyperlink)
  if tag_match:
    trailing = hyperlink[tag_match.start(0):] + trailing
    hyperlink = hyperlink[:tag_match.start(0)]

  if (not validate.IsValidURL(hyperlink) and
      not validate.IsValidEmail(hyperlink)):
    # Not linkable: hand back everything that was matched.  Returning only
    # the trimmed text would silently drop the characters peeled off above
    # from the rendered comment.
    return [template_helpers.TextRun(matched_text)]

  result = [template_helpers.TextRun(hyperlink, tag='a', href=hyperlink)]
  if trailing:
    result.append(template_helpers.TextRun(trailing))

  return result
| 103 |
| 104 |
# Regular expression to detect git hashes.
# Used to auto-link to Git hashes when displaying issue details.
# Matches a hexadecimal string of 40 chars that stands as a whole word,
# optionally preceded by "r", "revision " or "revision #".
_GIT_HASH_RE = re.compile(
    r'\b(?P<prefix>r(evision\s+#?)?)?(?P<revnum>([a-f0-9]{40}))\b',
    re.IGNORECASE | re.MULTILINE)

# This is for SVN revisions and Git commit positions.
# Matches "rN", "revision N", and "revision #N" when the "r" is not part of
# a larger word and N is a decimal number of 1 to 7 digits.  Unlike the git
# hash pattern above, the prefix is required here.
_SVN_REF_RE = re.compile(
    r'\b(?P<prefix>r(evision\s+#?)?)(?P<revnum>([0-9]{1,7}))\b',
    re.IGNORECASE | re.MULTILINE)
| 117 |
| 118 |
def GetReferencedRevisions(_mr, _refs):
  """Look up the referenced revision objects; currently a placeholder.

  Args:
    _mr: unused.
    _refs: unused.

  Returns:
    Always None.
  """
  # Revision references are autolinked without first checking that the
  # revision really exists, so there is nothing to load yet.
  # TODO(jrobbins): Hit crrev.com and check that the revision exists
  # and show a rollover with revision info.
  return None
| 126 |
| 127 |
def ExtractRevNums(_mr, autolink_regex_match):
  """Pull the revision identifier out of a revision regex match.

  Args:
    _mr: unused.
    autolink_regex_match: regex match that has a 'revnum' named group.

  Returns:
    A one-element list holding the text of the 'revnum' group.
  """
  revnum = autolink_regex_match.group('revnum')
  logging.debug('revision ref = %s', revnum)
  return [revnum]
| 133 |
| 134 |
def ReplaceRevisionRef(
    mr, autolink_regex_match, _component_ref_artifacts):
  """Return the markup that turns a revision reference into a link.

  Args:
    mr: common info parsed from the request; mr.project is used to pick
        the revision URL format.
    autolink_regex_match: regex match with 'prefix' and 'revnum' groups.
    _component_ref_artifacts: unused.

  Returns:
    A one-element list holding a TextRun with tag=a.  Its text is the
    prefix (when one was matched) followed by the revision, and its href
    is the revision URL format filled in with the revision.
  """
  revnum = autolink_regex_match.group('revnum')
  prefix = autolink_regex_match.group('prefix')
  href = _GetRevisionURLFormat(mr.project).format(revnum=revnum)
  link_text = '%s%s' % (prefix, revnum) if prefix else revnum
  return [template_helpers.TextRun(link_text, tag='a', href=href)]
| 145 |
| 146 |
def _GetRevisionURLFormat(project):
  """Return the format string used to build links to revisions.

  Args:
    project: object with a revision_url_format attribute.

  Returns:
    The project's revision_url_format when it is set (truthy), otherwise
    settings.revision_url_format.
  """
  # TODO(jrobbins): Expose a UI to customize it to point to whatever site
  # hosts the source code. Also, site-wide default.
  project_format = project.revision_url_format
  if project_format:
    return project_format
  return settings.revision_url_format
| 151 |
| 152 |
# Regular expression to detect issue references.
# Used to auto-link to other issues when displaying issue details.
# Matches "issue", "issues", "bug" or "bugs" when that word is not part of
# a larger word, optionally followed by ":" or "=", and then one or more
# issue numbers.  Each number may be preceded by a "project:" or "project#"
# qualifier and/or a "#", and the numbers may be separated by commas,
# "and" or "or".
_ISSUE_REF_RE = re.compile(r"""
    (?P<prefix>\b(issues?|bugs?)[ \t]*(:|=)?)
    ([ \t]*(?P<project_name>\b[-a-z0-9]+[:\#])?
     (?P<number_sign>\#?)
     (?P<local_id>\d+)\b
     (,?[ \t]*(and|or)?)?)+""", re.IGNORECASE | re.VERBOSE)

# Matches one issue number together with its optional "issue"/"bug" prefix,
# optional project qualifier and optional "#".  It is run over the text
# matched by _ISSUE_REF_RE to pick out the individual references.
_SINGLE_ISSUE_REF_RE = re.compile(r"""
    (?P<prefix>\b(issue|bug)[ \t]*)?
    (?P<project_name>\b[-a-z0-9]+[:\#])?
    (?P<number_sign>\#?)
    (?P<local_id>\d+)\b""", re.IGNORECASE | re.VERBOSE)
| 169 |
| 170 |
def CurryGetReferencedIssues(services):
  """Bind the persistence services into an issue-lookup callback.

  Currying is a convenient way to give the callback access to the persist
  objects, but without requiring that all possible persist objects be passed
  through the autolink registry and functions.

  Args:
    services: connection to issue, config, and project persistence layers.

  Returns:
    A ready-to-use function that accepts the arguments that autolink
    expects to pass to it.
  """

  def GetReferencedIssues(mr, ref_tuples):
    """Return dicts of the open and closed issues referenced by comments.

    Args:
      mr: commonly used info parsed from the request.
      ref_tuples: list of (project_name, local_id) tuples for each issue
          that is mentioned in the comment text.  The project_name may be
          None, in which case the issue is assumed to be in the current
          project.

    Returns:
      A pair (open_dict, closed_dict).  Each dict maps a key built by
      _IssueProjectKey to the corresponding issue.
    """
    # References that name no project fall back to the current project.
    project_names = [
        (ref_pn or mr.project_name) for ref_pn, _ in ref_tuples]
    ref_projects = services.project.GetProjectsByName(
        mr.cnxn, project_names)
    issue_ids = services.issue.ResolveIssueRefs(
        mr.cnxn, ref_projects, mr.project_name, ref_tuples)
    open_issues, closed_issues = (
        tracker_helpers.GetAllowedOpenedAndClosedIssues(
            mr, issue_ids, services))

    open_dict = {
        _IssueProjectKey(issue.project_name, issue.local_id): issue
        for issue in open_issues}
    closed_dict = {
        _IssueProjectKey(issue.project_name, issue.local_id): issue
        for issue in closed_issues}

    logging.info('autolinking dicts %r and %r', open_dict, closed_dict)

    return open_dict, closed_dict

  return GetReferencedIssues
| 220 |
| 221 |
def _ParseProjectNameMatch(project_name):
  """Reduce a regex-matched project name to its bare form.

  Args:
    project_name: a string with the project name matched in a regex, or
        None if the regex group did not match.

  Returns:
    None when project_name is empty or None.  Otherwise the name with
    leading whitespace removed and any trailing '#', ':', space, tab or
    newline characters removed.
  """
  if project_name:
    return project_name.lstrip().rstrip('#: \t\n')
  return None
| 234 |
| 235 |
def ExtractProjectAndIssueIds(_mr, autolink_regex_match):
  """Turn a regex match for issue references into (project, id) tuples.

  Args:
    _mr: unused.
    autolink_regex_match: regex match whose whole text may mention several
        issues.

  Returns:
    A list with one (project_name, local_id) tuple for each single issue
    reference found in the matched text.  project_name is None when the
    reference did not name a project; local_id is an int.
  """
  matched_text = autolink_regex_match.group(0)
  refs = []
  for single_ref in _SINGLE_ISSUE_REF_RE.finditer(matched_text):
    project_name = _ParseProjectNameMatch(single_ref.group('project_name'))
    local_id = int(single_ref.group('local_id'))
    ref = (project_name, local_id)
    refs.append(ref)
    logging.info('issue ref = %s', ref)

  return refs
| 247 |
| 248 |
# The key is built from the project name, not the project ID, so that
# callers that have no services object need no lookup to build it.
def _IssueProjectKey(project_name, local_id):
  """Build the dictionary key that identifies one referenced issue.

  Args:
    project_name: name of the project that the issue belongs to.
    local_id: the issue's number within that project.

  Returns:
    The string 'project_name:local_id'.
  """
  key_parts = (project_name, local_id)
  return '%s:%d' % key_parts
| 254 |
| 255 |
class IssueRefRun(object):
  """A text run that renders as a hyperlink to a referenced issue."""

  def __init__(self, issue, is_closed, project_name, prefix):
    """Set up the link attributes for one issue reference.

    Args:
      issue: the referenced issue; its summary and local_id are read.
      is_closed: True if the issue is closed, which selects the
          'closed_ref' CSS class.
      project_name: name of the project, used in the link target.
      prefix: text to show in front of the issue number, possibly empty.
    """
    local_id = issue.local_id
    link_text = '%s%d' % (prefix, local_id)
    if is_closed and not prefix:
      # A closed issue shown as a bare number gets a space on each side.
      link_text = ' %s ' % link_text

    self.tag = 'a'
    self.css_class = None
    if is_closed:
      self.css_class = 'closed_ref'
    self.title = issue.summary
    self.href = '/p/%s/issues/detail?id=%d' % (project_name, local_id)
    self.content = link_text
| 268 |
| 269 |
def ReplaceIssueRef(mr, autolink_regex_match, component_ref_artifacts):
  """Examine a textual reference and replace it with an autolink or not.

  Args:
    mr: commonly used info parsed from the request
    autolink_regex_match: regex match for the textual reference.
    component_ref_artifacts: result of earlier call to GetReferencedIssues,
        i.e., a pair (open_dict, closed_dict).

  Returns:
    A list of IssueRefRuns and TextRuns to replace the textual
    reference.  Each single issue reference inside the matched text
    becomes whatever _ReplaceSingleIssueRef returns for it, and the text
    between and after the references is kept as plain TextRuns.
  """
  open_dict, closed_dict = component_ref_artifacts
  original = autolink_regex_match.group(0)
  logging.info('called ReplaceIssueRef on %r', original)

  result_runs = []
  consumed = 0  # Index just past the last reference handled so far.
  for single_ref in _SINGLE_ISSUE_REF_RE.finditer(original):
    ref_start = single_ref.start()
    if ref_start < consumed:
      continue

    text_before = original[consumed: ref_start]
    if text_before:
      result_runs.append(template_helpers.TextRun(text_before))
    result_runs.append(
        _ReplaceSingleIssueRef(mr, single_ref, open_dict, closed_dict))
    consumed = single_ref.end()

  text_after = original[consumed:]
  if text_after:
    result_runs.append(template_helpers.TextRun(text_after))

  return result_runs
| 303 |
| 304 |
def _ReplaceSingleIssueRef(mr, submatch, open_dict, closed_dict):
  """Replace one issue reference with a link, or the original text.

  Args:
    mr: commonly used info parsed from the request.  Its project_name is
        used when the reference itself names no project.
    submatch: a _SINGLE_ISSUE_REF_RE match for one issue reference.
    open_dict: dict of referenced open issues, keyed by _IssueProjectKey.
    closed_dict: dict of referenced closed issues, keyed the same way.

  Returns:
    An IssueRefRun when the referenced issue is found in open_dict or
    closed_dict.  Otherwise a plain TextRun holding the reference exactly
    as it was written.
  """
  prefix = submatch.group('prefix') or ''
  project_name = submatch.group('project_name')
  if project_name:
    prefix += project_name
    # Use the same normalization as ExtractProjectAndIssueIds so that the
    # key built below agrees with the refs that were looked up.
    project_name = _ParseProjectNameMatch(project_name)
  else:
    # We need project_name for the URL, even if it is not in the text.
    project_name = mr.project_name

  prefix += submatch.group('number_sign') or ''
  local_id = int(submatch.group('local_id'))
  issue_key = _IssueProjectKey(project_name, local_id)

  if issue_key in open_dict:
    return IssueRefRun(open_dict[issue_key], False, project_name, prefix)
  if issue_key in closed_dict:
    return IssueRefRun(closed_dict[issue_key], True, project_name, prefix)

  # Don't link to non-existent issues.  Emit the matched text itself rather
  # than re-formatting the parsed number, which would drop leading zeros
  # (e.g., "bug 007" would be shown as "bug 7").
  return template_helpers.TextRun(submatch.group(0))
| 328 |
| 329 |
class Autolink(object):
  """Maintains a registry of autolink syntax and can apply it to comments."""

  def __init__(self):
    # Maps component_name to a tuple (artifact_lookup_function,
    # match_to_reference_function, autolink_re_subst_dict).
    self.registry = {}

  def RegisterComponent(self, component_name, artifact_lookup_function,
                        match_to_reference_function, autolink_re_subst_dict):
    """Register all the autolink info for a software component.

    Registering a name that is already present replaces its earlier entry.

    Args:
      component_name: string name of software component, must be unique.
      artifact_lookup_function: function to batch lookup all artifacts that
          might have been referenced in a set of comments:
          function(mr, refs) -> referenced_artifacts
          the referenced_artifacts will be passed to each subst function.
      match_to_reference_function: convert a regex match object to
          some internal representation of the artifact reference:
          function(mr, match) -> iterable of hashable refs, or None
      autolink_re_subst_dict: dictionary of regular expressions and
          the substitution function that should be called for each match:
          function(mr, match, referenced_artifacts) -> list of text runs
    """
    self.registry[component_name] = (artifact_lookup_function,
                                     match_to_reference_function,
                                     autolink_re_subst_dict)

  def GetAllReferencedArtifacts(self, mr, comment_text_list):
    """Call callbacks to lookup all artifacts possibly referenced.

    Args:
      mr: information parsed out of the user HTTP request.
      comment_text_list: list of comment content strings.

    Returns:
      Opaque object that can be passed to MarkupAutolinks.  Its
      structure happens to be {component_name: artifact_list, ...}.
    """
    all_referenced_artifacts = {}
    # items() rather than iteritems() so that this runs on Python 2 and 3.
    for comp, (lookup, match_to_refs, re_dict) in self.registry.items():
      # Gather the refs from every comment first so that each component
      # does a single batch lookup.
      refs = set()
      for comment_text in comment_text_list:
        for regex in re_dict:
          for match in regex.finditer(comment_text):
            additional_refs = match_to_refs(mr, match)
            if additional_refs:
              refs.update(additional_refs)

      all_referenced_artifacts[comp] = lookup(mr, refs)

    return all_referenced_artifacts

  def MarkupAutolinks(self, mr, text_runs, all_referenced_artifacts):
    """Loop over components and regexes, applying all substitutions.

    Args:
      mr: info parsed from the user's HTTP request.
      text_runs: List of text runs for the user's comment.
      all_referenced_artifacts: result of previous call to
          GetAllReferencedArtifacts.

    Returns:
      List of text runs for the entire user comment, some of which may have
      attributes that cause them to render as links in render-rich-text.ezt.
    """
    # Process components in determinate alphabetical order.  Sorting the
    # names alone gives that order without ever comparing the callbacks.
    for component in sorted(self.registry):
      _lookup, _match_ref, re_subst_dict = self.registry[component]
      component_ref_artifacts = all_referenced_artifacts[component]
      for regex, subst_fun in re_subst_dict.items():
        text_runs = self._ApplySubstFunctionToRuns(
            text_runs, regex, subst_fun, mr, component_ref_artifacts)

    return text_runs

  def _ApplySubstFunctionToRuns(
      self, text_runs, regex, subst_fun, mr, component_ref_artifacts):
    """Apply autolink regex and substitution function to each text run.

    Args:
      text_runs: list of TextRun objects with parts of the original comment.
      regex: Regular expression for detecting textual references to artifacts.
      subst_fun: function to return autolink markup, or original text.
      mr: common info parsed from the user HTTP request.
      component_ref_artifacts: already-looked-up destination artifacts to use
          when computing substitution text.

    Returns:
      A new list with more and smaller runs, some of which may have tag
      and link attributes set.
    """
    result_runs = []
    for run in text_runs:
      if run.tag:
        # This chunk has already been substituted, don't allow nested
        # autolinking to mess up our output.
        result_runs.append(run)
        continue

      content = run.content
      pos = 0
      for match in regex.finditer(content):
        if match.start() > pos:
          # Keep the plain text that sits between two matches.
          result_runs.append(template_helpers.TextRun(
              content[pos: match.start()]))
        replacement_runs = subst_fun(mr, match, component_ref_artifacts)
        result_runs.extend(replacement_runs)
        pos = match.end()

      if content[pos:]:  # Keep any text that came after the last match
        result_runs.append(template_helpers.TextRun(content[pos:]))

    # TODO(jrobbins): ideally we would merge consecutive plain text runs
    # so that regexes can match across those run boundaries.

    return result_runs
| 444 |
| 445 |
def RegisterAutolink(services):
  """Register every autolink component on services.autolink.

  The numeric prefixes on the component names fix the order in which the
  components are applied, since Autolink.MarkupAutolinks processes the
  components sorted by name.

  Args:
    services: object whose autolink attribute is the registry to fill in.
        It is also bound into the issue lookup callback.
  """
  autolink = services.autolink

  # Plain URLs need no artifact lookup, so both callbacks are no-ops.
  autolink.RegisterComponent(
      '01-linkify',
      lambda _mr, _refs: None,
      lambda _mr, _match: None,
      {_IS_A_LINK_RE: Linkify})

  autolink.RegisterComponent(
      '02-tracker',
      CurryGetReferencedIssues(services),
      ExtractProjectAndIssueIds,
      {_ISSUE_REF_RE: ReplaceIssueRef})

  revision_substitutions = {
      _GIT_HASH_RE: ReplaceRevisionRef,
      _SVN_REF_RE: ReplaceRevisionRef,
      }
  autolink.RegisterComponent(
      '03-versioncontrol',
      GetReferencedRevisions,
      ExtractRevNums,
      revision_substitutions)
OLD | NEW |