Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(359)

Side by Side Diff: appengine/monorail/features/autolink.py

Issue 1868553004: Open Source Monorail (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Rebase Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « appengine/monorail/features/activities.py ('k') | appengine/monorail/features/commands.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style
3 # license that can be found in the LICENSE file or at
4 # https://developers.google.com/open-source/licenses/bsd
5
6 """Autolink helps auto-link references to artifacts in text.
7
8 This class maintains a registry of artifact autolink syntax specs and
9 callbacks. The structure of that registry is:
10 { component_name: (lookup_callback, match_to_reference_callback,
11 { regex: substitution_callback, ...}),
12 ...
13 }
14
15 For example:
16 { 'tracker':
17 (GetReferencedIssues,
18 ExtractProjectAndIssueIds,
19 {_ISSUE_REF_RE: ReplaceIssueRef}),
20 'versioncontrol':
21 (GetReferencedRevisions,
22 ExtractProjectAndRevNum,
23 {_GIT_HASH_RE: ReplaceRevisionRef}),
24 }
25
26 The dictionary of regexes is used here because, in the future, we
27 might add more regexes for each component rather than have one complex
28 regex per component.
29 """
30
31 import logging
32 import re
33 import urllib
34 import urlparse
35
36 import settings
37 from framework import template_helpers
38 from framework import validate
39 from proto import project_pb2
40 from tracker import tracker_helpers
41
42
# Matches an HTML closing tag (for example "</b>") at the very end of a string.
_CLOSING_TAG_RE = re.compile('</[a-z0-9]+>$', re.IGNORECASE)

# URL schemes that mark the start of text that may be turned into a link.
_LINKIFY_SCHEMES = r'(https?://|ftp://|mailto:)'
# Also count a start-tag '<' as a url delimiter, since the autolinker
# is sometimes run against html fragments.
_IS_A_LINK_RE = re.compile(r'(%s)([^\s<]+)' % _LINKIFY_SCHEMES, re.UNICODE)

# These are allowed in links, but if any of the closing delimiters appear
# at the end of the link, and the opening one is not part of the link,
# then trim off the closing delimiters.
# Each entry is (opening_delimiter, closing_delimiter).  An opening delimiter
# of None means the closing character is always trimmed from the end.
_LINK_TRAILING_CHARS = [
    (None, ':'),
    (None, '.'),
    (None, ','),
    ('<', '>'),
    ('"', '"'),
    ('(', ')'),
    ('[', ']'),
    ('{', '}'),
    ]
63
64
def Linkify(_mr, autolink_regex_match,
            _component_ref_artifacts):
  """Examine a textual reference and replace it with a hyperlink or not.

  This is a callback for use with the autolink feature.

  Trailing punctuation, unbalanced closing delimiters, and a trailing HTML
  closing tag are trimmed off the matched text before it is validated, and
  are emitted as plain text after the link.

  Args:
    _mr: common info parsed from the user HTTP request (unused).
    autolink_regex_match: regex match for the textual reference.
    _component_ref_artifacts: unused value

  Returns:
    A list of TextRuns.  If the trimmed text passes validate.IsValidURL or
    validate.IsValidEmail, the list holds a TextRun with tag=a for the link,
    followed by a plain TextRun for any trimmed trailing text.  Otherwise
    the list holds a single plain TextRun with the complete matched text.
  """
  matched_text = autolink_regex_match.group(0)
  hyperlink = matched_text

  trailing = ''
  for begin, end in _LINK_TRAILING_CHARS:
    if hyperlink.endswith(end):
      # Keep a closing delimiter when its opening partner is inside the link,
      # e.g. a URL that legitimately contains "(...)".
      if not begin or hyperlink[:-len(end)].find(begin) == -1:
        trailing = end + trailing
        hyperlink = hyperlink[:-len(end)]

  # The autolinker may be run on HTML fragments, so a closing tag can be
  # glued to the end of the URL.  Move it into the trailing text.
  tag_match = _CLOSING_TAG_RE.search(hyperlink)
  if tag_match:
    trailing = hyperlink[tag_match.start(0):] + trailing
    hyperlink = hyperlink[:tag_match.start(0)]

  if (not validate.IsValidURL(hyperlink) and
      not validate.IsValidEmail(hyperlink)):
    # Not linkable.  Return the whole original match rather than the trimmed
    # text, so the characters moved into `trailing` are not dropped from the
    # rendered comment.
    return [template_helpers.TextRun(matched_text)]

  result = [template_helpers.TextRun(hyperlink, tag='a', href=hyperlink)]
  if trailing:
    result.append(template_helpers.TextRun(trailing))

  return result
103
104
# Regular expression to detect git hashes.
# Used to auto-link to Git hashes on crrev.com when displaying issue details.
# Matches "N", "rN", "revision N", and "revision #N" when the reference is
# not part of a larger word and N is a hexadecimal string of 40 chars.
_GIT_HASH_RE = re.compile(
    r'\b(?P<prefix>r(evision\s+#?)?)?(?P<revnum>([a-f0-9]{40}))\b',
    re.IGNORECASE | re.MULTILINE)

# This is for SVN revisions and Git commit positions.
# Unlike _GIT_HASH_RE, the "r" or "revision" prefix is required here, and N
# is a decimal number of 1 to 7 digits.
_SVN_REF_RE = re.compile(
    r'\b(?P<prefix>r(evision\s+#?)?)(?P<revnum>([0-9]{1,7}))\b',
    re.IGNORECASE | re.MULTILINE)
117
118
def GetReferencedRevisions(_mr, _refs):
  """Look up the revisions referenced in comments; currently a no-op.

  Args:
    _mr: common info parsed from the user HTTP request (unused).
    _refs: revision references found in the comment text (unused).

  Returns:
    Always None, because no revision lookup is done yet.
  """
  # Every revision reference is autolinked as-is; nothing verifies that the
  # revision really exists.
  # TODO(jrobbins): Hit crrev.com and check that the revision exists
  # and show a rollover with revision info.
  referenced_revisions = None
  return referenced_revisions
126
127
def ExtractRevNums(_mr, autolink_regex_match):
  """Convert a revision regex match into a one-element list of references.

  Args:
    _mr: common info parsed from the user HTTP request (unused).
    autolink_regex_match: regex match that has a 'revnum' named group.

  Returns:
    A list holding just the text of the 'revnum' group.
  """
  revision_ref = autolink_regex_match.group('revnum')
  logging.debug('revision ref = %s', revision_ref)
  extracted = [revision_ref]
  return extracted
133
134
def ReplaceRevisionRef(
    mr, autolink_regex_match, _component_ref_artifacts):
  """Build the link run that replaces a revision reference.

  Args:
    mr: common info parsed from the user HTTP request; mr.project is used
        to choose the revision URL format.
    autolink_regex_match: regex match with 'prefix' and 'revnum' groups.
    _component_ref_artifacts: unused value.

  Returns:
    A one-element list with a TextRun with tag=a whose text is the matched
    prefix (if any) followed by the revision, and whose href is the revision
    URL format filled in with the revision.
  """
  revnum = autolink_regex_match.group('revnum')
  prefix = autolink_regex_match.group('prefix')

  # Show the reference the way the user typed it, including any prefix.
  if prefix:
    link_text = '%s%s' % (prefix, revnum)
  else:
    link_text = revnum

  url_format = _GetRevisionURLFormat(mr.project)
  href = url_format.format(revnum=revnum)
  return [template_helpers.TextRun(link_text, tag='a', href=href)]
145
146
147 def _GetRevisionURLFormat(project):
148 # TODO(jrobbins): Expose a UI to customize it to point to whatever site
149 # hosts the source code. Also, site-wide default.
150 return (project.revision_url_format or settings.revision_url_format)
151
152
# Regular expression to detect issue references.
# Used to auto-link to other issues when displaying issue details.
# Matches "issue", "issues", "bug", or "bugs" when not part of a larger
# word, optionally followed by ":" or "=", and then one or more issue
# numbers.  Each number may be preceded by a project name ending in ":" or
# "#" and/or by a "#", and numbers may be separated by a comma, "and", or
# "or".
_ISSUE_REF_RE = re.compile(r"""
(?P<prefix>\b(issues?|bugs?)[ \t]*(:|=)?)
([ \t]*(?P<project_name>\b[-a-z0-9]+[:\#])?
(?P<number_sign>\#?)
(?P<local_id>\d+)\b
(,?[ \t]*(and|or)?)?)+""", re.IGNORECASE | re.VERBOSE)

# Matches one individual issue reference.  It is applied to the text already
# matched by _ISSUE_REF_RE to pick out each issue, so here the "issue" or
# "bug" prefix is optional.
_SINGLE_ISSUE_REF_RE = re.compile(r"""
(?P<prefix>\b(issue|bug)[ \t]*)?
(?P<project_name>\b[-a-z0-9]+[:\#])?
(?P<number_sign>\#?)
(?P<local_id>\d+)\b""", re.IGNORECASE | re.VERBOSE)
169
170
def CurryGetReferencedIssues(services):
  """Bind the persistence services into an issue-lookup callback.

  The autolink registry calls its lookup callbacks with only (mr, refs).
  Closing over `services` here gives the callback access to the persistence
  layers without passing them through the registry.

  Args:
    services: connection to issue, config, and project persistence layers.

  Returns:
    A function taking (mr, ref_tuples) that autolink can call directly.
  """

  def GetReferencedIssues(mr, ref_tuples):
    """Look up the open and closed issues that the given refs point to.

    Args:
      mr: commonly used info parsed from the request.
      ref_tuples: list of (project_name, local_id) tuples for each issue
          that is mentioned in the comment text.  The project_name may be
          None, in which case the issue is assumed to be in the current
          project.

    Returns:
      A pair (open_dict, closed_dict).  Each dict maps the key built by
      _IssueProjectKey to the corresponding issue.
    """
    # A reference without a project name means the current project.
    referenced_project_names = [
        (ref_project_name or mr.project_name)
        for ref_project_name, _local_id in ref_tuples]
    ref_projects = services.project.GetProjectsByName(
        mr.cnxn, referenced_project_names)
    issue_ids = services.issue.ResolveIssueRefs(
        mr.cnxn, ref_projects, mr.project_name, ref_tuples)
    open_issues, closed_issues = (
        tracker_helpers.GetAllowedOpenedAndClosedIssues(
            mr, issue_ids, services))

    open_dict = {
        _IssueProjectKey(issue.project_name, issue.local_id): issue
        for issue in open_issues}
    closed_dict = {
        _IssueProjectKey(issue.project_name, issue.local_id): issue
        for issue in closed_issues}

    logging.info('autolinking dicts %r and %r', open_dict, closed_dict)

    return open_dict, closed_dict

  return GetReferencedIssues
220
221
222 def _ParseProjectNameMatch(project_name):
223 """Process the passed project name and determine the best representation.
224
225 Args:
226 project_name: a string with the project name matched in a regex
227
228 Returns:
229 A minimal representation of the project name, None if no valid content.
230 """
231 if not project_name:
232 return None
233 return project_name.lstrip().rstrip('#: \t\n')
234
235
def ExtractProjectAndIssueIds(_mr, autolink_regex_match):
  """Turn a matched issue reference into (project_name, local_id) tuples.

  Args:
    _mr: common info parsed from the user HTTP request (unused).
    autolink_regex_match: regex match whose full text may mention several
        issues.

  Returns:
    A list with one (project_name, local_id) tuple for each single issue
    reference found in the matched text.  project_name is None when the
    reference did not name a project.
  """
  matched_text = autolink_regex_match.group(0)
  extracted_refs = []
  for single_ref in _SINGLE_ISSUE_REF_RE.finditer(matched_text):
    project_name = _ParseProjectNameMatch(single_ref.group('project_name'))
    local_id = int(single_ref.group('local_id'))
    ref = (project_name, local_id)
    logging.info('issue ref = %s', ref)
    extracted_refs.append(ref)

  return extracted_refs
247
248
249 # This uses project name to avoid a lookup on project ID in a function
250 # that has no services object.
251 def _IssueProjectKey(project_name, local_id):
252 """Make a dictionary key to identify a referenced issue."""
253 return '%s:%d' % (project_name, local_id)
254
255
class IssueRefRun(object):
  """A run of text that is rendered as a link to a referenced issue."""

  def __init__(self, issue, is_closed, project_name, prefix):
    """Set the link attributes for one issue reference.

    Args:
      issue: the referenced issue; its summary and local_id are used.
      is_closed: True if the issue is closed, which sets the 'closed_ref'
          CSS class.
      project_name: name of the project used in the link URL.
      prefix: text that the user typed before the issue number; it is kept
          at the start of the link text.
    """
    local_id = issue.local_id

    link_text = '%s%d' % (prefix, local_id)
    if is_closed and not prefix:
      # A closed issue with no prefix gets a space on each side of the
      # number.
      link_text = ' %s ' % link_text

    if is_closed:
      css_class = 'closed_ref'
    else:
      css_class = None

    self.tag = 'a'
    self.css_class = css_class
    self.title = issue.summary
    self.href = '/p/%s/issues/detail?id=%d' % (project_name, local_id)
    self.content = link_text
268
269
def ReplaceIssueRef(mr, autolink_regex_match, component_ref_artifacts):
  """Replace a textual issue reference with link runs where possible.

  The matched text may mention several issues.  Each one is handled
  separately and the text between them is kept as plain text.

  Args:
    mr: commonly used info parsed from the request
    autolink_regex_match: regex match for the textual reference.
    component_ref_artifacts: result of earlier call to GetReferencedIssues,
        an (open_dict, closed_dict) pair.

  Returns:
    A list of IssueRefRuns and TextRuns to replace the textual reference.
    A reference to a known issue becomes a hyperlink run.  Everything else
    is returned as runs holding the original plain text.
  """
  open_dict, closed_dict = component_ref_artifacts
  whole_text = autolink_regex_match.group(0)
  logging.info('called ReplaceIssueRef on %r', whole_text)

  runs = []
  cursor = 0  # Index just past the last text already turned into runs.
  for single_ref in _SINGLE_ISSUE_REF_RE.finditer(whole_text):
    ref_start = single_ref.start()
    if ref_start < cursor:
      continue

    text_before_ref = whole_text[cursor:ref_start]
    if text_before_ref:
      runs.append(template_helpers.TextRun(text_before_ref))
    runs.append(
        _ReplaceSingleIssueRef(mr, single_ref, open_dict, closed_dict))
    cursor = single_ref.end()

  text_after_refs = whole_text[cursor:]
  if text_after_refs:
    runs.append(template_helpers.TextRun(text_after_refs))

  return runs
303
304
def _ReplaceSingleIssueRef(mr, submatch, open_dict, closed_dict):
  """Replace one issue reference with a link, or the original text.

  Args:
    mr: commonly used info parsed from the request.
    submatch: regex match for a single issue reference, with 'prefix',
        'project_name', 'number_sign', and 'local_id' groups.
    open_dict: dict of referenced open issues, keyed by _IssueProjectKey.
    closed_dict: dict of referenced closed issues, keyed the same way.

  Returns:
    An IssueRefRun if the issue is in open_dict or closed_dict, otherwise
    a plain TextRun with the text of the reference.
  """
  matched_project = submatch.group('project_name')

  # Everything that the user typed before the number stays in the link text.
  link_prefix = (
      (submatch.group('prefix') or '') +
      (matched_project or '') +
      (submatch.group('number_sign') or ''))

  if matched_project:
    project_name = matched_project.lstrip().rstrip(':#')
  else:
    # We need project_name for the URL, even if it is not in the text.
    project_name = mr.project_name

  local_id = int(submatch.group('local_id'))
  issue_key = _IssueProjectKey(project_name or mr.project_name, local_id)

  for issues_by_key, is_closed in ((open_dict, False), (closed_dict, True)):
    if issue_key in issues_by_key:
      return IssueRefRun(
          issues_by_key[issue_key], is_closed, project_name, link_prefix)

  # Don't link to non-existent issues.
  return template_helpers.TextRun('%s%d' % (link_prefix, local_id))
328
329
class Autolink(object):
  """Maintains a registry of autolink syntax and can apply it to comments."""

  def __init__(self):
    # Maps component_name to a tuple of (artifact_lookup_function,
    # match_to_reference_function, autolink_re_subst_dict).
    self.registry = {}

  def RegisterComponent(self, component_name, artifact_lookup_function,
                        match_to_reference_function, autolink_re_subst_dict):
    """Register all the autolink info for a software component.

    Args:
      component_name: string name of software component, must be unique.
          Registering the same name again replaces the earlier entry.
      artifact_lookup_function: function to batch lookup all artifacts that
          might have been referenced in a set of comments:
          function(mr, all_refs) -> referenced_artifacts
          the referenced_artifacts will be passed to each subst function.
      match_to_reference_function: convert a regex match object to
          some internal representation of the artifact reference:
          function(mr, match) -> iterable of hashable refs, or None
      autolink_re_subst_dict: dictionary of regular expressions and
          the substitution function that should be called for each match:
          function(mr, match, referenced_artifacts) -> list of text runs
    """
    self.registry[component_name] = (artifact_lookup_function,
                                     match_to_reference_function,
                                     autolink_re_subst_dict)

  def GetAllReferencedArtifacts(self, mr, comment_text_list):
    """Call callbacks to lookup all artifacts possibly referenced.

    Args:
      mr: information parsed out of the user HTTP request.
      comment_text_list: list of comment content strings.

    Returns:
      Opaque object that can be passed to MarkupAutolinks.  Its
      structure happens to be {component_name: artifact_list, ...}.
    """
    all_referenced_artifacts = {}
    # items() behaves the same as iteritems() here and also exists on
    # Python 3.
    for comp, (lookup, match_to_refs, re_dict) in self.registry.items():
      # Collect refs in a set so that each artifact is looked up only once.
      refs = set()
      for comment_text in comment_text_list:
        for regex in re_dict:
          for match in regex.finditer(comment_text):
            additional_refs = match_to_refs(mr, match)
            if additional_refs:
              refs.update(additional_refs)

      # One batch lookup per component, even when no refs were found.
      all_referenced_artifacts[comp] = lookup(mr, refs)

    return all_referenced_artifacts

  def MarkupAutolinks(self, mr, text_runs, all_referenced_artifacts):
    """Loop over components and regexes, applying all substitutions.

    Args:
      mr: info parsed from the user's HTTP request.
      text_runs: List of text runs for the user's comment.
      all_referenced_artifacts: result of previous call to
          GetAllReferencedArtifacts.

    Returns:
      List of text runs for the entire user comment, some of which may have
      attributes that cause them to render as links in render-rich-text.ezt.
    """
    # Process components in determinate alphabetical order.  Sorting only
    # the names avoids ever comparing the registered callbacks.
    for component in sorted(self.registry):
      _lookup, _match_ref, re_subst_dict = self.registry[component]
      component_ref_artifacts = all_referenced_artifacts[component]
      for regex, subst_fun in re_subst_dict.items():
        text_runs = self._ApplySubstFunctionToRuns(
            text_runs, regex, subst_fun, mr, component_ref_artifacts)

    return text_runs

  def _ApplySubstFunctionToRuns(
      self, text_runs, regex, subst_fun, mr, component_ref_artifacts):
    """Apply autolink regex and substitution function to each text run.

    Args:
      text_runs: list of TextRun objects with parts of the original comment.
      regex: Regular expression for detecting textual references to artifacts.
      subst_fun: function to return autolink markup, or original text.
      mr: common info parsed from the user HTTP request.
      component_ref_artifacts: already-looked-up destination artifacts to use
          when computing substitution text.

    Returns:
      A new list with more and smaller runs, some of which may have tag
      and link attributes set.
    """
    result_runs = []
    for run in text_runs:
      if run.tag:
        # This chunk has already been substituted, don't allow nested
        # autolinking to mess up our output.
        result_runs.append(run)
        continue

      content = run.content
      pos = 0
      for match in regex.finditer(content):
        if match.start() > pos:
          # Keep the plain text between the previous match and this one.
          result_runs.append(template_helpers.TextRun(
              content[pos: match.start()]))
        replacement_runs = subst_fun(mr, match, component_ref_artifacts)
        result_runs.extend(replacement_runs)
        pos = match.end()

      if content[pos:]:  # Keep any text that came after the last match
        result_runs.append(template_helpers.TextRun(content[pos:]))

    # TODO(jrobbins): ideally we would merge consecutive plain text runs
    # so that regexes can match across those run boundaries.

    return result_runs
444
445
def RegisterAutolink(services):
  """Register the linkify, tracker, and version control autolink hooks.

  The numeric prefixes on the component names fix the order in which
  Autolink.MarkupAutolinks applies them, because it processes components
  sorted by name.

  Args:
    services: object whose autolink attribute is the Autolink registry to
        fill in.  It is also bound into the issue lookup callback.
  """

  def _NoArtifactLookup(_mr, _refs):
    """Lookup callback for a component that needs no artifacts."""
    return None

  def _NoRefsFromMatch(_mr, _match):
    """Match-to-reference callback for a component that has no refs."""
    return None

  components = [
      ('01-linkify',
       _NoArtifactLookup,
       _NoRefsFromMatch,
       {_IS_A_LINK_RE: Linkify}),
      ('02-tracker',
       CurryGetReferencedIssues(services),
       ExtractProjectAndIssueIds,
       {_ISSUE_REF_RE: ReplaceIssueRef}),
      ('03-versioncontrol',
       GetReferencedRevisions,
       ExtractRevNums,
       {_GIT_HASH_RE: ReplaceRevisionRef,
        _SVN_REF_RE: ReplaceRevisionRef}),
      ]
  for name, lookup, match_to_refs, re_subst_dict in components:
    services.autolink.RegisterComponent(
        name, lookup, match_to_refs, re_subst_dict)
OLDNEW
« no previous file with comments | « appengine/monorail/features/activities.py ('k') | appengine/monorail/features/commands.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698