Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(358)

Side by Side Diff: appengine/monorail/framework/emailfmt.py

Issue 1868553004: Open Source Monorail (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Rebase Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style
3 # license that can be found in the LICENSE file or at
4 # https://developers.google.com/open-source/licenses/bsd
5
6 """Functions that format or parse email messages in Monorail.
7
8 Specifically, this module has the logic for generating various email
9 header lines that help match inbound and outbound email to the project
10 and artifact that generated it.
11 """
12
13 import hmac
14 import logging
15 import re
16 import rfc822
17
18 from google.appengine.api import app_identity
19
20 import settings
21 from framework import framework_constants
22 from services import client_config_svc
23 from services import secrets_svc
24
# TODO(jrobbins): Parsing very large messages is slow, and we are not going
# to handle attachments at first, so there is no reason to consider large
# emails.
# Largest body, as measured by len(), that IsBodyTooBigToParse() accepts.
MAX_BODY_SIZE = 100 * 1024
# Number of leading characters of a normalized header that
# NormalizeHeader() keeps; anything beyond this is ignored.
MAX_HEADER_CHARS_CONSIDERED = 255
30
31
32
def IsBodyTooBigToParse(body):
  """Return True if the email message body is too big to process.

  Args:
    body: the message body; only its len() is examined.

  Returns:
    True if len(body) is strictly greater than MAX_BODY_SIZE, else False.
  """
  return len(body) > MAX_BODY_SIZE
36
37
def IsProjectAddressOnToLine(project_addr, to_addrs):
  """Return True if an email was explicitly sent directly to us.

  Args:
    project_addr: string email address to look for.
    to_addrs: collection of addresses taken from the To: line.

  Returns:
    True if project_addr is a member of to_addrs, otherwise False.
  """
  return project_addr in to_addrs
41
42
def ParseEmailMessage(msg):
  """Parse the given email message and return the relevant fields.

  Args:
    msg: email.message.Message object for the email message sent to us.

  Returns:
    A tuple: from_addr, to_addrs, cc_addrs, references, subject, body.
    references is a list holding the In-Reply-To value followed by the
    References values, without empties or duplicates, in header order.
    subject has any 'Re:'-style prefixes removed.  body is the decoded
    payload of the first text/plain part, or '' if there is none.  If the
    Precedence header is 'bulk' or 'junk', every field is empty.
  """
  # Ignore messages that are probably not from humans, see:
  # http://google.com/search?q=precedence+bulk+junk
  precedence = msg.get('precedence', '')
  if precedence.lower() in ['bulk', 'junk']:
    logging.info('Precedence: %r indicates an autoresponder', precedence)
    return '', [], [], '', '', ''

  from_addrs = _ExtractAddrs(msg.get('from', ''))
  from_addr = from_addrs[0] if from_addrs else ''

  to_addrs = _ExtractAddrs(msg.get('to', ''))
  cc_addrs = _ExtractAddrs(msg.get('cc', ''))

  # De-duplicate while keeping header order, so that the returned list is
  # deterministic (a set would yield the ids in arbitrary order).
  candidate_refs = [msg.get('in-reply-to', '')]
  candidate_refs.extend(msg.get('references', '').split())
  references = []
  seen = set()
  for ref in candidate_refs:
    if ref and ref not in seen:
      seen.add(ref)
      references.append(ref)

  subject = _StripSubjectPrefixes(msg.get('subject', ''))

  body = ''
  for part in msg.walk():
    # We only process plain text emails.
    if part.get_content_type() == 'text/plain':
      body = part.get_payload(decode=True)
      break  # Only consider the first text part.

  return from_addr, to_addrs, cc_addrs, references, subject, body
81
82
def _ExtractAddrs(header_value):
  """Given a message header value, return the email addresses found there.

  Args:
    header_value: string value of an address header such as From:, To:,
        or Cc:.

  Returns:
    A list of the address strings parsed by rfc822.AddressList, with the
    friendly-name half of each (name, address) pair discarded.
  """
  return [addr for _friendly, addr in rfc822.AddressList(header_value)]
87
88
def _StripSubjectPrefixes(subject):
  """Strip off any 'Re:', 'Fwd:', etc. subject line prefixes.

  Args:
    subject: string subject line.

  Returns:
    The subject with every leading prefix recognized by
    _FindSubjectPrefix() removed.  Whitespace is stripped from both ends
    each time a prefix is removed; a subject with no prefix is returned
    unchanged.
  """
  prefix = _FindSubjectPrefix(subject)
  while prefix:
    # Prefixes may be stacked, e.g. "Re: Fwd: Re: ...", so keep going.
    subject = subject[len(prefix):].strip()
    prefix = _FindSubjectPrefix(subject)

  return subject
97
98
def _FindSubjectPrefix(subject):
  """If the given subject starts with a prefix, return that prefix.

  Args:
    subject: string subject line.

  Returns:
    The lowercase prefix ('re:', 'aw:', 'fwd:', or 'fw:') that the subject
    starts with, compared case-insensitively, or None if it starts with
    none of them.
  """
  lowered = subject.lower()  # Lowercase once rather than once per prefix.
  for prefix in ['re:', 'aw:', 'fwd:', 'fw:']:
    if lowered.startswith(prefix):
      return prefix

  return None
106
107
def MailDomain():
  """Return the domain name where this app can receive email.

  Returns:
    'testbed-test.appspotmail.com' in unit test mode; otherwise
    settings.mail_domain if it is set; otherwise '<app_id>.appspotmail.com'
    built from the App Engine application ID.
  """
  if settings.unit_test_mode:
    return 'testbed-test.appspotmail.com'

  # If running on a GAFYD domain, you must define an app alias on the
  # Application Settings admin web page.  If you cannot reserve the matching
  # APP_ID for the alias, then specify it in settings.mail_domain.
  if settings.mail_domain:
    return settings.mail_domain

  app_id = app_identity.get_application_id()
  if ':' in app_id:
    # Keep only the part after the last colon of the application ID.
    app_id = app_id.split(':')[-1]

  return '%s.appspotmail.com' % app_id
124
125
def FormatFriendly(commenter_view, sender, reveal_addr):
  """Format the From: line to include the commenter's friendly name if given.

  Args:
    commenter_view: object with email and display_name attributes for the
        user who made the comment, or a false value if there is none.
    sender: string email address that the message is sent from.
    reveal_addr: bool.  If true, the commenter's email is used as the
        friendly name; otherwise their display_name is used.

  Returns:
    '<friendly> via <site name> <<sender>>' when commenter_view is given,
    otherwise just sender.
  """
  if not commenter_view:
    return sender

  # Commenters listed in the service account map always appear under their
  # display name, regardless of reveal_addr.
  is_service_account = (
      commenter_view.email in client_config_svc.GetServiceAccountMap())
  if reveal_addr and not is_service_account:
    friendly = commenter_view.email
  else:
    friendly = commenter_view.display_name
  return '%s via %s <%s>' % (friendly, settings.site_name, sender)
139
140
def NoReplyAddress(commenter_view=None, reveal_addr=False):
  """Return an address that ignores all messages sent to it.

  Args:
    commenter_view: Optional view of the user who made a comment; passed
        through to FormatFriendly().
    reveal_addr: Optional bool; passed through to FormatFriendly().

  Returns:
    The 'no_reply@<mail domain>' address, formatted by FormatFriendly().
  """
  # Note: We use "no_reply" with an underscore to avoid potential conflict
  # with any project name.  Project names cannot have underscores.
  sender = 'no_reply@%s' % MailDomain()
  return FormatFriendly(commenter_view, sender, reveal_addr)
147
148
def FormatFromAddr(_project, commenter_view=None, reveal_addr=False,
                   can_reply_to=True):
  """Return a string to be used on the email From: line.

  Args:
    _project: Project PB for the project that the email is sent from.
        Currently unused.
    commenter_view: Optional UserView of the user who made a comment.  We use
        the user's (potentially obscured) email address as their friendly
        name.
    reveal_addr: Optional bool.  If False then the address is obscured.
    can_reply_to: Optional bool.  If True then settings.send_email_as is
        used, otherwise settings.send_noreply_email_as is used.

  Returns:
    A string that should be used in the From: line of outbound email
    notifications for the given project.
  """
  if can_reply_to:
    addr = settings.send_email_as
  else:
    addr = settings.send_noreply_email_as
  return FormatFriendly(commenter_view, addr, reveal_addr)
168
169
def NormalizeHeader(s):
  """Make our message-ids robust against mail client spacing and truncation.

  Args:
    s: string header value, e.g., a subject line.

  Returns:
    The value with subject prefixes stripped, every run of whitespace
    collapsed to a single space, and the result cut to at most
    MAX_HEADER_CHARS_CONSIDERED characters.
  """
  words = _StripSubjectPrefixes(s).split()  # Split on any runs of whitespace.
  normalized = ' '.join(words)
  return normalized[:MAX_HEADER_CHARS_CONSIDERED]
176
177
def MakeMessageID(to_addr, subject, from_addr):
  """Make a unique (but deterministic) email Message-Id: value.

  Args:
    to_addr: string address that the message is sent to.
    subject: string subject line; it is normalized before hashing.
    from_addr: string address that the message is sent from; only the part
        before the first '@' is used.

  Returns:
    A string of the form
    '<0=HMAC(to_addr)=HMAC(normalized subject)=from_local_part@mail_domain>',
    where both HMACs are hex digests keyed with the email secret.
  """
  normalized_subject = NormalizeHeader(subject)
  if isinstance(normalized_subject, unicode):
    # hmac needs a byte string, so encode unicode subjects first.
    normalized_subject = normalized_subject.encode('utf-8')
  mail_hmac_key = secrets_svc.GetEmailKey()
  return '<0=%s=%s=%s@%s>' % (
      hmac.new(mail_hmac_key, to_addr).hexdigest(),
      hmac.new(mail_hmac_key, normalized_subject).hexdigest(),
      from_addr.split('@')[0],
      MailDomain())
189
190
def GetReferences(to_addr, subject, seq_num, project_from_addr):
  """Make a References: header to make this message thread properly.

  Args:
    to_addr: address that email message will be sent to.
    subject: subject line of email message.
    seq_num: sequence number of message in thread, e.g., 0, 1, 2, ...,
        or None if the message is not part of a thread.
    project_from_addr: address that the message will be sent from.

  Returns:
    A string Message-ID that does not correspond to any actual email
    message that was ever sent, but it does serve to unite all the
    messages that belong together in a thread.  If seq_num is None, the
    empty string is returned instead.
  """
  if seq_num is None:
    return ''
  return MakeMessageID(to_addr, subject, project_from_addr)
210
211
def ValidateReferencesHeader(message_ref, project, from_addr, subject):
  """Check that the References header is one that we could have sent.

  Args:
    message_ref: one of the References header values from the inbound email.
    project: Project PB for the affected project.
    from_addr: string email address that inbound email was sent from.
    subject: string base subject line of inbound email.

  Returns:
    True if it looks like this is a reply to a message that we sent
    to the same address that replied.  Otherwise, False.
  """
  # Recompute the id we would have generated for a message sent from this
  # project's address to the replying user with this subject.
  sender = '%s@%s' % (project.project_name, MailDomain())
  expected_ref = MakeMessageID(from_addr, subject, sender)

  # TODO(jrobbins): project option to not check from_addr.
  # TODO(jrobbins): project inbound auth token.
  # NOTE(review): hmac.compare_digest would make this comparison
  # constant-time -- confirm it is available in the runtime before switching.
  return expected_ref == message_ref
231
232
# Splits an address of the form "<project>@<domain>" into its two parts.
PROJECT_EMAIL_RE = re.compile(
    r'(?P<project>[-a-z0-9]+)'
    r'@(?P<domain>[-a-z0-9.]+)')

# Matches a subject of the form "Issue <local_id> in <project>: <summary>".
ISSUE_CHANGE_SUMMARY_RE = re.compile(
    r'Issue (?P<local_id>[0-9]+) in '
    r'(?P<project>[-a-z0-9]+): '
    r'(?P<summary>.+)')


def IdentifyProjectAndIssue(project_addr, subject):
  """Parse the domain name, project name, and artifact id from a reply.

  Args:
    project_addr: string email address that the email was delivered to,
        it must match the Reply-To: header sent in the notification message.
    subject: string email subject line received, it must match the one
        sent.  Leading prefixes like "Re:" should already have been stripped.

  Returns:
    A 2-tuple: (project_name, local_id).  If either or both are
    None, they could not be determined.
  """
  # Ignore any inbound email sent to a "no_reply@" address.
  if project_addr.startswith('no_reply@'):
    return None, None

  project_name = None

  m = PROJECT_EMAIL_RE.match(project_addr.lower())
  if m:
    project_name = m.group('project')

  issue_project_name, local_id_str = _MatchSubject(subject)

  if project_name != issue_project_name:
    # Something is wrong with the project name.
    project_name = None

  logging.info('project_name = %r', project_name)
  logging.info('local_id_str = %r', local_id_str)

  try:
    local_id = int(local_id_str)
  except (TypeError, ValueError):
    # local_id_str is None (TypeError) whenever the subject did not match;
    # report that as an undetermined id rather than raising.
    local_id = None

  return project_name, local_id


def _MatchSubject(subject):
  """Parse the project name and artifact id from a subject line.

  Args:
    subject: string subject line with any reply prefixes already stripped.

  Returns:
    A 2-tuple of strings (project_name, local_id_str), or (None, None) if
    the subject does not start with the "Issue N in project: ..." form.
  """
  m = ISSUE_CHANGE_SUMMARY_RE.match(subject)
  if m:
    return m.group('project'), m.group('local_id')

  return None, None
290
291
292 # TODO(jrobbins): For now, we strip out lines that look like quoted
293 # text and then will give the user the option to see the whole email.
294 # For 2.0 of this feature, we should change the Comment PB to have
295 # runs of text with different properties so that the UI can present
296 # "- Show quoted text -" and expand it in-line.
297
298 # TODO(jrobbins): For now, we look for lines that indicate quoted
299 # text (e.g., they start with ">"). But, we should also collapse
300 # multiple lines that are identical to other lines in previous
301 # non-deleted comments on the same issue, regardless of quote markers.
302
303
# We cut off the message if we see something that looks like a signature and
# it is near the bottom of the message.
# The separator alternative is spelled as "runs of -, _ or = joined by single
# spaces".  It accepts the same lines as the nested form ([-_=]+ ?)+ but does
# not backtrack exponentially on a long run of dashes followed by other text.
SIGNATURE_BOUNDARY_RE = re.compile(
    r'^([-_=]+( [-_=]+)* ?|'
    r'cheers|(best |warm |kind )?regards|thx|thanks|thank you|'
    r'Sent from my i?Phone|Sent from my iPod)'
    r',? *$', re.I)

# Only this many lines at the end of a message are searched for a signature.
MAX_SIGNATURE_LINES = 8

FORWARD_OR_EXPLICIT_SIG_PATS = [
    r'[^0-9a-z]+(forwarded|original) message[^0-9a-z]+\s*$',
    r'Updates:\s*$',
    r'Comment #\d+ on issue \d+ by \S+:',
    # If we see this anywhere in the message, treat the rest as a signature.
    r'--\s*$',
    ]
# Matches from the start of the first line that begins with one of the
# patterns above through the end of the text.
FORWARD_OR_EXPLICIT_SIG_PATS_AND_REST_RE = re.compile(
    r'^(%s)(.|\n)*' % '|'.join(FORWARD_OR_EXPLICIT_SIG_PATS),
    flags=re.MULTILINE | re.IGNORECASE)

# This handles gmail well, and it's pretty broad without seeming like
# it would cause false positives.
QUOTE_PATS = [
    r'^On .*\s+<\s*\S+?@[-a-z0-9.]+>\s*wrote:\s*$',
    r'^On .* \S+?@[-a-z0-9.]+\s*wrote:\s*$',
    r'^\S+?@[-a-z0-9.]+ \(\S+?@[-a-z0-9.]+\)\s*wrote:\s*$',
    # The dots of the mail domain are escaped so they match only a literal '.'.
    r'\S+?@[-a-z0-9]+\.appspotmail\.com\s.*wrote:\s*$',
    r'\S+?@[-a-z0-9]+\.appspotmail\.com\s+.*a\s+\xc3\xa9crit\s*:\s*$',
    r'^\d+/\d+/\d+ +<\S+@[-a-z0-9.]+>:?\s*$',
    r'^>.*$',
    ]
# A run of quoted lines plus the blank lines on either side of it.  Each
# blank line is matched as [^\S\n]*\n (whitespace other than newline, then
# the newline) so that a long run of blank lines has exactly one parse; with
# \s*\n the run could be split in exponentially many ways before failing.
QUOTED_BLOCKS_RE = re.compile(
    r'(^[^\S\n]*\n)*((%s)\n?)+(^[^\S\n]*\n)*' % '|'.join(QUOTE_PATS),
    flags=re.MULTILINE | re.IGNORECASE)
339
340
def StripQuotedText(description):
  """Strip all quoted text lines out of the given comment text.

  Args:
    description: string text of the inbound comment.

  Returns:
    The text with any forwarded-message or explicit-signature tail removed,
    quoted blocks collapsed, a likely signature in the last
    MAX_SIGNATURE_LINES lines cut off, and surrounding whitespace stripped.
  """
  # If the rest of the message is forwarded text, we're done.
  description = FORWARD_OR_EXPLICIT_SIG_PATS_AND_REST_RE.sub('', description)
  # Replace each quoted block of lines and surrounding blank lines with at
  # most one blank line.
  description = QUOTED_BLOCKS_RE.sub('\n', description)

  new_lines = description.strip().split('\n')
  # Make another pass over the last few lines to strip out signatures.
  sig_zone_start = max(0, len(new_lines) - MAX_SIGNATURE_LINES)
  for idx in range(sig_zone_start, len(new_lines)):
    if SIGNATURE_BOUNDARY_RE.match(new_lines[idx]):
      # We found the likely start of a signature, just keep the lines above
      # it.
      new_lines = new_lines[:idx]
      break

  return '\n'.join(new_lines).strip()
OLDNEW
« no previous file with comments | « appengine/monorail/framework/csp_report.py ('k') | appengine/monorail/framework/excessiveactivity.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698