appengine/monorail/framework/emailfmt.py - Issue 1868553004: Open Source Monorail

Side by Side Diff: appengine/monorail/framework/emailfmt.py

Issue 1868553004: Open Source Monorail (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Rebase Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 # Copyright 2016 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style

	3 # license that can be found in the LICENSE file or at

	4 # https://developers.google.com/open-source/licenses/bsd

	5

	6 """Functions that format or parse email messages in Monorail.

	7

	8 Specifically, this module has the logic for generating various email

	9 header lines that help match inbound and outbound email to the project

	10 and artifact that generated it.

	11 """

	12

	13 import hmac

	14 import logging

	15 import re

	16 import rfc822

	17

	18 from google.appengine.api import app_identity

	19

	20 import settings

	21 from framework import framework_constants

	22 from services import client_config_svc

	23 from services import secrets_svc

	24

	25 # TODO(jrobbins): Parsing very large messages is slow, and we are not going

	26 # to handle attachments at first, so there is no reason to consider large

	27 # emails.

	28 MAX_BODY_SIZE = 100 * 1024

	29 MAX_HEADER_CHARS_CONSIDERED = 255

	30

	31

	32

	33 def IsBodyTooBigToParse(body):

	34 """Return True if the email message body is too big to process."""

	35 return len(body) > MAX_BODY_SIZE

	36

	37

	38 def IsProjectAddressOnToLine(project_addr, to_addrs):

	39 """Return True if an email was explicitly sent directly to us."""

	40 return project_addr in to_addrs

	41

	42

	43 def ParseEmailMessage(msg):

	44 """Parse the given MessageRouterMessage and return relevant fields.

	45

	46 Args:

	47 msg: email.message.Message object for the email message sent to us.

	48

	49 Returns:

	50 A tuple: from_addr, to_addrs, cc_addrs, references, subject, body.

	51 """

	52 # Ignore messages that are probably not from humans, see:

	53 # http://google.com/search?q=precedence+bulk+junk

	54 precedence = msg.get('precedence', '')

	55 if precedence.lower() in ['bulk', 'junk']:

	56 logging.info('Precedence: %r indicates an autoresponder', precedence)

	57 return '', [], [], '', '', ''

	58

	59 from_addrs = _ExtractAddrs(msg.get('from', ''))

	60 if from_addrs:

	61 from_addr = from_addrs[0]

	62 else:

	63 from_addr = ''

	64

	65 to_addrs = _ExtractAddrs(msg.get('to', ''))

	66 cc_addrs = _ExtractAddrs(msg.get('cc', ''))

	67

	68 in_reply_to = msg.get('in-reply-to', '')

	69 references = msg.get('references', '').split()

	70 references = list({ref for ref in [in_reply_to] + references if ref})

	71 subject = _StripSubjectPrefixes(msg.get('subject', ''))

	72

	73 body = ''

	74 for part in msg.walk():

	75 # We only process plain text emails.

	76 if part.get_content_type() == 'text/plain':

	77 body = part.get_payload(decode=True)

	78 break # Only consider the first text part.

	79

	80 return from_addr, to_addrs, cc_addrs, references, subject, body

	81

	82

	83 def _ExtractAddrs(header_value):

	84 """Given a message header value, return email address found there."""

	85 friendly_addr_pairs = list(rfc822.AddressList(header_value))

	86 return [addr for _friendly, addr in friendly_addr_pairs]

	87

	88

	89 def _StripSubjectPrefixes(subject):

	90 """Strip off any 'Re:', 'Fwd:', etc. subject line prefixes."""

	91 prefix = _FindSubjectPrefix(subject)

	92 while prefix:

	93 subject = subject[len(prefix):].strip()

	94 prefix = _FindSubjectPrefix(subject)

	95

	96 return subject

	97

	98

	99 def _FindSubjectPrefix(subject):

	100 """If the given subject starts with a prefix, return that prefix."""

	101 for prefix in ['re:', 'aw:', 'fwd:', 'fw:']:

	102 if subject.lower().startswith(prefix):

	103 return prefix

	104

	105 return None

	106

	107

	108 def MailDomain():

	109 """Return the domain name where this app can recieve email."""

	110 if settings.unit_test_mode:

	111 return 'testbed-test.appspotmail.com'

	112

	113 # If running on a GAFYD domain, you must define an app alias on the

	114 # Application Settings admin web page. If you cannot reserve the matching

	115 # APP_ID for the alias, then specify it in settings.mail_domain.

	116 if settings.mail_domain:

	117 return settings.mail_domain

	118

	119 app_id = app_identity.get_application_id()

	120 if ':' in app_id:

	121 app_id = app_id.split(':')[-1]

	122

	123 return '%s.appspotmail.com' % app_id

	124

	125

	126 def FormatFriendly(commenter_view, sender, reveal_addr):

	127 """Format the From: line to include the commenter's friendly name if given."""

	128 if commenter_view:

	129 site_name = settings.site_name

	130 if commenter_view.email in client_config_svc.GetServiceAccountMap():

	131 friendly = commenter_view.display_name

	132 elif reveal_addr:

	133 friendly = commenter_view.email

	134 else:

	135 friendly = commenter_view.display_name

	136 return '%s via %s <%s>' % (friendly, site_name, sender)

	137 else:

	138 return sender

	139

	140

	141 def NoReplyAddress(commenter_view=None, reveal_addr=False):

	142 """Return an address that ignores all messages sent to it."""

	143 # Note: We use "no_reply" with an underscore to avoid potential conflict

	144 # with any project name. Project names cannot have underscores.

	145 sender = 'no_reply@%s' % MailDomain()

	146 return FormatFriendly(commenter_view, sender, reveal_addr)

	147

	148

	149 def FormatFromAddr(_project, commenter_view=None, reveal_addr=False,

	150 can_reply_to=True):

	151 """Return a string to be used on the email From: line.

	152

	153 Args:

	154 project: Project PB for the project that the email is sent from.

	155 commenter_view: Optional UserView of the user who made a comment. We use

	156 the user's (potentially obscured) email address as their friendly name.

	157 reveal_addr: Optional bool. If False then the address is obscured.

	158 can_reply_to: Optional bool. If True then settings.send_email_as is used,

	159 otherwise settings.send_noreply_email_as is used.

	160

	161 Returns:

	162 A string that should be used in the From: line of outbound email

	163 notifications for the given project.

	164 """

	165 addr = (settings.send_email_as if can_reply_to

	166 else settings.send_noreply_email_as)

	167 return FormatFriendly(commenter_view, addr, reveal_addr)

	168

	169

	170 def NormalizeHeader(s):

	171 """Make our message-ids robust against mail client spacing and truncation."""

	172 words = _StripSubjectPrefixes(s).split() # Split on any runs of whitespace.

	173 normalized = ' '.join(words)

	174 truncated = normalized[:MAX_HEADER_CHARS_CONSIDERED]

	175 return truncated

	176

	177

	178 def MakeMessageID(to_addr, subject, from_addr):

	179 """Make a unique (but deterministic) email Message-Id: value."""

	180 normalized_subject = NormalizeHeader(subject)

	181 if isinstance(normalized_subject, unicode):

	182 normalized_subject = normalized_subject.encode('utf-8')

	183 mail_hmac_key = secrets_svc.GetEmailKey()

	184 return '<0=%s=%s=%s@%s>' % (

	185 hmac.new(mail_hmac_key, to_addr).hexdigest(),

	186 hmac.new(mail_hmac_key, normalized_subject).hexdigest(),

	187 from_addr.split('@')[0],

	188 MailDomain())

	189

	190

	191 def GetReferences(to_addr, subject, seq_num, project_from_addr):

	192 """Make a References: header to make this message thread properly.

	193

	194 Args:

	195 to_addr: address that email message will be sent to.

	196 subject: subject line of email message.

	197 seq_num: sequence number of message in thread, e.g., 0, 1, 2, ...,

	198 or None if the message is not part of a thread.

	199 project_from_addr: address that the message will be sent from.

	200

	201 Returns:

	202 A string Message-ID that does not correspond to any actual email

	203 message that was ever sent, but it does serve to unite all the

	204 messages that belong togther in a thread.

	205 """

	206 if seq_num is not None:

	207 return MakeMessageID(to_addr, subject, project_from_addr)

	208 else:

	209 return ''

	210

	211

	212 def ValidateReferencesHeader(message_ref, project, from_addr, subject):

	213 """Check that the References header is one that we could have sent.

	214

	215 Args:

	216 message_ref: one of the References header values from the inbound email.

	217 project: Project PB for the affected project.

	218 from_addr: string email address that inbound email was sent from.

	219 subject: string base subject line of inbound email.

	220

	221 Returns:

	222 True if it looks like this is a reply to a message that we sent

	223 to the same address that replied. Otherwise, False.

	224 """

	225 sender = '%s@%s' % (project.project_name, MailDomain())

	226 expected_ref = MakeMessageID(from_addr, subject, sender)

	227

	228 # TODO(jrobbins): project option to not check from_addr.

	229 # TODO(jrobbins): project inbound auth token.

	230 return expected_ref == message_ref

	231

	232

	233 PROJECT_EMAIL_RE = re.compile(

	234 r'(?P<project>[-a-z0-9]+)'

	235 r'@(?P<domain>[-a-z0-9.]+)')

	236

	237 ISSUE_CHANGE_SUMMARY_RE = re.compile(

	238 r'Issue (?P<local_id>[0-9]+) in '

	239 r'(?P<project>[-a-z0-9]+): '

	240 r'(?P<summary>.+)')

	241

	242

	243 def IdentifyProjectAndIssue(project_addr, subject):

	244 """Parse the domain name, project name, and artifact id from a reply.

	245

	246 Args:

	247 project_addr: string email address that the email was delivered to,

	248 it must match the Reply-To: header sent in the notification message.

	249 subject: string email subject line received, it must match the one

	250 sent. Leading prefixes like "Re:" should already have been stripped.

	251

	252 Returns:

	253 A 2-tuple: (project_name, local_id). If either or both are

	254 None, they could not be determined.

	255 """

	256 # Ignore any inbound email sent to a "no_reply@" address.

	257 if project_addr.startswith('no_reply@'):

	258 return None, None

	259

	260 project_name = None

	261

	262 m = PROJECT_EMAIL_RE.match(project_addr.lower())

	263 if m:

	264 project_name = m.group('project')

	265

	266 issue_project_name, local_id_str = _MatchSubject(subject)

	267

	268 if project_name != issue_project_name:

	269 # Something is wrong with the project name.

	270 project_name = None

	271

	272 logging.info('project_name = %r', project_name)

	273 logging.info('local_id_str = %r', local_id_str)

	274

	275 try:

	276 local_id = int(local_id_str)

	277 except ValueError:

	278 local_id = None

	279

	280 return project_name, local_id

	281

	282

	283 def _MatchSubject(subject):

	284 """Parse the project, artifact type, and artifact id from a subject line."""

	285 m = ISSUE_CHANGE_SUMMARY_RE.match(subject)

	286 if m:

	287 return m.group('project'), m.group('local_id')

	288

	289 return None, None

	290

	291

	292 # TODO(jrobbins): For now, we strip out lines that look like quoted

	293 # text and then will give the user the option to see the whole email.

	294 # For 2.0 of this feature, we should change the Comment PB to have

	295 # runs of text with different properties so that the UI can present

	296 # "- Show quoted text -" and expand it in-line.

	297

	298 # TODO(jrobbins): For now, we look for lines that indicate quoted

	299 # text (e.g., they start with ">"). But, we should also collapse

	300 # multiple lines that are identical to other lines in previous

	301 # non-deleted comments on the same issue, regardless of quote markers.

	302

	303

	304 # We cut off the message if we see something that looks like a signature and

	305 # it is near the bottom of the message.

	306 SIGNATURE_BOUNDARY_RE = re.compile(

	307 r'^(([-_=]+ ?)+\|'

	308 r'cheers\|(best \|warm \|kind )?regards\|thx\|thanks\|thank you\|'

	309 r'Sent from my i?Phone\|Sent from my iPod)'

	310 r',? *$', re.I)

	311

	312 MAX_SIGNATURE_LINES = 8

	313

	314 FORWARD_OR_EXPLICIT_SIG_PATS = [

	315 r'[^0-9a-z]+(forwarded\|original) message[^0-9a-z]+\s*$',

	316 r'Updates:\s*$',

	317 r'Comment #\d+ on issue \d+ by \S+:',

	318 # If we see this anywhere in the message, treat the rest as a signature.

	319 r'--\s*$',

	320 ]

	321 FORWARD_OR_EXPLICIT_SIG_PATS_AND_REST_RE = re.compile(

	322 r'^(%s)(.\|\n)*' % '\|'.join(FORWARD_OR_EXPLICIT_SIG_PATS),

	323 flags=re.MULTILINE \| re.IGNORECASE)

	324

	325 # This handles gmail well, and it's pretty broad without seeming like

	326 # it would cause false positives.

	327 QUOTE_PATS = [

	328 r'^On .\s+<\s\S+?@[-a-z0-9.]+>\swrote:\s$',

	329 r'^On .* \S+?@[-a-z0-9.]+\swrote:\s$',

	330 r'^\S+?@[-a-z0-9.]+ $\S+?@[-a-z0-9.]+$\swrote:\s$',

	331 r'\S+?@[-a-z0-9]+.appspotmail.com\s.wrote:\s$',

	332 r'\S+?@[-a-z0-9]+.appspotmail.com\s+.a\s+\xc3\xa9crit\s:\s*$',

	333 r'^\d+/\d+/\d+ +<\S+@[-a-z0-9.]+>:?\s*$',

	334 r'^>.*$',

	335 ]

	336 QUOTED_BLOCKS_RE = re.compile(

	337 r'(^\s\n)((%s)\n?)+(^\s\n)' % '\|'.join(QUOTE_PATS),

	338 flags=re.MULTILINE \| re.IGNORECASE)

	339

	340

	341 def StripQuotedText(description):

	342 """Strip all quoted text lines out of the given comment text."""

	343 # If the rest of message is forwared text, we're done.

	344 description = FORWARD_OR_EXPLICIT_SIG_PATS_AND_REST_RE.sub('', description)

	345 # Replace each quoted block of lines and surrounding blank lines with at

	346 # most one blank line.

	347 description = QUOTED_BLOCKS_RE.sub('\n', description)

	348

	349 new_lines = description.strip().split('\n')

	350 # Make another pass over the last few lines to strip out signatures.

	351 sig_zone_start = max(0, len(new_lines) - MAX_SIGNATURE_LINES)

	352 for idx in range(sig_zone_start, len(new_lines)):

	353 line = new_lines[idx]

	354 if SIGNATURE_BOUNDARY_RE.match(line):

	355 # We found the likely start of a signature, just keep the lines above it.

	356 new_lines = new_lines[:idx]

	357 break

	358

	359 return '\n'.join(new_lines).strip()

OLD	NEW

« no previous file with comments | « appengine/monorail/framework/csp_report.py ('k') | appengine/monorail/framework/excessiveactivity.py » ('j') | no next file with comments »