| Index: appengine/monorail/framework/emailfmt.py
|
| diff --git a/appengine/monorail/framework/emailfmt.py b/appengine/monorail/framework/emailfmt.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..d4aa955e8cdcb518ac4865e4a6e74944da1c52e3
|
| --- /dev/null
|
| +++ b/appengine/monorail/framework/emailfmt.py
|
| @@ -0,0 +1,359 @@
|
| +# Copyright 2016 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style
|
| +# license that can be found in the LICENSE file or at
|
| +# https://developers.google.com/open-source/licenses/bsd
|
| +
|
| +"""Functions that format or parse email messages in Monorail.
|
| +
|
| +Specifically, this module has the logic for generating various email
|
| +header lines that help match inbound and outbound email to the project
|
| +and artifact that generated it.
|
| +"""
|
| +
|
| +import hmac
|
| +import logging
|
| +import re
|
| +import rfc822
|
| +
|
| +from google.appengine.api import app_identity
|
| +
|
| +import settings
|
| +from framework import framework_constants
|
| +from services import client_config_svc
|
| +from services import secrets_svc
|
| +
|
# TODO(jrobbins): Parsing very large messages is slow, and we are not going
# to handle attachments at first, so there is no reason to consider large
# emails.
# Largest body length that IsBodyTooBigToParse() still accepts.
MAX_BODY_SIZE = 100 * 1024
# NormalizeHeader() keeps only this many leading characters of a header.
MAX_HEADER_CHARS_CONSIDERED = 255
|
| +
|
| +
|
| +
|
def IsBodyTooBigToParse(body):
  """Tell whether an email body exceeds the size we are willing to parse.

  Args:
    body: the email message body; only its length is examined.

  Returns:
    True when len(body) is strictly greater than MAX_BODY_SIZE.
  """
  body_size = len(body)
  return MAX_BODY_SIZE < body_size
|
| +
|
| +
|
def IsProjectAddressOnToLine(project_addr, to_addrs):
  """Tell whether the project address is among the To: recipients.

  Args:
    project_addr: string email address of the project.
    to_addrs: collection of addresses taken from the To: line.

  Returns:
    True if project_addr is contained in to_addrs, i.e., the email was
    explicitly sent directly to us.
  """
  is_direct_recipient = project_addr in to_addrs
  return is_direct_recipient
|
| +
|
| +
|
def ParseEmailMessage(msg):
  """Pull the fields we care about out of an inbound email message.

  Args:
    msg: email.message.Message object for the email message sent to us.

  Returns:
    A tuple: from_addr, to_addrs, cc_addrs, references, subject, body.
    When the Precedence: header is 'bulk' or 'junk' every element is empty:
    ('', [], [], '', '', '').  Otherwise from_addr is the first address on
    the From: line (or ''), references is a de-duplicated list built from
    the In-Reply-To: and References: headers (in no particular order),
    subject has reply/forward prefixes removed, and body is the decoded
    payload of the first text/plain part (or '').
  """
  # Messages marked bulk or junk are probably not from humans, see:
  # http://google.com/search?q=precedence+bulk+junk
  precedence = msg.get('precedence', '')
  if precedence.lower() in ('bulk', 'junk'):
    logging.info('Precedence: %r indicates an autoresponder', precedence)
    return '', [], [], '', '', ''

  sender_addrs = _ExtractAddrs(msg.get('from', ''))
  from_addr = sender_addrs[0] if sender_addrs else ''
  to_addrs = _ExtractAddrs(msg.get('to', ''))
  cc_addrs = _ExtractAddrs(msg.get('cc', ''))

  # Pool In-Reply-To: with every whitespace-separated References: entry and
  # drop empty values and duplicates.
  in_reply_to = msg.get('in-reply-to', '')
  candidate_refs = [in_reply_to] + msg.get('references', '').split()
  references = list({ref for ref in candidate_refs if ref})

  subject = _StripSubjectPrefixes(msg.get('subject', ''))

  # We only process plain text emails, and only the first text part.
  first_text_part = next(
      (part for part in msg.walk()
       if part.get_content_type() == 'text/plain'),
      None)
  if first_text_part is None:
    body = ''
  else:
    body = first_text_part.get_payload(decode=True)

  return from_addr, to_addrs, cc_addrs, references, subject, body
|
| +
|
| +
|
| +def _ExtractAddrs(header_value):
|
| + """Given a message header value, return email address found there."""
|
| + friendly_addr_pairs = list(rfc822.AddressList(header_value))
|
| + return [addr for _friendly, addr in friendly_addr_pairs]
|
| +
|
| +
|
def _StripSubjectPrefixes(subject):
  """Remove every leading 'Re:', 'Fwd:', etc. prefix from a subject line.

  Prefixes are removed repeatedly, so 'Re: Fwd: hello' becomes 'hello'.
  Surrounding whitespace is stripped each time a prefix is removed.
  """
  while True:
    prefix = _FindSubjectPrefix(subject)
    if not prefix:
      return subject
    subject = subject[len(prefix):].strip()
|
| +
|
| +
|
| +def _FindSubjectPrefix(subject):
|
| + """If the given subject starts with a prefix, return that prefix."""
|
| + for prefix in ['re:', 'aw:', 'fwd:', 'fw:']:
|
| + if subject.lower().startswith(prefix):
|
| + return prefix
|
| +
|
| + return None
|
| +
|
| +
|
def MailDomain():
  """Return the domain name where this app can receive email.

  In unit test mode a fixed testbed domain is returned.  Otherwise
  settings.mail_domain is used when set, and as a last resort the domain is
  derived from the App Engine application ID.
  """
  if settings.unit_test_mode:
    return 'testbed-test.appspotmail.com'

  # If running on a GAFYD domain, you must define an app alias on the
  # Application Settings admin web page.  If you cannot reserve the matching
  # APP_ID for the alias, then specify it in settings.mail_domain.
  configured_domain = settings.mail_domain
  if configured_domain:
    return configured_domain

  # Keep only the part of the application ID after the last ':'.
  app_id = app_identity.get_application_id().split(':')[-1]
  return '%s.appspotmail.com' % app_id
|
| +
|
| +
|
def FormatFriendly(commenter_view, sender, reveal_addr):
  """Format the From: line to include the commenter's friendly name if given.

  Args:
    commenter_view: view of the commenting user, or a false value.  Its
        email and display_name attributes are read.
    sender: string address to put on the From: line.
    reveal_addr: if true, the commenter's email is used as the friendly
        name, unless that email belongs to a service account.

  Returns:
    sender unchanged when there is no commenter_view; otherwise a string of
    the form '<friendly> via <site name> <<sender>>'.
  """
  if not commenter_view:
    return sender

  site_name = settings.site_name
  is_service_account = (
      commenter_view.email in client_config_svc.GetServiceAccountMap())
  # Service accounts are always shown by display name.
  if reveal_addr and not is_service_account:
    friendly = commenter_view.email
  else:
    friendly = commenter_view.display_name
  return '%s via %s <%s>' % (friendly, site_name, sender)
|
| +
|
| +
|
def NoReplyAddress(commenter_view=None, reveal_addr=False):
  """Return a From: value whose address ignores all messages sent to it.

  Args:
    commenter_view: optional view of the commenting user, passed through to
        FormatFriendly().
    reveal_addr: optional bool, passed through to FormatFriendly().

  Returns:
    The no_reply address at MailDomain(), formatted by FormatFriendly().
  """
  # Note: We use "no_reply" with an underscore to avoid potential conflict
  # with any project name.  Project names cannot have underscores.
  return FormatFriendly(
      commenter_view, 'no_reply@%s' % MailDomain(), reveal_addr)
|
| +
|
| +
|
def FormatFromAddr(_project, commenter_view=None, reveal_addr=False,
                   can_reply_to=True):
  """Return a string to be used on the email From: line.

  Args:
    _project: Project PB for the project that the email is sent from.
        Currently unused by this function.
    commenter_view: Optional UserView of the user who made a comment.  It is
        passed to FormatFriendly() to build the friendly name.
    reveal_addr: Optional bool, passed to FormatFriendly().
    can_reply_to: Optional bool.  If True then settings.send_email_as is
        used, otherwise settings.send_noreply_email_as is used.

  Returns:
    A string that should be used in the From: line of outbound email
    notifications.
  """
  if can_reply_to:
    addr = settings.send_email_as
  else:
    addr = settings.send_noreply_email_as
  return FormatFriendly(commenter_view, addr, reveal_addr)
|
| +
|
| +
|
def NormalizeHeader(s):
  """Make our message-ids robust against mail client spacing and truncation.

  Reply/forward prefixes are stripped, every run of whitespace is collapsed
  to a single space, and the result is cut to at most
  MAX_HEADER_CHARS_CONSIDERED characters.
  """
  single_spaced = ' '.join(_StripSubjectPrefixes(s).split())
  return single_spaced[:MAX_HEADER_CHARS_CONSIDERED]
|
| +
|
| +
|
def MakeMessageID(to_addr, subject, from_addr):
  """Make a unique (but deterministic) email Message-Id: value.

  Args:
    to_addr: address that the email message will be sent to.
    subject: subject line; it is normalized with NormalizeHeader() and, if
        unicode, encoded as UTF-8 before hashing.
    from_addr: address the message is sent from; only the part before the
        first '@' is used.

  Returns:
    A string of the form '<0=HASH=HASH=LOCALPART@DOMAIN>' where the hashes
    are HMAC hex digests of to_addr and of the normalized subject, keyed
    with the email secret, and DOMAIN is MailDomain().
  """
  subject_bytes = NormalizeHeader(subject)
  if isinstance(subject_bytes, unicode):
    subject_bytes = subject_bytes.encode('utf-8')

  key = secrets_svc.GetEmailKey()
  to_digest = hmac.new(key, to_addr).hexdigest()
  subject_digest = hmac.new(key, subject_bytes).hexdigest()
  local_part = from_addr.split('@')[0]
  return '<0=%s=%s=%s@%s>' % (
      to_digest, subject_digest, local_part, MailDomain())
|
| +
|
| +
|
def GetReferences(to_addr, subject, seq_num, project_from_addr):
  """Make a References: header to make this message thread properly.

  Args:
    to_addr: address that email message will be sent to.
    subject: subject line of email message.
    seq_num: sequence number of message in thread, e.g., 0, 1, 2, ...,
        or None if the message is not part of a thread.
    project_from_addr: address that the message will be sent from.

  Returns:
    A string Message-ID that does not correspond to any actual email
    message that was ever sent, but it does serve to unite all the
    messages that belong together in a thread.  The empty string is
    returned when seq_num is None.
  """
  if seq_num is None:
    return ''
  return MakeMessageID(to_addr, subject, project_from_addr)
|
| +
|
| +
|
def ValidateReferencesHeader(message_ref, project, from_addr, subject):
  """Check that the References header is one that we could have sent.

  Args:
    message_ref: one of the References header values from the inbound email.
    project: Project PB for the affected project; its project_name is read.
    from_addr: string email address that inbound email was sent from.
    subject: string base subject line of inbound email.

  Returns:
    True if message_ref equals the Message-ID that MakeMessageID() builds
    for a message sent from the project's address to from_addr with this
    subject, i.e., it looks like a reply to a message that we sent to the
    same address that replied.  Otherwise, False.
  """
  project_sender = '%s@%s' % (project.project_name, MailDomain())

  # TODO(jrobbins): project option to not check from_addr.
  # TODO(jrobbins): project inbound auth token.
  return MakeMessageID(from_addr, subject, project_sender) == message_ref
|
| +
|
| +
|
# Matches an address of the form <project>@<domain>.  The character classes
# are lowercase only; IdentifyProjectAndIssue() lowercases the address
# before matching.
PROJECT_EMAIL_RE = re.compile(
    r'(?P<project>[-a-z0-9]+)'
    r'@(?P<domain>[-a-z0-9.]+)')

# Matches a subject of the form "Issue <local_id> in <project>: <summary>".
ISSUE_CHANGE_SUMMARY_RE = re.compile(
    r'Issue (?P<local_id>[0-9]+) in '
    r'(?P<project>[-a-z0-9]+): '
    r'(?P<summary>.+)')
|
| +
|
| +
|
def IdentifyProjectAndIssue(project_addr, subject):
  """Parse the project name and issue local ID from a reply.

  Args:
    project_addr: string email address that the email was delivered to,
        it must match the Reply-To: header sent in the notification message.
    subject: string email subject line received, it must match the one
        sent.  Leading prefixes like "Re:" should already have been stripped.

  Returns:
    A 2-tuple: (project_name, local_id).  If either or both are
    None, they could not be determined.  project_name is None unless the
    project in the address and the project in the subject agree.
  """
  # Ignore any inbound email sent to a "no_reply@" address.
  if project_addr.startswith('no_reply@'):
    return None, None

  project_name = None

  m = PROJECT_EMAIL_RE.match(project_addr.lower())
  if m:
    project_name = m.group('project')

  issue_project_name, local_id_str = _MatchSubject(subject)

  if project_name != issue_project_name:
    # Something is wrong with the project name.
    project_name = None

  logging.info('project_name = %r', project_name)
  logging.info('local_id_str = %r', local_id_str)

  try:
    local_id = int(local_id_str)
  except (TypeError, ValueError):
    # TypeError covers local_id_str being None, which is what
    # _MatchSubject() returns when the subject line does not match.
    local_id = None

  return project_name, local_id
|
| +
|
| +
|
def _MatchSubject(subject):
  """Parse the project name and issue local ID out of a subject line.

  Returns:
    A 2-tuple (project, local_id), both strings captured by
    ISSUE_CHANGE_SUMMARY_RE, or (None, None) if the subject does not match.
  """
  match = ISSUE_CHANGE_SUMMARY_RE.match(subject)
  if not match:
    return None, None
  return match.group('project'), match.group('local_id')
|
| +
|
| +
|
| +# TODO(jrobbins): For now, we strip out lines that look like quoted
|
| +# text and then will give the user the option to see the whole email.
|
| +# For 2.0 of this feature, we should change the Comment PB to have
|
| +# runs of text with different properties so that the UI can present
|
| +# "- Show quoted text -" and expand it in-line.
|
| +
|
| +# TODO(jrobbins): For now, we look for lines that indicate quoted
|
| +# text (e.g., they start with ">"). But, we should also collapse
|
| +# multiple lines that are identical to other lines in previous
|
| +# non-deleted comments on the same issue, regardless of quote markers.
|
| +
|
| +
|
# We cut off the message if we see something that looks like a signature and
# it is near the bottom of the message.
# A line matches when it consists only of runs of '-', '_' or '=' characters
# (each run optionally followed by a space), or only of one of the listed
# closing phrases, optionally followed by a comma and trailing spaces.
# Matching is case-insensitive.
SIGNATURE_BOUNDARY_RE = re.compile(
    r'^(([-_=]+ ?)+|'
    r'cheers|(best |warm |kind )?regards|thx|thanks|thank you|'
    r'Sent from my i?Phone|Sent from my iPod)'
    r',? *$', re.I)

# StripQuotedText() only looks for a signature boundary within this many
# lines of the end of the message.
MAX_SIGNATURE_LINES = 8
|
| +
|
# Line patterns that mark the start of forwarded text, of a quoted
# notification, or of an explicit signature.
FORWARD_OR_EXPLICIT_SIG_PATS = [
    r'[^0-9a-z]+(forwarded|original) message[^0-9a-z]+\s*$',
    r'Updates:\s*$',
    r'Comment #\d+ on issue \d+ by \S+:',
    # If we see this anywhere in the message, treat the rest as a signature.
    r'--\s*$',
    ]
# Matches from the start of any line that begins with one of the patterns
# above through the end of the whole text: (.|\n)* also consumes newlines.
FORWARD_OR_EXPLICIT_SIG_PATS_AND_REST_RE = re.compile(
    r'^(%s)(.|\n)*' % '|'.join(FORWARD_OR_EXPLICIT_SIG_PATS),
    flags=re.MULTILINE | re.IGNORECASE)
|
| +
|
# This handles gmail well, and it's pretty broad without seeming like
# it would cause false positives.
# Each pattern describes one line that either introduces quoted text
# ("On ... wrote:") or is itself quoted (starts with ">").
QUOTE_PATS = [
    r'^On .*\s+<\s*\S+?@[-a-z0-9.]+>\s*wrote:\s*$',
    r'^On .* \S+?@[-a-z0-9.]+\s*wrote:\s*$',
    r'^\S+?@[-a-z0-9.]+ \(\S+?@[-a-z0-9.]+\)\s*wrote:\s*$',
    # The dots in the domain are escaped so that they only match a literal
    # '.', rather than any character.
    r'\S+?@[-a-z0-9]+\.appspotmail\.com\s.*wrote:\s*$',
    # French "a écrit :"; \xc3\xa9 is the UTF-8 byte sequence for e-acute.
    r'\S+?@[-a-z0-9]+\.appspotmail\.com\s+.*a\s+\xc3\xa9crit\s*:\s*$',
    r'^\d+/\d+/\d+ +<\S+@[-a-z0-9.]+>:?\s*$',
    r'^>.*$',
    ]
# Matches a run of one or more consecutive quote lines together with the
# blank lines immediately before and after the run.
QUOTED_BLOCKS_RE = re.compile(
    r'(^\s*\n)*((%s)\n?)+(^\s*\n)*' % '|'.join(QUOTE_PATS),
    flags=re.MULTILINE | re.IGNORECASE)
|
| +
|
| +
|
def StripQuotedText(description):
  """Strip all quoted text lines out of the given comment text.

  Args:
    description: string text of an inbound email comment.

  Returns:
    The text with forwarded or explicitly signed tails removed, quoted
    blocks collapsed, a trailing signature cut off, and surrounding
    whitespace stripped.
  """
  # If the rest of the message is forwarded text, drop it all.
  without_tail = FORWARD_OR_EXPLICIT_SIG_PATS_AND_REST_RE.sub(
      '', description)
  # Replace each quoted block of lines and surrounding blank lines with at
  # most one blank line.
  collapsed = QUOTED_BLOCKS_RE.sub('\n', without_tail)

  kept_lines = collapsed.strip().split('\n')
  # Make another pass over the last few lines to strip out signatures.
  sig_zone_start = max(0, len(kept_lines) - MAX_SIGNATURE_LINES)
  for offset, line in enumerate(kept_lines[sig_zone_start:]):
    if SIGNATURE_BOUNDARY_RE.match(line):
      # We found the likely start of a signature, just keep the lines
      # above it.
      kept_lines = kept_lines[:sig_zone_start + offset]
      break

  return '\n'.join(kept_lines).strip()
|
|
|