appengine/monorail/framework/emailfmt.py - Issue 1868553004: Open Source Monorail

Unified Diff: appengine/monorail/framework/emailfmt.py

Issue 1868553004: Open Source Monorail (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Rebase Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: appengine/monorail/framework/emailfmt.py

diff --git a/appengine/monorail/framework/emailfmt.py b/appengine/monorail/framework/emailfmt.py

new file mode 100644

index 0000000000000000000000000000000000000000..d4aa955e8cdcb518ac4865e4a6e74944da1c52e3

--- /dev/null

+++ b/appengine/monorail/framework/emailfmt.py

@@ -0,0 +1,359 @@

+# Use of this source code is governed by a BSD-style

+# license that can be found in the LICENSE file or at

+# https://developers.google.com/open-source/licenses/bsd

+"""Functions that format or parse email messages in Monorail.

+Specifically, this module has the logic for generating various email

+header lines that help match inbound and outbound email to the project

+and artifact that generated it.

+"""

+import hmac

+import logging

+import re

+import rfc822

+from google.appengine.api import app_identity

+import settings

+from framework import framework_constants

+from services import client_config_svc

+from services import secrets_svc

# TODO(jrobbins): Parsing very large messages is slow, and we are not going
# to handle attachments at first, so there is no reason to consider large
# emails.
MAX_BODY_SIZE = 100 * 1024
MAX_HEADER_CHARS_CONSIDERED = 255


def IsBodyTooBigToParse(body):
  """Tell whether an email body exceeds the size we are willing to parse.

  Args:
    body: the email message body; only its length is examined.

  Returns:
    True if the body is longer than MAX_BODY_SIZE, otherwise False.
  """
  body_size = len(body)
  return body_size > MAX_BODY_SIZE

def IsProjectAddressOnToLine(project_addr, to_addrs):
  """Tell whether the project's address is among the To: recipients.

  Args:
    project_addr: string email address of the project.
    to_addrs: collection of addresses taken from the To: header.

  Returns:
    True if project_addr is a member of to_addrs, i.e., the email was
    explicitly sent directly to us.
  """
  is_direct_recipient = project_addr in to_addrs
  return is_direct_recipient

def ParseEmailMessage(msg):
  """Pull the fields we care about out of an inbound email message.

  Args:
    msg: email.message.Message object for the email message sent to us.

  Returns:
    A tuple: from_addr, to_addrs, cc_addrs, references, subject, body.
    from_addr is the first From: address, or '' if there is none.
    references holds the distinct non-empty In-Reply-To: and References:
    values in no particular order.  subject has any reply/forward prefixes
    stripped.  body is the decoded payload of the first text/plain part, or
    '' if the message has no such part.  If the Precedence: header is
    "bulk" or "junk", the result is '', [], [], '', '', ''.
  """
  # Ignore messages that are probably not from humans, see:
  # http://google.com/search?q=precedence+bulk+junk
  precedence = msg.get('precedence', '')
  if precedence.lower() in ('bulk', 'junk'):
    logging.info('Precedence: %r indicates an autoresponder', precedence)
    return '', [], [], '', '', ''

  senders = _ExtractAddrs(msg.get('from', ''))
  from_addr = senders[0] if senders else ''
  to_addrs = _ExtractAddrs(msg.get('to', ''))
  cc_addrs = _ExtractAddrs(msg.get('cc', ''))

  # Merge In-Reply-To: with the whitespace-separated References: values,
  # dropping empty strings and duplicates.
  in_reply_to = msg.get('in-reply-to', '')
  candidate_refs = [in_reply_to] + msg.get('references', '').split()
  references = list(set(ref for ref in candidate_refs if ref))

  subject = _StripSubjectPrefixes(msg.get('subject', ''))

  # We only process plain text emails, and only consider the first text part.
  body = ''
  text_parts = (
      part for part in msg.walk()
      if part.get_content_type() == 'text/plain')
  first_text_part = next(text_parts, None)
  if first_text_part is not None:
    body = first_text_part.get_payload(decode=True)

  return from_addr, to_addrs, cc_addrs, references, subject, body

def _ExtractAddrs(header_value):
  """Return the bare email addresses found in an address header value.

  Args:
    header_value: string value of a header such as From:, To:, or Cc:,
        possibly holding several comma-separated addresses, each with an
        optional friendly name.

  Returns:
    A list of address strings with the friendly names dropped.  An empty
    header yields an empty list.
  """
  # The rfc822 module has been deprecated since Python 2.3 and does not exist
  # in Python 3; email.utils offers the same address-list parsing in both.
  from email import utils as email_utils

  if not header_value:
    return []
  friendly_addr_pairs = email_utils.getaddresses([header_value])
  # The parser can report text it could not parse as an ('', '') pair; an
  # empty address is of no use to callers, so leave those out.
  return [addr for _friendly, addr in friendly_addr_pairs if addr]

def _StripSubjectPrefixes(subject):
  """Remove every leading 'Re:', 'Fwd:', etc. marker from a subject line.

  Args:
    subject: string subject line, possibly starting with several stacked
        reply/forward prefixes.

  Returns:
    The subject with all leading prefixes removed.  Surrounding whitespace
    is stripped each time a prefix is removed.
  """
  while True:
    leading_prefix = _FindSubjectPrefix(subject)
    if not leading_prefix:
      return subject
    # Only the length of the prefix matters; it was matched ignoring case.
    subject = subject[len(leading_prefix):].strip()

def _FindSubjectPrefix(subject):
  """Return the reply/forward prefix that the subject starts with, if any.

  Args:
    subject: string subject line to inspect; compared ignoring case.

  Returns:
    The lowercase prefix ('re:', 'aw:', 'fwd:', or 'fw:') that the subject
    begins with, or None if it begins with none of them.
  """
  lowered = subject.lower()
  known_prefixes = ('re:', 'aw:', 'fwd:', 'fw:')
  return next(
      (prefix for prefix in known_prefixes if lowered.startswith(prefix)),
      None)

def MailDomain():
  """Return the domain name where this app can receive email.

  Returns:
    A fixed testbed domain when settings.unit_test_mode is set; otherwise
    settings.mail_domain if it is non-empty; otherwise an appspotmail.com
    domain derived from the application ID.
  """
  if settings.unit_test_mode:
    return 'testbed-test.appspotmail.com'

  # If running on a GAFYD domain, you must define an app alias on the
  # Application Settings admin web page.  If you cannot reserve the matching
  # APP_ID for the alias, then specify it in settings.mail_domain.
  configured_domain = settings.mail_domain
  if configured_domain:
    return configured_domain

  # Keep only what follows the last ':' in the application ID; an ID with
  # no ':' is used unchanged.
  bare_app_id = app_identity.get_application_id().rpartition(':')[2]
  return '%s.appspotmail.com' % bare_app_id

def FormatFriendly(commenter_view, sender, reveal_addr):
  """Format the From: line to include the commenter's friendly name if given.

  Args:
    commenter_view: view of the commenting user, exposing email and
        display_name, or a false value when there is no commenter.
    sender: string email address that the message is sent from.
    reveal_addr: bool.  If True, the commenter's email is used as the
        friendly name, except for service accounts.

  Returns:
    '<friendly> via <site name> <sender>' when a commenter is given,
    otherwise just the sender address.
  """
  if not commenter_view:
    return sender

  site_name = settings.site_name
  is_service_account = (
      commenter_view.email in client_config_svc.GetServiceAccountMap())
  # Service accounts always go by their display name.
  if reveal_addr and not is_service_account:
    friendly = commenter_view.email
  else:
    friendly = commenter_view.display_name
  return '%s via %s <%s>' % (friendly, site_name, sender)

def NoReplyAddress(commenter_view=None, reveal_addr=False):
  """Return a From: value whose address ignores all messages sent to it.

  Args:
    commenter_view: optional view of the commenting user, used for the
        friendly name.
    reveal_addr: optional bool passed through to FormatFriendly.

  Returns:
    The no_reply address at our mail domain, formatted by FormatFriendly.
  """
  # Note: We use "no_reply" with an underscore to avoid potential conflict
  # with any project name.  Project names cannot have underscores.
  mail_domain = MailDomain()
  sender = 'no_reply@%s' % mail_domain
  return FormatFriendly(commenter_view, sender, reveal_addr)

def FormatFromAddr(_project, commenter_view=None, reveal_addr=False,
                   can_reply_to=True):
  """Return a string to be used on the email From: line.

  Args:
    _project: Project PB for the project that the email is sent from.
        Not used by this function.
    commenter_view: Optional UserView of the user who made a comment.  We use
        the user's (potentially obscured) email address as their friendly
        name.
    reveal_addr: Optional bool.  If False then the address is obscured.
    can_reply_to: Optional bool.  If True then settings.send_email_as is
        used, otherwise settings.send_noreply_email_as is used.

  Returns:
    A string that should be used in the From: line of outbound email
    notifications for the given project.
  """
  if can_reply_to:
    addr = settings.send_email_as
  else:
    addr = settings.send_noreply_email_as
  return FormatFriendly(commenter_view, addr, reveal_addr)

def NormalizeHeader(s):
  """Make our message-ids robust against mail client spacing and truncation.

  Args:
    s: string header value, e.g., a subject line.

  Returns:
    The value with reply/forward prefixes stripped, every run of whitespace
    collapsed to one space, and cut to MAX_HEADER_CHARS_CONSIDERED
    characters.
  """
  # split() with no argument splits on any runs of whitespace.
  collapsed = ' '.join(_StripSubjectPrefixes(s).split())
  return collapsed[:MAX_HEADER_CHARS_CONSIDERED]

def MakeMessageID(to_addr, subject, from_addr):
  """Make a unique (but deterministic) email Message-Id: value.

  Args:
    to_addr: address that the email message will be sent to.
    subject: subject line of the email message; normalized before hashing.
    from_addr: address that the message will be sent from; only the part
        before the first '@' is used.

  Returns:
    A string of the form '<0=HMAC(to_addr)=HMAC(subject)=LOCAL@DOMAIN>',
    where both HMACs are keyed with the email secret and DOMAIN is our
    mail domain.
  """
  subject_key = NormalizeHeader(subject)
  if isinstance(subject_key, unicode):
    # Encode unicode subjects to UTF-8 bytes before hashing.
    subject_key = subject_key.encode('utf-8')

  mail_hmac_key = secrets_svc.GetEmailKey()
  to_digest = hmac.new(mail_hmac_key, to_addr).hexdigest()
  subject_digest = hmac.new(mail_hmac_key, subject_key).hexdigest()
  local_part = from_addr.split('@')[0]
  return '<0=%s=%s=%s@%s>' % (
      to_digest, subject_digest, local_part, MailDomain())

def GetReferences(to_addr, subject, seq_num, project_from_addr):
  """Make a References: header to make this message thread properly.

  Args:
    to_addr: address that email message will be sent to.
    subject: subject line of email message.
    seq_num: sequence number of message in thread, e.g., 0, 1, 2, ...,
        or None if the message is not part of a thread.
    project_from_addr: address that the message will be sent from.

  Returns:
    A string Message-ID that does not correspond to any actual email
    message that was ever sent, but it does serve to unite all the
    messages that belong together in a thread.  An empty string is
    returned when seq_num is None.
  """
  if seq_num is None:
    return ''
  return MakeMessageID(to_addr, subject, project_from_addr)

def ValidateReferencesHeader(message_ref, project, from_addr, subject):
  """Check that the References header is one that we could have sent.

  Args:
    message_ref: one of the References header values from the inbound email.
    project: Project PB for the affected project.
    from_addr: string email address that inbound email was sent from.
    subject: string base subject line of inbound email.

  Returns:
    True if it looks like this is a reply to a message that we sent
    to the same address that replied.  Otherwise, False.
  """
  # Rebuild the Message-ID we would have generated for a notification sent
  # from this project to the replying address with this subject.
  project_sender = '%s@%s' % (project.project_name, MailDomain())
  expected_ref = MakeMessageID(from_addr, subject, project_sender)

  # TODO(jrobbins): project option to not check from_addr.
  # TODO(jrobbins): project inbound auth token.
  return message_ref == expected_ref

# Matches a project's inbound address, PROJECT@DOMAIN, in lowercase.
PROJECT_EMAIL_RE = re.compile(
    r'(?P<project>[-a-z0-9]+)'
    r'@(?P<domain>[-a-z0-9.]+)')

# Matches a notification subject line: "Issue LOCAL_ID in PROJECT: SUMMARY".
ISSUE_CHANGE_SUMMARY_RE = re.compile(
    r'Issue (?P<local_id>[0-9]+) in '
    r'(?P<project>[-a-z0-9]+): '
    r'(?P<summary>.+)')


def IdentifyProjectAndIssue(project_addr, subject):
  """Parse the project name and issue local ID from a reply.

  Args:
    project_addr: string email address that the email was delivered to,
        it must match the Reply-To: header sent in the notification message.
    subject: string email subject line received, it must match the one
        sent.  Leading prefixes like "Re:" should already have been stripped.

  Returns:
    A 2-tuple: (project_name, local_id).  If either or both are
    None, they could not be determined.  project_name is None unless the
    project in the address and the project in the subject line agree.
  """
  # Ignore any inbound email sent to a "no_reply@" address.
  if project_addr.startswith('no_reply@'):
    return None, None

  project_name = None
  m = PROJECT_EMAIL_RE.match(project_addr.lower())
  if m:
    project_name = m.group('project')

  issue_project_name, local_id_str = _MatchSubject(subject)

  if project_name != issue_project_name:
    # Something is wrong with the project name.
    project_name = None

  logging.info('project_name = %r', project_name)
  logging.info('local_id_str = %r', local_id_str)

  try:
    local_id = int(local_id_str)
  except (TypeError, ValueError):
    # local_id_str is None (a TypeError) whenever the subject line did not
    # look like one of our notifications.
    local_id = None

  return project_name, local_id


def _MatchSubject(subject):
  """Parse the project name and issue local ID from a subject line.

  Args:
    subject: string subject line with any reply/forward prefixes removed.

  Returns:
    A 2-tuple: (project_name, local_id_str), both strings, or (None, None)
    if the subject does not start with "Issue N in PROJECT: ".
  """
  m = ISSUE_CHANGE_SUMMARY_RE.match(subject)
  if m:
    return m.group('project'), m.group('local_id')

  return None, None

+# TODO(jrobbins): For now, we strip out lines that look like quoted

+# text and then will give the user the option to see the whole email.

+# For 2.0 of this feature, we should change the Comment PB to have

+# runs of text with different properties so that the UI can present

+# "- Show quoted text -" and expand it in-line.

+# TODO(jrobbins): For now, we look for lines that indicate quoted

+# text (e.g., they start with ">"). But, we should also collapse

+# multiple lines that are identical to other lines in previous

+# non-deleted comments on the same issue, regardless of quote markers.

+# We cut off the message if we see something that looks like a signature and

+# it is near the bottom of the message.

# A line that marks the likely start of a signature: either a separator made
# of runs of '-', '_' or '=' with at most one space between runs, or a
# well-known mobile client tagline.  A trailing comma and spaces are allowed.
#
# The separator is spelled "run( run)* ?" rather than the equivalent
# "(run ?)+".  Nested quantifiers make the regex engine backtrack
# exponentially on a long run of dashes that is followed by other text, and
# this pattern is applied to lines of untrusted inbound email.
SIGNATURE_BOUNDARY_RE = re.compile(
    r'^([-_=]+(?: [-_=]+)* ?|'
    r'Sent from my i?Phone|Sent from my iPod)'
    r',? *$', re.I)

# Only this many lines at the bottom of a message are checked for a
# signature boundary.
MAX_SIGNATURE_LINES = 8

# Line patterns that introduce forwarded text, an echoed notification, or an
# explicit signature.
FORWARD_OR_EXPLICIT_SIG_PATS = [
    r'[^0-9a-z]+(forwarded|original) message[^0-9a-z]+\s*$',
    r'Updates:\s*$',
    r'Comment #\d+ on issue \d+ by \S+:',
    # If we see this anywhere in the message, treat the rest as a signature.
    r'--\s*$',
    ]

# Matches from the start of the first line that begins with any of the
# patterns above through the end of the whole text.
FORWARD_OR_EXPLICIT_SIG_PATS_AND_REST_RE = re.compile(
    r'^(%s)(.|\n)*' % '|'.join(FORWARD_OR_EXPLICIT_SIG_PATS),
    re.IGNORECASE | re.MULTILINE)

# This handles gmail well, and it's pretty broad without seeming like
# it would cause false positives.
QUOTE_PATS = [
    # "On <date>, Name <addr> wrote:"
    r'^On .*\s+<\s*\S+?@[-a-z0-9.]+>\s*wrote:\s*$',
    # "On <date> addr wrote:"
    r'^On .* \S+?@[-a-z0-9.]+\s*wrote:\s*$',
    # "addr (addr) wrote:".  The parentheses are literal and must be escaped.
    r'^\S+?@[-a-z0-9.]+ \(\S+?@[-a-z0-9.]+\)\s*wrote:\s*$',
    # Attribution lines that name one of our own appspotmail.com addresses,
    # in English and in French.
    r'\S+?@[-a-z0-9]+.appspotmail.com\s.*wrote:\s*$',
    r'\S+?@[-a-z0-9]+.appspotmail.com\s+.*a\s+\xc3\xa9crit\s*:\s*$',
    # "<date> <addr>:"
    r'^\d+/\d+/\d+ +<\S+@[-a-z0-9.]+>:?\s*$',
    # Any line that starts with a quote marker.
    r'^>.*$',
    ]

# One or more consecutive quoted lines, together with any blank lines
# directly before and after them.
QUOTED_BLOCKS_RE = re.compile(
    r'(^\s*\n)*((%s)\n?)+(^\s*\n)*' % '|'.join(QUOTE_PATS),
    flags=re.MULTILINE | re.IGNORECASE)

def StripQuotedText(description):
  """Strip all quoted text lines out of the given comment text.

  Args:
    description: string text of an inbound email comment.

  Returns:
    The text with forwarded content, quoted blocks, and a trailing signature
    removed, and with surrounding whitespace stripped.
  """
  # If the rest of message is forwarded text, we're done.
  without_forward = FORWARD_OR_EXPLICIT_SIG_PATS_AND_REST_RE.sub(
      '', description)

  # Replace each quoted block of lines and surrounding blank lines with at
  # most one blank line.
  without_quotes = QUOTED_BLOCKS_RE.sub('\n', without_forward)
  kept_lines = without_quotes.strip().split('\n')

  # Make another pass over the last few lines to strip out signatures.
  sig_zone_start = max(0, len(kept_lines) - MAX_SIGNATURE_LINES)
  for offset, line in enumerate(kept_lines[sig_zone_start:]):
    if SIGNATURE_BOUNDARY_RE.match(line):
      # We found the likely start of a signature, just keep the lines above.
      kept_lines = kept_lines[:sig_zone_start + offset]
      break

  return '\n'.join(kept_lines).strip()

« no previous file with comments | « appengine/monorail/framework/csp_report.py ('k') | appengine/monorail/framework/excessiveactivity.py » ('j') | no next file with comments »