third_party/google-endpoints/future/backports/email/feedparser.py - Issue 2666783008: Add google-endpoints to third_party/.

Unified Diff: third_party/google-endpoints/future/backports/email/feedparser.py

Issue 2666783008: Add google-endpoints to third_party/. (Closed)

Patch Set: Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « third_party/google-endpoints/future/backports/email/errors.py ('k') | third_party/google-endpoints/future/backports/email/generator.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/google-endpoints/future/backports/email/feedparser.py

diff --git a/third_party/google-endpoints/future/backports/email/feedparser.py b/third_party/google-endpoints/future/backports/email/feedparser.py

new file mode 100644

index 0000000000000000000000000000000000000000..935c26e31795d970b52677f3ad10b1cb59d29556

--- /dev/null

+++ b/third_party/google-endpoints/future/backports/email/feedparser.py

@@ -0,0 +1,525 @@

+# Authors: Baxter, Wouters and Warsaw

+# Contact: email-sig@python.org

+"""FeedParser - An email feed parser.

+The feed parser implements an interface for incrementally parsing an email

+message, line by line. This has advantages for certain applications, such as

+those reading email messages off a socket.

+FeedParser.feed() is the primary interface for pushing new data into the

+parser. It returns when there's nothing more it can do with the available

+data. When you have no more data to push into the parser, call .close().

+This completes the parsing and returns the root message object.

+The other advantage of this parser is that it will never raise a parsing

+exception. Instead, when it finds something unexpected, it adds a 'defect' to

+the current message. Defects are just instances that live on the message

+object's .defects attribute.

+"""

+from __future__ import unicode_literals

+from __future__ import division

+from __future__ import absolute_import

+from future.builtins import object, range, super

+from future.utils import implements_iterator, PY3

+__all__ = ['FeedParser', 'BytesFeedParser']

+import re

+from future.backports.email import errors

+from future.backports.email import message

+from future.backports.email._policybase import compat32

# Newline-matching patterns.  These are raw strings so that the regex engine,
# not the Python string-literal parser, interprets the escapes; in particular
# ``\Z`` is not a valid string-literal escape and triggers a warning when it
# appears in a non-raw literal.
#
# NLCRE       - a newline sequence (use .match() to test a line's start).
# NLCRE_bol   - a newline sequence, captured as a group.
# NLCRE_eol   - a newline sequence anchored at the very end of the string.
# NLCRE_crack - a newline sequence, captured so that re.split() keeps the
#               separators in its result list.
NLCRE = re.compile(r'\r\n|\r|\n')
NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
# RFC 2822 section 3.6.8, Optional fields: ftext is %d33-57 / %d59-126, i.e.
# any character except controls, SP, and ":".  The pattern also accepts a
# leading "From " (unix-from envelope line) and a leading tab or space
# (header continuation line).
headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
EMPTYSTRING = ''
NL = '\n'

# Unique sentinel; compared by identity to signal that no complete line is
# available yet.
NeedMoreData = object()

+# @implements_iterator

class BufferedSubFile(object):
    """A file-ish object that can have new data loaded into it.

    You can also push and pop line-matching predicates onto a stack.  When the
    current predicate matches the current line, a false EOF response
    (i.e. empty string) is returned instead.  This lets the parser adhere to a
    simple abstraction -- it parses until EOF closes the current message.

    Iterating over the object yields the result of readline() until it
    returns the empty string; note that the NeedMoreData sentinel is yielded
    like any other value, so consumers must check for it.
    """

    def __init__(self):
        # The last partial line pushed into this object.
        self._partial = ''
        # The list of full, pushed lines, in reverse order (the next line to
        # be read is the last element).
        self._lines = []
        # The stack of false-EOF checking predicates.
        self._eofstack = []
        # A flag indicating whether the file has been closed or not.
        self._closed = False

    def push_eof_matcher(self, pred):
        """Push a predicate that, when true for a line, signals a false EOF."""
        self._eofstack.append(pred)

    def pop_eof_matcher(self):
        """Remove and return the most recently pushed false-EOF predicate."""
        return self._eofstack.pop()

    def close(self):
        """Mark the input as complete, flushing any trailing partial line."""
        # Don't forget any trailing partial line.  It has to be queued
        # *behind* every line that is still buffered, so route it through
        # pushlines() rather than appending it to the read end of _lines.
        # An empty partial is skipped: queued as '' it would be
        # indistinguishable from EOF.
        if self._partial:
            self.pushlines([self._partial])
        self._partial = ''
        self._closed = True

    def readline(self):
        """Return the next line, '' at (false) EOF, or NeedMoreData.

        NeedMoreData is returned when no complete line is buffered and the
        object has not been closed yet.
        """
        if not self._lines:
            if self._closed:
                return ''
            return NeedMoreData
        # Pop the line off the stack and see if it matches the current
        # false-EOF predicate.
        line = self._lines.pop()
        # RFC 2046, section 5.1.2 requires us to recognize outer level
        # boundaries at any level of inner nesting.  Do this, but be sure it's
        # in the order of most to least nested.
        for ateof in reversed(self._eofstack):
            if ateof(line):
                # We're at the false EOF.  But push the last line back first.
                self._lines.append(line)
                return ''
        return line

    def unreadline(self, line):
        """Let the consumer push a line back so it is the next one read."""
        assert line is not NeedMoreData
        self._lines.append(line)

    def push(self, data):
        """Push some new data into this object."""
        # Handle any previous leftovers
        data, self._partial = self._partial + data, ''
        # Crack into lines, but preserve the newlines on the end of each
        parts = NLCRE_crack.split(data)
        # The *ahem* interesting behaviour of re.split when supplied grouping
        # parentheses is that the last element of the resulting list is the
        # data after the final RE.  In the case of a NL/CR terminated string,
        # this is the empty string.
        self._partial = parts.pop()
        # GAN 29Mar09 bugs 1555570, 1721862: confusion at 8K boundary ending
        # with \r -- is there a \n to follow later?  Hold the whole line back
        # as the partial until more data (or close()) settles the question.
        if not self._partial and parts and parts[-1].endswith('\r'):
            self._partial = parts.pop(-2) + parts.pop()
        # parts is a list of strings, alternating between the line contents
        # and the eol character(s).  Gather up a list of lines after
        # re-attaching the newlines.
        lines = [text + eol for text, eol in zip(parts[0::2], parts[1::2])]
        self.pushlines(lines)

    def pushlines(self, lines):
        """Queue ``lines`` (in reading order) behind the buffered lines."""
        # Reverse and insert at the front of the lines.
        self._lines[:0] = lines[::-1]

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if line == '':
            raise StopIteration
        return line

class FeedParser(object):
    """A feed-style parser of email."""

    def __init__(self, _factory=message.Message, **_3to2kwargs):
        """Create a parser that builds message objects with ``_factory``.

        _factory is probed once with a ``policy`` keyword argument; if that
        call raises TypeError it is assumed to be an old-style factory and is
        subsequently called with no arguments to create each new message obj.

        The policy keyword specifies a policy object that controls a number of
        aspects of the parser's operation.  The default policy maintains
        backward compatibility.  Any other keyword arguments are accepted and
        ignored.
        """
        # ``policy`` is a keyword-only argument emulated through **kwargs.
        policy = _3to2kwargs.pop('policy', compat32)
        self._factory = _factory
        self.policy = policy
        try:
            _factory(policy=self.policy)
            self._factory_kwds = lambda: {'policy': self.policy}
        except TypeError:
            # Assume this is an old-style factory
            self._factory_kwds = lambda: {}
        self._input = BufferedSubFile()
        # Stack of messages currently being parsed; the root is at index 0.
        self._msgstack = []
        # _parse advances the parsing generator by one step.
        if PY3:
            self._parse = self._parsegen().__next__
        else:
            self._parse = self._parsegen().next
        # _cur is the message on top of the stack, _last the most recently
        # created (or most recently completed multipart) message.
        self._cur = None
        self._last = None
        self._headersonly = False

    # Non-public interface for supporting Parser's headersonly flag
    def _set_headersonly(self):
        self._headersonly = True

    def feed(self, data):
        """Push more data into the parser."""
        self._input.push(data)
        self._call_parse()

    def _call_parse(self):
        """Advance the parsing generator, ignoring its exhaustion."""
        try:
            self._parse()
        except StopIteration:
            pass

    def close(self):
        """Parse all remaining data and return the root message object."""
        self._input.close()
        self._call_parse()
        root = self._pop_message()
        assert not self._msgstack
        # Look for final set of defects
        if (root.get_content_maintype() == 'multipart'
                and not root.is_multipart()):
            defect = errors.MultipartInvariantViolationDefect()
            self.policy.handle_defect(root, defect)
        return root

    def _new_message(self):
        """Create a message, attach it to its parent and make it current."""
        msg = self._factory(**self._factory_kwds())
        if self._cur and self._cur.get_content_type() == 'multipart/digest':
            msg.set_default_type('message/rfc822')
        if self._msgstack:
            self._msgstack[-1].attach(msg)
        self._msgstack.append(msg)
        self._cur = msg
        self._last = msg

    def _pop_message(self):
        """Pop and return the current message; its parent becomes current."""
        retval = self._msgstack.pop()
        if self._msgstack:
            self._cur = self._msgstack[-1]
        else:
            self._cur = None
        return retval

    def _parsegen(self):
        """Generator that parses one message, recursing for nested parts.

        Yields NeedMoreData whenever the input has no complete line
        available, and finishes once the message it started has been parsed.
        """
        # Create a new message and start by parsing headers.
        self._new_message()
        headers = []
        # Collect the headers, searching for a line that doesn't match the RFC
        # 2822 header or continuation pattern (including an empty line).
        for line in self._input:
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            if not headerRE.match(line):
                # If we saw the RFC defined header/body separator
                # (i.e. newline), just throw it away.  Otherwise the line is
                # part of the body so push it back.
                if not NLCRE.match(line):
                    defect = errors.MissingHeaderBodySeparatorDefect()
                    self.policy.handle_defect(self._cur, defect)
                    self._input.unreadline(line)
                break
            headers.append(line)
        # Done with the headers, so parse them and figure out what we're
        # supposed to see in the body of the message.
        self._parse_headers(headers)
        # Headers-only parsing is a backwards compatibility hack, which was
        # necessary in the older parser, which could raise errors.  All
        # remaining lines in the input are thrown into the message body.
        if self._headersonly:
            lines = []
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                if line == '':
                    break
                lines.append(line)
            self._cur.set_payload(EMPTYSTRING.join(lines))
            return
        if self._cur.get_content_type() == 'message/delivery-status':
            # message/delivery-status contains blocks of headers separated by
            # a blank line.  We'll represent each header block as a separate
            # nested message object, but the processing is a bit different
            # than standard message/* types because there is no body for the
            # nested messages.  A blank line separates the subparts.
            while True:
                self._input.push_eof_matcher(NLCRE.match)
                for retval in self._parsegen():
                    if retval is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                self._pop_message()
                # We need to pop the EOF matcher in order to tell if we're at
                # the end of the current file, not the end of the last block
                # of message headers.
                self._input.pop_eof_matcher()
                # The input stream must be sitting at the newline or at the
                # EOF.  We want to see if we're at the end of this subpart, so
                # first consume the blank line, then test the next line to see
                # if we're at this subpart's EOF.
                while True:
                    line = self._input.readline()
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                while True:
                    line = self._input.readline()
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                if line == '':
                    break
                # Not at EOF so this is a line we're going to need.
                self._input.unreadline(line)
            return
        if self._cur.get_content_maintype() == 'message':
            # The message claims to be a message/* type, then what follows is
            # another RFC 2822 message.
            for retval in self._parsegen():
                if retval is NeedMoreData:
                    yield NeedMoreData
                    continue
                break
            self._pop_message()
            return
        if self._cur.get_content_maintype() == 'multipart':
            boundary = self._cur.get_boundary()
            if boundary is None:
                # The message /claims/ to be a multipart but it has not
                # defined a boundary.  That's a problem which we'll handle by
                # reading everything until the EOF and marking the message as
                # defective.
                defect = errors.NoBoundaryInMultipartDefect()
                self.policy.handle_defect(self._cur, defect)
                lines = []
                for line in self._input:
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    lines.append(line)
                self._cur.set_payload(EMPTYSTRING.join(lines))
                return
            # Make sure a valid content type was specified per RFC 2045:6.4.
            if (self._cur.get('content-transfer-encoding', '8bit').lower()
                    not in ('7bit', '8bit', 'binary')):
                defect = errors.InvalidMultipartContentTransferEncodingDefect()
                self.policy.handle_defect(self._cur, defect)
            # Create a line match predicate which matches the inter-part
            # boundary as well as the end-of-multipart boundary.  Don't push
            # this onto the input stream until we've scanned past the
            # preamble.
            separator = '--' + boundary
            boundaryre = re.compile(
                '(?P<sep>' + re.escape(separator) +
                r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
            capturing_preamble = True
            preamble = []
            linesep = False
            close_boundary_seen = False
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                if line == '':
                    break
                mo = boundaryre.match(line)
                if mo:
                    # If we're looking at the end boundary, we're done with
                    # this multipart.  If there was a newline at the end of
                    # the closing boundary, then we need to initialize the
                    # epilogue with the empty string (see below).
                    if mo.group('end'):
                        close_boundary_seen = True
                        linesep = mo.group('linesep')
                        break
                    # We saw an inter-part boundary.  Were we in the preamble?
                    if capturing_preamble:
                        if preamble:
                            # According to RFC 2046, the last newline belongs
                            # to the boundary.
                            lastline = preamble[-1]
                            eolmo = NLCRE_eol.search(lastline)
                            if eolmo:
                                preamble[-1] = lastline[:-len(eolmo.group(0))]
                            self._cur.preamble = EMPTYSTRING.join(preamble)
                        capturing_preamble = False
                        self._input.unreadline(line)
                        continue
                    # We saw a boundary separating two parts.  Consume any
                    # multiple boundary lines that may be following.  Our
                    # interpretation of RFC 2046 BNF grammar does not produce
                    # body parts within such double boundaries.
                    while True:
                        line = self._input.readline()
                        if line is NeedMoreData:
                            yield NeedMoreData
                            continue
                        mo = boundaryre.match(line)
                        if not mo:
                            self._input.unreadline(line)
                            break
                    # Recurse to parse this subpart; the input stream points
                    # at the subpart's first line.
                    self._input.push_eof_matcher(boundaryre.match)
                    for retval in self._parsegen():
                        if retval is NeedMoreData:
                            yield NeedMoreData
                            continue
                        break
                    # Because of RFC 2046, the newline preceding the boundary
                    # separator actually belongs to the boundary, not the
                    # previous subpart's payload (or epilogue if the previous
                    # part is a multipart).
                    if self._last.get_content_maintype() == 'multipart':
                        epilogue = self._last.epilogue
                        if epilogue == '':
                            self._last.epilogue = None
                        elif epilogue is not None:
                            mo = NLCRE_eol.search(epilogue)
                            if mo:
                                end = len(mo.group(0))
                                self._last.epilogue = epilogue[:-end]
                    else:
                        payload = self._last._payload
                        # NOTE(review): ``str`` is whatever that name means in
                        # this module's scope -- confirm it covers text
                        # payloads on Python 2 as well.
                        if isinstance(payload, str):
                            mo = NLCRE_eol.search(payload)
                            if mo:
                                payload = payload[:-len(mo.group(0))]
                                self._last._payload = payload
                    self._input.pop_eof_matcher()
                    self._pop_message()
                    # Set the multipart up for newline cleansing, which will
                    # happen if we're in a nested multipart.
                    self._last = self._cur
                else:
                    # I think we must be in the preamble
                    assert capturing_preamble
                    preamble.append(line)
            # We've seen either the EOF or the end boundary.  If we're still
            # capturing the preamble, we never saw the start boundary.  Note
            # that as a defect and store the captured text as the payload.
            if capturing_preamble:
                defect = errors.StartBoundaryNotFoundDefect()
                self.policy.handle_defect(self._cur, defect)
                self._cur.set_payload(EMPTYSTRING.join(preamble))
                # Drain the remaining input; the lines read here are not
                # collected, so the epilogue is set to the empty string.
                epilogue = []
                for line in self._input:
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                self._cur.epilogue = EMPTYSTRING.join(epilogue)
                return
            # If we're not processing the preamble, then we might have seen
            # EOF without seeing that end boundary...that is also a defect.
            if not close_boundary_seen:
                defect = errors.CloseBoundaryNotFoundDefect()
                self.policy.handle_defect(self._cur, defect)
                return
            # Everything from here to the EOF is epilogue.  If the end boundary
            # ended in a newline, we'll need to make sure the epilogue isn't
            # None
            if linesep:
                epilogue = ['']
            else:
                epilogue = []
            for line in self._input:
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                epilogue.append(line)
            # Any CRLF at the front of the epilogue is not technically part of
            # the epilogue.  Also, watch out for an empty string epilogue,
            # which means a single newline.
            if epilogue:
                firstline = epilogue[0]
                bolmo = NLCRE_bol.match(firstline)
                if bolmo:
                    epilogue[0] = firstline[len(bolmo.group(0)):]
            self._cur.epilogue = EMPTYSTRING.join(epilogue)
            return
        # Otherwise, it's some non-multipart type, so the entire rest of the
        # file contents becomes the payload.
        lines = []
        for line in self._input:
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            lines.append(line)
        self._cur.set_payload(EMPTYSTRING.join(lines))

    def _parse_headers(self, lines):
        """Store the header block ``lines`` on the current message.

        ``lines`` is the list of raw lines that make up the headers for the
        current msg.  Continuation lines are folded into the preceding header,
        and a leading "From " line is recorded as the unix-from envelope.
        """
        lastheader = ''
        lastvalue = []
        for lineno, line in enumerate(lines):
            # Check for continuation
            if line[0] in ' \t':
                if not lastheader:
                    # The first line of the headers was a continuation.  This
                    # is illegal, so let's note the defect, store the illegal
                    # line, and ignore it for purposes of headers.
                    defect = errors.FirstHeaderLineIsContinuationDefect(line)
                    self.policy.handle_defect(self._cur, defect)
                    continue
                lastvalue.append(line)
                continue
            if lastheader:
                self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
                lastheader, lastvalue = '', []
            # Check for envelope header, i.e. unix-from
            if line.startswith('From '):
                if lineno == 0:
                    # Strip off the trailing newline
                    mo = NLCRE_eol.search(line)
                    if mo:
                        line = line[:-len(mo.group(0))]
                    self._cur.set_unixfrom(line)
                    continue
                elif lineno == len(lines) - 1:
                    # Something looking like a unix-from at the end - it's
                    # probably the first line of the body, so push back the
                    # line and stop.
                    self._input.unreadline(line)
                    return
                else:
                    # Weirdly placed unix-from line.  Note this as a defect
                    # and ignore it.
                    # NOTE(review): unlike the other defects in this class,
                    # this one is appended directly instead of going through
                    # self.policy.handle_defect() -- confirm that bypassing
                    # the policy here is intended.
                    defect = errors.MisplacedEnvelopeHeaderDefect(line)
                    self._cur.defects.append(defect)
                    continue
            # Split the line on the colon separating field name from value.
            # There will always be a colon, because if there wasn't the part of
            # the parser that calls us would have started parsing the body.
            i = line.find(':')
            assert i>0, "_parse_headers fed line with no : and no leading WS"
            lastheader = line[:i]
            lastvalue = [line]
        # Done with all the lines, so handle the last header.
        if lastheader:
            self._cur.set_raw(*self.policy.header_source_parse(lastvalue))

class BytesFeedParser(FeedParser):
    """A FeedParser whose feed() takes bytes rather than text."""

    def feed(self, data):
        """Decode ``data`` as ASCII (surrogateescape) and feed the text on."""
        text = data.decode('ascii', 'surrogateescape')
        super().feed(text)