third_party/google-endpoints/future/backports/email/_parseaddr.py - Issue 2666783008: Add google-endpoints to third_party/.

Side by Side Diff: third_party/google-endpoints/future/backports/email/_parseaddr.py

Issue 2666783008: Add google-endpoints to third_party/. (Closed)

Patch Set: Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/google-endpoints/future/backports/email/_header_value_parser.py ('k') | third_party/google-endpoints/future/backports/email/_policybase.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 # Copyright (C) 2002-2007 Python Software Foundation

	2 # Contact: email-sig@python.org

	3

	4 """Email address parsing code.

	5

	6 Lifted directly from rfc822.py. This should eventually be rewritten.

	7 """

	8

	9 from __future__ import unicode_literals

	10 from __future__ import print_function

	11 from __future__ import division

	12 from __future__ import absolute_import

	13 from future.builtins import int

	14

# Names exported by ``from <this module> import *``.  The address-parsing
# classes defined in this module are not listed here.
__all__ = [
    'mktime_tz',
    'parsedate',
    'parsedate_tz',
    'quote',
    ]

	21

	22 import time, calendar

	23

	24 SPACE = ' '

	25 EMPTYSTRING = ''

	26 COMMASPACE = ', '

	27

	28 # Parse a date field

	29 _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',

	30 'aug', 'sep', 'oct', 'nov', 'dec',

	31 'january', 'february', 'march', 'april', 'may', 'june', 'july',

	32 'august', 'september', 'october', 'november', 'december']

	33

	34 _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

	35

	36 # The timezone table does not include the military time zones defined

	37 # in RFC822, other than Z. According to RFC1123, the description in

	38 # RFC822 gets the signs wrong, so we can't rely on any such time

	39 # zones. RFC1123 recommends that numeric timezone indicators be used

	40 # instead of timezone names.

	41

	42 _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,

	43 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)

	44 'EST': -500, 'EDT': -400, # Eastern

	45 'CST': -600, 'CDT': -500, # Central

	46 'MST': -700, 'MDT': -600, # Mountain

	47 'PST': -800, 'PDT': -700 # Pacific

	48 }

	49

	50

	51 def parsedate_tz(data):

	52 """Convert a date string to a time tuple.

	53

	54 Accounts for military timezones.

	55 """

	56 res = _parsedate_tz(data)

	57 if not res:

	58 return

	59 if res[9] is None:

	60 res[9] = 0

	61 return tuple(res)

	62

	63 def _parsedate_tz(data):

	64 """Convert date to extended time tuple.

	65

	66 The last (additional) element is the time zone offset in seconds, except if

	67 the timezone was specified as -0000. In that case the last element is

	68 None. This indicates a UTC timestamp that explicitly declaims knowledge of

	69 the source timezone, as opposed to a +0000 timestamp that indicates the

	70 source timezone really was UTC.

	71

	72 """

	73 if not data:

	74 return

	75 data = data.split()

	76 # The FWS after the comma after the day-of-week is optional, so search and

	77 # adjust for this.

	78 if data[0].endswith(',') or data[0].lower() in _daynames:

	79 # There's a dayname here. Skip it

	80 del data[0]

	81 else:

	82 i = data[0].rfind(',')

	83 if i >= 0:

	84 data[0] = data[0][i+1:]

	85 if len(data) == 3: # RFC 850 date, deprecated

	86 stuff = data[0].split('-')

	87 if len(stuff) == 3:

	88 data = stuff + data[1:]

	89 if len(data) == 4:

	90 s = data[3]

	91 i = s.find('+')

	92 if i == -1:

	93 i = s.find('-')

	94 if i > 0:

	95 data[3:] = [s[:i], s[i:]]

	96 else:

	97 data.append('') # Dummy tz

	98 if len(data) < 5:

	99 return None

	100 data = data[:5]

	101 [dd, mm, yy, tm, tz] = data

	102 mm = mm.lower()

	103 if mm not in _monthnames:

	104 dd, mm = mm, dd.lower()

	105 if mm not in _monthnames:

	106 return None

	107 mm = _monthnames.index(mm) + 1

	108 if mm > 12:

	109 mm -= 12

	110 if dd[-1] == ',':

	111 dd = dd[:-1]

	112 i = yy.find(':')

	113 if i > 0:

	114 yy, tm = tm, yy

	115 if yy[-1] == ',':

	116 yy = yy[:-1]

	117 if not yy[0].isdigit():

	118 yy, tz = tz, yy

	119 if tm[-1] == ',':

	120 tm = tm[:-1]

	121 tm = tm.split(':')

	122 if len(tm) == 2:

	123 [thh, tmm] = tm

	124 tss = '0'

	125 elif len(tm) == 3:

	126 [thh, tmm, tss] = tm

	127 elif len(tm) == 1 and '.' in tm[0]:

	128 # Some non-compliant MUAs use '.' to separate time elements.

	129 tm = tm[0].split('.')

	130 if len(tm) == 2:

	131 [thh, tmm] = tm

	132 tss = 0

	133 elif len(tm) == 3:

	134 [thh, tmm, tss] = tm

	135 else:

	136 return None

	137 try:

	138 yy = int(yy)

	139 dd = int(dd)

	140 thh = int(thh)

	141 tmm = int(tmm)

	142 tss = int(tss)

	143 except ValueError:

	144 return None

	145 # Check for a yy specified in two-digit format, then convert it to the

	146 # appropriate four-digit format, according to the POSIX standard. RFC 822

	147 # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)

	148 # mandates a 4-digit yy. For more information, see the documentation for

	149 # the time module.

	150 if yy < 100:

	151 # The year is between 1969 and 1999 (inclusive).

	152 if yy > 68:

	153 yy += 1900

	154 # The year is between 2000 and 2068 (inclusive).

	155 else:

	156 yy += 2000

	157 tzoffset = None

	158 tz = tz.upper()

	159 if tz in _timezones:

	160 tzoffset = _timezones[tz]

	161 else:

	162 try:

	163 tzoffset = int(tz)

	164 except ValueError:

	165 pass

	166 if tzoffset==0 and tz.startswith('-'):

	167 tzoffset = None

	168 # Convert a timezone offset into seconds ; -0500 -> -18000

	169 if tzoffset:

	170 if tzoffset < 0:

	171 tzsign = -1

	172 tzoffset = -tzoffset

	173 else:

	174 tzsign = 1

	175 tzoffset = tzsign * ( (tzoffset//100)3600 + (tzoffset % 100)60)

	176 # Daylight Saving Time flag is set to -1, since DST is unknown.

	177 return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]

	178

	179

	180 def parsedate(data):

	181 """Convert a time string to a time tuple."""

	182 t = parsedate_tz(data)

	183 if isinstance(t, tuple):

	184 return t[:9]

	185 else:

	186 return t

	187

	188

	189 def mktime_tz(data):

	190 """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp."""

	191 if data[9] is None:

	192 # No zone info, so localtime is better assumption than GMT

	193 return time.mktime(data[:8] + (-1,))

	194 else:

	195 t = calendar.timegm(data)

	196 return t - data[9]

	197

	198

	199 def quote(str):

	200 """Prepare string to be used in a quoted string.

	201

	202 Turns backslash and double quote characters into quoted pairs. These

	203 are the only characters that need to be quoted inside a quoted string.

	204 Does not add the surrounding double quotes.

	205 """

	206 return str.replace('\\', '\\\\').replace('"', '\\"')

	207

	208

	209 class AddrlistClass(object):

	210 """Address parser class by Ben Escoto.

	211

	212 To understand what this class does, it helps to have a copy of RFC 2822 in

	213 front of you.

	214

	215 Note: this class interface is deprecated and may be removed in the future.

	216 Use email.utils.AddressList instead.

	217 """

	218

	219 def __init__(self, field):

	220 """Initialize a new instance.

	221

	222 `field' is an unparsed address header field, containing

	223 one or more addresses.

	224 """

	225 self.specials = '()<>@,:;.\"[]'

	226 self.pos = 0

	227 self.LWS = ' \t'

	228 self.CR = '\r\n'

	229 self.FWS = self.LWS + self.CR

	230 self.atomends = self.specials + self.LWS + self.CR

	231 # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it

	232 # is obsolete syntax. RFC 2822 requires that we recognize obsolete

	233 # syntax, so allow dots in phrases.

	234 self.phraseends = self.atomends.replace('.', '')

	235 self.field = field

	236 self.commentlist = []

	237

	238 def gotonext(self):

	239 """Skip white space and extract comments."""

	240 wslist = []

	241 while self.pos < len(self.field):

	242 if self.field[self.pos] in self.LWS + '\n\r':

	243 if self.field[self.pos] not in '\n\r':

	244 wslist.append(self.field[self.pos])

	245 self.pos += 1

	246 elif self.field[self.pos] == '(':

	247 self.commentlist.append(self.getcomment())

	248 else:

	249 break

	250 return EMPTYSTRING.join(wslist)

	251

	252 def getaddrlist(self):

	253 """Parse all addresses.

	254

	255 Returns a list containing all of the addresses.

	256 """

	257 result = []

	258 while self.pos < len(self.field):

	259 ad = self.getaddress()

	260 if ad:

	261 result += ad

	262 else:

	263 result.append(('', ''))

	264 return result

	265

	266 def getaddress(self):

	267 """Parse the next address."""

	268 self.commentlist = []

	269 self.gotonext()

	270

	271 oldpos = self.pos

	272 oldcl = self.commentlist

	273 plist = self.getphraselist()

	274

	275 self.gotonext()

	276 returnlist = []

	277

	278 if self.pos >= len(self.field):

	279 # Bad email address technically, no domain.

	280 if plist:

	281 returnlist = [(SPACE.join(self.commentlist), plist[0])]

	282

	283 elif self.field[self.pos] in '.@':

	284 # email address is just an addrspec

	285 # this isn't very efficient since we start over

	286 self.pos = oldpos

	287 self.commentlist = oldcl

	288 addrspec = self.getaddrspec()

	289 returnlist = [(SPACE.join(self.commentlist), addrspec)]

	290

	291 elif self.field[self.pos] == ':':

	292 # address is a group

	293 returnlist = []

	294

	295 fieldlen = len(self.field)

	296 self.pos += 1

	297 while self.pos < len(self.field):

	298 self.gotonext()

	299 if self.pos < fieldlen and self.field[self.pos] == ';':

	300 self.pos += 1

	301 break

	302 returnlist = returnlist + self.getaddress()

	303

	304 elif self.field[self.pos] == '<':

	305 # Address is a phrase then a route addr

	306 routeaddr = self.getrouteaddr()

	307

	308 if self.commentlist:

	309 returnlist = [(SPACE.join(plist) + ' (' +

	310 ' '.join(self.commentlist) + ')', routeaddr)]

	311 else:

	312 returnlist = [(SPACE.join(plist), routeaddr)]

	313

	314 else:

	315 if plist:

	316 returnlist = [(SPACE.join(self.commentlist), plist[0])]

	317 elif self.field[self.pos] in self.specials:

	318 self.pos += 1

	319

	320 self.gotonext()

	321 if self.pos < len(self.field) and self.field[self.pos] == ',':

	322 self.pos += 1

	323 return returnlist

	324

	325 def getrouteaddr(self):

	326 """Parse a route address (Return-path value).

	327

	328 This method just skips all the route stuff and returns the addrspec.

	329 """

	330 if self.field[self.pos] != '<':

	331 return

	332

	333 expectroute = False

	334 self.pos += 1

	335 self.gotonext()

	336 adlist = ''

	337 while self.pos < len(self.field):

	338 if expectroute:

	339 self.getdomain()

	340 expectroute = False

	341 elif self.field[self.pos] == '>':

	342 self.pos += 1

	343 break

	344 elif self.field[self.pos] == '@':

	345 self.pos += 1

	346 expectroute = True

	347 elif self.field[self.pos] == ':':

	348 self.pos += 1

	349 else:

	350 adlist = self.getaddrspec()

	351 self.pos += 1

	352 break

	353 self.gotonext()

	354

	355 return adlist

	356

	357 def getaddrspec(self):

	358 """Parse an RFC 2822 addr-spec."""

	359 aslist = []

	360

	361 self.gotonext()

	362 while self.pos < len(self.field):

	363 preserve_ws = True

	364 if self.field[self.pos] == '.':

	365 if aslist and not aslist[-1].strip():

	366 aslist.pop()

	367 aslist.append('.')

	368 self.pos += 1

	369 preserve_ws = False

	370 elif self.field[self.pos] == '"':

	371 aslist.append('"%s"' % quote(self.getquote()))

	372 elif self.field[self.pos] in self.atomends:

	373 if aslist and not aslist[-1].strip():

	374 aslist.pop()

	375 break

	376 else:

	377 aslist.append(self.getatom())

	378 ws = self.gotonext()

	379 if preserve_ws and ws:

	380 aslist.append(ws)

	381

	382 if self.pos >= len(self.field) or self.field[self.pos] != '@':

	383 return EMPTYSTRING.join(aslist)

	384

	385 aslist.append('@')

	386 self.pos += 1

	387 self.gotonext()

	388 return EMPTYSTRING.join(aslist) + self.getdomain()

	389

	390 def getdomain(self):

	391 """Get the complete domain name from an address."""

	392 sdlist = []

	393 while self.pos < len(self.field):

	394 if self.field[self.pos] in self.LWS:

	395 self.pos += 1

	396 elif self.field[self.pos] == '(':

	397 self.commentlist.append(self.getcomment())

	398 elif self.field[self.pos] == '[':

	399 sdlist.append(self.getdomainliteral())

	400 elif self.field[self.pos] == '.':

	401 self.pos += 1

	402 sdlist.append('.')

	403 elif self.field[self.pos] in self.atomends:

	404 break

	405 else:

	406 sdlist.append(self.getatom())

	407 return EMPTYSTRING.join(sdlist)

	408

	409 def getdelimited(self, beginchar, endchars, allowcomments=True):

	410 """Parse a header fragment delimited by special characters.

	411

	412 `beginchar' is the start character for the fragment.

	413 If self is not looking at an instance of `beginchar' then

	414 getdelimited returns the empty string.

	415

	416 `endchars' is a sequence of allowable end-delimiting characters.

	417 Parsing stops when one of these is encountered.

	418

	419 If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed

	420 within the parsed fragment.

	421 """

	422 if self.field[self.pos] != beginchar:

	423 return ''

	424

	425 slist = ['']

	426 quote = False

	427 self.pos += 1

	428 while self.pos < len(self.field):

	429 if quote:

	430 slist.append(self.field[self.pos])

	431 quote = False

	432 elif self.field[self.pos] in endchars:

	433 self.pos += 1

	434 break

	435 elif allowcomments and self.field[self.pos] == '(':

	436 slist.append(self.getcomment())

	437 continue # have already advanced pos from getcomment

	438 elif self.field[self.pos] == '\\':

	439 quote = True

	440 else:

	441 slist.append(self.field[self.pos])

	442 self.pos += 1

	443

	444 return EMPTYSTRING.join(slist)

	445

	446 def getquote(self):

	447 """Get a quote-delimited fragment from self's field."""

	448 return self.getdelimited('"', '"\r', False)

	449

	450 def getcomment(self):

	451 """Get a parenthesis-delimited fragment from self's field."""

	452 return self.getdelimited('(', ')\r', True)

	453

	454 def getdomainliteral(self):

	455 """Parse an RFC 2822 domain-literal."""

	456 return '[%s]' % self.getdelimited('[', ']\r', False)

	457

	458 def getatom(self, atomends=None):

	459 """Parse an RFC 2822 atom.

	460

	461 Optional atomends specifies a different set of end token delimiters

	462 (the default is to use self.atomends). This is used e.g. in

	463 getphraselist() since phrase endings must not include the `.' (which

	464 is legal in phrases)."""

	465 atomlist = ['']

	466 if atomends is None:

	467 atomends = self.atomends

	468

	469 while self.pos < len(self.field):

	470 if self.field[self.pos] in atomends:

	471 break

	472 else:

	473 atomlist.append(self.field[self.pos])

	474 self.pos += 1

	475

	476 return EMPTYSTRING.join(atomlist)

	477

	478 def getphraselist(self):

	479 """Parse a sequence of RFC 2822 phrases.

	480

	481 A phrase is a sequence of words, which are in turn either RFC 2822

	482 atoms or quoted-strings. Phrases are canonicalized by squeezing all

	483 runs of continuous whitespace into one space.

	484 """

	485 plist = []

	486

	487 while self.pos < len(self.field):

	488 if self.field[self.pos] in self.FWS:

	489 self.pos += 1

	490 elif self.field[self.pos] == '"':

	491 plist.append(self.getquote())

	492 elif self.field[self.pos] == '(':

	493 self.commentlist.append(self.getcomment())

	494 elif self.field[self.pos] in self.phraseends:

	495 break

	496 else:

	497 plist.append(self.getatom(self.phraseends))

	498

	499 return plist

	500

	501 class AddressList(AddrlistClass):

	502 """An AddressList encapsulates a list of parsed RFC 2822 addresses."""

	503 def __init__(self, field):

	504 AddrlistClass.__init__(self, field)

	505 if field:

	506 self.addresslist = self.getaddrlist()

	507 else:

	508 self.addresslist = []

	509

	510 def __len__(self):

	511 return len(self.addresslist)

	512

	513 def __add__(self, other):

	514 # Set union

	515 newaddr = AddressList(None)

	516 newaddr.addresslist = self.addresslist[:]

	517 for x in other.addresslist:

	518 if not x in self.addresslist:

	519 newaddr.addresslist.append(x)

	520 return newaddr

	521

	522 def __iadd__(self, other):

	523 # Set union, in-place

	524 for x in other.addresslist:

	525 if not x in self.addresslist:

	526 self.addresslist.append(x)

	527 return self

	528

	529 def __sub__(self, other):

	530 # Set difference

	531 newaddr = AddressList(None)

	532 for x in self.addresslist:

	533 if not x in other.addresslist:

	534 newaddr.addresslist.append(x)

	535 return newaddr

	536

	537 def __isub__(self, other):

	538 # Set difference, in-place

	539 for x in other.addresslist:

	540 if x in self.addresslist:

	541 self.addresslist.remove(x)

	542 return self

	543

	544 def __getitem__(self, index):

	545 # Make indexing, slices, and 'in' work

	546 return self.addresslist[index]

OLD	NEW