third_party/google-endpoints/requests/packages/idna/core.py - Issue 2666783008: Add google-endpoints to third_party/.

Side by Side Diff: third_party/google-endpoints/requests/packages/idna/core.py

Issue 2666783008: Add google-endpoints to third_party/. (Closed)

Patch Set: Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/google-endpoints/requests/packages/idna/compat.py ('k') | third_party/google-endpoints/requests/packages/idna/idnadata.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 from . import idnadata

	2 import bisect

	3 import unicodedata

	4 import re

	5 import sys

	6 from .intranges import intranges_contain

	7

	8 _virama_combining_class = 9

	9 _alabel_prefix = b'xn--'

	10 _unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')

	11

	12 if sys.version_info[0] == 3:

	13 unicode = str

	14 unichr = chr

	15

	16 class IDNAError(UnicodeError):

	17 """ Base exception for all IDNA-encoding related problems """

	18 pass

	19

	20

	21 class IDNABidiError(IDNAError):

	22 """ Exception when bidirectional requirements are not satisfied """

	23 pass

	24

	25

	26 class InvalidCodepoint(IDNAError):

	27 """ Exception when a disallowed or unallocated codepoint is used """

	28 pass

	29

	30

	31 class InvalidCodepointContext(IDNAError):

	32 """ Exception when the codepoint is not valid in the context it is used """

	33 pass

	34

	35

	36 def _combining_class(cp):

	37 return unicodedata.combining(unichr(cp))

	38

	39 def _is_script(cp, script):

	40 return intranges_contain(ord(cp), idnadata.scripts[script])

	41

	42 def _punycode(s):

	43 return s.encode('punycode')

	44

	45 def _unot(s):

	46 return 'U+{0:04X}'.format(s)

	47

	48

	49 def valid_label_length(label):

	50

	51 if len(label) > 63:

	52 return False

	53 return True

	54

	55

	56 def valid_string_length(label, trailing_dot):

	57

	58 if len(label) > (254 if trailing_dot else 253):

	59 return False

	60 return True

	61

	62

	63 def check_bidi(label, check_ltr=False):

	64

	65 # Bidi rules should only be applied if string contains RTL characters

	66 bidi_label = False

	67 for (idx, cp) in enumerate(label, 1):

	68 direction = unicodedata.bidirectional(cp)

	69 if direction == '':

	70 # String likely comes from a newer version of Unicode

	71 raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), idx))

	72 if direction in ['R', 'AL', 'AN']:

	73 bidi_label = True

	74 break

	75 if not bidi_label and not check_ltr:

	76 return True

	77

	78 # Bidi rule 1

	79 direction = unicodedata.bidirectional(label[0])

	80 if direction in ['R', 'AL']:

	81 rtl = True

	82 elif direction == 'L':

	83 rtl = False

	84 else:

	85 raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label)))

	86

	87 valid_ending = False

	88 number_type = False

	89 for (idx, cp) in enumerate(label, 1):

	90 direction = unicodedata.bidirectional(cp)

	91

	92 if rtl:

	93 # Bidi rule 2

	94 if not direction in ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:

	95 raise IDNABidiError('Invalid direction for codepoint at position {0} in a right-to-left label'.format(idx))

	96 # Bidi rule 3

	97 if direction in ['R', 'AL', 'EN', 'AN']:

	98 valid_ending = True

	99 elif direction != 'NSM':

	100 valid_ending = False

	101 # Bidi rule 4

	102 if direction in ['AN', 'EN']:

	103 if not number_type:

	104 number_type = direction

	105 else:

	106 if number_type != direction:

	107 raise IDNABidiError('Can not mix numeral types in a righ t-to-left label')

	108 else:

	109 # Bidi rule 5

	110 if not direction in ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'] :

	111 raise IDNABidiError('Invalid direction for codepoint at position {0} in a left-to-right label'.format(idx))

	112 # Bidi rule 6

	113 if direction in ['L', 'EN']:

	114 valid_ending = True

	115 elif direction != 'NSM':

	116 valid_ending = False

	117

	118 if not valid_ending:

	119 raise IDNABidiError('Label ends with illegal codepoint directionality')

	120

	121 return True

	122

	123

	124 def check_initial_combiner(label):

	125

	126 if unicodedata.category(label[0])[0] == 'M':

	127 raise IDNAError('Label begins with an illegal combining character')

	128 return True

	129

	130

	131 def check_hyphen_ok(label):

	132

	133 if label[2:4] == '--':

	134 raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')

	135 if label[0] == '-' or label[-1] == '-':

	136 raise IDNAError('Label must not start or end with a hyphen')

	137 return True

	138

	139

	140 def check_nfc(label):

	141

	142 if unicodedata.normalize('NFC', label) != label:

	143 raise IDNAError('Label must be in Normalization Form C')

	144

	145

	146 def valid_contextj(label, pos):

	147

	148 cp_value = ord(label[pos])

	149

	150 if cp_value == 0x200c:

	151

	152 if pos > 0:

	153 if _combining_class(ord(label[pos - 1])) == _virama_combining_class:

	154 return True

	155

	156 ok = False

	157 for i in range(pos-1, -1, -1):

	158 joining_type = idnadata.joining_types.get(ord(label[i]))

	159 if joining_type == 'T':

	160 continue

	161 if joining_type in ['L', 'D']:

	162 ok = True

	163 break

	164

	165 if not ok:

	166 return False

	167

	168 ok = False

	169 for i in range(pos+1, len(label)):

	170 joining_type = idnadata.joining_types.get(ord(label[i]))

	171 if joining_type == 'T':

	172 continue

	173 if joining_type in ['R', 'D']:

	174 ok = True

	175 break

	176 return ok

	177

	178 if cp_value == 0x200d:

	179

	180 if pos > 0:

	181 if _combining_class(ord(label[pos - 1])) == _virama_combining_class:

	182 return True

	183 return False

	184

	185 else:

	186

	187 return False

	188

	189

	190 def valid_contexto(label, pos, exception=False):

	191

	192 cp_value = ord(label[pos])

	193

	194 if cp_value == 0x00b7:

	195 if 0 < pos < len(label)-1:

	196 if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:

	197 return True

	198 return False

	199

	200 elif cp_value == 0x0375:

	201 if pos < len(label)-1 and len(label) > 1:

	202 return _is_script(label[pos + 1], 'Greek')

	203 return False

	204

	205 elif cp_value == 0x05f3 or cp_value == 0x05f4:

	206 if pos > 0:

	207 return _is_script(label[pos - 1], 'Hebrew')

	208 return False

	209

	210 elif cp_value == 0x30fb:

	211 for cp in label:

	212 if cp == u'\u30fb':

	213 continue

	214 if not _is_script(cp, 'Hiragana') and not _is_script(cp, 'Katakana') and not _is_script(cp, 'Han'):

	215 return False

	216 return True

	217

	218 elif 0x660 <= cp_value <= 0x669:

	219 for cp in label:

	220 if 0x6f0 <= ord(cp) <= 0x06f9:

	221 return False

	222 return True

	223

	224 elif 0x6f0 <= cp_value <= 0x6f9:

	225 for cp in label:

	226 if 0x660 <= ord(cp) <= 0x0669:

	227 return False

	228 return True

	229

	230

	231 def check_label(label):

	232

	233 if isinstance(label, (bytes, bytearray)):

	234 label = label.decode('utf-8')

	235 if len(label) == 0:

	236 raise IDNAError('Empty Label')

	237

	238 check_nfc(label)

	239 check_hyphen_ok(label)

	240 check_initial_combiner(label)

	241

	242 for (pos, cp) in enumerate(label):

	243 cp_value = ord(cp)

	244 if intranges_contain(cp_value, idnadata.codepoint_classes['PVALID']):

	245 continue

	246 elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']) :

	247 if not valid_contextj(label, pos):

	248 raise InvalidCodepointContext('Joiner {0} not allowed at positio n {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))

	249 elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']) :

	250 if not valid_contexto(label, pos):

	251 raise InvalidCodepointContext('Codepoint {0} not allowed at posi tion {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))

	252 else:

	253 raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not all owed'.format(_unot(cp_value), pos+1, repr(label)))

	254

	255 check_bidi(label)

	256

	257

	258 def alabel(label):

	259

	260 try:

	261 label = label.encode('ascii')

	262 try:

	263 ulabel(label)

	264 except:

	265 raise IDNAError('The label {0} is not a valid A-label'.format(label) )

	266 if not valid_label_length(label):

	267 raise IDNAError('Label too long')

	268 return label

	269 except UnicodeError:

	270 pass

	271

	272 if not label:

	273 raise IDNAError('No Input')

	274

	275 label = unicode(label)

	276 check_label(label)

	277 label = _punycode(label)

	278 label = _alabel_prefix + label

	279

	280 if not valid_label_length(label):

	281 raise IDNAError('Label too long')

	282

	283 return label

	284

	285

	286 def ulabel(label):

	287

	288 if not isinstance(label, (bytes, bytearray)):

	289 try:

	290 label = label.encode('ascii')

	291 except UnicodeError:

	292 check_label(label)

	293 return label

	294

	295 label = label.lower()

	296 if label.startswith(_alabel_prefix):

	297 label = label[len(_alabel_prefix):]

	298 else:

	299 check_label(label)

	300 return label.decode('ascii')

	301

	302 label = label.decode('punycode')

	303 check_label(label)

	304 return label

	305

	306

	307 def uts46_remap(domain, std3_rules=True, transitional=False):

	308 """Re-map the characters in the string according to UTS46 processing."""

	309 from .uts46data import uts46data

	310 output = u""

	311 try:

	312 for pos, char in enumerate(domain):

	313 code_point = ord(char)

	314 uts46row = uts46data[code_point if code_point < 256 else

	315 bisect.bisect_left(uts46data, (code_point, "Z")) - 1]

	316 status = uts46row[1]

	317 replacement = uts46row[2] if len(uts46row) == 3 else None

	318 if (status == "V" or

	319 (status == "D" and not transitional) or

	320 (status == "3" and std3_rules and replacement is None)):

	321 output += char

	322 elif replacement is not None and (status == "M" or

	323 (status == "3" and std3_rules) or

	324 (status == "D" and transitional)):

	325 output += replacement

	326 elif status != "I":

	327 raise IndexError()

	328 return unicodedata.normalize("NFC", output)

	329 except IndexError:

	330 raise InvalidCodepoint(

	331 "Codepoint {0} not allowed at position {1} in {2}".format(

	332 _unot(code_point), pos + 1, repr(domain)))

	333

	334

	335 def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):

	336

	337 if isinstance(s, (bytes, bytearray)):

	338 s = s.decode("ascii")

	339 if uts46:

	340 s = uts46_remap(s, std3_rules, transitional)

	341 trailing_dot = False

	342 result = []

	343 if strict:

	344 labels = s.split('.')

	345 else:

	346 labels = _unicode_dots_re.split(s)

	347 while labels and not labels[0]:

	348 del labels[0]

	349 if not labels:

	350 raise IDNAError('Empty domain')

	351 if labels[-1] == '':

	352 del labels[-1]

	353 trailing_dot = True

	354 for label in labels:

	355 result.append(alabel(label))

	356 if trailing_dot:

	357 result.append(b'')

	358 s = b'.'.join(result)

	359 if not valid_string_length(s, trailing_dot):

	360 raise IDNAError('Domain too long')

	361 return s

	362

	363

	364 def decode(s, strict=False, uts46=False, std3_rules=False):

	365

	366 if isinstance(s, (bytes, bytearray)):

	367 s = s.decode("ascii")

	368 if uts46:

	369 s = uts46_remap(s, std3_rules, False)

	370 trailing_dot = False

	371 result = []

	372 if not strict:

	373 labels = _unicode_dots_re.split(s)

	374 else:

	375 labels = s.split(u'.')

	376 while labels and not labels[0]:

	377 del labels[0]

	378 if not labels:

	379 raise IDNAError('Empty domain')

	380 if not labels[-1]:

	381 del labels[-1]

	382 trailing_dot = True

	383 for label in labels:

	384 result.append(ulabel(label))

	385 if trailing_dot:

	386 result.append(u'')

	387 return u'.'.join(result)

OLD	NEW