Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1934)

Side by Side Diff: third_party/google-endpoints/requests/packages/idna/core.py

Issue 2666783008: Add google-endpoints to third_party/. (Closed)
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
(Empty)
1 from . import idnadata
2 import bisect
3 import unicodedata
4 import re
5 import sys
6 from .intranges import intranges_contain
7
# Value that unicodedata.combining() is compared against to detect a virama.
_virama_combining_class = 9
# Prefix prepended to punycode output to form an A-label.
_alabel_prefix = b'xn--'
# Label separators accepted in non-strict mode: U+002E, U+3002, U+FF0E, U+FF61.
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')

# Python 3 has no `unicode`/`unichr` builtins; alias them so the rest of the
# module can use the Python 2 names on either interpreter.
if sys.version_info[0] >= 3:
    unicode = str
    unichr = chr
15
class IDNAError(UnicodeError):
    """Base exception for all IDNA-encoding related problems."""
19
20
class IDNABidiError(IDNAError):
    """Exception when bidirectional requirements are not satisfied."""
24
25
class InvalidCodepoint(IDNAError):
    """Exception when a disallowed or unallocated codepoint is used."""
29
30
class InvalidCodepointContext(IDNAError):
    """Exception when the codepoint is not valid in the context it is used."""
34
35
def _combining_class(cp):
    """Return the canonical combining class of the integer code point *cp*."""
    return unicodedata.combining(unichr(cp))
38
def _is_script(cp, script):
    """Return whether the character *cp* falls in the ranges of *script*.

    *cp* is a one-character string (it is passed through ord()); *script* is
    a key of ``idnadata.scripts``.
    """
    return intranges_contain(ord(cp), idnadata.scripts[script])
41
def _punycode(s):
    """Encode the text *s* with the built-in ``punycode`` codec."""
    return s.encode('punycode')
44
def _unot(s):
    """Format the integer *s* as ``U+XXXX`` (uppercase hex, at least 4 digits)."""
    return 'U+{0:04X}'.format(s)
47
48
def valid_label_length(label):
    """Return True when *label* is at most 63 units long (per ``len``)."""
    return len(label) <= 63
54
55
def valid_string_length(label, trailing_dot):
    """Return True when the whole name fits within the length limit.

    The limit is 253, or 254 when *trailing_dot* is true (the extra unit
    accounts for the final separator).
    """
    limit = 254 if trailing_dot else 253
    return len(label) <= limit
61
62
def check_bidi(label, check_ltr=False):
    """Validate the bidirectional properties of *label*.

    The numbered rules are only enforced when the label contains a character
    of directionality R, AL or AN, or when *check_ltr* is true.

    Returns True when the label passes (or the rules do not apply).
    Raises IDNABidiError when a character has no known directionality or any
    of the rules noted inline is violated.
    """
    # Bidi rules should only be applied if string contains RTL characters
    bidi_label = False
    for (idx, cp) in enumerate(label, 1):
        direction = unicodedata.bidirectional(cp)
        if direction == '':
            # String likely comes from a newer version of Unicode
            raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), idx))
        if direction in ('R', 'AL', 'AN'):
            bidi_label = True
            break
    if not bidi_label and not check_ltr:
        return True

    # Bidi rule 1
    direction = unicodedata.bidirectional(label[0])
    if direction in ('R', 'AL'):
        rtl = True
    elif direction == 'L':
        rtl = False
    else:
        raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label)))

    # valid_ending tracks whether the most recent non-NSM character is an
    # acceptable final character; number_type remembers the first numeral
    # class (AN or EN) seen in a right-to-left label.
    valid_ending = False
    number_type = False
    for (idx, cp) in enumerate(label, 1):
        direction = unicodedata.bidirectional(cp)

        if rtl:
            # Bidi rule 2
            if direction not in ('R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'):
                raise IDNABidiError('Invalid direction for codepoint at position {0} in a right-to-left label'.format(idx))
            # Bidi rule 3
            if direction in ('R', 'AL', 'EN', 'AN'):
                valid_ending = True
            elif direction != 'NSM':
                valid_ending = False
            # Bidi rule 4
            if direction in ('AN', 'EN'):
                if not number_type:
                    number_type = direction
                elif number_type != direction:
                    raise IDNABidiError('Can not mix numeral types in a right-to-left label')
        else:
            # Bidi rule 5
            if direction not in ('L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'):
                raise IDNABidiError('Invalid direction for codepoint at position {0} in a left-to-right label'.format(idx))
            # Bidi rule 6
            if direction in ('L', 'EN'):
                valid_ending = True
            elif direction != 'NSM':
                valid_ending = False

    if not valid_ending:
        raise IDNABidiError('Label ends with illegal codepoint directionality')

    return True
122
123
def check_initial_combiner(label):
    """Reject labels whose first character is a combining mark.

    Returns True when the first character's general category does not start
    with 'M'; raises IDNAError otherwise. *label* must be non-empty.
    """
    if unicodedata.category(label[0])[0] == 'M':
        raise IDNAError('Label begins with an illegal combining character')
    return True
129
130
def check_hyphen_ok(label):
    """Enforce the hyphen placement restrictions on *label*.

    Raises IDNAError when the 3rd and 4th characters are both hyphens, or
    when the label starts or ends with a hyphen. Returns True otherwise.
    *label* must be non-empty.
    """
    if label[2:4] == '--':
        raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
    if label[0] == '-' or label[-1] == '-':
        raise IDNAError('Label must not start or end with a hyphen')
    return True
138
139
def check_nfc(label):
    """Raise IDNAError unless *label* is already in Normalization Form C.

    Returns None on success.
    """
    if unicodedata.normalize('NFC', label) != label:
        raise IDNAError('Label must be in Normalization Form C')
144
145
def valid_contextj(label, pos):
    """Check the contextual rule for the joiner at ``label[pos]``.

    Handles U+200C (zero width non-joiner) and U+200D (zero width joiner);
    any other code point yields False.

    * Either joiner is valid directly after a character whose combining
      class equals ``_virama_combining_class``.
    * U+200C is additionally valid when, skipping characters of joining type
      'T' on both sides, the nearest preceding character has joining type
      'L' or 'D' and the nearest following character has joining type 'R'
      or 'D'.

    Returns True or False; never raises for a valid *pos*.
    """
    cp_value = ord(label[pos])

    if cp_value not in (0x200c, 0x200d):
        return False

    # Both joiners share the "preceded by a virama" rule.
    if pos > 0 and _combining_class(ord(label[pos - 1])) == _virama_combining_class:
        return True

    if cp_value == 0x200d:
        return False

    # U+200C: walk backwards over transparent ('T') characters; the first
    # non-transparent one decides the left-hand side.
    joined_before = False
    for i in range(pos - 1, -1, -1):
        joining_type = idnadata.joining_types.get(ord(label[i]))
        if joining_type == 'T':
            continue
        joined_before = joining_type in ('L', 'D')
        break
    if not joined_before:
        return False

    # Same walk forwards for the right-hand side.
    for i in range(pos + 1, len(label)):
        joining_type = idnadata.joining_types.get(ord(label[i]))
        if joining_type == 'T':
            continue
        return joining_type in ('R', 'D')
    return False
188
189
def valid_contexto(label, pos, exception=False):
    """Check the contextual rule for the CONTEXTO character at ``label[pos]``.

    Rules by code point:

    * U+00B7: must sit between two U+006C ('l') characters.
    * U+0375: must be followed by a Greek-script character.
    * U+05F3, U+05F4: must be preceded by a Hebrew-script character.
    * U+30FB: at least one other character in the label must be Hiragana,
      Katakana or Han.
    * U+0660..U+0669: label must not contain any of U+06F0..U+06F9.
    * U+06F0..U+06F9: label must not contain any of U+0660..U+0669.

    Any other code point returns False. *exception* is accepted for
    interface compatibility and is not used.
    """
    cp_value = ord(label[pos])

    if cp_value == 0x00b7:
        if 0 < pos < len(label)-1:
            if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:
                return True
        return False

    elif cp_value == 0x0375:
        if pos < len(label)-1 and len(label) > 1:
            return _is_script(label[pos + 1], 'Greek')
        return False

    elif cp_value == 0x05f3 or cp_value == 0x05f4:
        if pos > 0:
            return _is_script(label[pos - 1], 'Hebrew')
        return False

    elif cp_value == 0x30fb:
        # The middle dot itself is skipped; one kana/Han character anywhere
        # in the label is enough to satisfy the rule.
        for cp in label:
            if cp == u'\u30fb':
                continue
            if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'):
                return True
        return False

    elif 0x660 <= cp_value <= 0x669:
        for cp in label:
            if 0x6f0 <= ord(cp) <= 0x06f9:
                return False
        return True

    elif 0x6f0 <= cp_value <= 0x6f9:
        for cp in label:
            if 0x660 <= ord(cp) <= 0x0669:
                return False
        return True

    # No rule defined for this code point.
    return False
229
230
def check_label(label):
    """Validate a single label, raising on the first problem found.

    Byte input is decoded as UTF-8 first. Checks run in this order: non-empty,
    NFC, hyphen placement, no leading combining mark, per-code-point class
    (PVALID / CONTEXTJ / CONTEXTO) and finally the bidi rules.

    Raises:
        IDNAError: empty label, or a failure in one of the helper checks.
        InvalidCodepointContext: a CONTEXTJ/CONTEXTO code point whose
            contextual rule is not satisfied.
        InvalidCodepoint: a code point in none of the three classes.
    """
    if isinstance(label, (bytes, bytearray)):
        label = label.decode('utf-8')
    if len(label) == 0:
        raise IDNAError('Empty Label')

    check_nfc(label)
    check_hyphen_ok(label)
    check_initial_combiner(label)

    classes = idnadata.codepoint_classes
    for (pos, cp) in enumerate(label):
        cp_value = ord(cp)
        if intranges_contain(cp_value, classes['PVALID']):
            continue
        elif intranges_contain(cp_value, classes['CONTEXTJ']):
            if not valid_contextj(label, pos):
                raise InvalidCodepointContext('Joiner {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
        elif intranges_contain(cp_value, classes['CONTEXTO']):
            if not valid_contexto(label, pos):
                raise InvalidCodepointContext('Codepoint {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
        else:
            raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))

    check_bidi(label)
256
257
def alabel(label):
    """Convert one label to its A-label (ASCII bytes) form.

    An all-ASCII label is validated with ulabel() and returned as ASCII
    bytes. Any other label is validated with check_label(), punycode-encoded
    and prefixed with ``_alabel_prefix``.

    Raises:
        IDNAError: empty input, an ASCII label that fails validation, a
            result longer than valid_label_length() allows, or any error
            raised by check_label().
    """
    # Only the encode attempt is guarded. IDNAError derives from UnicodeError,
    # so raising validation errors inside this try would get them swallowed.
    try:
        ascii_label = label.encode('ascii')
    except UnicodeError:
        ascii_label = None

    if ascii_label is not None:
        if not ascii_label:
            raise IDNAError('No Input')
        try:
            ulabel(ascii_label)
        except Exception:
            # Any validation or decoding failure means this is not an A-label.
            raise IDNAError('The label {0} is not a valid A-label'.format(ascii_label))
        if not valid_label_length(ascii_label):
            raise IDNAError('Label too long')
        return ascii_label

    # Non-ASCII path; the label cannot be empty here.
    label = unicode(label)
    check_label(label)
    label = _alabel_prefix + _punycode(label)

    if not valid_label_length(label):
        raise IDNAError('Label too long')

    return label
284
285
def ulabel(label):
    """Convert one label to its U-label (text) form.

    * Non-ASCII text is validated with check_label() and returned unchanged.
    * ASCII input (text or bytes) is lower-cased; without the
      ``_alabel_prefix`` it is validated and returned decoded as ASCII.
    * With the prefix, the remainder is punycode-decoded, validated and
      returned.

    Errors from check_label() and from the punycode codec propagate.
    """
    if not isinstance(label, (bytes, bytearray)):
        try:
            label = label.encode('ascii')
        except UnicodeError:
            check_label(label)
            return label

    label = label.lower()
    if not label.startswith(_alabel_prefix):
        check_label(label)
        return label.decode('ascii')

    label = label[len(_alabel_prefix):].decode('punycode')
    check_label(label)
    return label
305
306
def uts46_remap(domain, std3_rules=True, transitional=False):
    """Re-map the characters in the string according to UTS46 processing.

    Each character is looked up in ``uts46data`` and handled by its status:

    * "V": kept.
    * "D": kept, or replaced by its mapping when *transitional* is true.
    * "3": rejected when *std3_rules* is true; otherwise kept, or replaced
      when the row carries a mapping.
    * "M": replaced by its mapping.
    * "I": dropped.
    * anything else: rejected.

    Returns the remapped string normalized to NFC.
    Raises InvalidCodepoint for a rejected character or a failed table lookup.
    """
    from .uts46data import uts46data
    output = []
    try:
        for pos, char in enumerate(domain):
            code_point = ord(char)
            # The first 256 rows are indexed directly; beyond that the table
            # holds range starts, so bisect to the row covering code_point.
            uts46row = uts46data[code_point if code_point < 256 else
                bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
            status = uts46row[1]
            replacement = uts46row[2] if len(uts46row) == 3 else None
            if (status == "V" or
                    (status == "D" and not transitional) or
                    (status == "3" and not std3_rules and replacement is None)):
                output.append(char)
            elif replacement is not None and (status == "M" or
                    (status == "3" and not std3_rules) or
                    (status == "D" and transitional)):
                output.append(replacement)
            elif status != "I":
                # Funnel rejections into the same handler as lookup failures.
                raise IndexError()
        return unicodedata.normalize("NFC", u"".join(output))
    except IndexError:
        raise InvalidCodepoint(
            "Codepoint {0} not allowed at position {1} in {2}".format(
            _unot(code_point), pos + 1, repr(domain)))
333
334
def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
    """Encode a domain name to its ASCII (A-label) byte form.

    Args:
        s: domain as text, or ASCII bytes (decoded first).
        strict: split labels on '.' only; otherwise on any separator matched
            by ``_unicode_dots_re``.
        uts46: run uts46_remap() on the input before splitting.
        std3_rules, transitional: forwarded to uts46_remap().

    Leading empty labels are discarded; one trailing empty label is kept as
    a trailing dot. Each label is converted with alabel().

    Raises IDNAError for an empty domain, a result that fails
    valid_string_length(), or any error raised by alabel().
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("ascii")
    if uts46:
        s = uts46_remap(s, std3_rules, transitional)

    labels = s.split('.') if strict else _unicode_dots_re.split(s)
    while labels and not labels[0]:
        del labels[0]
    if not labels:
        raise IDNAError('Empty domain')

    trailing_dot = False
    if labels[-1] == '':
        del labels[-1]
        trailing_dot = True

    result = [alabel(label) for label in labels]
    if trailing_dot:
        result.append(b'')
    s = b'.'.join(result)
    if not valid_string_length(s, trailing_dot):
        raise IDNAError('Domain too long')
    return s
362
363
def decode(s, strict=False, uts46=False, std3_rules=False):
    """Decode a domain name to its Unicode (U-label) text form.

    Args:
        s: domain as text, or ASCII bytes (decoded first).
        strict: split labels on '.' only; otherwise on any separator matched
            by ``_unicode_dots_re``.
        uts46: run uts46_remap() (non-transitional) before splitting.
        std3_rules: forwarded to uts46_remap().

    Leading empty labels are discarded; one trailing empty label is kept as
    a trailing dot. Each label is converted with ulabel().

    Raises IDNAError for an empty domain, plus any error raised by ulabel().
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("ascii")
    if uts46:
        s = uts46_remap(s, std3_rules, False)

    labels = s.split(u'.') if strict else _unicode_dots_re.split(s)
    while labels and not labels[0]:
        del labels[0]
    if not labels:
        raise IDNAError('Empty domain')

    trailing_dot = False
    if not labels[-1]:
        del labels[-1]
        trailing_dot = True

    result = [ulabel(label) for label in labels]
    if trailing_dot:
        result.append(u'')
    return u'.'.join(result)
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698