OLD | NEW |
(Empty) | |
| 1 from .core import encode, decode, alabel, ulabel, IDNAError |
| 2 import codecs |
| 3 import re |
| 4 |
# Matches any single character that this module treats as a label separator:
# U+002E FULL STOP, U+3002 IDEOGRAPHIC FULL STOP, U+FF0E FULLWIDTH FULL STOP
# and U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP.
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')
| 6 |
class Codec(codecs.Codec):
    """Stateless IDNA codec that delegates to ``encode`` and ``decode``.

    Only the ``'strict'`` error handler is supported; any other value is
    rejected with ``IDNAError`` before the input is looked at.
    """

    def encode(self, data, errors='strict'):
        """Encode a complete domain name in a single call.

        :param data: the domain name to encode.
        :param errors: error handling scheme; must be ``'strict'``.
        :returns: ``(encoded, consumed)`` where ``consumed`` is ``len(data)``.
        :raises IDNAError: if ``errors`` is not ``'strict'``.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            # An encoder must hand back a byte string even when there is
            # nothing to encode.  b"" is a plain str on Python 2 (so nothing
            # changes there) and bytes on Python 3.
            return b"", 0

        return encode(data), len(data)

    def decode(self, data, errors='strict'):
        """Decode a complete domain name in a single call.

        :param data: the encoded domain name.
        :param errors: error handling scheme; must be ``'strict'``.
        :returns: ``(decoded, consumed)`` where ``consumed`` is ``len(data)``.
        :raises IDNAError: if ``errors`` is not ``'strict'``.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return u"", 0

        return decode(data), len(data)
| 28 |
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental IDNA encoder that only emits complete labels.

    The buffered base class keeps whatever input ``_buffer_encode`` reports
    as not consumed and passes it in again, followed by the new input, on
    the next call.
    """

    def _buffer_encode(self, data, errors, final):
        """Encode every label of ``data`` that is known to be complete.

        :param data: buffered text (leftover from earlier calls plus new
            input).
        :param errors: error handling scheme; must be ``'strict'``.
        :param final: true when no further input will follow, in which case
            the last label is encoded even without a terminating dot.
        :returns: ``(encoded, consumed)`` where ``consumed`` is the number of
            characters of ``data`` that were turned into output.
        :raises IDNAError: if ``errors`` is not ``'strict'``.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return (b"", 0)

        labels = _unicode_dots_re.split(data)
        # Byte literals throughout: the output of an encoder is a byte
        # string, and mixing u'' with '.' made the result type depend on
        # whether a trailing dot happened to be present.
        trailing_dot = b""
        if labels:
            if not labels[-1]:
                # Input ended on a separator: every label is complete.
                trailing_dot = b"."
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = b"."

        # NOTE(review): assumes alabel() returns byte strings, as an
        # encoder's output must be -- confirm against .core.
        encoded = [alabel(label) for label in labels]

        # Consumed input: the labels themselves, one separator between each
        # pair of them, plus the trailing separator if one was emitted.
        # Counting separators by position rather than by "size so far is
        # non-zero" keeps the figure right when a leading label is empty.
        size = sum(len(label) for label in labels)
        if labels:
            size += len(labels) - 1
        size += len(trailing_dot)

        # Join with U+002E
        return (b".".join(encoded) + trailing_dot, size)
| 61 |
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental IDNA decoder that only emits complete labels.

    The buffered base class keeps whatever input ``_buffer_decode`` reports
    as not consumed and passes it in again, followed by the new input, on
    the next call.
    """

    def _buffer_decode(self, data, errors, final):
        """Decode every label of ``data`` that is known to be complete.

        :param data: buffered input; either text or an ASCII byte string
            (or an object ``bytes()`` can convert to one).
        :param errors: error handling scheme; must be ``'strict'``.
        :param final: true when no further input will follow, in which case
            the last label is decoded even without a terminating dot.
        :returns: ``(decoded, consumed)`` where ``consumed`` is the number of
            items of ``data`` that were turned into output.
        :raises IDNAError: if ``errors`` is not ``'strict'``.
        :raises UnicodeDecodeError: if byte input contains non-ASCII bytes.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return (u"", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so
        # this check works where the bare ``unicode`` builtin does not exist.
        if isinstance(data, type(u"")):
            labels = _unicode_dots_re.split(data)
        else:
            # Must be ASCII string
            data = bytes(data)
            # Decoded only to validate; the labels stay byte strings.
            data.decode("ascii")
            labels = data.split(b".")

        trailing_dot = u''
        if labels:
            if not labels[-1]:
                # Input ended on a separator: every label is complete.
                trailing_dot = u'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = u'.'

        decoded = [ulabel(label) for label in labels]

        # Consumed input: the labels themselves, one separator between each
        # pair of them, plus the trailing separator if one was emitted.
        # Counting separators by position rather than by "size so far is
        # non-zero" keeps the figure right when a leading label is empty.
        size = sum(len(label) for label in labels)
        if labels:
            size += len(labels) - 1
        size += len(trailing_dot)

        return (u".".join(decoded) + trailing_dot, size)
| 101 |
| 102 |
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer for the IDNA codec.

    Adds nothing of its own.  ``Codec`` comes first in the base list, so its
    ``encode`` takes precedence over the one inherited through
    ``codecs.StreamWriter``.
    """
    pass
| 105 |
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader for the IDNA codec.

    Adds nothing of its own.  ``Codec`` comes first in the base list, so its
    ``decode`` takes precedence over the one inherited through
    ``codecs.StreamReader``.
    """
    pass
| 108 |
def getregentry():
    """Return the ``codecs.CodecInfo`` record for this module's codec.

    The record is named ``'idna'`` and bundles the stateless encode/decode
    callables with the incremental and stream classes defined in this module.
    """
    stateless = Codec()
    return codecs.CodecInfo(
        name='idna',
        encode=stateless.encode,
        decode=stateless.decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
OLD | NEW |