OLD | NEW |
| (Empty) |
1 # -*- test-case-name: twisted.words.test.test_jabberxmppstringprep -*- | |
2 # | |
3 # Copyright (c) 2001-2005 Twisted Matrix Laboratories. | |
4 # See LICENSE for details. | |
5 | |
6 import sys, warnings | |
7 from zope.interface import Interface, implements | |
8 | |
if sys.version_info >= (2, 3, 2):
    # Python 2.3.2 or newer: rely on the standard library's stringprep
    # tables, Unicode database and IDNA codec.
    import stringprep
    import unicodedata
    from encodings import idna

    crippled = False

else:
    import re

    class IDNA:
        """
        Reduced stand-in for the parts of L{encodings.idna} used by this
        module on interpreters older than Python 2.3.2.

        @cvar dots: regular expression matching the label separators.
        """

        dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")

        def nameprep(self, label):
            """
            Approximate nameprep by lower-casing C{label}.
            """
            return label.lower()

    idna = IDNA()

    # Signals the rest of the module to build the reduced profiles.
    crippled = True

    warnings.warn("Accented and non-Western Jabber IDs will not be properly "
                  "case-folded with this version of Python, resulting in "
                  "incorrect protocol-level behavior. It is strongly "
                  "recommended you upgrade to Python 2.3.2 or newer if you "
                  "intend to use Twisted's Jabber support.")

# These two modules were only needed for the version check above.
del sys, warnings
35 | |
class ILookupTable(Interface):
    """
    Interface for objects that test characters for table membership.
    """

    def lookup(c):
        """
        Tell whether the character C{c} is contained in this table.
        """
41 | |
class IMappingTable(Interface):
    """
    Interface for objects that map one character to its replacement.
    """

    def map(c):
        """
        Give the mapping of the character C{c}.
        """
47 | |
class LookupTableFromFunction:
    """
    Lookup table whose C{lookup} is a caller-supplied function.
    """

    implements(ILookupTable)

    def __init__(self, in_table_function):
        """
        @param in_table_function: callable taking one character; it is
            exposed unchanged as this table's C{lookup}.
        """
        # Stored on the instance, so it is called without C{self}.
        self.lookup = in_table_function
54 | |
class LookupTable:
    """
    Lookup table backed by a container of characters.
    """

    implements(ILookupTable)

    def __init__(self, table):
        """
        @param table: container supporting the C{in} operator.
        """
        self._table = table

    def lookup(self, c):
        """
        Return the result of testing C{c} for membership in the container.
        """
        found = c in self._table
        return found
64 | |
class MappingTableFromFunction:
    """
    Mapping table whose C{map} is a caller-supplied function.
    """

    implements(IMappingTable)

    def __init__(self, map_table_function):
        """
        @param map_table_function: callable taking one character; it is
            exposed unchanged as this table's C{map}.
        """
        # Stored on the instance, so it is called without C{self}.
        self.map = map_table_function
71 | |
class EmptyMappingTable:
    """
    Mapping table that maps every character of a given table to nothing.
    """

    implements(IMappingTable)

    def __init__(self, in_table_function):
        """
        @param in_table_function: callable taking one character and telling
            whether that character belongs to the table.
        """
        self._in_table_function = in_table_function

    def map(self, c):
        """
        Return C{None} when C{c} belongs to the table, otherwise return
        C{c} itself.
        """
        if self._in_table_function(c):
            return None
        return c
84 | |
class Profile:
    """
    A configurable string preparation profile.

    Preparing a string maps each character through the mapping tables,
    optionally applies NFKC normalization, and then rejects strings that
    contain prohibited characters, unassigned code points, or that violate
    the bidirectional requirements.

    @ivar mappings: objects with a C{map(c)} method, consulted in order for
        every character.
    @ivar normalize: whether C{prepare()} applies NFKC normalization.
    @ivar prohibiteds: objects with a C{lookup(c)} method; a character found
        in any of them makes the string invalid.
    @ivar do_check_unassigneds: whether C{prepare()} rejects unassigned code
        points.
    @ivar do_check_bidi: whether C{prepare()} enforces the bidirectional
        requirements.
    """

    def __init__(self, mappings=None, normalize=True, prohibiteds=None,
                 check_unassigneds=True, check_bidi=True):
        """
        @param mappings: mapping tables to apply; defaults to none.
        @param normalize: apply NFKC normalization after mapping.
        @param prohibiteds: lookup tables of prohibited characters; defaults
            to none.
        @param check_unassigneds: reject unassigned code points.
        @param check_bidi: enforce the bidirectional requirements.
        """
        # Create fresh lists per instance: a list literal used as a default
        # value would be shared by every profile built without arguments.
        if mappings is None:
            mappings = []
        if prohibiteds is None:
            prohibiteds = []

        self.mappings = mappings
        self.normalize = normalize
        self.prohibiteds = prohibiteds
        self.do_check_unassigneds = check_unassigneds
        self.do_check_bidi = check_bidi

    def prepare(self, string):
        """
        Run C{string} through this profile and return the prepared string.

        @raise UnicodeError: if the mapped (and normalized) string fails one
            of the enabled checks.
        """
        result = self.map(string)
        if self.normalize:
            result = unicodedata.normalize("NFKC", result)
        self.check_prohibiteds(result)
        if self.do_check_unassigneds:
            self.check_unassigneds(result)
        if self.do_check_bidi:
            self.check_bidirectionals(result)
        return result

    def map(self, string):
        """
        Apply the mapping tables to every character of C{string}.

        For each character the tables are consulted in order and the first
        result that differs from the character is used. A result of C{None}
        removes the character from the output.
        """
        result = []

        for c in string:
            result_c = c

            for mapping in self.mappings:
                result_c = mapping.map(c)
                if result_c != c:
                    # First table that changes the character wins.
                    break

            if result_c is not None:
                result.append(result_c)

        return u"".join(result)

    def check_prohibiteds(self, string):
        """
        @raise UnicodeError: if any character of C{string} is found in one
            of the prohibited tables.
        """
        for c in string:
            for table in self.prohibiteds:
                if table.lookup(c):
                    raise UnicodeError("Invalid character %s" % repr(c))

    def check_unassigneds(self, string):
        """
        @raise UnicodeError: if any character of C{string} is in stringprep
            table A.1.
        """
        for c in string:
            if stringprep.in_table_a1(c):
                raise UnicodeError("Unassigned code point %s" % repr(c))

    def check_bidirectionals(self, string):
        """
        Enforce the bidirectional requirements on C{string}.

        @raise UnicodeError: if the string mixes characters from stringprep
            tables D.1 and D.2, or contains a D.1 character without both
            starting and ending with one.
        """
        found_LCat = False
        found_RandALCat = False

        for c in string:
            if stringprep.in_table_d1(c):
                found_RandALCat = True
            if stringprep.in_table_d2(c):
                found_LCat = True

        if found_LCat and found_RandALCat:
            raise UnicodeError("Violation of BIDI Requirement 2")

        # found_RandALCat implies the string is non-empty, so indexing the
        # first and last characters is safe here.
        if found_RandALCat and not (stringprep.in_table_d1(string[0]) and
                                    stringprep.in_table_d1(string[-1])):
            raise UnicodeError("Violation of BIDI Requirement 3")
148 | |
149 | |
class NamePrep:
    """ Implements preparation of internationalized domain names.

    This class implements preparing internationalized domain names using the
    rules defined in RFC 3491, section 4 (Conversion operations).

    We do not perform step 4 since we deal with unicode representations of
    domain names and do not convert from or to ASCII representations using
    punycode encoding. When such a conversion is needed, the L{idna} standard
    library provides the C{ToUnicode()} and C{ToASCII()} functions. Note that
    L{idna} itself assumes UseSTD3ASCIIRules to be false.

    The following steps are performed by C{prepare()}:

     - Split the domain name in labels at the dots (RFC 3490, 3.1)
     - Apply nameprep proper on each label (RFC 3491)
     - Enforce the restrictions on ASCII characters in host names by
       assuming STD3ASCIIRules to be true. (STD 3)
     - Rejoin the labels using the label separator U+002E (full stop).

    """

    # Prohibited characters: every ASCII code point other than letters,
    # digits and hyphen-minus.
    prohibiteds = [unichr(n) for n in range(0x00, 0x2c + 1) +
                                      range(0x2e, 0x2f + 1) +
                                      range(0x3a, 0x40 + 1) +
                                      range(0x5b, 0x60 + 1) +
                                      range(0x7b, 0x7f + 1)]

    def prepare(self, string):
        """
        Prepare the domain name C{string} label by label.

        A single trailing dot is preserved in the result as U+002E.

        @raise UnicodeError: if any label is invalid.
        """
        labels = idna.dots.split(string)

        # An empty last label means the name ended in a dot; keep that dot
        # but do not treat the empty remainder as a label.
        if labels and len(labels[-1]) == 0:
            trailing_dot = '.'
            del labels[-1]
        else:
            trailing_dot = ''

        result = [self.nameprep(label) for label in labels]

        return ".".join(result) + trailing_dot

    def check_prohibiteds(self, string):
        """
        @raise UnicodeError: if C{string} contains a prohibited character.
        """
        for c in string:
            if c in self.prohibiteds:
                raise UnicodeError("Invalid character %s" % repr(c))

    def nameprep(self, label):
        """
        Prepare a single label and return it.

        @raise UnicodeError: if the prepared label is empty, contains a
            prohibited character, or starts or ends with a hyphen-minus.
        """
        label = idna.nameprep(label)
        self.check_prohibiteds(label)
        # Without this guard the indexing below would raise IndexError for
        # an empty label (e.g. between two consecutive dots) instead of the
        # UnicodeError used for every other invalid label.
        if not label:
            raise UnicodeError("Invalid empty label")
        if label[0] == '-':
            raise UnicodeError("Invalid leading hyphen-minus")
        if label[-1] == '-':
            raise UnicodeError("Invalid trailing hyphen-minus")
        return label
208 | |
if not crippled:
    # Lookup tables for the prohibited-output tables of stringprep.
    C_11 = LookupTableFromFunction(stringprep.in_table_c11)
    C_12 = LookupTableFromFunction(stringprep.in_table_c12)
    C_21 = LookupTableFromFunction(stringprep.in_table_c21)
    C_22 = LookupTableFromFunction(stringprep.in_table_c22)
    C_3 = LookupTableFromFunction(stringprep.in_table_c3)
    C_4 = LookupTableFromFunction(stringprep.in_table_c4)
    C_5 = LookupTableFromFunction(stringprep.in_table_c5)
    C_6 = LookupTableFromFunction(stringprep.in_table_c6)
    C_7 = LookupTableFromFunction(stringprep.in_table_c7)
    C_8 = LookupTableFromFunction(stringprep.in_table_c8)
    C_9 = LookupTableFromFunction(stringprep.in_table_c9)

    # Mapping tables: B.1 characters map to nothing, B.2 supplies the
    # per-character mapping function.
    B_1 = EmptyMappingTable(stringprep.in_table_b1)
    B_2 = MappingTableFromFunction(stringprep.map_table_b2)

    # Profile for the node part of a Jabber ID.
    nodeprep = Profile(
        mappings=[B_1, B_2],
        prohibiteds=[C_11, C_12, C_21, C_22,
                     C_3, C_4, C_5, C_6, C_7, C_8, C_9,
                     LookupTable([u'"', u'&', u"'", u'/',
                                  u':', u'<', u'>', u'@'])])

    # Profile for the resource part of a Jabber ID.
    resourceprep = Profile(
        mappings=[B_1,],
        prohibiteds=[C_12, C_21, C_22,
                     C_3, C_4, C_5, C_6, C_7, C_8, C_9])

else:
    # Reduced profiles for interpreters without stringprep support: plain
    # lower-casing, no normalization, no unassigned or bidi checks.
    case_map = MappingTableFromFunction(lambda c: c.lower())
    nodeprep = Profile(
        mappings=[case_map],
        normalize=False,
        prohibiteds=[LookupTable([u' ', u'"', u'&', u"'", u'/',
                                  u':', u'<', u'>', u'@'])],
        check_unassigneds=False,
        check_bidi=False)

    resourceprep = Profile(
        normalize=False,
        check_unassigneds=False,
        check_bidi=False)

# Shared preparer for the domain part of a Jabber ID.
nameprep = NamePrep()
OLD | NEW |