OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * | |
4 * Copyright (C) 2003-2014, International Business Machines | |
5 * Corporation and others. All Rights Reserved. | |
6 * | |
7 ******************************************************************************* | |
8 * file name: nptrans.h | |
9 * encoding: US-ASCII | |
10 * tab size: 8 (not used) | |
11 * indentation:4 | |
12 * | |
13 * created on: 2003feb1 | |
14 * created by: Ram Viswanadha | |
15 */ | |
16 | |
17 #include "unicode/utypes.h" | |
18 | |
19 #if !UCONFIG_NO_TRANSLITERATION | |
20 #if !UCONFIG_NO_IDNA | |
21 | |
22 #include "nptrans.h" | |
23 #include "unicode/resbund.h" | |
24 #include "unicode/uniset.h" | |
25 #include "sprpimpl.h" | |
26 #include "cmemory.h" | |
27 #include "ustr_imp.h" | |
28 #include "intltest.h" | |
29 | |
30 #ifdef NPTRANS_DEBUG | |
31 #include <stdio.h> | |
32 #endif | |
33 | |
34 const char NamePrepTransform::fgClassID=0; | |
35 | |
36 //Factory method | |
37 NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UE
rrorCode& status){ | |
38 NamePrepTransform* transform = new NamePrepTransform(parseError, status); | |
39 if(U_FAILURE(status)){ | |
40 delete transform; | |
41 return NULL; | |
42 } | |
43 return transform; | |
44 } | |
45 | |
46 //constructor | |
47 NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status
) | |
48 : unassigned(), prohibited(), labelSeparatorSet(){ | |
49 | |
50 mapping = NULL; | |
51 bundle = NULL; | |
52 | |
53 | |
54 const char* testDataName = IntlTest::loadTestData(status); | |
55 | |
56 if(U_FAILURE(status)){ | |
57 return; | |
58 } | |
59 | |
60 bundle = ures_openDirect(testDataName,"idna_rules",&status); | |
61 | |
62 if(bundle != NULL && U_SUCCESS(status)){ | |
63 // create the mapping transliterator | |
64 int32_t ruleLen = 0; | |
65 const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen,
&status); | |
66 int32_t mapRuleLen = 0; | |
67 const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalizat
ion", &mapRuleLen, &status); | |
68 UnicodeString rule(mapRuleUChar, mapRuleLen); | |
69 rule.append(ruleUChar, ruleLen); | |
70 | |
71 mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransfo
rm", ""), rule, | |
72 UTRANS_FORWARD, parseError,st
atus); | |
73 if(U_FAILURE(status)) { | |
74 return; | |
75 } | |
76 | |
77 //create the unassigned set | |
78 int32_t patternLen =0; | |
79 const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patte
rnLen, &status); | |
80 unassigned.applyPattern(UnicodeString(pattern, patternLen), status); | |
81 | |
82 //create prohibited set | |
83 patternLen=0; | |
84 pattern = ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &stat
us); | |
85 UnicodeString test(pattern,patternLen); | |
86 prohibited.applyPattern(test,status); | |
87 #ifdef NPTRANS_DEBUG | |
88 if(U_FAILURE(status)){ | |
89 printf("Construction of Unicode set failed\n"); | |
90 } | |
91 | |
92 if(U_SUCCESS(status)){ | |
93 if(prohibited.contains((UChar) 0x644)){ | |
94 printf("The string contains 0x644 ... !!\n"); | |
95 } | |
96 UnicodeString temp; | |
97 prohibited.toPattern(temp,TRUE); | |
98 | |
99 for(int32_t i=0;i<temp.length();i++){ | |
100 printf("%c", (char)temp.charAt(i)); | |
101 } | |
102 printf("\n"); | |
103 } | |
104 #endif | |
105 | |
106 //create label separator set | |
107 patternLen=0; | |
108 pattern = ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &
status); | |
109 labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status)
; | |
110 } | |
111 | |
112 if(U_SUCCESS(status) && | |
113 (mapping == NULL) | |
114 ){ | |
115 status = U_MEMORY_ALLOCATION_ERROR; | |
116 delete mapping; | |
117 ures_close(bundle); | |
118 mapping = NULL; | |
119 bundle = NULL; | |
120 } | |
121 | |
122 } | |
123 | |
124 | |
125 UBool NamePrepTransform::isProhibited(UChar32 ch){ | |
126 return (UBool)(ch != ASCII_SPACE); | |
127 } | |
128 | |
129 NamePrepTransform::~NamePrepTransform(){ | |
130 delete mapping; | |
131 mapping = NULL; | |
132 | |
133 //close the bundle | |
134 ures_close(bundle); | |
135 bundle = NULL; | |
136 } | |
137 | |
138 | |
139 int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength, | |
140 UChar* dest, int32_t destCapacity, | |
141 UBool allowUnassigned, | |
142 UParseError* /*parseError*/, | |
143 UErrorCode& status ){ | |
144 | |
145 if(U_FAILURE(status)){ | |
146 return 0; | |
147 } | |
148 //check arguments | |
149 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { | |
150 status=U_ILLEGAL_ARGUMENT_ERROR; | |
151 return 0; | |
152 } | |
153 | |
154 UnicodeString rsource(src,srcLength); | |
155 // map the code points | |
156 // transliteration also performs NFKC | |
157 mapping->transliterate(rsource); | |
158 | |
159 const UChar* buffer = rsource.getBuffer(); | |
160 int32_t bufLen = rsource.length(); | |
161 // check if unassigned | |
162 if(allowUnassigned == FALSE){ | |
163 int32_t bufIndex=0; | |
164 UChar32 ch =0 ; | |
165 for(;bufIndex<bufLen;){ | |
166 U16_NEXT(buffer, bufIndex, bufLen, ch); | |
167 if(unassigned.contains(ch)){ | |
168 status = U_IDNA_UNASSIGNED_ERROR; | |
169 return 0; | |
170 } | |
171 } | |
172 } | |
173 // check if there is enough room in the output | |
174 if(bufLen < destCapacity){ | |
175 uprv_memcpy(dest,buffer,bufLen*U_SIZEOF_UCHAR); | |
176 } | |
177 | |
178 return u_terminateUChars(dest, destCapacity, bufLen, &status); | |
179 } | |
180 | |
181 | |
182 #define MAX_BUFFER_SIZE 300 | |
183 | |
184 int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength, | |
185 UChar* dest, int32_t destCapacity, | |
186 UBool allowUnassigned, | |
187 UParseError* parseError, | |
188 UErrorCode& status ){ | |
189 // check error status | |
190 if(U_FAILURE(status)){ | |
191 return 0; | |
192 } | |
193 | |
194 //check arguments | |
195 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { | |
196 status=U_ILLEGAL_ARGUMENT_ERROR; | |
197 return 0; | |
198 } | |
199 | |
200 UnicodeString b1String; | |
201 UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE); | |
202 int32_t b1Len; | |
203 | |
204 int32_t b1Index = 0; | |
205 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTI
ON_COUNT; | |
206 UBool leftToRight=FALSE, rightToLeft=FALSE; | |
207 | |
208 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, par
seError, status); | |
209 b1String.releaseBuffer(b1Len); | |
210 | |
211 if(status == U_BUFFER_OVERFLOW_ERROR){ | |
212 // redo processing of string | |
213 /* we do not have enough room so grow the buffer*/ | |
214 b1 = b1String.getBuffer(b1Len); | |
215 status = U_ZERO_ERROR; // reset error | |
216 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned,
parseError, status); | |
217 b1String.releaseBuffer(b1Len); | |
218 } | |
219 | |
220 if(U_FAILURE(status)){ | |
221 b1Len = 0; | |
222 goto CLEANUP; | |
223 } | |
224 | |
225 | |
226 for(; b1Index<b1Len; ){ | |
227 | |
228 UChar32 ch = 0; | |
229 | |
230 U16_NEXT(b1, b1Index, b1Len, ch); | |
231 | |
232 if(prohibited.contains(ch) && ch!=0x0020){ | |
233 status = U_IDNA_PROHIBITED_ERROR; | |
234 b1Len = 0; | |
235 goto CLEANUP; | |
236 } | |
237 | |
238 direction = u_charDirection(ch); | |
239 if(firstCharDir==U_CHAR_DIRECTION_COUNT){ | |
240 firstCharDir = direction; | |
241 } | |
242 if(direction == U_LEFT_TO_RIGHT){ | |
243 leftToRight = TRUE; | |
244 } | |
245 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ | |
246 rightToLeft = TRUE; | |
247 } | |
248 } | |
249 | |
250 // satisfy 2 | |
251 if( leftToRight == TRUE && rightToLeft == TRUE){ | |
252 status = U_IDNA_CHECK_BIDI_ERROR; | |
253 b1Len = 0; | |
254 goto CLEANUP; | |
255 } | |
256 | |
257 //satisfy 3 | |
258 if( rightToLeft == TRUE && | |
259 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_AR
ABIC) && | |
260 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) | |
261 ){ | |
262 status = U_IDNA_CHECK_BIDI_ERROR; | |
263 return FALSE; | |
264 } | |
265 | |
266 if(b1Len <= destCapacity){ | |
267 uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR); | |
268 } | |
269 | |
270 CLEANUP: | |
271 return u_terminateUChars(dest, destCapacity, b1Len, &status); | |
272 } | |
273 | |
274 UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){ | |
275 // check error status | |
276 if(U_FAILURE(status)){ | |
277 return FALSE; | |
278 } | |
279 | |
280 return labelSeparatorSet.contains(ch); | |
281 } | |
282 | |
283 #endif /* #if !UCONFIG_NO_IDNA */ | |
284 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ | |
OLD | NEW |