OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2003-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: nptrans.h |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2003feb1 |
| 14 * created by: Ram Viswanadha |
| 15 */ |
| 16 |
| 17 #include "unicode/utypes.h" |
| 18 |
| 19 #if !UCONFIG_NO_TRANSLITERATION |
| 20 #if !UCONFIG_NO_IDNA |
| 21 |
| 22 #include "nptrans.h" |
| 23 #include "unicode/resbund.h" |
| 24 #include "unicode/uniset.h" |
| 25 #include "sprpimpl.h" |
| 26 #include "cmemory.h" |
| 27 #include "ustr_imp.h" |
| 28 #include "intltest.h" |
| 29 |
| 30 #ifdef DEBUG |
| 31 #include <stdio.h> |
| 32 #endif |
| 33 |
| 34 const char NamePrepTransform::fgClassID=0; |
| 35 |
| 36 //Factory method |
| 37 NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UE
rrorCode& status){ |
| 38 NamePrepTransform* transform = new NamePrepTransform(parseError, status); |
| 39 if(U_FAILURE(status)){ |
| 40 delete transform; |
| 41 return NULL; |
| 42 } |
| 43 return transform; |
| 44 } |
| 45 |
| 46 //constructor |
| 47 NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status
) |
| 48 : unassigned(), prohibited(), labelSeparatorSet(){ |
| 49 |
| 50 mapping = NULL; |
| 51 bundle = NULL; |
| 52 |
| 53 |
| 54 const char* testDataName = IntlTest::loadTestData(status); |
| 55 |
| 56 if(U_FAILURE(status)){ |
| 57 return; |
| 58 } |
| 59 |
| 60 bundle = ures_openDirect(testDataName,"idna_rules",&status); |
| 61 |
| 62 if(bundle != NULL && U_SUCCESS(status)){ |
| 63 // create the mapping transliterator |
| 64 int32_t ruleLen = 0; |
| 65 const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen,
&status); |
| 66 int32_t mapRuleLen = 0; |
| 67 const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalizat
ion", &mapRuleLen, &status); |
| 68 UnicodeString rule(mapRuleUChar, mapRuleLen); |
| 69 rule.append(ruleUChar, ruleLen); |
| 70 |
| 71 mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransfo
rm", ""), rule, |
| 72 UTRANS_FORWARD, parseError,st
atus); |
| 73 if(U_FAILURE(status)) { |
| 74 return; |
| 75 } |
| 76 |
| 77 //create the unassigned set |
| 78 int32_t patternLen =0; |
| 79 const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patte
rnLen, &status); |
| 80 unassigned.applyPattern(UnicodeString(pattern, patternLen), status); |
| 81 |
| 82 //create prohibited set |
| 83 patternLen=0; |
| 84 pattern = ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &stat
us); |
| 85 UnicodeString test(pattern,patternLen); |
| 86 prohibited.applyPattern(test,status); |
| 87 #ifdef DEBUG |
| 88 if(U_FAILURE(status)){ |
| 89 printf("Construction of Unicode set failed\n"); |
| 90 } |
| 91 |
| 92 if(U_SUCCESS(status)){ |
| 93 if(prohibited.contains((UChar) 0x644)){ |
| 94 printf("The string contains 0x644 ... damn !!\n"); |
| 95 } |
| 96 UnicodeString temp; |
| 97 prohibited.toPattern(temp,TRUE); |
| 98 |
| 99 for(int32_t i=0;i<temp.length();i++){ |
| 100 printf("%c", (char)temp.charAt(i)); |
| 101 } |
| 102 printf("\n"); |
| 103 } |
| 104 #endif |
| 105 |
| 106 //create label separator set |
| 107 patternLen=0; |
| 108 pattern = ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &
status); |
| 109 labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status)
; |
| 110 } |
| 111 |
| 112 if(U_SUCCESS(status) && |
| 113 (mapping == NULL) |
| 114 ){ |
| 115 status = U_MEMORY_ALLOCATION_ERROR; |
| 116 delete mapping; |
| 117 ures_close(bundle); |
| 118 mapping = NULL; |
| 119 bundle = NULL; |
| 120 } |
| 121 |
| 122 } |
| 123 |
| 124 |
| 125 UBool NamePrepTransform::isProhibited(UChar32 ch){ |
| 126 return (UBool)(ch != ASCII_SPACE); |
| 127 } |
| 128 |
| 129 NamePrepTransform::~NamePrepTransform(){ |
| 130 delete mapping; |
| 131 mapping = NULL; |
| 132 |
| 133 //close the bundle |
| 134 ures_close(bundle); |
| 135 bundle = NULL; |
| 136 } |
| 137 |
| 138 |
| 139 int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength, |
| 140 UChar* dest, int32_t destCapacity, |
| 141 UBool allowUnassigned, |
| 142 UParseError* /*parseError*/, |
| 143 UErrorCode& status ){ |
| 144 |
| 145 if(U_FAILURE(status)){ |
| 146 return 0; |
| 147 } |
| 148 //check arguments |
| 149 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { |
| 150 status=U_ILLEGAL_ARGUMENT_ERROR; |
| 151 return 0; |
| 152 } |
| 153 |
| 154 UnicodeString rsource(src,srcLength); |
| 155 // map the code points |
| 156 // transliteration also performs NFKC |
| 157 mapping->transliterate(rsource); |
| 158 |
| 159 const UChar* buffer = rsource.getBuffer(); |
| 160 int32_t bufLen = rsource.length(); |
| 161 // check if unassigned |
| 162 if(allowUnassigned == FALSE){ |
| 163 int32_t bufIndex=0; |
| 164 UChar32 ch =0 ; |
| 165 for(;bufIndex<bufLen;){ |
| 166 U16_NEXT(buffer, bufIndex, bufLen, ch); |
| 167 if(unassigned.contains(ch)){ |
| 168 status = U_IDNA_UNASSIGNED_ERROR; |
| 169 return 0; |
| 170 } |
| 171 } |
| 172 } |
| 173 // check if there is enough room in the output |
| 174 if(bufLen < destCapacity){ |
| 175 uprv_memcpy(dest,buffer,bufLen*U_SIZEOF_UCHAR); |
| 176 } |
| 177 |
| 178 return u_terminateUChars(dest, destCapacity, bufLen, &status); |
| 179 } |
| 180 |
| 181 |
| 182 #define MAX_BUFFER_SIZE 300 |
| 183 |
| 184 int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength, |
| 185 UChar* dest, int32_t destCapacity, |
| 186 UBool allowUnassigned, |
| 187 UParseError* parseError, |
| 188 UErrorCode& status ){ |
| 189 // check error status |
| 190 if(U_FAILURE(status)){ |
| 191 return 0; |
| 192 } |
| 193 |
| 194 //check arguments |
| 195 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { |
| 196 status=U_ILLEGAL_ARGUMENT_ERROR; |
| 197 return 0; |
| 198 } |
| 199 |
| 200 UnicodeString b1String; |
| 201 UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE); |
| 202 int32_t b1Len; |
| 203 |
| 204 int32_t b1Index = 0; |
| 205 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTI
ON_COUNT; |
| 206 UBool leftToRight=FALSE, rightToLeft=FALSE; |
| 207 |
| 208 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, par
seError, status); |
| 209 b1String.releaseBuffer(b1Len); |
| 210 |
| 211 if(status == U_BUFFER_OVERFLOW_ERROR){ |
| 212 // redo processing of string |
| 213 /* we do not have enough room so grow the buffer*/ |
| 214 b1 = b1String.getBuffer(b1Len); |
| 215 status = U_ZERO_ERROR; // reset error |
| 216 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned,
parseError, status); |
| 217 b1String.releaseBuffer(b1Len); |
| 218 } |
| 219 |
| 220 if(U_FAILURE(status)){ |
| 221 b1Len = 0; |
| 222 goto CLEANUP; |
| 223 } |
| 224 |
| 225 |
| 226 for(; b1Index<b1Len; ){ |
| 227 |
| 228 UChar32 ch = 0; |
| 229 |
| 230 U16_NEXT(b1, b1Index, b1Len, ch); |
| 231 |
| 232 if(prohibited.contains(ch) && ch!=0x0020){ |
| 233 status = U_IDNA_PROHIBITED_ERROR; |
| 234 b1Len = 0; |
| 235 goto CLEANUP; |
| 236 } |
| 237 |
| 238 direction = u_charDirection(ch); |
| 239 if(firstCharDir==U_CHAR_DIRECTION_COUNT){ |
| 240 firstCharDir = direction; |
| 241 } |
| 242 if(direction == U_LEFT_TO_RIGHT){ |
| 243 leftToRight = TRUE; |
| 244 } |
| 245 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ |
| 246 rightToLeft = TRUE; |
| 247 } |
| 248 } |
| 249 |
| 250 // satisfy 2 |
| 251 if( leftToRight == TRUE && rightToLeft == TRUE){ |
| 252 status = U_IDNA_CHECK_BIDI_ERROR; |
| 253 b1Len = 0; |
| 254 goto CLEANUP; |
| 255 } |
| 256 |
| 257 //satisfy 3 |
| 258 if( rightToLeft == TRUE && |
| 259 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_AR
ABIC) && |
| 260 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) |
| 261 ){ |
| 262 status = U_IDNA_CHECK_BIDI_ERROR; |
| 263 return FALSE; |
| 264 } |
| 265 |
| 266 if(b1Len <= destCapacity){ |
| 267 uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR); |
| 268 } |
| 269 |
| 270 CLEANUP: |
| 271 return u_terminateUChars(dest, destCapacity, b1Len, &status); |
| 272 } |
| 273 |
| 274 UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){ |
| 275 // check error status |
| 276 if(U_FAILURE(status)){ |
| 277 return FALSE; |
| 278 } |
| 279 |
| 280 return labelSeparatorSet.contains(ch); |
| 281 } |
| 282 |
| 283 #endif /* #if !UCONFIG_NO_IDNA */ |
| 284 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
OLD | NEW |