OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (C) 2000-2009, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 * file name: ucnvisci.c |
| 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) |
| 9 * indentation:4 |
| 10 * |
| 11 * created on: 2001JUN26 |
| 12 * created by: Ram Viswanadha |
| 13 * |
| 14 * Date Name Description |
| 15 * 24/7/2001 Ram Added support for EXT character handling |
| 16 */ |
| 17 |
| 18 #include "unicode/utypes.h" |
| 19 |
| 20 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| 21 |
| 22 #include "cmemory.h" |
| 23 #include "ucnv_bld.h" |
| 24 #include "unicode/ucnv.h" |
| 25 #include "ucnv_cnv.h" |
| 26 #include "unicode/ucnv_cb.h" |
| 27 #include "unicode/uset.h" |
| 28 #include "cstring.h" |
| 29 |
/* The low nibble of the converter options carries the requested ISCII version. */
#define UCNV_OPTIONS_VERSION_MASK 0xf

/* Unicode code points that receive special handling. */
#define NUKTA               0x093c
#define HALANT              0x094d
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero width Joiner */
#define INVALID_CHAR        0xffff

/* ISCII control bytes. */
#define ATR                 0xEF /* Attribute code */
#define EXT                 0xF0 /* Extension code */

#define DANDA               0x0964
#define DOUBLE_DANDA        0x0965

/* ISCII byte values. */
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define ISCII_VOWEL_SIGN_E  0xE0

/* Bounds of the Unicode range handled as "Indic" by this converter. */
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)

#define VOCALLIC_RR         0x0931
#define LF                  0x0A
#define ASCII_END           0xA0
#define NO_CHAR_MARKER      0xFFFE

/*
 * Parenthesized so that the product stays a single operand when the macro is
 * used inside a larger expression (e.g. "x % TELUGU_DELTA" or
 * "x - TELUGU_DELTA / 2"); without the parentheses such uses would bind to
 * DELTA alone.
 */
#define TELUGU_DELTA        (DELTA * TELUGU)

#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE

/* Gurmukhi (PNJ) specific code points and delta. */
#define PNJ_DELTA           0x0100
#define PNJ_BINDI           0x0A02
#define PNJ_TIPPI           0x0A70
#define PNJ_SIGN_VIRAMA     0x0A4D
#define PNJ_ADHAK           0x0A71
#define PNJ_HA              0x0A39
#define PNJ_RRA             0x0A5C
| 65 |
| 66 static USet* PNJ_BINDI_TIPPI_SET= NULL; |
| 67 static USet* PNJ_CONSONANT_SET= NULL; |
| 68 |
/*
 * Script index of each supported Indic script. _ISCIIOpen multiplies the
 * index by DELTA to obtain the per-script delta stored in the converter
 * state.
 */
typedef enum {
    DEVANAGARI = 0,
    BENGALI    = 1,
    GURMUKHI   = 2,
    GUJARATI   = 3,
    ORIYA      = 4,
    TAMIL      = 5,
    TELUGU     = 6,
    KANNADA    = 7,
    MALAYALAM  = 8,
    DELTA      = 0x80   /* multiplier applied to the script index, not a script */
} UniLang;
| 81 |
/**
 * Script selector values: the code page is switched when <ATR> followed by
 * one of these values is encountered.
 * The script names in the comments are the pairings used by lookupInitialData.
 */
typedef enum {
    DEF = 0x40,
    RMN = 0x41,
    DEV = 0x42,     /* Devanagari */
    BNG = 0x43,     /* Bengali */
    TML = 0x44,     /* Tamil */
    TLG = 0x45,     /* Telugu */
    ASM = 0x46,
    ORI = 0x47,     /* Oriya */
    KND = 0x48,     /* Kannada */
    MLM = 0x49,     /* Malayalam */
    GJR = 0x4A,     /* Gujarati */
    PNJ = 0x4B,     /* Gurmukhi */

    ARB = 0x71,
    PES = 0x72,
    URD = 0x73,
    SND = 0x74,
    KSM = 0x75,
    PST = 0x76
} ISCIILang;
| 106 |
/*
 * One bit per script (or script pair) as used by validityTable: a set bit
 * means "this code point is valid in that script".
 */
typedef enum {
    ZERO     = 0,
    TML_MASK = 1 << 0,
    MLM_MASK = 1 << 1,
    KND_MASK = 1 << 2,  /* also used for Telugu in lookupInitialData */
    BNG_MASK = 1 << 3,
    ORI_MASK = 1 << 4,
    GJR_MASK = 1 << 5,
    PNJ_MASK = 1 << 6,
    DEV_MASK = 1 << 7
} MaskEnum;
| 118 |
| 119 #define ISCII_CNV_PREFIX "ISCII,version=" |
| 120 |
| 121 typedef struct { |
| 122 UChar contextCharToUnicode; /* previous Unicode codepoint for contex
tual analysis */ |
| 123 UChar contextCharFromUnicode; /* previous Unicode codepoint for contex
tual analysis */ |
| 124 uint16_t defDeltaToUnicode; /* delta for switching to default state
when DEF is encountered */ |
| 125 uint16_t currentDeltaFromUnicode; /* current delta in Indic block */ |
| 126 uint16_t currentDeltaToUnicode; /* current delta in Indic block */ |
| 127 MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode *
/ |
| 128 MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode *
/ |
| 129 MaskEnum defMaskToUnicode; /* mask for default state in toUnicode *
/ |
| 130 UBool isFirstBuffer; /* boolean for fromUnicode to see if we
need to announce the first script */ |
| 131 UBool resetToDefaultToUnicode; /* boolean for reseting to default delta
and mask when a newline is encountered*/ |
| 132 char name[sizeof(ISCII_CNV_PREFIX) + 1]; |
| 133 UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. Th
is is necessary because we may need to know the last two code points. */ |
| 134 } UConverterDataISCII; |
| 135 |
| 136 typedef struct LookupDataStruct { |
| 137 UniLang uniLang; |
| 138 MaskEnum maskEnum; |
| 139 ISCIILang isciiLang; |
| 140 } LookupDataStruct; |
| 141 |
| 142 static const LookupDataStruct lookupInitialData[]={ |
| 143 { DEVANAGARI, DEV_MASK, DEV }, |
| 144 { BENGALI, BNG_MASK, BNG }, |
| 145 { GURMUKHI, PNJ_MASK, PNJ }, |
| 146 { GUJARATI, GJR_MASK, GJR }, |
| 147 { ORIYA, ORI_MASK, ORI }, |
| 148 { TAMIL, TML_MASK, TML }, |
| 149 { TELUGU, KND_MASK, TLG }, |
| 150 { KANNADA, KND_MASK, KND }, |
| 151 { MALAYALAM, MLM_MASK, MLM } |
| 152 }; |
| 153 |
| 154 static void initializeSets() { |
| 155 /* TODO: Replace the following two lines with PNJ_CONSONANT_SET = uset_openE
mpty(); */ |
| 156 PNJ_CONSONANT_SET = uset_open(0,0); |
| 157 uset_clear(PNJ_CONSONANT_SET); |
| 158 |
| 159 uset_addRange(PNJ_CONSONANT_SET, 0x0A15, 0x0A28); |
| 160 uset_addRange(PNJ_CONSONANT_SET, 0x0A2A, 0x0A30); |
| 161 uset_addRange(PNJ_CONSONANT_SET, 0x0A35, 0x0A36); |
| 162 uset_addRange(PNJ_CONSONANT_SET, 0x0A38, 0x0A39); |
| 163 |
| 164 PNJ_BINDI_TIPPI_SET = uset_clone(PNJ_CONSONANT_SET); |
| 165 uset_add(PNJ_BINDI_TIPPI_SET, 0x0A05); |
| 166 uset_add(PNJ_BINDI_TIPPI_SET, 0x0A07); |
| 167 uset_add(PNJ_BINDI_TIPPI_SET, 0x0A3F); |
| 168 uset_addRange(PNJ_BINDI_TIPPI_SET, 0x0A41, 0x0A42); |
| 169 |
| 170 uset_compact(PNJ_CONSONANT_SET); |
| 171 uset_compact(PNJ_BINDI_TIPPI_SET); |
| 172 } |
| 173 |
| 174 static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *e
rrorCode) { |
| 175 if(pArgs->onlyTestIsLoadable) { |
| 176 return; |
| 177 } |
| 178 |
| 179 /* Ensure that the sets used in special handling of certain Gurmukhi charact
ers are initialized. */ |
| 180 initializeSets(); |
| 181 |
| 182 cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII)); |
| 183 |
| 184 if (cnv->extraInfo != NULL) { |
| 185 int32_t len=0; |
| 186 UConverterDataISCII *converterData= |
| 187 (UConverterDataISCII *) cnv->extraInfo; |
| 188 converterData->contextCharToUnicode=NO_CHAR_MARKER; |
| 189 cnv->toUnicodeStatus = missingCharMarker; |
| 190 converterData->contextCharFromUnicode=0x0000; |
| 191 converterData->resetToDefaultToUnicode=FALSE; |
| 192 /* check if the version requested is supported */ |
| 193 if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) { |
| 194 /* initialize state variables */ |
| 195 converterData->currentDeltaFromUnicode |
| 196 = converterData->currentDeltaToUnicode |
| 197 = converterData->defDeltaToUnicode = (uint16_t)(look
upInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA); |
| 198 |
| 199 converterData->currentMaskFromUnicode |
| 200 = converterData->currentMaskToUnicode |
| 201 = converterData->defMaskToUnicode = lookupInitialDat
a[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum; |
| 202 |
| 203 converterData->isFirstBuffer=TRUE; |
| 204 (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX); |
| 205 len = (int32_t)uprv_strlen(converterData->name); |
| 206 converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERS
ION_MASK) + '0'); |
| 207 converterData->name[len+1]=0; |
| 208 |
| 209 converterData->prevToUnicodeStatus = 0x0000; |
| 210 } else { |
| 211 uprv_free(cnv->extraInfo); |
| 212 cnv->extraInfo = NULL; |
| 213 *errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 214 } |
| 215 |
| 216 } else { |
| 217 *errorCode =U_MEMORY_ALLOCATION_ERROR; |
| 218 } |
| 219 } |
| 220 |
| 221 static void _ISCIIClose(UConverter *cnv) { |
| 222 if (cnv->extraInfo!=NULL) { |
| 223 if (!cnv->isExtraLocal) { |
| 224 uprv_free(cnv->extraInfo); |
| 225 } |
| 226 cnv->extraInfo=NULL; |
| 227 } |
| 228 if (PNJ_CONSONANT_SET != NULL) { |
| 229 uset_close(PNJ_CONSONANT_SET); |
| 230 PNJ_CONSONANT_SET = NULL; |
| 231 } |
| 232 if (PNJ_BINDI_TIPPI_SET != NULL) { |
| 233 uset_close(PNJ_BINDI_TIPPI_SET); |
| 234 PNJ_BINDI_TIPPI_SET = NULL; |
| 235 } |
| 236 } |
| 237 |
| 238 static const char* _ISCIIgetName(const UConverter* cnv) { |
| 239 if (cnv->extraInfo) { |
| 240 UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo; |
| 241 return myData->name; |
| 242 } |
| 243 return NULL; |
| 244 } |
| 245 |
| 246 static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) { |
| 247 UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo); |
| 248 if (choice<=UCNV_RESET_TO_UNICODE) { |
| 249 cnv->toUnicodeStatus = missingCharMarker; |
| 250 cnv->mode=0; |
| 251 data->currentDeltaToUnicode=data->defDeltaToUnicode; |
| 252 data->currentMaskToUnicode = data->defMaskToUnicode; |
| 253 data->contextCharToUnicode=NO_CHAR_MARKER; |
| 254 data->prevToUnicodeStatus = 0x0000; |
| 255 } |
| 256 if (choice!=UCNV_RESET_TO_UNICODE) { |
| 257 cnv->fromUChar32=0x0000; |
| 258 data->contextCharFromUnicode=0x00; |
| 259 data->currentMaskFromUnicode=data->defMaskToUnicode; |
| 260 data->currentDeltaFromUnicode=data->defDeltaToUnicode; |
| 261 data->isFirstBuffer=TRUE; |
| 262 data->resetToDefaultToUnicode=FALSE; |
| 263 } |
| 264 } |
| 265 |
| 266 /** |
| 267 * The values in validity table are indexed by the lower bits of Unicode |
| 268 * range 0x0900 - 0x09ff. The values have a structure like: |
| 269 * --------------------------------------------------------------- |
| 270 * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML | |
| 271 * | | | | | ASM | KND | | | |
| 272 * --------------------------------------------------------------- |
| 273 * If a code point is valid in a particular script |
| 274 * then that bit is turned on |
| 275 * |
| 276 * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for |
| 277 * to represent these languages |
| 278 * |
| 279 * Telugu and Kannada have same codepoints except for Vocallic_RR which we speci
al case |
| 280 * and combine and use 1 bit to represent these languages. |
| 281 * |
| 282 * TODO: It is probably easier to understand and maintain to change this |
| 283 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit. |
| 284 */ |
| 285 |
| 286 static const uint8_t validityTable[128] = { |
| 287 /* This state table is tool generated please do not edit unless you know exactly
what you are doing */ |
| 288 /* Note: This table was edited to mirror the Windows XP implementation */ |
| 289 /*ISCII:Valid:Unicode */ |
| 290 /*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 291 /*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
ZERO + ZERO + ZERO , |
| 292 /*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 293 /*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 294 /*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 295 /*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 296 /*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 297 /*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 298 /*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 299 /*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 300 /*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 301 /*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 302 /*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 303 /*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 304 /*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
KND_MASK + MLM_MASK + TML_MASK , |
| 305 /*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 306 /*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 307 /*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 308 /*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
KND_MASK + MLM_MASK + TML_MASK , |
| 309 /*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 310 /*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 311 /*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 312 /*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 313 /*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 314 /*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 315 /*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 316 /*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 317 /*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 318 /*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 319 /*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 320 /*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 321 /*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 322 /*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 323 /*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 324 /*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 325 /*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 326 /*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 327 /*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 328 /*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 329 /*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 330 /*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 331 /*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + TML_MASK , |
| 332 /*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 333 /*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 334 /*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 335 /*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 336 /*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 337 /*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 338 /*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 339 /*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + MLM_MASK + TML_MASK , |
| 340 /*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 341 /*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO +
KND_MASK + MLM_MASK + TML_MASK , |
| 342 /*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + MLM_MASK + TML_MASK , |
| 343 /*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO +
KND_MASK + MLM_MASK + TML_MASK , |
| 344 /*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 345 /*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 346 /*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 347 /*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 348 /*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 349 /*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 350 /*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK +
ZERO + ZERO + ZERO , |
| 351 /*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 352 /*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 353 /*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 354 /*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 355 /*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 356 /*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 357 /*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 358 /*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK +
KND_MASK + ZERO + ZERO , |
| 359 /*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 360 /*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
KND_MASK + MLM_MASK + TML_MASK , |
| 361 /*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 362 /*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 363 /*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 364 /*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
KND_MASK + MLM_MASK + TML_MASK , |
| 365 /*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 366 /*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 367 /*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 368 /*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 369 /*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 370 /*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 371 /*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 372 /*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 373 /*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 374 /*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 375 /*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO +
KND_MASK + ZERO + ZERO , |
| 376 /*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO +
KND_MASK + ZERO + ZERO , |
| 377 /*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK +
ZERO + MLM_MASK + ZERO , |
| 378 /*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 379 /*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 380 /*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 381 /*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 382 /*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK +
ZERO + ZERO + ZERO , |
| 383 /*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK +
ZERO + ZERO + ZERO , |
| 384 /*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 385 /*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK +
ZERO + ZERO + ZERO , |
| 386 /*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 387 /*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + ZERO , |
| 388 /*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK +
ZERO + ZERO + ZERO , |
| 389 /*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK +
ZERO + ZERO + ZERO , |
| 390 /*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 391 /*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 392 /*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 393 /*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 394 /*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 395 /*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 396 /*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 397 /*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 398 /*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 399 /*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 400 /*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 401 /*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK +
KND_MASK + MLM_MASK + TML_MASK , |
| 402 /*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO , |
| 403 /* |
| 404 * The length of the array is 128 to provide values for 0x900..0x97f. |
| 405 * The last 15 entries for 0x971..0x97f of the validity table are all zero |
| 406 * because no Indic script uses such Unicode code points. |
| 407 */ |
| 408 /*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO +
ZERO + ZERO + ZERO |
| 409 }; |
| 410 |
/*
 * Unicode-to-ISCII lookup, one entry per code point offset; each row below
 * holds eight consecutive entries and is labelled with the Devanagari code
 * points it covers. 0xFFFF (the value of INVALID_CHAR) marks offsets
 * without a mapping.
 * NOTE(review): entries above 0xFF look like two ISCII bytes packed high
 * byte first (e.g. 0xeaea for the double danda 0x0965); confirm against
 * the fromUnicode conversion code.
 */
static const uint16_t fromUnicodeTable[128] = {
    /* 0x0900..0x0907 */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0xa4e0, 0x00a4, 0x00a5, 0x00a6,
    /* 0x0908..0x090f */ 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0xA6E9, 0x00ae, 0x00ab, 0x00ac,
    /* 0x0910..0x0917 */ 0x00ad, 0x00b2, 0x00af, 0x00b0, 0x00b1, 0x00b3, 0x00b4, 0x00b5,
    /* 0x0918..0x091f */ 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd,
    /* 0x0920..0x0927 */ 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5,
    /* 0x0928..0x092f */ 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd,
    /* 0x0930..0x0937 */ 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6,
    /* 0x0938..0x093f */ 0x00d7, 0x00d8, 0xFFFF, 0xFFFF, 0x00e9, 0xEAE9, 0x00da, 0x00db,
    /* 0x0940..0x0947 */ 0x00dc, 0x00dd, 0x00de, 0x00df, 0xDFE9, 0x00e3, 0x00e0, 0x00e1,
    /* 0x0948..0x094f */ 0x00e2, 0x00e7, 0x00e4, 0x00e5, 0x00e6, 0x00e8, 0x00ec, 0x00ed,
    /* 0x0950..0x0957 */ 0xA1E9 /* OM Symbol */, 0xFFFF, 0xF0B8, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0958..0x095f */ 0xb3e9, 0xb4e9, 0xb5e9, 0xbae9, 0xbfe9, 0xC0E9, 0xc9e9, 0x00ce,
    /* 0x0960..0x0967 */ 0xAAe9, 0xA7E9, 0xDBE9, 0xDCE9, 0x00ea, 0xeaea, 0x00f1, 0x00f2,
    /* 0x0968..0x096f */ 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa,
    /* 0x0970..0x0977 */ 0xF0BF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0978..0x097f */ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
| 541 static const uint16_t toUnicodeTable[256]={ |
| 542 0x0000,/* 0x00 */ |
| 543 0x0001,/* 0x01 */ |
| 544 0x0002,/* 0x02 */ |
| 545 0x0003,/* 0x03 */ |
| 546 0x0004,/* 0x04 */ |
| 547 0x0005,/* 0x05 */ |
| 548 0x0006,/* 0x06 */ |
| 549 0x0007,/* 0x07 */ |
| 550 0x0008,/* 0x08 */ |
| 551 0x0009,/* 0x09 */ |
| 552 0x000a,/* 0x0a */ |
| 553 0x000b,/* 0x0b */ |
| 554 0x000c,/* 0x0c */ |
| 555 0x000d,/* 0x0d */ |
| 556 0x000e,/* 0x0e */ |
| 557 0x000f,/* 0x0f */ |
| 558 0x0010,/* 0x10 */ |
| 559 0x0011,/* 0x11 */ |
| 560 0x0012,/* 0x12 */ |
| 561 0x0013,/* 0x13 */ |
| 562 0x0014,/* 0x14 */ |
| 563 0x0015,/* 0x15 */ |
| 564 0x0016,/* 0x16 */ |
| 565 0x0017,/* 0x17 */ |
| 566 0x0018,/* 0x18 */ |
| 567 0x0019,/* 0x19 */ |
| 568 0x001a,/* 0x1a */ |
| 569 0x001b,/* 0x1b */ |
| 570 0x001c,/* 0x1c */ |
| 571 0x001d,/* 0x1d */ |
| 572 0x001e,/* 0x1e */ |
| 573 0x001f,/* 0x1f */ |
| 574 0x0020,/* 0x20 */ |
| 575 0x0021,/* 0x21 */ |
| 576 0x0022,/* 0x22 */ |
| 577 0x0023,/* 0x23 */ |
| 578 0x0024,/* 0x24 */ |
| 579 0x0025,/* 0x25 */ |
| 580 0x0026,/* 0x26 */ |
| 581 0x0027,/* 0x27 */ |
| 582 0x0028,/* 0x28 */ |
| 583 0x0029,/* 0x29 */ |
| 584 0x002a,/* 0x2a */ |
| 585 0x002b,/* 0x2b */ |
| 586 0x002c,/* 0x2c */ |
| 587 0x002d,/* 0x2d */ |
| 588 0x002e,/* 0x2e */ |
| 589 0x002f,/* 0x2f */ |
| 590 0x0030,/* 0x30 */ |
| 591 0x0031,/* 0x31 */ |
| 592 0x0032,/* 0x32 */ |
| 593 0x0033,/* 0x33 */ |
| 594 0x0034,/* 0x34 */ |
| 595 0x0035,/* 0x35 */ |
| 596 0x0036,/* 0x36 */ |
| 597 0x0037,/* 0x37 */ |
| 598 0x0038,/* 0x38 */ |
| 599 0x0039,/* 0x39 */ |
| 600 0x003A,/* 0x3A */ |
| 601 0x003B,/* 0x3B */ |
| 602 0x003c,/* 0x3c */ |
| 603 0x003d,/* 0x3d */ |
| 604 0x003e,/* 0x3e */ |
| 605 0x003f,/* 0x3f */ |
| 606 0x0040,/* 0x40 */ |
| 607 0x0041,/* 0x41 */ |
| 608 0x0042,/* 0x42 */ |
| 609 0x0043,/* 0x43 */ |
| 610 0x0044,/* 0x44 */ |
| 611 0x0045,/* 0x45 */ |
| 612 0x0046,/* 0x46 */ |
| 613 0x0047,/* 0x47 */ |
| 614 0x0048,/* 0x48 */ |
| 615 0x0049,/* 0x49 */ |
| 616 0x004a,/* 0x4a */ |
| 617 0x004b,/* 0x4b */ |
| 618 0x004c,/* 0x4c */ |
| 619 0x004d,/* 0x4d */ |
| 620 0x004e,/* 0x4e */ |
| 621 0x004f,/* 0x4f */ |
| 622 0x0050,/* 0x50 */ |
| 623 0x0051,/* 0x51 */ |
| 624 0x0052,/* 0x52 */ |
| 625 0x0053,/* 0x53 */ |
| 626 0x0054,/* 0x54 */ |
| 627 0x0055,/* 0x55 */ |
| 628 0x0056,/* 0x56 */ |
| 629 0x0057,/* 0x57 */ |
| 630 0x0058,/* 0x58 */ |
| 631 0x0059,/* 0x59 */ |
| 632 0x005a,/* 0x5a */ |
| 633 0x005b,/* 0x5b */ |
| 634 0x005c,/* 0x5c */ |
| 635 0x005d,/* 0x5d */ |
| 636 0x005e,/* 0x5e */ |
| 637 0x005f,/* 0x5f */ |
| 638 0x0060,/* 0x60 */ |
| 639 0x0061,/* 0x61 */ |
| 640 0x0062,/* 0x62 */ |
| 641 0x0063,/* 0x63 */ |
| 642 0x0064,/* 0x64 */ |
| 643 0x0065,/* 0x65 */ |
| 644 0x0066,/* 0x66 */ |
| 645 0x0067,/* 0x67 */ |
| 646 0x0068,/* 0x68 */ |
| 647 0x0069,/* 0x69 */ |
| 648 0x006a,/* 0x6a */ |
| 649 0x006b,/* 0x6b */ |
| 650 0x006c,/* 0x6c */ |
| 651 0x006d,/* 0x6d */ |
| 652 0x006e,/* 0x6e */ |
| 653 0x006f,/* 0x6f */ |
| 654 0x0070,/* 0x70 */ |
| 655 0x0071,/* 0x71 */ |
| 656 0x0072,/* 0x72 */ |
| 657 0x0073,/* 0x73 */ |
| 658 0x0074,/* 0x74 */ |
| 659 0x0075,/* 0x75 */ |
| 660 0x0076,/* 0x76 */ |
| 661 0x0077,/* 0x77 */ |
| 662 0x0078,/* 0x78 */ |
| 663 0x0079,/* 0x79 */ |
| 664 0x007a,/* 0x7a */ |
| 665 0x007b,/* 0x7b */ |
| 666 0x007c,/* 0x7c */ |
| 667 0x007d,/* 0x7d */ |
| 668 0x007e,/* 0x7e */ |
| 669 0x007f,/* 0x7f */ |
| 670 0x0080,/* 0x80 */ |
| 671 0x0081,/* 0x81 */ |
| 672 0x0082,/* 0x82 */ |
| 673 0x0083,/* 0x83 */ |
| 674 0x0084,/* 0x84 */ |
| 675 0x0085,/* 0x85 */ |
| 676 0x0086,/* 0x86 */ |
| 677 0x0087,/* 0x87 */ |
| 678 0x0088,/* 0x88 */ |
| 679 0x0089,/* 0x89 */ |
| 680 0x008a,/* 0x8a */ |
| 681 0x008b,/* 0x8b */ |
| 682 0x008c,/* 0x8c */ |
| 683 0x008d,/* 0x8d */ |
| 684 0x008e,/* 0x8e */ |
| 685 0x008f,/* 0x8f */ |
| 686 0x0090,/* 0x90 */ |
| 687 0x0091,/* 0x91 */ |
| 688 0x0092,/* 0x92 */ |
| 689 0x0093,/* 0x93 */ |
| 690 0x0094,/* 0x94 */ |
| 691 0x0095,/* 0x95 */ |
| 692 0x0096,/* 0x96 */ |
| 693 0x0097,/* 0x97 */ |
| 694 0x0098,/* 0x98 */ |
| 695 0x0099,/* 0x99 */ |
| 696 0x009a,/* 0x9a */ |
| 697 0x009b,/* 0x9b */ |
| 698 0x009c,/* 0x9c */ |
| 699 0x009d,/* 0x9d */ |
| 700 0x009e,/* 0x9e */ |
| 701 0x009f,/* 0x9f */ |
| 702 0x00A0,/* 0xa0 */ |
| 703 0x0901,/* 0xa1 */ |
| 704 0x0902,/* 0xa2 */ |
| 705 0x0903,/* 0xa3 */ |
| 706 0x0905,/* 0xa4 */ |
| 707 0x0906,/* 0xa5 */ |
| 708 0x0907,/* 0xa6 */ |
| 709 0x0908,/* 0xa7 */ |
| 710 0x0909,/* 0xa8 */ |
| 711 0x090a,/* 0xa9 */ |
| 712 0x090b,/* 0xaa */ |
| 713 0x090e,/* 0xab */ |
| 714 0x090f,/* 0xac */ |
| 715 0x0910,/* 0xad */ |
| 716 0x090d,/* 0xae */ |
| 717 0x0912,/* 0xaf */ |
| 718 0x0913,/* 0xb0 */ |
| 719 0x0914,/* 0xb1 */ |
| 720 0x0911,/* 0xb2 */ |
| 721 0x0915,/* 0xb3 */ |
| 722 0x0916,/* 0xb4 */ |
| 723 0x0917,/* 0xb5 */ |
| 724 0x0918,/* 0xb6 */ |
| 725 0x0919,/* 0xb7 */ |
| 726 0x091a,/* 0xb8 */ |
| 727 0x091b,/* 0xb9 */ |
| 728 0x091c,/* 0xba */ |
| 729 0x091d,/* 0xbb */ |
| 730 0x091e,/* 0xbc */ |
| 731 0x091f,/* 0xbd */ |
| 732 0x0920,/* 0xbe */ |
| 733 0x0921,/* 0xbf */ |
| 734 0x0922,/* 0xc0 */ |
| 735 0x0923,/* 0xc1 */ |
| 736 0x0924,/* 0xc2 */ |
| 737 0x0925,/* 0xc3 */ |
| 738 0x0926,/* 0xc4 */ |
| 739 0x0927,/* 0xc5 */ |
| 740 0x0928,/* 0xc6 */ |
| 741 0x0929,/* 0xc7 */ |
| 742 0x092a,/* 0xc8 */ |
| 743 0x092b,/* 0xc9 */ |
| 744 0x092c,/* 0xca */ |
| 745 0x092d,/* 0xcb */ |
| 746 0x092e,/* 0xcc */ |
| 747 0x092f,/* 0xcd */ |
| 748 0x095f,/* 0xce */ |
| 749 0x0930,/* 0xcf */ |
| 750 0x0931,/* 0xd0 */ |
| 751 0x0932,/* 0xd1 */ |
| 752 0x0933,/* 0xd2 */ |
| 753 0x0934,/* 0xd3 */ |
| 754 0x0935,/* 0xd4 */ |
| 755 0x0936,/* 0xd5 */ |
| 756 0x0937,/* 0xd6 */ |
| 757 0x0938,/* 0xd7 */ |
| 758 0x0939,/* 0xd8 */ |
| 759 0x200D,/* 0xd9 */ |
| 760 0x093e,/* 0xda */ |
| 761 0x093f,/* 0xdb */ |
| 762 0x0940,/* 0xdc */ |
| 763 0x0941,/* 0xdd */ |
| 764 0x0942,/* 0xde */ |
| 765 0x0943,/* 0xdf */ |
| 766 0x0946,/* 0xe0 */ |
| 767 0x0947,/* 0xe1 */ |
| 768 0x0948,/* 0xe2 */ |
| 769 0x0945,/* 0xe3 */ |
| 770 0x094a,/* 0xe4 */ |
| 771 0x094b,/* 0xe5 */ |
| 772 0x094c,/* 0xe6 */ |
| 773 0x0949,/* 0xe7 */ |
| 774 0x094d,/* 0xe8 */ |
| 775 0x093c,/* 0xe9 */ |
| 776 0x0964,/* 0xea */ |
| 777 0xFFFF,/* 0xeb */ |
| 778 0xFFFF,/* 0xec */ |
| 779 0xFFFF,/* 0xed */ |
| 780 0xFFFF,/* 0xee */ |
| 781 0xFFFF,/* 0xef */ |
| 782 0xFFFF,/* 0xf0 */ |
| 783 0x0966,/* 0xf1 */ |
| 784 0x0967,/* 0xf2 */ |
| 785 0x0968,/* 0xf3 */ |
| 786 0x0969,/* 0xf4 */ |
| 787 0x096a,/* 0xf5 */ |
| 788 0x096b,/* 0xf6 */ |
| 789 0x096c,/* 0xf7 */ |
| 790 0x096d,/* 0xf8 */ |
| 791 0x096e,/* 0xf9 */ |
| 792 0x096f,/* 0xfa */ |
| 793 0xFFFF,/* 0xfb */ |
| 794 0xFFFF,/* 0xfc */ |
| 795 0xFFFF,/* 0xfd */ |
| 796 0xFFFF,/* 0xfe */ |
| 797 0xFFFF /* 0xff */ |
| 798 }; |
| 799 |
/*
 * Special cases for a character followed by ISCII_VOWEL_SIGN_E (0xE0).
 *
 * Row 0 is a header: its first element is the total number of rows in the
 * table (header included); lookups therefore start at row 1.
 * Every other row is { ISCII byte of the preceding character,
 *                      Unicode value produced for the pair }.
 */
static const uint16_t vowelSignESpecialCases[][2] = {
    { 2,    0      },   /* header: total row count */
    { 0xA4, 0x0904 }
};
| 804 |
/*
 * Special cases for a character followed by ISCII_NUKTA (0xE9).
 *
 * Row 0 is a header: its first element is the total number of rows in the
 * table (header included); lookups therefore start at row 1.
 * Every other row is { ISCII byte of the preceding character,
 *                      Unicode value produced for the pair }.
 * The Unicode values lie in the 0x0900 block; the delta of the current
 * script is added when the value is written out.
 */
static const uint16_t nuktaSpecialCases[][2] = {
    { 16,   0      },   /* header: total row count */
    { 0xA6, 0x090C },
    { 0xEA, 0x093D },
    { 0xDF, 0x0944 },
    { 0xA1, 0x0950 },
    { 0xB3, 0x0958 },
    { 0xB4, 0x0959 },
    { 0xB5, 0x095A },
    { 0xBA, 0x095B },
    { 0xBF, 0x095C },
    { 0xC0, 0x095D },
    { 0xC9, 0x095E },
    { 0xAA, 0x0960 },
    { 0xA7, 0x0961 },
    { 0xDB, 0x0962 },
    { 0xDC, 0x0963 }
};
| 823 |
| 824 |
/*
 * WRITE_TO_TARGET_FROM_U
 *
 * Appends the 1, 2 or 3 bytes packed in targetByteUnit (most significant
 * byte first) to the byte output of a fromUnicode conversion.
 *
 *   args           conversion arguments; args->source and
 *                  args->converter->charErrorBuffer[Length] are used
 *   offsets        int32_t* lvalue, may be NULL; advanced once per byte that
 *                  is stored in target
 *   source         current source pointer (already past the unit being
 *                  written); used only to compute the offset value
 *   target         output pointer lvalue; advanced once per byte stored
 *   targetLimit    end of the output buffer
 *   targetByteUnit value to write: <= 0xFF is one byte, <= 0xFFFF two bytes,
 *                  larger values three bytes
 *   err            UErrorCode*; set to U_BUFFER_OVERFLOW_ERROR whenever at
 *                  least one byte had to be diverted to charErrorBuffer
 *
 * Bytes that do not fit below targetLimit are appended, in order, to
 * args->converter->charErrorBuffer.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by ';'
 * is a single statement (safe inside an unbraced if/else), and every
 * argument is parenthesized. Arguments are evaluated more than once, so they
 * must not have side effects.
 */
#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err) \
    do { \
        int32_t fromUOffset = (int32_t)((source) - (args)->source - 1); \
        /* write the targetUniChar to target */ \
        if ((target) < (targetLimit)) { \
            if ((targetByteUnit) <= 0xFF) { \
                *(target)++ = (uint8_t)(targetByteUnit); \
                if (offsets) { \
                    *((offsets)++) = fromUOffset; \
                } \
            } else { \
                if ((targetByteUnit) > 0xFFFF) { \
                    /* three-byte unit: emit the lead byte first */ \
                    *(target)++ = (uint8_t)((targetByteUnit) >> 16); \
                    if (offsets) { \
                        --fromUOffset; \
                        *((offsets)++) = fromUOffset; \
                    } \
                } \
                if (!((target) < (targetLimit))) { \
                    /* no room left: queue the two remaining bytes */ \
                    (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] = \
                        (uint8_t)((targetByteUnit) >> 8); \
                    (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] = \
                        (uint8_t)(targetByteUnit); \
                    *(err) = U_BUFFER_OVERFLOW_ERROR; \
                } else { \
                    *(target)++ = (uint8_t)((targetByteUnit) >> 8); \
                    if (offsets) { \
                        *((offsets)++) = fromUOffset; \
                    } \
                    if ((target) < (targetLimit)) { \
                        *(target)++ = (uint8_t)(targetByteUnit); \
                        if (offsets) { \
                            *((offsets)++) = fromUOffset; \
                        } \
                    } else { \
                        (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] = \
                            (uint8_t)(targetByteUnit); \
                        *(err) = U_BUFFER_OVERFLOW_ERROR; \
                    } \
                } \
            } \
        } else { \
            /* output buffer already full: queue every byte of the unit */ \
            if ((targetByteUnit) & 0xFF0000) { \
                (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] = \
                    (uint8_t)((targetByteUnit) >> 16); \
            } \
            if ((targetByteUnit) & 0xFF00) { \
                (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] = \
                    (uint8_t)((targetByteUnit) >> 8); \
            } \
            (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] = \
                (uint8_t)(targetByteUnit); \
            *(err) = U_BUFFER_OVERFLOW_ERROR; \
        } \
    } while (0)
| 879 |
| 880 /* Rules: |
| 881 * Explicit Halant : |
| 882 * <HALANT> + <ZWNJ> |
| 883 * Soft Halant : |
| 884 * <HALANT> + <ZWJ> |
| * |
| * UConverter_fromUnicode_ISCII_OFFSETS_LOGIC converts the UTF-16 code units |
| * in [args->source, args->sourceLimit) to ISCII bytes written at |
| * args->target, and records a source offset per output byte when |
| * args->offsets is not NULL. On return args->source and args->target are |
| * updated to the positions reached. |
| * |
| * How the rules above are applied here: after ISCII_HALANT has been |
| * written, contextCharFromUnicode is non-zero. A ZWNJ seen in that state |
| * emits a second ISCII_HALANT, a ZWJ emits ISCII_NUKTA. Without that |
| * context ZWNJ is consumed silently and ZWJ emits ISCII_INV. |
| * |
| * Errors reported through *err: |
| * U_ILLEGAL_ARGUMENT_ERROR args->converter is NULL or a limit pointer |
| * lies before its start pointer |
| * U_BUFFER_OVERFLOW_ERROR set by WRITE_TO_TARGET_FROM_U; the bytes that |
| * did not fit are queued in charErrorBuffer |
| * U_INVALID_CHAR_FOUND / U_ILLEGAL_CHAR_FOUND unmappable input; the |
| * offending value is stored in converter->fromUChar32 |
| 885 */ |
| 886 |
| 887 static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC( |
| 888 UConverterFromUnicodeArgs * args, UErrorCode * err) { |
| 889 const UChar *source = args->source; |
| 890 const UChar *sourceLimit = args->sourceLimit; |
| 891 unsigned char *target = (unsigned char *) args->target; |
| 892 unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
| 893 int32_t* offsets = args->offsets; |
| 894 uint32_t targetByteUnit = 0x0000; |
| 895 UChar32 sourceChar = 0x0000; |
| 896 UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the G
urmukhi script. */ |
| 897 UConverterDataISCII *converterData; |
| 898 uint16_t newDelta=0; |
| 899 uint16_t range = 0; |
| 900 UBool deltaChanged = FALSE; |
| 901 |
| 902 if ((args->converter == NULL) || (args->targetLimit < args->target) || (args
->sourceLimit < args->source)) { |
| 903 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 904 return; |
| 905 } |
| 906 /* initialize data |
| * range is the script-block index derived from the delta saved in the |
| * converter data; it selects the lookupInitialData entry whose isciiLang |
| * byte is emitted after ATR. |
| * A non-zero converter->fromUChar32 is picked up right after this and |
| * processing resumes at the getTrail label inside the loop. */ |
| 907 converterData=(UConverterDataISCII*)args->converter->extraInfo; |
| 908 newDelta=converterData->currentDeltaFromUnicode; |
| 909 range = (uint16_t)(newDelta/DELTA); |
| 910 |
| 911 if ((sourceChar = args->converter->fromUChar32)!=0) { |
| 912 goto getTrail; |
| 913 } |
| 914 |
| 915 /*writing the char to the output stream */ |
| 916 while (source < sourceLimit) { |
| 917 /* Write the language code following LF only if LF is not the last chara
cter. */ |
| 918 if (args->converter->fromUnicodeStatus == LF) { |
| 919 targetByteUnit = ATR<<8; |
| 920 targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang; |
| 921 args->converter->fromUnicodeStatus = 0x0000; |
| 922 /* now append ATR and language code */ |
| 923 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,target
ByteUnit,err); |
| 924 if (U_FAILURE(*err)) { |
| 925 break; |
| 926 } |
| 927 } |
| 928 |
| 929 sourceChar = *source++; |
| 930 tempContextFromUnicode = converterData->contextCharFromUnicode; |
| 931 |
| 932 targetByteUnit = missingCharMarker; |
| 933 |
| 934 /*check if input is in ASCII and C0 control codes range*/ |
| 935 if (sourceChar <= ASCII_END) { |
| 936 args->converter->fromUnicodeStatus = sourceChar; |
| 937 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,source
Char,err); |
| 938 if (U_FAILURE(*err)) { |
| 939 break; |
| 940 } |
| 941 continue; |
| 942 } |
| 943 switch (sourceChar) { |
| 944 case ZWNJ: |
| 945 /* contextChar has HALANT */ |
| 946 if (converterData->contextCharFromUnicode) { |
| 947 converterData->contextCharFromUnicode = 0x00; |
| 948 targetByteUnit = ISCII_HALANT; |
| 949 } else { |
| 950 /* consume ZWNJ and continue */ |
| 951 converterData->contextCharFromUnicode = 0x00; |
| 952 continue; |
| 953 } |
| 954 break; |
| 955 case ZWJ: |
| 956 /* contextChar has HALANT */ |
| 957 if (converterData->contextCharFromUnicode) { |
| 958 targetByteUnit = ISCII_NUKTA; |
| 959 } else { |
| 960 targetByteUnit =ISCII_INV; |
| 961 } |
| 962 converterData->contextCharFromUnicode = 0x00; |
| 963 break; |
| 964 default: |
| 965 /* is the sourceChar in the INDIC_RANGE? |
| * The subtraction is truncated to uint16_t: for a sourceChar above |
| * INDIC_BLOCK_END the negative difference wraps to a large value, and |
| * for one below INDIC_BLOCK_BEGIN the difference exceeds INDIC_RANGE, |
| * so a single comparison tests both bounds (sourceChar was read from |
| * a UChar). */ |
| 966 if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) { |
| 967 /* Danda and Double Danda are valid in Northern scripts.. since
Unicode |
| 968 * does not include these codepoints in all Northern scripts we n
eed to |
| 969 * filter them out |
| 970 */ |
| 971 if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) { |
| 972 /* find out to which block the sourceChar belongs*/ |
| 973 range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA); |
| 974 newDelta =(uint16_t)(range*DELTA); |
| 975 |
| 976 /* Now are we in the same block as the previous? */ |
| 977 if (newDelta!= converterData->currentDeltaFromUnicode || con
verterData->isFirstBuffer) { |
| 978 converterData->currentDeltaFromUnicode = newDelta; |
| 979 converterData->currentMaskFromUnicode = lookupInitialDat
a[range].maskEnum; |
| 980 deltaChanged =TRUE; |
| 981 converterData->isFirstBuffer=FALSE; |
| 982 } |
| 983 |
| 984 if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { |
| 985 if (sourceChar == PNJ_TIPPI) { |
| 986 /* Make sure Tippi is converted to Bindi. */ |
| 987 sourceChar = PNJ_BINDI; |
| 988 } else if (sourceChar == PNJ_ADHAK) { |
| 989 /* This is for consonant cluster handling. */ |
| 990 converterData->contextCharFromUnicode = PNJ_ADHAK; |
| 991 } |
| 992 |
| 993 } |
| 994 /* Normalize all Indic codepoints to Devanagari and map them
to ISCII */ |
| 995 /* now subtract the new delta from sourceChar*/ |
| 996 sourceChar -= converterData->currentDeltaFromUnicode; |
| 997 } |
| 998 |
| 999 /* get the target byte unit */ |
| 1000 targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar]; |
| 1001 |
| 1002 /* is the code point valid in current script? */ |
| 1003 if ((validityTable[(uint8_t)sourceChar] & converterData->current
MaskFromUnicode)==0) { |
| 1004 /* Vocallic RR is assigned in ISCII Telugu and Unicode */ |
| 1005 if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) |
| sourceChar!=VOCALLIC_RR) { |
| 1006 targetByteUnit=missingCharMarker; |
| 1007 } |
| 1008 } |
| 1009 |
| 1010 if (deltaChanged) { |
| 1011 /* we are in a script block which is different than |
| 1012 * previous sourceChar's script block write ATR and language
codes |
| 1013 */ |
| 1014 uint32_t temp=0; |
| 1015 temp =(uint16_t)(ATR<<8); |
| 1016 temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiL
ang); |
| 1017 /* reset */ |
| 1018 deltaChanged=FALSE; |
| 1019 /* now append ATR and language code |
| * NOTE(review): the break below is lexically inside the switch, |
| * so on failure it leaves only the switch, not the while loop. |
| * The Adhak 'continue' and the context reset further down in |
| * this case are skipped, and the if/else chain after the |
| * switch still runs for the current character. Confirm this |
| * is intended before changing the control flow here. */ |
| 1020 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimi
t,temp,err); |
| 1021 if (U_FAILURE(*err)) { |
| 1022 break; |
| 1023 } |
| 1024 } |
| 1025 |
| 1026 if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sour
ceChar + PNJ_DELTA) == PNJ_ADHAK) { |
| 1027 continue; |
| 1028 } |
| 1029 } |
| 1030 /* reset context char */ |
| 1031 converterData->contextCharFromUnicode = 0x00; |
| 1032 break; |
| 1033 } |
| 1034 if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFr
omUnicode == PNJ_ADHAK && uset_contains(PNJ_CONSONANT_SET, (sourceChar + PNJ_DEL
TA))) { |
| 1035 /* If the previous codepoint is Adhak and the current codepoint is a
consonant, the targetByteUnit should be C + Halant + C. */ |
| 1036 /* reset context char */ |
| 1037 converterData->contextCharFromUnicode = 0x0000; |
| 1038 targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetBy
teUnit; |
| 1039 /* write targetByteUnit to target */ |
| 1040 WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, t
argetByteUnit,err); |
| 1041 if (U_FAILURE(*err)) { |
| 1042 break; |
| 1043 } |
| 1044 } else if (targetByteUnit != missingCharMarker) { |
| 1045 if (targetByteUnit==ISCII_HALANT) { |
| 1046 converterData->contextCharFromUnicode = (UChar)targetByteUnit; |
| 1047 } |
| 1048 /* write targetByteUnit to target*/ |
| 1049 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,target
ByteUnit,err); |
| 1050 if (U_FAILURE(*err)) { |
| 1051 break; |
| 1052 } |
| 1053 } else { |
| 1054 /* oops.. the code point is unassigned */ |
| 1055 /*check if the char is a First surrogate*/ |
| 1056 if (UTF_IS_SURROGATE(sourceChar)) { |
| 1057 if (UTF_IS_SURROGATE_FIRST(sourceChar)) { |
| 1058 getTrail: |
| 1059 /*look ahead to find the trail surrogate |
| * This label is also the target of the goto at the top of the |
| * function, taken when converter->fromUChar32 is non-zero on entry. */ |
| 1060 if (source < sourceLimit) { |
| 1061 /* test the following code unit */ |
| 1062 UChar trail= (*source); |
| 1063 if (UTF_IS_SECOND_SURROGATE(trail)) { |
| 1064 source++; |
| 1065 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail); |
| 1066 *err =U_INVALID_CHAR_FOUND; |
| 1067 /* convert this surrogate code point */ |
| 1068 /* exit this condition tree */ |
| 1069 } else { |
| 1070 /* this is an unmatched lead code unit (1st surrogat
e) */ |
| 1071 /* callback(illegal) */ |
| 1072 *err=U_ILLEGAL_CHAR_FOUND; |
| 1073 } |
| 1074 } else { |
| 1075 /* no more input */ |
| 1076 *err = U_ZERO_ERROR; |
| 1077 } |
| 1078 } else { |
| 1079 /* this is an unmatched trail code unit (2nd surrogate) */ |
| 1080 /* callback(illegal) */ |
| 1081 *err=U_ILLEGAL_CHAR_FOUND; |
| 1082 } |
| 1083 } else { |
| 1084 /* callback(unassigned) for a BMP code point */ |
| 1085 *err = U_INVALID_CHAR_FOUND; |
| 1086 } |
| 1087 |
| 1088 args->converter->fromUChar32=sourceChar; |
| 1089 break; |
| 1090 } |
| 1091 }/* end while(mySourceIndex<mySourceLength) */ |
| 1092 |
| 1093 /*save the state and return */ |
| 1094 args->source = source; |
| 1095 args->target = (char*)target; |
| 1096 } |
| 1097 |
| 1098 static const uint16_t lookupTable[][2]={ |
| 1099 { ZERO, ZERO }, /*DEFALT*/ |
| 1100 { ZERO, ZERO }, /*ROMAN*/ |
| 1101 { DEVANAGARI, DEV_MASK }, |
| 1102 { BENGALI, BNG_MASK }, |
| 1103 { TAMIL, TML_MASK }, |
| 1104 { TELUGU, KND_MASK }, |
| 1105 { BENGALI, BNG_MASK }, |
| 1106 { ORIYA, ORI_MASK }, |
| 1107 { KANNADA, KND_MASK }, |
| 1108 { MALAYALAM, MLM_MASK }, |
| 1109 { GUJARATI, GJR_MASK }, |
| 1110 { GURMUKHI, PNJ_MASK } |
| 1111 }; |
| 1112 |
/*
 * WRITE_TO_TARGET_TO_U
 *
 * Writes one UTF-16 unit of a toUnicode conversion.
 *
 *   args          conversion arguments; args->targetLimit and
 *                 args->converter->UCharErrorBuffer[Length] are used
 *   source        not used by the macro (kept for call compatibility)
 *   target        output pointer lvalue; advanced when the unit is stored
 *   offsets       int32_t* lvalue, may be NULL; advanced when the unit is
 *                 stored
 *   offset        value recorded in *offsets
 *   targetUniChar lvalue holding the value to write. NOTE: it is modified in
 *                 place - delta is added to it unless the value is
 *                 <= ASCII_END or is ZWJ, ZWNJ, DANDA or DOUBLE_DANDA
 *   delta         script delta to add
 *   err           UErrorCode*; set to U_BUFFER_OVERFLOW_ERROR when the unit
 *                 had to be diverted to UCharErrorBuffer
 *
 * The body is wrapped in do { } while (0) so that the macro followed by ';'
 * is a single statement, and every argument is parenthesized. Arguments are
 * evaluated more than once.
 */
#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) \
    do { \
        /* add offset to current Indic Block */ \
        if ((targetUniChar) > ASCII_END && \
            (targetUniChar) != ZWJ && \
            (targetUniChar) != ZWNJ && \
            (targetUniChar) != DANDA && \
            (targetUniChar) != DOUBLE_DANDA) { \
            (targetUniChar) += (uint16_t)(delta); \
        } \
        /* now write the targetUniChar */ \
        if ((target) < (args)->targetLimit) { \
            *(target)++ = (UChar)(targetUniChar); \
            if (offsets) { \
                *(offsets)++ = (int32_t)(offset); \
            } \
        } else { \
            (args)->converter->UCharErrorBuffer[(args)->converter->UCharErrorBufferLength++] = \
                (UChar)(targetUniChar); \
            *(err) = U_BUFFER_OVERFLOW_ERROR; \
        } \
    } while (0)
| 1135 |
/*
 * GET_MAPPING
 *
 * Looks up the Unicode value for one ISCII byte.
 *
 *   sourceChar    ISCII byte, used as index into toUnicodeTable
 *   targetUniChar lvalue receiving the result
 *   data          pointer to the converter data; currentMaskToUnicode and
 *                 currentDeltaToUnicode are read
 *
 * For bytes above ASCII_END the result is replaced by missingCharMarker when
 * validityTable says the value is not valid for the current script mask.
 * The one exception is VOCALLIC_RR while the current delta is TELUGU_DELTA,
 * which is kept.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by ';'
 * is a single statement, and every argument is parenthesized. Arguments are
 * evaluated more than once.
 */
#define GET_MAPPING(sourceChar,targetUniChar,data) \
    do { \
        (targetUniChar) = toUnicodeTable[(sourceChar)]; \
        /* is the code point valid in current script? */ \
        if ((sourceChar) > ASCII_END && \
            (validityTable[(uint8_t)(targetUniChar)] & (data)->currentMaskToUnicode) == 0) { \
            /* Vocallic RR is assigned in ISCII Telugu and Unicode */ \
            if ((data)->currentDeltaToUnicode != (TELUGU_DELTA) || \
                (targetUniChar) != VOCALLIC_RR) { \
                (targetUniChar) = missingCharMarker; \
            } \
        } \
    } while (0)
| 1148 |
| 1149 /*********** |
| 1150 * Rules for ISCII to Unicode converter |
| 1151 * ISCII is stateful encoding. To convert ISCII bytes to Unicode, |
| 1152 * which has both precomposed and decomposed forms characters |
| 1153 * pre-context and post-context need to be considered. |
| 1154 * |
| 1155 * Post context |
| 1156 * i) ATR : Attribute code is used to declare the font and script switching. |
| 1157 * Currently we only switch scripts and font codes consumed without generat
ing an error |
| 1158 * ii) EXT : Extention code is used to declare switching to Sanskrit and for ob
scure, |
| 1159 * obsolete characters |
| 1160 * Pre context |
| 1161 * i) Halant: if preceeded by a halant then it is a explicit halant |
| 1162 * ii) Nukta : |
| 1163 * a) if preceeded by a halant then it is a soft halant |
| 1164 * b) if preceeded by specific consonants and the ligatures have pre-compo
sed |
| 1165 * characters in Unicode then convert to pre-composed characters |
| 1166 * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda |
| 1167 * |
| 1168 */ |
| 1169 |
| 1170 static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *ar
gs, UErrorCode* err) { |
| 1171 const char *source = ( char *) args->source; |
| 1172 UChar *target = args->target; |
| 1173 const char *sourceLimit = args->sourceLimit; |
| 1174 const UChar* targetLimit = args->targetLimit; |
| 1175 uint32_t targetUniChar = 0x0000; |
| 1176 uint8_t sourceChar = 0x0000; |
| 1177 UConverterDataISCII* data; |
| 1178 UChar32* toUnicodeStatus=NULL; |
| 1179 UChar32 tempTargetUniChar = 0x0000; |
| 1180 UChar* contextCharToUnicode= NULL; |
| 1181 UBool found; |
| 1182 int i; |
| 1183 int offset = 0; |
| 1184 |
| 1185 if ((args->converter == NULL) || (target < args->target) || (source < args->
source)) { |
| 1186 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 1187 return; |
| 1188 } |
| 1189 |
| 1190 data = (UConverterDataISCII*)(args->converter->extraInfo); |
| 1191 contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISC
II codepoint visited */ |
| 1192 toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains th
e mapping to Unicode of the above codepoint*/ |
| 1193 |
| 1194 while (U_SUCCESS(*err) && source<sourceLimit) { |
| 1195 |
| 1196 targetUniChar = missingCharMarker; |
| 1197 |
| 1198 if (target < targetLimit) { |
| 1199 sourceChar = (unsigned char)*(source)++; |
| 1200 |
| 1201 /* look at the post-context preform special processing */ |
| 1202 if (*contextCharToUnicode==ATR) { |
| 1203 |
| 1204 /* If we have ATR in *contextCharToUnicode then we need to chang
e our |
| 1205 * state to the Indic Script specified by sourceChar |
| 1206 */ |
| 1207 |
| 1208 /* check if the sourceChar is supported script range*/ |
| 1209 if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) { |
| 1210 data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceC
har & 0x0F][0] * DELTA); |
| 1211 data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceCha
r & 0x0F][1]; |
| 1212 } else if (sourceChar==DEF) { |
| 1213 /* switch back to default */ |
| 1214 data->currentDeltaToUnicode = data->defDeltaToUnicode; |
| 1215 data->currentMaskToUnicode = data->defMaskToUnicode; |
| 1216 } else { |
| 1217 if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) { |
| 1218 /* these are display codes consume and continue */ |
| 1219 } else { |
| 1220 *err =U_ILLEGAL_CHAR_FOUND; |
| 1221 /* reset */ |
| 1222 *contextCharToUnicode=NO_CHAR_MARKER; |
| 1223 goto CALLBACK; |
| 1224 } |
| 1225 } |
| 1226 |
| 1227 /* reset */ |
| 1228 *contextCharToUnicode=NO_CHAR_MARKER; |
| 1229 |
| 1230 continue; |
| 1231 |
| 1232 } else if (*contextCharToUnicode==EXT) { |
| 1233 /* check if sourceChar is in 0xA1-0xEE range */ |
| 1234 if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - E
XT_RANGE_BEGIN)) { |
| 1235 /* We currently support only Anudatta and Devanagari abbrevi
ation sign */ |
| 1236 if (sourceChar==0xBF || sourceChar == 0xB8) { |
| 1237 targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV
_ANUDATTA; |
| 1238 |
| 1239 /* find out if the mapping is valid in this state */ |
| 1240 if (validityTable[(uint8_t)targetUniChar] & data->curren
tMaskToUnicode) { |
| 1241 *contextCharToUnicode= NO_CHAR_MARKER; |
| 1242 |
| 1243 /* Write the previous toUnicodeStatus, this was dela
yed to handle consonant clustering for Gurmukhi script. */ |
| 1244 if (data->prevToUnicodeStatus) { |
| 1245 WRITE_TO_TARGET_TO_U(args,source,target,args->of
fsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); |
| 1246 data->prevToUnicodeStatus = 0x0000; |
| 1247 } |
| 1248 /* write to target */ |
| 1249 WRITE_TO_TARGET_TO_U(args,source,target,args->offset
s,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); |
| 1250 |
| 1251 continue; |
| 1252 } |
| 1253 } |
| 1254 /* byte unit is unassigned */ |
| 1255 targetUniChar = missingCharMarker; |
| 1256 *err= U_INVALID_CHAR_FOUND; |
| 1257 } else { |
| 1258 /* only 0xA1 - 0xEE are legal after EXT char */ |
| 1259 *contextCharToUnicode= NO_CHAR_MARKER; |
| 1260 *err = U_ILLEGAL_CHAR_FOUND; |
| 1261 } |
| 1262 goto CALLBACK; |
| 1263 } else if (*contextCharToUnicode==ISCII_INV) { |
| 1264 if (sourceChar==ISCII_HALANT) { |
| 1265 targetUniChar = 0x0020; /* replace with space accoding to In
dic FAQ */ |
| 1266 } else { |
| 1267 targetUniChar = ZWJ; |
| 1268 } |
| 1269 |
| 1270 /* Write the previous toUnicodeStatus, this was delayed to handl
e consonant clustering for Gurmukhi script. */ |
| 1271 if (data->prevToUnicodeStatus) { |
| 1272 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(sourc
e-args->source -1),data->prevToUnicodeStatus,0,err); |
| 1273 data->prevToUnicodeStatus = 0x0000; |
| 1274 } |
| 1275 /* write to target */ |
| 1276 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-ar
gs->source -2),targetUniChar,data->currentDeltaToUnicode,err); |
| 1277 /* reset */ |
| 1278 *contextCharToUnicode=NO_CHAR_MARKER; |
| 1279 } |
| 1280 |
| 1281 /* look at the pre-context and perform special processing */ |
| 1282 switch (sourceChar) { |
| 1283 case ISCII_INV: |
| 1284 case EXT: /*falls through*/ |
| 1285 case ATR: |
| 1286 *contextCharToUnicode = (UChar)sourceChar; |
| 1287 |
| 1288 if (*toUnicodeStatus != missingCharMarker) { |
| 1289 /* Write the previous toUnicodeStatus, this was delayed to h
andle consonant clustering for Gurmukhi script. */ |
| 1290 if (data->prevToUnicodeStatus) { |
| 1291 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(s
ource-args->source -1),data->prevToUnicodeStatus,0,err); |
| 1292 data->prevToUnicodeStatus = 0x0000; |
| 1293 } |
| 1294 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(sourc
e-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); |
| 1295 *toUnicodeStatus = missingCharMarker; |
| 1296 } |
| 1297 continue; |
| 1298 case ISCII_DANDA: |
| 1299 /* handle double danda*/ |
| 1300 if (*contextCharToUnicode== ISCII_DANDA) { |
| 1301 targetUniChar = DOUBLE_DANDA; |
| 1302 /* clear the context */ |
| 1303 *contextCharToUnicode = NO_CHAR_MARKER; |
| 1304 *toUnicodeStatus = missingCharMarker; |
| 1305 } else { |
| 1306 GET_MAPPING(sourceChar,targetUniChar,data); |
| 1307 *contextCharToUnicode = sourceChar; |
| 1308 } |
| 1309 break; |
| 1310 case ISCII_HALANT: |
| 1311 /* handle explicit halant */ |
| 1312 if (*contextCharToUnicode == ISCII_HALANT) { |
| 1313 targetUniChar = ZWNJ; |
| 1314 /* clear the context */ |
| 1315 *contextCharToUnicode = NO_CHAR_MARKER; |
| 1316 } else { |
| 1317 GET_MAPPING(sourceChar,targetUniChar,data); |
| 1318 *contextCharToUnicode = sourceChar; |
| 1319 } |
| 1320 break; |
| 1321 case 0x0A: |
| 1322 /* fall through */ |
| 1323 case 0x0D: |
| 1324 data->resetToDefaultToUnicode = TRUE; |
| 1325 GET_MAPPING(sourceChar,targetUniChar,data) |
| 1326 ; |
| 1327 *contextCharToUnicode = sourceChar; |
| 1328 break; |
| 1329 |
| 1330 case ISCII_VOWEL_SIGN_E: |
| 1331 i=1; |
| 1332 found=FALSE; |
| 1333 for (; i<vowelSignESpecialCases[0][0]; i++) { |
| 1334 if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUni
code) { |
| 1335 targetUniChar=vowelSignESpecialCases[i][1]; |
| 1336 found=TRUE; |
| 1337 break; |
| 1338 } |
| 1339 } |
| 1340 if (found) { |
| 1341 /* find out if the mapping is valid in this state */ |
| 1342 if (validityTable[(uint8_t)targetUniChar] & data->currentMas
kToUnicode) { |
| 1343 /*targetUniChar += data->currentDeltaToUnicode ;*/ |
| 1344 *contextCharToUnicode= NO_CHAR_MARKER; |
| 1345 *toUnicodeStatus = missingCharMarker; |
| 1346 break; |
| 1347 } |
| 1348 } |
| 1349 GET_MAPPING(sourceChar,targetUniChar,data); |
| 1350 *contextCharToUnicode = sourceChar; |
| 1351 break; |
| 1352 |
| 1353 case ISCII_NUKTA: |
| 1354 /* handle soft halant */ |
| 1355 if (*contextCharToUnicode == ISCII_HALANT) { |
| 1356 targetUniChar = ZWJ; |
| 1357 /* clear the context */ |
| 1358 *contextCharToUnicode = NO_CHAR_MARKER; |
| 1359 break; |
| 1360 } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->con
textCharToUnicode == 0xc0) { |
| 1361 /* Write the previous toUnicodeStatus, this was delayed to h
andle consonant clustering for Gurmukhi script. */ |
| 1362 if (data->prevToUnicodeStatus) { |
| 1363 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(s
ource-args->source -1),data->prevToUnicodeStatus,0,err); |
| 1364 data->prevToUnicodeStatus = 0x0000; |
| 1365 } |
| 1366 /* We got here because ISCII_NUKTA was preceded by 0xc0 and
we are converting Gurmukhi. |
| 1367 * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d
\u0a39). |
| 1368 */ |
| 1369 targetUniChar = PNJ_RRA; |
| 1370 WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (s
ource-args->source)-2, targetUniChar, 0, err); |
| 1371 if (U_SUCCESS(*err)) { |
| 1372 targetUniChar = PNJ_SIGN_VIRAMA; |
| 1373 WRITE_TO_TARGET_TO_U(args, source, target, args->offsets
, (source-args->source)-2, targetUniChar, 0, err); |
| 1374 if (U_SUCCESS(*err)) { |
| 1375 targetUniChar = PNJ_HA; |
| 1376 WRITE_TO_TARGET_TO_U(args, source, target, args->off
sets, (source-args->source)-2, targetUniChar, 0, err); |
| 1377 } else { |
| 1378 args->converter->UCharErrorBuffer[args->converter->U
CharErrorBufferLength++]= PNJ_HA; |
| 1379 } |
| 1380 } else { |
| 1381 args->converter->UCharErrorBuffer[args->converter->UChar
ErrorBufferLength++]= PNJ_SIGN_VIRAMA; |
| 1382 args->converter->UCharErrorBuffer[args->converter->UChar
ErrorBufferLength++]= PNJ_HA; |
| 1383 } |
| 1384 *toUnicodeStatus = missingCharMarker; |
| 1385 data->contextCharToUnicode = NO_CHAR_MARKER; |
| 1386 continue; |
| 1387 } else { |
| 1388 /* try to handle <CHAR> + ISCII_NUKTA special mappings */ |
| 1389 i=1; |
| 1390 found =FALSE; |
| 1391 for (; i<nuktaSpecialCases[0][0]; i++) { |
| 1392 if (nuktaSpecialCases[i][0]==(uint8_t) |
| 1393 *contextCharToUnicode) { |
| 1394 targetUniChar=nuktaSpecialCases[i][1]; |
| 1395 found =TRUE; |
| 1396 break; |
| 1397 } |
| 1398 } |
| 1399 if (found) { |
| 1400 /* find out if the mapping is valid in this state */ |
| 1401 if (validityTable[(uint8_t)targetUniChar] & data->curren
tMaskToUnicode) { |
| 1402 /*targetUniChar += data->currentDeltaToUnicode ;*/ |
| 1403 *contextCharToUnicode= NO_CHAR_MARKER; |
| 1404 *toUnicodeStatus = missingCharMarker; |
| 1405 if (data->currentDeltaToUnicode == PNJ_DELTA) { |
| 1406 /* Write the previous toUnicodeStatus, this was
delayed to handle consonant clustering for Gurmukhi script. */ |
| 1407 if (data->prevToUnicodeStatus) { |
| 1408 WRITE_TO_TARGET_TO_U(args,source,target,args
->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); |
| 1409 data->prevToUnicodeStatus = 0x0000; |
| 1410 } |
| 1411 WRITE_TO_TARGET_TO_U(args,source,target,args->of
fsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); |
| 1412 continue; |
| 1413 } |
| 1414 break; |
| 1415 } |
| 1416 /* else fall through to default */ |
| 1417 } |
| 1418 /* else fall through to default */ |
| 1419 } |
| 1420 default:GET_MAPPING(sourceChar,targetUniChar,data) |
| 1421 ; |
| 1422 *contextCharToUnicode = sourceChar; |
| 1423 break; |
| 1424 } |
| 1425 |
| 1426 if (*toUnicodeStatus != missingCharMarker) { |
| 1427 /* Check to make sure that consonant clusters are handled correc
t for Gurmukhi script. */ |
| 1428 if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnic
odeStatus != 0 && uset_contains(PNJ_CONSONANT_SET, data->prevToUnicodeStatus) && |
| 1429 (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (ta
rgetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) { |
| 1430 /* Consonant clusters C + HALANT + C should be encoded as AD
HAK + C */ |
| 1431 offset = (int)(source-args->source - 3); |
| 1432 tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid
some compiler warnings. */ |
| 1433 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset
,tempTargetUniChar,0,err); |
| 1434 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset
,data->prevToUnicodeStatus,0,err); |
| 1435 data->prevToUnicodeStatus = 0x0000; /* reset the previous un
icode code point */ |
| 1436 *toUnicodeStatus = missingCharMarker; |
| 1437 continue; |
| 1438 } else { |
| 1439 /* Write the previous toUnicodeStatus, this was delayed to h
andle consonant clustering for Gurmukhi script. */ |
| 1440 if (data->prevToUnicodeStatus) { |
| 1441 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(s
ource-args->source -1),data->prevToUnicodeStatus,0,err); |
| 1442 data->prevToUnicodeStatus = 0x0000; |
| 1443 } |
| 1444 /* Check to make sure that Bindi and Tippi are handled corre
ctly for Gurmukhi script. |
| 1445 * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI
_SET then the target codepoint should be Tippi instead of Bindi. |
| 1446 */ |
| 1447 if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniCh
ar + PNJ_DELTA) == PNJ_BINDI && uset_contains(PNJ_BINDI_TIPPI_SET, (*toUnicodeSt
atus + PNJ_DELTA))) { |
| 1448 targetUniChar = PNJ_TIPPI - PNJ_DELTA; |
| 1449 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(s
ource-args->source -2),*toUnicodeStatus,PNJ_DELTA,err); |
| 1450 } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targ
etUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && uset_contains(PNJ_CONSONANT_SET, (*
toUnicodeStatus + PNJ_DELTA))) { |
| 1451 /* Store the current toUnicodeStatus code point for late
r handling of consonant cluster in Gurmukhi. */ |
| 1452 data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA
; |
| 1453 } else { |
| 1454 /* write the previously mapped codepoint */ |
| 1455 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(s
ource-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); |
| 1456 } |
| 1457 } |
| 1458 *toUnicodeStatus = missingCharMarker; |
| 1459 } |
| 1460 |
| 1461 if (targetUniChar != missingCharMarker) { |
| 1462 /* now save the targetUniChar for delayed write */ |
| 1463 *toUnicodeStatus = (UChar) targetUniChar; |
| 1464 if (data->resetToDefaultToUnicode==TRUE) { |
| 1465 data->currentDeltaToUnicode = data->defDeltaToUnicode; |
| 1466 data->currentMaskToUnicode = data->defMaskToUnicode; |
| 1467 data->resetToDefaultToUnicode=FALSE; |
| 1468 } |
| 1469 } else { |
| 1470 |
| 1471 /* we reach here only if targetUniChar == missingCharMarker |
| 1472 * so assign codes to reason and err |
| 1473 */ |
| 1474 *err = U_INVALID_CHAR_FOUND; |
| 1475 CALLBACK: |
| 1476 args->converter->toUBytes[0] = (uint8_t) sourceChar; |
| 1477 args->converter->toULength = 1; |
| 1478 break; |
| 1479 } |
| 1480 |
| 1481 } else { |
| 1482 *err =U_BUFFER_OVERFLOW_ERROR; |
| 1483 break; |
| 1484 } |
| 1485 } |
| 1486 |
| 1487 if (U_SUCCESS(*err) && args->flush && source == sourceLimit) { |
| 1488 /* end of the input stream */ |
| 1489 UConverter *cnv = args->converter; |
| 1490 |
| 1491 if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *context
CharToUnicode==ISCII_INV) { |
| 1492 /* set toUBytes[] */ |
| 1493 cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode; |
| 1494 cnv->toULength = 1; |
| 1495 |
| 1496 /* avoid looping on truncated sequences */ |
| 1497 *contextCharToUnicode = NO_CHAR_MARKER; |
| 1498 } else { |
| 1499 cnv->toULength = 0; |
| 1500 } |
| 1501 |
| 1502 if (*toUnicodeStatus != missingCharMarker) { |
| 1503 /* output a remaining target character */ |
| 1504 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args
->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err); |
| 1505 *toUnicodeStatus = missingCharMarker; |
| 1506 } |
| 1507 } |
| 1508 |
| 1509 args->target = target; |
| 1510 args->source = source; |
| 1511 } |
| 1512 |
| 1513 /* structure for SafeClone calculations */ |
| 1514 struct cloneISCIIStruct { |
| 1515 UConverter cnv; |
| 1516 UConverterDataISCII mydata; |
| 1517 }; |
| 1518 |
| 1519 static UConverter * |
| 1520 _ISCII_SafeClone(const UConverter *cnv, |
| 1521 void *stackBuffer, |
| 1522 int32_t *pBufferSize, |
| 1523 UErrorCode *status) |
| 1524 { |
| 1525 struct cloneISCIIStruct * localClone; |
| 1526 int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct); |
| 1527 |
| 1528 if (U_FAILURE(*status)) { |
| 1529 return 0; |
| 1530 } |
| 1531 |
| 1532 if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *p
BufferSize */ |
| 1533 *pBufferSize = bufferSizeNeeded; |
| 1534 return 0; |
| 1535 } |
| 1536 |
| 1537 localClone = (struct cloneISCIIStruct *)stackBuffer; |
| 1538 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ |
| 1539 |
| 1540 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII)
); |
| 1541 localClone->cnv.extraInfo = &localClone->mydata; |
| 1542 localClone->cnv.isExtraLocal = TRUE; |
| 1543 |
| 1544 return &localClone->cnv; |
| 1545 } |
| 1546 |
| 1547 static void |
| 1548 _ISCIIGetUnicodeSet(const UConverter *cnv, |
| 1549 const USetAdder *sa, |
| 1550 UConverterUnicodeSet which, |
| 1551 UErrorCode *pErrorCode) |
| 1552 { |
| 1553 int32_t idx, script; |
| 1554 uint8_t mask; |
| 1555 |
| 1556 /* Since all ISCII versions allow switching to other ISCII |
| 1557 scripts, we add all roundtrippable characters to this set. */ |
| 1558 sa->addRange(sa->set, 0, ASCII_END); |
| 1559 for (script = DEVANAGARI; script <= MALAYALAM; script++) { |
| 1560 mask = (uint8_t)(lookupInitialData[script].maskEnum); |
| 1561 for (idx = 0; idx < DELTA; idx++) { |
| 1562 /* added check for TELUGU character */ |
| 1563 if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) { |
| 1564 sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); |
| 1565 } |
| 1566 } |
| 1567 } |
| 1568 sa->add(sa->set, DANDA); |
| 1569 sa->add(sa->set, DOUBLE_DANDA); |
| 1570 sa->add(sa->set, ZWNJ); |
| 1571 sa->add(sa->set, ZWJ); |
| 1572 } |
| 1573 |
| 1574 static const UConverterImpl _ISCIIImpl={ |
| 1575 |
| 1576 UCNV_ISCII, |
| 1577 |
| 1578 NULL, |
| 1579 NULL, |
| 1580 |
| 1581 _ISCIIOpen, |
| 1582 _ISCIIClose, |
| 1583 _ISCIIReset, |
| 1584 |
| 1585 UConverter_toUnicode_ISCII_OFFSETS_LOGIC, |
| 1586 UConverter_toUnicode_ISCII_OFFSETS_LOGIC, |
| 1587 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, |
| 1588 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, |
| 1589 NULL, |
| 1590 |
| 1591 NULL, |
| 1592 _ISCIIgetName, |
| 1593 NULL, |
| 1594 _ISCII_SafeClone, |
| 1595 _ISCIIGetUnicodeSet |
| 1596 }; |
| 1597 |
| 1598 static const UConverterStaticData _ISCIIStaticData={ |
| 1599 sizeof(UConverterStaticData), |
| 1600 "ISCII", |
| 1601 0, |
| 1602 UCNV_IBM, |
| 1603 UCNV_ISCII, |
| 1604 1, |
| 1605 4, |
| 1606 { 0x1a, 0, 0, 0 }, |
| 1607 0x1, |
| 1608 FALSE, |
| 1609 FALSE, |
| 1610 0x0, |
| 1611 0x0, |
| 1612 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ |
| 1613 |
| 1614 }; |
| 1615 |
| 1616 const UConverterSharedData _ISCIIData={ |
| 1617 sizeof(UConverterSharedData), |
| 1618 ~((uint32_t) 0), |
| 1619 NULL, |
| 1620 NULL, |
| 1621 &_ISCIIStaticData, |
| 1622 FALSE, |
| 1623 &_ISCIIImpl, |
| 1624 0 |
| 1625 }; |
| 1626 |
| 1627 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
OLD | NEW |