icu46/source/common/ucnvisci.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/ucnvisci.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (C) 2000-2009, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 **********************************************************************

	6 * file name: ucnvisci.c

	7 * encoding: US-ASCII

	8 * tab size: 8 (not used)

	9 * indentation:4

	10 *

	11 * created on: 2001JUN26

	12 * created by: Ram Viswanadha

	13 *

	14 * Date Name Description

	15 * 24/7/2001 Ram Added support for EXT character handling

	16 */

	17

	18 #include "unicode/utypes.h"

	19

	20 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

	21

	22 #include "cmemory.h"

	23 #include "ucnv_bld.h"

	24 #include "unicode/ucnv.h"

	25 #include "ucnv_cnv.h"

	26 #include "unicode/ucnv_cb.h"

	27 #include "unicode/uset.h"

	28 #include "cstring.h"

	29

	30 #define UCNV_OPTIONS_VERSION_MASK 0xf

	31 #define NUKTA 0x093c

	32 #define HALANT 0x094d

	33 #define ZWNJ 0x200c /* Zero Width Non Joiner */

	34 #define ZWJ 0x200d /* Zero width Joiner */

	35 #define INVALID_CHAR 0xffff

	36 #define ATR 0xEF /* Attribute code */

	37 #define EXT 0xF0 /* Extension code */

	38 #define DANDA 0x0964

	39 #define DOUBLE_DANDA 0x0965

	40 #define ISCII_NUKTA 0xE9

	41 #define ISCII_HALANT 0xE8

	42 #define ISCII_DANDA 0xEA

	43 #define ISCII_INV 0xD9

	44 #define ISCII_VOWEL_SIGN_E 0xE0

	45 #define INDIC_BLOCK_BEGIN 0x0900

	46 #define INDIC_BLOCK_END 0x0D7F

	47 #define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)

	48 #define VOCALLIC_RR 0x0931

	49 #define LF 0x0A

	50 #define ASCII_END 0xA0

	51 #define NO_CHAR_MARKER 0xFFFE

	/*
	 * Delta for the Telugu script: the script ordinal TELUGU multiplied by DELTA.
	 * The expansion is parenthesized so it stays a single operand when the macro
	 * is used inside a larger expression (e.g. after '/', '%' or a cast).
	 */
	#define TELUGU_DELTA (DELTA * TELUGU)

	53 #define DEV_ABBR_SIGN 0x0970

	54 #define DEV_ANUDATTA 0x0952

	55 #define EXT_RANGE_BEGIN 0xA1

	56 #define EXT_RANGE_END 0xEE

	57

	58 #define PNJ_DELTA 0x0100

	59 #define PNJ_BINDI 0x0A02

	60 #define PNJ_TIPPI 0x0A70

	61 #define PNJ_SIGN_VIRAMA 0x0A4D

	62 #define PNJ_ADHAK 0x0A71

	63 #define PNJ_HA 0x0A39

	64 #define PNJ_RRA 0x0A5C

	65

	66 static USet* PNJ_BINDI_TIPPI_SET= NULL;

	67 static USet* PNJ_CONSONANT_SET= NULL;

	68

	/**
	 * Script ordinals. _ISCIIOpen multiplies a script's ordinal by DELTA to
	 * obtain the converter's delta within the Indic block.
	 */
	typedef enum {
	    DEVANAGARI = 0,
	    BENGALI    = 1,
	    GURMUKHI   = 2,
	    GUJARATI   = 3,
	    ORIYA      = 4,
	    TAMIL      = 5,
	    TELUGU     = 6,
	    KANNADA    = 7,
	    MALAYALAM  = 8,
	    DELTA      = 0x80 /* multiplier applied to a script ordinal; not a script itself */
	} UniLang;

	81

	/**
	 * Enumeration for switching code pages if <ATR>+<one of below values>
	 * is encountered. ATR is the attribute code byte (0xEF).
	 */
	typedef enum {
	    DEF = 0x40,
	    RMN = 0x41,
	    DEV = 0x42,
	    BNG = 0x43,
	    TML = 0x44,
	    TLG = 0x45,
	    ASM = 0x46,
	    ORI = 0x47,
	    KND = 0x48,
	    MLM = 0x49,
	    GJR = 0x4A,
	    PNJ = 0x4B,
	    ARB = 0x71,
	    PES = 0x72,
	    URD = 0x73,
	    SND = 0x74,
	    KSM = 0x75,
	    PST = 0x76
	} ISCIILang;

	106

	/**
	 * Bit flags used in validityTable entries: each flag marks a code point as
	 * valid for one script (Bengali/Assamese and Telugu/Kannada each share a
	 * bit). ZERO contributes no bit and only keeps the table columns aligned.
	 */
	typedef enum {
	    DEV_MASK = 0x80,
	    PNJ_MASK = 0x40,
	    GJR_MASK = 0x20,
	    ORI_MASK = 0x10,
	    BNG_MASK = 0x08,
	    KND_MASK = 0x04,
	    MLM_MASK = 0x02,
	    TML_MASK = 0x01,
	    ZERO     = 0x00
	} MaskEnum;

	118

	119 #define ISCII_CNV_PREFIX "ISCII,version="

	120

	121 typedef struct {

	122 UChar contextCharToUnicode; /* previous Unicode codepoint for contex tual analysis */

	123 UChar contextCharFromUnicode; /* previous Unicode codepoint for contex tual analysis */

	124 uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */

	125 uint16_t currentDeltaFromUnicode; /* current delta in Indic block */

	126 uint16_t currentDeltaToUnicode; /* current delta in Indic block */

	127 MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode * /

	128 MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode * /

	129 MaskEnum defMaskToUnicode; /* mask for default state in toUnicode * /

	130 UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */

	131 UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/

	132 char name[sizeof(ISCII_CNV_PREFIX) + 1];

	133 UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. Th is is necessary because we may need to know the last two code points. */

	134 } UConverterDataISCII;

	135

	136 typedef struct LookupDataStruct {

	137 UniLang uniLang;

	138 MaskEnum maskEnum;

	139 ISCIILang isciiLang;

	140 } LookupDataStruct;

	141

	142 static const LookupDataStruct lookupInitialData[]={

	143 { DEVANAGARI, DEV_MASK, DEV },

	144 { BENGALI, BNG_MASK, BNG },

	145 { GURMUKHI, PNJ_MASK, PNJ },

	146 { GUJARATI, GJR_MASK, GJR },

	147 { ORIYA, ORI_MASK, ORI },

	148 { TAMIL, TML_MASK, TML },

	149 { TELUGU, KND_MASK, TLG },

	150 { KANNADA, KND_MASK, KND },

	151 { MALAYALAM, MLM_MASK, MLM }

	152 };

	153

	154 static void initializeSets() {

	155 /* TODO: Replace the following two lines with PNJ_CONSONANT_SET = uset_openE mpty(); */

	156 PNJ_CONSONANT_SET = uset_open(0,0);

	157 uset_clear(PNJ_CONSONANT_SET);

	158

	159 uset_addRange(PNJ_CONSONANT_SET, 0x0A15, 0x0A28);

	160 uset_addRange(PNJ_CONSONANT_SET, 0x0A2A, 0x0A30);

	161 uset_addRange(PNJ_CONSONANT_SET, 0x0A35, 0x0A36);

	162 uset_addRange(PNJ_CONSONANT_SET, 0x0A38, 0x0A39);

	163

	164 PNJ_BINDI_TIPPI_SET = uset_clone(PNJ_CONSONANT_SET);

	165 uset_add(PNJ_BINDI_TIPPI_SET, 0x0A05);

	166 uset_add(PNJ_BINDI_TIPPI_SET, 0x0A07);

	167 uset_add(PNJ_BINDI_TIPPI_SET, 0x0A3F);

	168 uset_addRange(PNJ_BINDI_TIPPI_SET, 0x0A41, 0x0A42);

	169

	170 uset_compact(PNJ_CONSONANT_SET);

	171 uset_compact(PNJ_BINDI_TIPPI_SET);

	172 }

	173

	174 static void _ISCIIOpen(UConverter cnv, UConverterLoadArgs pArgs, UErrorCode *e rrorCode) {

	175 if(pArgs->onlyTestIsLoadable) {

	176 return;

	177 }

	178

	179 /* Ensure that the sets used in special handling of certain Gurmukhi charact ers are initialized. */

	180 initializeSets();

	181

	182 cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));

	183

	184 if (cnv->extraInfo != NULL) {

	185 int32_t len=0;

	186 UConverterDataISCII *converterData=

	187 (UConverterDataISCII *) cnv->extraInfo;

	188 converterData->contextCharToUnicode=NO_CHAR_MARKER;

	189 cnv->toUnicodeStatus = missingCharMarker;

	190 converterData->contextCharFromUnicode=0x0000;

	191 converterData->resetToDefaultToUnicode=FALSE;

	192 /* check if the version requested is supported */

	193 if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {

	194 /* initialize state variables */

	195 converterData->currentDeltaFromUnicode

	196 = converterData->currentDeltaToUnicode

	197 = converterData->defDeltaToUnicode = (uint16_t)(look upInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);

	198

	199 converterData->currentMaskFromUnicode

	200 = converterData->currentMaskToUnicode

	201 = converterData->defMaskToUnicode = lookupInitialDat a[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;

	202

	203 converterData->isFirstBuffer=TRUE;

	204 (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);

	205 len = (int32_t)uprv_strlen(converterData->name);

	206 converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERS ION_MASK) + '0');

	207 converterData->name[len+1]=0;

	208

	209 converterData->prevToUnicodeStatus = 0x0000;

	210 } else {

	211 uprv_free(cnv->extraInfo);

	212 cnv->extraInfo = NULL;

	213 *errorCode = U_ILLEGAL_ARGUMENT_ERROR;

	214 }

	215

	216 } else {

	217 *errorCode =U_MEMORY_ALLOCATION_ERROR;

	218 }

	219 }

	220

	221 static void _ISCIIClose(UConverter *cnv) {

	222 if (cnv->extraInfo!=NULL) {

	223 if (!cnv->isExtraLocal) {

	224 uprv_free(cnv->extraInfo);

	225 }

	226 cnv->extraInfo=NULL;

	227 }

	228 if (PNJ_CONSONANT_SET != NULL) {

	229 uset_close(PNJ_CONSONANT_SET);

	230 PNJ_CONSONANT_SET = NULL;

	231 }

	232 if (PNJ_BINDI_TIPPI_SET != NULL) {

	233 uset_close(PNJ_BINDI_TIPPI_SET);

	234 PNJ_BINDI_TIPPI_SET = NULL;

	235 }

	236 }

	237

	238 static const char* _ISCIIgetName(const UConverter* cnv) {

	239 if (cnv->extraInfo) {

	240 UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;

	241 return myData->name;

	242 }

	243 return NULL;

	244 }

	245

	246 static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {

	247 UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);

	248 if (choice<=UCNV_RESET_TO_UNICODE) {

	249 cnv->toUnicodeStatus = missingCharMarker;

	250 cnv->mode=0;

	251 data->currentDeltaToUnicode=data->defDeltaToUnicode;

	252 data->currentMaskToUnicode = data->defMaskToUnicode;

	253 data->contextCharToUnicode=NO_CHAR_MARKER;

	254 data->prevToUnicodeStatus = 0x0000;

	255 }

	256 if (choice!=UCNV_RESET_TO_UNICODE) {

	257 cnv->fromUChar32=0x0000;

	258 data->contextCharFromUnicode=0x00;

	259 data->currentMaskFromUnicode=data->defMaskToUnicode;

	260 data->currentDeltaFromUnicode=data->defDeltaToUnicode;

	261 data->isFirstBuffer=TRUE;

	262 data->resetToDefaultToUnicode=FALSE;

	263 }

	264 }

	265

	266 /**

	267 * The values in validity table are indexed by the lower bits of Unicode

	268 * range 0x0900 - 0x09ff. The values have a structure like:

	269 * ---------------------------------------------------------------

	270 * \| DEV \| PNJ \| GJR \| ORI \| BNG \| TLG \| MLM \| TML \|

	271 * \| \| \| \| \| ASM \| KND \| \| \|

	272 * ---------------------------------------------------------------

	273 * If a code point is valid in a particular script

	274 * then that bit is turned on

	275 *

	276 * Unicode does not distinguish between Bengali and Assamese so we use 1 bit

	277 * to represent these languages

	278 *

	279 * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case

	280 * and combine and use 1 bit to represent these languages.

	281 *

	282 * TODO: It is probably easier to understand and maintain to change this

	283 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.

	284 */

	285

	286 static const uint8_t validityTable[128] = {

	287 /* This state table is tool generated please do not edit unless you know exactly what you are doing */

	288 /* Note: This table was edited to mirror the Windows XP implementation */

	289 /ISCII:Valid:Unicode /

	290 /0xa0 : 0x00: 0x900 / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	291 /0xa1 : 0xb8: 0x901 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

	292 /0xa2 : 0xfe: 0x902 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	293 /0xa3 : 0xbf: 0x903 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	294 /0x00 : 0x00: 0x904 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	295 /0xa4 : 0xff: 0x905 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	296 /0xa5 : 0xff: 0x906 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	297 /0xa6 : 0xff: 0x907 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	298 /0xa7 : 0xff: 0x908 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	299 /0xa8 : 0xff: 0x909 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	300 /0xa9 : 0xff: 0x90a / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	301 /0xaa : 0xfe: 0x90b / DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	302 /0x00 : 0x00: 0x90c / DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	303 /0xae : 0x80: 0x90d / DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

	304 /0xab : 0x87: 0x90e / DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

	305 /0xac : 0xff: 0x90f / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	306 /0xad : 0xff: 0x910 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	307 /0xb2 : 0x80: 0x911 / DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

	308 /0xaf : 0x87: 0x912 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

	309 /0xb0 : 0xff: 0x913 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	310 /0xb1 : 0xff: 0x914 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	311 /0xb3 : 0xff: 0x915 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	312 /0xb4 : 0xfe: 0x916 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	313 /0xb5 : 0xfe: 0x917 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	314 /0xb6 : 0xfe: 0x918 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	315 /0xb7 : 0xff: 0x919 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	316 /0xb8 : 0xff: 0x91a / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	317 /0xb9 : 0xfe: 0x91b / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	318 /0xba : 0xff: 0x91c / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	319 /0xbb : 0xfe: 0x91d / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	320 /0xbc : 0xff: 0x91e / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	321 /0xbd : 0xff: 0x91f / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	322 /0xbe : 0xfe: 0x920 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	323 /0xbf : 0xfe: 0x921 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	324 /0xc0 : 0xfe: 0x922 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	325 /0xc1 : 0xff: 0x923 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	326 /0xc2 : 0xff: 0x924 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	327 /0xc3 : 0xfe: 0x925 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	328 /0xc4 : 0xfe: 0x926 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	329 /0xc5 : 0xfe: 0x927 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	330 /0xc6 : 0xff: 0x928 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	331 /0xc7 : 0x81: 0x929 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK ,

	332 /0xc8 : 0xff: 0x92a / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	333 /0xc9 : 0xfe: 0x92b / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	334 /0xca : 0xfe: 0x92c / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	335 /0xcb : 0xfe: 0x92d / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	336 /0xcc : 0xfe: 0x92e / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	337 /0xcd : 0xff: 0x92f / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	338 /0xcf : 0xff: 0x930 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	339 /0xd0 : 0x87: 0x931 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,

	340 /0xd1 : 0xff: 0x932 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	341 /0xd2 : 0xb7: 0x933 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

	342 /0xd3 : 0x83: 0x934 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,

	343 /0xd4 : 0xff: 0x935 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

	344 /0xd5 : 0xfe: 0x936 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	345 /0xd6 : 0xbf: 0x937 / DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	346 /0xd7 : 0xff: 0x938 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	347 /0xd8 : 0xff: 0x939 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	348 /0x00 : 0x00: 0x93A / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	349 /0x00 : 0x00: 0x93B / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	350 /0xe9 : 0xda: 0x93c / DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

	351 /0x00 : 0x00: 0x93d / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	352 /0xda : 0xff: 0x93e / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	353 /0xdb : 0xff: 0x93f / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	354 /0xdc : 0xff: 0x940 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	355 /0xdd : 0xff: 0x941 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	356 /0xde : 0xff: 0x942 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	357 /0xdf : 0xbe: 0x943 / DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	358 /0x00 : 0x00: 0x944 / DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO ,

	359 /0xe3 : 0x80: 0x945 / DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

	360 /0xe0 : 0x87: 0x946 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

	361 /0xe1 : 0xff: 0x947 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	362 /0xe2 : 0xff: 0x948 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	363 /0xe7 : 0x80: 0x949 / DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

	364 /0xe4 : 0x87: 0x94a / DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

	365 /0xe5 : 0xff: 0x94b / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	366 /0xe6 : 0xff: 0x94c / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	367 /0xe8 : 0xff: 0x94d / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	368 /0xec : 0x00: 0x94e / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	369 /0xed : 0x00: 0x94f / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	370 /0x00 : 0x00: 0x950 / DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

	371 /0x00 : 0x00: 0x951 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	372 /0x00 : 0x00: 0x952 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	373 /0x00 : 0x00: 0x953 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	374 /0x00 : 0x00: 0x954 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	375 /0x00 : 0x00: 0x955 / ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO ,

	376 /0x00 : 0x00: 0x956 / ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO ,

	377 /0x00 : 0x00: 0x957 / ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO ,

	378 /0x00 : 0x00: 0x958 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	379 /0x00 : 0x00: 0x959 / DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	380 /0x00 : 0x00: 0x95a / DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	381 /0x00 : 0x00: 0x95b / DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	382 /0x00 : 0x00: 0x95c / DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,

	383 /0x00 : 0x00: 0x95d / DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

	384 /0x00 : 0x00: 0x95e / DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	385 /0xce : 0x98: 0x95f / DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

	386 /0x00 : 0x00: 0x960 / DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	387 /0x00 : 0x00: 0x961 / DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

	388 /0x00 : 0x00: 0x962 / DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,

	389 /0x00 : 0x00: 0x963 / DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,

	390 /0xea : 0xf8: 0x964 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	391 /0xeaea : 0x00: 0x965/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	392 /0xf1 : 0xff: 0x966 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	393 /0xf2 : 0xff: 0x967 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	394 /0xf3 : 0xff: 0x968 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	395 /0xf4 : 0xff: 0x969 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	396 /0xf5 : 0xff: 0x96a / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	397 /0xf6 : 0xff: 0x96b / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	398 /0xf7 : 0xff: 0x96c / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	399 /0xf8 : 0xff: 0x96d / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	400 /0xf9 : 0xff: 0x96e / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	401 /0xfa : 0xff: 0x96f / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

	402 /0x00 : 0x80: 0x970 / DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

	403 /*

	404 * The length of the array is 128 to provide values for 0x900..0x97f.

	405 * The last 15 entries for 0x971..0x97f of the validity table are all zero

	406 * because no Indic script uses such Unicode code points.

	407 */

	408 /0x00 : 0x00: 0x9yz / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO

	409 };

	410

	411 static const uint16_t fromUnicodeTable[128]={

	412 0x00a0 ,/* 0x0900 */

	413 0x00a1 ,/* 0x0901 */

	414 0x00a2 ,/* 0x0902 */

	415 0x00a3 ,/* 0x0903 */

	416 0xa4e0 ,/* 0x0904 */

	417 0x00a4 ,/* 0x0905 */

	418 0x00a5 ,/* 0x0906 */

	419 0x00a6 ,/* 0x0907 */

	420 0x00a7 ,/* 0x0908 */

	421 0x00a8 ,/* 0x0909 */

	422 0x00a9 ,/* 0x090a */

	423 0x00aa ,/* 0x090b */

	424 0xA6E9 ,/* 0x090c */

	425 0x00ae ,/* 0x090d */

	426 0x00ab ,/* 0x090e */

	427 0x00ac ,/* 0x090f */

	428 0x00ad ,/* 0x0910 */

	429 0x00b2 ,/* 0x0911 */

	430 0x00af ,/* 0x0912 */

	431 0x00b0 ,/* 0x0913 */

	432 0x00b1 ,/* 0x0914 */

	433 0x00b3 ,/* 0x0915 */

	434 0x00b4 ,/* 0x0916 */

	435 0x00b5 ,/* 0x0917 */

	436 0x00b6 ,/* 0x0918 */

	437 0x00b7 ,/* 0x0919 */

	438 0x00b8 ,/* 0x091a */

	439 0x00b9 ,/* 0x091b */

	440 0x00ba ,/* 0x091c */

	441 0x00bb ,/* 0x091d */

	442 0x00bc ,/* 0x091e */

	443 0x00bd ,/* 0x091f */

	444 0x00be ,/* 0x0920 */

	445 0x00bf ,/* 0x0921 */

	446 0x00c0 ,/* 0x0922 */

	447 0x00c1 ,/* 0x0923 */

	448 0x00c2 ,/* 0x0924 */

	449 0x00c3 ,/* 0x0925 */

	450 0x00c4 ,/* 0x0926 */

	451 0x00c5 ,/* 0x0927 */

	452 0x00c6 ,/* 0x0928 */

	453 0x00c7 ,/* 0x0929 */

	454 0x00c8 ,/* 0x092a */

	455 0x00c9 ,/* 0x092b */

	456 0x00ca ,/* 0x092c */

	457 0x00cb ,/* 0x092d */

	458 0x00cc ,/* 0x092e */

	459 0x00cd ,/* 0x092f */

	460 0x00cf ,/* 0x0930 */

	461 0x00d0 ,/* 0x0931 */

	462 0x00d1 ,/* 0x0932 */

	463 0x00d2 ,/* 0x0933 */

	464 0x00d3 ,/* 0x0934 */

	465 0x00d4 ,/* 0x0935 */

	466 0x00d5 ,/* 0x0936 */

	467 0x00d6 ,/* 0x0937 */

	468 0x00d7 ,/* 0x0938 */

	469 0x00d8 ,/* 0x0939 */

	470 0xFFFF ,/* 0x093A */

	471 0xFFFF ,/* 0x093B */

	472 0x00e9 ,/* 0x093c */

	473 0xEAE9 ,/* 0x093d */

	474 0x00da ,/* 0x093e */

	475 0x00db ,/* 0x093f */

	476 0x00dc ,/* 0x0940 */

	477 0x00dd ,/* 0x0941 */

	478 0x00de ,/* 0x0942 */

	479 0x00df ,/* 0x0943 */

	480 0xDFE9 ,/* 0x0944 */

	481 0x00e3 ,/* 0x0945 */

	482 0x00e0 ,/* 0x0946 */

	483 0x00e1 ,/* 0x0947 */

	484 0x00e2 ,/* 0x0948 */

	485 0x00e7 ,/* 0x0949 */

	486 0x00e4 ,/* 0x094a */

	487 0x00e5 ,/* 0x094b */

	488 0x00e6 ,/* 0x094c */

	489 0x00e8 ,/* 0x094d */

	490 0x00ec ,/* 0x094e */

	491 0x00ed ,/* 0x094f */

	492 0xA1E9 ,/* 0x0950 */ /* OM Symbol */

	493 0xFFFF ,/* 0x0951 */

	494 0xF0B8 ,/* 0x0952 */

	495 0xFFFF ,/* 0x0953 */

	496 0xFFFF ,/* 0x0954 */

	497 0xFFFF ,/* 0x0955 */

	498 0xFFFF ,/* 0x0956 */

	499 0xFFFF ,/* 0x0957 */

	500 0xb3e9 ,/* 0x0958 */

	501 0xb4e9 ,/* 0x0959 */

	502 0xb5e9 ,/* 0x095a */

	503 0xbae9 ,/* 0x095b */

	504 0xbfe9 ,/* 0x095c */

	505 0xC0E9 ,/* 0x095d */

	506 0xc9e9 ,/* 0x095e */

	507 0x00ce ,/* 0x095f */

	508 0xAAe9 ,/* 0x0960 */

	509 0xA7E9 ,/* 0x0961 */

	510 0xDBE9 ,/* 0x0962 */

	511 0xDCE9 ,/* 0x0963 */

	512 0x00ea ,/* 0x0964 */

	513 0xeaea ,/* 0x0965 */

	514 0x00f1 ,/* 0x0966 */

	515 0x00f2 ,/* 0x0967 */

	516 0x00f3 ,/* 0x0968 */

	517 0x00f4 ,/* 0x0969 */

	518 0x00f5 ,/* 0x096a */

	519 0x00f6 ,/* 0x096b */

	520 0x00f7 ,/* 0x096c */

	521 0x00f8 ,/* 0x096d */

	522 0x00f9 ,/* 0x096e */

	523 0x00fa ,/* 0x096f */

	524 0xF0BF ,/* 0x0970 */

	525 0xFFFF ,/* 0x0971 */

	526 0xFFFF ,/* 0x0972 */

	527 0xFFFF ,/* 0x0973 */

	528 0xFFFF ,/* 0x0974 */

	529 0xFFFF ,/* 0x0975 */

	530 0xFFFF ,/* 0x0976 */

	531 0xFFFF ,/* 0x0977 */

	532 0xFFFF ,/* 0x0978 */

	533 0xFFFF ,/* 0x0979 */

	534 0xFFFF ,/* 0x097a */

	535 0xFFFF ,/* 0x097b */

	536 0xFFFF ,/* 0x097c */

	537 0xFFFF ,/* 0x097d */

	538 0xFFFF ,/* 0x097e */

	539 0xFFFF ,/* 0x097f */

	540 };

	541 static const uint16_t toUnicodeTable[256]={

	542 0x0000,/* 0x00 */

	543 0x0001,/* 0x01 */

	544 0x0002,/* 0x02 */

	545 0x0003,/* 0x03 */

	546 0x0004,/* 0x04 */

	547 0x0005,/* 0x05 */

	548 0x0006,/* 0x06 */

	549 0x0007,/* 0x07 */

	550 0x0008,/* 0x08 */

	551 0x0009,/* 0x09 */

	552 0x000a,/* 0x0a */

	553 0x000b,/* 0x0b */

	554 0x000c,/* 0x0c */

	555 0x000d,/* 0x0d */

	556 0x000e,/* 0x0e */

	557 0x000f,/* 0x0f */

	558 0x0010,/* 0x10 */

	559 0x0011,/* 0x11 */

	560 0x0012,/* 0x12 */

	561 0x0013,/* 0x13 */

	562 0x0014,/* 0x14 */

	563 0x0015,/* 0x15 */

	564 0x0016,/* 0x16 */

	565 0x0017,/* 0x17 */

	566 0x0018,/* 0x18 */

	567 0x0019,/* 0x19 */

	568 0x001a,/* 0x1a */

	569 0x001b,/* 0x1b */

	570 0x001c,/* 0x1c */

	571 0x001d,/* 0x1d */

	572 0x001e,/* 0x1e */

	573 0x001f,/* 0x1f */

	574 0x0020,/* 0x20 */

	575 0x0021,/* 0x21 */

	576 0x0022,/* 0x22 */

	577 0x0023,/* 0x23 */

	578 0x0024,/* 0x24 */

	579 0x0025,/* 0x25 */

	580 0x0026,/* 0x26 */

	581 0x0027,/* 0x27 */

	582 0x0028,/* 0x28 */

	583 0x0029,/* 0x29 */

	584 0x002a,/* 0x2a */

	585 0x002b,/* 0x2b */

	586 0x002c,/* 0x2c */

	587 0x002d,/* 0x2d */

	588 0x002e,/* 0x2e */

	589 0x002f,/* 0x2f */

	590 0x0030,/* 0x30 */

	591 0x0031,/* 0x31 */

	592 0x0032,/* 0x32 */

	593 0x0033,/* 0x33 */

	594 0x0034,/* 0x34 */

	595 0x0035,/* 0x35 */

	596 0x0036,/* 0x36 */

	597 0x0037,/* 0x37 */

	598 0x0038,/* 0x38 */

	599 0x0039,/* 0x39 */

	600 0x003A,/* 0x3A */

	601 0x003B,/* 0x3B */

	602 0x003c,/* 0x3c */

	603 0x003d,/* 0x3d */

	604 0x003e,/* 0x3e */

	605 0x003f,/* 0x3f */

	606 0x0040,/* 0x40 */

	607 0x0041,/* 0x41 */

	608 0x0042,/* 0x42 */

	609 0x0043,/* 0x43 */

	610 0x0044,/* 0x44 */

	611 0x0045,/* 0x45 */

	612 0x0046,/* 0x46 */

	613 0x0047,/* 0x47 */

	614 0x0048,/* 0x48 */

	615 0x0049,/* 0x49 */

	616 0x004a,/* 0x4a */

	617 0x004b,/* 0x4b */

	618 0x004c,/* 0x4c */

	619 0x004d,/* 0x4d */

	620 0x004e,/* 0x4e */

	621 0x004f,/* 0x4f */

	622 0x0050,/* 0x50 */

	623 0x0051,/* 0x51 */

	624 0x0052,/* 0x52 */

	625 0x0053,/* 0x53 */

	626 0x0054,/* 0x54 */

	627 0x0055,/* 0x55 */

	628 0x0056,/* 0x56 */

	629 0x0057,/* 0x57 */

	630 0x0058,/* 0x58 */

	631 0x0059,/* 0x59 */

	632 0x005a,/* 0x5a */

	633 0x005b,/* 0x5b */

	634 0x005c,/* 0x5c */

	635 0x005d,/* 0x5d */

	636 0x005e,/* 0x5e */

	637 0x005f,/* 0x5f */

	638 0x0060,/* 0x60 */

	639 0x0061,/* 0x61 */

	640 0x0062,/* 0x62 */

	641 0x0063,/* 0x63 */

	642 0x0064,/* 0x64 */

	643 0x0065,/* 0x65 */

	644 0x0066,/* 0x66 */

	645 0x0067,/* 0x67 */

	646 0x0068,/* 0x68 */

	647 0x0069,/* 0x69 */

	648 0x006a,/* 0x6a */

	649 0x006b,/* 0x6b */

	650 0x006c,/* 0x6c */

	651 0x006d,/* 0x6d */

	652 0x006e,/* 0x6e */

	653 0x006f,/* 0x6f */

	654 0x0070,/* 0x70 */

	655 0x0071,/* 0x71 */

	656 0x0072,/* 0x72 */

	657 0x0073,/* 0x73 */

	658 0x0074,/* 0x74 */

	659 0x0075,/* 0x75 */

	660 0x0076,/* 0x76 */

	661 0x0077,/* 0x77 */

	662 0x0078,/* 0x78 */

	663 0x0079,/* 0x79 */

	664 0x007a,/* 0x7a */

	665 0x007b,/* 0x7b */

	666 0x007c,/* 0x7c */

	667 0x007d,/* 0x7d */

	668 0x007e,/* 0x7e */

	669 0x007f,/* 0x7f */

	670 0x0080,/* 0x80 */

	671 0x0081,/* 0x81 */

	672 0x0082,/* 0x82 */

	673 0x0083,/* 0x83 */

	674 0x0084,/* 0x84 */

	675 0x0085,/* 0x85 */

	676 0x0086,/* 0x86 */

	677 0x0087,/* 0x87 */

	678 0x0088,/* 0x88 */

	679 0x0089,/* 0x89 */

	680 0x008a,/* 0x8a */

	681 0x008b,/* 0x8b */

	682 0x008c,/* 0x8c */

	683 0x008d,/* 0x8d */

	684 0x008e,/* 0x8e */

	685 0x008f,/* 0x8f */

	686 0x0090,/* 0x90 */

	687 0x0091,/* 0x91 */

	688 0x0092,/* 0x92 */

	689 0x0093,/* 0x93 */

	690 0x0094,/* 0x94 */

	691 0x0095,/* 0x95 */

	692 0x0096,/* 0x96 */

	693 0x0097,/* 0x97 */

	694 0x0098,/* 0x98 */

	695 0x0099,/* 0x99 */

	696 0x009a,/* 0x9a */

	697 0x009b,/* 0x9b */

	698 0x009c,/* 0x9c */

	699 0x009d,/* 0x9d */

	700 0x009e,/* 0x9e */

	701 0x009f,/* 0x9f */

	702 0x00A0,/* 0xa0 */

	703 0x0901,/* 0xa1 */

	704 0x0902,/* 0xa2 */

	705 0x0903,/* 0xa3 */

	706 0x0905,/* 0xa4 */

	707 0x0906,/* 0xa5 */

	708 0x0907,/* 0xa6 */

	709 0x0908,/* 0xa7 */

	710 0x0909,/* 0xa8 */

	711 0x090a,/* 0xa9 */

	712 0x090b,/* 0xaa */

	713 0x090e,/* 0xab */

	714 0x090f,/* 0xac */

	715 0x0910,/* 0xad */

	716 0x090d,/* 0xae */

	717 0x0912,/* 0xaf */

	718 0x0913,/* 0xb0 */

	719 0x0914,/* 0xb1 */

	720 0x0911,/* 0xb2 */

	721 0x0915,/* 0xb3 */

	722 0x0916,/* 0xb4 */

	723 0x0917,/* 0xb5 */

	724 0x0918,/* 0xb6 */

	725 0x0919,/* 0xb7 */

	726 0x091a,/* 0xb8 */

	727 0x091b,/* 0xb9 */

	728 0x091c,/* 0xba */

	729 0x091d,/* 0xbb */

	730 0x091e,/* 0xbc */

	731 0x091f,/* 0xbd */

	732 0x0920,/* 0xbe */

	733 0x0921,/* 0xbf */

	734 0x0922,/* 0xc0 */

	735 0x0923,/* 0xc1 */

	736 0x0924,/* 0xc2 */

	737 0x0925,/* 0xc3 */

	738 0x0926,/* 0xc4 */

	739 0x0927,/* 0xc5 */

	740 0x0928,/* 0xc6 */

	741 0x0929,/* 0xc7 */

	742 0x092a,/* 0xc8 */

	743 0x092b,/* 0xc9 */

	744 0x092c,/* 0xca */

	745 0x092d,/* 0xcb */

	746 0x092e,/* 0xcc */

	747 0x092f,/* 0xcd */

	748 0x095f,/* 0xce */

	749 0x0930,/* 0xcf */

	750 0x0931,/* 0xd0 */

	751 0x0932,/* 0xd1 */

	752 0x0933,/* 0xd2 */

	753 0x0934,/* 0xd3 */

	754 0x0935,/* 0xd4 */

	755 0x0936,/* 0xd5 */

	756 0x0937,/* 0xd6 */

	757 0x0938,/* 0xd7 */

	758 0x0939,/* 0xd8 */

	759 0x200D,/* 0xd9 */

	760 0x093e,/* 0xda */

	761 0x093f,/* 0xdb */

	762 0x0940,/* 0xdc */

	763 0x0941,/* 0xdd */

	764 0x0942,/* 0xde */

	765 0x0943,/* 0xdf */

	766 0x0946,/* 0xe0 */

	767 0x0947,/* 0xe1 */

	768 0x0948,/* 0xe2 */

	769 0x0945,/* 0xe3 */

	770 0x094a,/* 0xe4 */

	771 0x094b,/* 0xe5 */

	772 0x094c,/* 0xe6 */

	773 0x0949,/* 0xe7 */

	774 0x094d,/* 0xe8 */

	775 0x093c,/* 0xe9 */

	776 0x0964,/* 0xea */

	777 0xFFFF,/* 0xeb */

	778 0xFFFF,/* 0xec */

	779 0xFFFF,/* 0xed */

	780 0xFFFF,/* 0xee */

	781 0xFFFF,/* 0xef */

	782 0xFFFF,/* 0xf0 */

	783 0x0966,/* 0xf1 */

	784 0x0967,/* 0xf2 */

	785 0x0968,/* 0xf3 */

	786 0x0969,/* 0xf4 */

	787 0x096a,/* 0xf5 */

	788 0x096b,/* 0xf6 */

	789 0x096c,/* 0xf7 */

	790 0x096d,/* 0xf8 */

	791 0x096e,/* 0xf9 */

	792 0x096f,/* 0xfa */

	793 0xFFFF,/* 0xfb */

	794 0xFFFF,/* 0xfc */

	795 0xFFFF,/* 0xfd */

	796 0xFFFF,/* 0xfe */

	797 0xFFFF /* 0xff */

	798 };

	799

	/*
	 * Special cases for the ISCII vowel sign E byte.
	 * Row 0 is a header: its first element is the number of rows in the table
	 * (header included) and is used as the loop bound by the reader.
	 * Every following row is { preceding ISCII byte, code point to emit }.
	 */
	static const uint16_t vowelSignESpecialCases[][2]={
	    { 2 /*length of array*/ , 0 },
	    { 0xA4 , 0x0904 },
	};

	804

	/*
	 * Special cases for <ISCII byte> followed by the ISCII nukta byte.
	 * Row 0 is a header: its first element is the number of rows in the table
	 * (header included) and is used as the loop bound by the reader.
	 * Every following row is { preceding ISCII byte, code point to emit }.
	 */
	static const uint16_t nuktaSpecialCases[][2]={
	    { 16 /*length of array*/ , 0 },
	    { 0xA6 , 0x090c },
	    { 0xEA , 0x093D },
	    { 0xDF , 0x0944 },
	    { 0xA1 , 0x0950 },
	    { 0xb3 , 0x0958 },
	    { 0xb4 , 0x0959 },
	    { 0xb5 , 0x095a },
	    { 0xba , 0x095b },
	    { 0xbf , 0x095c },
	    { 0xC0 , 0x095d },
	    { 0xc9 , 0x095e },
	    { 0xAA , 0x0960 },
	    { 0xA7 , 0x0961 },
	    { 0xDB , 0x0962 },
	    { 0xDC , 0x0963 },
	};

	823

	824

	/*
	 * Appends the 1-, 2- or 3-byte unit held in targetByteUnit to the output.
	 *
	 *  - targetByteUnit <= 0xFF   : one byte.
	 *  - targetByteUnit <= 0xFFFF : two bytes, high byte first.
	 *  - otherwise                : three bytes, most significant first.
	 *
	 * Bytes that do not fit before targetLimit are queued in
	 * args->converter->charErrorBuffer and *err is set to
	 * U_BUFFER_OVERFLOW_ERROR.  When offsets is non-NULL one entry is written
	 * per byte stored in target: (source - args->source - 1), lowered by one
	 * for a three-byte unit.
	 *
	 * target, offsets and the error-buffer length are advanced in place, so
	 * they must be modifiable lvalues.  The arguments are expanded more than
	 * once; do not pass expressions with side effects.
	 */
	#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){ \
	    int32_t offset = (int32_t)(source - args->source-1); \
	    /* write the targetUniChar to target */ \
	    if(target < targetLimit){ \
	        if(targetByteUnit <= 0xFF){ \
	            *(target)++ = (uint8_t)(targetByteUnit); \
	            if(offsets){ \
	                *(offsets++) = offset; \
	            } \
	        }else{ \
	            if (targetByteUnit > 0xFFFF) { \
	                *(target)++ = (uint8_t)(targetByteUnit>>16); \
	                if (offsets) { \
	                    --offset; \
	                    *(offsets++) = offset; \
	                } \
	            } \
	            if (!(target < targetLimit)) { \
	                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
	                    (uint8_t)(targetByteUnit >> 8); \
	                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
	                    (uint8_t)targetByteUnit; \
	                *err = U_BUFFER_OVERFLOW_ERROR; \
	            } else { \
	                *(target)++ = (uint8_t)(targetByteUnit>>8); \
	                if(offsets){ \
	                    *(offsets++) = offset; \
	                } \
	                if(target < targetLimit){ \
	                    *(target)++ = (uint8_t) targetByteUnit; \
	                    if(offsets){ \
	                        *(offsets++) = offset ; \
	                    } \
	                }else{ \
	                    args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\
	                        (uint8_t) (targetByteUnit); \
	                    *err = U_BUFFER_OVERFLOW_ERROR; \
	                } \
	            } \
	        } \
	    }else{ \
	        /* no room at all: queue every significant byte */ \
	        if (targetByteUnit & 0xFF0000) { \
	            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
	                (uint8_t) (targetByteUnit >>16); \
	        } \
	        if(targetByteUnit & 0xFF00){ \
	            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
	                (uint8_t) (targetByteUnit >>8); \
	        } \
	        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
	            (uint8_t) (targetByteUnit); \
	        *err = U_BUFFER_OVERFLOW_ERROR; \
	    } \
	}

	879

	880 /* Rules:

	881 * Explicit Halant :

	882 * <HALANT> + <ZWNJ>

	883 * Soft Halant :

	884 * <HALANT> + <ZWJ>

	885 */

	886

	887 static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(

	888 UConverterFromUnicodeArgs * args, UErrorCode * err) {

	889 const UChar *source = args->source;

	890 const UChar *sourceLimit = args->sourceLimit;

	891 unsigned char target = (unsigned char ) args->target;

	892 unsigned char targetLimit = (unsigned char ) args->targetLimit;

	893 int32_t* offsets = args->offsets;

	894 uint32_t targetByteUnit = 0x0000;

	895 UChar32 sourceChar = 0x0000;

	896 UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the G urmukhi script. */

	897 UConverterDataISCII *converterData;

	898 uint16_t newDelta=0;

	899 uint16_t range = 0;

	900 UBool deltaChanged = FALSE;

	901

	902 if ((args->converter == NULL) \|\| (args->targetLimit < args->target) \|\| (args ->sourceLimit < args->source)) {

	903 *err = U_ILLEGAL_ARGUMENT_ERROR;

	904 return;

	905 }

	906 /* initialize data */

	907 converterData=(UConverterDataISCII*)args->converter->extraInfo;

	908 newDelta=converterData->currentDeltaFromUnicode;

	909 range = (uint16_t)(newDelta/DELTA);

	910

	911 if ((sourceChar = args->converter->fromUChar32)!=0) {

	912 goto getTrail;

	913 }

	914

	915 /writing the char to the output stream /

	916 while (source < sourceLimit) {

	917 /* Write the language code following LF only if LF is not the last chara cter. */

	918 if (args->converter->fromUnicodeStatus == LF) {

	919 targetByteUnit = ATR<<8;

	920 targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;

	921 args->converter->fromUnicodeStatus = 0x0000;

	922 /* now append ATR and language code */

	923 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,target ByteUnit,err);

	924 if (U_FAILURE(*err)) {

	925 break;

	926 }

	927 }

	928

	929 sourceChar = *source++;

	930 tempContextFromUnicode = converterData->contextCharFromUnicode;

	931

	932 targetByteUnit = missingCharMarker;

	933

	934 /check if input is in ASCII and C0 control codes range/

	935 if (sourceChar <= ASCII_END) {

	936 args->converter->fromUnicodeStatus = sourceChar;

	937 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,source Char,err);

	938 if (U_FAILURE(*err)) {

	939 break;

	940 }

	941 continue;

	942 }

	943 switch (sourceChar) {

	944 case ZWNJ:

	945 /* contextChar has HALANT */

	946 if (converterData->contextCharFromUnicode) {

	947 converterData->contextCharFromUnicode = 0x00;

	948 targetByteUnit = ISCII_HALANT;

	949 } else {

	950 /* consume ZWNJ and continue */

	951 converterData->contextCharFromUnicode = 0x00;

	952 continue;

	953 }

	954 break;

	955 case ZWJ:

	956 /* contextChar has HALANT */

	957 if (converterData->contextCharFromUnicode) {

	958 targetByteUnit = ISCII_NUKTA;

	959 } else {

	960 targetByteUnit =ISCII_INV;

	961 }

	962 converterData->contextCharFromUnicode = 0x00;

	963 break;

	964 default:

	965 /* is the sourceChar in the INDIC_RANGE? */

	966 if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {

	967 /* Danda and Double Danda are valid in Northern scripts.. since Unicode

	968 * does not include these codepoints in all Northern scrips we n eed to

	969 * filter them out

	970 */

	971 if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {

	972 /* find out to which block the souceChar belongs*/

	973 range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);

	974 newDelta =(uint16_t)(range*DELTA);

	975

	976 /* Now are we in the same block as the previous? */

	977 if (newDelta!= converterData->currentDeltaFromUnicode \|\| con verterData->isFirstBuffer) {

	978 converterData->currentDeltaFromUnicode = newDelta;

	979 converterData->currentMaskFromUnicode = lookupInitialDat a[range].maskEnum;

	980 deltaChanged =TRUE;

	981 converterData->isFirstBuffer=FALSE;

	982 }

	983

	984 if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {

	985 if (sourceChar == PNJ_TIPPI) {

	986 /* Make sure Tippi is converterd to Bindi. */

	987 sourceChar = PNJ_BINDI;

	988 } else if (sourceChar == PNJ_ADHAK) {

	989 /* This is for consonant cluster handling. */

	990 converterData->contextCharFromUnicode = PNJ_ADHAK;

	991 }

	992

	993 }

	994 /* Normalize all Indic codepoints to Devanagari and map them to ISCII */

	995 /* now subtract the new delta from sourceChar*/

	996 sourceChar -= converterData->currentDeltaFromUnicode;

	997 }

	998

	999 /* get the target byte unit */

	1000 targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];

	1001

	1002 /* is the code point valid in current script? */

	1003 if ((validityTable[(uint8_t)sourceChar] & converterData->current MaskFromUnicode)==0) {

	1004 /* Vocallic RR is assigned in ISCII Telugu and Unicode */

	1005 if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) \| \| sourceChar!=VOCALLIC_RR) {

	1006 targetByteUnit=missingCharMarker;

	1007 }

	1008 }

	1009

	1010 if (deltaChanged) {

	1011 /* we are in a script block which is different than

	1012 * previous sourceChar's script block write ATR and language codes

	1013 */

	1014 uint32_t temp=0;

	1015 temp =(uint16_t)(ATR<<8);

	1016 temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiL ang);

	1017 /* reset */

	1018 deltaChanged=FALSE;

	1019 /* now append ATR and language code */

	1020 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimi t,temp,err);

	1021 if (U_FAILURE(*err)) {

	1022 break;

	1023 }

	1024 }

	1025

	1026 if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sour ceChar + PNJ_DELTA) == PNJ_ADHAK) {

	1027 continue;

	1028 }

	1029 }

	1030 /* reset context char */

	1031 converterData->contextCharFromUnicode = 0x00;

	1032 break;

	1033 }

	1034 if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFr omUnicode == PNJ_ADHAK && uset_contains(PNJ_CONSONANT_SET, (sourceChar + PNJ_DEL TA))) {

	1035 /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */

	1036 /* reset context char */

	1037 converterData->contextCharFromUnicode = 0x0000;

	1038 targetByteUnit = targetByteUnit << 16 \| ISCII_HALANT << 8 \| targetBy teUnit;

	1039 /* write targetByteUnit to target */

	1040 WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, t argetByteUnit,err);

	1041 if (U_FAILURE(*err)) {

	1042 break;

	1043 }

	1044 } else if (targetByteUnit != missingCharMarker) {

	1045 if (targetByteUnit==ISCII_HALANT) {

	1046 converterData->contextCharFromUnicode = (UChar)targetByteUnit;

	1047 }

	1048 /* write targetByteUnit to target*/

	1049 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,target ByteUnit,err);

	1050 if (U_FAILURE(*err)) {

	1051 break;

	1052 }

	1053 } else {

	1054 /* oops.. the code point is unassigned */

	1055 /check if the char is a First surrogate/

	1056 if (UTF_IS_SURROGATE(sourceChar)) {

	1057 if (UTF_IS_SURROGATE_FIRST(sourceChar)) {

	1058 getTrail:

	1059 /look ahead to find the trail surrogate/

	1060 if (source < sourceLimit) {

	1061 /* test the following code unit */

	1062 UChar trail= (*source);

	1063 if (UTF_IS_SECOND_SURROGATE(trail)) {

	1064 source++;

	1065 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);

	1066 *err =U_INVALID_CHAR_FOUND;

	1067 /* convert this surrogate code point */

	1068 /* exit this condition tree */

	1069 } else {

	1070 /* this is an unmatched lead code unit (1st surrogat e) */

	1071 /* callback(illegal) */

	1072 *err=U_ILLEGAL_CHAR_FOUND;

	1073 }

	1074 } else {

	1075 /* no more input */

	1076 *err = U_ZERO_ERROR;

	1077 }

	1078 } else {

	1079 /* this is an unmatched trail code unit (2nd surrogate) */

	1080 /* callback(illegal) */

	1081 *err=U_ILLEGAL_CHAR_FOUND;

	1082 }

	1083 } else {

	1084 /* callback(unassigned) for a BMP code point */

	1085 *err = U_INVALID_CHAR_FOUND;

	1086 }

	1087

	1088 args->converter->fromUChar32=sourceChar;

	1089 break;

	1090 }

	1091 }/* end while(mySourceIndex<mySourceLength) */

	1092

	1093 /save the state and return /

	1094 args->source = source;

	1095 args->target = (char*)target;

	1096 }

	1097

	1098 static const uint16_t lookupTable[][2]={

	1099 { ZERO, ZERO }, /DEFALT/

	1100 { ZERO, ZERO }, /ROMAN/

	1101 { DEVANAGARI, DEV_MASK },

	1102 { BENGALI, BNG_MASK },

	1103 { TAMIL, TML_MASK },

	1104 { TELUGU, KND_MASK },

	1105 { BENGALI, BNG_MASK },

	1106 { ORIYA, ORI_MASK },

	1107 { KANNADA, KND_MASK },

	1108 { MALAYALAM, MLM_MASK },

	1109 { GUJARATI, GJR_MASK },

	1110 { GURMUKHI, PNJ_MASK }

	1111 };

	1112

	/*
	 * Writes one UChar to the Unicode output.
	 *
	 * targetUniChar is first shifted by delta unless it is <= ASCII_END or is
	 * one of ZWJ, ZWNJ, DANDA, DOUBLE_DANDA.  The shift is applied to the
	 * argument itself, so targetUniChar must be a modifiable lvalue and keeps
	 * the shifted value afterwards.
	 *
	 * If target is still below args->targetLimit the unit is stored and, when
	 * offsets is non-NULL, offset is recorded; otherwise the unit is queued in
	 * args->converter->UCharErrorBuffer and *err becomes
	 * U_BUFFER_OVERFLOW_ERROR.  The source parameter is not used by the
	 * expansion.
	 */
	#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\
	    /* add offset to current Indic Block */ \
	    if(targetUniChar>ASCII_END && \
	           targetUniChar != ZWJ && \
	           targetUniChar != ZWNJ && \
	           targetUniChar != DANDA && \
	           targetUniChar != DOUBLE_DANDA){ \
	        \
	        targetUniChar+=(uint16_t)(delta); \
	    } \
	    /* now write the targetUniChar */ \
	    if(target<args->targetLimit){ \
	        *(target)++ = (UChar)targetUniChar; \
	        if(offsets){ \
	            *(offsets)++ = (int32_t)(offset); \
	        } \
	    }else{ \
	        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \
	            (UChar)targetUniChar; \
	        *err = U_BUFFER_OVERFLOW_ERROR; \
	    } \
	}

	1135

	/*
	 * Looks up the Unicode mapping of an ISCII byte.
	 *
	 * targetUniChar receives toUnicodeTable[sourceChar].  For bytes above
	 * ASCII_END the result is replaced by missingCharMarker when validityTable
	 * says the code point is not valid under data->currentMaskToUnicode; the
	 * single exception is VOCALLIC_RR while the current delta is TELUGU_DELTA.
	 */
	#define GET_MAPPING(sourceChar,targetUniChar,data){ \
	    targetUniChar = toUnicodeTable[(sourceChar)] ; \
	    /* is the code point valid in current script? */ \
	    if(sourceChar> ASCII_END && \
	            (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode)==0){ \
	        /* Vocallic RR is assigned in ISCII Telugu and Unicode */ \
	        if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \
	                    targetUniChar!=VOCALLIC_RR){ \
	            targetUniChar=missingCharMarker; \
	        } \
	    } \
	}

	1148

	1149 /***********

	1150 * Rules for ISCII to Unicode converter

	1151 * ISCII is stateful encoding. To convert ISCII bytes to Unicode,

	1152 * which has both precomposed and decomposed forms characters

	1153 * pre-context and post-context need to be considered.

	1154 *

	1155 * Post context

	1156 * i) ATR : Attribute code is used to declare the font and script switching.

	1157 * Currently we only switch scripts and font codes consumed without generat ing an error

	1158 * ii) EXT : Extention code is used to declare switching to Sanskrit and for ob scure,

	1159 * obsolete characters

	1160 * Pre context

	1161 * i) Halant: if preceeded by a halant then it is a explicit halant

	1162 * ii) Nukta :

	1163 * a) if preceeded by a halant then it is a soft halant

	1164 * b) if preceeded by specific consonants and the ligatures have pre-compo sed

	1165 * characters in Unicode then convert to pre-composed characters

	1166 * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda

	1167 *

	1168 */

	1169

	1170 static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs ar gs, UErrorCode err) {

	1171 const char source = ( char ) args->source;

	1172 UChar *target = args->target;

	1173 const char *sourceLimit = args->sourceLimit;

	1174 const UChar* targetLimit = args->targetLimit;

	1175 uint32_t targetUniChar = 0x0000;

	1176 uint8_t sourceChar = 0x0000;

	1177 UConverterDataISCII* data;

	1178 UChar32* toUnicodeStatus=NULL;

	1179 UChar32 tempTargetUniChar = 0x0000;

	1180 UChar* contextCharToUnicode= NULL;

	1181 UBool found;

	1182 int i;

	1183 int offset = 0;

	1184

	1185 if ((args->converter == NULL) \|\| (target < args->target) \|\| (source < args-> source)) {

	1186 *err = U_ILLEGAL_ARGUMENT_ERROR;

	1187 return;

	1188 }

	1189

	1190 data = (UConverterDataISCII*)(args->converter->extraInfo);

	1191 contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISC II codepoint visited */

	1192 toUnicodeStatus = (UChar32)&args->converter->toUnicodeStatus;/ contains th e mapping to Unicode of the above codepoint*/

	1193

	1194 while (U_SUCCESS(*err) && source<sourceLimit) {

	1195

	1196 targetUniChar = missingCharMarker;

	1197

	1198 if (target < targetLimit) {

	1199 sourceChar = (unsigned char)*(source)++;

	1200

	1201 /* look at the post-context preform special processing */

	1202 if (*contextCharToUnicode==ATR) {

	1203

	1204 /* If we have ATR in *contextCharToUnicode then we need to chang e our

	1205 * state to the Indic Script specified by sourceChar

	1206 */

	1207

	1208 /* check if the sourceChar is supported script range*/

	1209 if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {

	1210 data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceC har & 0x0F][0] * DELTA);

	1211 data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceCha r & 0x0F][1];

	1212 } else if (sourceChar==DEF) {

	1213 /* switch back to default */

	1214 data->currentDeltaToUnicode = data->defDeltaToUnicode;

	1215 data->currentMaskToUnicode = data->defMaskToUnicode;

	1216 } else {

	1217 if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {

	1218 /* these are display codes consume and continue */

	1219 } else {

	1220 *err =U_ILLEGAL_CHAR_FOUND;

	1221 /* reset */

	1222 *contextCharToUnicode=NO_CHAR_MARKER;

	1223 goto CALLBACK;

	1224 }

	1225 }

	1226

	1227 /* reset */

	1228 *contextCharToUnicode=NO_CHAR_MARKER;

	1229

	1230 continue;

	1231

	1232 } else if (*contextCharToUnicode==EXT) {

	1233 /* check if sourceChar is in 0xA1-0xEE range */

	1234 if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - E XT_RANGE_BEGIN)) {

	1235 /* We currently support only Anudatta and Devanagari abbrevi ation sign */

	1236 if (sourceChar==0xBF \|\| sourceChar == 0xB8) {

	1237 targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV _ANUDATTA;

	1238

	1239 /* find out if the mapping is valid in this state */

	1240 if (validityTable[(uint8_t)targetUniChar] & data->curren tMaskToUnicode) {

	1241 *contextCharToUnicode= NO_CHAR_MARKER;

	1242

	1243 /* Write the previous toUnicodeStatus, this was dela yed to handle consonant clustering for Gurmukhi script. */

	1244 if (data->prevToUnicodeStatus) {

	1245 WRITE_TO_TARGET_TO_U(args,source,target,args->of fsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);

	1246 data->prevToUnicodeStatus = 0x0000;

	1247 }

	1248 /* write to target */

	1249 WRITE_TO_TARGET_TO_U(args,source,target,args->offset s,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);

	1250

	1251 continue;

	1252 }

	1253 }

	1254 /* byte unit is unassigned */

	1255 targetUniChar = missingCharMarker;

	1256 *err= U_INVALID_CHAR_FOUND;

	1257 } else {

	1258 /* only 0xA1 - 0xEE are legal after EXT char */

	1259 *contextCharToUnicode= NO_CHAR_MARKER;

	1260 *err = U_ILLEGAL_CHAR_FOUND;

	1261 }

	1262 goto CALLBACK;

	1263 } else if (*contextCharToUnicode==ISCII_INV) {

	1264 if (sourceChar==ISCII_HALANT) {

	1265 targetUniChar = 0x0020; /* replace with space accoding to In dic FAQ */

	1266 } else {

	1267 targetUniChar = ZWJ;

	1268 }

	1269

	1270 /* Write the previous toUnicodeStatus, this was delayed to handl e consonant clustering for Gurmukhi script. */

	1271 if (data->prevToUnicodeStatus) {

	1272 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(sourc e-args->source -1),data->prevToUnicodeStatus,0,err);

	1273 data->prevToUnicodeStatus = 0x0000;

	1274 }

	1275 /* write to target */

	1276 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-ar gs->source -2),targetUniChar,data->currentDeltaToUnicode,err);

	1277 /* reset */

	1278 *contextCharToUnicode=NO_CHAR_MARKER;

	1279 }

	1280

	1281 /* look at the pre-context and perform special processing */

	1282 switch (sourceChar) {

	1283 case ISCII_INV:

	1284 case EXT: /falls through/

	1285 case ATR:

	1286 *contextCharToUnicode = (UChar)sourceChar;

	1287

	1288 if (*toUnicodeStatus != missingCharMarker) {

	1289 /* Write the previous toUnicodeStatus, this was delayed to h andle consonant clustering for Gurmukhi script. */

	1290 if (data->prevToUnicodeStatus) {

	1291 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(s ource-args->source -1),data->prevToUnicodeStatus,0,err);

	1292 data->prevToUnicodeStatus = 0x0000;

	1293 }

	1294 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(sourc e-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);

	1295 *toUnicodeStatus = missingCharMarker;

	1296 }

	1297 continue;

	1298 case ISCII_DANDA:

	1299 /* handle double danda*/

	1300 if (*contextCharToUnicode== ISCII_DANDA) {

	1301 targetUniChar = DOUBLE_DANDA;

	1302 /* clear the context */

	1303 *contextCharToUnicode = NO_CHAR_MARKER;

	1304 *toUnicodeStatus = missingCharMarker;

	1305 } else {

	1306 GET_MAPPING(sourceChar,targetUniChar,data);

	1307 *contextCharToUnicode = sourceChar;

	1308 }

	1309 break;

	1310 case ISCII_HALANT:

	1311 /* handle explicit halant */

	1312 if (*contextCharToUnicode == ISCII_HALANT) {

	1313 targetUniChar = ZWNJ;

	1314 /* clear the context */

	1315 *contextCharToUnicode = NO_CHAR_MARKER;

	1316 } else {

	1317 GET_MAPPING(sourceChar,targetUniChar,data);

	1318 *contextCharToUnicode = sourceChar;

	1319 }

	1320 break;

	1321 case 0x0A:

	1322 /* fall through */

	1323 case 0x0D:

	1324 data->resetToDefaultToUnicode = TRUE;

	1325 GET_MAPPING(sourceChar,targetUniChar,data)

	1326 ;

	1327 *contextCharToUnicode = sourceChar;

	1328 break;

	1329

	1330 case ISCII_VOWEL_SIGN_E:

	1331 i=1;

	1332 found=FALSE;

	1333 for (; i<vowelSignESpecialCases[0][0]; i++) {

	1334 if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUni code) {

	1335 targetUniChar=vowelSignESpecialCases[i][1];

	1336 found=TRUE;

	1337 break;

	1338 }

	1339 }

	1340 if (found) {

	1341 /* find out if the mapping is valid in this state */

	1342 if (validityTable[(uint8_t)targetUniChar] & data->currentMas kToUnicode) {

	1343 /targetUniChar += data->currentDeltaToUnicode ;/

	1344 *contextCharToUnicode= NO_CHAR_MARKER;

	1345 *toUnicodeStatus = missingCharMarker;

	1346 break;

	1347 }

	1348 }

	1349 GET_MAPPING(sourceChar,targetUniChar,data);

	1350 *contextCharToUnicode = sourceChar;

	1351 break;

	1352

	1353 case ISCII_NUKTA:

	1354 /* handle soft halant */

	1355 if (*contextCharToUnicode == ISCII_HALANT) {

	1356 targetUniChar = ZWJ;

	1357 /* clear the context */

	1358 *contextCharToUnicode = NO_CHAR_MARKER;

	1359 break;

	1360 } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->con textCharToUnicode == 0xc0) {

	1361 /* Write the previous toUnicodeStatus, this was delayed to h andle consonant clustering for Gurmukhi script. */

	1362 if (data->prevToUnicodeStatus) {

	1363 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(s ource-args->source -1),data->prevToUnicodeStatus,0,err);

	1364 data->prevToUnicodeStatus = 0x0000;

	1365 }

	1366 /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.

	1367 * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d \u0a39).

	1368 */

	1369 targetUniChar = PNJ_RRA;

	1370 WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (s ource-args->source)-2, targetUniChar, 0, err);

	1371 if (U_SUCCESS(*err)) {

	1372 targetUniChar = PNJ_SIGN_VIRAMA;

	1373 WRITE_TO_TARGET_TO_U(args, source, target, args->offsets , (source-args->source)-2, targetUniChar, 0, err);

	1374 if (U_SUCCESS(*err)) {

	1375 targetUniChar = PNJ_HA;

	1376 WRITE_TO_TARGET_TO_U(args, source, target, args->off sets, (source-args->source)-2, targetUniChar, 0, err);

	1377 } else {

	1378 args->converter->UCharErrorBuffer[args->converter->U CharErrorBufferLength++]= PNJ_HA;

	1379 }

	1380 } else {

	1381 args->converter->UCharErrorBuffer[args->converter->UChar ErrorBufferLength++]= PNJ_SIGN_VIRAMA;

	1382 args->converter->UCharErrorBuffer[args->converter->UChar ErrorBufferLength++]= PNJ_HA;

	1383 }

	1384 *toUnicodeStatus = missingCharMarker;

	1385 data->contextCharToUnicode = NO_CHAR_MARKER;

	1386 continue;

	1387 } else {

	1388 /* try to handle <CHAR> + ISCII_NUKTA special mappings */

	1389 i=1;

	1390 found =FALSE;

	1391 for (; i<nuktaSpecialCases[0][0]; i++) {

	1392 if (nuktaSpecialCases[i][0]==(uint8_t)

	1393 *contextCharToUnicode) {

	1394 targetUniChar=nuktaSpecialCases[i][1];

	1395 found =TRUE;

	1396 break;

	1397 }

	1398 }

	1399 if (found) {

	1400 /* find out if the mapping is valid in this state */

	1401 if (validityTable[(uint8_t)targetUniChar] & data->curren tMaskToUnicode) {

	1402 /targetUniChar += data->currentDeltaToUnicode ;/

	1403 *contextCharToUnicode= NO_CHAR_MARKER;

	1404 *toUnicodeStatus = missingCharMarker;

	1405 if (data->currentDeltaToUnicode == PNJ_DELTA) {

	1406 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

	1407 if (data->prevToUnicodeStatus) {

	1408 WRITE_TO_TARGET_TO_U(args,source,target,args ->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);

	1409 data->prevToUnicodeStatus = 0x0000;

	1410 }

	1411 WRITE_TO_TARGET_TO_U(args,source,target,args->of fsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);

	1412 continue;

	1413 }

	1414 break;

	1415 }

	1416 /* else fall through to default */

	1417 }

	1418 /* else fall through to default */

	1419 }

	1420 default:GET_MAPPING(sourceChar,targetUniChar,data)

	1421 ;

	1422 *contextCharToUnicode = sourceChar;

	1423 break;

	1424 }

	1425

	1426 if (*toUnicodeStatus != missingCharMarker) {

	1427 /* Check to make sure that consonant clusters are handled correc t for Gurmukhi script. */

	1428 if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnic odeStatus != 0 && uset_contains(PNJ_CONSONANT_SET, data->prevToUnicodeStatus) &&

	1429 (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (ta rgetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {

	1430 /* Consonant clusters C + HALANT + C should be encoded as AD HAK + C */

	1431 offset = (int)(source-args->source - 3);

	1432 tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */

	1433 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset ,tempTargetUniChar,0,err);

	1434 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset ,data->prevToUnicodeStatus,0,err);

	1435 data->prevToUnicodeStatus = 0x0000; /* reset the previous un icode code point */

	1436 *toUnicodeStatus = missingCharMarker;

	1437 continue;

	1438 } else {

	1439 /* Write the previous toUnicodeStatus, this was delayed to h andle consonant clustering for Gurmukhi script. */

	1440 if (data->prevToUnicodeStatus) {

	1441 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(s ource-args->source -1),data->prevToUnicodeStatus,0,err);

	1442 data->prevToUnicodeStatus = 0x0000;

	1443 }

	1444 /* Check to make sure that Bindi and Tippi are handled corre ctly for Gurmukhi script.

	1445 * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI _SET then the target codepoint should be Tippi instead of Bindi.

	1446 */

	1447 if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniCh ar + PNJ_DELTA) == PNJ_BINDI && uset_contains(PNJ_BINDI_TIPPI_SET, (*toUnicodeSt atus + PNJ_DELTA))) {

	1448 targetUniChar = PNJ_TIPPI - PNJ_DELTA;

	1449 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(s ource-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);

	1450 } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targ etUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && uset_contains(PNJ_CONSONANT_SET, (* toUnicodeStatus + PNJ_DELTA))) {

	1451 /* Store the current toUnicodeStatus code point for late r handling of consonant cluster in Gurmukhi. */

	1452 data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA ;

	1453 } else {

	1454 /* write the previously mapped codepoint */

	1455 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(s ource-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);

	1456 }

	1457 }

	1458 *toUnicodeStatus = missingCharMarker;

	1459 }

	1460

	1461 if (targetUniChar != missingCharMarker) {

	1462 /* now save the targetUniChar for delayed write */

	1463 *toUnicodeStatus = (UChar) targetUniChar;

	1464 if (data->resetToDefaultToUnicode==TRUE) {

	1465 data->currentDeltaToUnicode = data->defDeltaToUnicode;

	1466 data->currentMaskToUnicode = data->defMaskToUnicode;

	1467 data->resetToDefaultToUnicode=FALSE;

	1468 }

	1469 } else {

	1470

	1471 /* we reach here only if targetUniChar == missingCharMarker

	1472 * so assign codes to reason and err

	1473 */

	1474 *err = U_INVALID_CHAR_FOUND;

	1475 CALLBACK:

	1476 args->converter->toUBytes[0] = (uint8_t) sourceChar;

	1477 args->converter->toULength = 1;

	1478 break;

	1479 }

	1480

	1481 } else {

	1482 *err =U_BUFFER_OVERFLOW_ERROR;

	1483 break;

	1484 }

	1485 }

	1486

	1487 if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {

	1488 /* end of the input stream */

	1489 UConverter *cnv = args->converter;

	1490

	1491 if (contextCharToUnicode==ATR \|\| contextCharToUnicode==EXT \|\| *context CharToUnicode==ISCII_INV) {

	1492 /* set toUBytes[] */

	1493 cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;

	1494 cnv->toULength = 1;

	1495

	1496 /* avoid looping on truncated sequences */

	1497 *contextCharToUnicode = NO_CHAR_MARKER;

	1498 } else {

	1499 cnv->toULength = 0;

	1500 }

	1501

	1502 if (*toUnicodeStatus != missingCharMarker) {

	1503 /* output a remaining target character */

	1504 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args ->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);

	1505 *toUnicodeStatus = missingCharMarker;

	1506 }

	1507 }

	1508

	1509 args->target = target;

	1510 args->source = source;

	1511 }

	1512

	1513 /* structure for SafeClone calculations */

	1514 struct cloneISCIIStruct {

	1515 UConverter cnv;

	1516 UConverterDataISCII mydata;

	1517 };

	1518

	1519 static UConverter *

	1520 _ISCII_SafeClone(const UConverter *cnv,

	1521 void *stackBuffer,

	1522 int32_t *pBufferSize,

	1523 UErrorCode *status)

	1524 {

	1525 struct cloneISCIIStruct * localClone;

	1526 int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);

	1527

	1528 if (U_FAILURE(*status)) {

	1529 return 0;

	1530 }

	1531

	1532 if (pBufferSize == 0) { / 'preflighting' request - set needed size into p BufferSize /

	1533 *pBufferSize = bufferSizeNeeded;

	1534 return 0;

	1535 }

	1536

	1537 localClone = (struct cloneISCIIStruct *)stackBuffer;

	1538 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */

	1539

	1540 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII) );

	1541 localClone->cnv.extraInfo = &localClone->mydata;

	1542 localClone->cnv.isExtraLocal = TRUE;

	1543

	1544 return &localClone->cnv;

	1545 }

	1546

	1547 static void

	1548 _ISCIIGetUnicodeSet(const UConverter *cnv,

	1549 const USetAdder *sa,

	1550 UConverterUnicodeSet which,

	1551 UErrorCode *pErrorCode)

	1552 {

	1553 int32_t idx, script;

	1554 uint8_t mask;

	1555

	1556 /* Since all ISCII versions allow switching to other ISCII

	1557 scripts, we add all roundtrippable characters to this set. */

	1558 sa->addRange(sa->set, 0, ASCII_END);

	1559 for (script = DEVANAGARI; script <= MALAYALAM; script++) {

	1560 mask = (uint8_t)(lookupInitialData[script].maskEnum);

	1561 for (idx = 0; idx < DELTA; idx++) {

	1562 /* added check for TELUGU character */

	1563 if ((validityTable[idx] & mask) \|\| (script==TELUGU && idx==0x31)) {

	1564 sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);

	1565 }

	1566 }

	1567 }

	1568 sa->add(sa->set, DANDA);

	1569 sa->add(sa->set, DOUBLE_DANDA);

	1570 sa->add(sa->set, ZWNJ);

	1571 sa->add(sa->set, ZWJ);

	1572 }

	1573

	1574 static const UConverterImpl _ISCIIImpl={

	1575

	1576 UCNV_ISCII,

	1577

	1578 NULL,

	1579 NULL,

	1580

	1581 _ISCIIOpen,

	1582 _ISCIIClose,

	1583 _ISCIIReset,

	1584

	1585 UConverter_toUnicode_ISCII_OFFSETS_LOGIC,

	1586 UConverter_toUnicode_ISCII_OFFSETS_LOGIC,

	1587 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,

	1588 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,

	1589 NULL,

	1590

	1591 NULL,

	1592 _ISCIIgetName,

	1593 NULL,

	1594 _ISCII_SafeClone,

	1595 _ISCIIGetUnicodeSet

	1596 };

	1597

	1598 static const UConverterStaticData _ISCIIStaticData={

	1599 sizeof(UConverterStaticData),

	1600 "ISCII",

	1601 0,

	1602 UCNV_IBM,

	1603 UCNV_ISCII,

	1604 1,

	1605 4,

	1606 { 0x1a, 0, 0, 0 },

	1607 0x1,

	1608 FALSE,

	1609 FALSE,

	1610 0x0,

	1611 0x0,

	1612 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

	1613

	1614 };

	1615

	1616 const UConverterSharedData _ISCIIData={

	1617 sizeof(UConverterSharedData),

	1618 ~((uint32_t) 0),

	1619 NULL,

	1620 NULL,

	1621 &_ISCIIStaticData,

	1622 FALSE,

	1623 &_ISCIIImpl,

	1624 0

	1625 };

	1626

	1627 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

OLD	NEW

« no previous file with comments | « icu46/source/common/ucnvhz.c ('k') | icu46/source/common/ucnvlat1.c » ('j') | no next file with comments »