icu46/source/common/ucnvisci.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/common/ucnvisci.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/common/ucnvisci.c

===================================================================

--- icu46/source/common/ucnvisci.c (revision 0)

+++ icu46/source/common/ucnvisci.c (revision 0)

@@ -0,0 +1,1627 @@

+/*

+**********************************************************************

+* file name: ucnvisci.c

+* encoding: US-ASCII

+* tab size: 8 (not used)

+* indentation:4

+* created on: 2001JUN26

+* created by: Ram Viswanadha

+* Date Name Description

+* 24/7/2001 Ram Added support for EXT character handling

+*/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

+#include "cmemory.h"

+#include "ucnv_bld.h"

+#include "unicode/ucnv.h"

+#include "ucnv_cnv.h"

+#include "unicode/ucnv_cb.h"

+#include "unicode/uset.h"

+#include "cstring.h"

+#define UCNV_OPTIONS_VERSION_MASK 0xf

+#define NUKTA 0x093c

+#define HALANT 0x094d

+#define ZWNJ 0x200c /* Zero Width Non Joiner */

+#define ZWJ 0x200d /* Zero width Joiner */

+#define INVALID_CHAR 0xffff

+#define ATR 0xEF /* Attribute code */

+#define EXT 0xF0 /* Extension code */

+#define DANDA 0x0964

+#define DOUBLE_DANDA 0x0965

+#define ISCII_NUKTA 0xE9

+#define ISCII_HALANT 0xE8

+#define ISCII_DANDA 0xEA

+#define ISCII_INV 0xD9

+#define ISCII_VOWEL_SIGN_E 0xE0

+#define INDIC_BLOCK_BEGIN 0x0900

+#define INDIC_BLOCK_END 0x0D7F

+#define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)

+#define VOCALLIC_RR 0x0931

+#define LF 0x0A

+#define ASCII_END 0xA0

+#define NO_CHAR_MARKER 0xFFFE

+#define TELUGU_DELTA (DELTA * TELUGU) /* delta of the TELUGU script index; parenthesized so the macro always expands as a single operand */

+#define DEV_ABBR_SIGN 0x0970

+#define DEV_ANUDATTA 0x0952

+#define EXT_RANGE_BEGIN 0xA1

+#define EXT_RANGE_END 0xEE

+#define PNJ_DELTA 0x0100

+#define PNJ_BINDI 0x0A02

+#define PNJ_TIPPI 0x0A70

+#define PNJ_SIGN_VIRAMA 0x0A4D

+#define PNJ_ADHAK 0x0A71

+#define PNJ_HA 0x0A39

+#define PNJ_RRA 0x0A5C

+static USet* PNJ_BINDI_TIPPI_SET= NULL;

+static USet* PNJ_CONSONANT_SET= NULL;

+typedef enum { /* Script indices; _ISCIIOpen multiplies an index by DELTA to form the 16-bit delta stored in the converter data. */

+ DEVANAGARI =0,

+ BENGALI,

+ GURMUKHI,

+ GUJARATI,

+ ORIYA,

+ TAMIL,

+ TELUGU,

+ KANNADA,

+ MALAYALAM,

+ DELTA=0x80 /* multiplier applied to a script index; not one of the nine entries of lookupInitialData */

+}UniLang;

+/**

+ * Enumeration for switching code pages if <ATR>+<one of below values>

+ * is encountered

+ */

+typedef enum { /* Byte values that follow the ATR (0xEF) attribute code. */

+ DEF = 0x40,

+ RMN = 0x41,

+ DEV = 0x42,

+ BNG = 0x43,

+ TML = 0x44,

+ TLG = 0x45,

+ ASM = 0x46,

+ ORI = 0x47,

+ KND = 0x48,

+ MLM = 0x49,

+ GJR = 0x4A,

+ PNJ = 0x4B,

+ ARB = 0x71,

+ PES = 0x72,

+ URD = 0x73,

+ SND = 0x74,

+ KSM = 0x75,

+ PST = 0x76

+}ISCIILang; /* lookupInitialData pairs DEV, BNG, PNJ, GJR, ORI, TML, TLG, KND and MLM with a UniLang script index */

+typedef enum { /* One bit per script column of validityTable; each table entry is a sum of these values. */

+ DEV_MASK =0x80,

+ PNJ_MASK =0x40,

+ GJR_MASK =0x20,

+ ORI_MASK =0x10,

+ BNG_MASK =0x08,

+ KND_MASK =0x04, /* also used for TELUGU in lookupInitialData */

+ MLM_MASK =0x02,

+ TML_MASK =0x01,

+ ZERO =0x00 /* placeholder term in validityTable sums for a script whose bit is not set */

+}MaskEnum;

+#define ISCII_CNV_PREFIX "ISCII,version="

+typedef struct { /* Per-converter ISCII state; allocated in _ISCIIOpen and stored in cnv->extraInfo. */

+ UChar contextCharToUnicode; /* previous Unicode code point for contextual analysis (toUnicode direction) */

+ UChar contextCharFromUnicode; /* previous Unicode code point for contextual analysis (fromUnicode direction) */

+ uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered; _ISCIIReset also copies it into currentDeltaFromUnicode */

+ uint16_t currentDeltaFromUnicode; /* current delta in Indic block (fromUnicode direction) */

+ uint16_t currentDeltaToUnicode; /* current delta in Indic block (toUnicode direction) */

+ MaskEnum currentMaskFromUnicode; /* mask for current state in fromUnicode */

+ MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */

+ MaskEnum defMaskToUnicode; /* mask for default state in toUnicode; _ISCIIReset also copies it into currentMaskFromUnicode */

+ UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */

+ UBool resetToDefaultToUnicode; /* boolean for resetting to default delta and mask when a newline is encountered */

+ char name[sizeof(ISCII_CNV_PREFIX) + 1]; /* sizeof() already counts the prefix's NUL; the extra byte holds the single version digit written by _ISCIIOpen */

+ UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */

+} UConverterDataISCII;

+typedef struct LookupDataStruct { /* One row of lookupInitialData: the three identifiers used for a single script. */

+ UniLang uniLang; /* script index; multiplied by DELTA in _ISCIIOpen */

+ MaskEnum maskEnum; /* validityTable bit for the script */

+ ISCIILang isciiLang; /* byte written after ATR by the fromUnicode code */

+} LookupDataStruct;

+static const LookupDataStruct lookupInitialData[]={ /* Nine rows; _ISCIIOpen indexes this with the version option (0..8), so the row order must follow UniLang. */

+ { DEVANAGARI, DEV_MASK, DEV },

+ { BENGALI, BNG_MASK, BNG },

+ { GURMUKHI, PNJ_MASK, PNJ },

+ { GUJARATI, GJR_MASK, GJR },

+ { ORIYA, ORI_MASK, ORI },

+ { TAMIL, TML_MASK, TML },

+ { TELUGU, KND_MASK, TLG }, /* Telugu shares the Kannada bit; the two scripts use one validityTable column */

+ { KANNADA, KND_MASK, KND },

+ { MALAYALAM, MLM_MASK, MLM }

+};

+static void initializeSets() { /* Fills the file-level sets PNJ_CONSONANT_SET and PNJ_BINDI_TIPPI_SET; called from _ISCIIOpen. */

+ /* TODO: Replace the following two lines with PNJ_CONSONANT_SET = uset_openEmpty(); */

+ PNJ_CONSONANT_SET = uset_open(0,0); /* NOTE(review): overwrites the global without closing a set left by an earlier call, and the result is not checked for NULL */

+ uset_clear(PNJ_CONSONANT_SET);

+ uset_addRange(PNJ_CONSONANT_SET, 0x0A15, 0x0A28);

+ uset_addRange(PNJ_CONSONANT_SET, 0x0A2A, 0x0A30);

+ uset_addRange(PNJ_CONSONANT_SET, 0x0A35, 0x0A36);

+ uset_addRange(PNJ_CONSONANT_SET, 0x0A38, 0x0A39);

+ PNJ_BINDI_TIPPI_SET = uset_clone(PNJ_CONSONANT_SET); /* starts as a copy of the consonant set, then gains the code points added below */

+ uset_add(PNJ_BINDI_TIPPI_SET, 0x0A05);

+ uset_add(PNJ_BINDI_TIPPI_SET, 0x0A07);

+ uset_add(PNJ_BINDI_TIPPI_SET, 0x0A3F);

+ uset_addRange(PNJ_BINDI_TIPPI_SET, 0x0A41, 0x0A42);

+ uset_compact(PNJ_CONSONANT_SET);

+ uset_compact(PNJ_BINDI_TIPPI_SET);

+static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) { /* Allocates and initializes the UConverterDataISCII for cnv; reports failure through *errorCode. */

+ if(pArgs->onlyTestIsLoadable) {

+ return; /* nothing is allocated for a load-only test */

+ }

+ /* Ensure that the sets used in special handling of certain Gurmukhi characters are initialized. */

+ initializeSets(); /* NOTE(review): runs on every open, before the allocation and version checks below */

+ cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));

+ if (cnv->extraInfo != NULL) {

+ int32_t len=0;

+ UConverterDataISCII *converterData=

+ (UConverterDataISCII *) cnv->extraInfo;

+ converterData->contextCharToUnicode=NO_CHAR_MARKER;

+ cnv->toUnicodeStatus = missingCharMarker;

+ converterData->contextCharFromUnicode=0x0000;

+ converterData->resetToDefaultToUnicode=FALSE;

+ /* check if the version requested is supported: versions 0..8 select one of the nine rows of lookupInitialData */

+ if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {

+ /* initialize state variables: both directions start from the same delta and mask, which are also kept as the defaults */

+ converterData->currentDeltaFromUnicode

+ = converterData->currentDeltaToUnicode

+ = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);

+ converterData->currentMaskFromUnicode

+ = converterData->currentMaskToUnicode

+ = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;

+ converterData->isFirstBuffer=TRUE;

+ (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX); /* name becomes the prefix followed by one version digit */

+ len = (int32_t)uprv_strlen(converterData->name);

+ converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');

+ converterData->name[len+1]=0; /* last byte of name[], which is sized sizeof(ISCII_CNV_PREFIX) + 1 */

+ converterData->prevToUnicodeStatus = 0x0000;

+ } else {

+ uprv_free(cnv->extraInfo); /* unsupported version: release the state and leave extraInfo NULL */

+ cnv->extraInfo = NULL;

+ *errorCode = U_ILLEGAL_ARGUMENT_ERROR;

+ }

+ } else {

+ *errorCode =U_MEMORY_ALLOCATION_ERROR;

+ }

+static void _ISCIIClose(UConverter *cnv) { /* Releases cnv->extraInfo and closes the two file-level Gurmukhi sets. */

+ if (cnv->extraInfo!=NULL) {

+ if (!cnv->isExtraLocal) { /* free only when isExtraLocal is not set */

+ uprv_free(cnv->extraInfo);

+ }

+ cnv->extraInfo=NULL;

+ }

+ if (PNJ_CONSONANT_SET != NULL) { /* NOTE(review): these sets are file-level statics, so closing them here affects every ISCII converter, not only cnv */

+ uset_close(PNJ_CONSONANT_SET);

+ PNJ_CONSONANT_SET = NULL;

+ }

+ if (PNJ_BINDI_TIPPI_SET != NULL) {

+ uset_close(PNJ_BINDI_TIPPI_SET);

+ PNJ_BINDI_TIPPI_SET = NULL;

+ }

+static const char* _ISCIIgetName(const UConverter* cnv) { /* Returns the name string built by _ISCIIOpen, or NULL when the converter has no extraInfo. */

+ if (cnv->extraInfo) {

+ UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;

+ return myData->name; /* points into the converter's own data, so it is valid only while extraInfo is */

+ }

+ return NULL;

+static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) { /* Restores the default state for the direction(s) selected by choice. */

+ UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo); /* dereferenced below without a NULL check */

+ if (choice<=UCNV_RESET_TO_UNICODE) { /* toUnicode state */

+ cnv->toUnicodeStatus = missingCharMarker;

+ cnv->mode=0;

+ data->currentDeltaToUnicode=data->defDeltaToUnicode;

+ data->currentMaskToUnicode = data->defMaskToUnicode;

+ data->contextCharToUnicode=NO_CHAR_MARKER;

+ data->prevToUnicodeStatus = 0x0000;

+ }

+ if (choice!=UCNV_RESET_TO_UNICODE) { /* fromUnicode state; the defaults are shared with the toUnicode direction */

+ cnv->fromUChar32=0x0000;

+ data->contextCharFromUnicode=0x00;

+ data->currentMaskFromUnicode=data->defMaskToUnicode;

+ data->currentDeltaFromUnicode=data->defDeltaToUnicode;

+ data->isFirstBuffer=TRUE;

+ data->resetToDefaultToUnicode=FALSE; /* NOTE(review): a ToUnicode flag cleared in the fromUnicode branch; a reset with UCNV_RESET_TO_UNICODE leaves it untouched - confirm this is intended */

+ }

+/**

+ * The values in validity table are indexed by the lower bits of Unicode

+ * range 0x0900 - 0x097f. The values have a structure like:

+ * ---------------------------------------------------------------

+ * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML |

+ * | | | | | ASM | KND | | |

+ * ---------------------------------------------------------------

+ * If a code point is valid in a particular script

+ * then that bit is turned on

+ *

+ * Unicode does not distinguish between Bengali and Assamese so we use 1 bit

+ * to represent these languages

+ *

+ * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case

+ * and combine and use 1 bit to represent these languages.

+ *

+ * TODO: It is probably easier to understand and maintain to change this

+ * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.

+ */

+static const uint8_t validityTable[128] = {

+/* This state table is tool generated; please do not edit it unless you know exactly what you are doing. */

+/* Note: This table was edited to mirror the Windows XP implementation */

+/*ISCII:Valid:Unicode */

+/*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

+/*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK ,

+/*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,

+/*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,

+/*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO ,

+/*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO ,

+/*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO ,

+/*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO ,

+/*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,

+/*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,

+/*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,

+/*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,

+/*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,

+/*

+ * The length of the array is 128 to provide values for 0x900..0x97f.

+ * The last 15 entries for 0x971..0x97f of the validity table are all zero

+ * because no Indic script uses such Unicode code points.

+ */

+/*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO

+};

+static const uint16_t fromUnicodeTable[128]={

+ 0x00a0 ,/* 0x0900 */

+ 0x00a1 ,/* 0x0901 */

+ 0x00a2 ,/* 0x0902 */

+ 0x00a3 ,/* 0x0903 */

+ 0xa4e0 ,/* 0x0904 */

+ 0x00a4 ,/* 0x0905 */

+ 0x00a5 ,/* 0x0906 */

+ 0x00a6 ,/* 0x0907 */

+ 0x00a7 ,/* 0x0908 */

+ 0x00a8 ,/* 0x0909 */

+ 0x00a9 ,/* 0x090a */

+ 0x00aa ,/* 0x090b */

+ 0xA6E9 ,/* 0x090c */

+ 0x00ae ,/* 0x090d */

+ 0x00ab ,/* 0x090e */

+ 0x00ac ,/* 0x090f */

+ 0x00ad ,/* 0x0910 */

+ 0x00b2 ,/* 0x0911 */

+ 0x00af ,/* 0x0912 */

+ 0x00b0 ,/* 0x0913 */

+ 0x00b1 ,/* 0x0914 */

+ 0x00b3 ,/* 0x0915 */

+ 0x00b4 ,/* 0x0916 */

+ 0x00b5 ,/* 0x0917 */

+ 0x00b6 ,/* 0x0918 */

+ 0x00b7 ,/* 0x0919 */

+ 0x00b8 ,/* 0x091a */

+ 0x00b9 ,/* 0x091b */

+ 0x00ba ,/* 0x091c */

+ 0x00bb ,/* 0x091d */

+ 0x00bc ,/* 0x091e */

+ 0x00bd ,/* 0x091f */

+ 0x00be ,/* 0x0920 */

+ 0x00bf ,/* 0x0921 */

+ 0x00c0 ,/* 0x0922 */

+ 0x00c1 ,/* 0x0923 */

+ 0x00c2 ,/* 0x0924 */

+ 0x00c3 ,/* 0x0925 */

+ 0x00c4 ,/* 0x0926 */

+ 0x00c5 ,/* 0x0927 */

+ 0x00c6 ,/* 0x0928 */

+ 0x00c7 ,/* 0x0929 */

+ 0x00c8 ,/* 0x092a */

+ 0x00c9 ,/* 0x092b */

+ 0x00ca ,/* 0x092c */

+ 0x00cb ,/* 0x092d */

+ 0x00cc ,/* 0x092e */

+ 0x00cd ,/* 0x092f */

+ 0x00cf ,/* 0x0930 */

+ 0x00d0 ,/* 0x0931 */

+ 0x00d1 ,/* 0x0932 */

+ 0x00d2 ,/* 0x0933 */

+ 0x00d3 ,/* 0x0934 */

+ 0x00d4 ,/* 0x0935 */

+ 0x00d5 ,/* 0x0936 */

+ 0x00d6 ,/* 0x0937 */

+ 0x00d7 ,/* 0x0938 */

+ 0x00d8 ,/* 0x0939 */

+ 0xFFFF ,/* 0x093A */

+ 0xFFFF ,/* 0x093B */

+ 0x00e9 ,/* 0x093c */

+ 0xEAE9 ,/* 0x093d */

+ 0x00da ,/* 0x093e */

+ 0x00db ,/* 0x093f */

+ 0x00dc ,/* 0x0940 */

+ 0x00dd ,/* 0x0941 */

+ 0x00de ,/* 0x0942 */

+ 0x00df ,/* 0x0943 */

+ 0xDFE9 ,/* 0x0944 */

+ 0x00e3 ,/* 0x0945 */

+ 0x00e0 ,/* 0x0946 */

+ 0x00e1 ,/* 0x0947 */

+ 0x00e2 ,/* 0x0948 */

+ 0x00e7 ,/* 0x0949 */

+ 0x00e4 ,/* 0x094a */

+ 0x00e5 ,/* 0x094b */

+ 0x00e6 ,/* 0x094c */

+ 0x00e8 ,/* 0x094d */

+ 0x00ec ,/* 0x094e */

+ 0x00ed ,/* 0x094f */

+ 0xA1E9 ,/* 0x0950 */ /* OM Symbol */

+ 0xFFFF ,/* 0x0951 */

+ 0xF0B8 ,/* 0x0952 */

+ 0xFFFF ,/* 0x0953 */

+ 0xFFFF ,/* 0x0954 */

+ 0xFFFF ,/* 0x0955 */

+ 0xFFFF ,/* 0x0956 */

+ 0xFFFF ,/* 0x0957 */

+ 0xb3e9 ,/* 0x0958 */

+ 0xb4e9 ,/* 0x0959 */

+ 0xb5e9 ,/* 0x095a */

+ 0xbae9 ,/* 0x095b */

+ 0xbfe9 ,/* 0x095c */

+ 0xC0E9 ,/* 0x095d */

+ 0xc9e9 ,/* 0x095e */

+ 0x00ce ,/* 0x095f */

+ 0xAAe9 ,/* 0x0960 */

+ 0xA7E9 ,/* 0x0961 */

+ 0xDBE9 ,/* 0x0962 */

+ 0xDCE9 ,/* 0x0963 */

+ 0x00ea ,/* 0x0964 */

+ 0xeaea ,/* 0x0965 */

+ 0x00f1 ,/* 0x0966 */

+ 0x00f2 ,/* 0x0967 */

+ 0x00f3 ,/* 0x0968 */

+ 0x00f4 ,/* 0x0969 */

+ 0x00f5 ,/* 0x096a */

+ 0x00f6 ,/* 0x096b */

+ 0x00f7 ,/* 0x096c */

+ 0x00f8 ,/* 0x096d */

+ 0x00f9 ,/* 0x096e */

+ 0x00fa ,/* 0x096f */

+ 0xF0BF ,/* 0x0970 */

+ 0xFFFF ,/* 0x0971 */

+ 0xFFFF ,/* 0x0972 */

+ 0xFFFF ,/* 0x0973 */

+ 0xFFFF ,/* 0x0974 */

+ 0xFFFF ,/* 0x0975 */

+ 0xFFFF ,/* 0x0976 */

+ 0xFFFF ,/* 0x0977 */

+ 0xFFFF ,/* 0x0978 */

+ 0xFFFF ,/* 0x0979 */

+ 0xFFFF ,/* 0x097a */

+ 0xFFFF ,/* 0x097b */

+ 0xFFFF ,/* 0x097c */

+ 0xFFFF ,/* 0x097d */

+ 0xFFFF ,/* 0x097e */

+ 0xFFFF ,/* 0x097f */

+};

+static const uint16_t toUnicodeTable[256]={

+ 0x0000,/* 0x00 */

+ 0x0001,/* 0x01 */

+ 0x0002,/* 0x02 */

+ 0x0003,/* 0x03 */

+ 0x0004,/* 0x04 */

+ 0x0005,/* 0x05 */

+ 0x0006,/* 0x06 */

+ 0x0007,/* 0x07 */

+ 0x0008,/* 0x08 */

+ 0x0009,/* 0x09 */

+ 0x000a,/* 0x0a */

+ 0x000b,/* 0x0b */

+ 0x000c,/* 0x0c */

+ 0x000d,/* 0x0d */

+ 0x000e,/* 0x0e */

+ 0x000f,/* 0x0f */

+ 0x0010,/* 0x10 */

+ 0x0011,/* 0x11 */

+ 0x0012,/* 0x12 */

+ 0x0013,/* 0x13 */

+ 0x0014,/* 0x14 */

+ 0x0015,/* 0x15 */

+ 0x0016,/* 0x16 */

+ 0x0017,/* 0x17 */

+ 0x0018,/* 0x18 */

+ 0x0019,/* 0x19 */

+ 0x001a,/* 0x1a */

+ 0x001b,/* 0x1b */

+ 0x001c,/* 0x1c */

+ 0x001d,/* 0x1d */

+ 0x001e,/* 0x1e */

+ 0x001f,/* 0x1f */

+ 0x0020,/* 0x20 */

+ 0x0021,/* 0x21 */

+ 0x0022,/* 0x22 */

+ 0x0023,/* 0x23 */

+ 0x0024,/* 0x24 */

+ 0x0025,/* 0x25 */

+ 0x0026,/* 0x26 */

+ 0x0027,/* 0x27 */

+ 0x0028,/* 0x28 */

+ 0x0029,/* 0x29 */

+ 0x002a,/* 0x2a */

+ 0x002b,/* 0x2b */

+ 0x002c,/* 0x2c */

+ 0x002d,/* 0x2d */

+ 0x002e,/* 0x2e */

+ 0x002f,/* 0x2f */

+ 0x0030,/* 0x30 */

+ 0x0031,/* 0x31 */

+ 0x0032,/* 0x32 */

+ 0x0033,/* 0x33 */

+ 0x0034,/* 0x34 */

+ 0x0035,/* 0x35 */

+ 0x0036,/* 0x36 */

+ 0x0037,/* 0x37 */

+ 0x0038,/* 0x38 */

+ 0x0039,/* 0x39 */

+ 0x003A,/* 0x3A */

+ 0x003B,/* 0x3B */

+ 0x003c,/* 0x3c */

+ 0x003d,/* 0x3d */

+ 0x003e,/* 0x3e */

+ 0x003f,/* 0x3f */

+ 0x0040,/* 0x40 */

+ 0x0041,/* 0x41 */

+ 0x0042,/* 0x42 */

+ 0x0043,/* 0x43 */

+ 0x0044,/* 0x44 */

+ 0x0045,/* 0x45 */

+ 0x0046,/* 0x46 */

+ 0x0047,/* 0x47 */

+ 0x0048,/* 0x48 */

+ 0x0049,/* 0x49 */

+ 0x004a,/* 0x4a */

+ 0x004b,/* 0x4b */

+ 0x004c,/* 0x4c */

+ 0x004d,/* 0x4d */

+ 0x004e,/* 0x4e */

+ 0x004f,/* 0x4f */

+ 0x0050,/* 0x50 */

+ 0x0051,/* 0x51 */

+ 0x0052,/* 0x52 */

+ 0x0053,/* 0x53 */

+ 0x0054,/* 0x54 */

+ 0x0055,/* 0x55 */

+ 0x0056,/* 0x56 */

+ 0x0057,/* 0x57 */

+ 0x0058,/* 0x58 */

+ 0x0059,/* 0x59 */

+ 0x005a,/* 0x5a */

+ 0x005b,/* 0x5b */

+ 0x005c,/* 0x5c */

+ 0x005d,/* 0x5d */

+ 0x005e,/* 0x5e */

+ 0x005f,/* 0x5f */

+ 0x0060,/* 0x60 */

+ 0x0061,/* 0x61 */

+ 0x0062,/* 0x62 */

+ 0x0063,/* 0x63 */

+ 0x0064,/* 0x64 */

+ 0x0065,/* 0x65 */

+ 0x0066,/* 0x66 */

+ 0x0067,/* 0x67 */

+ 0x0068,/* 0x68 */

+ 0x0069,/* 0x69 */

+ 0x006a,/* 0x6a */

+ 0x006b,/* 0x6b */

+ 0x006c,/* 0x6c */

+ 0x006d,/* 0x6d */

+ 0x006e,/* 0x6e */

+ 0x006f,/* 0x6f */

+ 0x0070,/* 0x70 */

+ 0x0071,/* 0x71 */

+ 0x0072,/* 0x72 */

+ 0x0073,/* 0x73 */

+ 0x0074,/* 0x74 */

+ 0x0075,/* 0x75 */

+ 0x0076,/* 0x76 */

+ 0x0077,/* 0x77 */

+ 0x0078,/* 0x78 */

+ 0x0079,/* 0x79 */

+ 0x007a,/* 0x7a */

+ 0x007b,/* 0x7b */

+ 0x007c,/* 0x7c */

+ 0x007d,/* 0x7d */

+ 0x007e,/* 0x7e */

+ 0x007f,/* 0x7f */

+ 0x0080,/* 0x80 */

+ 0x0081,/* 0x81 */

+ 0x0082,/* 0x82 */

+ 0x0083,/* 0x83 */

+ 0x0084,/* 0x84 */

+ 0x0085,/* 0x85 */

+ 0x0086,/* 0x86 */

+ 0x0087,/* 0x87 */

+ 0x0088,/* 0x88 */

+ 0x0089,/* 0x89 */

+ 0x008a,/* 0x8a */

+ 0x008b,/* 0x8b */

+ 0x008c,/* 0x8c */

+ 0x008d,/* 0x8d */

+ 0x008e,/* 0x8e */

+ 0x008f,/* 0x8f */

+ 0x0090,/* 0x90 */

+ 0x0091,/* 0x91 */

+ 0x0092,/* 0x92 */

+ 0x0093,/* 0x93 */

+ 0x0094,/* 0x94 */

+ 0x0095,/* 0x95 */

+ 0x0096,/* 0x96 */

+ 0x0097,/* 0x97 */

+ 0x0098,/* 0x98 */

+ 0x0099,/* 0x99 */

+ 0x009a,/* 0x9a */

+ 0x009b,/* 0x9b */

+ 0x009c,/* 0x9c */

+ 0x009d,/* 0x9d */

+ 0x009e,/* 0x9e */

+ 0x009f,/* 0x9f */

+ 0x00A0,/* 0xa0 */

+ 0x0901,/* 0xa1 */

+ 0x0902,/* 0xa2 */

+ 0x0903,/* 0xa3 */

+ 0x0905,/* 0xa4 */

+ 0x0906,/* 0xa5 */

+ 0x0907,/* 0xa6 */

+ 0x0908,/* 0xa7 */

+ 0x0909,/* 0xa8 */

+ 0x090a,/* 0xa9 */

+ 0x090b,/* 0xaa */

+ 0x090e,/* 0xab */

+ 0x090f,/* 0xac */

+ 0x0910,/* 0xad */

+ 0x090d,/* 0xae */

+ 0x0912,/* 0xaf */

+ 0x0913,/* 0xb0 */

+ 0x0914,/* 0xb1 */

+ 0x0911,/* 0xb2 */

+ 0x0915,/* 0xb3 */

+ 0x0916,/* 0xb4 */

+ 0x0917,/* 0xb5 */

+ 0x0918,/* 0xb6 */

+ 0x0919,/* 0xb7 */

+ 0x091a,/* 0xb8 */

+ 0x091b,/* 0xb9 */

+ 0x091c,/* 0xba */

+ 0x091d,/* 0xbb */

+ 0x091e,/* 0xbc */

+ 0x091f,/* 0xbd */

+ 0x0920,/* 0xbe */

+ 0x0921,/* 0xbf */

+ 0x0922,/* 0xc0 */

+ 0x0923,/* 0xc1 */

+ 0x0924,/* 0xc2 */

+ 0x0925,/* 0xc3 */

+ 0x0926,/* 0xc4 */

+ 0x0927,/* 0xc5 */

+ 0x0928,/* 0xc6 */

+ 0x0929,/* 0xc7 */

+ 0x092a,/* 0xc8 */

+ 0x092b,/* 0xc9 */

+ 0x092c,/* 0xca */

+ 0x092d,/* 0xcb */

+ 0x092e,/* 0xcc */

+ 0x092f,/* 0xcd */

+ 0x095f,/* 0xce */

+ 0x0930,/* 0xcf */

+ 0x0931,/* 0xd0 */

+ 0x0932,/* 0xd1 */

+ 0x0933,/* 0xd2 */

+ 0x0934,/* 0xd3 */

+ 0x0935,/* 0xd4 */

+ 0x0936,/* 0xd5 */

+ 0x0937,/* 0xd6 */

+ 0x0938,/* 0xd7 */

+ 0x0939,/* 0xd8 */

+ 0x200D,/* 0xd9 */

+ 0x093e,/* 0xda */

+ 0x093f,/* 0xdb */

+ 0x0940,/* 0xdc */

+ 0x0941,/* 0xdd */

+ 0x0942,/* 0xde */

+ 0x0943,/* 0xdf */

+ 0x0946,/* 0xe0 */

+ 0x0947,/* 0xe1 */

+ 0x0948,/* 0xe2 */

+ 0x0945,/* 0xe3 */

+ 0x094a,/* 0xe4 */

+ 0x094b,/* 0xe5 */

+ 0x094c,/* 0xe6 */

+ 0x0949,/* 0xe7 */

+ 0x094d,/* 0xe8 */

+ 0x093c,/* 0xe9 */

+ 0x0964,/* 0xea */

+ 0xFFFF,/* 0xeb */

+ 0xFFFF,/* 0xec */

+ 0xFFFF,/* 0xed */

+ 0xFFFF,/* 0xee */

+ 0xFFFF,/* 0xef */

+ 0xFFFF,/* 0xf0 */

+ 0x0966,/* 0xf1 */

+ 0x0967,/* 0xf2 */

+ 0x0968,/* 0xf3 */

+ 0x0969,/* 0xf4 */

+ 0x096a,/* 0xf5 */

+ 0x096b,/* 0xf6 */

+ 0x096c,/* 0xf7 */

+ 0x096d,/* 0xf8 */

+ 0x096e,/* 0xf9 */

+ 0x096f,/* 0xfa */

+ 0xFFFF,/* 0xfb */

+ 0xFFFF,/* 0xfc */

+ 0xFFFF,/* 0xfd */

+ 0xFFFF,/* 0xfe */

+ 0xFFFF /* 0xff */

+};

+static const uint16_t vowelSignESpecialCases[][2]={ /* Row 0 carries the row count; each later row is { ISCII byte, Unicode code point }. */

+ { 2 /*length of array*/ , 0 },

+ { 0xA4 , 0x0904 }, /* matches fromUnicodeTable: 0x0904 -> 0xA4 followed by 0xE0 (ISCII_VOWEL_SIGN_E) */

+};

+static const uint16_t nuktaSpecialCases[][2]={ /* Row 0 carries the row count; each later row is { ISCII byte, Unicode code point }. */

+ { 16 /*length of array*/ , 0 },

+ { 0xA6 , 0x090c }, /* every pair matches fromUnicodeTable: the code point maps to this byte followed by 0xE9 (ISCII_NUKTA) */

+ { 0xEA , 0x093D },

+ { 0xDF , 0x0944 },

+ { 0xA1 , 0x0950 },

+ { 0xb3 , 0x0958 },

+ { 0xb4 , 0x0959 },

+ { 0xb5 , 0x095a },

+ { 0xba , 0x095b },

+ { 0xbf , 0x095c },

+ { 0xC0 , 0x095d },

+ { 0xc9 , 0x095e },

+ { 0xAA , 0x0960 },

+ { 0xA7 , 0x0961 },

+ { 0xDB , 0x0962 },

+ { 0xDC , 0x0963 },

+};

+#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){ \

+ int32_t offset = (int32_t)(source - args->source-1); \

+ /* write the targetUniChar to target */ \

+ if(target < targetLimit){ \

+ if(targetByteUnit <= 0xFF){ \

+ *(target)++ = (uint8_t)(targetByteUnit); \

+ if(offsets){ \

+ *(offsets++) = offset; \

+ } \

+ }else{ \

+ if (targetByteUnit > 0xFFFF) { \

+ *(target)++ = (uint8_t)(targetByteUnit>>16); \

+ if (offsets) { \

+ --offset; \

+ *(offsets++) = offset; \

+ } \

+ if (!(target < targetLimit)) { \

+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \

+ (uint8_t)(targetByteUnit >> 8); \

+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \

+ (uint8_t)targetByteUnit; \

+ *err = U_BUFFER_OVERFLOW_ERROR; \

+ } else { \

+ *(target)++ = (uint8_t)(targetByteUnit>>8); \

+ if(offsets){ \

+ *(offsets++) = offset; \

+ } \

+ if(target < targetLimit){ \

+ *(target)++ = (uint8_t) targetByteUnit; \

+ if(offsets){ \

+ *(offsets++) = offset ; \

+ } \

+ }else{ \

+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\

+ (uint8_t) (targetByteUnit); \

+ *err = U_BUFFER_OVERFLOW_ERROR; \

+ } \

+ }else{ \

+ if (targetByteUnit & 0xFF0000) { \

+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \

+ (uint8_t) (targetByteUnit >>16); \

+ } \

+ if(targetByteUnit & 0xFF00){ \

+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \

+ (uint8_t) (targetByteUnit >>8); \

+ } \

+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \

+ (uint8_t) (targetByteUnit); \

+ *err = U_BUFFER_OVERFLOW_ERROR; \

+ } \

+/* Rules:

+ * Explicit Halant :

+ * <HALANT> + <ZWNJ>

+ * Soft Halant :

+ * <HALANT> + <ZWJ>

+ */

+static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(

+ UConverterFromUnicodeArgs * args, UErrorCode * err) {

+ const UChar *source = args->source;

+ const UChar *sourceLimit = args->sourceLimit;

+ unsigned char *target = (unsigned char *) args->target;

+ unsigned char *targetLimit = (unsigned char *) args->targetLimit;

+ int32_t* offsets = args->offsets;

+ uint32_t targetByteUnit = 0x0000;

+ UChar32 sourceChar = 0x0000;

+ UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */

+ UConverterDataISCII *converterData;

+ uint16_t newDelta=0;

+ uint16_t range = 0;

+ UBool deltaChanged = FALSE;

+ if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {

+ *err = U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ /* initialize data */

+ converterData=(UConverterDataISCII*)args->converter->extraInfo;

+ newDelta=converterData->currentDeltaFromUnicode;

+ range = (uint16_t)(newDelta/DELTA);

+ if ((sourceChar = args->converter->fromUChar32)!=0) {

+ goto getTrail;

+ }

+ /*writing the char to the output stream */

+ while (source < sourceLimit) {

+ /* Write the language code following LF only if LF is not the last character. */

+ if (args->converter->fromUnicodeStatus == LF) {

+ targetByteUnit = ATR<<8;

+ targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;

+ args->converter->fromUnicodeStatus = 0x0000;

+ /* now append ATR and language code */

+ WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);

+ if (U_FAILURE(*err)) {

+ break;

+ }

+ sourceChar = *source++;

+ tempContextFromUnicode = converterData->contextCharFromUnicode;

+ targetByteUnit = missingCharMarker;

+ /*check if input is in ASCII and C0 control codes range*/

+ if (sourceChar <= ASCII_END) {

+ args->converter->fromUnicodeStatus = sourceChar;

+ WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);

+ if (U_FAILURE(*err)) {

+ break;

+ }

+ continue;

+ }

+ switch (sourceChar) {

+ case ZWNJ:

+ /* contextChar has HALANT */

+ if (converterData->contextCharFromUnicode) {

+ converterData->contextCharFromUnicode = 0x00;

+ targetByteUnit = ISCII_HALANT;

+ } else {

+ /* consume ZWNJ and continue */

+ converterData->contextCharFromUnicode = 0x00;

+ continue;

+ }

+ break;

+ case ZWJ:

+ /* contextChar has HALANT */

+ if (converterData->contextCharFromUnicode) {

+ targetByteUnit = ISCII_NUKTA;

+ } else {

+ targetByteUnit =ISCII_INV;

+ }

+ converterData->contextCharFromUnicode = 0x00;

+ break;

+ default:

+ /* is the sourceChar in the INDIC_RANGE? */

+ if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {

+ /* Danda and Double Danda are valid in Northern scripts. Since Unicode

+ * does not include these codepoints in all Northern scripts we need to

+ * filter them out

+ */

+ if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {

+ /* find out to which block the sourceChar belongs */

+ range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);

+ newDelta =(uint16_t)(range*DELTA);

+ /* Now are we in the same block as the previous? */

+ if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {

+ converterData->currentDeltaFromUnicode = newDelta;

+ converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;

+ deltaChanged =TRUE;

+ converterData->isFirstBuffer=FALSE;

+ }

+ if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {

+ if (sourceChar == PNJ_TIPPI) {

+ /* Make sure Tippi is converted to Bindi. */

+ sourceChar = PNJ_BINDI;

+ } else if (sourceChar == PNJ_ADHAK) {

+ /* This is for consonant cluster handling. */

+ converterData->contextCharFromUnicode = PNJ_ADHAK;

+ }

+ /* Normalize all Indic codepoints to Devanagari and map them to ISCII */

+ /* now subtract the new delta from sourceChar*/

+ sourceChar -= converterData->currentDeltaFromUnicode;

+ }

+ /* get the target byte unit */

+ targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];

+ /* is the code point valid in current script? */

+ if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {

+ /* Vocallic RR is assigned in ISCII Telugu and Unicode */

+ if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {

+ targetByteUnit=missingCharMarker;

+ }

+ if (deltaChanged) {

+ /* we are in a script block which is different than

+ * previous sourceChar's script block write ATR and language codes

+ */

+ uint32_t temp=0;

+ temp =(uint16_t)(ATR<<8);

+ temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);

+ /* reset */

+ deltaChanged=FALSE;

+ /* now append ATR and language code */

+ WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);

+ if (U_FAILURE(*err)) {

+ break;

+ }

+ if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {

+ continue;

+ }

+ /* reset context char */

+ converterData->contextCharFromUnicode = 0x00;

+ break;

+ }

+ if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && uset_contains(PNJ_CONSONANT_SET, (sourceChar + PNJ_DELTA))) {

+ /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */

+ /* reset context char */

+ converterData->contextCharFromUnicode = 0x0000;

+ targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;

+ /* write targetByteUnit to target */

+ WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);

+ if (U_FAILURE(*err)) {

+ break;

+ }

+ } else if (targetByteUnit != missingCharMarker) {

+ if (targetByteUnit==ISCII_HALANT) {

+ converterData->contextCharFromUnicode = (UChar)targetByteUnit;

+ }

+ /* write targetByteUnit to target*/

+ WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);

+ if (U_FAILURE(*err)) {

+ break;

+ }

+ } else {

+ /* oops.. the code point is unassigned */

+ /*check if the char is a First surrogate*/

+ if (UTF_IS_SURROGATE(sourceChar)) {

+ if (UTF_IS_SURROGATE_FIRST(sourceChar)) {

+getTrail:

+ /*look ahead to find the trail surrogate*/

+ if (source < sourceLimit) {

+ /* test the following code unit */

+ UChar trail= (*source);

+ if (UTF_IS_SECOND_SURROGATE(trail)) {

+ source++;

+ sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);

+ *err =U_INVALID_CHAR_FOUND;

+ /* convert this surrogate code point */

+ /* exit this condition tree */

+ } else {

+ /* this is an unmatched lead code unit (1st surrogate) */

+ /* callback(illegal) */

+ *err=U_ILLEGAL_CHAR_FOUND;

+ }

+ } else {

+ /* no more input */

+ *err = U_ZERO_ERROR;

+ }

+ } else {

+ /* this is an unmatched trail code unit (2nd surrogate) */

+ /* callback(illegal) */

+ *err=U_ILLEGAL_CHAR_FOUND;

+ }

+ } else {

+ /* callback(unassigned) for a BMP code point */

+ *err = U_INVALID_CHAR_FOUND;

+ }

+ args->converter->fromUChar32=sourceChar;

+ break;

+ }

+ }/* end while(source<sourceLimit) */

+ /*save the state and return */

+ args->source = source;

+ args->target = (char*)target;

+static const uint16_t lookupTable[][2]={ /* indexed by (script byte & 0x0F) when a script code follows ATR */

+ { ZERO, ZERO }, /*DEFAULT*/

+ { ZERO, ZERO }, /*ROMAN*/

+ { DEVANAGARI, DEV_MASK }, /* column 0 is multiplied by DELTA to get the script delta; column 1 is cast to MaskEnum as the validity mask */

+ { BENGALI, BNG_MASK },

+ { TAMIL, TML_MASK },

+ { TELUGU, KND_MASK }, /* Telugu shares KND_MASK with the Kannada row below */

+ { BENGALI, BNG_MASK }, /* second BENGALI row: presumably the Assamese script code, reusing the Bengali block -- TODO confirm */

+ { ORIYA, ORI_MASK },

+ { KANNADA, KND_MASK },

+ { MALAYALAM, MLM_MASK },

+ { GUJARATI, GJR_MASK },

+ { GURMUKHI, PNJ_MASK }

+};

+#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\

+ /* add offset to current Indic Block */ \

+ if(targetUniChar>ASCII_END && \

+ targetUniChar != ZWJ && \

+ targetUniChar != ZWNJ && \

+ targetUniChar != DANDA && \

+ targetUniChar != DOUBLE_DANDA){ \

+ \

+ targetUniChar+=(uint16_t)(delta); \

+ } \

+ /* now write the targetUniChar */ \

+ if(target<args->targetLimit){ \

+ *(target)++ = (UChar)targetUniChar; \

+ if(offsets){ \

+ *(offsets)++ = (int32_t)(offset); \

+ } \

+ }else{ \

+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \

+ (UChar)targetUniChar; \

+ *err = U_BUFFER_OVERFLOW_ERROR; \

+ } \

+#define GET_MAPPING(sourceChar,targetUniChar,data){ \

+ targetUniChar = toUnicodeTable[(sourceChar)] ; \

+ /* is the code point valid in current script? */ \

+ if(sourceChar> ASCII_END && \

+ (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode)==0){ \

+ /* Vocallic RR is assigned in ISCII Telugu and Unicode */ \

+ if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \

+ targetUniChar!=VOCALLIC_RR){ \

+ targetUniChar=missingCharMarker; \

+ } \

+/***********

+ * Rules for ISCII to Unicode converter

+ * ISCII is a stateful encoding. To convert ISCII bytes to Unicode,

+ * which has both precomposed and decomposed forms of characters,

+ * pre-context and post-context need to be considered.

+ *

+ * Post context

+ * i) ATR : Attribute code is used to declare the font and script switching.

+ * Currently we only switch scripts; font codes are consumed without generating an error

+ * ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,

+ * obsolete characters

+ * Pre context

+ * i) Halant: if preceded by a halant then it is an explicit halant

+ * ii) Nukta :

+ * a) if preceded by a halant then it is a soft halant

+ * b) if preceded by specific consonants and the ligatures have pre-composed

+ * characters in Unicode then convert to pre-composed characters

+ * iii) Danda: If Danda is preceded by a Danda then convert to Double Danda

+ *

+ */

+static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {

+ const char *source = ( char *) args->source;

+ UChar *target = args->target;

+ const char *sourceLimit = args->sourceLimit;

+ const UChar* targetLimit = args->targetLimit;

+ uint32_t targetUniChar = 0x0000;

+ uint8_t sourceChar = 0x0000;

+ UConverterDataISCII* data;

+ UChar32* toUnicodeStatus=NULL;

+ UChar32 tempTargetUniChar = 0x0000;

+ UChar* contextCharToUnicode= NULL;

+ UBool found;

+ int i;

+ int offset = 0;

+ if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {

+ *err = U_ILLEGAL_ARGUMENT_ERROR;

+ return;

+ }

+ data = (UConverterDataISCII*)(args->converter->extraInfo);

+ contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */

+ toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/

+ while (U_SUCCESS(*err) && source<sourceLimit) {

+ targetUniChar = missingCharMarker;

+ if (target < targetLimit) {

+ sourceChar = (unsigned char)*(source)++;

+ /* look at the post-context and perform special processing */

+ if (*contextCharToUnicode==ATR) {

+ /* If we have ATR in *contextCharToUnicode then we need to change our

+ * state to the Indic Script specified by sourceChar

+ */

+ /* check if the sourceChar is supported script range*/

+ if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {

+ data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);

+ data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];

+ } else if (sourceChar==DEF) {

+ /* switch back to default */

+ data->currentDeltaToUnicode = data->defDeltaToUnicode;

+ data->currentMaskToUnicode = data->defMaskToUnicode;

+ } else {

+ if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {

+ /* these are display codes consume and continue */

+ } else {

+ *err =U_ILLEGAL_CHAR_FOUND;

+ /* reset */

+ *contextCharToUnicode=NO_CHAR_MARKER;

+ goto CALLBACK;

+ }

+ /* reset */

+ *contextCharToUnicode=NO_CHAR_MARKER;

+ continue;

+ } else if (*contextCharToUnicode==EXT) {

+ /* check if sourceChar is in 0xA1-0xEE range */

+ if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {

+ /* We currently support only Anudatta and Devanagari abbreviation sign */

+ if (sourceChar==0xBF || sourceChar == 0xB8) {

+ targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;

+ /* find out if the mapping is valid in this state */

+ if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {

+ *contextCharToUnicode= NO_CHAR_MARKER;

+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

+ if (data->prevToUnicodeStatus) {

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);

+ data->prevToUnicodeStatus = 0x0000;

+ }

+ /* write to target */

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);

+ continue;

+ }

+ /* byte unit is unassigned */

+ targetUniChar = missingCharMarker;

+ *err= U_INVALID_CHAR_FOUND;

+ } else {

+ /* only 0xA1 - 0xEE are legal after EXT char */

+ *contextCharToUnicode= NO_CHAR_MARKER;

+ *err = U_ILLEGAL_CHAR_FOUND;

+ }

+ goto CALLBACK;

+ } else if (*contextCharToUnicode==ISCII_INV) {

+ if (sourceChar==ISCII_HALANT) {

+ targetUniChar = 0x0020; /* replace with space according to Indic FAQ */

+ } else {

+ targetUniChar = ZWJ;

+ }

+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

+ if (data->prevToUnicodeStatus) {

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);

+ data->prevToUnicodeStatus = 0x0000;

+ }

+ /* write to target */

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);

+ /* reset */

+ *contextCharToUnicode=NO_CHAR_MARKER;

+ }

+ /* look at the pre-context and perform special processing */

+ switch (sourceChar) {

+ case ISCII_INV:

+ case EXT: /*falls through*/

+ case ATR:

+ *contextCharToUnicode = (UChar)sourceChar;

+ if (*toUnicodeStatus != missingCharMarker) {

+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

+ if (data->prevToUnicodeStatus) {

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);

+ data->prevToUnicodeStatus = 0x0000;

+ }

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);

+ *toUnicodeStatus = missingCharMarker;

+ }

+ continue;

+ case ISCII_DANDA:

+ /* handle double danda*/

+ if (*contextCharToUnicode== ISCII_DANDA) {

+ targetUniChar = DOUBLE_DANDA;

+ /* clear the context */

+ *contextCharToUnicode = NO_CHAR_MARKER;

+ *toUnicodeStatus = missingCharMarker;

+ } else {

+ GET_MAPPING(sourceChar,targetUniChar,data);

+ *contextCharToUnicode = sourceChar;

+ }

+ break;

+ case ISCII_HALANT:

+ /* handle explicit halant */

+ if (*contextCharToUnicode == ISCII_HALANT) {

+ targetUniChar = ZWNJ;

+ /* clear the context */

+ *contextCharToUnicode = NO_CHAR_MARKER;

+ } else {

+ GET_MAPPING(sourceChar,targetUniChar,data);

+ *contextCharToUnicode = sourceChar;

+ }

+ break;

+ case 0x0A:

+ /* fall through */

+ case 0x0D:

+ data->resetToDefaultToUnicode = TRUE;

+ GET_MAPPING(sourceChar,targetUniChar,data)

+ ;

+ *contextCharToUnicode = sourceChar;

+ break;

+ case ISCII_VOWEL_SIGN_E:

+ i=1;

+ found=FALSE;

+ for (; i<vowelSignESpecialCases[0][0]; i++) {

+ if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {

+ targetUniChar=vowelSignESpecialCases[i][1];

+ found=TRUE;

+ break;

+ }

+ if (found) {

+ /* find out if the mapping is valid in this state */

+ if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {

+ /*targetUniChar += data->currentDeltaToUnicode ;*/

+ *contextCharToUnicode= NO_CHAR_MARKER;

+ *toUnicodeStatus = missingCharMarker;

+ break;

+ }

+ GET_MAPPING(sourceChar,targetUniChar,data);

+ *contextCharToUnicode = sourceChar;

+ break;

+ case ISCII_NUKTA:

+ /* handle soft halant */

+ if (*contextCharToUnicode == ISCII_HALANT) {

+ targetUniChar = ZWJ;

+ /* clear the context */

+ *contextCharToUnicode = NO_CHAR_MARKER;

+ break;

+ } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {

+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

+ if (data->prevToUnicodeStatus) {

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);

+ data->prevToUnicodeStatus = 0x0000;

+ }

+ /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.

+ * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).

+ */

+ targetUniChar = PNJ_RRA;

+ WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);

+ if (U_SUCCESS(*err)) {

+ targetUniChar = PNJ_SIGN_VIRAMA;

+ WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);

+ if (U_SUCCESS(*err)) {

+ targetUniChar = PNJ_HA;

+ WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);

+ } else {

+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;

+ }

+ } else {

+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;

+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;

+ }

+ *toUnicodeStatus = missingCharMarker;

+ data->contextCharToUnicode = NO_CHAR_MARKER;

+ continue;

+ } else {

+ /* try to handle <CHAR> + ISCII_NUKTA special mappings */

+ i=1;

+ found =FALSE;

+ for (; i<nuktaSpecialCases[0][0]; i++) {

+ if (nuktaSpecialCases[i][0]==(uint8_t)

+ *contextCharToUnicode) {

+ targetUniChar=nuktaSpecialCases[i][1];

+ found =TRUE;

+ break;

+ }

+ if (found) {

+ /* find out if the mapping is valid in this state */

+ if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {

+ /*targetUniChar += data->currentDeltaToUnicode ;*/

+ *contextCharToUnicode= NO_CHAR_MARKER;

+ *toUnicodeStatus = missingCharMarker;

+ if (data->currentDeltaToUnicode == PNJ_DELTA) {

+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

+ if (data->prevToUnicodeStatus) {

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);

+ data->prevToUnicodeStatus = 0x0000;

+ }

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);

+ continue;

+ }

+ break;

+ }

+ /* else fall through to default */

+ }

+ /* else fall through to default */

+ }

+ default:GET_MAPPING(sourceChar,targetUniChar,data)

+ ;

+ *contextCharToUnicode = sourceChar;

+ break;

+ }

+ if (*toUnicodeStatus != missingCharMarker) {

+ /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */

+ if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && uset_contains(PNJ_CONSONANT_SET, data->prevToUnicodeStatus) &&

+ (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {

+ /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */

+ offset = (int)(source-args->source - 3);

+ tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);

+ data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */

+ *toUnicodeStatus = missingCharMarker;

+ continue;

+ } else {

+ /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */

+ if (data->prevToUnicodeStatus) {

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);

+ data->prevToUnicodeStatus = 0x0000;

+ }

+ /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.

+ * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.

+ */

+ if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && uset_contains(PNJ_BINDI_TIPPI_SET, (*toUnicodeStatus + PNJ_DELTA))) {

+ targetUniChar = PNJ_TIPPI - PNJ_DELTA;

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);

+ } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && uset_contains(PNJ_CONSONANT_SET, (*toUnicodeStatus + PNJ_DELTA))) {

+ /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */

+ data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;

+ } else {

+ /* write the previously mapped codepoint */

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);

+ }

+ *toUnicodeStatus = missingCharMarker;

+ }

+ if (targetUniChar != missingCharMarker) {

+ /* now save the targetUniChar for delayed write */

+ *toUnicodeStatus = (UChar) targetUniChar;

+ if (data->resetToDefaultToUnicode==TRUE) {

+ data->currentDeltaToUnicode = data->defDeltaToUnicode;

+ data->currentMaskToUnicode = data->defMaskToUnicode;

+ data->resetToDefaultToUnicode=FALSE;

+ }

+ } else {

+ /* we reach here only if targetUniChar == missingCharMarker

+ * so assign codes to reason and err

+ */

+ *err = U_INVALID_CHAR_FOUND;

+CALLBACK:

+ args->converter->toUBytes[0] = (uint8_t) sourceChar;

+ args->converter->toULength = 1;

+ break;

+ }

+ } else {

+ *err =U_BUFFER_OVERFLOW_ERROR;

+ break;

+ }

+ if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {

+ /* end of the input stream */

+ UConverter *cnv = args->converter;

+ if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {

+ /* set toUBytes[] */

+ cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;

+ cnv->toULength = 1;

+ /* avoid looping on truncated sequences */

+ *contextCharToUnicode = NO_CHAR_MARKER;

+ } else {

+ cnv->toULength = 0;

+ }

+ if (*toUnicodeStatus != missingCharMarker) {

+ /* output a remaining target character */

+ WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);

+ *toUnicodeStatus = missingCharMarker;

+ }

+ args->target = target;

+ args->source = source;

+/* structure for SafeClone calculations: its size is what _ISCII_SafeClone reports as the needed buffer size */

+struct cloneISCIIStruct {

+ UConverter cnv; /* the clone itself; _ISCII_SafeClone returns a pointer to this member */

+ UConverterDataISCII mydata; /* receives a copy of the source converter's extraInfo; cnv.extraInfo is pointed here */

+};

+static UConverter *

+_ISCII_SafeClone(const UConverter *cnv,

+ void *stackBuffer,

+ int32_t *pBufferSize,

+ UErrorCode *status)

+ struct cloneISCIIStruct * localClone;

+ int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);

+ if (U_FAILURE(*status)) {

+ return 0;

+ }

+ if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */

+ *pBufferSize = bufferSizeNeeded;

+ return 0;

+ }

+ localClone = (struct cloneISCIIStruct *)stackBuffer;

+ /* ucnv.c/ucnv_safeClone() copied the main UConverter already */

+ uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));

+ localClone->cnv.extraInfo = &localClone->mydata;

+ localClone->cnv.isExtraLocal = TRUE;

+ return &localClone->cnv;

+static void

+_ISCIIGetUnicodeSet(const UConverter *cnv,

+ const USetAdder *sa,

+ UConverterUnicodeSet which,

+ UErrorCode *pErrorCode)

+ int32_t idx, script;

+ uint8_t mask;

+ /* Since all ISCII versions allow switching to other ISCII

+ scripts, we add all roundtrippable characters to this set. */

+ sa->addRange(sa->set, 0, ASCII_END);

+ for (script = DEVANAGARI; script <= MALAYALAM; script++) {

+ mask = (uint8_t)(lookupInitialData[script].maskEnum);

+ for (idx = 0; idx < DELTA; idx++) {

+ /* added check for TELUGU character */

+ if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {

+ sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);

+ }

+ sa->add(sa->set, DANDA);

+ sa->add(sa->set, DOUBLE_DANDA);

+ sa->add(sa->set, ZWNJ);

+ sa->add(sa->set, ZWJ);

+static const UConverterImpl _ISCIIImpl={

+ UCNV_ISCII,

+ NULL,

+ _ISCIIOpen,

+ _ISCIIClose,

+ _ISCIIReset,

+ UConverter_toUnicode_ISCII_OFFSETS_LOGIC,

+ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,

+ NULL,

+ _ISCIIgetName,

+ NULL,

+ _ISCII_SafeClone,

+ _ISCIIGetUnicodeSet

+};

+static const UConverterStaticData _ISCIIStaticData={

+ sizeof(UConverterStaticData),

+ "ISCII",

+ 0,

+ UCNV_IBM,

+ UCNV_ISCII,

+ 1,

+ 4,

+ { 0x1a, 0, 0, 0 },

+ 0x1,

+ FALSE,

+ 0x0,

+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

+};

+const UConverterSharedData _ISCIIData={

+ sizeof(UConverterSharedData),

+ ~((uint32_t) 0),

+ NULL,

+ &_ISCIIStaticData,

+ FALSE,

+ &_ISCIIImpl,

+ 0

+};

+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

Property changes on: icu46/source/common/ucnvisci.c

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/common/ucnvhz.c ('k') | icu46/source/common/ucnvlat1.c » ('j') | no next file with comments »