icu46/source/tools/genrb/read.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/tools/genrb/read.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 *******************************************************************************

	3 *

	4 * Copyright (C) 1998-2009, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 *

	7 *******************************************************************************

	8 *

	9 * File read.c

	10 *

	11 * Modification History:

	12 *

	13 * Date Name Description

	14 * 05/26/99 stephen Creation.

	15 * 5/10/01 Ram removed ustdio dependency

	16 *******************************************************************************

	17 */

	18

	19 #include "read.h"

	20 #include "errmsg.h"

	21 #include "unicode/ustring.h"

	22

	23 #define OPENBRACE 0x007B /* left curly brace */

	24 #define CLOSEBRACE 0x007D /* right curly brace */

	25 #define COMMA 0x002C /* comma */

	26 #define QUOTE 0x0022 /* double quotation mark */

	27 #define ESCAPE 0x005C /* backslash; introduces an escape sequence handled by unescape() */

	28 #define SLASH 0x002F /* forward slash; first character of both comment openers */

	29 #define ASTERISK 0x002A /* asterisk */

	30 #define SPACE 0x0020 /* space; also the separator inserted between merged strings */

	31 #define COLON 0x003A /* colon */

	32 #define BADBOM 0xFFFE /* U+FFFE, i.e. U+FEFF with its two bytes swapped; getNextToken() returns TOK_ERROR for it */

	33 #define CR 0x000D /* carriage return */

	34 #define LF 0x000A /* line feed */

	35

	36 static int32_t lineCount; /* running line number: set to 1 by resetLineNumber(), advanced by isWhitespace(), isNewline() and getStringToken() */

	37

	38 /* Protos */

	39 static enum ETokenType getStringToken(UCHARBUF *buf,

	40 UChar32 initialChar,

	41 struct UString *token,

	42 UErrorCode *status);

	43

	44 static UChar32 getNextChar (UCHARBUF buf, UBool skipwhite, struct USt ring token, UErrorCode *status);

	45 static void seekUntilNewline (UCHARBUF buf, struct UString token, UErr orCode *status);

	46 static void seekUntilEndOfComment (UCHARBUF buf, struct UString token, UErr orCode *status);

	47 static UBool isWhitespace (UChar32 c);

	48 static UBool isNewline (UChar32 c);

	49

	50 void resetLineNumber() {

	51 lineCount = 1;

	52 }

	53

	54 /* Read and return the next token from the stream. If the token is of

	55 type eString, fill in the token parameter with the token. If the

	56 token is eError, then the status parameter will contain the

	57 specific error. This will be eItemNotFound at the end of file,

	58 indicating that all tokens have been returned. This method will

	59 never return eString twice in a row; instead, multiple adjacent

	60 string tokens will be merged into one, with no intervening

	61 space. */

	62 enum ETokenType getNextToken(UCHARBUF* buf,

	63 struct UString *token,

	64 uint32_t linenumber, / out: linenumber of token * /

	65 struct UString *comment,

	66 UErrorCode *status) {

	67 enum ETokenType result;

	68 UChar32 c;

	69

	70 if (U_FAILURE(*status)) {

	71 return TOK_ERROR;

	72 }

	73

	74 /* Skip whitespace */

	75 c = getNextChar(buf, TRUE, comment, status);

	76

	77 if (U_FAILURE(*status)) {

	78 return TOK_ERROR;

	79 }

	80

	81 *linenumber = lineCount;

	82

	83 switch(c) {

	84 case BADBOM:

	85 return TOK_ERROR;

	86 case OPENBRACE:

	87 return TOK_OPEN_BRACE;

	88 case CLOSEBRACE:

	89 return TOK_CLOSE_BRACE;

	90 case COMMA:

	91 return TOK_COMMA;

	92 case U_EOF:

	93 return TOK_EOF;

	94 case COLON:

	95 return TOK_COLON;

	96

	97 default:

	98 result = getStringToken(buf, c, token, status);

	99 }

	100

	101 *linenumber = lineCount;

	102 return result;

	103 }

	104

	105 /* Copy a string token into the given UnicodeString. Upon entry, we

	106 have already read the first character of the string token, which is

	107 not a whitespace character (but may be a QUOTE or ESCAPE). This

	108 function reads all subsequent characters that belong with this

	109 string, and copy them into the token parameter. The other

	110 important, and slightly convoluted purpose of this function is to

	111 merge adjacent strings. It looks forward a bit, and if the next

	112 non comment, non whitespace item is a string, it reads it in as

	113 well. If two adjacent strings are quoted, they are merged without

	114 intervening space. Otherwise a single SPACE character is

	115 inserted. */

	116 static enum ETokenType getStringToken(UCHARBUF* buf,

	117 UChar32 initialChar,

	118 struct UString *token,

	119 UErrorCode *status) {

	120 UBool lastStringWasQuoted;

	121 UChar32 c;

	122 UChar target[3] = { '\0' };

	123 UChar *pTarget = target;

	124 int len=0;

	125 UBool isFollowingCharEscaped=FALSE;

	126 UBool isNLUnescaped = FALSE;

	127 UChar32 prevC=0;

	128

	129 /* We are guaranteed on entry that initialChar is not a whitespace

	130 character. If we are at the EOF, or have some other problem, it

	131 doesn't matter; we still want to validly return the initialChar

	132 (if nothing else) as a string token. */

	133

	134 if (U_FAILURE(*status)) {

	135 return TOK_ERROR;

	136 }

	137

	138 /* setup */

	139 lastStringWasQuoted = FALSE;

	140 c = initialChar;

	141 ustr_setlen(token, 0, status);

	142

	143 if (U_FAILURE(*status)) {

	144 return TOK_ERROR;

	145 }

	146

	147 for (;;) {

	148 if (c == QUOTE) {

	149 if (!lastStringWasQuoted && token->fLength > 0) {

	150 ustr_ucat(token, SPACE, status);

	151

	152 if (U_FAILURE(*status)) {

	153 return TOK_ERROR;

	154 }

	155 }

	156

	157 lastStringWasQuoted = TRUE;

	158

	159 for (;;) {

	160 c = ucbuf_getc(buf,status);

	161

	162 /* EOF reached */

	163 if (c == U_EOF) {

	164 return TOK_EOF;

	165 }

	166

	167 /* Unterminated quoted strings */

	168 if (U_FAILURE(*status)) {

	169 return TOK_ERROR;

	170 }

	171

	172 if (c == QUOTE && !isFollowingCharEscaped) {

	173 break;

	174 }

	175

	176 if (c == ESCAPE && !isFollowingCharEscaped) {

	177 pTarget = target;

	178 c = unescape(buf, status);

	179

	180 if (c == U_ERR) {

	181 return TOK_ERROR;

	182 }

	183 if(c == CR \|\| c == LF){

	184 isNLUnescaped = TRUE;

	185 }

	186 }

	187

	188 if(c==ESCAPE && !isFollowingCharEscaped){

	189 isFollowingCharEscaped = TRUE;

	190 }else{

	191 U_APPEND_CHAR32(c, pTarget,len);

	192 pTarget = target;

	193 ustr_uscat(token, pTarget,len, status);

	194 isFollowingCharEscaped = FALSE;

	195 len=0;

	196 if(c == CR \|\| c == LF){

	197 if(isNLUnescaped == FALSE && prevC!=CR){

	198 lineCount++;

	199 }

	200 isNLUnescaped = FALSE;

	201 }

	202 }

	203

	204 if (U_FAILURE(*status)) {

	205 return TOK_ERROR;

	206 }

	207 prevC = c;

	208 }

	209 } else {

	210 if (token->fLength > 0) {

	211 ustr_ucat(token, SPACE, status);

	212

	213 if (U_FAILURE(*status)) {

	214 return TOK_ERROR;

	215 }

	216 }

	217

	218 if(lastStringWasQuoted){

	219 if(getShowWarning()){

	220 warning(lineCount, "Mixing quoted and unquoted strings");

	221 }

	222 if(isStrict()){

	223 return TOK_ERROR;

	224 }

	225

	226 }

	227

	228 lastStringWasQuoted = FALSE;

	229

	230 /* if we reach here we are mixing

	231 * quoted and unquoted strings

	232 * warn in normal mode and error in

	233 * pedantic mode

	234 */

	235

	236 if (c == ESCAPE) {

	237 pTarget = target;

	238 c = unescape(buf, status);

	239

	240 /* EOF reached */

	241 if (c == U_EOF) {

	242 return TOK_ERROR;

	243 }

	244 }

	245

	246 U_APPEND_CHAR32(c, pTarget,len);

	247 pTarget = target;

	248 ustr_uscat(token, pTarget,len, status);

	249 len=0;

	250

	251 if (U_FAILURE(*status)) {

	252 return TOK_ERROR;

	253 }

	254

	255 for (;;) {

	256 /* DON'T skip whitespace */

	257 c = getNextChar(buf, FALSE, NULL, status);

	258

	259 /* EOF reached */

	260 if (c == U_EOF) {

	261 ucbuf_ungetc(c, buf);

	262 return TOK_STRING;

	263 }

	264

	265 if (U_FAILURE(*status)) {

	266 return TOK_STRING;

	267 }

	268

	269 if (c == QUOTE

	270 \|\| c == OPENBRACE

	271 \|\| c == CLOSEBRACE

	272 \|\| c == COMMA

	273 \|\| c == COLON) {

	274 ucbuf_ungetc(c, buf);

	275 break;

	276 }

	277

	278 if (isWhitespace(c)) {

	279 break;

	280 }

	281

	282 if (c == ESCAPE) {

	283 pTarget = target;

	284 c = unescape(buf, status);

	285

	286 if (c == U_ERR) {

	287 return TOK_ERROR;

	288 }

	289 }

	290

	291 U_APPEND_CHAR32(c, pTarget,len);

	292 pTarget = target;

	293 ustr_uscat(token, pTarget,len, status);

	294 len=0;

	295 if (U_FAILURE(*status)) {

	296 return TOK_ERROR;

	297 }

	298 }

	299 }

	300

	301 /* DO skip whitespace */

	302 c = getNextChar(buf, TRUE, NULL, status);

	303

	304 if (U_FAILURE(*status)) {

	305 return TOK_STRING;

	306 }

	307

	308 if (c == OPENBRACE \|\| c == CLOSEBRACE \|\| c == COMMA \|\| c == COLON) {

	309 ucbuf_ungetc(c, buf);

	310 return TOK_STRING;

	311 }

	312 }

	313 }

	314

	315 /* Retrieve the next character. If skipwhite is

	316 true, whitespace is skipped as well. */

	317 static UChar32 getNextChar(UCHARBUF* buf,

	318 UBool skipwhite,

	319 struct UString *token,

	320 UErrorCode *status) {

	321 UChar32 c, c2;

	322

	323 if (U_FAILURE(*status)) {

	324 return U_EOF;

	325 }

	326

	327 for (;;) {

	328 c = ucbuf_getc(buf,status);

	329

	330 if (c == U_EOF) {

	331 return U_EOF;

	332 }

	333

	334 if (skipwhite && isWhitespace(c)) {

	335 continue;

	336 }

	337

	338 /* This also handles the get() failing case */

	339 if (c != SLASH) {

	340 return c;

	341 }

	342

	343 c = ucbuf_getc(buf,status); /* "/c" */

	344

	345 if (c == U_EOF) {

	346 return U_EOF;

	347 }

	348

	349 switch (c) {

	350 case SLASH: /* "//" */

	351 seekUntilNewline(buf, NULL, status);

	352 break;

	353

	354 case ASTERISK: /* " / * " */

	355 c2 = ucbuf_getc(buf, status); /* "/ * c" */

	356 if(c2 == ASTERISK){ /* "/ * " /

	357 /* parse multi-line comment and store it in token*/

	358 seekUntilEndOfComment(buf, token, status);

	359 } else {

	360 ucbuf_ungetc(c2, buf); /* c2 is the non-asterisk following "/ " . Include c2 back in buffer. /

	361 seekUntilEndOfComment(buf, NULL, status);

	362 }

	363 break;

	364

	365 default:

	366 ucbuf_ungetc(c, buf); /* "/c" - put back the c */

	367 /* If get() failed this is a NOP */

	368 return SLASH;

	369 }

	370

	371 }

	372 }

	373

	374 static void seekUntilNewline(UCHARBUF* buf,

	375 struct UString *token,

	376 UErrorCode *status) {

	377 UChar32 c;

	378

	379 if (U_FAILURE(*status)) {

	380 return;

	381 }

	382

	383 do {

	384 c = ucbuf_getc(buf,status);

	385 /* add the char to token */

	386 if(token!=NULL){

	387 ustr_u32cat(token, c, status);

	388 }

	389 } while (!isNewline(c) && c != U_EOF && *status == U_ZERO_ERROR);

	390 }

	391

	392 static void seekUntilEndOfComment(UCHARBUF *buf,

	393 struct UString *token,

	394 UErrorCode *status) {

	395 UChar32 c, d;

	396 uint32_t line;

	397

	398 if (U_FAILURE(*status)) {

	399 return;

	400 }

	401

	402 line = lineCount;

	403

	404 do {

	405 c = ucbuf_getc(buf, status);

	406

	407 if (c == ASTERISK) {

	408 d = ucbuf_getc(buf, status);

	409

	410 if (d != SLASH) {

	411 ucbuf_ungetc(d, buf);

	412 } else {

	413 break;

	414 }

	415 }

	416 /* add the char to token */

	417 if(token!=NULL){

	418 ustr_u32cat(token, c, status);

	419 }

	420 /* increment the lineCount */

	421 isNewline(c);

	422

	423 } while (c != U_EOF && *status == U_ZERO_ERROR);

	424

	425 if (c == U_EOF) {

	426 *status = U_INVALID_FORMAT_ERROR;

	427 error(line, "unterminated comment detected");

	428 }

	429 }

	430

	431 UChar32 unescape(UCHARBUF *buf,

	432 UErrorCode *status) {

	433 if (U_FAILURE(*status)) {

	434 return U_EOF;

	435 }

	436

	437 /* We expect to be called after the ESCAPE has been seen, but

	438 * u_fgetcx needs an ESCAPE to do its magic. */

	439 ucbuf_ungetc(ESCAPE, buf);

	440

	441 return ucbuf_getcx32(buf, status);

	442 }

	443

	444 static UBool isWhitespace(UChar32 c) {

	445 switch (c) {

	446 /* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */

	447 case 0x000A:

	448 case 0x2029:

	449 lineCount++;

	450 case 0x000D:

	451 case 0x0020:

	452 case 0x0009:

	453 case 0xFEFF:

	454 return TRUE;

	455

	456 default:

	457 return FALSE;

	458 }

	459 }

	460

	461 static UBool isNewline(UChar32 c) {

	462 switch (c) {

	463 /* '\n', '\r', 0x2029 */

	464 case 0x000A:

	465 case 0x2029:

	466 lineCount++;

	467 case 0x000D:

	468 return TRUE;

	469

	470 default:

	471 return FALSE;

	472 }

	473 }

OLD	NEW

« no previous file with comments | « icu46/source/tools/genrb/read.h ('k') | icu46/source/tools/genrb/reslist.h » ('j') | no next file with comments »