icu46/source/tools/toolutil/uparse.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/tools/toolutil/uparse.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 *******************************************************************************

	3 *

	4 * Copyright (C) 2000-2010, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 *

	7 *******************************************************************************

	8 * file name: uparse.c

	9 * encoding: US-ASCII

	10 * tab size: 8 (not used)

	11 * indentation:4

	12 *

	13 * created on: 2000apr18

	14 * created by: Markus W. Scherer

	15 *

	16 * This file provides a parser for files that are delimited by one single

	17 * character like ';' or TAB. Example: the Unicode Character Properties files

	18 * like UnicodeData.txt are semicolon-delimited.

	19 */

	20

	21 #include "unicode/utypes.h"

	22 #include "cstring.h"

	23 #include "filestrm.h"

	24 #include "uparse.h"

	25 #include "unicode/uchar.h"

	26 #include "unicode/ustring.h"

	27 #include "ustr_imp.h"

	28

	29 #include <stdio.h>

	30

	31 U_CAPI const char * U_EXPORT2

	32 u_skipWhitespace(const char *s) {

	33 while(U_IS_INV_WHITESPACE(*s)) {

	34 ++s;

	35 }

	36 return s;

	37 }

	38

	39 U_CAPI char * U_EXPORT2

	40 u_rtrim(char *s) {

	41 char *end=uprv_strchr(s, 0);

	42 while(s<end && U_IS_INV_WHITESPACE(*(end-1))) {

	43 *--end = 0;

	44 }

	45 return end;

	46 }

	47

	48 /*

	49 * If the string starts with # @missing: then return the pointer to the

	50 * following non-whitespace character.

	51 * Otherwise return the original pointer.

	52 * Unicode 5.0 adds such lines in some data files to document

	53 * default property values.

	54 * Poor man's regex for variable amounts of white space.

	55 */

	56 static const char *

	57 getMissingLimit(const char *s) {

	58 const char *s0=s;

	59 if(

	60 *(s=u_skipWhitespace(s))=='#' &&

	61 *(s=u_skipWhitespace(s+1))=='@' &&

	62 0==strncmp((s=u_skipWhitespace(s+1)), "missing", 7) &&

	63 *(s=u_skipWhitespace(s+7))==':'

	64 ) {

	65 return u_skipWhitespace(s+1);

	66 } else {

	67 return s0;

	68 }

	69 }

	70

	71 U_CAPI void U_EXPORT2

	72 u_parseDelimitedFile(const char *filename, char delimiter,

	73 char *fields[][2], int32_t fieldCount,

	74 UParseLineFn lineFn, void context,

	75 UErrorCode *pErrorCode) {

	76 FileStream *file;

	77 char line[300];

	78 char start, limit;

	79 int32_t i, length;

	80

	81 if(U_FAILURE(*pErrorCode)) {

	82 return;

	83 }

	84

	85 if(fields==NULL \|\| lineFn==NULL \|\| fieldCount<=0) {

	86 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	87 return;

	88 }

	89

	90 if(filename==NULL \|\| filename==0 \|\| (filename=='-' && filename[1]==0)) {

	91 filename=NULL;

	92 file=T_FileStream_stdin();

	93 } else {

	94 file=T_FileStream_open(filename, "r");

	95 }

	96 if(file==NULL) {

	97 *pErrorCode=U_FILE_ACCESS_ERROR;

	98 return;

	99 }

	100

	101 while(T_FileStream_readLine(file, line, sizeof(line))!=NULL) {

	102 /* remove trailing newline characters */

	103 length=(int32_t)(u_rtrim(line)-line);

	104

	105 /*

	106 * detect a line with # @missing:

	107 * start parsing after that, or else from the beginning of the line

	108 * set the default warning for @missing lines

	109 */

	110 start=(char *)getMissingLimit(line);

	111 if(start==line) {

	112 *pErrorCode=U_ZERO_ERROR;

	113 } else {

	114 *pErrorCode=U_USING_DEFAULT_WARNING;

	115 }

	116

	117 /* skip this line if it is empty or a comment */

	118 if(start==0 \|\| start=='#') {

	119 continue;

	120 }

	121

	122 /* remove in-line comments */

	123 limit=uprv_strchr(start, '#');

	124 if(limit!=NULL) {

	125 /* get white space before the pound sign */

	126 while(limit>start && U_IS_INV_WHITESPACE(*(limit-1))) {

	127 --limit;

	128 }

	129

	130 /* truncate the line */

	131 *limit=0;

	132 }

	133

	134 /* skip lines with only whitespace */

	135 if(u_skipWhitespace(start)[0]==0) {

	136 continue;

	137 }

	138

	139 /* for each field, call the corresponding field function */

	140 for(i=0; i<fieldCount; ++i) {

	141 /* set the limit pointer of this field */

	142 limit=start;

	143 while(limit!=delimiter && limit!=0) {

	144 ++limit;

	145 }

	146

	147 /* set the field start and limit in the fields array */

	148 fields[i][0]=start;

	149 fields[i][1]=limit;

	150

	151 /* set start to the beginning of the next field, if any */

	152 start=limit;

	153 if(*start!=0) {

	154 ++start;

	155 } else if(i+1<fieldCount) {

	156 *pErrorCode=U_PARSE_ERROR;

	157 limit=line+length;

	158 i=fieldCount;

	159 break;

	160 }

	161 }

	162

	163 /* error in a field function? */

	164 if(U_FAILURE(*pErrorCode)) {

	165 break;

	166 }

	167

	168 /* call the field function */

	169 lineFn(context, fields, fieldCount, pErrorCode);

	170 if(U_FAILURE(*pErrorCode)) {

	171 break;

	172 }

	173 }

	174

	175 if(filename!=NULL) {

	176 T_FileStream_close(file);

	177 }

	178 }

	179

	180 /*

	181 * parse a list of code points

	182 * store them as a UTF-32 string in dest[destCapacity]

	183 * return the number of code points

	184 */

	185 U_CAPI int32_t U_EXPORT2

	186 u_parseCodePoints(const char *s,

	187 uint32_t *dest, int32_t destCapacity,

	188 UErrorCode *pErrorCode) {

	189 char *end;

	190 uint32_t value;

	191 int32_t count;

	192

	193 if(U_FAILURE(*pErrorCode)) {

	194 return 0;

	195 }

	196 if(s==NULL \|\| destCapacity<0 \|\| (destCapacity>0 && dest==NULL)) {

	197 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	198 return 0;

	199 }

	200

	201 count=0;

	202 for(;;) {

	203 s=u_skipWhitespace(s);

	204 if(s==';' \|\| s==0) {

	205 return count;

	206 }

	207

	208 /* read one code point */

	209 value=(uint32_t)uprv_strtoul(s, &end, 16);

	210 if(end<=s \|\| (!U_IS_INV_WHITESPACE(end) && end!=';' && *end!=0) \|\| val ue>=0x110000) {

	211 *pErrorCode=U_PARSE_ERROR;

	212 return 0;

	213 }

	214

	215 /* append it to the destination array */

	216 if(count<destCapacity) {

	217 dest[count++]=value;

	218 } else {

	219 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	220 }

	221

	222 /* go to the following characters */

	223 s=end;

	224 }

	225 }

	226

	227 /*

	228 * parse a list of code points

	229 * store them as a string in dest[destCapacity]

	230 * set the first code point in *pFirst

	231 * @return The length of the string in numbers of UChars.

	232 */

	233 U_CAPI int32_t U_EXPORT2

	234 u_parseString(const char *s,

	235 UChar *dest, int32_t destCapacity,

	236 uint32_t *pFirst,

	237 UErrorCode *pErrorCode) {

	238 char *end;

	239 uint32_t value;

	240 int32_t destLength;

	241

	242 if(U_FAILURE(*pErrorCode)) {

	243 return 0;

	244 }

	245 if(s==NULL \|\| destCapacity<0 \|\| (destCapacity>0 && dest==NULL)) {

	246 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	247 }

	248

	249 if(pFirst!=NULL) {

	250 *pFirst=0xffffffff;

	251 }

	252

	253 destLength=0;

	254 for(;;) {

	255 s=u_skipWhitespace(s);

	256 if(s==';' \|\| s==0) {

	257 if(destLength<destCapacity) {

	258 dest[destLength]=0;

	259 } else if(destLength==destCapacity) {

	260 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;

	261 } else {

	262 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	263 }

	264 return destLength;

	265 }

	266

	267 /* read one code point */

	268 value=(uint32_t)uprv_strtoul(s, &end, 16);

	269 if(end<=s \|\| (!U_IS_INV_WHITESPACE(end) && end!=';' && *end!=0) \|\| val ue>=0x110000) {

	270 *pErrorCode=U_PARSE_ERROR;

	271 return 0;

	272 }

	273

	274 /* store the first code point */

	275 if(pFirst!=NULL) {

	276 *pFirst=value;

	277 pFirst=NULL;

	278 }

	279

	280 /* append it to the destination array */

	281 if((destLength+U16_LENGTH(value))<=destCapacity) {

	282 U16_APPEND_UNSAFE(dest, destLength, value);

	283 } else {

	284 destLength+=U16_LENGTH(value);

	285 }

	286

	287 /* go to the following characters */

	288 s=end;

	289 }

	290 }

	291

	292 /* read a range like start or start..end */

	293 U_CAPI int32_t U_EXPORT2

	294 u_parseCodePointRangeAnyTerminator(const char *s,

	295 uint32_t pStart, uint32_t pEnd,

	296 const char **terminator,

	297 UErrorCode *pErrorCode) {

	298 char *end;

	299 uint32_t value;

	300

	301 if(U_FAILURE(*pErrorCode)) {

	302 return 0;

	303 }

	304 if(s==NULL \|\| pStart==NULL \|\| pEnd==NULL) {

	305 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	306 return 0;

	307 }

	308

	309 /* read the start code point */

	310 s=u_skipWhitespace(s);

	311 value=(uint32_t)uprv_strtoul(s, &end, 16);

	312 if(end<=s \|\| value>=0x110000) {

	313 *pErrorCode=U_PARSE_ERROR;

	314 return 0;

	315 }

	316 pStart=pEnd=value;

	317

	318 /* is there a "..end"? */

	319 s=u_skipWhitespace(end);

	320 if(*s!='.' \|\| s[1]!='.') {

	321 *terminator=end;

	322 return 1;

	323 }

	324 s=u_skipWhitespace(s+2);

	325

	326 /* read the end code point */

	327 value=(uint32_t)uprv_strtoul(s, &end, 16);

	328 if(end<=s \|\| value>=0x110000) {

	329 *pErrorCode=U_PARSE_ERROR;

	330 return 0;

	331 }

	332 *pEnd=value;

	333

	334 /* is this a valid range? */

	335 if(value<*pStart) {

	336 *pErrorCode=U_PARSE_ERROR;

	337 return 0;

	338 }

	339

	340 *terminator=end;

	341 return value-*pStart+1;

	342 }

	343

	344 U_CAPI int32_t U_EXPORT2

	345 u_parseCodePointRange(const char *s,

	346 uint32_t pStart, uint32_t pEnd,

	347 UErrorCode *pErrorCode) {

	348 const char *terminator;

	349 int32_t rangeLength=

	350 u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &terminator, pErrorC ode);

	351 if(U_SUCCESS(*pErrorCode)) {

	352 terminator=u_skipWhitespace(terminator);

	353 if(terminator!=';' && terminator!=0) {

	354 *pErrorCode=U_PARSE_ERROR;

	355 return 0;

	356 }

	357 }

	358 return rangeLength;

	359 }

	360

	361 U_CAPI int32_t U_EXPORT2

	362 u_parseUTF8(const char source, int32_t sLen, char dest, int32_t destCapacity, UErrorCode *status) {

	363 const char *read = source;

	364 int32_t i = 0;

	365 unsigned int value = 0;

	366 if(sLen == -1) {

	367 sLen = (int32_t)strlen(source);

	368 }

	369

	370 while(read < source+sLen) {

	371 sscanf(read, "%2x", &value);

	372 if(i < destCapacity) {

	373 dest[i] = (char)value;

	374 }

	375 i++;

	376 read += 2;

	377 }

	378 return u_terminateChars(dest, destCapacity, i, status);

	379 }

OLD	NEW

« no previous file with comments | « icu46/source/tools/toolutil/uparse.h ('k') | icu46/source/tools/toolutil/writesrc.h » ('j') | no next file with comments »