| Index: icu46/source/tools/toolutil/uparse.h
|
| ===================================================================
|
| --- icu46/source/tools/toolutil/uparse.h (revision 0)
|
| +++ icu46/source/tools/toolutil/uparse.h (revision 0)
|
| @@ -0,0 +1,151 @@
|
| +/*
|
| +*******************************************************************************
|
| +*
|
| +* Copyright (C) 2000-2010, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*
|
| +*******************************************************************************
|
| +* file name: uparse.h
|
| +* encoding: US-ASCII
|
| +* tab size: 8 (not used)
|
| +* indentation:4
|
| +*
|
| +* created on: 2000apr18
|
| +* created by: Markus W. Scherer
|
| +*
|
| +* This file provides a parser for files that are delimited by one single
|
| +* character like ';' or TAB. Example: the Unicode Character Properties files
|
| +* like UnicodeData.txt are semicolon-delimited.
|
| +*/
|
| +
|
| +#ifndef __UPARSE_H__
|
| +#define __UPARSE_H__
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +/**
|
| + * Is c an invariant-character whitespace: space, TAB, CR ('\r') or LF ('\n')?
|
| + * @param c invariant character; expanded up to four times, so it must have no side effects
|
| + */
|
| +#define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
|
| +
|
| +U_CDECL_BEGIN
|
| +
|
| +/**
|
| + * Skip space ' ' and TAB '\t' characters.
|
| + * NOTE(review): U_IS_INV_WHITESPACE() also accepts '\r' and '\n'; confirm which set is skipped here.
|
| + * @param s Pointer to characters.
|
| + * @return Pointer to first character at or after s that is not a space or TAB.
|
| + */
|
| +U_CAPI const char * U_EXPORT2
|
| +u_skipWhitespace(const char *s);
|
| +
|
| +/**
|
| + * Trim whitespace (including line endings) from the end of the string.
|
| + *
|
| + * @param s Pointer to the string; non-const, so presumably trimmed in place (confirm in uparse.c).
|
| + * @return Pointer to the new end of the string.
|
| + */
|
| +U_CAPI char * U_EXPORT2
|
| +u_rtrim(char *s);
|
| +
|
| +/** Line function type for u_parseDelimitedFile(): called for each parsed line with that line's field start/limit pointers. */
|
| +typedef void U_CALLCONV
|
| +UParseLineFn(void *context,
|
| + char *fields[][2],
|
| + int32_t fieldCount,
|
| + UErrorCode *pErrorCode);
|
| +
|
| +/**
|
| + * Parser for files that are similar to UnicodeData.txt:
|
| + * This function opens the file and reads it line by line. It skips empty lines
|
| + * and comment lines that start with a '#'.
|
| + * All other lines are separated into fields with one delimiter character
|
| + * (semicolon for Unicode Properties files) between two fields. The last field in
|
| + * a line does not need to be terminated with a delimiter.
|
| + *
|
| + * For each line, after segmenting it, the line function lineFn is called.
|
| + * It is passed the array of field start and limit pointers that is
|
| + * passed into this parser and filled by it for each line.
|
| + * For each field i of the line, the start pointer in fields[i][0]
|
| + * points to the beginning of the field, while the limit pointer in fields[i][1]
|
| + * points just past the field, i.e., to the delimiter or the line end.
|
| + *
|
| + * The context parameter of the line function is
|
| + * the same as the one for the parse function.
|
| + *
|
| + * The line function may modify the contents of the fields including the
|
| + * limit characters.
|
| + *
|
| + * If the file cannot be opened, or there is a parsing error, or the line function
|
| + * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
|
| + */
|
| +U_CAPI void U_EXPORT2
|
| +u_parseDelimitedFile(const char *filename, char delimiter,
|
| + char *fields[][2], int32_t fieldCount,
|
| + UParseLineFn *lineFn, void *context,
|
| + UErrorCode *pErrorCode);
|
| +
|
| +/**
|
| + * Parse a string of code points like 0061 0308 0300.
|
| + * s must end with either ';' or NUL.
|
| + * NOTE(review): dest/destCapacity are undocumented; presumably the output buffer and its capacity -- confirm.
|
| + * @return Number of code points.
|
| + */
|
| +U_CAPI int32_t U_EXPORT2
|
| +u_parseCodePoints(const char *s,
|
| + uint32_t *dest, int32_t destCapacity,
|
| + UErrorCode *pErrorCode);
|
| +
|
| +/**
|
| + * Parse a list of code points like 0061 0308 0300
|
| + * into a UChar * string.
|
| + * s must end with either ';' or NUL.
|
| + *
|
| + * If pFirst!=NULL, the first code point is also stored in *pFirst.
|
| + *
|
| + * @param s Input char * string.
|
| + * @param dest Output string buffer.
|
| + * @param destCapacity Capacity of dest in numbers of UChars.
|
| + * @param pFirst If pFirst!=NULL then *pFirst will be set to the first
|
| + * code point in the string.
|
| + * @param pErrorCode ICU error code.
|
| + * @return The length of the string in numbers of UChars.
|
| + */
|
| +U_CAPI int32_t U_EXPORT2
|
| +u_parseString(const char *s,
|
| + UChar *dest, int32_t destCapacity,
|
| + uint32_t *pFirst,
|
| + UErrorCode *pErrorCode);
|
| +
|
| +/**
|
| + * Parse a code point range like
|
| + * 0085 or
|
| + * 4E00..9FA5.
|
| + *
|
| + * s must contain such a range and end with either ';' or NUL.
|
| + * NOTE(review): pStart/pEnd are undocumented; presumably they receive start and end -- confirm.
|
| + * @return Length of code point range, end-start+1
|
| + */
|
| +U_CAPI int32_t U_EXPORT2
|
| +u_parseCodePointRange(const char *s,
|
| + uint32_t *pStart, uint32_t *pEnd,
|
| + UErrorCode *pErrorCode);
|
| +
|
| +/**
|
| + * Same as u_parseCodePointRange(), including the return value, except that the
|
| + * range may be terminated by any character. The position of the terminating
|
| + * character is returned via the *terminator output parameter.
|
| + */
|
| +U_CAPI int32_t U_EXPORT2
|
| +u_parseCodePointRangeAnyTerminator(const char *s,
|
| + uint32_t *pStart, uint32_t *pEnd,
|
| + const char **terminator,
|
| + UErrorCode *pErrorCode);
|
| +/** NOTE(review): undocumented; presumably parses source (length sLen) into dest and returns the output length -- confirm in uparse.c. */
|
| +U_CAPI int32_t U_EXPORT2
|
| +u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
|
| +
|
| +U_CDECL_END
|
| +
|
| +#endif
|
|
|
| Property changes on: icu46/source/tools/toolutil/uparse.h
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|