OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2000-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: uparse.h |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2000apr18 |
| 14 * created by: Markus W. Scherer |
| 15 * |
| 16 * This file provides a parser for files that are delimited by one single |
| 17 * character like ';' or TAB. Example: the Unicode Character Properties files |
| 18 * like UnicodeData.txt are semicolon-delimited. |
| 19 */ |
| 20 |
| 21 #ifndef __UPARSE_H__ |
| 22 #define __UPARSE_H__ |
| 23 |
| 24 #include "unicode/utypes.h" |
| 25 |
| 26 /** |
| 27 * Is c an invariant-character whitespace: space, TAB, CR or LF? |
| 28 * @param c invariant character; expanded up to four times, so avoid side effects |
| 29 */ |
| 30 #define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n') |
| 31 |
| 32 U_CDECL_BEGIN |
| 33 |
| 34 /** |
| 35 * Skip space ' ' and TAB '\t' characters. |
| 36 * |
| 37 * @param s Pointer to characters. |
| 38 * @return Pointer to first character at or after s that is not a space or TAB. |
| 39 */ |
| 40 U_CAPI const char * U_EXPORT2 |
| 41 u_skipWhitespace(const char *s); |
| 42 |
| 43 /** |
| 44 * Trim whitespace (including line endings) from the end of the string. |
| 45 * |
| 46 * @param s Pointer to the string. |
| 47 * @return Pointer to the new end of the string. |
| 48 */ |
| 49 U_CAPI char * U_EXPORT2 |
| 50 u_rtrim(char *s); |
| 51 |
| 52 /** Line-function type for u_parseDelimitedFile(); invoked for each segmented line. */ |
| 53 typedef void U_CALLCONV |
| 54 UParseLineFn(void *context, |
| 55 char *fields[][2], |
| 56 int32_t fieldCount, |
| 57 UErrorCode *pErrorCode); |
| 58 |
| 59 /** |
| 60 * Parser for files that are similar to UnicodeData.txt: |
| 61 * This function opens the file and reads it line by line. It skips empty lines |
| 62 * and comment lines that start with a '#'. |
| 63 * All other lines are separated into fields with one delimiter character |
| 64 * (semicolon for Unicode Properties files) between two fields. The last field in |
| 65 * a line does not need to be terminated with a delimiter. |
| 66 * |
| 67 * For each line, after segmenting it, a line function is called. |
| 68 * It gets passed the array of field start and limit pointers that is |
| 69 * passed into this parser and filled by it for each line. |
| 70 * For each field i of the line, the start pointer in fields[i][0] |
| 71 * points to the beginning of the field, while the limit pointer in fields[i][1] |
| 72 * points behind the field, i.e., to the delimiter or the line end. |
| 73 * |
| 74 * The context parameter of the line function is |
| 75 * the same as the one for the parse function. |
| 76 * |
| 77 * The line function may modify the contents of the fields including the |
| 78 * limit characters. |
| 79 * |
| 80 * If the file cannot be opened, or there is a parsing error or the line function |
| 81 * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code. |
| 82 */ |
| 83 U_CAPI void U_EXPORT2 |
| 84 u_parseDelimitedFile(const char *filename, char delimiter, |
| 85 char *fields[][2], int32_t fieldCount, |
| 86 UParseLineFn *lineFn, void *context, |
| 87 UErrorCode *pErrorCode); |
| 88 |
| 89 /** |
| 90 * Parse a string of code points like 0061 0308 0300. |
| 91 * s must end with either ';' or NUL. |
| 92 * |
| 93 * @return Number of code points. |
| 94 */ |
| 95 U_CAPI int32_t U_EXPORT2 |
| 96 u_parseCodePoints(const char *s, |
| 97 uint32_t *dest, int32_t destCapacity, |
| 98 UErrorCode *pErrorCode); |
| 99 |
| 100 /** |
| 101 * Parse a list of code points like 0061 0308 0300 |
| 102 * into a UChar * string. |
| 103 * s must end with either ';' or NUL. |
| 104 * |
| 105 * Store the first code point in *pFirst (if pFirst is not NULL). |
| 106 * |
| 107 * @param s Input char * string. |
| 108 * @param dest Output string buffer. |
| 109 * @param destCapacity Capacity of dest in numbers of UChars. |
| 110 * @param pFirst If pFirst!=NULL then *pFirst will be set to the first |
| 111 * code point in the string. |
| 112 * @param pErrorCode ICU error code. |
| 113 * @return The length of the string in numbers of UChars. |
| 114 */ |
| 115 U_CAPI int32_t U_EXPORT2 |
| 116 u_parseString(const char *s, |
| 117 UChar *dest, int32_t destCapacity, |
| 118 uint32_t *pFirst, |
| 119 UErrorCode *pErrorCode); |
| 120 |
| 121 /** |
| 122 * Parse a code point range like |
| 123 * 0085 or |
| 124 * 4E00..9FA5. |
| 125 * |
| 126 * s must contain such a range and end with either ';' or NUL. |
| 127 * |
| 128 * @return Length of code point range, end-start+1 |
| 129 */ |
| 130 U_CAPI int32_t U_EXPORT2 |
| 131 u_parseCodePointRange(const char *s, |
| 132 uint32_t *pStart, uint32_t *pEnd, |
| 133 UErrorCode *pErrorCode); |
| 134 |
| 135 /** |
| 136 * Same as u_parseCodePointRange() but the range may be terminated by |
| 137 * any character. The position of the terminating character is returned via |
| 138 * the *terminator output parameter. |
| 139 */ |
| 140 U_CAPI int32_t U_EXPORT2 |
| 141 u_parseCodePointRangeAnyTerminator(const char *s, |
| 142 uint32_t *pStart, uint32_t *pEnd, |
| 143 const char **terminator, |
| 144 UErrorCode *pErrorCode); |
| 145 |
| 146 U_CAPI int32_t U_EXPORT2 |
| 147 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity,
UErrorCode *status); |
| 148 |
| 149 U_CDECL_END |
| 150 |
| 151 #endif |
OLD | NEW |