OLD | NEW |
| (Empty) |
1 /* | |
2 ********************************************************************** | |
3 * Copyright (c) 2004-2011, International Business Machines | |
4 * Corporation and others. All Rights Reserved. | |
5 ********************************************************************** | |
6 * Author: Alan Liu | |
7 * Created: March 22 2004 | |
8 * Since: ICU 3.0 | |
9 ********************************************************************** | |
10 */ | |
11 #include "tokiter.h" | |
12 #include "textfile.h" | |
13 #include "patternprops.h" | |
14 #include "util.h" | |
15 #include "uprops.h" | |
16 | |
17 TokenIterator::TokenIterator(TextFile* r) { | |
18 reader = r; | |
19 done = haveLine = FALSE; | |
20 pos = lastpos = -1; | |
21 } | |
22 | |
23 TokenIterator::~TokenIterator() { | |
24 } | |
25 | |
26 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) { | |
27 if (done || U_FAILURE(ec)) { | |
28 return FALSE; | |
29 } | |
30 token.truncate(0); | |
31 for (;;) { | |
32 if (!haveLine) { | |
33 if (!reader->readLineSkippingComments(line, ec)) { | |
34 done = TRUE; | |
35 return FALSE; | |
36 } | |
37 haveLine = TRUE; | |
38 pos = 0; | |
39 } | |
40 lastpos = pos; | |
41 if (!nextToken(token, ec)) { | |
42 haveLine = FALSE; | |
43 if (U_FAILURE(ec)) return FALSE; | |
44 continue; | |
45 } | |
46 return TRUE; | |
47 } | |
48 } | |
49 | |
50 int32_t TokenIterator::getLineNumber() const { | |
51 return reader->getLineNumber(); | |
52 } | |
53 | |
54 /** | |
55 * Read the next token from 'this->line' and append it to 'token'. | |
56 * Tokens are separated by Pattern_White_Space. Tokens may also be | |
57 * delimited by double or single quotes. The closing quote must match | |
58 * the opening quote. If a '#' is encountered, the rest of the line | |
59 * is ignored, unless it is backslash-escaped or within quotes. | |
60 * @param token the token is appended to this StringBuffer | |
61 * @param ec input-output error code | |
62 * @return TRUE if a valid token is found, or FALSE if the end | |
63 * of the line is reached or an error occurs | |
64 */ | |
65 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) { | |
66 ICU_Utility::skipWhitespace(line, pos, TRUE); | |
67 if (pos == line.length()) { | |
68 return FALSE; | |
69 } | |
70 UChar c = line.charAt(pos++); | |
71 UChar quote = 0; | |
72 switch (c) { | |
73 case 34/*'"'*/: | |
74 case 39/*'\\'*/: | |
75 quote = c; | |
76 break; | |
77 case 35/*'#'*/: | |
78 return FALSE; | |
79 default: | |
80 token.append(c); | |
81 break; | |
82 } | |
83 while (pos < line.length()) { | |
84 c = line.charAt(pos); // 16-bit ok | |
85 if (c == 92/*'\\'*/) { | |
86 UChar32 c32 = line.unescapeAt(pos); | |
87 if (c32 < 0) { | |
88 ec = U_MALFORMED_UNICODE_ESCAPE; | |
89 return FALSE; | |
90 } | |
91 token.append(c32); | |
92 } else if ((quote != 0 && c == quote) || | |
93 (quote == 0 && PatternProps::isWhiteSpace(c))) { | |
94 ++pos; | |
95 return TRUE; | |
96 } else if (quote == 0 && c == '#') { | |
97 return TRUE; // do NOT increment | |
98 } else { | |
99 token.append(c); | |
100 ++pos; | |
101 } | |
102 } | |
103 if (quote != 0) { | |
104 ec = U_UNTERMINATED_QUOTE; | |
105 return FALSE; | |
106 } | |
107 return TRUE; | |
108 } | |
OLD | NEW |