OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (c) 2003-2007, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 * Author: Alan Liu |
| 7 * Created: September 24 2003 |
| 8 * Since: ICU 2.8 |
| 9 ********************************************************************** |
| 10 */ |
| 11 #include "ruleiter.h" |
| 12 #include "unicode/parsepos.h" |
| 13 #include "unicode/unistr.h" |
| 14 #include "unicode/symtable.h" |
| 15 #include "util.h" |
| 16 |
/*
 * Upper bound, in UTF-16 code units, on the text that next() looks ahead
 * at when decoding a backslash escape. The longest notations are of the
 * form \U87654321 or a surrogate pair spelled as \ud800\udc00.
 */
#define MAX_U_NOTATION_LEN 12
| 19 |
| 20 U_NAMESPACE_BEGIN |
| 21 |
| 22 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const
SymbolTable* theSym, |
| 23 ParsePosition& thePos) : |
| 24 text(theText), |
| 25 pos(thePos), |
| 26 sym(theSym), |
| 27 buf(0), |
| 28 bufPos(0) |
| 29 {} |
| 30 |
| 31 UBool RuleCharacterIterator::atEnd() const { |
| 32 return buf == 0 && pos.getIndex() == text.length(); |
| 33 } |
| 34 |
| 35 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCod
e& ec) { |
| 36 if (U_FAILURE(ec)) return DONE; |
| 37 |
| 38 UChar32 c = DONE; |
| 39 isEscaped = FALSE; |
| 40 |
| 41 for (;;) { |
| 42 c = _current(); |
| 43 _advance(UTF_CHAR_LENGTH(c)); |
| 44 |
| 45 if (c == SymbolTable::SYMBOL_REF && buf == 0 && |
| 46 (options & PARSE_VARIABLES) != 0 && sym != 0) { |
| 47 UnicodeString name = sym->parseReference(text, pos, text.length()); |
| 48 // If name is empty there was an isolated SYMBOL_REF; |
| 49 // return it. Caller must be prepared for this. |
| 50 if (name.length() == 0) { |
| 51 break; |
| 52 } |
| 53 bufPos = 0; |
| 54 buf = sym->lookup(name); |
| 55 if (buf == 0) { |
| 56 ec = U_UNDEFINED_VARIABLE; |
| 57 return DONE; |
| 58 } |
| 59 // Handle empty variable value |
| 60 if (buf->length() == 0) { |
| 61 buf = 0; |
| 62 } |
| 63 continue; |
| 64 } |
| 65 |
| 66 if ((options & SKIP_WHITESPACE) != 0 && |
| 67 uprv_isRuleWhiteSpace(c)) { |
| 68 continue; |
| 69 } |
| 70 |
| 71 if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { |
| 72 UnicodeString tempEscape; |
| 73 int32_t offset = 0; |
| 74 c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); |
| 75 jumpahead(offset); |
| 76 isEscaped = TRUE; |
| 77 if (c < 0) { |
| 78 ec = U_MALFORMED_UNICODE_ESCAPE; |
| 79 return DONE; |
| 80 } |
| 81 } |
| 82 |
| 83 break; |
| 84 } |
| 85 |
| 86 return c; |
| 87 } |
| 88 |
| 89 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { |
| 90 p.buf = buf; |
| 91 p.pos = pos.getIndex(); |
| 92 p.bufPos = bufPos; |
| 93 } |
| 94 |
| 95 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { |
| 96 buf = p.buf; |
| 97 pos.setIndex(p.pos); |
| 98 bufPos = p.bufPos; |
| 99 } |
| 100 |
| 101 void RuleCharacterIterator::skipIgnored(int32_t options) { |
| 102 if ((options & SKIP_WHITESPACE) != 0) { |
| 103 for (;;) { |
| 104 UChar32 a = _current(); |
| 105 if (!uprv_isRuleWhiteSpace(a)) break; |
| 106 _advance(UTF_CHAR_LENGTH(a)); |
| 107 } |
| 108 } |
| 109 } |
| 110 |
| 111 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t m
axLookAhead) const { |
| 112 if (maxLookAhead < 0) { |
| 113 maxLookAhead = 0x7FFFFFFF; |
| 114 } |
| 115 if (buf != 0) { |
| 116 buf->extract(bufPos, maxLookAhead, result); |
| 117 } else { |
| 118 text.extract(pos.getIndex(), maxLookAhead, result); |
| 119 } |
| 120 return result; |
| 121 } |
| 122 |
| 123 void RuleCharacterIterator::jumpahead(int32_t count) { |
| 124 _advance(count); |
| 125 } |
| 126 |
| 127 /* |
| 128 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { |
| 129 int32_t b = pos.getIndex(); |
| 130 text.extract(0, b, result); |
| 131 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|
' at index |
| 132 } |
| 133 */ |
| 134 |
| 135 UChar32 RuleCharacterIterator::_current() const { |
| 136 if (buf != 0) { |
| 137 return buf->char32At(bufPos); |
| 138 } else { |
| 139 int i = pos.getIndex(); |
| 140 return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; |
| 141 } |
| 142 } |
| 143 |
| 144 void RuleCharacterIterator::_advance(int32_t count) { |
| 145 if (buf != 0) { |
| 146 bufPos += count; |
| 147 if (bufPos == buf->length()) { |
| 148 buf = 0; |
| 149 } |
| 150 } else { |
| 151 pos.setIndex(pos.getIndex() + count); |
| 152 if (pos.getIndex() > text.length()) { |
| 153 pos.setIndex(text.length()); |
| 154 } |
| 155 } |
| 156 } |
| 157 |
| 158 U_NAMESPACE_END |
| 159 |
| 160 //eof |
OLD | NEW |