OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2003-2007, International Business Machines | 3 * Copyright (C) 2003-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * | 6 * |
7 * File prscmnts.cpp | 7 * File prscmnts.cpp |
8 * | 8 * |
9 * Modification History: | 9 * Modification History: |
10 * | 10 * |
11 * Date Name Description | 11 * Date Name Description |
12 * 08/22/2003 ram Creation. | 12 * 08/22/2003 ram Creation. |
13 ******************************************************************************* | 13 ******************************************************************************* |
14 */ | 14 */ |
15 | 15 |
| 16 // Safer use of UnicodeString. |
| 17 #ifndef UNISTR_FROM_CHAR_EXPLICIT |
| 18 # define UNISTR_FROM_CHAR_EXPLICIT explicit |
| 19 #endif |
| 20 |
| 21 // Less important, but still a good idea. |
| 22 #ifndef UNISTR_FROM_STRING_EXPLICIT |
| 23 # define UNISTR_FROM_STRING_EXPLICIT explicit |
| 24 #endif |
| 25 |
16 #include "unicode/regex.h" | 26 #include "unicode/regex.h" |
17 #include "unicode/unistr.h" | 27 #include "unicode/unistr.h" |
18 #include "unicode/parseerr.h" | 28 #include "unicode/parseerr.h" |
19 #include "prscmnts.h" | 29 #include "prscmnts.h" |
20 #include <stdio.h> | 30 #include <stdio.h> |
21 #include <stdlib.h> | 31 #include <stdlib.h> |
22 | 32 |
23 U_NAMESPACE_USE | 33 U_NAMESPACE_USE |
24 | 34 |
25 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions n
ot available */ | 35 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions n
ot available */ |
(...skipping 24 matching lines...) Expand all Loading... |
50 | 60 |
51 | 61 |
52 dest = myMatcher.replaceAll(replaceText,*status); | 62 dest = myMatcher.replaceAll(replaceText,*status); |
53 | 63 |
54 | 64 |
55 return dest.extract(source, srcLen, *status); | 65 return dest.extract(source, srcLen, *status); |
56 | 66 |
57 } | 67 } |
58 U_CFUNC int32_t | 68 U_CFUNC int32_t |
59 trim(UChar *src, int32_t srcLen, UErrorCode *status){ | 69 trim(UChar *src, int32_t srcLen, UErrorCode *status){ |
60 srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove
leading new lines | 70 srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeS
tring(), status); // remove leading new lines |
61 srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading
spaces | 71 srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString()
, status); // remove leading spaces |
62 srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailin
g spcaes | 72 srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString()
, status); // remvoe trailing spcaes |
63 return srcLen; | 73 return srcLen; |
64 } | 74 } |
65 | 75 |
66 U_CFUNC int32_t | 76 U_CFUNC int32_t |
67 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ | 77 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ |
68 srcLen = trim(source, srcLen, status); | 78 srcLen = trim(source, srcLen, status); |
69 UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like "
* " at the begining of the line | 79 UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at
the begining of the line |
70 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status)
; | 80 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeStri
ng(), status); |
71 return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove ne
w lines; | 81 return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeStr
ing(" "), status);// remove new lines; |
72 } | 82 } |
73 | 83 |
74 U_CFUNC int32_t | 84 U_CFUNC int32_t |
75 getText(const UChar* source, int32_t srcLen, | 85 getText(const UChar* source, int32_t srcLen, |
76 UChar** dest, int32_t destCapacity, | 86 UChar** dest, int32_t destCapacity, |
77 UnicodeString patternString, | 87 UnicodeString patternString, |
78 UErrorCode* status){ | 88 UErrorCode* status){ |
79 | 89 |
80 if(status == NULL || U_FAILURE(*status)){ | 90 if(status == NULL || U_FAILURE(*status)){ |
81 return 0; | 91 return 0; |
82 } | 92 } |
83 | 93 |
84 UnicodeString stringArray[MAX_SPLIT_STRINGS]; | 94 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
85 RegexPattern *pattern = RegexPattern::compile("@", 0, *status); | 95 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *s
tatus); |
86 UnicodeString src (source,srcLen); | 96 UnicodeString src (source,srcLen); |
87 | 97 |
88 if (U_FAILURE(*status)) { | 98 if (U_FAILURE(*status)) { |
89 return 0; | 99 return 0; |
90 } | 100 } |
91 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | 101 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
92 | 102 |
93 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | 103 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); |
94 if (U_FAILURE(*status)) { | 104 if (U_FAILURE(*status)) { |
95 return 0; | 105 return 0; |
(...skipping 14 matching lines...) Expand all Loading... |
110 | 120 |
111 U_CFUNC int32_t | 121 U_CFUNC int32_t |
112 getDescription( const UChar* source, int32_t srcLen, | 122 getDescription( const UChar* source, int32_t srcLen, |
113 UChar** dest, int32_t destCapacity, | 123 UChar** dest, int32_t destCapacity, |
114 UErrorCode* status){ | 124 UErrorCode* status){ |
115 if(status == NULL || U_FAILURE(*status)){ | 125 if(status == NULL || U_FAILURE(*status)){ |
116 return 0; | 126 return 0; |
117 } | 127 } |
118 | 128 |
119 UnicodeString stringArray[MAX_SPLIT_STRINGS]; | 129 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
120 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *s
tatus); | 130 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGE
X_MULTILINE, *status); |
121 UnicodeString src(source, srcLen); | 131 UnicodeString src(source, srcLen); |
122 | 132 |
123 if (U_FAILURE(*status)) { | 133 if (U_FAILURE(*status)) { |
124 return 0; | 134 return 0; |
125 } | 135 } |
126 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); | 136 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); |
127 | 137 |
128 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ | 138 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ |
129 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); | 139 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); |
130 return trim(*dest, destLen, status); | 140 return trim(*dest, destLen, status); |
131 } | 141 } |
132 return 0; | 142 return 0; |
133 } | 143 } |
134 | 144 |
135 U_CFUNC int32_t | 145 U_CFUNC int32_t |
136 getCount(const UChar* source, int32_t srcLen, | 146 getCount(const UChar* source, int32_t srcLen, |
137 UParseCommentsOption option, UErrorCode *status){ | 147 UParseCommentsOption option, UErrorCode *status){ |
138 | 148 |
139 if(status == NULL || U_FAILURE(*status)){ | 149 if(status == NULL || U_FAILURE(*status)){ |
140 return 0; | 150 return 0; |
141 } | 151 } |
142 | 152 |
143 UnicodeString stringArray[MAX_SPLIT_STRINGS]; | 153 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
144 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *s
tatus); | 154 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGE
X_MULTILINE, *status); |
145 UnicodeString src (source, srcLen); | 155 UnicodeString src (source, srcLen); |
146 | 156 |
147 | 157 |
148 if (U_FAILURE(*status)) { | 158 if (U_FAILURE(*status)) { |
149 return 0; | 159 return 0; |
150 } | 160 } |
151 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status
); | 161 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status
); |
152 | 162 |
153 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); | 163 UnicodeString patternString(patternStrings[option]); |
| 164 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); |
154 if (U_FAILURE(*status)) { | 165 if (U_FAILURE(*status)) { |
155 return 0; | 166 return 0; |
156 } | 167 } |
157 int32_t count = 0; | 168 int32_t count = 0; |
158 for(int32_t i=0; i<retLen; i++){ | 169 for(int32_t i=0; i<retLen; i++){ |
159 matcher.reset(stringArray[i]); | 170 matcher.reset(stringArray[i]); |
160 if(matcher.lookingAt(*status)){ | 171 if(matcher.lookingAt(*status)){ |
161 count++; | 172 count++; |
162 } | 173 } |
163 } | 174 } |
164 if(option == UPC_TRANSLATE && count > 1){ | 175 if(option == UPC_TRANSLATE && count > 1){ |
165 fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); | 176 fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); |
166 exit(U_UNSUPPORTED_ERROR); | 177 exit(U_UNSUPPORTED_ERROR); |
167 } | 178 } |
168 return count; | 179 return count; |
169 } | 180 } |
170 | 181 |
171 U_CFUNC int32_t | 182 U_CFUNC int32_t |
172 getAt(const UChar* source, int32_t srcLen, | 183 getAt(const UChar* source, int32_t srcLen, |
173 UChar** dest, int32_t destCapacity, | 184 UChar** dest, int32_t destCapacity, |
174 int32_t index, | 185 int32_t index, |
175 UParseCommentsOption option, | 186 UParseCommentsOption option, |
176 UErrorCode* status){ | 187 UErrorCode* status){ |
177 | 188 |
178 if(status == NULL || U_FAILURE(*status)){ | 189 if(status == NULL || U_FAILURE(*status)){ |
179 return 0; | 190 return 0; |
180 } | 191 } |
181 | 192 |
182 UnicodeString stringArray[MAX_SPLIT_STRINGS]; | 193 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
183 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *s
tatus); | 194 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGE
X_MULTILINE, *status); |
184 UnicodeString src (source, srcLen); | 195 UnicodeString src (source, srcLen); |
185 | 196 |
186 | 197 |
187 if (U_FAILURE(*status)) { | 198 if (U_FAILURE(*status)) { |
188 return 0; | 199 return 0; |
189 } | 200 } |
190 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status
); | 201 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status
); |
191 | 202 |
192 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); | 203 UnicodeString patternString(patternStrings[option]); |
| 204 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); |
193 if (U_FAILURE(*status)) { | 205 if (U_FAILURE(*status)) { |
194 return 0; | 206 return 0; |
195 } | 207 } |
196 int32_t count = 0; | 208 int32_t count = 0; |
197 for(int32_t i=0; i<retLen; i++){ | 209 for(int32_t i=0; i<retLen; i++){ |
198 matcher.reset(stringArray[i]); | 210 matcher.reset(stringArray[i]); |
199 if(matcher.lookingAt(*status)){ | 211 if(matcher.lookingAt(*status)){ |
200 if(count == index){ | 212 if(count == index){ |
201 UnicodeString out = matcher.group(1, *status); | 213 UnicodeString out = matcher.group(1, *status); |
202 return out.extract(*dest, destCapacity,*status); | 214 return out.extract(*dest, destCapacity,*status); |
203 } | 215 } |
204 count++; | 216 count++; |
205 | 217 |
206 } | 218 } |
207 } | 219 } |
208 return 0; | 220 return 0; |
209 | 221 |
210 } | 222 } |
211 | 223 |
212 U_CFUNC int32_t | 224 U_CFUNC int32_t |
213 getTranslate( const UChar* source, int32_t srcLen, | 225 getTranslate( const UChar* source, int32_t srcLen, |
214 UChar** dest, int32_t destCapacity, | 226 UChar** dest, int32_t destCapacity, |
215 UErrorCode* status){ | 227 UErrorCode* status){ |
216 UnicodeString notePatternString = "^translate\\s*?(.*)"; | 228 UnicodeString notePatternString("^translate\\s*?(.*)"); |
217 | 229 |
218 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternStr
ing, status); | 230 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternStr
ing, status); |
219 return trim(*dest, destLen, status); | 231 return trim(*dest, destLen, status); |
220 } | 232 } |
221 | 233 |
222 U_CFUNC int32_t | 234 U_CFUNC int32_t |
223 getNote(const UChar* source, int32_t srcLen, | 235 getNote(const UChar* source, int32_t srcLen, |
224 UChar** dest, int32_t destCapacity, | 236 UChar** dest, int32_t destCapacity, |
225 UErrorCode* status){ | 237 UErrorCode* status){ |
226 | 238 |
227 UnicodeString notePatternString = "^note\\s*?(.*)"; | 239 UnicodeString notePatternString("^note\\s*?(.*)"); |
228 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternSt
ring, status); | 240 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternSt
ring, status); |
229 return trim(*dest, destLen, status); | 241 return trim(*dest, destLen, status); |
230 | 242 |
231 } | 243 } |
232 | 244 |
233 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ | 245 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ |
234 | 246 |
OLD | NEW |