| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * Copyright (C) 2003-2007, International Business Machines | 3 * Copyright (C) 2003-2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* | 5 ******************************************************************************* |
| 6 * | 6 * |
| 7 * File prscmnts.cpp | 7 * File prscmnts.cpp |
| 8 * | 8 * |
| 9 * Modification History: | 9 * Modification History: |
| 10 * | 10 * |
| 11 * Date Name Description | 11 * Date Name Description |
| 12 * 08/22/2003 ram Creation. | 12 * 08/22/2003 ram Creation. |
| 13 ******************************************************************************* | 13 ******************************************************************************* |
| 14 */ | 14 */ |
| 15 | 15 |
| 16 // Safer use of UnicodeString. |
| 17 #ifndef UNISTR_FROM_CHAR_EXPLICIT |
| 18 # define UNISTR_FROM_CHAR_EXPLICIT explicit |
| 19 #endif |
| 20 |
| 21 // Less important, but still a good idea. |
| 22 #ifndef UNISTR_FROM_STRING_EXPLICIT |
| 23 # define UNISTR_FROM_STRING_EXPLICIT explicit |
| 24 #endif |
| 25 |
| 16 #include "unicode/regex.h" | 26 #include "unicode/regex.h" |
| 17 #include "unicode/unistr.h" | 27 #include "unicode/unistr.h" |
| 18 #include "unicode/parseerr.h" | 28 #include "unicode/parseerr.h" |
| 19 #include "prscmnts.h" | 29 #include "prscmnts.h" |
| 20 #include <stdio.h> | 30 #include <stdio.h> |
| 21 #include <stdlib.h> | 31 #include <stdlib.h> |
| 22 | 32 |
| 23 U_NAMESPACE_USE | 33 U_NAMESPACE_USE |
| 24 | 34 |
| 25 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ | 35 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ |
| (...skipping 24 matching lines...) |
| 50 | 60 |
| 51 | 61 |
| 52 dest = myMatcher.replaceAll(replaceText,*status); | 62 dest = myMatcher.replaceAll(replaceText,*status); |
| 53 | 63 |
| 54 | 64 |
| 55 return dest.extract(source, srcLen, *status); | 65 return dest.extract(source, srcLen, *status); |
| 56 | 66 |
| 57 } | 67 } |
| 58 U_CFUNC int32_t | 68 U_CFUNC int32_t |
| 59 trim(UChar *src, int32_t srcLen, UErrorCode *status){ | 69 trim(UChar *src, int32_t srcLen, UErrorCode *status){ |
| 60 srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines | 70 srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines |
| 61 srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces | 71 srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces |
| 62 srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes | 72 srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes |
| 63 return srcLen; | 73 return srcLen; |
| 64 } | 74 } |
| 65 | 75 |
| 66 U_CFUNC int32_t | 76 U_CFUNC int32_t |
| 67 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ | 77 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ |
| 68 srcLen = trim(source, srcLen, status); | 78 srcLen = trim(source, srcLen, status); |
| 69 UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like " * " at the begining of the line | 79 UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line |
| 70 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status); | 80 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status); |
| 71 return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines; | 81 return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines; |
| 72 } | 82 } |
| 73 | 83 |
| 74 U_CFUNC int32_t | 84 U_CFUNC int32_t |
| 75 getText(const UChar* source, int32_t srcLen, | 85 getText(const UChar* source, int32_t srcLen, |
| 76 UChar** dest, int32_t destCapacity, | 86 UChar** dest, int32_t destCapacity, |
| 77 UnicodeString patternString, | 87 UnicodeString patternString, |
| 78 UErrorCode* status){ | 88 UErrorCode* status){ |
| 79 | 89 |
| 80 if(status == NULL || U_FAILURE(*status)){ | 90 if(status == NULL || U_FAILURE(*status)){ |
| 81 return 0; | 91 return 0; |
| 82 } | 92 } |
| 83 | 93 |
| 84 UnicodeString stringArray[MAX_SPLIT_STRINGS]; | 94 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
| 85 RegexPattern *pattern = RegexPattern::compile("@", 0, *status); | 95 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status); |
| 86 UnicodeString src (source,srcLen); | 96 UnicodeString src (source,srcLen); |
| 87 | 97 |
| 88 if (U_FAILURE(*status)) { | 98 if (U_FAILURE(*status)) { |
| 89 return 0; | 99 return 0; |
| 90 } | 100 } |
| 91 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | 101 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
| 92 | 102 |
| 93 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | 103 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); |
| 94 if (U_FAILURE(*status)) { | 104 if (U_FAILURE(*status)) { |
| 95 return 0; | 105 return 0; |
| (...skipping 14 matching lines...) |
| 110 | 120 |
| 111 U_CFUNC int32_t | 121 U_CFUNC int32_t |
| 112 getDescription( const UChar* source, int32_t srcLen, | 122 getDescription( const UChar* source, int32_t srcLen, |
| 113 UChar** dest, int32_t destCapacity, | 123 UChar** dest, int32_t destCapacity, |
| 114 UErrorCode* status){ | 124 UErrorCode* status){ |
| 115 if(status == NULL || U_FAILURE(*status)){ | 125 if(status == NULL || U_FAILURE(*status)){ |
| 116 return 0; | 126 return 0; |
| 117 } | 127 } |
| 118 | 128 |
| 119 UnicodeString stringArray[MAX_SPLIT_STRINGS]; | 129 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
| 120 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); | 130 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); |
| 121 UnicodeString src(source, srcLen); | 131 UnicodeString src(source, srcLen); |
| 122 | 132 |
| 123 if (U_FAILURE(*status)) { | 133 if (U_FAILURE(*status)) { |
| 124 return 0; | 134 return 0; |
| 125 } | 135 } |
| 126 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); | 136 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); |
| 127 | 137 |
| 128 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ | 138 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ |
| 129 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); | 139 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); |
| 130 return trim(*dest, destLen, status); | 140 return trim(*dest, destLen, status); |
| 131 } | 141 } |
| 132 return 0; | 142 return 0; |
| 133 } | 143 } |
| 134 | 144 |
| 135 U_CFUNC int32_t | 145 U_CFUNC int32_t |
| 136 getCount(const UChar* source, int32_t srcLen, | 146 getCount(const UChar* source, int32_t srcLen, |
| 137 UParseCommentsOption option, UErrorCode *status){ | 147 UParseCommentsOption option, UErrorCode *status){ |
| 138 | 148 |
| 139 if(status == NULL || U_FAILURE(*status)){ | 149 if(status == NULL || U_FAILURE(*status)){ |
| 140 return 0; | 150 return 0; |
| 141 } | 151 } |
| 142 | 152 |
| 143 UnicodeString stringArray[MAX_SPLIT_STRINGS]; | 153 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
| 144 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); | 154 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); |
| 145 UnicodeString src (source, srcLen); | 155 UnicodeString src (source, srcLen); |
| 146 | 156 |
| 147 | 157 |
| 148 if (U_FAILURE(*status)) { | 158 if (U_FAILURE(*status)) { |
| 149 return 0; | 159 return 0; |
| 150 } | 160 } |
| 151 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | 161 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
| 152 | 162 |
| 153 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); | 163 UnicodeString patternString(patternStrings[option]); |
| 164 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); |
| 154 if (U_FAILURE(*status)) { | 165 if (U_FAILURE(*status)) { |
| 155 return 0; | 166 return 0; |
| 156 } | 167 } |
| 157 int32_t count = 0; | 168 int32_t count = 0; |
| 158 for(int32_t i=0; i<retLen; i++){ | 169 for(int32_t i=0; i<retLen; i++){ |
| 159 matcher.reset(stringArray[i]); | 170 matcher.reset(stringArray[i]); |
| 160 if(matcher.lookingAt(*status)){ | 171 if(matcher.lookingAt(*status)){ |
| 161 count++; | 172 count++; |
| 162 } | 173 } |
| 163 } | 174 } |
| 164 if(option == UPC_TRANSLATE && count > 1){ | 175 if(option == UPC_TRANSLATE && count > 1){ |
| 165 fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); | 176 fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); |
| 166 exit(U_UNSUPPORTED_ERROR); | 177 exit(U_UNSUPPORTED_ERROR); |
| 167 } | 178 } |
| 168 return count; | 179 return count; |
| 169 } | 180 } |
| 170 | 181 |
| 171 U_CFUNC int32_t | 182 U_CFUNC int32_t |
| 172 getAt(const UChar* source, int32_t srcLen, | 183 getAt(const UChar* source, int32_t srcLen, |
| 173 UChar** dest, int32_t destCapacity, | 184 UChar** dest, int32_t destCapacity, |
| 174 int32_t index, | 185 int32_t index, |
| 175 UParseCommentsOption option, | 186 UParseCommentsOption option, |
| 176 UErrorCode* status){ | 187 UErrorCode* status){ |
| 177 | 188 |
| 178 if(status == NULL || U_FAILURE(*status)){ | 189 if(status == NULL || U_FAILURE(*status)){ |
| 179 return 0; | 190 return 0; |
| 180 } | 191 } |
| 181 | 192 |
| 182 UnicodeString stringArray[MAX_SPLIT_STRINGS]; | 193 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
| 183 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); | 194 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); |
| 184 UnicodeString src (source, srcLen); | 195 UnicodeString src (source, srcLen); |
| 185 | 196 |
| 186 | 197 |
| 187 if (U_FAILURE(*status)) { | 198 if (U_FAILURE(*status)) { |
| 188 return 0; | 199 return 0; |
| 189 } | 200 } |
| 190 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | 201 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
| 191 | 202 |
| 192 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); | 203 UnicodeString patternString(patternStrings[option]); |
| 204 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); |
| 193 if (U_FAILURE(*status)) { | 205 if (U_FAILURE(*status)) { |
| 194 return 0; | 206 return 0; |
| 195 } | 207 } |
| 196 int32_t count = 0; | 208 int32_t count = 0; |
| 197 for(int32_t i=0; i<retLen; i++){ | 209 for(int32_t i=0; i<retLen; i++){ |
| 198 matcher.reset(stringArray[i]); | 210 matcher.reset(stringArray[i]); |
| 199 if(matcher.lookingAt(*status)){ | 211 if(matcher.lookingAt(*status)){ |
| 200 if(count == index){ | 212 if(count == index){ |
| 201 UnicodeString out = matcher.group(1, *status); | 213 UnicodeString out = matcher.group(1, *status); |
| 202 return out.extract(*dest, destCapacity,*status); | 214 return out.extract(*dest, destCapacity,*status); |
| 203 } | 215 } |
| 204 count++; | 216 count++; |
| 205 | 217 |
| 206 } | 218 } |
| 207 } | 219 } |
| 208 return 0; | 220 return 0; |
| 209 | 221 |
| 210 } | 222 } |
| 211 | 223 |
| 212 U_CFUNC int32_t | 224 U_CFUNC int32_t |
| 213 getTranslate( const UChar* source, int32_t srcLen, | 225 getTranslate( const UChar* source, int32_t srcLen, |
| 214 UChar** dest, int32_t destCapacity, | 226 UChar** dest, int32_t destCapacity, |
| 215 UErrorCode* status){ | 227 UErrorCode* status){ |
| 216 UnicodeString notePatternString = "^translate\\s*?(.*)"; | 228 UnicodeString notePatternString("^translate\\s*?(.*)"); |
| 217 | 229 |
| 218 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); | 230 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); |
| 219 return trim(*dest, destLen, status); | 231 return trim(*dest, destLen, status); |
| 220 } | 232 } |
| 221 | 233 |
| 222 U_CFUNC int32_t | 234 U_CFUNC int32_t |
| 223 getNote(const UChar* source, int32_t srcLen, | 235 getNote(const UChar* source, int32_t srcLen, |
| 224 UChar** dest, int32_t destCapacity, | 236 UChar** dest, int32_t destCapacity, |
| 225 UErrorCode* status){ | 237 UErrorCode* status){ |
| 226 | 238 |
| 227 UnicodeString notePatternString = "^note\\s*?(.*)"; | 239 UnicodeString notePatternString("^note\\s*?(.*)"); |
| 228 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); | 240 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); |
| 229 return trim(*dest, destLen, status); | 241 return trim(*dest, destLen, status); |
| 230 | 242 |
| 231 } | 243 } |
| 232 | 244 |
| 233 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ | 245 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ |
| 234 | 246 |
| OLD | NEW |