OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 2003-2007, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* |
| 6 * |
| 7 * File prscmnts.cpp |
| 8 * |
| 9 * Modification History: |
| 10 * |
| 11 * Date Name Description |
| 12 * 08/22/2003 ram Creation. |
| 13 ******************************************************************************* |
| 14 */ |
| 15 |
| 16 #include "unicode/regex.h" |
| 17 #include "unicode/unistr.h" |
| 18 #include "unicode/parseerr.h" |
| 19 #include "prscmnts.h" |
| 20 #include <stdio.h> |
| 21 #include <stdlib.h> |
| 22 |
| 23 U_NAMESPACE_USE |
| 24 |
| 25 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions n
ot available */ |
| 26 |
/* Capacity of the UnicodeString arrays that receive the pieces of a comment
 * after it has been split at '@' signs. */
#define MAX_SPLIT_STRINGS 20

/* Regular expressions used to recognise a tag at the start of a split piece.
 * The table is indexed with a UParseCommentsOption value and capture group 1
 * holds the text that follows the tag name.
 * NOTE(review): the order presumably mirrors the enum (translate first, then
 * note) - confirm against prscmnts.h. */
const char *patternStrings[UPC_LIMIT]={
    "^translate\\s*(.*)",
    "^note\\s*(.*)"
};
| 33 |
| 34 U_CFUNC int32_t |
| 35 removeText(UChar *source, int32_t srcLen, |
| 36 UnicodeString patString,uint32_t options, |
| 37 UnicodeString replaceText, UErrorCode *status){ |
| 38 |
| 39 if(status == NULL || U_FAILURE(*status)){ |
| 40 return 0; |
| 41 } |
| 42 |
| 43 UnicodeString src(source, srcLen); |
| 44 |
| 45 RegexMatcher myMatcher(patString, src, options, *status); |
| 46 if(U_FAILURE(*status)){ |
| 47 return 0; |
| 48 } |
| 49 UnicodeString dest; |
| 50 |
| 51 |
| 52 dest = myMatcher.replaceAll(replaceText,*status); |
| 53 |
| 54 |
| 55 return dest.extract(source, srcLen, *status); |
| 56 |
| 57 } |
| 58 U_CFUNC int32_t |
| 59 trim(UChar *src, int32_t srcLen, UErrorCode *status){ |
| 60 srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove
leading new lines |
| 61 srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading
spaces |
| 62 srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailin
g spcaes |
| 63 return srcLen; |
| 64 } |
| 65 |
| 66 U_CFUNC int32_t |
| 67 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ |
| 68 srcLen = trim(source, srcLen, status); |
| 69 UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like "
* " at the begining of the line |
| 70 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status)
; |
| 71 return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove ne
w lines; |
| 72 } |
| 73 |
| 74 U_CFUNC int32_t |
| 75 getText(const UChar* source, int32_t srcLen, |
| 76 UChar** dest, int32_t destCapacity, |
| 77 UnicodeString patternString, |
| 78 UErrorCode* status){ |
| 79 |
| 80 if(status == NULL || U_FAILURE(*status)){ |
| 81 return 0; |
| 82 } |
| 83 |
| 84 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
| 85 RegexPattern *pattern = RegexPattern::compile("@", 0, *status); |
| 86 UnicodeString src (source,srcLen); |
| 87 |
| 88 if (U_FAILURE(*status)) { |
| 89 return 0; |
| 90 } |
| 91 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
| 92 |
| 93 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); |
| 94 if (U_FAILURE(*status)) { |
| 95 return 0; |
| 96 } |
| 97 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ |
| 98 matcher.reset(stringArray[i]); |
| 99 if(matcher.lookingAt(*status)){ |
| 100 UnicodeString out = matcher.group(1, *status); |
| 101 |
| 102 return out.extract(*dest, destCapacity,*status); |
| 103 } |
| 104 } |
| 105 return 0; |
| 106 } |
| 107 |
| 108 |
| 109 #define AT_SIGN 0x0040 |
| 110 |
| 111 U_CFUNC int32_t |
| 112 getDescription( const UChar* source, int32_t srcLen, |
| 113 UChar** dest, int32_t destCapacity, |
| 114 UErrorCode* status){ |
| 115 if(status == NULL || U_FAILURE(*status)){ |
| 116 return 0; |
| 117 } |
| 118 |
| 119 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
| 120 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *s
tatus); |
| 121 UnicodeString src(source, srcLen); |
| 122 |
| 123 if (U_FAILURE(*status)) { |
| 124 return 0; |
| 125 } |
| 126 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); |
| 127 |
| 128 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ |
| 129 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); |
| 130 return trim(*dest, destLen, status); |
| 131 } |
| 132 return 0; |
| 133 } |
| 134 |
| 135 U_CFUNC int32_t |
| 136 getCount(const UChar* source, int32_t srcLen, |
| 137 UParseCommentsOption option, UErrorCode *status){ |
| 138 |
| 139 if(status == NULL || U_FAILURE(*status)){ |
| 140 return 0; |
| 141 } |
| 142 |
| 143 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
| 144 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *s
tatus); |
| 145 UnicodeString src (source, srcLen); |
| 146 |
| 147 |
| 148 if (U_FAILURE(*status)) { |
| 149 return 0; |
| 150 } |
| 151 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status
); |
| 152 |
| 153 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); |
| 154 if (U_FAILURE(*status)) { |
| 155 return 0; |
| 156 } |
| 157 int32_t count = 0; |
| 158 for(int32_t i=0; i<retLen; i++){ |
| 159 matcher.reset(stringArray[i]); |
| 160 if(matcher.lookingAt(*status)){ |
| 161 count++; |
| 162 } |
| 163 } |
| 164 if(option == UPC_TRANSLATE && count > 1){ |
| 165 fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); |
| 166 exit(U_UNSUPPORTED_ERROR); |
| 167 } |
| 168 return count; |
| 169 } |
| 170 |
| 171 U_CFUNC int32_t |
| 172 getAt(const UChar* source, int32_t srcLen, |
| 173 UChar** dest, int32_t destCapacity, |
| 174 int32_t index, |
| 175 UParseCommentsOption option, |
| 176 UErrorCode* status){ |
| 177 |
| 178 if(status == NULL || U_FAILURE(*status)){ |
| 179 return 0; |
| 180 } |
| 181 |
| 182 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
| 183 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *s
tatus); |
| 184 UnicodeString src (source, srcLen); |
| 185 |
| 186 |
| 187 if (U_FAILURE(*status)) { |
| 188 return 0; |
| 189 } |
| 190 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status
); |
| 191 |
| 192 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); |
| 193 if (U_FAILURE(*status)) { |
| 194 return 0; |
| 195 } |
| 196 int32_t count = 0; |
| 197 for(int32_t i=0; i<retLen; i++){ |
| 198 matcher.reset(stringArray[i]); |
| 199 if(matcher.lookingAt(*status)){ |
| 200 if(count == index){ |
| 201 UnicodeString out = matcher.group(1, *status); |
| 202 return out.extract(*dest, destCapacity,*status); |
| 203 } |
| 204 count++; |
| 205 |
| 206 } |
| 207 } |
| 208 return 0; |
| 209 |
| 210 } |
| 211 |
| 212 U_CFUNC int32_t |
| 213 getTranslate( const UChar* source, int32_t srcLen, |
| 214 UChar** dest, int32_t destCapacity, |
| 215 UErrorCode* status){ |
| 216 UnicodeString notePatternString = "^translate\\s*?(.*)"; |
| 217 |
| 218 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternStr
ing, status); |
| 219 return trim(*dest, destLen, status); |
| 220 } |
| 221 |
| 222 U_CFUNC int32_t |
| 223 getNote(const UChar* source, int32_t srcLen, |
| 224 UChar** dest, int32_t destCapacity, |
| 225 UErrorCode* status){ |
| 226 |
| 227 UnicodeString notePatternString = "^note\\s*?(.*)"; |
| 228 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternSt
ring, status); |
| 229 return trim(*dest, destLen, status); |
| 230 |
| 231 } |
| 232 |
| 233 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ |
| 234 |
OLD | NEW |