Index: icu46/source/tools/genrb/prscmnts.cpp |
=================================================================== |
--- icu46/source/tools/genrb/prscmnts.cpp (revision 0) |
+++ icu46/source/tools/genrb/prscmnts.cpp (revision 0) |
@@ -0,0 +1,234 @@ |
+/* |
+ ******************************************************************************* |
+ * Copyright (C) 2003-2007, International Business Machines |
+ * Corporation and others. All Rights Reserved. |
+ ******************************************************************************* |
+ * |
+ * File prscmnts.cpp |
+ * |
+ * Modification History: |
+ * |
+ * Date Name Description |
+ * 08/22/2003 ram Creation. |
+ ******************************************************************************* |
+ */ |
+ |
+#include "unicode/regex.h" |
+#include "unicode/unistr.h" |
+#include "unicode/parseerr.h" |
+#include "prscmnts.h" |
+#include <stdio.h> |
+#include <stdlib.h> |
+ |
+U_NAMESPACE_USE |
+ |
+#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* do not compile when regular expressions are not available */ |
+ |
+#define MAX_SPLIT_STRINGS 20 /* size of the UnicodeString arrays handed to RegexPattern::split() in this file */ |
+ |
+const char *patternStrings[UPC_LIMIT]={ /* tag regexes; getCount()/getAt() index this with a UParseCommentsOption */ |
+ "^translate\\s*(.*)", /* "translate" at the start, optional whitespace, rest captured as group 1 */ |
+ "^note\\s*(.*)" /* "note" at the start, optional whitespace, rest captured as group 1 */ |
+}; |
+ |
+/* |
+ * Replaces every match of the regular expression patString (compiled with the |
+ * flags in options) in source[0..srcLen) by replaceText and writes the result |
+ * back into source, passing srcLen as the buffer capacity. |
+ * Returns the value of that final extract() call, or 0 when status is NULL, |
+ * already a failure, or set to a failure while constructing the matcher. |
+ */ |
+U_CFUNC int32_t |
+removeText(UChar *source, int32_t srcLen, |
+           UnicodeString patString,uint32_t options, |
+           UnicodeString replaceText, UErrorCode *status){ |
+    if(status == NULL || U_FAILURE(*status)){ |
+        return 0; |
+    } |
+ |
+    const UnicodeString input(source, srcLen); |
+    RegexMatcher replacer(patString, input, options, *status); |
+    if(U_FAILURE(*status)){ |
+        return 0; |
+    } |
+ |
+    const UnicodeString replaced(replacer.replaceAll(replaceText, *status)); |
+    return replaced.extract(source, srcLen, *status); // copy the rewritten text over the caller's buffer |
+} |
+/* Trims src in place with three removeText() passes and returns the length reported by the last one. */ |
+U_CFUNC int32_t |
+trim(UChar *src, int32_t srcLen, UErrorCode *status){ |
+    const int32_t afterBreaks = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // leading space/CR/LF run ending in a space; NOTE(review): trailing space in the pattern looks accidental |
+    const int32_t afterLeading = removeText(src, afterBreaks, "^\\s+", 0, "", status); // any leading whitespace |
+    return removeText(src, afterLeading, "\\s+$", 0, "", status); // any trailing whitespace |
+} |
+ |
+/* Cleans comment text in place: trim(), strip each line's leading "*" prefix, then collapse space/CR/LF runs; returns the final length. */ |
+U_CFUNC int32_t |
+removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ |
+    const int32_t trimmedLen = trim(source, srcLen, status); |
+    const int32_t unstarredLen = removeText(source, trimmedLen, UnicodeString("^\\s*?\\*\\s*?"), UREGEX_MULTILINE, "", status); // whitespace + '*' at the start of every line |
+    return removeText(source, unstarredLen, "[ \\r\\n]+", 0, " ", status); // each run of spaces/CR/LF becomes one space |
+} |
+ |
+/* |
+ * Splits source[0..srcLen) at '@' and, for the first piece that patternString matches |
+ * at its start, extracts capture group 1 into *dest (capacity destCapacity). Returns the |
+ * extract() result, or 0 when no piece matches, status is NULL/failed, or a regex step fails. |
+ */ |
+U_CFUNC int32_t |
+getText(const UChar* source, int32_t srcLen, |
+        UChar** dest, int32_t destCapacity, |
+        UnicodeString patternString, |
+        UErrorCode* status){ |
+    if(status == NULL || U_FAILURE(*status)){ |
+        return 0; |
+    } |
+    UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
+    UnicodeString src (source,srcLen); |
+    RegexPattern *pattern = RegexPattern::compile("@", 0, *status); |
+    if (U_FAILURE(*status)) { |
+        return 0; |
+    } |
+    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
+    delete pattern; // compile() hands ownership to the caller; previously leaked on every call |
+    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); |
+    if (U_FAILURE(*status)) { |
+        return 0; |
+    } |
+    for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ |
+        matcher.reset(stringArray[i]); |
+        if(matcher.lookingAt(*status)){ |
+            return matcher.group(1, *status).extract(*dest, destCapacity,*status); |
+        } |
+    } |
+    return 0; |
+} |
+ |
+ |
+#define AT_SIGN 0x0040 |
+/* Copies the part of source[0..srcLen) that precedes the first '@' into *dest (capacity |
+ * destCapacity), trims it in place and returns the trimmed length; returns 0 when status |
+ * is NULL or already a failure, or when the "@" pattern cannot be compiled. */ |
+U_CFUNC int32_t |
+getDescription( const UChar* source, int32_t srcLen, |
+                UChar** dest, int32_t destCapacity, |
+                UErrorCode* status){ |
+    if(status == NULL || U_FAILURE(*status)){ |
+        return 0; |
+    } |
+    UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
+    UnicodeString src(source, srcLen); |
+    RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); |
+    if (U_FAILURE(*status)) { |
+        return 0; |
+    } |
+    pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); |
+    delete pattern; // compile() hands ownership to the caller; previously leaked on every call |
+    if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ |
+        int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); |
+        return trim(*dest, destLen, status); |
+    } |
+    return 0; |
+} |
+ |
+/* |
+ * Returns how many '@'-separated pieces of source[0..srcLen) start with patternStrings[option] |
+ * (0 on NULL/failed status or regex failure); exits when UPC_TRANSLATE matches more than once. |
+ */ |
+U_CFUNC int32_t |
+getCount(const UChar* source, int32_t srcLen, |
+         UParseCommentsOption option, UErrorCode *status){ |
+    if(status == NULL || U_FAILURE(*status)){ |
+        return 0; |
+    } |
+    UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
+    UnicodeString src (source, srcLen); |
+    RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); |
+    if (U_FAILURE(*status)) { |
+        return 0; |
+    } |
+    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
+    delete pattern; // compile() hands ownership to the caller; previously leaked on every call |
+    RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); |
+    if (U_FAILURE(*status)) { |
+        return 0; |
+    } |
+    int32_t count = 0; |
+    for(int32_t i=0; i<retLen; i++){ |
+        matcher.reset(stringArray[i]); |
+        if(matcher.lookingAt(*status)){ |
+            count++; |
+        } |
+    } |
+    if(option == UPC_TRANSLATE && count > 1){ |
+        fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); |
+        exit(U_UNSUPPORTED_ERROR); |
+    } |
+    return count; |
+} |
+ |
+/* |
+ * Extracts capture group 1 of the index-th (0-based) '@'-separated piece of |
+ * source[0..srcLen) that starts with the tag regex patternStrings[option] into |
+ * *dest (capacity destCapacity). Returns the extract() result, or 0 when there is |
+ * no such piece, status is NULL/failed, or a regex step sets a failure. |
+ */ |
+U_CFUNC int32_t |
+getAt(const UChar* source, int32_t srcLen, |
+      UChar** dest, int32_t destCapacity, |
+      int32_t index, |
+      UParseCommentsOption option, |
+      UErrorCode* status){ |
+    if(status == NULL || U_FAILURE(*status)){ |
+        return 0; |
+    } |
+    UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
+    UnicodeString src (source, srcLen); |
+    RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); |
+    if (U_FAILURE(*status)) { |
+        return 0; |
+    } |
+    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
+    delete pattern; // compile() hands ownership to the caller; previously leaked on every call |
+    RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); |
+    if (U_FAILURE(*status)) { |
+        return 0; |
+    } |
+    int32_t count = 0; |
+    for(int32_t i=0; i<retLen; i++){ |
+        matcher.reset(stringArray[i]); |
+        if(matcher.lookingAt(*status)){ |
+            if(count == index){ |
+                UnicodeString out = matcher.group(1, *status); |
+                return out.extract(*dest, destCapacity,*status); |
+            } |
+            count++; // a matching piece, but not the requested one yet |
+        } |
+    } |
+    return 0; |
+} |
+ |
+/* Uses getText() to copy the text following a "translate" tag in source into *dest, then returns the length after trim(). */ |
+U_CFUNC int32_t |
+getTranslate( const UChar* source, int32_t srcLen, |
+              UChar** dest, int32_t destCapacity, |
+              UErrorCode* status){ |
+    const UnicodeString translatePattern("^translate\\s*?(.*)"); |
+    const int32_t rawLen = getText(source, srcLen, dest, destCapacity, translatePattern, status); |
+    return trim(*dest, rawLen, status); |
+} |
+ |
+/* Uses getText() to copy the text following a "note" tag in source into *dest |
+ * (capacity destCapacity), then returns the length after trim(). */ |
+U_CFUNC int32_t |
+getNote(const UChar* source, int32_t srcLen, |
+        UChar** dest, int32_t destCapacity, |
+        UErrorCode* status){ |
+    const UnicodeString notePattern("^note\\s*?(.*)"); |
+    const int32_t rawLen = getText(source, srcLen, dest, destCapacity, notePattern, status); |
+    return trim(*dest, rawLen, status); |
+} |
+ |
+#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ |
+ |
Property changes on: icu46/source/tools/genrb/prscmnts.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |