| Index: icu46/source/tools/genrb/prscmnts.cpp
|
| ===================================================================
|
| --- icu46/source/tools/genrb/prscmnts.cpp (revision 0)
|
| +++ icu46/source/tools/genrb/prscmnts.cpp (revision 0)
|
| @@ -0,0 +1,234 @@
|
| +/*
|
| + *******************************************************************************
|
| + * Copyright (C) 2003-2007, International Business Machines
|
| + * Corporation and others. All Rights Reserved.
|
| + *******************************************************************************
|
| + *
|
| + * File prscmnts.cpp
|
| + *
|
| + * Modification History:
|
| + *
|
| + * Date Name Description
|
| + * 08/22/2003 ram Creation.
|
| + *******************************************************************************
|
| + */
|
| +
|
| +#include "unicode/regex.h"
|
| +#include "unicode/unistr.h"
|
| +#include "unicode/parseerr.h"
|
| +#include "prscmnts.h"
|
| +#include <stdio.h>
|
| +#include <stdlib.h>
|
| +
|
| +U_NAMESPACE_USE
|
| +
|
| +#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
|
| +
|
| +/* Upper bound on the number of pieces a comment is split into at '@' by the functions in this file. */
|
| +#define MAX_SPLIT_STRINGS 20
|
| +
|
| +/*
|
| + * Regular expressions used by getCount() and getAt(), indexed by the
|
| + * UParseCommentsOption value passed to those functions. Each expression is
|
| + * anchored at the start of a piece and captures the text that follows the
|
| + * tag keyword in group 1.
|
| + * NOTE(review): the order presumably mirrors the UParseCommentsOption enum
|
| + * (translate first, note second); the enum is declared elsewhere - confirm.
|
| + */
|
| +const char *patternStrings[UPC_LIMIT]={
|
| + "^translate\\s*(.*)",
|
| + "^note\\s*(.*)"
|
| +};
|
| +
|
| +/**
|
| + * Replaces every match of a regular expression inside a UChar buffer and
|
| + * writes the result back into the same buffer.
|
| + *
|
| + * @param source      Buffer holding the text; it also receives the result.
|
| + * @param srcLen      Number of UChars of text in source. The same value is
|
| + *                    handed to UnicodeString::extract() as the capacity of
|
| + *                    the buffer when the result is written back.
|
| + * @param patString   Regular expression to look for.
|
| + * @param options     Regex flags forwarded to the RegexMatcher constructor.
|
| + * @param replaceText Text substituted for each match.
|
| + * @param status      ICU error code. Nothing is done when it is NULL or
|
| + *                    already holds a failure.
|
| + * @return The value returned by UnicodeString::extract() for the rewritten
|
| + *         text, or 0 when status is unusable or the matcher cannot be built.
|
| + */
|
| +U_CFUNC int32_t
|
| +removeText(UChar *source, int32_t srcLen,
|
| +           UnicodeString patString,uint32_t options,
|
| +           UnicodeString replaceText, UErrorCode *status){
|
| +
|
| +    if(status == NULL || U_FAILURE(*status)){
|
| +        return 0;
|
| +    }
|
| +
|
| +    UnicodeString input(source, srcLen);
|
| +    RegexMatcher regex(patString, input, options, *status);
|
| +    if(U_FAILURE(*status)){
|
| +        return 0;
|
| +    }
|
| +
|
| +    // Substitute all matches, then copy the outcome over the caller's buffer.
|
| +    UnicodeString replaced = regex.replaceAll(replaceText, *status);
|
| +    return replaced.extract(source, srcLen, *status);
|
| +}
|
| +/**
|
| + * Strips leading and trailing white space from a UChar buffer in place by
|
| + * running three removeText() passes, each replacing its match with "".
|
| + *
|
| + * @param src    Buffer to trim; modified in place.
|
| + * @param srcLen Number of UChars of text in src.
|
| + * @param status ICU error code, forwarded to every removeText() call.
|
| + * @return The length returned by the last removeText() pass.
|
| + */
|
| +U_CFUNC int32_t
|
| +trim(UChar *src, int32_t srcLen, UErrorCode *status){
|
| +    const char* const passes[] = {
|
| +        // Leading run of space/CR/LF characters. The pattern ends in a literal
|
| +        // space, so it only matches when that run ends with a space; whatever
|
| +        // it leaves behind is removed by the next pass.
|
| +        "^[ \\r\\n]+ ",
|
| +        // Any remaining leading white space.
|
| +        "^\\s+",
|
| +        // Trailing white space.
|
| +        "\\s+$"
|
| +    };
|
| +    const int32_t passCount = (int32_t)(sizeof(passes)/sizeof(passes[0]));
|
| +
|
| +    for(int32_t pass = 0; pass < passCount; pass++){
|
| +        srcLen = removeText(src, srcLen, passes[pass], 0, "", status);
|
| +    }
|
| +    return srcLen;
|
| +}
|
| +
|
| +/**
|
| + * Cleans up the text of a comment in place: trims it, removes the " * "
|
| + * decoration at the beginning of each line, and collapses every run of
|
| + * space/CR/LF characters into a single space.
|
| + *
|
| + * @param source Buffer holding the comment text; modified in place.
|
| + * @param srcLen Number of UChars of text in source.
|
| + * @param status ICU error code, forwarded to trim() and removeText().
|
| + * @return The length returned by the final removeText() call.
|
| + */
|
| +U_CFUNC int32_t
|
| +removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
|
| +    int32_t len = trim(source, srcLen, status);
|
| +
|
| +    // UREGEX_MULTILINE lets '^' match at the start of every line, so the
|
| +    // leading " * " is dropped from each line rather than only the first.
|
| +    len = removeText(source, len, "^\\s*?\\*\\s*?", UREGEX_MULTILINE, "", status);
|
| +
|
| +    // Fold line breaks and runs of spaces into one space.
|
| +    len = removeText(source, len, "[ \\r\\n]+", 0, " ", status);
|
| +    return len;
|
| +}
|
| +
|
| +/**
|
| + * Splits a comment at '@' signs and extracts capture group 1 of the first
|
| + * piece that patternString matches at its beginning.
|
| + *
|
| + * @param source        Comment text.
|
| + * @param srcLen        Number of UChars in source.
|
| + * @param dest          Pointer to the output buffer; the extracted text is
|
| + *                      written to *dest.
|
| + * @param destCapacity  Capacity of *dest, in UChars.
|
| + * @param patternString Regular expression applied (with UREGEX_DOTALL) to
|
| + *                      each piece; its group 1 is the text returned.
|
| + * @param status        ICU error code. Nothing is done when it is NULL or
|
| + *                      already holds a failure.
|
| + * @return The value returned by UnicodeString::extract() for the captured
|
| + *         text, or 0 when no piece matches or an error occurred.
|
| + */
|
| +U_CFUNC int32_t
|
| +getText(const UChar* source, int32_t srcLen,
|
| +        UChar** dest, int32_t destCapacity,
|
| +        UnicodeString patternString,
|
| +        UErrorCode* status){
|
| +
|
| +    if(status == NULL || U_FAILURE(*status)){
|
| +        return 0;
|
| +    }
|
| +
|
| +    UnicodeString stringArray[MAX_SPLIT_STRINGS];
|
| +    RegexPattern *pattern = RegexPattern::compile("@", 0, *status);
|
| +    UnicodeString src (source,srcLen);
|
| +
|
| +    if (U_FAILURE(*status)) {
|
| +        delete pattern; // deleting NULL is harmless
|
| +        return 0;
|
| +    }
|
| +    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
|
| +    // The compiled pattern is owned by this function and is only needed for
|
| +    // the split above; release it here so that no return path leaks it.
|
| +    delete pattern;
|
| +
|
| +    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
|
| +    if (U_FAILURE(*status)) {
|
| +        return 0;
|
| +    }
|
| +    // Only the pieces actually produced by split() are examined, in the same
|
| +    // way as getCount() and getAt().
|
| +    for(int32_t i=0; i<retLen; i++){
|
| +        matcher.reset(stringArray[i]);
|
| +        if(matcher.lookingAt(*status)){
|
| +            UnicodeString out = matcher.group(1, *status);
|
| +
|
| +            return out.extract(*dest, destCapacity,*status);
|
| +        }
|
| +    }
|
| +    return 0;
|
| +}
|
| +
|
| +
|
| +/* Code point of '@', the character that introduces a tag inside a comment. */
|
| +#define AT_SIGN 0x0040
|
| +
|
| +/**
|
| + * Extracts the description of a comment: the text that precedes the first
|
| + * '@' sign, trimmed of leading and trailing white space.
|
| + *
|
| + * @param source       Comment text.
|
| + * @param srcLen       Number of UChars in source.
|
| + * @param dest         Pointer to the output buffer; the description is
|
| + *                     written to *dest.
|
| + * @param destCapacity Capacity of *dest, in UChars.
|
| + * @param status       ICU error code. Nothing is done when it is NULL or
|
| + *                     already holds a failure.
|
| + * @return The length returned by trim() for the description, or 0 when the
|
| + *         first piece still contains '@' or an error occurred.
|
| + */
|
| +U_CFUNC int32_t
|
| +getDescription( const UChar* source, int32_t srcLen,
|
| +                UChar** dest, int32_t destCapacity,
|
| +                UErrorCode* status){
|
| +    if(status == NULL || U_FAILURE(*status)){
|
| +        return 0;
|
| +    }
|
| +
|
| +    UnicodeString stringArray[MAX_SPLIT_STRINGS];
|
| +    RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
|
| +    UnicodeString src(source, srcLen);
|
| +
|
| +    if (U_FAILURE(*status)) {
|
| +        delete pattern; // deleting NULL is harmless
|
| +        return 0;
|
| +    }
|
| +    pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
|
| +    // The compiled pattern is owned by this function and is no longer needed
|
| +    // once the split is done; release it so that it is not leaked.
|
| +    delete pattern;
|
| +
|
| +    if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
|
| +        int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
|
| +        return trim(*dest, destLen, status);
|
| +    }
|
| +    return 0;
|
| +}
|
| +
|
| +/**
|
| + * Counts how many '@'-separated pieces of a comment start with the tag
|
| + * selected by option.
|
| + *
|
| + * When option is UPC_TRANSLATE and more than one piece matches, a message is
|
| + * printed to stderr and the process is terminated with
|
| + * exit(U_UNSUPPORTED_ERROR).
|
| + *
|
| + * @param source Comment text.
|
| + * @param srcLen Number of UChars in source.
|
| + * @param option Selects the entry of patternStrings to match against.
|
| + * @param status ICU error code. Nothing is done when it is NULL or already
|
| + *               holds a failure.
|
| + * @return Number of matching pieces, or 0 on error.
|
| + */
|
| +U_CFUNC int32_t
|
| +getCount(const UChar* source, int32_t srcLen,
|
| +         UParseCommentsOption option, UErrorCode *status){
|
| +
|
| +    if(status == NULL || U_FAILURE(*status)){
|
| +        return 0;
|
| +    }
|
| +
|
| +    UnicodeString stringArray[MAX_SPLIT_STRINGS];
|
| +    RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
|
| +    UnicodeString src (source, srcLen);
|
| +
|
| +
|
| +    if (U_FAILURE(*status)) {
|
| +        delete pattern; // deleting NULL is harmless
|
| +        return 0;
|
| +    }
|
| +    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
|
| +    // The compiled pattern is owned by this function and is only needed for
|
| +    // the split above; release it here so that no exit path leaks it.
|
| +    delete pattern;
|
| +
|
| +    RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
|
| +    if (U_FAILURE(*status)) {
|
| +        return 0;
|
| +    }
|
| +    int32_t count = 0;
|
| +    for(int32_t i=0; i<retLen; i++){
|
| +        matcher.reset(stringArray[i]);
|
| +        if(matcher.lookingAt(*status)){
|
| +            count++;
|
| +        }
|
| +    }
|
| +    if(option == UPC_TRANSLATE && count > 1){
|
| +        fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
|
| +        exit(U_UNSUPPORTED_ERROR);
|
| +    }
|
| +    return count;
|
| +}
|
| +
|
| +/**
|
| + * Extracts the text of the index-th tag of the kind selected by option.
|
| + *
|
| + * The comment is split at '@' signs; pieces that start with the selected tag
|
| + * are numbered from 0 in order of appearance, and capture group 1 of the
|
| + * piece numbered index is copied to *dest.
|
| + *
|
| + * @param source       Comment text.
|
| + * @param srcLen       Number of UChars in source.
|
| + * @param dest         Pointer to the output buffer; the tag text is written
|
| + *                     to *dest.
|
| + * @param destCapacity Capacity of *dest, in UChars.
|
| + * @param index        Zero-based position among the matching pieces.
|
| + * @param option       Selects the entry of patternStrings to match against.
|
| + * @param status       ICU error code. Nothing is done when it is NULL or
|
| + *                     already holds a failure.
|
| + * @return The value returned by UnicodeString::extract() for the tag text,
|
| + *         or 0 when there is no such tag or an error occurred.
|
| + */
|
| +U_CFUNC int32_t
|
| +getAt(const UChar* source, int32_t srcLen,
|
| +      UChar** dest, int32_t destCapacity,
|
| +      int32_t index,
|
| +      UParseCommentsOption option,
|
| +      UErrorCode* status){
|
| +
|
| +    if(status == NULL || U_FAILURE(*status)){
|
| +        return 0;
|
| +    }
|
| +
|
| +    UnicodeString stringArray[MAX_SPLIT_STRINGS];
|
| +    RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
|
| +    UnicodeString src (source, srcLen);
|
| +
|
| +
|
| +    if (U_FAILURE(*status)) {
|
| +        delete pattern; // deleting NULL is harmless
|
| +        return 0;
|
| +    }
|
| +    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
|
| +    // The compiled pattern is owned by this function and is only needed for
|
| +    // the split above; release it here so that no return path leaks it.
|
| +    delete pattern;
|
| +
|
| +    RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
|
| +    if (U_FAILURE(*status)) {
|
| +        return 0;
|
| +    }
|
| +    int32_t count = 0;
|
| +    for(int32_t i=0; i<retLen; i++){
|
| +        matcher.reset(stringArray[i]);
|
| +        if(matcher.lookingAt(*status)){
|
| +            if(count == index){
|
| +                UnicodeString out = matcher.group(1, *status);
|
| +                return out.extract(*dest, destCapacity,*status);
|
| +            }
|
| +            count++;
|
| +
|
| +        }
|
| +    }
|
| +    return 0;
|
| +
|
| +}
|
| +
|
| +/**
|
| + * Extracts the trimmed text that follows the "translate" tag of a comment.
|
| + *
|
| + * @param source       Comment text.
|
| + * @param srcLen       Number of UChars in source.
|
| + * @param dest         Pointer to the output buffer; the tag text is written
|
| + *                     to *dest.
|
| + * @param destCapacity Capacity of *dest, in UChars.
|
| + * @param status       ICU error code, forwarded to getText() and trim().
|
| + * @return The length returned by trim() for the extracted text.
|
| + */
|
| +U_CFUNC int32_t
|
| +getTranslate( const UChar* source, int32_t srcLen,
|
| +              UChar** dest, int32_t destCapacity,
|
| +              UErrorCode* status){
|
| +    // Group 1 captures everything after the tag keyword.
|
| +    UnicodeString translateTagPattern("^translate\\s*?(.*)");
|
| +
|
| +    int32_t rawLen = getText(source, srcLen, dest, destCapacity, translateTagPattern, status);
|
| +    return trim(*dest, rawLen, status);
|
| +}
|
| +
|
| +/**
|
| + * Extracts the trimmed text that follows the first "note" tag of a comment.
|
| + *
|
| + * @param source       Comment text.
|
| + * @param srcLen       Number of UChars in source.
|
| + * @param dest         Pointer to the output buffer; the tag text is written
|
| + *                     to *dest.
|
| + * @param destCapacity Capacity of *dest, in UChars.
|
| + * @param status       ICU error code, forwarded to getText() and trim().
|
| + * @return The length returned by trim() for the extracted text.
|
| + */
|
| +U_CFUNC int32_t
|
| +getNote(const UChar* source, int32_t srcLen,
|
| +        UChar** dest, int32_t destCapacity,
|
| +        UErrorCode* status){
|
| +    // Group 1 captures everything after the tag keyword.
|
| +    UnicodeString noteTagPattern("^note\\s*?(.*)");
|
| +
|
| +    int32_t rawLen = getText(source, srcLen, dest, destCapacity, noteTagPattern, status);
|
| +    return trim(*dest, rawLen, status);
|
| +}
|
| +
|
| +#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
|
| +
|
|
|
| Property changes on: icu46/source/tools/genrb/prscmnts.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|