| Index: icu46/source/i18n/uregex.cpp
|
| ===================================================================
|
| --- icu46/source/i18n/uregex.cpp (revision 0)
|
| +++ icu46/source/i18n/uregex.cpp (revision 0)
|
| @@ -0,0 +1,1910 @@
|
| +/*
|
| +*******************************************************************************
|
| +* Copyright (C) 2004-2010, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*******************************************************************************
|
| +* file name: uregex.cpp
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
| +
|
| +#include "unicode/regex.h"
|
| +#include "unicode/uregex.h"
|
| +#include "unicode/unistr.h"
|
| +#include "unicode/ustring.h"
|
| +#include "unicode/uchar.h"
|
| +#include "unicode/uobject.h"
|
| +#include "umutex.h"
|
| +#include "uassert.h"
|
| +#include "cmemory.h"
|
| +
|
| +#include "regextxt.h"
|
| +
|
| +#include <stdio.h>
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +#define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0) // (len - idx), but never less than 0
|
| +
|
| +struct RegularExpression: public UMemory {
|
| +public:
|
| + RegularExpression();
|
| + ~RegularExpression();
|
| + int32_t fMagic; // REXP_MAGIC while the object is alive; the destructor resets it to 0
|
| + RegexPattern *fPat; // Compiled pattern; uregex_clone() shares the same object
|
| + int32_t *fPatRefCount; // Number of objects sharing fPat and fPatString
|
| + UChar *fPatString; // Heap copy of the pattern text, returned by uregex_pattern()
|
| + int32_t fPatStringLen; // Length reported by uregex_pattern()
|
| + RegexMatcher *fMatcher; // Matcher created from fPat; deleted by the destructor
|
| + const UChar *fText; // Text from setText(), or filled in on demand by getText()
|
| + int32_t fTextLength; // Length provided by user with setText(), which
|
| + // may be -1.
|
| + UBool fOwnsText; // If TRUE and fText is non-NULL, the destructor frees fText
|
| +};
|
| +
|
| +static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII; validateRE() rejects objects whose fMagic differs
|
| +
|
| +RegularExpression::RegularExpression()
|
| +    : fMagic(REXP_MAGIC),      // stamp that validateRE() checks
|
| +      fPat(NULL),
|
| +      fPatRefCount(NULL),
|
| +      fPatString(NULL),
|
| +      fPatStringLen(0),
|
| +      fMatcher(NULL),
|
| +      fText(NULL),
|
| +      fTextLength(0),
|
| +      fOwnsText(FALSE) {
|
| +}
|
| +
|
| +RegularExpression::~RegularExpression() {
|
| + delete fMatcher;
|
| + fMatcher = NULL;
|
| + if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) { // this was the last object sharing the pattern
|
| + delete fPat;
|
| + uprv_free(fPatString);
|
| + uprv_free(fPatRefCount);
|
| + }
|
| + if (fOwnsText && fText!=NULL) { // text buffers supplied through setText() are not freed here
|
| + uprv_free((void *)fText);
|
| + }
|
| + fMagic = 0; // validateRE() will no longer accept this object
|
| +}
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +U_NAMESPACE_USE
|
| +
|
| +//----------------------------------------------------------------------------------------
|
| +//
|
| +// validateRE Do boilerplate style checks on API function parameters.
|
| +// Return TRUE if they look OK.
|
| +//----------------------------------------------------------------------------------------
|
| +static UBool validateRE(const RegularExpression *re, UErrorCode *status, UBool requiresText = TRUE) {
|
| +    if (U_FAILURE(*status)) {
|
| +        return FALSE;                       // an incoming error is left as it is
|
| +    }
|
| +    if (!(re != NULL && re->fMagic == REXP_MAGIC)) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return FALSE;
|
| +    }
|
| +    // UText input lives in re->fMatcher (fText NULL, fOwnsText TRUE). Original author was unsure this check is complete for it.
|
| +    if (requiresText && !(re->fText != NULL || re->fOwnsText)) {
|
| +        *status = U_REGEX_INVALID_STATE;
|
| +        return FALSE;
|
| +    }
|
| +    return TRUE;
|
| +}
|
| +
|
| +//----------------------------------------------------------------------------------------
|
| +//
|
| +// uregex_open Compile a UChar pattern and return a new URegularExpression,
|
| +// or NULL with *status set when anything fails.
|
| +//----------------------------------------------------------------------------------------
|
| +U_CAPI URegularExpression * U_EXPORT2
|
| +uregex_open( const UChar *pattern,
|
| + int32_t patternLength,
|
| + uint32_t flags,
|
| + UParseError *pe,
|
| + UErrorCode *status) {
|
| +
|
| + if (U_FAILURE(*status)) {
|
| + return NULL;
|
| + }
|
| + if (pattern == NULL || patternLength < -1 || patternLength == 0) { // empty patterns are rejected
|
| + *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return NULL;
|
| + }
|
| + int32_t actualPatLen = patternLength;
|
| + if (actualPatLen == -1) { // -1: measure the pattern with u_strlen()
|
| + actualPatLen = u_strlen(pattern);
|
| + }
|
| +
|
| + RegularExpression *re = new RegularExpression;
|
| + int32_t *refC = (int32_t *)uprv_malloc(sizeof(int32_t));
|
| + UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1)); // +1 for the terminating 0
|
| + if (re == NULL || refC == NULL || patBuf == NULL) {
|
| + *status = U_MEMORY_ALLOCATION_ERROR;
|
| + delete re;
|
| + uprv_free(refC);
|
| + uprv_free(patBuf);
|
| + return NULL;
|
| + }
|
| + re->fPatRefCount = refC;
|
| + *re->fPatRefCount = 1;
|
| +
|
| + //
|
| + // Make a copy of the pattern string, so we can return it later if asked.
|
| + // For compiling the pattern, we will use a UText wrapper around
|
| + // this local copy, to avoid making even more copies.
|
| + //
|
| + re->fPatString = patBuf;
|
| + re->fPatStringLen = patternLength; // the caller's value as given, so it stays -1 when -1 was passed
|
| + u_memcpy(patBuf, pattern, actualPatLen);
|
| + patBuf[actualPatLen] = 0;
|
| +
|
| + UText patText = UTEXT_INITIALIZER;
|
| + utext_openUChars(&patText, patBuf, patternLength, status);
|
| +
|
| + //
|
| + // Compile the pattern
|
| + //
|
| + if (pe != NULL) {
|
| + re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
|
| + } else {
|
| + re->fPat = RegexPattern::compile(&patText, flags, *status);
|
| + }
|
| + utext_close(&patText);
|
| +
|
| + if (U_FAILURE(*status)) {
|
| + goto ErrorExit;
|
| + }
|
| +
|
| + //
|
| + // Create the matcher object
|
| + //
|
| + re->fMatcher = re->fPat->matcher(*status);
|
| + if (U_SUCCESS(*status)) {
|
| + return (URegularExpression*)re;
|
| + }
|
| +
|
| +ErrorExit:
|
| + delete re; // the destructor releases the pattern copy and refcount installed above
|
| + return NULL;
|
| +
|
| +}
|
| +
|
| +//----------------------------------------------------------------------------------------
|
| +//
|
| +// uregex_openUText Compile a pattern supplied as a UText and return a new
|
| +// URegularExpression, or NULL with *status set when anything fails.
|
| +//----------------------------------------------------------------------------------------
|
| +U_CAPI URegularExpression * U_EXPORT2
|
| +uregex_openUText(UText *pattern,
|
| + uint32_t flags,
|
| + UParseError *pe,
|
| + UErrorCode *status) {
|
| +
|
| + if (U_FAILURE(*status)) {
|
| + return NULL;
|
| + }
|
| + if (pattern == NULL) {
|
| + *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return NULL;
|
| + }
|
| +
|
| + int64_t patternNativeLength = utext_nativeLength(pattern);
|
| +
|
| + if (patternNativeLength == 0) { // empty patterns are rejected
|
| + *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return NULL;
|
| + }
|
| +
|
| + RegularExpression *re = new RegularExpression;
|
| +
|
| + UErrorCode lengthStatus = U_ZERO_ERROR; // status of the length-only extract below; never examined
|
| + int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
|
| +
|
| + int32_t *refC = (int32_t *)uprv_malloc(sizeof(int32_t));
|
| + UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1)); // +1 so the extract below has room for a terminator
|
| + if (re == NULL || refC == NULL || patBuf == NULL) {
|
| + *status = U_MEMORY_ALLOCATION_ERROR;
|
| + delete re;
|
| + uprv_free(refC);
|
| + uprv_free(patBuf);
|
| + return NULL;
|
| + }
|
| + re->fPatRefCount = refC;
|
| + *re->fPatRefCount = 1;
|
| +
|
| + //
|
| + // Make a copy of the pattern string, so we can return it later if asked.
|
| + // For compiling the pattern, we will use a read-only UText wrapper
|
| + // around this local copy, to avoid making even more copies.
|
| + //
|
| + re->fPatString = patBuf;
|
| + re->fPatStringLen = pattern16Length;
|
| + utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
|
| +
|
| + UText patText = UTEXT_INITIALIZER;
|
| + utext_openUChars(&patText, patBuf, pattern16Length, status);
|
| +
|
| + //
|
| + // Compile the pattern
|
| + //
|
| + if (pe != NULL) {
|
| + re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
|
| + } else {
|
| + re->fPat = RegexPattern::compile(&patText, flags, *status);
|
| + }
|
| + utext_close(&patText);
|
| +
|
| + if (U_FAILURE(*status)) {
|
| + goto ErrorExit;
|
| + }
|
| +
|
| + //
|
| + // Create the matcher object
|
| + //
|
| + re->fMatcher = re->fPat->matcher(*status);
|
| + if (U_SUCCESS(*status)) {
|
| + return (URegularExpression*)re;
|
| + }
|
| +
|
| +ErrorExit:
|
| + delete re; // the destructor releases the pattern copy and refcount installed above
|
| + return NULL;
|
| +
|
| +}
|
| +
|
| +//----------------------------------------------------------------------------------------
|
| +//
|
| +// uregex_close   Delete the object behind the handle.
|
| +//                A NULL or unrecognized handle is ignored.
|
| +//----------------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_close(URegularExpression *re2) {
|
| +    UErrorCode ignoredStatus = U_ZERO_ERROR;
|
| +    RegularExpression *doomed = (RegularExpression*)re2;
|
| +    if (!validateRE(doomed, &ignoredStatus, FALSE)) {
|
| +        return;
|
| +    }
|
| +    delete doomed;
|
| +}
|
| +
|
| +
|
| +//----------------------------------------------------------------------------------------
|
| +//
|
| +// uregex_clone   Make a second object that shares the source's compiled pattern
|
| +//                (reference counted) and has a matcher of its own.
|
| +//----------------------------------------------------------------------------------------
|
| +U_CAPI URegularExpression * U_EXPORT2
|
| +uregex_clone(const URegularExpression *source2, UErrorCode *status) {
|
| +    RegularExpression *original = (RegularExpression*)source2;
|
| +    if (!validateRE(original, status, FALSE)) {
|
| +        return NULL;
|
| +    }
|
| +
|
| +    RegularExpression *copy = new RegularExpression;
|
| +    if (copy == NULL) {
|
| +        *status = U_MEMORY_ALLOCATION_ERROR;
|
| +        return NULL;
|
| +    }
|
| +
|
| +    copy->fMatcher = original->fPat->matcher(*status);
|
| +    if (U_FAILURE(*status)) {
|
| +        delete copy;       // its pattern fields are still NULL, so nothing shared is released
|
| +        return NULL;
|
| +    }
|
| +
|
| +    copy->fPat          = original->fPat;
|
| +    copy->fPatRefCount  = original->fPatRefCount;
|
| +    copy->fPatString    = original->fPatString;
|
| +    copy->fPatStringLen = original->fPatStringLen;
|
| +    umtx_atomic_inc(original->fPatRefCount);
|
| +    // The subject text (fText) is deliberately not carried over.
|
| +
|
| +    return (URegularExpression*)copy;
|
| +}
|
| +
|
| +
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_pattern   Return the stored copy of the pattern string and, when
|
| +//                  patLength is non-NULL, the length recorded for it.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI const UChar * U_EXPORT2
|
| +uregex_pattern(const URegularExpression *regexp2,
|
| +               int32_t *patLength,
|
| +               UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +
|
| +    if (!validateRE(rx, status, FALSE)) {
|
| +        return NULL;
|
| +    }
|
| +    if (patLength) {
|
| +        *patLength = rx->fPatStringLen;
|
| +    }
|
| +    return rx->fPatString;
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_patternUText   Return fPat->patternText() for the compiled pattern,
|
| +//                       or NULL when status/handle validation fails.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UText * U_EXPORT2
|
| +uregex_patternUText(const URegularExpression *regexp2,
|
| +                    UErrorCode *status) {
|
| +    RegularExpression *regexp = (RegularExpression*)regexp2;
|
| +    return validateRE(regexp, status, FALSE) ? regexp->fPat->patternText(*status) : NULL; // validate like every sibling
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_flags   Return fPat->flags() for the compiled pattern,
|
| +//                or 0 when validation fails.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_flags(const URegularExpression *regexp2, UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status, FALSE)) {
|
| +        return 0;
|
| +    }
|
| +    const int32_t compileFlags = rx->fPat->flags();
|
| +    return compileFlags;
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_setText   Make caller-supplied UChar text the matcher's input.
|
| +//                  Only the pointer and length are recorded here.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_setText(URegularExpression *regexp2,
|
| +               const UChar *text,
|
| +               int32_t textLength,
|
| +               UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status, FALSE)) {
|
| +        return;
|
| +    }
|
| +    if (textLength < -1 || text == NULL) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return;
|
| +    }
|
| +
|
| +    if (rx->fOwnsText && rx->fText != NULL) {
|
| +        uprv_free((void *)rx->fText);      // drop a buffer this object had allocated earlier
|
| +    }
|
| +
|
| +    rx->fOwnsText   = FALSE;
|
| +    rx->fTextLength = textLength;
|
| +    rx->fText       = text;
|
| +
|
| +    UText wrapper = UTEXT_INITIALIZER;
|
| +    utext_openUChars(&wrapper, text, textLength, status);
|
| +    rx->fMatcher->reset(&wrapper);
|
| +    utext_close(&wrapper); // reset() made a shallow clone, so we don't need this copy
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_setUText   Make a UText the matcher's input. fText stays NULL
|
| +//                   until uregex_getText() asks for a UChar view.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_setUText(URegularExpression *regexp2,
|
| +                UText *text,
|
| +                UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status, FALSE)) {
|
| +        return;
|
| +    }
|
| +    if (!text) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return;
|
| +    }
|
| +
|
| +    if (rx->fOwnsText && rx->fText != NULL) {
|
| +        uprv_free((void *)rx->fText);      // drop a buffer this object had allocated earlier
|
| +    }
|
| +
|
| +    rx->fOwnsText   = TRUE;
|
| +    rx->fTextLength = -1;
|
| +    rx->fText       = NULL;     // only filled in on request
|
| +    rx->fMatcher->reset(text);
|
| +}
|
| +
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_getText   Return the input text as UChars (and its length through
|
| +//                  textLength), extracting it from the matcher's UText if needed.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI const UChar * U_EXPORT2
|
| +uregex_getText(URegularExpression *regexp2,
|
| +               int32_t *textLength,
|
| +               UErrorCode *status) {
|
| +    RegularExpression *regexp = (RegularExpression*)regexp2;
|
| +    if (validateRE(regexp, status, FALSE) == FALSE) {
|
| +        return NULL;
|
| +    }
|
| +
|
| +    if (regexp->fText == NULL) {
|
| +        // No UChar view is cached yet: take it from the matcher's input UText.
|
| +        UText *inputText = regexp->fMatcher->inputText();
|
| +        int64_t inputNativeLength = utext_nativeLength(inputText);
|
| +        if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
|
| +            regexp->fText = inputText->chunkContents;
|
| +            regexp->fTextLength = (int32_t)inputNativeLength;
|
| +            regexp->fOwnsText = FALSE; // because the UText owns it
|
| +        } else {
|
| +            UErrorCode lengthStatus = U_ZERO_ERROR;  // status of the length-only call; never examined
|
| +            regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
|
| +            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
|
| +            if (inputChars == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; }  // report OOM as OOM; fText then stays NULL
|
| +            utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
|
| +            regexp->fText = inputChars;
|
| +            regexp->fOwnsText = TRUE; // should already be set but just in case
|
| +        }
|
| +    }
|
| +
|
| +    if (textLength != NULL) {
|
| +        *textLength = regexp->fTextLength;
|
| +    }
|
| +    return regexp->fText;
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_getUText   Return fMatcher->getInput(dest, status);
|
| +//                   dest is returned unchanged when validation fails.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UText * U_EXPORT2
|
| +uregex_getUText(URegularExpression *regexp2,
|
| +                UText *dest,
|
| +                UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status, FALSE)) {
|
| +        return dest;
|
| +    }
|
| +    return rx->fMatcher->getInput(dest, *status);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_matches   32-bit entry point: widens startIndex and forwards
|
| +//                  to uregex_matches64().
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UBool U_EXPORT2
|
| +uregex_matches(URegularExpression *regexp2,
|
| +               int32_t startIndex,
|
| +               UErrorCode *status) {
|
| +    return uregex_matches64(regexp2, static_cast<int64_t>(startIndex), status);
|
| +}
|
| +
|
| +U_CAPI UBool U_EXPORT2
|
| +uregex_matches64(URegularExpression *regexp2,
|
| +                 int64_t startIndex,
|
| +                 UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return FALSE;
|
| +    }
|
| +
|
| +    RegexMatcher *matcher = rx->fMatcher;
|
| +    if (startIndex != -1) {
|
| +        return matcher->matches(startIndex, *status);
|
| +    }
|
| +    // startIndex == -1 selects the overload that takes no start position.
|
| +    return matcher->matches(*status);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_lookingAt   32-bit entry point: widens startIndex and forwards
|
| +//                    to uregex_lookingAt64().
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UBool U_EXPORT2
|
| +uregex_lookingAt(URegularExpression *regexp2,
|
| +                 int32_t startIndex,
|
| +                 UErrorCode *status) {
|
| +    return uregex_lookingAt64(regexp2, static_cast<int64_t>(startIndex), status);
|
| +}
|
| +
|
| +U_CAPI UBool U_EXPORT2
|
| +uregex_lookingAt64(URegularExpression *regexp2,
|
| +                   int64_t startIndex,
|
| +                   UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return FALSE;
|
| +    }
|
| +
|
| +    RegexMatcher *matcher = rx->fMatcher;
|
| +    if (startIndex != -1) {
|
| +        return matcher->lookingAt(startIndex, *status);
|
| +    }
|
| +    // startIndex == -1 selects the overload that takes no start position.
|
| +    return matcher->lookingAt(*status);
|
| +}
|
| +
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_find   32-bit entry point: widens startIndex and forwards
|
| +//               to uregex_find64().
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UBool U_EXPORT2
|
| +uregex_find(URegularExpression *regexp2,
|
| +            int32_t startIndex,
|
| +            UErrorCode *status) {
|
| +    return uregex_find64(regexp2, static_cast<int64_t>(startIndex), status);
|
| +}
|
| +
|
| +U_CAPI UBool U_EXPORT2
|
| +uregex_find64(URegularExpression *regexp2,
|
| +              int64_t startIndex,
|
| +              UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return FALSE;
|
| +    }
|
| +
|
| +    RegexMatcher *matcher = rx->fMatcher;
|
| +    if (startIndex != -1) {
|
| +        return matcher->find(startIndex, *status);
|
| +    }
|
| +    // startIndex == -1: reset the matcher (keeping its region) and search with find().
|
| +    matcher->resetPreserveRegion();
|
| +    return matcher->find();
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_findNext   Return fMatcher->find() without resetting the matcher,
|
| +//                   or FALSE when validation fails.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UBool U_EXPORT2
|
| +uregex_findNext(URegularExpression *regexp2,
|
| +                UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return FALSE;
|
| +    }
|
| +    const UBool found = rx->fMatcher->find();
|
| +    return found;
|
| +}
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_groupCount   Return fMatcher->groupCount(), or 0 when validation
|
| +//                     fails. No input text is required.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_groupCount(URegularExpression *regexp2,
|
| +                  UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status, FALSE)) {
|
| +        return 0;
|
| +    }
|
| +    const int32_t groups = rx->fMatcher->groupCount();
|
| +    return groups;
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_group   Copy the text of capture group groupNum into dest. The
|
| +//                UChar branch returns the group's full length even if truncated.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_group(URegularExpression *regexp2,
|
| +             int32_t groupNum,
|
| +             UChar *dest,
|
| +             int32_t destCapacity,
|
| +             UErrorCode *status) {
|
| +    RegularExpression *regexp = (RegularExpression*)regexp2;
|
| +    if (validateRE(regexp, status) == FALSE) {
|
| +        return 0;
|
| +    }
|
| +    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return 0;
|
| +    }
|
| +
|
| +    if (destCapacity == 0 || regexp->fText != NULL) {
|
| +        // If preflighting or if we already have the text as UChars,
|
| +        // this is a little cheaper than going through uregex_groupUTextDeep()
|
| +
|
| +        //
|
| +        // Pick up the range of characters from the matcher
|
| +        //
|
| +        int32_t startIx = regexp->fMatcher->start(groupNum, *status);
|
| +        int32_t endIx = regexp->fMatcher->end (groupNum, *status);
|
| +        if (U_FAILURE(*status)) {
|
| +            return 0;
|
| +        }
|
| +
|
| +        //
|
| +        // Trim length based on buffer capacity
|
| +        //
|
| +        int32_t fullLength = endIx - startIx;
|
| +        int32_t copyLength = fullLength;
|
| +        if (copyLength < destCapacity) {
|
| +            dest[copyLength] = 0;
|
| +        } else if (copyLength == destCapacity) {
|
| +            *status = U_STRING_NOT_TERMINATED_WARNING;
|
| +        } else {
|
| +            copyLength = destCapacity;
|
| +            *status = U_BUFFER_OVERFLOW_ERROR;
|
| +        }
|
| +
|
| +        //
|
| +        // Copy capture group to user's buffer
|
| +        //
|
| +        if (copyLength > 0) {
|
| +            u_memcpy(dest, &regexp->fText[startIx], copyLength);
|
| +        }
|
| +        return fullLength;
|
| +    } else {
|
| +        UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
|
| +        int32_t result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
|
| +        utext_close(groupText);
|
| +        return result;
|
| +    }
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_groupUText   Return fMatcher->group() for capture group groupNum;
|
| +//                     its length is stored through groupLength when non-NULL.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UText * U_EXPORT2
|
| +uregex_groupUText(URegularExpression *regexp2,
|
| +                  int32_t groupNum,
|
| +                  UText *dest,
|
| +                  int64_t *groupLength,
|
| +                  UErrorCode *status) {
|
| +    RegularExpression *regexp = (RegularExpression*)regexp2;
|
| +    if (validateRE(regexp, status) == FALSE) {
|
| +        UErrorCode emptyTextStatus = U_ZERO_ERROR;
|
| +        return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
|
| +    }
|
| +    int64_t unusedLength = 0;   // receives the length when the caller passed groupLength == NULL
|
| +    return regexp->fMatcher->group(groupNum, dest, (groupLength != NULL ? *groupLength : unusedLength), *status);
|
| +}
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_groupUTextDeep   Put the text of capture group groupNum into dest,
|
| +//                         or into a newly created UText when dest is NULL.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UText * U_EXPORT2
|
| +uregex_groupUTextDeep(URegularExpression *regexp2,
|
| +                      int32_t groupNum,
|
| +                      UText *dest,
|
| +                      UErrorCode *status) {
|
| +    RegularExpression *regexp = (RegularExpression*)regexp2;
|
| +    if (validateRE(regexp, status) == FALSE) {
|
| +        UErrorCode emptyTextStatus = U_ZERO_ERROR;
|
| +        return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
|
| +    }
|
| +
|
| +    if (regexp->fText != NULL) {
|
| +        //
|
| +        // Pick up the range of characters from the matcher
|
| +        // and use our already-extracted characters
|
| +        //
|
| +        int32_t startIx = regexp->fMatcher->start(groupNum, *status);
|
| +        int32_t endIx = regexp->fMatcher->end (groupNum, *status);
|
| +        if (U_FAILURE(*status)) {
|
| +            UErrorCode emptyTextStatus = U_ZERO_ERROR;
|
| +            return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
|
| +        }
|
| +
|
| +        if (dest) {
|
| +            utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
|
| +        } else {
|
| +            UText groupText = UTEXT_INITIALIZER;
|
| +            utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startIx, status);
|
| +            dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);   // deep clone of the group's range
|
| +            utext_close(&groupText);
|
| +        }
|
| +
|
| +        return dest;
|
| +    } else {
|
| +        return regexp->fMatcher->group(groupNum, dest, *status);
|
| +    }
|
| +}
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_start   32-bit entry point: returns uregex_start64() narrowed
|
| +//                to int32_t.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_start(URegularExpression *regexp2,
|
| +             int32_t groupNum,
|
| +             UErrorCode *status) {
|
| +    return static_cast<int32_t>(uregex_start64(regexp2, groupNum, status));
|
| +}
|
| +
|
| +U_CAPI int64_t U_EXPORT2
|
| +uregex_start64(URegularExpression *regexp2,
|
| +               int32_t groupNum,
|
| +               UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return 0;
|
| +    }
|
| +    int32_t groupStart = rx->fMatcher->start(groupNum, *status);  // NOTE(review): 32-bit local in a 64-bit API - confirm
|
| +    return groupStart;
|
| +}
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_end   32-bit entry point: returns uregex_end64() narrowed
|
| +//              to int32_t.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_end(URegularExpression *regexp2,
|
| +           int32_t groupNum,
|
| +           UErrorCode *status) {
|
| +    return static_cast<int32_t>(uregex_end64(regexp2, groupNum, status));
|
| +}
|
| +
|
| +U_CAPI int64_t U_EXPORT2
|
| +uregex_end64(URegularExpression *regexp2,
|
| +             int32_t groupNum,
|
| +             UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return 0;
|
| +    }
|
| +    int32_t groupEnd = rx->fMatcher->end(groupNum, *status);  // NOTE(review): 32-bit local in a 64-bit API - confirm
|
| +    return groupEnd;
|
| +}
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_reset   32-bit entry point: widens index and forwards
|
| +//                to uregex_reset64().
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_reset(URegularExpression *regexp2,
|
| +             int32_t index,
|
| +             UErrorCode *status) {
|
| +    uregex_reset64(regexp2, static_cast<int64_t>(index), status);
|
| +}
|
| +
|
| +U_CAPI void U_EXPORT2
|
| +uregex_reset64(URegularExpression *regexp2,
|
| +               int64_t index,
|
| +               UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return;
|
| +    }
|
| +    rx->fMatcher->reset(index, *status);   // errors are reported through *status
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_setRegion   32-bit entry point: widens both bounds and forwards
|
| +//                    to uregex_setRegion64().
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_setRegion(URegularExpression *regexp2,
|
| +                 int32_t regionStart,
|
| +                 int32_t regionLimit,
|
| +                 UErrorCode *status) {
|
| +    uregex_setRegion64(regexp2, static_cast<int64_t>(regionStart), static_cast<int64_t>(regionLimit), status);
|
| +}
|
| +
|
| +U_CAPI void U_EXPORT2
|
| +uregex_setRegion64(URegularExpression *regexp2,
|
| +                   int64_t regionStart,
|
| +                   int64_t regionLimit,
|
| +                   UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return;
|
| +    }
|
| +    rx->fMatcher->region(regionStart, regionLimit, *status);   // errors are reported through *status
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_setRegionAndStart   Forward the region bounds together with a
|
| +//                            start index to fMatcher->region().
|
| +//------------------------------------------------------------------------------
|
| +U_DRAFT void U_EXPORT2
|
| +uregex_setRegionAndStart(URegularExpression *regexp2,
|
| +                         int64_t regionStart,
|
| +                         int64_t regionLimit,
|
| +                         int64_t startIndex,
|
| +                         UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return;
|
| +    }
|
| +    rx->fMatcher->region(regionStart, regionLimit, startIndex, *status);
|
| +}
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_regionStart   32-bit entry point: returns uregex_regionStart64()
|
| +//                      narrowed to int32_t.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_regionStart(const URegularExpression *regexp2,
|
| +                   UErrorCode *status) {
|
| +    return static_cast<int32_t>(uregex_regionStart64(regexp2, status));
|
| +}
|
| +
|
| +U_CAPI int64_t U_EXPORT2
|
| +uregex_regionStart64(const URegularExpression *regexp2,
|
| +                     UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return 0;       // same value a region starting at 0 would give; check *status
|
| +    }
|
| +    return rx->fMatcher->regionStart();
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_regionEnd   32-bit entry point: returns uregex_regionEnd64()
|
| +//                    narrowed to int32_t.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_regionEnd(const URegularExpression *regexp2,
|
| +                 UErrorCode *status) {
|
| +    return static_cast<int32_t>(uregex_regionEnd64(regexp2, status));
|
| +}
|
| +
|
| +U_CAPI int64_t U_EXPORT2
|
| +uregex_regionEnd64(const URegularExpression *regexp2,
|
| +                   UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return 0;       // callers should check *status to tell this from a real 0
|
| +    }
|
| +    return rx->fMatcher->regionEnd();
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_hasTransparentBounds   Return fMatcher->hasTransparentBounds(),
|
| +//                               or FALSE when validation fails.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UBool U_EXPORT2
|
| +uregex_hasTransparentBounds(const URegularExpression *regexp2,
|
| +                            UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return FALSE;
|
| +    }
|
| +    return rx->fMatcher->hasTransparentBounds();
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_useTransparentBounds   Pass b to fMatcher->useTransparentBounds().
|
| +//                               Nothing is changed when validation fails.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_useTransparentBounds(URegularExpression *regexp2,
|
| +                            UBool b,
|
| +                            UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return;
|
| +    }
|
| +    rx->fMatcher->useTransparentBounds(b);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_hasAnchoringBounds   Return fMatcher->hasAnchoringBounds(),
|
| +//                             or FALSE when validation fails.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UBool U_EXPORT2
|
| +uregex_hasAnchoringBounds(const URegularExpression *regexp2,
|
| +                          UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return FALSE;
|
| +    }
|
| +    return rx->fMatcher->hasAnchoringBounds();
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_useAnchoringBounds   Pass b to fMatcher->useAnchoringBounds().
|
| +//                             Nothing is changed when validation fails.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_useAnchoringBounds(URegularExpression *regexp2,
|
| +                          UBool b,
|
| +                          UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return;
|
| +    }
|
| +    rx->fMatcher->useAnchoringBounds(b);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_hitEnd   Return fMatcher->hitEnd(),
|
| +//                 or FALSE when validation fails.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UBool U_EXPORT2
|
| +uregex_hitEnd(const URegularExpression *regexp2,
|
| +              UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return FALSE;
|
| +    }
|
| +    return rx->fMatcher->hitEnd();
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_requireEnd   Return fMatcher->requireEnd(),
|
| +//                     or FALSE when validation fails.
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UBool U_EXPORT2
|
| +uregex_requireEnd(const URegularExpression *regexp2,
|
| +                  UErrorCode *status) {
|
| +    RegularExpression *rx = (RegularExpression*)regexp2;
|
| +    if (!validateRE(rx, status)) {
|
| +        return FALSE;
|
| +    }
|
| +    return rx->fMatcher->requireEnd();
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_setTimeLimit
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_setTimeLimit(URegularExpression *regexp2,
|
| +                    int32_t             limit,
|
| +                    UErrorCode         *status) {
|
| +    // Hand the requested limit to RegexMatcher::setTimeLimit(), which reports
|
| +    // any problem through *status.  An invalid handle or status is a no-op.
|
| +    RegularExpression *re = (RegularExpression*)regexp2;
|
| +    if (!validateRE(re, status)) {
|
| +        return;
|
| +    }
|
| +    re->fMatcher->setTimeLimit(limit, *status);
|
| +}
|
| +
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_getTimeLimit
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_getTimeLimit(const URegularExpression *regexp2,
|
| +                    UErrorCode               *status) {
|
| +    // Return RegexMatcher::getTimeLimit() for the matcher behind this handle,
|
| +    // or 0 when validateRE() rejects the handle or status.
|
| +    RegularExpression *re = (RegularExpression*)regexp2;
|
| +    if (!validateRE(re, status)) {
|
| +        return 0;
|
| +    }
|
| +    return re->fMatcher->getTimeLimit();
|
| +}
|
| +
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_setStackLimit
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_setStackLimit(URegularExpression *regexp2,
|
| +                     int32_t             limit,
|
| +                     UErrorCode         *status) {
|
| +    // Hand the requested limit to RegexMatcher::setStackLimit(), which reports
|
| +    // any problem through *status.  An invalid handle or status is a no-op.
|
| +    RegularExpression *re = (RegularExpression*)regexp2;
|
| +    if (!validateRE(re, status)) {
|
| +        return;
|
| +    }
|
| +    re->fMatcher->setStackLimit(limit, *status);
|
| +}
|
| +
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_getStackLimit
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_getStackLimit(const URegularExpression *regexp2,
|
| +                     UErrorCode               *status) {
|
| +    // Return RegexMatcher::getStackLimit() for the matcher behind this handle,
|
| +    // or 0 when validateRE() rejects the handle or status.
|
| +    RegularExpression *re = (RegularExpression*)regexp2;
|
| +    if (!validateRE(re, status)) {
|
| +        return 0;
|
| +    }
|
| +    return re->fMatcher->getStackLimit();
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_setMatchCallback
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_setMatchCallback(URegularExpression  *regexp2,
|
| +                        URegexMatchCallback *callback,
|
| +                        const void          *context,
|
| +                        UErrorCode          *status) {
|
| +    // Register the callback function and its context pointer with the matcher
|
| +    // via RegexMatcher::setMatchCallback().  An invalid handle or status is a no-op.
|
| +    RegularExpression *re = (RegularExpression*)regexp2;
|
| +    if (!validateRE(re, status)) {
|
| +        return;
|
| +    }
|
| +    re->fMatcher->setMatchCallback(callback, context, *status);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_getMatchCallback
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_getMatchCallback(const URegularExpression *regexp2,
|
| +                        URegexMatchCallback     **callback,
|
| +                        const void              **context,
|
| +                        UErrorCode               *status) {
|
| +    // Fetch the match callback function and context pointer from the matcher.
|
| +    //   callback  out: receives the callback function pointer; must not be NULL.
|
| +    //   context   out: receives the callback context pointer; must not be NULL.
|
| +    //   status    in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR when either
|
| +    //             output pointer is NULL.
|
| +    RegularExpression *regexp = (RegularExpression*)regexp2;
|
| +    if (validateRE(regexp, status) == FALSE) {
|
| +        return;
|
| +    }
|
| +    // Both output pointers are dereferenced (bound as references) below, so a
|
| +    // NULL here must be rejected rather than written through.
|
| +    if (callback == NULL || context == NULL) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return;
|
| +    }
|
| +    regexp->fMatcher->getMatchCallback(*callback, *context, *status);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_setFindProgressCallback
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_setFindProgressCallback(URegularExpression         *regexp2,
|
| +                               URegexFindProgressCallback *callback,
|
| +                               const void                 *context,
|
| +                               UErrorCode                 *status) {
|
| +    // Register the find-progress callback and its context pointer with the matcher
|
| +    // via RegexMatcher::setFindProgressCallback().  An invalid handle or status
|
| +    // is a no-op.
|
| +    RegularExpression *re = (RegularExpression*)regexp2;
|
| +    if (!validateRE(re, status)) {
|
| +        return;
|
| +    }
|
| +    re->fMatcher->setFindProgressCallback(callback, context, *status);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_getFindProgressCallback
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI void U_EXPORT2
|
| +uregex_getFindProgressCallback(const URegularExpression    *regexp2,
|
| +                               URegexFindProgressCallback **callback,
|
| +                               const void                 **context,
|
| +                               UErrorCode                  *status) {
|
| +    // Fetch the find-progress callback function and context pointer from the matcher.
|
| +    //   callback  out: receives the callback function pointer; must not be NULL.
|
| +    //   context   out: receives the callback context pointer; must not be NULL.
|
| +    //   status    in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR when either
|
| +    //             output pointer is NULL.
|
| +    RegularExpression *regexp = (RegularExpression*)regexp2;
|
| +    if (validateRE(regexp, status) == FALSE) {
|
| +        return;
|
| +    }
|
| +    // Both output pointers are dereferenced (bound as references) below, so a
|
| +    // NULL here must be rejected rather than written through.
|
| +    if (callback == NULL || context == NULL) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return;
|
| +    }
|
| +    regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_replaceAll
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_replaceAll(URegularExpression *regexp2,
|
| +                  const UChar        *replacementText,
|
| +                  int32_t             replacementLength,
|
| +                  UChar              *destBuf,
|
| +                  int32_t             destCapacity,
|
| +                  UErrorCode         *status) {
|
| +    // Replace every match in the input with the expanded replacement text,
|
| +    // writing the result to destBuf.  Returns the summed lengths reported by
|
| +    // uregex_appendReplacement() and uregex_appendTail(), or 0 on bad arguments.
|
| +    RegularExpression *re = (RegularExpression*)regexp2;
|
| +    if (!validateRE(re, status)) {
|
| +        return 0;
|
| +    }
|
| +
|
| +    const UBool badReplacement = (replacementText == NULL || replacementLength < -1);
|
| +    const UBool badDest        = (destCapacity < 0 || (destBuf == NULL && destCapacity > 0));
|
| +    if (badReplacement || badDest) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return 0;
|
| +    }
|
| +
|
| +    uregex_reset(regexp2, 0, status);
|
| +
|
| +    // Note: Separate error code variables are kept for findNext() and for the
|
| +    //       append operations, so that a destination buffer overflow reported
|
| +    //       by appendReplacement() does not stop findNext() from working.
|
| +    //       appendReplacement() and appendTail() special-case an incoming
|
| +    //       buffer overflow error and keep returning the correct length.
|
| +    UErrorCode findStatus = *status;
|
| +    int32_t    totalLen   = 0;
|
| +    for (;;) {
|
| +        if (!uregex_findNext(regexp2, &findStatus)) {
|
| +            break;
|
| +        }
|
| +        totalLen += uregex_appendReplacement(regexp2, replacementText, replacementLength,
|
| +                                             &destBuf, &destCapacity, status);
|
| +    }
|
| +    totalLen += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
|
| +
|
| +    // A failure in findNext() is not expected; when one does occur it takes
|
| +    // precedence over whatever the append operations reported.
|
| +    if (U_FAILURE(findStatus)) {
|
| +        *status = findStatus;
|
| +    }
|
| +
|
| +    return totalLen;
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_replaceAllUText
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UText * U_EXPORT2
|
| +uregex_replaceAllUText(URegularExpression *regexp2,
|
| +                       UText              *replacementText,
|
| +                       UText              *dest,
|
| +                       UErrorCode         *status) {
|
| +    // UText flavor of replaceAll: delegates to RegexMatcher::replaceAll() and
|
| +    // returns whatever UText it hands back.  Returns NULL when the handle or
|
| +    // status is invalid, or when replacementText is NULL (illegal argument).
|
| +    RegularExpression *re = (RegularExpression*)regexp2;
|
| +    if (!validateRE(re, status)) {
|
| +        return NULL;
|
| +    }
|
| +    if (replacementText == NULL) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return NULL;
|
| +    }
|
| +
|
| +    return re->fMatcher->replaceAll(replacementText, dest, *status);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_replaceFirst
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_replaceFirst(URegularExpression *regexp2,
|
| +                    const UChar        *replacementText,
|
| +                    int32_t             replacementLength,
|
| +                    UChar              *destBuf,
|
| +                    int32_t             destCapacity,
|
| +                    UErrorCode         *status) {
|
| +    // Replace only the first match in the input with the expanded replacement
|
| +    // text, writing the result to destBuf.  Returns the summed lengths reported
|
| +    // by uregex_appendReplacement() (if a match was found) and
|
| +    // uregex_appendTail(), or 0 on bad arguments.
|
| +    RegularExpression *re = (RegularExpression*)regexp2;
|
| +    if (!validateRE(re, status)) {
|
| +        return 0;
|
| +    }
|
| +
|
| +    const UBool badReplacement = (replacementText == NULL || replacementLength < -1);
|
| +    const UBool badDest        = (destCapacity < 0 || (destBuf == NULL && destCapacity > 0));
|
| +    if (badReplacement || badDest) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return 0;
|
| +    }
|
| +
|
| +    uregex_reset(regexp2, 0, status);
|
| +
|
| +    int32_t totalLen = 0;
|
| +    if (uregex_find(regexp2, 0, status)) {
|
| +        totalLen = uregex_appendReplacement(regexp2, replacementText, replacementLength,
|
| +                                            &destBuf, &destCapacity, status);
|
| +    }
|
| +    totalLen += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
|
| +
|
| +    return totalLen;
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_replaceFirstUText
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +U_CAPI UText * U_EXPORT2
|
| +uregex_replaceFirstUText(URegularExpression *regexp2,
|
| +                         UText              *replacementText,
|
| +                         UText              *dest,
|
| +                         UErrorCode         *status) {
|
| +    // UText flavor of replaceFirst: delegates to RegexMatcher::replaceFirst() and
|
| +    // returns whatever UText it hands back.  Returns NULL when the handle or
|
| +    // status is invalid, or when replacementText is NULL (illegal argument).
|
| +    RegularExpression *re = (RegularExpression*)regexp2;
|
| +    if (!validateRE(re, status)) {
|
| +        return NULL;
|
| +    }
|
| +    if (replacementText == NULL) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return NULL;
|
| +    }
|
| +
|
| +    return re->fMatcher->replaceFirst(replacementText, dest, *status);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_appendReplacement
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +//
|
| +// Dummy class, because these functions need to be friends of class RegexMatcher,
|
| +// and stand-alone C functions don't work as friends.
|
| +// Its static members are the implementations behind uregex_appendReplacement(), uregex_appendTail() and uregex_split().
|
| +class RegexCImpl {
|
| + public:
|
| + inline static int32_t appendReplacement(RegularExpression *regexp, // implementation called by uregex_appendReplacement()
|
| + const UChar *replacementText, // replacement pattern; NULL is rejected by the implementation
|
| + int32_t replacementLength, // length of replacementText, or -1 to have it measured with u_strlen()
|
| + UChar **destBuf, // in/out: current output position
|
| + int32_t *destCapacity, // in/out: capacity remaining at *destBuf
|
| + UErrorCode *status); // in/out: error code
|
| +
|
| + inline static int32_t appendTail(RegularExpression *regexp, // implementation called by uregex_appendTail()
|
| + UChar **destBuf, // in/out: current output position
|
| + int32_t *destCapacity, // in/out: capacity remaining at *destBuf
|
| + UErrorCode *status); // in/out: error code
|
| +
|
| + inline static int32_t split(RegularExpression *regexp, // implementation called by uregex_split()
|
| + UChar *destBuf, // buffer that receives the field strings
|
| + int32_t destCapacity, // capacity of destBuf
|
| + int32_t *requiredCapacity, // optional out (may be NULL): buffer space needed for all fields
|
| + UChar *destFields[], // out: pointers to the start of each field within destBuf
|
| + int32_t destFieldsCapacity, // number of entries available in destFields
|
| + UErrorCode *status); // in/out: error code
|
| +};
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +
|
| +
|
| +static const UChar BACKSLASH = 0x5c; // U+005C; introduces an escape in replacement text
|
| +static const UChar DOLLARSIGN = 0x24; // U+0024; introduces a $n capture group reference in replacement text
|
| +
|
| +//
|
| +// Store one UChar at buf[*idx] when that slot lies inside the buffer, then
|
| +// advance *idx.  The index moves forward even when nothing could be stored,
|
| +// which is what lets callers preflight the size they need.
|
| +// This little sequence is used a LOT.
|
| +//
|
| +static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
|
| +    const int32_t slot = (*idx)++;
|
| +    if (slot < bufCapacity) {
|
| +        buf[slot] = c;
|
| +    }
|
| +}
|
| +
|
| +
|
| +//
|
| +// appendReplacement, the actual implementation behind uregex_appendReplacement().
|
| +//
|
| +// Appends to the caller's buffer the input text lying between the end of the
|
| +// previous match and the start of the current match, followed by the
|
| +// replacement text with $n capture group references and backslash escapes
|
| +// (including \uhhhh and \Uhhhhhhhh) expanded.
|
| +//
|
| +//   regexp             Regular expression handle; its matcher must hold a match,
|
| +//                      otherwise U_REGEX_INVALID_STATE is set.
|
| +//   replacementText    Replacement pattern; must not be NULL.
|
| +//   replacementLength  Length of replacementText, or -1 if it is NUL terminated.
|
| +//   destBuf            In/out: address of the current output position.
|
| +//   destCapacity       In/out: capacity remaining at *destBuf.
|
| +//   status             In/out: error code.  An incoming U_BUFFER_OVERFLOW_ERROR
|
| +//                      paired with *destCapacity == 0 is accepted, so that a
|
| +//                      series of append calls keeps preflighting the total size.
|
| +//
|
| +// Returns the number of UChars this call wanted to append, whether or not they
|
| +// fit.  *destBuf and *destCapacity are moved past whatever was actually stored.
|
| +//
|
| +int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
|
| +                                      const UChar          *replacementText,
|
| +                                      int32_t               replacementLength,
|
| +                                      UChar               **destBuf,
|
| +                                      int32_t              *destCapacity,
|
| +                                      UErrorCode           *status) {
|
| +
|
| +    // If we come in with a buffer overflow error, don't suppress the operation.
|
| +    // A series of appendReplacements, appendTail need to correctly preflight
|
| +    // the buffer size when an overflow happens somewhere in the middle.
|
| +    UBool pendingBufferOverflow = FALSE;
|
| +    if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
|
| +        pendingBufferOverflow = TRUE;
|
| +        *status = U_ZERO_ERROR;
|
| +    }
|
| +
|
| +    //
|
| +    // Validate all parameters
|
| +    //
|
| +    if (validateRE(regexp, status) == FALSE) {
|
| +        return 0;
|
| +    }
|
| +    if (replacementText == NULL || replacementLength < -1 ||
|
| +        destCapacity == NULL || destBuf == NULL ||
|
| +        (*destBuf == NULL && *destCapacity > 0) ||
|
| +        *destCapacity < 0) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return 0;
|
| +    }
|
| +
|
| +    RegexMatcher *m = regexp->fMatcher;
|
| +    if (m->fMatch == FALSE) {
|
| +        *status = U_REGEX_INVALID_STATE;
|
| +        return 0;
|
| +    }
|
| +
|
| +    UChar   *dest     = *destBuf;
|
| +    int32_t  capacity = *destCapacity;
|
| +    int32_t  destIdx  = 0;
|
| +    int32_t  i;
|
| +
|
| +    // If it wasn't supplied by the caller, get the length of the replacement text.
|
| +    // TODO: slightly smarter logic in the copy loop could watch for the NUL on
|
| +    //       the fly and avoid this step.
|
| +    if (replacementLength == -1) {
|
| +        replacementLength = u_strlen(replacementText);
|
| +    }
|
| +
|
| +    // Copy input string from the end of previous match to start of current match
|
| +    if (regexp->fText != NULL) {
|
| +        int32_t matchStart;
|
| +        int32_t lastMatchEnd;
|
| +        if (UTEXT_USES_U16(m->fInputText)) {
|
| +            lastMatchEnd = (int32_t)m->fLastMatchEnd;
|
| +            matchStart = (int32_t)m->fMatchStart;
|
| +        } else {
|
| +            // !!!: Would like a better way to do this!
|
| +            // The matcher's indexes cannot be used directly as offsets into
|
| +            // fText here, so preflight extractions (NULL buffer, zero capacity)
|
| +            // are used to obtain the corresponding UChar counts.  The scratch
|
| +            // error code has its own name so it does not hide the 'status'
|
| +            // parameter; preflight overflow reports are deliberately dropped.
|
| +            UErrorCode tempStatus = U_ZERO_ERROR;
|
| +            lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &tempStatus);
|
| +            tempStatus = U_ZERO_ERROR;
|
| +            matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &tempStatus);
|
| +        }
|
| +        for (i=lastMatchEnd; i<matchStart; i++) {
|
| +            appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
|
| +        }
|
| +    } else {
|
| +        UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
|
| +        // dest may be NULL when preflighting; never index off a NULL pointer.
|
| +        destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
|
| +                                 dest==NULL ? NULL : &dest[destIdx],
|
| +                                 REMAINING_CAPACITY(destIdx, capacity), &possibleOverflowError);
|
| +    }
|
| +
|
| +
|
| +    // scan the replacement text, looking for substitutions ($n) and \escapes.
|
| +    int32_t replIdx = 0;
|
| +    while (replIdx < replacementLength) {
|
| +        UChar c = replacementText[replIdx];
|
| +        replIdx++;
|
| +        if (c != DOLLARSIGN && c != BACKSLASH) {
|
| +            // Common case, no substitution, no escaping,
|
| +            // just copy the char to the dest buf.
|
| +            appendToBuf(c, &destIdx, dest, capacity);
|
| +            continue;
|
| +        }
|
| +
|
| +        if (c == BACKSLASH) {
|
| +            // Backslash Escape.  Copy the following char out without further checks.
|
| +            // Note:  Surrogate pairs don't need any special handling
|
| +            //        The second half wont be a '$' or a '\', and
|
| +            //        will move to the dest normally on the next
|
| +            //        loop iteration.
|
| +            if (replIdx >= replacementLength) {
|
| +                break;
|
| +            }
|
| +            c = replacementText[replIdx];
|
| +
|
| +            if (c==0x55/*U*/ || c==0x75/*u*/) {
|
| +                // We have a \udddd or \Udddddddd escape sequence.
|
| +                UChar32 escapedChar =
|
| +                    u_unescapeAt(uregex_ucstr_unescape_charAt,
|
| +                                 &replIdx,                   // Index is updated by unescapeAt
|
| +                                 replacementLength,          // Length of replacement text
|
| +                                 (void *)replacementText);
|
| +
|
| +                if (escapedChar != (UChar32)0xFFFFFFFF) {
|
| +                    if (escapedChar <= 0xffff) {
|
| +                        appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
|
| +                    } else {
|
| +                        appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
|
| +                        appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
|
| +                    }
|
| +                    continue;
|
| +                }
|
| +                // Note:  if the \u escape was invalid, just fall through and
|
| +                //        treat it as a plain \<anything> escape.
|
| +            }
|
| +
|
| +            // Plain backslash escape.  Just put out the escaped character.
|
| +            appendToBuf(c, &destIdx, dest, capacity);
|
| +
|
| +            replIdx++;
|
| +            continue;
|
| +        }
|
| +
|
| +
|
| +
|
| +        // We've got a $.  Pick up a capture group number if one follows.
|
| +        // Consume at most the number of digits necessary for the largest capture
|
| +        // number that is valid for this pattern.
|
| +
|
| +        int32_t numDigits = 0;
|
| +        int32_t groupNum  = 0;
|
| +        UChar32 digitC;
|
| +        for (;;) {
|
| +            if (replIdx >= replacementLength) {
|
| +                break;
|
| +            }
|
| +            U16_GET(replacementText, 0, replIdx, replacementLength, digitC);
|
| +            if (u_isdigit(digitC) == FALSE) {
|
| +                break;
|
| +            }
|
| +
|
| +            U16_FWD_1(replacementText, replIdx, replacementLength);
|
| +            groupNum=groupNum*10 + u_charDigitValue(digitC);
|
| +            numDigits++;
|
| +            if (numDigits >= m->fPattern->fMaxCaptureDigits) {
|
| +                break;
|
| +            }
|
| +        }
|
| +
|
| +
|
| +        if (numDigits == 0) {
|
| +            // The $ didn't introduce a group number at all.
|
| +            // Treat it as just part of the substitution text.
|
| +            appendToBuf(DOLLARSIGN, &destIdx, dest, capacity);
|
| +            continue;
|
| +        }
|
| +
|
| +        // Finally, append the capture group data to the destination.
|
| +        // As above, do not index off a NULL dest while preflighting.
|
| +        destIdx += uregex_group((URegularExpression*)regexp, groupNum,
|
| +                                dest==NULL ? NULL : &dest[destIdx],
|
| +                                REMAINING_CAPACITY(destIdx, capacity), status);
|
| +        if (*status == U_BUFFER_OVERFLOW_ERROR) {
|
| +            // Ignore buffer overflow when extracting the group.  We need to
|
| +            // continue on to get full size of the untruncated result.  We will
|
| +            // raise our own buffer overflow error at the end.
|
| +            *status = U_ZERO_ERROR;
|
| +        }
|
| +
|
| +        if (U_FAILURE(*status)) {
|
| +            // Can fail if group number is out of range.
|
| +            break;
|
| +        }
|
| +
|
| +    }
|
| +
|
| +    //
|
| +    // Nul Terminate the dest buffer if possible.
|
| +    // Set the appropriate buffer overflow or not terminated error, if needed.
|
| +    // A genuine failure recorded above (for example an out of range group
|
| +    // number) is left in place rather than being replaced by a
|
| +    // buffer-size condition.
|
| +    //
|
| +    if (destIdx < capacity) {
|
| +        dest[destIdx] = 0;
|
| +    } else if (U_SUCCESS(*status)) {
|
| +        if (destIdx == capacity) {
|
| +            *status = U_STRING_NOT_TERMINATED_WARNING;
|
| +        } else {
|
| +            *status = U_BUFFER_OVERFLOW_ERROR;
|
| +        }
|
| +    }
|
| +
|
| +    //
|
| +    // Return an updated dest buffer and capacity to the caller.
|
| +    //
|
| +    if (destIdx > 0 && *destCapacity > 0) {
|
| +        if (destIdx < capacity) {
|
| +            *destBuf      += destIdx;
|
| +            *destCapacity -= destIdx;
|
| +        } else {
|
| +            *destBuf      += capacity;
|
| +            *destCapacity  = 0;
|
| +        }
|
| +    }
|
| +
|
| +    // If we came in with a buffer overflow, make sure we go out with one also.
|
| +    // (A zero length match right at the end of the previous match could
|
| +    // make this function succeed even though a previous call had overflowed the buf)
|
| +    if (pendingBufferOverflow && U_SUCCESS(*status)) {
|
| +        *status = U_BUFFER_OVERFLOW_ERROR;
|
| +    }
|
| +
|
| +    return destIdx;
|
| +}
|
| +
|
| +//
|
| +// appendReplacement the actual API function,
|
| +//
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_appendReplacement(URegularExpression *regexp2,
|
| +                         const UChar        *replacementText,
|
| +                         int32_t             replacementLength,
|
| +                         UChar             **destBuf,
|
| +                         int32_t            *destCapacity,
|
| +                         UErrorCode         *status) {
|
| +    // Exported entry point: all work, including argument validation, is done
|
| +    // by RegexCImpl::appendReplacement(); its return value is passed through.
|
| +    return RegexCImpl::appendReplacement((RegularExpression*)regexp2,
|
| +                                         replacementText, replacementLength,
|
| +                                         destBuf, destCapacity, status);
|
| +}
|
| +
|
| +//
|
| +// uregex_appendReplacementUText...can just use the normal C++ method
|
| +//
|
| +U_CAPI void U_EXPORT2
|
| +uregex_appendReplacementUText(URegularExpression *regexp2,
|
| +                              UText              *replText,
|
| +                              UText              *dest,
|
| +                              UErrorCode         *status) {
|
| +    // UText flavor of appendReplacement: delegates to
|
| +    // RegexMatcher::appendReplacement(dest, replText, status).
|
| +    //   replText  replacement text; must not be NULL.
|
| +    //   dest      destination text, passed through to the matcher.
|
| +    //   status    in/out error code.
|
| +    RegularExpression *regexp = (RegularExpression*)regexp2;
|
| +    // Validate the handle before touching fMatcher, as the other entry points
|
| +    // in this file do; a NULL or corrupt handle must not be dereferenced.
|
| +    if (validateRE(regexp, status) == FALSE) {
|
| +        return;
|
| +    }
|
| +    // Same replacement-text check as uregex_replaceAllUText()/uregex_replaceFirstUText().
|
| +    if (replText == NULL) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return;
|
| +    }
|
| +    regexp->fMatcher->appendReplacement(dest, replText, *status);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_appendTail   RegexCImpl::appendTail() is the implementation; uregex_appendTail() forwards to it.
|
| +//    Copies the input text that follows the most recent match (all of it if nothing has matched) to *destBuf and returns destIdx, the length of that text.
|
| +//------------------------------------------------------------------------------
|
| +int32_t RegexCImpl::appendTail(RegularExpression *regexp, // handle whose matcher state and text are read
|
| + UChar **destBuf, // in/out: current output position; advanced by the amount stored
|
| + int32_t *destCapacity, // in/out: capacity remaining at *destBuf; reduced by the amount stored
|
| + UErrorCode *status) // in/out: error code; an incoming overflow with zero capacity is tolerated
|
| +{
|
| +
|
| + // If we come in with a buffer overflow error, don't suppress the operation.
|
| + // A series of appendReplacements, appendTail need to correctly preflight
|
| + // the buffer size when an overflow happens somewhere in the middle.
|
| + UBool pendingBufferOverflow = FALSE;
|
| + if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
|
| + pendingBufferOverflow = TRUE;
|
| + *status = U_ZERO_ERROR; // cleared so the validation below does not bail out; restored at the end
|
| + }
|
| +
|
| + if (validateRE(regexp, status) == FALSE) {
|
| + return 0;
|
| + }
|
| +
|
| + if (destCapacity == NULL || destBuf == NULL ||
|
| + (*destBuf == NULL && *destCapacity > 0) ||
|
| + *destCapacity < 0)
|
| + {
|
| + *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return 0;
|
| + }
|
| +
|
| + RegexMatcher *m = regexp->fMatcher;
|
| +
|
| + int32_t destIdx = 0; // number of UChars this call wants to output; may run past destCap
|
| + int32_t destCap = *destCapacity;
|
| + UChar *dest = *destBuf;
|
| +
|
| + if (regexp->fText != NULL) { // a UChar string was supplied: copy straight out of fText
|
| + int32_t srcIdx; // index into fText of the next UChar to copy
|
| + int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd); // end of the current match, else of the last successful one
|
| + if (nativeIdx == -1) { // -1: no earlier match end recorded, so start from the beginning
|
| + srcIdx = 0;
|
| + } else if (UTEXT_USES_U16(m->fInputText)) {
|
| + srcIdx = (int32_t)nativeIdx;
|
| + } else {
|
| + UErrorCode status = U_ZERO_ERROR; // scratch code; note that it shadows the 'status' parameter in this scope
|
| + srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status); // preflight (NULL buffer) to get the UChar count up to nativeIdx
|
| + }
|
| +
|
| + for (;;) {
|
| + if (srcIdx == regexp->fTextLength) {
|
| + break;
|
| + }
|
| + UChar c = regexp->fText[srcIdx];
|
| + if (c == 0 && regexp->fTextLength == -1) { // length was not known (-1): the NUL marks the end
|
| + regexp->fTextLength = srcIdx; // remember the discovered length in the handle
|
| + break;
|
| + }
|
| + if (destIdx < destCap) {
|
| + dest[destIdx] = c;
|
| + } else {
|
| + // We've overflowed the dest buffer.
|
| + // If the total input string length is known, we can
|
| + // compute the total buffer size needed without scanning through the string.
|
| + if (regexp->fTextLength > 0) {
|
| + destIdx += (regexp->fTextLength - srcIdx);
|
| + break;
|
| + }
|
| + }
|
| + srcIdx++;
|
| + destIdx++;
|
| + }
|
| + } else { // no UChar string in the handle: extract from the matcher's UText instead
|
| + int64_t srcIdx;
|
| + if (m->fMatch) {
|
| + // The most recent call to find() succeeded.
|
| + srcIdx = m->fMatchEnd;
|
| + } else {
|
| + // The last call to find() on this matcher failed.
|
| + // Look back to the end of the last find() that succeeded for src index.
|
| + srcIdx = m->fLastMatchEnd;
|
| + if (srcIdx == -1) {
|
| + // There has been no successful match with this matcher.
|
| + // We want to copy the whole string.
|
| + srcIdx = 0;
|
| + }
|
| + }
|
| +
|
| + destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
|
| + }
|
| +
|
| + //
|
| + // NUL terminate the output string, if possible, otherwise issue the
|
| + // appropriate error or warning.
|
| + //
|
| + if (destIdx < destCap) {
|
| + dest[destIdx] = 0;
|
| + } else if (destIdx == destCap) {
|
| + *status = U_STRING_NOT_TERMINATED_WARNING;
|
| + } else {
|
| + *status = U_BUFFER_OVERFLOW_ERROR;
|
| + }
|
| +
|
| + //
|
| + // Update the user's buffer ptr and capacity vars to reflect the
|
| + // amount used.
|
| + //
|
| + if (destIdx < destCap) {
|
| + *destBuf += destIdx;
|
| + *destCapacity -= destIdx;
|
| + } else {
|
| + *destBuf += destCap;
|
| + *destCapacity = 0;
|
| + }
|
| +
|
| + if (pendingBufferOverflow && U_SUCCESS(*status)) { // we came in with an overflow, so go out with one too
|
| + *status = U_BUFFER_OVERFLOW_ERROR;
|
| + }
|
| +
|
| + return destIdx;
|
| +}
|
| +
|
| +
|
| +//
|
| +// appendTail the actual API function
|
| +//
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_appendTail(URegularExpression *regexp2,
|
| +                  UChar             **destBuf,
|
| +                  int32_t            *destCapacity,
|
| +                  UErrorCode         *status) {
|
| +    // Exported entry point: all work, including argument validation, is done
|
| +    // by RegexCImpl::appendTail(); its return value is passed through.
|
| +    return RegexCImpl::appendTail((RegularExpression*)regexp2, destBuf, destCapacity, status);
|
| +}
|
| +
|
| +
|
| +//
|
| +// uregex_appendTailUText...can just use the normal C++ method
|
| +//
|
| +U_CAPI UText * U_EXPORT2
|
| +uregex_appendTailUText(URegularExpression *regexp2,
|
| +                       UText              *dest,
|
| +                       UErrorCode         *status) {
|
| +    // UText flavor of appendTail: delegates to RegexMatcher::appendTail(dest, status)
|
| +    // and returns the UText it hands back.
|
| +    RegularExpression *regexp = (RegularExpression*)regexp2;
|
| +    // Validate the handle before touching fMatcher, as the other entry points
|
| +    // in this file do.  On rejection the caller's dest is handed back untouched.
|
| +    if (validateRE(regexp, status) == FALSE) {
|
| +        return dest;
|
| +    }
|
| +    return regexp->fMatcher->appendTail(dest, *status);
|
| +}
|
| +
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// copyString Internal utility to copy a string to an output buffer,
|
| +// while managing buffer overflow and preflight size
|
| +// computation. NUL termination is added to destination,
|
| +// and the NUL is counted in the output size.
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +#if 0
|
| +static void copyString(UChar        *destBuffer,    // Output buffer.
|
| +                       int32_t       destCapacity,  // Capacity of the output buffer.
|
| +                       int32_t      *destIndex,     // In/out write index into destBuffer.
|
| +                                                    //   On return it is NOT clipped to destCapacity.
|
| +                       const UChar  *srcPtr,        // Source characters.
|
| +                       int32_t       srcLen)        // Number of source characters.
|
| +{
|
| +    int32_t outIdx = *destIndex;
|
| +    int32_t inIdx  = 0;
|
| +
|
| +    // Copy for as long as there is both source text left and room to store it.
|
| +    while (inIdx < srcLen && outIdx < destCapacity) {
|
| +        destBuffer[outIdx++] = srcPtr[inIdx++];
|
| +    }
|
| +
|
| +    // Ran out of room first: count the uncopied remainder so that the caller
|
| +    // still learns the full size required.
|
| +    if (inIdx < srcLen) {
|
| +        outIdx += srcLen - inIdx;
|
| +    }
|
| +
|
| +    // The terminating NUL is stored only if it fits, but is always counted.
|
| +    if (outIdx < destCapacity) {
|
| +        destBuffer[outIdx] = 0;
|
| +    }
|
| +    *destIndex = outIdx + 1;
|
| +}
|
| +#endif
|
| +
|
| +//------------------------------------------------------------------------------
|
| +//
|
| +// uregex_split   RegexCImpl::split() is the implementation; uregex_split() validates its arguments and forwards to it.
|
| +//    Splits the matcher's input around matches of the pattern, storing each field (and each capture group of a delimiter) in destBuf with a pointer to it in destFields. Returns the number of fields.
|
| +//------------------------------------------------------------------------------
|
| +int32_t RegexCImpl::split(RegularExpression *regexp, // handle whose matcher is reset and then searched
|
| + UChar *destBuf, // buffer that receives the field strings, one after another
|
| + int32_t destCapacity, // capacity of destBuf
|
| + int32_t *requiredCapacity, // optional out (may be NULL): receives destIdx, the buffer space needed
|
| + UChar *destFields[], // out: pointer to the start of each field; unused entries are set to NULL
|
| + int32_t destFieldsCapacity, // number of entries in destFields
|
| + UErrorCode *status) { // in/out: error code; set to U_BUFFER_OVERFLOW_ERROR if destIdx ends above destCapacity
|
| + //
|
| + // Reset for the input text
|
| + //
|
| + regexp->fMatcher->reset();
|
| + UText *inputText = regexp->fMatcher->fInputText;
|
| + int64_t nextOutputStringStart = 0; // input position where the next field begins (end of the previous delimiter)
|
| + int64_t inputLen = regexp->fMatcher->fInputLength;
|
| + if (inputLen == 0) { // empty input: zero fields; destFields and requiredCapacity are left untouched
|
| + return 0;
|
| + }
|
| +
|
| + //
|
| + // Loop through the input text, searching for the delimiter pattern
|
| + //
|
| + int32_t i; // Index of the field being processed.
|
| + int32_t destIdx = 0; // Next available position in destBuf;
|
| + int32_t numCaptureGroups = regexp->fMatcher->groupCount();
|
| + UErrorCode tStatus = U_ZERO_ERROR; // Want to ignore any buffer overflow errors so that the strings are still counted
|
| + for (i=0; ; i++) {
|
| + if (i>=destFieldsCapacity-1) {
|
| + // There are one or zero output strings left.
|
| + // Fill the last output string with whatever is left from the input, then exit the loop.
|
| + // ( i will be == destFieldsCapacity if we filled the output array while processing
|
| + // capture groups of the delimiter expression, in which case we will discard the
|
| + // last capture group saved in favor of the unprocessed remainder of the
|
| + // input string.)
|
| + if (inputLen > nextOutputStringStart) {
|
| + if (i != destFieldsCapacity-1) {
|
| + // No fields are left. Recycle the last one for holding the trailing part of
|
| + // the input string.
|
| + i = destFieldsCapacity-1;
|
| + destIdx = (int32_t)(destFields[i] - destFields[0]); // rewind destIdx to where the recycled field started in destBuf
|
| + }
|
| +
|
| + destFields[i] = &destBuf[destIdx];
|
| + destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
|
| + &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status); // the extra 1 accounts for the field's terminating NUL
|
| + }
|
| + break;
|
| + }
|
| +
|
| + if (regexp->fMatcher->find()) { // NOTE(review): find() takes no status here, so a matcher error cannot be reported from this call - confirm intended
|
| + // We found another delimiter. Move everything from where we started looking
|
| + // up until the start of the delimiter into the next output string.
|
| + destFields[i] = &destBuf[destIdx];
|
| +
|
| + destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
|
| + &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
|
| + if (tStatus == U_BUFFER_OVERFLOW_ERROR) { // overflow is swallowed here; the final size check below reports it
|
| + tStatus = U_ZERO_ERROR;
|
| + } else {
|
| + *status = tStatus;
|
| + }
|
| + nextOutputStringStart = regexp->fMatcher->fMatchEnd;
|
| +
|
| + // If the delimiter pattern has capturing parentheses, the captured
|
| + // text goes out into the next n destination strings.
|
| + int32_t groupNum;
|
| + for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
|
| + // If we've run out of output string slots, bail out.
|
| + if (i==destFieldsCapacity-1) {
|
| + break;
|
| + }
|
| + i++;
|
| +
|
| + // Set up to extract the capture group contents into the dest buffer.
|
| + destFields[i] = &destBuf[destIdx];
|
| + tStatus = U_ZERO_ERROR;
|
| + int32_t t = uregex_group((URegularExpression*)regexp, groupNum, destFields[i], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
|
| + destIdx += t + 1; // Record the space used in the output string buffer.
|
| + // +1 for the NUL that terminates the string.
|
| + if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
|
| + tStatus = U_ZERO_ERROR;
|
| + } else {
|
| + *status = tStatus;
|
| + }
|
| + }
|
| +
|
| + if (nextOutputStringStart == inputLen) {
|
| + // The delimiter was at the end of the string. We're done.
|
| + break;
|
| + }
|
| +
|
| + }
|
| + else
|
| + {
|
| + // We ran off the end of the input while looking for the next delimiter.
|
| + // All the remaining text goes into the current output string.
|
| + destFields[i] = &destBuf[destIdx];
|
| + destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
|
| + &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
|
| + break;
|
| + }
|
| + }
|
| +
|
| + // Zero out any unused portion of the destFields array
|
| + int j;
|
| + for (j=i+1; j<destFieldsCapacity; j++) {
|
| + destFields[j] = NULL;
|
| + }
|
| +
|
| + if (requiredCapacity != NULL) {
|
| + *requiredCapacity = destIdx;
|
| + }
|
| + if (destIdx > destCapacity) {
|
| + *status = U_BUFFER_OVERFLOW_ERROR;
|
| + }
|
| + return i+1; // i is the index of the last field filled
|
| +}
|
| +
|
| +//
|
| +// uregex_split The actual API function
|
| +//
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_split(URegularExpression *regexp2,
|
| +             UChar              *destBuf,
|
| +             int32_t             destCapacity,
|
| +             int32_t            *requiredCapacity,
|
| +             UChar              *destFields[],
|
| +             int32_t             destFieldsCapacity,
|
| +             UErrorCode         *status) {
|
| +    // Exported entry point: checks the handle and the buffer/field-array
|
| +    // arguments, then lets RegexCImpl::split() do the work.  Returns 0 with
|
| +    // U_ILLEGAL_ARGUMENT_ERROR when an argument is unusable.
|
| +    RegularExpression *re = (RegularExpression*)regexp2;
|
| +    if (!validateRE(re, status)) {
|
| +        return 0;
|
| +    }
|
| +
|
| +    const UBool badBuffer = (destCapacity < 0 || (destBuf == NULL && destCapacity > 0));
|
| +    const UBool badFields = (destFields == NULL || destFieldsCapacity < 1);
|
| +    if (badBuffer || badFields) {
|
| +        *status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return 0;
|
| +    }
|
| +
|
| +    return RegexCImpl::split(re, destBuf, destCapacity, requiredCapacity,
|
| +                             destFields, destFieldsCapacity, status);
|
| +}
|
| +
|
| +
|
| +//
|
| +// uregex_splitUText...can just use the normal C++ method
|
| +//
|
| +U_CAPI int32_t U_EXPORT2
|
| +uregex_splitUText(URegularExpression *regexp2,
|
| +                  UText              *destFields[],
|
| +                  int32_t             destFieldsCapacity,
|
| +                  UErrorCode         *status) {
|
| +    // UText flavor of split: delegates to RegexMatcher::split() on the matcher's
|
| +    // own input text and returns its result.  Returns 0 when the handle or the
|
| +    // incoming status is rejected by validateRE().
|
| +    RegularExpression *regexp = (RegularExpression*)regexp2;
|
| +    // Validate the handle before touching fMatcher, as uregex_split() does.
|
| +    if (validateRE(regexp, status) == FALSE) {
|
| +        return 0;
|
| +    }
|
| +    return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
|
| +}
|
| +
|
| +
|
| +#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
|
| +
|
|
|
| Property changes on: icu46/source/i18n/uregex.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|