Index: source/common/simplepatternformatter.cpp |
diff --git a/source/common/simplepatternformatter.cpp b/source/common/simplepatternformatter.cpp |
index 30390f4150c013824df7235647f39f7f75c8ea8f..0cac2ec3fd0ac6b067faee2f6c00b8d81b86c8dc 100644 |
--- a/source/common/simplepatternformatter.cpp |
+++ b/source/common/simplepatternformatter.cpp |
@@ -11,20 +11,38 @@ |
U_NAMESPACE_BEGIN |
+static UBool isInvalidArray(const void *array, int32_t size) { /* Returns TRUE if size is negative, or if size is positive but array is NULL; a NULL array with size 0 is accepted. */ |
+ return (size < 0 || (size > 0 && array == NULL)); |
+} |
+ |
typedef enum SimplePatternFormatterCompileState { |
INIT, |
APOSTROPHE, |
PLACEHOLDER |
} SimplePatternFormatterCompileState; |
+// Parses the numeric IDs of placeholders in the pattern string, e.g. {4} or {35}. |
class SimplePatternFormatterIdBuilder { |
public: |
SimplePatternFormatterIdBuilder() : id(0), idLen(0) { } |
~SimplePatternFormatterIdBuilder() { } |
+ |
+ // Resets so that this object has seen no placeholder ID. |
void reset() { id = 0; idLen = 0; } |
+ |
+ // Returns the numeric placeholder ID parsed so far |
int32_t getId() const { return id; } |
+ |
+ // Appends the numeric placeholder ID parsed so far back to a |
+ // UChar buffer. Used to recover if the parser using this object finds |
+ // no closing curly brace. |
void appendTo(UChar *buffer, int32_t *len) const; |
+ |
+ // Returns true if this object has seen a placeholder ID. |
UBool isValid() const { return (idLen > 0); } |
+ |
+ // Processes a single digit character. Pattern string parser calls this |
+ // as it processes digits after an opening curly brace. |
void add(UChar ch); |
private: |
int32_t id; |
@@ -52,18 +70,81 @@ void SimplePatternFormatterIdBuilder::add(UChar ch) { |
idLen++; |
} |
+// Represents the placeholder values for one format call. Can hand back a saved copy of appendTo in place of any value that is the appendTo object itself. |
+class SimplePatternFormatterPlaceholderValues : public UMemory { |
+public: |
+ SimplePatternFormatterPlaceholderValues( |
+ const UnicodeString * const *values, |
+ int32_t valuesCount); |
+ |
+ // Returns TRUE if any value at an index other than exceptIndex is the appendTo object itself (compared by address, not by contents). |
+ UBool isAppendToInAnyIndexExcept( |
+ const UnicodeString &appendTo, int32_t exceptIndex) const; |
+ |
+ // Copies appendTo and remembers its address, so that get() returns the copy for every value that is the appendTo object. |
+ void snapshotAppendTo(const UnicodeString &appendTo); |
+ |
+ // Returns the placeholder value at index, or the snapshot if that value is the snapshotted appendTo. No range checking performed. |
+ // Returned reference is valid for as long as this object exists. |
+ const UnicodeString &get(int32_t index) const; |
+private: |
+ const UnicodeString * const *fValues; /* array of value pointers supplied to the constructor; the array is not copied */ |
+ int32_t fValuesCount; /* number of entries of fValues that are scanned */ |
+ const UnicodeString *fAppendTo; /* address of the snapshotted appendTo, or NULL if no snapshot was taken */ |
+ UnicodeString fAppendToCopy; /* contents of appendTo at the time of the snapshot */ |
+ SimplePatternFormatterPlaceholderValues( /* copy constructor and assignment are private, so outside code cannot copy this object */ |
+ const SimplePatternFormatterPlaceholderValues &); |
+ SimplePatternFormatterPlaceholderValues &operator=( |
+ const SimplePatternFormatterPlaceholderValues &); |
+}; |
+ |
+SimplePatternFormatterPlaceholderValues::SimplePatternFormatterPlaceholderValues( |
+ const UnicodeString * const *values, |
+ int32_t valuesCount) |
+ : fValues(values), /* keeps the caller's pointer; the array itself is not copied */ |
+ fValuesCount(valuesCount), |
+ fAppendTo(NULL), /* NULL means no snapshot has been taken yet */ |
+ fAppendToCopy() { |
+} |
+ |
+UBool SimplePatternFormatterPlaceholderValues::isAppendToInAnyIndexExcept( |
+ const UnicodeString &appendTo, int32_t exceptIndex) const { /* linear scan over fValuesCount entries; an exceptIndex that equals no loop index (e.g. -1) excludes nothing */ |
+ for (int32_t i = 0; i < fValuesCount; ++i) { |
+ if (i != exceptIndex && fValues[i] == &appendTo) { /* identity check: compares addresses, not string contents */ |
+ return TRUE; |
+ } |
+ } |
+ return FALSE; |
+} |
+ |
+void SimplePatternFormatterPlaceholderValues::snapshotAppendTo( |
+ const UnicodeString &appendTo) { |
+ fAppendTo = &appendTo; /* remember which object was snapshotted so get() can recognize it by address */ |
+ fAppendToCopy = appendTo; /* save the current contents; formatAndReplace clears the original right after calling this */ |
+} |
+ |
+const UnicodeString &SimplePatternFormatterPlaceholderValues::get( |
+ int32_t index) const { |
+ if (fAppendTo == NULL || fAppendTo != fValues[index]) { /* no snapshot taken, or this value is not the snapshotted object */ |
+ return *fValues[index]; |
+ } |
+ return fAppendToCopy; /* the value at index is the snapshotted appendTo: return the saved copy instead */ |
+} |
+ |
SimplePatternFormatter::SimplePatternFormatter() : |
noPlaceholders(), |
placeholders(), |
placeholderSize(0), |
- placeholderCount(0) { |
+ placeholderCount(0), |
+ firstPlaceholderReused(FALSE) { |
} |
SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) : |
noPlaceholders(), |
placeholders(), |
placeholderSize(0), |
- placeholderCount(0) { |
+ placeholderCount(0), |
+ firstPlaceholderReused(FALSE) { |
UErrorCode status = U_ZERO_ERROR; |
compile(pattern, status); |
} |
@@ -73,7 +154,8 @@ SimplePatternFormatter::SimplePatternFormatter( |
noPlaceholders(other.noPlaceholders), |
placeholders(), |
placeholderSize(0), |
- placeholderCount(other.placeholderCount) { |
+ placeholderCount(other.placeholderCount), |
+ firstPlaceholderReused(other.firstPlaceholderReused) { |
placeholderSize = ensureCapacity(other.placeholderSize); |
uprv_memcpy( |
placeholders.getAlias(), |
@@ -89,6 +171,7 @@ SimplePatternFormatter &SimplePatternFormatter::operator=( |
noPlaceholders = other.noPlaceholders; |
placeholderSize = ensureCapacity(other.placeholderSize); |
placeholderCount = other.placeholderCount; |
+ firstPlaceholderReused = other.firstPlaceholderReused; |
uprv_memcpy( |
placeholders.getAlias(), |
other.placeholders.getAlias(), |
@@ -175,19 +258,12 @@ UBool SimplePatternFormatter::compile( |
return TRUE; |
} |
-UBool SimplePatternFormatter::startsWithPlaceholder(int32_t id) const { |
- if (placeholderSize == 0) { |
- return FALSE; |
- } |
- return (placeholders[0].offset == 0 && placeholders[0].id == id); |
-} |
- |
UnicodeString& SimplePatternFormatter::format( |
const UnicodeString &arg0, |
UnicodeString &appendTo, |
UErrorCode &status) const { |
const UnicodeString *params[] = {&arg0}; |
- return format( |
+ return formatAndAppend( |
params, |
UPRV_LENGTHOF(params), |
appendTo, |
@@ -202,7 +278,7 @@ UnicodeString& SimplePatternFormatter::format( |
UnicodeString &appendTo, |
UErrorCode &status) const { |
const UnicodeString *params[] = {&arg0, &arg1}; |
- return format( |
+ return formatAndAppend( |
params, |
UPRV_LENGTHOF(params), |
appendTo, |
@@ -218,7 +294,7 @@ UnicodeString& SimplePatternFormatter::format( |
UnicodeString &appendTo, |
UErrorCode &status) const { |
const UnicodeString *params[] = {&arg0, &arg1, &arg2}; |
- return format( |
+ return formatAndAppend( |
params, |
UPRV_LENGTHOF(params), |
appendTo, |
@@ -242,10 +318,14 @@ static void appendRange( |
int32_t start, |
int32_t end, |
UnicodeString &dest) { |
+ // This check improves performance significantly. |
+ if (start == end) { |
+ return; |
+ } |
dest.append(src, start, end - start); |
} |
-UnicodeString& SimplePatternFormatter::format( |
+UnicodeString& SimplePatternFormatter::formatAndAppend( |
const UnicodeString * const *placeholderValues, |
int32_t placeholderValueCount, |
UnicodeString &appendTo, |
@@ -255,10 +335,102 @@ UnicodeString& SimplePatternFormatter::format( |
if (U_FAILURE(status)) { |
return appendTo; |
} |
+ if (isInvalidArray(placeholderValues, placeholderValueCount) |
+ || isInvalidArray(offsetArray, offsetArrayLength)) { |
+ status = U_ILLEGAL_ARGUMENT_ERROR; |
+ return appendTo; |
+ } |
if (placeholderValueCount < placeholderCount) { |
status = U_ILLEGAL_ARGUMENT_ERROR; |
return appendTo; |
} |
+ |
+ // Since we are disallowing parameter values that are the same as |
+ // appendTo, we have to check all placeholderValues as opposed to |
+ // the first placeholderCount placeholder values. |
+ SimplePatternFormatterPlaceholderValues values( |
+ placeholderValues, placeholderValueCount); |
+ if (values.isAppendToInAnyIndexExcept(appendTo, -1)) { |
+ status = U_ILLEGAL_ARGUMENT_ERROR; |
+ return appendTo; |
+ } |
+ return formatAndAppend( |
+ values, |
+ appendTo, |
+ offsetArray, |
+ offsetArrayLength); |
+} |
+ |
+UnicodeString& SimplePatternFormatter::formatAndReplace( |
+ const UnicodeString * const *placeholderValues, |
+ int32_t placeholderValueCount, |
+ UnicodeString &result, |
+ int32_t *offsetArray, |
+ int32_t offsetArrayLength, |
+ UErrorCode &status) const { /* Formats into result, discarding result's previous contents unless they serve as the value of the leading placeholder; result may itself be one of placeholderValues. Returns result. */ |
+ if (U_FAILURE(status)) { /* incoming failure: return result unmodified */ |
+ return result; |
+ } |
+ if (isInvalidArray(placeholderValues, placeholderValueCount) |
+ || isInvalidArray(offsetArray, offsetArrayLength)) { |
+ status = U_ILLEGAL_ARGUMENT_ERROR; |
+ return result; |
+ } |
+ if (placeholderValueCount < placeholderCount) { /* fewer values supplied than placeholderCount */ |
+ status = U_ILLEGAL_ARGUMENT_ERROR; |
+ return result; |
+ } |
+ SimplePatternFormatterPlaceholderValues values( |
+ placeholderValues, placeholderCount); /* only the first placeholderCount values are examined for aliasing with result */ |
+ int32_t placeholderAtStart = getUniquePlaceholderAtStart(); /* -1 when there is no unique placeholder at offset 0 */ |
+ |
+ // If the pattern starts with a unique placeholder and the value supplied for that |
+ // placeholder is the result object itself, we may be able to format in place by just appending to result. |
+ if (placeholderAtStart >= 0 |
+ && placeholderValues[placeholderAtStart] == &result) { |
+ |
+ // If result is also the value for some other placeholder, call off the optimization and format from a snapshot. |
+ if (values.isAppendToInAnyIndexExcept(result, placeholderAtStart)) { |
+ values.snapshotAppendTo(result); /* save the contents before clearing result */ |
+ result.remove(); |
+ return formatAndAppend( |
+ values, |
+ result, |
+ offsetArray, |
+ offsetArrayLength); |
+ } |
+ |
+ // Otherwise we can optimize: result already holds the leading placeholder's value, so the rest is appended to it. |
+ formatAndAppend( |
+ values, |
+ result, |
+ offsetArray, |
+ offsetArrayLength); |
+ |
+ // The call above recorded the offset for the placeholderAtStart |
+ // placeholder as the length result had on entry. Its text actually |
+ // begins at the start of result, so the offset has to be reset to 0. |
+ if (offsetArrayLength > placeholderAtStart) { |
+ offsetArray[placeholderAtStart] = 0; |
+ } |
+ return result; |
+ } |
+ if (values.isAppendToInAnyIndexExcept(result, -1)) { /* result is one of the values: snapshot it before it is cleared below */ |
+ values.snapshotAppendTo(result); |
+ } |
+ result.remove(); |
+ return formatAndAppend( |
+ values, |
+ result, |
+ offsetArray, |
+ offsetArrayLength); |
+} |
+ |
+UnicodeString& SimplePatternFormatter::formatAndAppend( |
+ const SimplePatternFormatterPlaceholderValues &values, |
+ UnicodeString &appendTo, |
+ int32_t *offsetArray, |
+ int32_t offsetArrayLength) const { |
for (int32_t i = 0; i < offsetArrayLength; ++i) { |
offsetArray[i] = -1; |
} |
@@ -266,25 +438,19 @@ UnicodeString& SimplePatternFormatter::format( |
appendTo.append(noPlaceholders); |
return appendTo; |
} |
- if (placeholders[0].offset > 0 || |
- placeholderValues[placeholders[0].id] != &appendTo) { |
- appendRange( |
- noPlaceholders, |
- 0, |
- placeholders[0].offset, |
- appendTo); |
- updatePlaceholderOffset( |
- placeholders[0].id, |
- appendTo.length(), |
- offsetArray, |
- offsetArrayLength); |
- appendTo.append(*placeholderValues[placeholders[0].id]); |
- } else { |
- updatePlaceholderOffset( |
- placeholders[0].id, |
- 0, |
- offsetArray, |
- offsetArrayLength); |
+ appendRange( |
+ noPlaceholders, |
+ 0, |
+ placeholders[0].offset, |
+ appendTo); |
+ updatePlaceholderOffset( |
+ placeholders[0].id, |
+ appendTo.length(), |
+ offsetArray, |
+ offsetArrayLength); |
+ const UnicodeString *placeholderValue = &values.get(placeholders[0].id); |
+ if (placeholderValue != &appendTo) { |
+ appendTo.append(*placeholderValue); |
} |
for (int32_t i = 1; i < placeholderSize; ++i) { |
appendRange( |
@@ -297,7 +463,10 @@ UnicodeString& SimplePatternFormatter::format( |
appendTo.length(), |
offsetArray, |
offsetArrayLength); |
- appendTo.append(*placeholderValues[placeholders[i].id]); |
+ placeholderValue = &values.get(placeholders[i].id); |
+ if (placeholderValue != &appendTo) { |
+ appendTo.append(*placeholderValue); |
+ } |
} |
appendRange( |
noPlaceholders, |
@@ -307,6 +476,14 @@ UnicodeString& SimplePatternFormatter::format( |
return appendTo; |
} |
+int32_t SimplePatternFormatter::getUniquePlaceholderAtStart() const { /* Returns the id of the first placeholder if it sits at offset 0 and firstPlaceholderReused is not set; otherwise returns -1. */ |
+ if (placeholderSize == 0 |
+ || firstPlaceholderReused || placeholders[0].offset != 0) { |
+ return -1; |
+ } |
+ return placeholders[0].id; |
+} |
+ |
int32_t SimplePatternFormatter::ensureCapacity( |
int32_t desiredCapacity, int32_t allocationSize) { |
if (allocationSize < desiredCapacity) { |
@@ -333,6 +510,10 @@ UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) { |
if (id >= placeholderCount) { |
placeholderCount = id + 1; |
} |
+ if (placeholderSize > 1 |
+ && placeholders[placeholderSize - 1].id == placeholders[0].id) { |
+ firstPlaceholderReused = TRUE; |
+ } |
return TRUE; |
} |