icu46/source/i18n/strmatch.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/i18n/strmatch.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/i18n/strmatch.cpp

===================================================================

--- icu46/source/i18n/strmatch.cpp (revision 0)

+++ icu46/source/i18n/strmatch.cpp (revision 0)

@@ -0,0 +1,289 @@

+/*

+**********************************************************************

+* Date Name Description

+* 07/23/01 aliu Creation.

+**********************************************************************

+*/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_TRANSLITERATION

+#include "strmatch.h"

+#include "rbt_data.h"

+#include "util.h"

+#include "unicode/uniset.h"

+U_NAMESPACE_BEGIN

+static const UChar EMPTY[] = { 0 }; // empty string: ""; replace() passes it to handleReplaceBetween to delete text

+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringMatcher) // NOTE(review): presumably emits the class-ID/RTTI boilerplate for StringMatcher -- confirm against the macro definition

+StringMatcher::StringMatcher(const UnicodeString& theString, // text the pattern is extracted from

+ int32_t start, // first argument to extractBetween below

+ int32_t limit, // second argument to extractBetween below

+ int32_t segmentNum, // stored in segmentNumber; toPattern() adds parentheses when it is > 0

+ const TransliterationRuleData& theData) : // only its address is kept (see data below)

+ data(&theData),

+ segmentNumber(segmentNum),

+ matchStart(-1), // -1 == no match recorded; resetMatch() assigns the same value

+ matchLimit(-1)

+ theString.extractBetween(start, limit, pattern); // fills pattern with theString between start and limit

+StringMatcher::StringMatcher(const StringMatcher& o) : // copy constructor

+ UnicodeFunctor(o),

+ UnicodeMatcher(o),

+ UnicodeReplacer(o),

+ pattern(o.pattern),

+ data(o.data), // copies the pointer only; both objects refer to the same rule data

+ segmentNumber(o.segmentNumber),

+ matchStart(o.matchStart), // the recorded match positions are copied as well

+ matchLimit(o.matchLimit)

+/**

+ * Destructor

+ *

+ * No cleanup statements appear here; nothing in the code shown deletes

+ * the data pointer.

+ */

+StringMatcher::~StringMatcher() {

+/**

+ * Implement UnicodeFunctor

+ *

+ * Returns a new heap-allocated StringMatcher built with the copy

+ * constructor from this object.

+ * NOTE(review): the caller presumably takes ownership of the returned

+ * object -- confirm against the UnicodeFunctor contract.

+ */

+UnicodeFunctor* StringMatcher::clone() const {

+ return new StringMatcher(*this);

+/**

+ * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer

+ * and return the pointer.

+ *

+ * The method is const but returns a non-const pointer, so the C-style

+ * cast below also removes the const qualification of 'this'.

+ */

+UnicodeMatcher* StringMatcher::toMatcher() const {

+ return (UnicodeMatcher*) this;

+/**

+ * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer

+ * and return the pointer.

+ *

+ * The method is const but returns a non-const pointer, so the C-style

+ * cast below also removes the const qualification of 'this'.

+ */

+UnicodeReplacer* StringMatcher::toReplacer() const {

+ return (UnicodeReplacer*) this;

+/**

+ * Implement UnicodeMatcher

+ *

+ * Matches the pattern against text, starting at offset.

+ *

+ * Direction is chosen by comparing limit with offset: when limit < offset

+ * the pattern is walked from its last character to its first and the

+ * cursor is decremented; otherwise the pattern is walked first to last

+ * and the cursor is incremented.

+ *

+ * Each pattern character is passed to data->lookupMatcher(). If that

+ * returns 0 the character is compared literally with text.charAt(cursor);

+ * otherwise matching of that element is delegated to the returned

+ * sub-matcher, which advances the cursor itself.

+ *

+ * The matched range is recorded in matchStart/matchLimit; in the reverse

+ * direction this happens only when no match is already recorded

+ * (matchStart < 0).

+ *

+ * @param text the text being matched

+ * @param offset on entry, the position to start matching at; it is

+ * overwritten with the final cursor position only on the path that

+ * returns U_MATCH

+ * @param limit boundary for matching; a literal character is compared

+ * only while cursor < limit (forward) or cursor > limit (reverse)

+ * @param incremental if TRUE, the forward direction returns

+ * U_PARTIAL_MATCH when the cursor reaches limit before the whole

+ * pattern has been consumed; it is also forwarded to sub-matchers

+ * @return U_MATCH on success, U_MISMATCH when a literal comparison

+ * fails, U_PARTIAL_MATCH as described above, or whatever non-U_MATCH

+ * value a sub-matcher returned

+ */

+UMatchDegree StringMatcher::matches(const Replaceable& text,

+ int32_t& offset,

+ int32_t limit,

+ UBool incremental) {

+ int32_t i;

+ int32_t cursor = offset; // working position; offset itself is only written before returning U_MATCH

+ if (limit < cursor) {

+ // Match in the reverse direction

+ for (i=pattern.length()-1; i>=0; --i) {

+ UChar keyChar = pattern.charAt(i);

+ UnicodeMatcher* subm = data->lookupMatcher(keyChar);

+ if (subm == 0) { // no sub-matcher for keyChar: compare it literally

+ if (cursor > limit &&

+ keyChar == text.charAt(cursor)) {

+ --cursor;

+ } else {

+ return U_MISMATCH;

+ }

+ } else {

+ UMatchDegree m =

+ subm->matches(text, cursor, limit, incremental);

+ if (m != U_MATCH) {

+ return m;

+ }

+ // Record the match position, but adjust for a normal

+ // forward start, limit, and only if a prior match does not

+ // exist -- we want the rightmost match.

+ if (matchStart < 0) {

+ matchStart = cursor+1;

+ matchLimit = offset+1;

+ }

+ } else {

+ for (i=0; i<pattern.length(); ++i) {

+ if (incremental && cursor == limit) {

+ // We've reached the context limit without a mismatch and

+ // without completing our match.

+ return U_PARTIAL_MATCH;

+ }

+ UChar keyChar = pattern.charAt(i);

+ UnicodeMatcher* subm = data->lookupMatcher(keyChar);

+ if (subm == 0) { // no sub-matcher for keyChar: compare it literally

+ // Don't need the cursor < limit check if

+ // incremental is TRUE (because it's done above); do need

+ // it otherwise.

+ if (cursor < limit &&

+ keyChar == text.charAt(cursor)) {

+ ++cursor;

+ } else {

+ return U_MISMATCH;

+ }

+ } else {

+ UMatchDegree m =

+ subm->matches(text, cursor, limit, incremental);

+ if (m != U_MATCH) {

+ return m;

+ }

+ // Record the match position

+ matchStart = offset;

+ matchLimit = cursor;

+ }

+ offset = cursor; // publish the final cursor position to the caller

+ return U_MATCH;

+/**

+ * Implement UnicodeMatcher

+ *

+ * Rebuilds a rule-syntax representation of this matcher in result.

+ * result is emptied first. When segmentNumber > 0 the output is wrapped

+ * in '(' and ')'. Pattern characters with no sub-matcher are appended

+ * through ICU_Utility::appendToRule; for the others the sub-matcher's own

+ * toPattern() output is appended instead.

+ *

+ * NOTE(review): quoteBuf is flushed into result after the closing ')'

+ * has been appended -- confirm that this ordering is intended.

+ *

+ * @param result string that receives the pattern; previous contents

+ * are discarded

+ * @param escapeUnprintable forwarded to appendToRule and to the

+ * sub-matchers' toPattern()

+ * @return result

+ */

+UnicodeString& StringMatcher::toPattern(UnicodeString& result,

+ UBool escapeUnprintable) const

+ result.truncate(0);

+ UnicodeString str, quoteBuf; // str: scratch for sub-matcher patterns; quoteBuf: buffer shared by all appendToRule calls

+ if (segmentNumber > 0) {

+ result.append((UChar)40); /*(*/

+ }

+ for (int32_t i=0; i<pattern.length(); ++i) {

+ UChar keyChar = pattern.charAt(i);

+ const UnicodeMatcher* m = data->lookupMatcher(keyChar);

+ if (m == 0) {

+ ICU_Utility::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf);

+ } else {

+ ICU_Utility::appendToRule(result, m->toPattern(str, escapeUnprintable),

+ TRUE, escapeUnprintable, quoteBuf);

+ }

+ if (segmentNumber > 0) {

+ result.append((UChar)41); /*)*/

+ }

+ // Flush quoteBuf out to result

+ ICU_Utility::appendToRule(result, -1,

+ TRUE, escapeUnprintable, quoteBuf);

+ return result;

+/**

+ * Implement UnicodeMatcher

+ *

+ * Returns TRUE for an empty pattern. Otherwise only the first code point

+ * of the pattern is examined: if it has no sub-matcher, the result is

+ * whether its low 8 bits equal v; if it has one, the question is

+ * delegated to that sub-matcher.

+ *

+ * @param v the index value to test

+ */

+UBool StringMatcher::matchesIndexValue(uint8_t v) const {

+ if (pattern.length() == 0) {

+ return TRUE;

+ }

+ UChar32 c = pattern.char32At(0);

+ const UnicodeMatcher *m = data->lookupMatcher(c);

+ return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v);

+/**

+ * Implement UnicodeMatcher

+ *

+ * Walks the pattern one code point at a time. A code point with no

+ * sub-matcher is added to toUnionTo directly; otherwise the sub-matcher

+ * adds its own match set to toUnionTo.

+ *

+ * @param toUnionTo the set that receives the additions

+ */

+void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {

+ UChar32 ch;

+ for (int32_t i=0; i<pattern.length(); i+=UTF_CHAR_LENGTH(ch)) { // ch is assigned in the body before the increment reads it

+ ch = pattern.char32At(i);

+ const UnicodeMatcher* matcher = data->lookupMatcher(ch);

+ if (matcher == NULL) {

+ toUnionTo.add(ch);

+ } else {

+ matcher->addMatchSetTo(toUnionTo);

+ }

+/**

+ * UnicodeReplacer API

+ *

+ * If a non-empty match is recorded (matchStart >= 0 and

+ * matchStart != matchLimit), the text between matchStart and matchLimit

+ * is copied to position limit with Replaceable::copy(). The text between

+ * start and limit is then replaced with the empty string.

+ *

+ * @param text the text being modified

+ * @param start start of the range that is deleted

+ * @param limit end of the range that is deleted; also the destination

+ * of the copy

+ * @return matchLimit - matchStart when a copy was made, otherwise 0

+ *

+ * The fourth (cursor) parameter is unnamed and unused.

+ */

+int32_t StringMatcher::replace(Replaceable& text,

+ int32_t start,

+ int32_t limit,

+ int32_t& /*cursor*/) {

+ int32_t outLen = 0;

+ // Copy segment with out-of-band data

+ int32_t dest = limit;

+ // If there was no match, that means that a quantifier

+ // matched zero-length. E.g., x (a)* y matched "xy".

+ if (matchStart >= 0) {

+ if (matchStart != matchLimit) {

+ text.copy(matchStart, matchLimit, dest);

+ outLen = matchLimit - matchStart;

+ }

+ text.handleReplaceBetween(start, limit, EMPTY); // delete original text

+ return outLen;

+/**

+ * UnicodeReplacer API

+ *

+ * Overwrites rule with '$' followed by segmentNumber as appended by

+ * ICU_Utility::appendNumber.

+ * NOTE(review): the trailing arguments 10 and 1 are presumably the radix

+ * and the minimum digit count -- confirm against appendNumber.

+ *

+ * @param rule string that receives the result; previous contents are

+ * discarded

+ * @return rule

+ *

+ * The second (escapeUnprintable) parameter is unnamed and unused.

+ */

+UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,

+ UBool /*escapeUnprintable*/) const {

+ // assert(segmentNumber > 0);

+ rule.truncate(0);

+ rule.append((UChar)0x0024 /*$*/);

+ ICU_Utility::appendNumber(rule, segmentNumber, 10, 1);

+ return rule;

+/**

+ * Remove any match info. This must be called before performing a

+ * set of matches with this segment.

+ *

+ * Both matchStart and matchLimit are set to -1, the same value the

+ * constructor initializes them to.

+ */

+ void StringMatcher::resetMatch() {

+ matchStart = matchLimit = -1;

+/**

+ * Union the set of all characters that may be output by this object

+ * into the given set.

+ *

+ * This implementation adds nothing; the comment in the body explains why.

+ * @param toUnionTo the set into which to union the output characters

+ */

+void StringMatcher::addReplacementSetTo(UnicodeSet& /*toUnionTo*/) const {

+ // The output of this replacer varies; it is the source text between

+ // matchStart and matchLimit. Since this varies depending on the

+ // input text, we can't compute it here. We can either do nothing

+ // or we can add ALL characters to the set. It's probably more useful

+ // to do nothing.

+/**

+ * Implement UnicodeFunctor

+ *

+ * Stores d as this object's rule data. Then, for every code point of the

+ * pattern, looks up a functor with data->lookup() and, when one is

+ * found, passes the same data pointer on to it through setData().

+ *

+ * @param d the rule data to use from now on; it is dereferenced

+ * without a null check when the pattern is non-empty

+ */

+void StringMatcher::setData(const TransliterationRuleData* d) {

+ data = d;

+ int32_t i = 0;

+ while (i<pattern.length()) {

+ UChar32 c = pattern.char32At(i);

+ UnicodeFunctor* f = data->lookup(c);

+ if (f != NULL) {

+ f->setData(data);

+ }

+ i += UTF_CHAR_LENGTH(c); // advance by the number of UChar units c occupies

+ }

+U_NAMESPACE_END

+#endif /* #if !UCONFIG_NO_TRANSLITERATION */

+//eof

Property changes on: icu46/source/i18n/strmatch.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/i18n/strmatch.h ('k') | icu46/source/i18n/strrepl.h » ('j') | no next file with comments »