icu46/source/common/normlzr.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/common/normlzr.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/common/normlzr.cpp

===================================================================

--- icu46/source/common/normlzr.cpp (revision 0)

+++ icu46/source/common/normlzr.cpp (revision 0)

@@ -0,0 +1,522 @@

+/*

+ *************************************************************************

+ * COPYRIGHT:

+ *************************************************************************

+ */

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_NORMALIZATION

+#include "unicode/uniset.h"

+#include "unicode/unistr.h"

+#include "unicode/chariter.h"

+#include "unicode/schriter.h"

+#include "unicode/uchriter.h"

+#include "unicode/normlzr.h"

+#include "cmemory.h"

+#include "normalizer2impl.h"

+#include "uprops.h" // for uniset_getUnicode32Instance()

+U_NAMESPACE_BEGIN

+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)

+//-------------------------------------------------------------------------

+// Constructors and other boilerplate

+//-------------------------------------------------------------------------

+/**
+ * Creates a Normalizer that iterates over the given string.
+ *
+ * The input is wrapped in a newly allocated StringCharacterIterator stored in
+ * <tt>text</tt>. Options start at 0, both input indices start at 0 and the
+ * output buffer starts empty; init() then selects the normalizer for the mode.
+ *
+ * @param str  the text to be normalized
+ * @param mode the normalization mode
+ */
+Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
+    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
+    text(new StringCharacterIterator(str)),
+    currentIndex(0), nextIndex(0),
+    buffer(), bufferPos(0)
+{
+    init();
+}

+/**
+ * Creates a Normalizer that iterates over a UChar array.
+ *
+ * The input is wrapped in a newly allocated UCharCharacterIterator stored in
+ * <tt>text</tt>. Options start at 0, both input indices start at 0 and the
+ * output buffer starts empty; init() then selects the normalizer for the mode.
+ *
+ * @param str    pointer to the text to be normalized
+ * @param length length argument passed on to UCharCharacterIterator
+ * @param mode   the normalization mode
+ */
+Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
+    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
+    text(new UCharCharacterIterator(str, length)),
+    currentIndex(0), nextIndex(0),
+    buffer(), bufferPos(0)
+{
+    init();
+}

+/**
+ * Creates a Normalizer that iterates over the text of a CharacterIterator.
+ *
+ * The iterator is cloned and the clone is stored in <tt>text</tt>. Options
+ * start at 0, both input indices start at 0 and the output buffer starts
+ * empty; init() then selects the normalizer for the mode.
+ *
+ * @param iter the iterator whose clone supplies the input text
+ * @param mode the normalization mode
+ */
+Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
+    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
+    text(iter.clone()),
+    currentIndex(0), nextIndex(0),
+    buffer(), bufferPos(0)
+{
+    init();
+}

+/**
+ * Copy constructor.
+ *
+ * Copies the mode, options, input indices, output buffer and buffer position,
+ * and clones the source's text iterator. The normalizer pointers are not
+ * copied: they start as NULL and are re-derived by init() from the copied
+ * mode and options.
+ *
+ * @param copy the Normalizer to copy
+ */
+Normalizer::Normalizer(const Normalizer &copy) :
+    UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions),
+    text(copy.text->clone()),
+    currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
+    buffer(copy.buffer), bufferPos(copy.bufferPos)
+{
+    init();
+}

+// File-local UChar constant with value 0; it is not referenced in the visible part of this file.
+static const UChar _NUL=0;

+/**
+ * (Re)selects the Normalizer2 used for iteration from fUMode and fOptions.
+ *
+ * The instance for fUMode is fetched from Normalizer2Factory. If the
+ * UNORM_UNICODE_3_2 option bit is set, any previously created filtered
+ * normalizer is deleted and a new FilteredNormalizer2 is built around that
+ * instance and the set from uniset_getUnicode32Instance(); fNorm2 then points
+ * to the filtered normalizer. If any step reported a failure, fNorm2 falls
+ * back to the factory's no-op instance.
+ */
+void
+Normalizer::init() {
+    UErrorCode errorCode=U_ZERO_ERROR;
+    fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
+    if(fOptions&UNORM_UNICODE_3_2) {
+        // Replace, rather than reuse, a filter built for an earlier mode.
+        delete fFilteredNorm2;
+        fNorm2=fFilteredNorm2=
+            new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode));
+    }
+    if(U_FAILURE(errorCode)) {
+        errorCode=U_ZERO_ERROR;
+        fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
+    }
+}

+/**
+ * Destructor. Deletes the filtered normalizer (if init() created one) and the
+ * text iterator held in <tt>text</tt>. fNorm2 itself is not deleted here.
+ */
+Normalizer::~Normalizer()
+{
+    delete fFilteredNorm2;
+    delete text;
+}

+/**
+ * Returns a newly allocated copy of this Normalizer, created with the copy
+ * constructor.
+ */
+Normalizer*
+Normalizer::clone() const
+{
+    return new Normalizer(*this);
+}

+/**
+ * Generates a hash code for this iterator.
+ *
+ * The value is the plain sum of the text iterator's hash code, the mode, the
+ * options, the output buffer's hash code, the buffer position and both input
+ * indices.
+ */
+int32_t Normalizer::hashCode() const
+{
+    return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
+}

+/**
+ * Equality comparison.
+ *
+ * Two Normalizers are equal if they are the same object, or if their mode,
+ * options, text iterators, output buffers, buffer positions and nextIndex
+ * values all compare equal. currentIndex is not part of the comparison.
+ *
+ * @param that the Normalizer to compare with
+ * @return TRUE if the two objects are equal by the rule above
+ */
+UBool Normalizer::operator==(const Normalizer& that) const
+{
+    return
+        this==&that ||
+        (fUMode==that.fUMode &&
+        fOptions==that.fOptions &&
+        *text==*that.text &&
+        buffer==that.buffer &&
+        bufferPos==that.bufferPos &&
+        nextIndex==that.nextIndex);
+}

+//-------------------------------------------------------------------------

+// Static utility methods

+//-------------------------------------------------------------------------

+/**
+ * Normalizes a whole string in one call.
+ *
+ * @param source  the string to normalize; a bogus string is rejected with
+ *                U_ILLEGAL_ARGUMENT_ERROR
+ * @param mode    the normalization mode
+ * @param options option bits; UNORM_UNICODE_3_2 routes the work through a
+ *                FilteredNormalizer2 built with uniset_getUnicode32Instance()
+ * @param result  receives the normalized text; may be the same object as
+ *                source. Set to bogus if source is bogus or status already
+ *                indicates a failure on entry.
+ * @param status  in/out error code
+ */
+void U_EXPORT2
+Normalizer::normalize(const UnicodeString& source,
+                      UNormalizationMode mode, int32_t options,
+                      UnicodeString& result,
+                      UErrorCode &status) {
+    if(source.isBogus() || U_FAILURE(status)) {
+        result.setToBogus();
+        if(U_SUCCESS(status)) {
+            status=U_ILLEGAL_ARGUMENT_ERROR;
+        }
+    } else {
+        UnicodeString localDest;
+        UnicodeString *dest;
+        if(&source!=&result) {
+            dest=&result;
+        } else {
+            // the source and result strings are the same object, use a temporary one
+            dest=&localDest;
+        }
+        const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
+        if(U_SUCCESS(status)) {
+            if(options&UNORM_UNICODE_3_2) {
+                FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
+                    normalize(source, *dest, status);
+            } else {
+                n2->normalize(source, *dest, status);
+            }
+        }
+        // Copy the temporary back only when it was used and the call succeeded.
+        if(dest==&localDest && U_SUCCESS(status)) {
+            result=*dest;
+        }
+    }
+}

+/**
+ * Composes a string by forwarding to normalize().
+ *
+ * @param source  the string to compose
+ * @param compat  selects UNORM_NFKC when true, UNORM_NFC otherwise
+ * @param options option bits, passed through to normalize()
+ * @param result  receives the composed text
+ * @param status  in/out error code
+ */
+void U_EXPORT2
+Normalizer::compose(const UnicodeString& source,
+                    UBool compat, int32_t options,
+                    UnicodeString& result,
+                    UErrorCode &status) {
+    normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status);
+}

+/**
+ * Decomposes a string by forwarding to normalize().
+ *
+ * @param source  the string to decompose
+ * @param compat  selects UNORM_NFKD when true, UNORM_NFD otherwise
+ * @param options option bits, passed through to normalize()
+ * @param result  receives the decomposed text
+ * @param status  in/out error code
+ */
+void U_EXPORT2
+Normalizer::decompose(const UnicodeString& source,
+                      UBool compat, int32_t options,
+                      UnicodeString& result,
+                      UErrorCode &status) {
+    normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status);
+}

+/**
+ * Runs the quick check of the Normalizer2 for the given mode on a string.
+ *
+ * @param source  the string to check
+ * @param mode    the normalization mode
+ * @param options option bits; UNORM_UNICODE_3_2 routes the check through a
+ *                FilteredNormalizer2 built with uniset_getUnicode32Instance()
+ * @param status  in/out error code
+ * @return the result of the underlying quickCheck(), or UNORM_MAYBE if the
+ *         Normalizer2 instance could not be obtained
+ */
+UNormalizationCheckResult
+Normalizer::quickCheck(const UnicodeString& source,
+                       UNormalizationMode mode, int32_t options,
+                       UErrorCode &status) {
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
+    if(U_SUCCESS(status)) {
+        if(options&UNORM_UNICODE_3_2) {
+            return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
+                quickCheck(source, status);
+        } else {
+            return n2->quickCheck(source, status);
+        }
+    } else {
+        return UNORM_MAYBE;
+    }
+}

+/**
+ * Tests whether a string is normalized according to the given mode.
+ *
+ * @param source  the string to test
+ * @param mode    the normalization mode
+ * @param options option bits; UNORM_UNICODE_3_2 routes the test through a
+ *                FilteredNormalizer2 built with uniset_getUnicode32Instance()
+ * @param status  in/out error code
+ * @return the result of the underlying isNormalized(), or FALSE if the
+ *         Normalizer2 instance could not be obtained
+ */
+UBool
+Normalizer::isNormalized(const UnicodeString& source,
+                         UNormalizationMode mode, int32_t options,
+                         UErrorCode &status) {
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
+    if(U_SUCCESS(status)) {
+        if(options&UNORM_UNICODE_3_2) {
+            return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
+                isNormalized(source, status);
+        } else {
+            return n2->isNormalized(source, status);
+        }
+    } else {
+        return FALSE;
+    }
+}

+/**
+ * Concatenates two strings using the Normalizer2 append() operation for the
+ * given mode: result is first set to left, then right is appended to it.
+ *
+ * @param left      the first string; a bogus string is rejected
+ * @param right     the second string; a bogus string is rejected. May be the
+ *                  same object as result, in which case a temporary is used.
+ * @param result    receives the concatenation; set to bogus if either input is
+ *                  bogus or errorCode already indicates a failure on entry
+ * @param mode      the normalization mode
+ * @param options   option bits; UNORM_UNICODE_3_2 routes the work through a
+ *                  FilteredNormalizer2 built with uniset_getUnicode32Instance()
+ * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for a
+ *                  bogus input
+ * @return result
+ */
+UnicodeString & U_EXPORT2
+Normalizer::concatenate(UnicodeString &left, UnicodeString &right,
+                        UnicodeString &result,
+                        UNormalizationMode mode, int32_t options,
+                        UErrorCode &errorCode) {
+    if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) {
+        result.setToBogus();
+        if(U_SUCCESS(errorCode)) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        }
+    } else {
+        UnicodeString localDest;
+        UnicodeString *dest;
+        if(&right!=&result) {
+            dest=&result;
+        } else {
+            // the right and result strings are the same object, use a temporary one
+            dest=&localDest;
+        }
+        *dest=left;
+        const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
+        if(U_SUCCESS(errorCode)) {
+            if(options&UNORM_UNICODE_3_2) {
+                FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)).
+                    append(*dest, right, errorCode);
+            } else {
+                n2->append(*dest, right, errorCode);
+            }
+        }
+        // Copy the temporary back only when it was used and the call succeeded.
+        if(dest==&localDest && U_SUCCESS(errorCode)) {
+            result=*dest;
+        }
+    }
+    return result;
+}

+//-------------------------------------------------------------------------

+// Iteration API

+//-------------------------------------------------------------------------

+/**
+ * Return the current character in the normalized text.
+ *
+ * If the output buffer is exhausted, the next input segment is normalized
+ * first via nextNormalize(). The buffer position is not advanced past the
+ * returned character.
+ *
+ * @return the code point at the current buffer position, or DONE if no
+ *         further normalized text could be produced
+ */
+UChar32 Normalizer::current() {
+    if(bufferPos<buffer.length() || nextNormalize()) {
+        return buffer.char32At(bufferPos);
+    } else {
+        return DONE;
+    }
+}

+/**
+ * Return the next character in the normalized text and advance
+ * the iteration position by one. If the end
+ * of the text has already been reached, {@link #DONE} is returned.
+ *
+ * When the output buffer is exhausted, nextNormalize() is called to refill
+ * it. The buffer position moves forward by UTF_CHAR_LENGTH of the returned
+ * code point.
+ */
+UChar32 Normalizer::next() {
+    if(bufferPos<buffer.length() || nextNormalize()) {
+        UChar32 c=buffer.char32At(bufferPos);
+        bufferPos+=UTF_CHAR_LENGTH(c);
+        return c;
+    } else {
+        return DONE;
+    }
+}

+/**
+ * Return the previous character in the normalized text and decrement
+ * the iteration position by one. If the beginning
+ * of the text has already been reached, {@link #DONE} is returned.
+ *
+ * When the buffer position is already 0, previousNormalize() is called to
+ * load the preceding segment. The code point is read at bufferPos-1 and the
+ * buffer position moves back by UTF_CHAR_LENGTH of that code point.
+ */
+UChar32 Normalizer::previous() {
+    if(bufferPos>0 || previousNormalize()) {
+        UChar32 c=buffer.char32At(bufferPos-1);
+        bufferPos-=UTF_CHAR_LENGTH(c);
+        return c;
+    } else {
+        return DONE;
+    }
+}

+/**
+ * Resets the iteration: moves the text iterator to its start, sets both input
+ * indices to the position it reports, and empties the output buffer.
+ */
+void Normalizer::reset() {
+    currentIndex=nextIndex=text->setToStart();
+    clearBuffer();
+}

+/**
+ * Moves the input position to the given index without normalizing anything.
+ *
+ * Both input indices are taken from the text iterator after the move, not
+ * from the argument, so they reflect whatever position the iterator settled
+ * on. The output buffer is emptied.
+ *
+ * @param index the requested position in the input text
+ */
+void
+Normalizer::setIndexOnly(int32_t index) {
+    text->setIndex(index);  // pins index
+    currentIndex=nextIndex=text->getIndex();
+    clearBuffer();
+}

+/**
+ * Return the first character in the normalized text. This resets
+ * the <tt>Normalizer's</tt> position to the beginning of the text
+ * and then returns the result of next().
+ */
+UChar32 Normalizer::first() {
+    reset();
+    return next();
+}

+/**
+ * Return the last character in the normalized text. This resets
+ * the <tt>Normalizer's</tt> position to be just before
+ * the input text corresponding to that normalized character.
+ *
+ * The text iterator is moved to its end, both input indices are set to that
+ * position, the output buffer is emptied, and previous() supplies the result.
+ */
+UChar32 Normalizer::last() {
+    currentIndex=nextIndex=text->setToEnd();
+    clearBuffer();
+    return previous();
+}

+/**
+ * Retrieve the current iteration position in the input text that is
+ * being normalized. This method is useful in applications such as
+ * searching, where you need to be able to determine the position in
+ * the input text that corresponds to a given normalized output character.
+ *
+ * Note: This method reports a position in the input, while
+ * {@link #next} and {@link #previous} iterate through characters in the
+ * output. This means that there is not necessarily a one-to-one
+ * correspondence between characters returned by <tt>next</tt> and
+ * <tt>previous</tt> and the indices passed to and returned from
+ * <tt>setIndex</tt> and {@link #getIndex}.
+ *
+ * @return currentIndex while unread characters remain in the output buffer,
+ *         otherwise nextIndex
+ */
+int32_t Normalizer::getIndex() const {
+    if(bufferPos<buffer.length()) {
+        return currentIndex;
+    } else {
+        return nextIndex;
+    }
+}

+/**
+ * Retrieve the index of the start of the input text. This is the begin index
+ * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
+ * over which this <tt>Normalizer</tt> is iterating.
+ *
+ * @return the value reported by the text iterator's startIndex()
+ */
+int32_t Normalizer::startIndex() const {
+    return text->startIndex();
+}

+/**
+ * Retrieve the index of the end of the input text. This is the end index
+ * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
+ * over which this <tt>Normalizer</tt> is iterating.
+ *
+ * @return the value reported by the text iterator's endIndex()
+ */
+int32_t Normalizer::endIndex() const {
+    return text->endIndex();
+}

+//-------------------------------------------------------------------------

+// Property access methods

+//-------------------------------------------------------------------------

+/**
+ * Sets the normalization mode and re-runs init() so that the normalizer in
+ * use matches the new mode. The iteration position and output buffer are not
+ * touched here.
+ *
+ * @param newMode the normalization mode to use from now on
+ */
+void
+Normalizer::setMode(UNormalizationMode newMode)
+{
+    fUMode = newMode;
+    init();
+}

+/**
+ * Returns the normalization mode currently stored in this object.
+ */
+UNormalizationMode
+Normalizer::getUMode() const
+{
+    return fUMode;
+}

+/**
+ * Sets or clears option bits and re-runs init() so that the normalizer in
+ * use matches the new options.
+ *
+ * @param option the option bit(s) to change
+ * @param value  true to set the bit(s), false to clear them
+ */
+void
+Normalizer::setOption(int32_t option,
+                      UBool value)
+{
+    if (value) {
+        fOptions |= option;
+    } else {
+        fOptions &= (~option);
+    }
+    init();
+}

+/**
+ * Tests whether any of the given option bits is currently set.
+ *
+ * @param option the option bit(s) to test
+ * @return true if (fOptions & option) is non-zero
+ */
+UBool
+Normalizer::getOption(int32_t option) const
+{
+    return (fOptions & option) != 0;
+}

+/**
+ * Set the input text over which this <tt>Normalizer</tt> will iterate.
+ * The iteration position is set to the beginning of the input text.
+ *
+ * The new iterator is allocated before the old one is deleted, so on failure
+ * the Normalizer keeps its previous text.
+ *
+ * @param newText the new input text
+ * @param status  in/out error code; nothing is done if it already indicates a
+ *                failure, and it is set to U_MEMORY_ALLOCATION_ERROR if the
+ *                new iterator cannot be allocated
+ */
+void
+Normalizer::setText(const UnicodeString& newText,
+                    UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    CharacterIterator *newIter = new StringCharacterIterator(newText);
+    if (newIter == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    delete text;
+    text = newIter;
+    reset();
+}

+/**
+ * Set the input text over which this <tt>Normalizer</tt> will iterate.
+ * The iteration position is set to the beginning of the string.
+ *
+ * The given iterator is cloned before the old one is deleted, so on failure
+ * the Normalizer keeps its previous text.
+ *
+ * @param newText the iterator whose clone supplies the new input text
+ * @param status  in/out error code; nothing is done if it already indicates a
+ *                failure, and it is set to U_MEMORY_ALLOCATION_ERROR if
+ *                clone() returns NULL
+ */
+void
+Normalizer::setText(const CharacterIterator& newText,
+                    UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    CharacterIterator *newIter = newText.clone();
+    if (newIter == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    delete text;
+    text = newIter;
+    reset();
+}

+/**
+ * Set the input text over which this <tt>Normalizer</tt> will iterate.
+ * The iteration position is set to the beginning of the input text.
+ *
+ * The new iterator is allocated before the old one is deleted, so on failure
+ * the Normalizer keeps its previous text.
+ *
+ * @param newText pointer to the new input text
+ * @param length  length argument passed on to UCharCharacterIterator
+ * @param status  in/out error code; nothing is done if it already indicates a
+ *                failure, and it is set to U_MEMORY_ALLOCATION_ERROR if the
+ *                new iterator cannot be allocated
+ */
+void
+Normalizer::setText(const UChar* newText,
+                    int32_t length,
+                    UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
+    if (newIter == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    delete text;
+    text = newIter;
+    reset();
+}

+/**
+ * Copies the text under iteration into the UnicodeString referred to by "result".
+ * The work is delegated to the text iterator's getText().
+ * @param result Receives a copy of the text under iteration.
+ */
+void
+Normalizer::getText(UnicodeString& result)
+{
+    text->getText(result);
+}

+//-------------------------------------------------------------------------

+// Private utility methods

+//-------------------------------------------------------------------------

+/**
+ * Empties the output buffer and resets the buffer position to 0.
+ */
+void Normalizer::clearBuffer() {
+    buffer.remove();
+    bufferPos=0;
+}

+/**
+ * Normalizes the next segment of input text into the output buffer.
+ *
+ * Starting at nextIndex, code points are collected until the input ends or
+ * fNorm2 reports a boundary before the next code point; that code point is
+ * left unread for the following call. On return, currentIndex holds the start
+ * of the segment, nextIndex the position after it, and the buffer position
+ * is 0.
+ *
+ * @return TRUE if normalization succeeded and produced a non-empty buffer;
+ *         FALSE if there was no more input, normalization failed, or the
+ *         result was empty
+ */
+UBool
+Normalizer::nextNormalize() {
+    clearBuffer();
+    currentIndex=nextIndex;
+    text->setIndex(nextIndex);
+    if(!text->hasNext()) {
+        return FALSE;
+    }
+    // Skip at least one character so we make progress.
+    UnicodeString segment(text->next32PostInc());
+    while(text->hasNext()) {
+        UChar32 c;
+        if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) {
+            // c starts the next segment: step back so it is read again later.
+            text->move32(-1, CharacterIterator::kCurrent);
+            break;
+        }
+        segment.append(c);
+    }
+    nextIndex=text->getIndex();
+    UErrorCode errorCode=U_ZERO_ERROR;
+    fNorm2->normalize(segment, buffer, errorCode);
+    return U_SUCCESS(errorCode) && !buffer.isEmpty();
+}

+/**
+ * Normalizes the segment of input text that precedes currentIndex into the
+ * output buffer.
+ *
+ * Code points are read backwards and prepended to the segment until the input
+ * start is reached or fNorm2 reports a boundary before the code point just
+ * read, which is kept as the first code point of the segment. On return,
+ * nextIndex holds the old currentIndex, currentIndex the start of the
+ * segment, and the buffer position is at the end of the buffer so that
+ * previous() can walk it backwards.
+ *
+ * @return TRUE if normalization succeeded and produced a non-empty buffer;
+ *         FALSE if there was no preceding input, normalization failed, or the
+ *         result was empty
+ */
+UBool
+Normalizer::previousNormalize() {
+    clearBuffer();
+    nextIndex=currentIndex;
+    text->setIndex(currentIndex);
+    if(!text->hasPrevious()) {
+        return FALSE;
+    }
+    UnicodeString segment;
+    while(text->hasPrevious()) {
+        UChar32 c=text->previous32();
+        segment.insert(0, c);
+        if(fNorm2->hasBoundaryBefore(c)) {
+            break;
+        }
+    }
+    currentIndex=text->getIndex();
+    UErrorCode errorCode=U_ZERO_ERROR;
+    fNorm2->normalize(segment, buffer, errorCode);
+    bufferPos=buffer.length();
+    return U_SUCCESS(errorCode) && !buffer.isEmpty();
+}

+U_NAMESPACE_END

+#endif /* #if !UCONFIG_NO_NORMALIZATION */

Property changes on: icu46/source/common/normlzr.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/common/normalizer2impl.cpp ('k') | icu46/source/common/parsepos.cpp » ('j') | no next file with comments »