source/i18n/utf8collationiterator.h - Issue 845603002: Update ICU to 54.1 step 1

Unified Diff: source/i18n/utf8collationiterator.h

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unused directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/i18n/utf8collationiterator.h

diff --git a/source/i18n/utf8collationiterator.h b/source/i18n/utf8collationiterator.h

new file mode 100644

index 0000000000000000000000000000000000000000..3ec348d9f2ed98f555e3c96e8449a08bbb65c58c

--- /dev/null

+++ b/source/i18n/utf8collationiterator.h

@@ -0,0 +1,171 @@

+/*

+*******************************************************************************

+* utf8collationiterator.h

+* created on: 2012nov12 (from utf16collationiterator.h & uitercollationiterator.h)

+* created by: Markus W. Scherer

+*/

+#ifndef __UTF8COLLATIONITERATOR_H__

+#define __UTF8COLLATIONITERATOR_H__

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_COLLATION

+#include "cmemory.h"

+#include "collation.h"

+#include "collationdata.h"

+#include "normalizer2impl.h"

+U_NAMESPACE_BEGIN

+/**

+ * UTF-8 collation element and character iterator.

+ * Handles normalized UTF-8 text inline, with length or NUL-terminated.

+ * Unnormalized text is handled by a subclass.

+ *

+ * The constructor forwards d and numeric to CollationIterator and copies

+ * s, p and len into u8, pos and length without any checks (its body is empty).

+ * A negative len marks NUL-terminated text.

+ * All virtual members are only declared in this header; their definitions

+ * are not visible here.

+ */

+class U_I18N_API UTF8CollationIterator : public CollationIterator {

+public:

+ UTF8CollationIterator(const CollationData *d, UBool numeric,

+ const uint8_t *s, int32_t p, int32_t len)

+ : CollationIterator(d, numeric), // base class receives the collation data and the numeric flag

+ u8(s), pos(p), length(len) {} // plain member copies; nothing else happens at construction

+ virtual ~UTF8CollationIterator();

+ virtual void resetToOffset(int32_t newOffset);

+ virtual int32_t getOffset() const;

+ virtual UChar32 nextCodePoint(UErrorCode &errorCode);

+ virtual UChar32 previousCodePoint(UErrorCode &errorCode);

+protected:

+ /**

+ * For byte sequences that are illegal in UTF-8, an error value may be returned

+ * together with a bogus code point. The caller will ignore that code point.

+ *

+ * Special values may be returned for surrogate code points, which are also illegal in UTF-8,

+ * but the caller will treat them like U+FFFD because forbidSurrogateCodePoints() returns TRUE.

+ *

+ * Valid lead surrogates are returned from inside a normalized text segment,

+ * where handleGetTrailSurrogate() will return the matching trail surrogate.

+ */

+ virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);

+ virtual UBool foundNULTerminator();

+ virtual UBool forbidSurrogateCodePoints() const;

+ virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);

+ virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);

+ const uint8_t *u8; // text pointer, set from constructor argument s

+ int32_t pos; // set from constructor argument p; presumably the current index into u8 (confirm in the .cpp)

+ int32_t length; // <0 for NUL-terminated strings; set from constructor argument len

+};

+/**

+ * Incrementally checks the input text for FCD and normalizes where necessary.

+ *

+ * The constructor forwards all five arguments to UTF8CollationIterator,

+ * starts in the CHECK_FWD state with start set to p, and binds nfcImpl to

+ * data->nfcImpl. limit is not initialized by the constructor, which matches

+ * the CHECK_FWD description ("limit is undefined").

+ */

+class U_I18N_API FCDUTF8CollationIterator : public UTF8CollationIterator {

+public:

+ FCDUTF8CollationIterator(const CollationData *data, UBool numeric,

+ const uint8_t *s, int32_t p, int32_t len)

+ : UTF8CollationIterator(data, numeric, s, p, len),

+ state(CHECK_FWD), start(p), // forward-checking state; the checked segment begins at p

+ nfcImpl(data->nfcImpl) {} // dereferences data, so data must not be NULL

+ virtual ~FCDUTF8CollationIterator();

+ virtual void resetToOffset(int32_t newOffset);

+ virtual int32_t getOffset() const;

+ virtual UChar32 nextCodePoint(UErrorCode &errorCode);

+ virtual UChar32 previousCodePoint(UErrorCode &errorCode);

+protected:

+ virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);

+ virtual UChar handleGetTrailSurrogate();

+ virtual UBool foundNULTerminator();

+ virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);

+ virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);

+private:

+ UBool nextHasLccc() const; // NOTE(review): undocumented here; presumably tests the lead combining class of the next character - confirm in the .cpp

+ UBool previousHasTccc() const; // NOTE(review): undocumented here; presumably tests the trail combining class of the previous character - confirm in the .cpp

+ /**

+ * Switches to forward checking if possible.

+ */

+ void switchToForward();

+ /**

+ * Extends the FCD text segment forward or normalizes around pos.

+ * @return TRUE if success

+ */

+ UBool nextSegment(UErrorCode &errorCode);

+ /**

+ * Switches to backward checking.

+ */

+ void switchToBackward();

+ /**

+ * Extends the FCD text segment backward or normalizes around pos.

+ * @return TRUE if success

+ */

+ UBool previousSegment(UErrorCode &errorCode);

+ UBool normalize(const UnicodeString &s, UErrorCode &errorCode); // NOTE(review): undocumented here; presumably fills the normalized member from s - confirm in the .cpp

+ enum State {

+ /**

+ * The input text [start..pos[ passes the FCD check.

+ * Moving forward checks incrementally.

+ * limit is undefined.

+ */

+ CHECK_FWD,

+ /**

+ * The input text [pos..limit[ passes the FCD check.

+ * Moving backward checks incrementally.

+ * start is undefined.

+ */

+ CHECK_BWD,

+ /**

+ * The input text [start..limit[ passes the FCD check.

+ * pos tracks the current text index.

+ */

+ IN_FCD_SEGMENT,

+ /**

+ * The input text [start..limit[ failed the FCD check and was normalized.

+ * pos tracks the current index in the normalized string.

+ */

+ IN_NORMALIZED

+ };

+ State state; // CHECK_FWD right after construction

+ int32_t start; // segment start; set to p by the constructor, undefined in CHECK_BWD (see State)

+ int32_t limit; // segment limit; not set by the constructor, undefined in CHECK_FWD (see State)

+ const Normalizer2Impl &nfcImpl; // bound to data->nfcImpl by the constructor

+ UnicodeString normalized; // default-constructed; presumably the normalized string that pos indexes in IN_NORMALIZED (confirm in the .cpp)

+};

+U_NAMESPACE_END

+#endif // !UCONFIG_NO_COLLATION

+#endif // __UTF8COLLATIONITERATOR_H__

« no previous file with comments | « source/i18n/utf16collationiterator.cpp ('k') | source/i18n/utf8collationiterator.cpp » ('j') | no next file with comments »