Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Unified Diff: source/i18n/collationdatareader.h

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/i18n/collationdatabuilder.cpp ('k') | source/i18n/collationdatareader.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/i18n/collationdatareader.h
diff --git a/source/i18n/collationdatareader.h b/source/i18n/collationdatareader.h
new file mode 100644
index 0000000000000000000000000000000000000000..df65b862ab4049459287d9d56016166275995c50
--- /dev/null
+++ b/source/i18n/collationdatareader.h
@@ -0,0 +1,208 @@
+/*
+*******************************************************************************
+* Copyright (C) 2013-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* collationdatareader.h
+*
+* created on: 2013feb07
+* created by: Markus W. Scherer
+*/
+
+#ifndef __COLLATIONDATAREADER_H__
+#define __COLLATIONDATAREADER_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/udata.h"
+
+struct UDataMemory;
+
+U_NAMESPACE_BEGIN
+
+struct CollationTailoring;
+
+/**
+ * Collation binary data reader: an all-static struct holding the IX_* positions in the int32_t indexes[] array and the read entry points.
+ */
+struct U_I18N_API CollationDataReader /* all static */ {
+ // The following constants are also copied into source/common/ucol_swp.cpp.
+ // Keep them in sync!
+ enum {
+ /**
+ * Number of int32_t indexes.
+ *
+ * Can be 2 if there are only options.
+ * Can be 7 or 8 if there are only options and a script reordering.
+ * The loader treats any index>=indexes[IX_INDEXES_LENGTH] as 0.
+ */
+ IX_INDEXES_LENGTH, // 0
+ /**
+ * Bits 31..24: numericPrimary, for numeric collation
+ * 23..16: fast Latin format version (0 = no fast Latin table)
+ * 15.. 0: options bit set
+ */
+ IX_OPTIONS, // 1
+ IX_RESERVED2, // 2
+ IX_RESERVED3, // 3
+
+ /** Array offset to Jamo CE32s in ce32s[], or <0 if none. */
+ IX_JAMO_CE32S_START, // 4
+
+ // Byte offsets from the start of the data, after the generic header.
+ // The indexes[] are at byte offset 0, other data follows.
+ // Each data item is aligned properly.
+ // The data items should be in descending order of unit size,
+ // to minimize the need for padding.
+ // Each item's byte length is given by the difference between its offset and
+ // the next index/offset value.
+ /** Byte offset to int32_t reorderCodes[]. */
+ IX_REORDER_CODES_OFFSET, // 5
+ /**
+ * Byte offset to uint8_t reorderTable[].
+ * Empty table if <256 bytes (padding only).
+ * Otherwise 256 bytes or more (with padding).
+ */
+ IX_REORDER_TABLE_OFFSET, // 6
+ /** Byte offset to the collation trie. Its length is a multiple of 8 bytes. */
+ IX_TRIE_OFFSET, // 7
+
+ IX_RESERVED8_OFFSET, // 8
+ /** Byte offset to int64_t ces[]. */
+ IX_CES_OFFSET, // 9
+ IX_RESERVED10_OFFSET, // 10
+ /** Byte offset to uint32_t ce32s[]. */
+ IX_CE32S_OFFSET, // 11
+
+ /** Byte offset to uint32_t rootElements[]. */
+ IX_ROOT_ELEMENTS_OFFSET, // 12
+ /** Byte offset to UChar *contexts[]. */
+ IX_CONTEXTS_OFFSET, // 13
+ /** Byte offset to uint16_t [] with serialized unsafeBackwardSet. */
+ IX_UNSAFE_BWD_OFFSET, // 14
+ /** Byte offset to uint16_t fastLatinTable[]. */
+ IX_FAST_LATIN_TABLE_OFFSET, // 15
+
+ /** Byte offset to uint16_t scripts[]. */
+ IX_SCRIPTS_OFFSET, // 16
+ /**
+ * Byte offset to UBool compressibleBytes[].
+ * Empty table if <256 bytes (padding only).
+ * Otherwise 256 bytes or more (with padding).
+ */
+ IX_COMPRESSIBLE_BYTES_OFFSET, // 17
+ IX_RESERVED18_OFFSET, // 18
+ IX_TOTAL_SIZE // 19: last byte offset, i.e. the end of the final data part
+ };
+ // Reads binary collation data from inBytes into tailoring. NOTE(review): the role of base and the meaning of inLength are defined in the .cpp -- confirm there.
+ static void read(const CollationTailoring *base, const uint8_t *inBytes, int32_t inLength,
+ CollationTailoring &tailoring, UErrorCode &errorCode);
+ // NOTE(review): presumably the data-acceptance callback passed to udata_openChoice() (see unicode/udata.h) -- confirm against the caller.
+ static UBool U_CALLCONV
+ isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
+
+private:
+ CollationDataReader(); // private: this all-static struct is not meant to be instantiated
+};
+
+/*
+ * Format of collation data (ucadata.icu, binary data in coll/ *.res files).
+ * Format version 4.0.
+ *
+ * The root collation data is stored in the ucadata.icu file.
+ * Tailorings are stored inside .res resource bundle files, with a complete file header.
+ *
+ * Collation data begins with a standard ICU data file header
+ * (DataHeader, see ucmndata.h and unicode/udata.h).
+ * The UDataInfo.dataVersion field contains the UCA and other version numbers,
+ * see the comments for CollationTailoring.version.
+ *
+ * After the header, the file contains the following parts.
+ * Constants are defined as enum values of the CollationDataReader class.
+ * See also the Collation class.
+ *
+ * int32_t indexes[indexesLength];
+ * The indexes array has variable length.
+ * Some tailorings only need the length and the options,
+ * others only add reorderCodes and the reorderTable,
+ * some need to store mappings.
+ * Only as many indexes are stored as needed to read all of the data.
+ *
+ * Index 0: indexesLength
+ * Index 1: numericPrimary, CollationFastLatin::VERSION, and options: see IX_OPTIONS
+ * Index 2..3: Unused/reserved/0.
+ * Index 4: Index into the ce32s array where the CE32s of the conjoining Jamo
+ * are stored in a short, contiguous part of the ce32s array.
+ *
+ * Indexes 5..19 are byte offsets in ascending order.
+ * Each byte offset marks the start of the next part in the data file,
+ * and the end of the previous one.
+ * When two consecutive byte offsets are the same (or too short),
+ * then the corresponding part is empty.
+ * Byte offsets are offsets from after the header,
+ * that is, from the beginning of the indexes[].
+ * Each part starts at an offset with proper alignment for its data.
+ * If necessary, the previous part may include padding bytes to achieve this alignment.
+ * The last byte offset that is stored in the indexes indicates the total size of the data
+ * (starting with the indexes).
+ *
+ * int32_t reorderCodes[]; -- empty in root
+ * The list of script and reordering codes.
+ *
+ * uint8_t reorderTable[256]; -- empty in root; can be longer to include padding bytes
+ * Primary-weight lead byte permutation table.
+ * Normally present when the reorderCodes are, but can be built at load time.
+ *
+ * UTrie2 trie; -- see utrie2_impl.h and utrie2.h
+ * The trie holds the main collation data. Each code point is mapped to a 32-bit value.
+ * It encodes a simple collation element (CE) in compact form, unless bits 7..6 are both set,
+ * in which case it is a special CE32 and contains a 4-bit tag and further data.
+ * See the Collation class for details.
+ *
+ * The trie has a value for each lead surrogate code unit with some bits encoding
+ * collective properties of the 1024 supplementary characters whose UTF-16 form starts with
+ * the lead surrogate. See Collation::LEAD_SURROGATE_TAG.
+ *
+ * int64_t ces[];
+ * 64-bit CEs and expansions that cannot be stored in a more compact form.
+ *
+ * uint32_t ce32s[];
+ * CE32s for expansions in compact form, and for characters whose trie values
+ * contain special data.
+ *
+ * uint32_t rootElements[]; -- empty in all tailorings
+ * Compact storage for all of the CEs that occur in the root collation.
+ * See the CollationRootElements class.
+ *
+ * UChar *contexts[];
+ * Serialized UCharsTrie structures with prefix (pre-context) and contraction mappings.
+ *
+ * uint16_t unsafeBackwardSet[]; -- see UnicodeSet::serialize()
+ * Serialized form of characters that are unsafe when iterating backwards,
+ * and at the end of an identical string prefix.
+ * Back up to a safe character.
+ * Lead surrogates are "unsafe" when any of their corresponding supplementary
+ * code points are unsafe.
+ * Does not include [:^lccc=0:][:^tccc=0:].
+ * For each tailoring, the root unsafeBackwardSet is subtracted.
+ * (As a result, in many tailorings no set needs to be stored.)
+ *
+ * uint16_t fastLatinTable[];
+ * Optional optimization for Latin text.
+ * See the CollationFastLatin class.
+ *
+ * uint16_t scripts[]; -- empty in all tailorings
+ * Table of the reordering groups with their first and last lead bytes,
+ * and their script and reordering codes.
+ * See CollationData::scripts.
+ *
+ * UBool compressibleBytes[]; -- empty in all tailorings
+ * Flag for getSortKey(), indicating primary weight lead bytes that are compressible.
+ */
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_COLLATION
+#endif // __COLLATIONDATAREADER_H__
« no previous file with comments | « source/i18n/collationdatabuilder.cpp ('k') | source/i18n/collationdatareader.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698