public/common/unicode/uchar.h - Issue 18836004: Move ICU headers from public/{common,i18n} to source/{common,i18n}

Unified Diff: public/common/unicode/uchar.h

Issue 18836004: Move ICU headers from public/{common,i18n} to source/{common,i18n} (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu46.git@master

Patch Set: same as ps #3. retry uploading — Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: public/common/unicode/uchar.h

diff --git a/public/common/unicode/uchar.h b/public/common/unicode/uchar.h

deleted file mode 100644

index 93aa663206b2b4c4820b82cfc66c84a2d00ae9b2..0000000000000000000000000000000000000000

--- a/public/common/unicode/uchar.h

+++ /dev/null

@@ -1,3168 +0,0 @@

-/*

-**********************************************************************

-* File UCHAR.H

-* Modification History:

-* Date Name Description

-* 04/02/97 aliu Creation.

-* 03/29/99 helena Updated for C APIs.

-* 4/15/99 Madhu Updated for C Implementation and Javadoc

-* 5/20/99 Madhu Added the function u_getVersion()

-* 8/19/1999 srl Upgraded scripts to Unicode 3.0

-* 8/27/1999 schererm UCharDirection constants: U_...

-* 11/11/1999 weiv added u_isalnum(), cleaned comments

-* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().

-******************************************************************************

-*/

-#ifndef UCHAR_H

-#define UCHAR_H

-#include "unicode/utypes.h"

-U_CDECL_BEGIN

-/*==========================================================================*/

-/* Unicode version number */

-/*==========================================================================*/

-/**

- * Unicode version number, default for the current ICU version.

- * The actual Unicode Character Database (UCD) data is stored in uprops.dat

- * and may be generated from UCD files from a different Unicode version.

- * Call u_getUnicodeVersion to get the actual Unicode version of the data.

- *

- * @see u_getUnicodeVersion

- * @stable ICU 2.0

- */

-#define U_UNICODE_VERSION "6.0"

-/**

- * \file

- * \brief C API: Unicode Properties

- *

- * This C API provides low-level access to the Unicode Character Database.

- * In addition to raw property values, some convenience functions calculate

- * derived properties, for example for Java-style programming.

- *

- * Unicode assigns each code point (not just assigned character) values for

- * many properties.

- * Most of them are simple boolean flags, or constants from a small enumerated list.

- * For some properties, values are strings or other relatively more complex types.

- *

- * For more information see

- * "About the Unicode Character Database" (http://www.unicode.org/ucd/)

- * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).

- *

- * Many functions are designed to match java.lang.Character functions.

- * See the individual function documentation,

- * and see the JDK 1.4 java.lang.Character documentation

- * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html

- *

- * There are also functions that provide easy migration from C/POSIX functions

- * like isblank(). Their use is generally discouraged because the C/POSIX

- * standards do not define their semantics beyond the ASCII range, which means

- * that different implementations exhibit very different behavior.

- * Instead, Unicode properties should be used directly.

- *

- * There are also only a few, broad C/POSIX character classes, and they tend

- * to be used for conflicting purposes. For example, the "isalpha()" class

- * is sometimes used to determine word boundaries, while a more sophisticated

- * approach would at least distinguish initial letters from continuation

- * characters (the latter including combining marks).

- * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)

- * Another example: There is no "istitle()" class for titlecase characters.

- *

- * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.

- * ICU implements them according to the Standard Recommendations in

- * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions

- * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).

- *

- * API access for C/POSIX character classes is as follows:

- * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)

- * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)

- * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)

- * - punct: u_ispunct(c)

- * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER

- * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)

- * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)

- * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)

- * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)

- * - cntrl: u_charType(c)==U_CONTROL_CHAR

- * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)

- * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)

- *

- * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,

- * the Standard Recommendations in UTS #18. Instead, they match Java

- * functions according to their API documentation.

- *

- * \htmlonly

- * The C/POSIX character classes are also available in UnicodeSet patterns,

- * using patterns like [:graph:] or \p{graph}.

- * \endhtmlonly

- *

- * Note: There are several ICU whitespace functions.

- * Comparison:

- * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;

- * most of general categories "Z" (separators) + most whitespace ISO controls

- * (including no-break spaces, but excluding IS1..IS4 and ZWSP)

- * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces

- * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)

- * - u_isspace: Z + whitespace ISO controls (including no-break spaces)

- * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP

- */

-/**

- * Constants.

- */

-/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */

-#define UCHAR_MIN_VALUE 0

-/**

- * The highest Unicode code point value (scalar value) according to

- * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).

- * For a single character, UChar32 is a simple type that can hold any code point value.

- *

- * @see UChar32

- * @stable ICU 2.0

- */

-#define UCHAR_MAX_VALUE 0x10ffff

-/**

- * Get a single-bit bit set (a flag) from a bit number 0..31.

- * @stable ICU 2.1

- */

-#define U_MASK(x) ((uint32_t)1<<(x))

-/*

- * !! Note: Several comments in this file are machine-read by the

- * genpname tool. These comments describe the correspondence between

- * icu enum constants and UCD entities. Do not delete them. Update

- * these comments as needed.

- *

- * Any comment of the form "/ *[name]* /" (spaces added) is such

- * a comment.

- *

- * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic

- * name, which must match PropertyValueAliases.txt.

- */

-/**

- * Selection constants for Unicode properties.

- * These constants are used in functions like u_hasBinaryProperty to select

- * one of the Unicode properties.

- *

- * The properties APIs are intended to reflect Unicode properties as defined

- * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).

- * For details about the properties see http://www.unicode.org/ucd/ .

- * For names of Unicode properties see the UCD file PropertyAliases.txt.

- *

- * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,

- * then properties marked with "new in Unicode 3.2" are not or not fully available.

- * Check u_getUnicodeVersion to be sure.

- *

- * @see u_hasBinaryProperty

- * @see u_getIntPropertyValue

- * @see u_getUnicodeVersion

- * @stable ICU 2.1

- */

-typedef enum UProperty {

- /* See note !!. Comments of the form "Binary property Dash",

- "Enumerated property Script", "Double property Numeric_Value",

- and "String property Age" are read by genpname. */

- /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that

- debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,

- rather than UCHAR_BINARY_START. Likewise for other *_START

- identifiers. */

- /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.

- Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */

- UCHAR_ALPHABETIC=0,

- /** First constant for binary Unicode properties. @stable ICU 2.1 */

- UCHAR_BINARY_START=UCHAR_ALPHABETIC,

- /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */

- UCHAR_ASCII_HEX_DIGIT=1,

- /** Binary property Bidi_Control.

- Format controls which have specific functions

- in the Bidi Algorithm. @stable ICU 2.1 */

- UCHAR_BIDI_CONTROL=2,

- /** Binary property Bidi_Mirrored.

- Characters that may change display in RTL text.

- Same as u_isMirrored.

- See Bidi Algorithm, UTR 9. @stable ICU 2.1 */

- UCHAR_BIDI_MIRRORED=3,

- /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */

- UCHAR_DASH=4,

- /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).

- Ignorable in most processing.

- <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */

- UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,

- /** Binary property Deprecated (new in Unicode 3.2).

- The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */

- UCHAR_DEPRECATED=6,

- /** Binary property Diacritic. Characters that linguistically modify

- the meaning of another character to which they apply. @stable ICU 2.1 */

- UCHAR_DIACRITIC=7,

- /** Binary property Extender.

- Extend the value or shape of a preceding alphabetic character,

- e.g., length and iteration marks. @stable ICU 2.1 */

- UCHAR_EXTENDER=8,

- /** Binary property Full_Composition_Exclusion.

- CompositionExclusions.txt+Singleton Decompositions+

- Non-Starter Decompositions. @stable ICU 2.1 */

- UCHAR_FULL_COMPOSITION_EXCLUSION=9,

- /** Binary property Grapheme_Base (new in Unicode 3.2).

- For programmatic determination of grapheme cluster boundaries.

- [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */

- UCHAR_GRAPHEME_BASE=10,

- /** Binary property Grapheme_Extend (new in Unicode 3.2).

- For programmatic determination of grapheme cluster boundaries.

- Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */

- UCHAR_GRAPHEME_EXTEND=11,

- /** Binary property Grapheme_Link (new in Unicode 3.2).

- For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */

- UCHAR_GRAPHEME_LINK=12,

- /** Binary property Hex_Digit.

- Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */

- UCHAR_HEX_DIGIT=13,

- /** Binary property Hyphen. Dashes used to mark connections

- between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */

- UCHAR_HYPHEN=14,

- /** Binary property ID_Continue.

- Characters that can continue an identifier.

- DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."

- ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */

- UCHAR_ID_CONTINUE=15,

- /** Binary property ID_Start.

- Characters that can start an identifier.

- Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */

- UCHAR_ID_START=16,

- /** Binary property Ideographic.

- CJKV ideographs. @stable ICU 2.1 */

- UCHAR_IDEOGRAPHIC=17,

- /** Binary property IDS_Binary_Operator (new in Unicode 3.2).

- For programmatic determination of

- Ideographic Description Sequences. @stable ICU 2.1 */

- UCHAR_IDS_BINARY_OPERATOR=18,

- /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).

- For programmatic determination of

- Ideographic Description Sequences. @stable ICU 2.1 */

- UCHAR_IDS_TRINARY_OPERATOR=19,

- /** Binary property Join_Control.

- Format controls for cursive joining and ligation. @stable ICU 2.1 */

- UCHAR_JOIN_CONTROL=20,

- /** Binary property Logical_Order_Exception (new in Unicode 3.2).

- Characters that do not use logical order and

- require special handling in most processing. @stable ICU 2.1 */

- UCHAR_LOGICAL_ORDER_EXCEPTION=21,

- /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.

- Ll+Other_Lowercase @stable ICU 2.1 */

- UCHAR_LOWERCASE=22,

- /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */

- UCHAR_MATH=23,

- /** Binary property Noncharacter_Code_Point.

- Code points that are explicitly defined as illegal

- for the encoding of characters. @stable ICU 2.1 */

- UCHAR_NONCHARACTER_CODE_POINT=24,

- /** Binary property Quotation_Mark. @stable ICU 2.1 */

- UCHAR_QUOTATION_MARK=25,

- /** Binary property Radical (new in Unicode 3.2).

- For programmatic determination of

- Ideographic Description Sequences. @stable ICU 2.1 */

- UCHAR_RADICAL=26,

- /** Binary property Soft_Dotted (new in Unicode 3.2).

- Characters with a "soft dot", like i or j.

- An accent placed on these characters causes

- the dot to disappear. @stable ICU 2.1 */

- UCHAR_SOFT_DOTTED=27,

- /** Binary property Terminal_Punctuation.

- Punctuation characters that generally mark

- the end of textual units. @stable ICU 2.1 */

- UCHAR_TERMINAL_PUNCTUATION=28,

- /** Binary property Unified_Ideograph (new in Unicode 3.2).

- For programmatic determination of

- Ideographic Description Sequences. @stable ICU 2.1 */

- UCHAR_UNIFIED_IDEOGRAPH=29,

- /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.

- Lu+Other_Uppercase @stable ICU 2.1 */

- UCHAR_UPPERCASE=30,

- /** Binary property White_Space.

- Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.

- Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */

- UCHAR_WHITE_SPACE=31,

- /** Binary property XID_Continue.

- ID_Continue modified to allow closure under

- normalization forms NFKC and NFKD. @stable ICU 2.1 */

- UCHAR_XID_CONTINUE=32,

- /** Binary property XID_Start. ID_Start modified to allow

- closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */

- UCHAR_XID_START=33,

- /** Binary property Case_Sensitive. Either the source of a case

- mapping or _in_ the target of a case mapping. Not the same as

- the general category Cased_Letter. @stable ICU 2.6 */

- UCHAR_CASE_SENSITIVE=34,

- /** Binary property STerm (new in Unicode 4.0.1).

- Sentence Terminal. Used in UAX #29: Text Boundaries

- (http://www.unicode.org/reports/tr29/)

- @stable ICU 3.0 */

- UCHAR_S_TERM=35,

- /** Binary property Variation_Selector (new in Unicode 4.0.1).

- Indicates all those characters that qualify as Variation Selectors.

- For details on the behavior of these characters,

- see StandardizedVariants.html and 15.6 Variation Selectors.

- @stable ICU 3.0 */

- UCHAR_VARIATION_SELECTOR=36,

- /** Binary property NFD_Inert.

- ICU-specific property for characters that are inert under NFD,

- i.e., they do not interact with adjacent characters.

- See the documentation for the Normalizer2 class and the

- Normalizer2::isInert() method.

- @stable ICU 3.0 */

- UCHAR_NFD_INERT=37,

- /** Binary property NFKD_Inert.

- ICU-specific property for characters that are inert under NFKD,

- i.e., they do not interact with adjacent characters.

- See the documentation for the Normalizer2 class and the

- Normalizer2::isInert() method.

- @stable ICU 3.0 */

- UCHAR_NFKD_INERT=38,

- /** Binary property NFC_Inert.

- ICU-specific property for characters that are inert under NFC,

- i.e., they do not interact with adjacent characters.

- See the documentation for the Normalizer2 class and the

- Normalizer2::isInert() method.

- @stable ICU 3.0 */

- UCHAR_NFC_INERT=39,

- /** Binary property NFKC_Inert.

- ICU-specific property for characters that are inert under NFKC,

- i.e., they do not interact with adjacent characters.

- See the documentation for the Normalizer2 class and the

- Normalizer2::isInert() method.

- @stable ICU 3.0 */

- UCHAR_NFKC_INERT=40,

- /** Binary Property Segment_Starter.

- ICU-specific property for characters that are starters in terms of

- Unicode normalization and combining character sequences.

- They have ccc=0 and do not occur in non-initial position of the

- canonical decomposition of any character

- (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)).

- ICU uses this property for segmenting a string for generating a set of

- canonically equivalent strings, e.g. for canonical closure while

- processing collation tailoring rules.

- @stable ICU 3.0 */

- UCHAR_SEGMENT_STARTER=41,

- /** Binary property Pattern_Syntax (new in Unicode 4.1).

- See UAX #31 Identifier and Pattern Syntax

- (http://www.unicode.org/reports/tr31/)

- @stable ICU 3.4 */

- UCHAR_PATTERN_SYNTAX=42,

- /** Binary property Pattern_White_Space (new in Unicode 4.1).

- See UAX #31 Identifier and Pattern Syntax

- (http://www.unicode.org/reports/tr31/)

- @stable ICU 3.4 */

- UCHAR_PATTERN_WHITE_SPACE=43,

- /** Binary property alnum (a C/POSIX character class).

- Implemented according to the UTS #18 Annex C Standard Recommendation.

- See the uchar.h file documentation.

- @stable ICU 3.4 */

- UCHAR_POSIX_ALNUM=44,

- /** Binary property blank (a C/POSIX character class).

- Implemented according to the UTS #18 Annex C Standard Recommendation.

- See the uchar.h file documentation.

- @stable ICU 3.4 */

- UCHAR_POSIX_BLANK=45,

- /** Binary property graph (a C/POSIX character class).

- Implemented according to the UTS #18 Annex C Standard Recommendation.

- See the uchar.h file documentation.

- @stable ICU 3.4 */

- UCHAR_POSIX_GRAPH=46,

- /** Binary property print (a C/POSIX character class).

- Implemented according to the UTS #18 Annex C Standard Recommendation.

- See the uchar.h file documentation.

- @stable ICU 3.4 */

- UCHAR_POSIX_PRINT=47,

- /** Binary property xdigit (a C/POSIX character class).

- Implemented according to the UTS #18 Annex C Standard Recommendation.

- See the uchar.h file documentation.

- @stable ICU 3.4 */

- UCHAR_POSIX_XDIGIT=48,

- /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */

- UCHAR_CASED=49,

- /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */

- UCHAR_CASE_IGNORABLE=50,

- /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */

- UCHAR_CHANGES_WHEN_LOWERCASED=51,

- /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */

- UCHAR_CHANGES_WHEN_UPPERCASED=52,

- /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */

- UCHAR_CHANGES_WHEN_TITLECASED=53,

- /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */

- UCHAR_CHANGES_WHEN_CASEFOLDED=54,

- /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */

- UCHAR_CHANGES_WHEN_CASEMAPPED=55,

- /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */

- UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,

- /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */

- UCHAR_BINARY_LIMIT=57,

- /** Enumerated property Bidi_Class.

- Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */

- UCHAR_BIDI_CLASS=0x1000,

- /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */

- UCHAR_INT_START=UCHAR_BIDI_CLASS,

- /** Enumerated property Block.

- Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */

- UCHAR_BLOCK=0x1001,

- /** Enumerated property Canonical_Combining_Class.

- Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */

- UCHAR_CANONICAL_COMBINING_CLASS=0x1002,

- /** Enumerated property Decomposition_Type.

- Returns UDecompositionType values. @stable ICU 2.2 */

- UCHAR_DECOMPOSITION_TYPE=0x1003,

- /** Enumerated property East_Asian_Width.

- See http://www.unicode.org/reports/tr11/

- Returns UEastAsianWidth values. @stable ICU 2.2 */

- UCHAR_EAST_ASIAN_WIDTH=0x1004,

- /** Enumerated property General_Category.

- Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */

- UCHAR_GENERAL_CATEGORY=0x1005,

- /** Enumerated property Joining_Group.

- Returns UJoiningGroup values. @stable ICU 2.2 */

- UCHAR_JOINING_GROUP=0x1006,

- /** Enumerated property Joining_Type.

- Returns UJoiningType values. @stable ICU 2.2 */

- UCHAR_JOINING_TYPE=0x1007,

- /** Enumerated property Line_Break.

- Returns ULineBreak values. @stable ICU 2.2 */

- UCHAR_LINE_BREAK=0x1008,

- /** Enumerated property Numeric_Type.

- Returns UNumericType values. @stable ICU 2.2 */

- UCHAR_NUMERIC_TYPE=0x1009,

- /** Enumerated property Script.

- Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */

- UCHAR_SCRIPT=0x100A,

- /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.

- Returns UHangulSyllableType values. @stable ICU 2.6 */

- UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,

- /** Enumerated property NFD_Quick_Check.

- Returns UNormalizationCheckResult values. @stable ICU 3.0 */

- UCHAR_NFD_QUICK_CHECK=0x100C,

- /** Enumerated property NFKD_Quick_Check.

- Returns UNormalizationCheckResult values. @stable ICU 3.0 */

- UCHAR_NFKD_QUICK_CHECK=0x100D,

- /** Enumerated property NFC_Quick_Check.

- Returns UNormalizationCheckResult values. @stable ICU 3.0 */

- UCHAR_NFC_QUICK_CHECK=0x100E,

- /** Enumerated property NFKC_Quick_Check.

- Returns UNormalizationCheckResult values. @stable ICU 3.0 */

- UCHAR_NFKC_QUICK_CHECK=0x100F,

- /** Enumerated property Lead_Canonical_Combining_Class.

- ICU-specific property for the ccc of the first code point

- of the decomposition, or lccc(c)=ccc(NFD(c)[0]).

- Useful for checking for canonically ordered text;

- see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .

- Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */

- UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,

- /** Enumerated property Trail_Canonical_Combining_Class.

- ICU-specific property for the ccc of the last code point

- of the decomposition, or tccc(c)=ccc(NFD(c)[last]).

- Useful for checking for canonically ordered text;

- see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .

- Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */

- UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,

- /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).

- Used in UAX #29: Text Boundaries

- (http://www.unicode.org/reports/tr29/)

- Returns UGraphemeClusterBreak values. @stable ICU 3.4 */

- UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,

- /** Enumerated property Sentence_Break (new in Unicode 4.1).

- Used in UAX #29: Text Boundaries

- (http://www.unicode.org/reports/tr29/)

- Returns USentenceBreak values. @stable ICU 3.4 */

- UCHAR_SENTENCE_BREAK=0x1013,

- /** Enumerated property Word_Break (new in Unicode 4.1).

- Used in UAX #29: Text Boundaries

- (http://www.unicode.org/reports/tr29/)

- Returns UWordBreakValues values. @stable ICU 3.4 */

- UCHAR_WORD_BREAK=0x1014,

- /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */

- UCHAR_INT_LIMIT=0x1015,

- /** Bitmask property General_Category_Mask.

- This is the General_Category property returned as a bit mask.

- When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),

- returns bit masks for UCharCategory values where exactly one bit is set.

- When used with u_getPropertyValueName() and u_getPropertyValueEnum(),

- a multi-bit mask is used for sets of categories like "Letters".

- Mask values should be cast to uint32_t.

- @stable ICU 2.4 */

- UCHAR_GENERAL_CATEGORY_MASK=0x2000,

- /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */

- UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,

- /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */

- UCHAR_MASK_LIMIT=0x2001,

- /** Double property Numeric_Value.

- Corresponds to u_getNumericValue. @stable ICU 2.4 */

- UCHAR_NUMERIC_VALUE=0x3000,

- /** First constant for double Unicode properties. @stable ICU 2.4 */

- UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,

- /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */

- UCHAR_DOUBLE_LIMIT=0x3001,

- /** String property Age.

- Corresponds to u_charAge. @stable ICU 2.4 */

- UCHAR_AGE=0x4000,

- /** First constant for string Unicode properties. @stable ICU 2.4 */

- UCHAR_STRING_START=UCHAR_AGE,

- /** String property Bidi_Mirroring_Glyph.

- Corresponds to u_charMirror. @stable ICU 2.4 */

- UCHAR_BIDI_MIRRORING_GLYPH=0x4001,

- /** String property Case_Folding.

- Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */

- UCHAR_CASE_FOLDING=0x4002,

- /** String property ISO_Comment.

- Corresponds to u_getISOComment. @stable ICU 2.4 */

- UCHAR_ISO_COMMENT=0x4003,

- /** String property Lowercase_Mapping.

- Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */

- UCHAR_LOWERCASE_MAPPING=0x4004,

- /** String property Name.

- Corresponds to u_charName. @stable ICU 2.4 */

- UCHAR_NAME=0x4005,

- /** String property Simple_Case_Folding.

- Corresponds to u_foldCase. @stable ICU 2.4 */

- UCHAR_SIMPLE_CASE_FOLDING=0x4006,

- /** String property Simple_Lowercase_Mapping.

- Corresponds to u_tolower. @stable ICU 2.4 */

- UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,

- /** String property Simple_Titlecase_Mapping.

- Corresponds to u_totitle. @stable ICU 2.4 */

- UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,

- /** String property Simple_Uppercase_Mapping.

- Corresponds to u_toupper. @stable ICU 2.4 */

- UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,

- /** String property Titlecase_Mapping.

- Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */

- UCHAR_TITLECASE_MAPPING=0x400A,

- /** String property Unicode_1_Name.

- Corresponds to u_charName. @stable ICU 2.4 */

- UCHAR_UNICODE_1_NAME=0x400B,

- /** String property Uppercase_Mapping.

- Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */

- UCHAR_UPPERCASE_MAPPING=0x400C,

- /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */

- UCHAR_STRING_LIMIT=0x400D,

- /** Provisional property Script_Extensions (new in Unicode 6.0).

- As a provisional property, it may be modified or removed

- in future versions of the Unicode Standard, and thus in ICU.

- Some characters are commonly used in multiple scripts.

- For more information, see UAX #24: http://www.unicode.org/reports/tr24/.

- Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.

- @draft ICU 4.6 */

- UCHAR_SCRIPT_EXTENSIONS=0x7000,

- /** First constant for Unicode properties with unusual value types. @draft ICU 4.6 */

- UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,

- /** One more than the last constant for Unicode properties with unusual value types.

- * @draft ICU 4.6 */

- UCHAR_OTHER_PROPERTY_LIMIT=0x7001,

- /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */

- UCHAR_INVALID_CODE = -1

-} UProperty;

-/**

- * Data for enumerated Unicode general category types.

- * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .

- * @stable ICU 2.0

- */

-typedef enum UCharCategory

-{

- /** See note !!. Comments of the form "Cn" are read by genpname. */

- /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */

- U_UNASSIGNED = 0,

- /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */

- U_GENERAL_OTHER_TYPES = 0,

- /** Lu @stable ICU 2.0 */

- U_UPPERCASE_LETTER = 1,

- /** Ll @stable ICU 2.0 */

- U_LOWERCASE_LETTER = 2,

- /** Lt @stable ICU 2.0 */

- U_TITLECASE_LETTER = 3,

- /** Lm @stable ICU 2.0 */

- U_MODIFIER_LETTER = 4,

- /** Lo @stable ICU 2.0 */

- U_OTHER_LETTER = 5,

- /** Mn @stable ICU 2.0 */

- U_NON_SPACING_MARK = 6,

- /** Me @stable ICU 2.0 */

- U_ENCLOSING_MARK = 7,

- /** Mc @stable ICU 2.0 */

- U_COMBINING_SPACING_MARK = 8,

- /** Nd @stable ICU 2.0 */

- U_DECIMAL_DIGIT_NUMBER = 9,

- /** Nl @stable ICU 2.0 */

- U_LETTER_NUMBER = 10,

- /** No @stable ICU 2.0 */

- U_OTHER_NUMBER = 11,

- /** Zs @stable ICU 2.0 */

- U_SPACE_SEPARATOR = 12,

- /** Zl @stable ICU 2.0 */

- U_LINE_SEPARATOR = 13,

- /** Zp @stable ICU 2.0 */

- U_PARAGRAPH_SEPARATOR = 14,

- /** Cc @stable ICU 2.0 */

- U_CONTROL_CHAR = 15,

- /** Cf @stable ICU 2.0 */

- U_FORMAT_CHAR = 16,

- /** Co @stable ICU 2.0 */

- U_PRIVATE_USE_CHAR = 17,

- /** Cs @stable ICU 2.0 */

- U_SURROGATE = 18,

- /** Pd @stable ICU 2.0 */

- U_DASH_PUNCTUATION = 19,

- /** Ps @stable ICU 2.0 */

- U_START_PUNCTUATION = 20,

- /** Pe @stable ICU 2.0 */

- U_END_PUNCTUATION = 21,

- /** Pc @stable ICU 2.0 */

- U_CONNECTOR_PUNCTUATION = 22,

- /** Po @stable ICU 2.0 */

- U_OTHER_PUNCTUATION = 23,

- /** Sm @stable ICU 2.0 */

- U_MATH_SYMBOL = 24,

- /** Sc @stable ICU 2.0 */

- U_CURRENCY_SYMBOL = 25,

- /** Sk @stable ICU 2.0 */

- U_MODIFIER_SYMBOL = 26,

- /** So @stable ICU 2.0 */

- U_OTHER_SYMBOL = 27,

- /** Pi @stable ICU 2.0 */

- U_INITIAL_PUNCTUATION = 28,

- /** Pf @stable ICU 2.0 */

- U_FINAL_PUNCTUATION = 29,

- /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */

- U_CHAR_CATEGORY_COUNT

-} UCharCategory;

-/**

- * U_GC_XX_MASK constants are bit flags corresponding to Unicode

- * general category values.

- * For each category, the nth bit is set if the numeric value of the

- * corresponding UCharCategory constant is n.

- *

- * There are also some U_GC_Y_MASK constants for groups of general categories

- * like L for all letter categories.

- *

- * @see u_charType

- * @see U_GET_GC_MASK

- * @see UCharCategory

- * @stable ICU 2.1

- */

-#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)