OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2009 Red Hat, Inc. |
| 3 * Copyright (C) 2009 Keith Stribley |
| 4 * |
| 5 * This is part of HarfBuzz, a text shaping library. |
| 6 * |
| 7 * Permission is hereby granted, without written agreement and without |
| 8 * license or royalty fees, to use, copy, modify, and distribute this |
| 9 * software and its documentation for any purpose, provided that the |
| 10 * above copyright notice and the following two paragraphs appear in |
| 11 * all copies of this software. |
| 12 * |
| 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
| 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
| 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
| 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
| 17 * DAMAGE. |
| 18 * |
| 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
| 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
| 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
| 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
| 24 * |
| 25 * Red Hat Author(s): Behdad Esfahbod |
| 26 */ |
| 27 |
| 28 #include "hb-private.h" |
| 29 |
| 30 #include "hb-icu.h" |
| 31 |
| 32 #include "hb-unicode-private.h" |
| 33 |
| 34 #include <unicode/uversion.h> |
| 35 #include <unicode/uchar.h> |
| 36 #include <unicode/uscript.h> |
| 37 |
| 38 HB_BEGIN_DECLS |
| 39 |
| 40 |
| 41 static hb_codepoint_t hb_icu_get_mirroring (hb_codepoint_t unicode) { return u_c
harMirror(unicode); } |
| 42 static unsigned int hb_icu_get_combining_class (hb_codepoint_t unicode) { return
u_getCombiningClass (unicode); } |
| 43 |
| 44 static unsigned int |
| 45 hb_icu_get_eastasian_width (hb_codepoint_t unicode) |
| 46 { |
| 47 switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH)) |
| 48 { |
| 49 case U_EA_WIDE: |
| 50 case U_EA_FULLWIDTH: |
| 51 return 2; |
| 52 case U_EA_NEUTRAL: |
| 53 case U_EA_AMBIGUOUS: |
| 54 case U_EA_HALFWIDTH: |
| 55 case U_EA_NARROW: |
| 56 return 1; |
| 57 } |
| 58 return 1; |
| 59 } |
| 60 |
| 61 static hb_category_t |
| 62 hb_icu_get_general_category (hb_codepoint_t unicode) |
| 63 { |
| 64 switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY)) |
| 65 { |
| 66 case U_UNASSIGNED: return HB_CATEGORY_UNASSIGNED; |
| 67 |
| 68 case U_UPPERCASE_LETTER: return HB_CATEGORY_UPPERCASE_LETTER;
/* Lu */ |
| 69 case U_LOWERCASE_LETTER: return HB_CATEGORY_LOWERCASE_LETTER;
/* Ll */ |
| 70 case U_TITLECASE_LETTER: return HB_CATEGORY_TITLECASE_LETTER;
/* Lt */ |
| 71 case U_MODIFIER_LETTER: return HB_CATEGORY_MODIFIER_LETTER;
/* Lm */ |
| 72 case U_OTHER_LETTER: return HB_CATEGORY_OTHER_LETTER;
/* Lo */ |
| 73 |
| 74 case U_NON_SPACING_MARK: return HB_CATEGORY_NON_SPACING_MARK;
/* Mn */ |
| 75 case U_ENCLOSING_MARK: return HB_CATEGORY_ENCLOSING_MARK;
/* Me */ |
| 76 case U_COMBINING_SPACING_MARK: return HB_CATEGORY_COMBINING_MARK;
/* Mc */ |
| 77 |
| 78 case U_DECIMAL_DIGIT_NUMBER: return HB_CATEGORY_DECIMAL_NUMBER;
/* Nd */ |
| 79 case U_LETTER_NUMBER: return HB_CATEGORY_LETTER_NUMBER;
/* Nl */ |
| 80 case U_OTHER_NUMBER: return HB_CATEGORY_OTHER_NUMBER;
/* No */ |
| 81 |
| 82 case U_SPACE_SEPARATOR: return HB_CATEGORY_SPACE_SEPARATOR;
/* Zs */ |
| 83 case U_LINE_SEPARATOR: return HB_CATEGORY_LINE_SEPARATOR;
/* Zl */ |
| 84 case U_PARAGRAPH_SEPARATOR: return HB_CATEGORY_PARAGRAPH_SEPARATOR;
/* Zp */ |
| 85 |
| 86 case U_CONTROL_CHAR: return HB_CATEGORY_CONTROL;
/* Cc */ |
| 87 case U_FORMAT_CHAR: return HB_CATEGORY_FORMAT;
/* Cf */ |
| 88 case U_PRIVATE_USE_CHAR: return HB_CATEGORY_PRIVATE_USE;
/* Co */ |
| 89 case U_SURROGATE: return HB_CATEGORY_SURROGATE;
/* Cs */ |
| 90 |
| 91 |
| 92 case U_DASH_PUNCTUATION: return HB_CATEGORY_DASH_PUNCTUATION;
/* Pd */ |
| 93 case U_START_PUNCTUATION: return HB_CATEGORY_OPEN_PUNCTUATION;
/* Ps */ |
| 94 case U_END_PUNCTUATION: return HB_CATEGORY_CLOSE_PUNCTUATION;
/* Pe */ |
| 95 case U_CONNECTOR_PUNCTUATION: return HB_CATEGORY_CONNECT_PUNCTUATION;
/* Pc */ |
| 96 case U_OTHER_PUNCTUATION: return HB_CATEGORY_OTHER_PUNCTUATION;
/* Po */ |
| 97 |
| 98 case U_MATH_SYMBOL: return HB_CATEGORY_MATH_SYMBOL;
/* Sm */ |
| 99 case U_CURRENCY_SYMBOL: return HB_CATEGORY_CURRENCY_SYMBOL;
/* Sc */ |
| 100 case U_MODIFIER_SYMBOL: return HB_CATEGORY_MODIFIER_SYMBOL;
/* Sk */ |
| 101 case U_OTHER_SYMBOL: return HB_CATEGORY_OTHER_SYMBOL;
/* So */ |
| 102 |
| 103 case U_INITIAL_PUNCTUATION: return HB_CATEGORY_INITIAL_PUNCTUATION;
/* Pi */ |
| 104 case U_FINAL_PUNCTUATION: return HB_CATEGORY_FINAL_PUNCTUATION;
/* Pf */ |
| 105 } |
| 106 |
| 107 return HB_CATEGORY_UNASSIGNED; |
| 108 } |
| 109 |
| 110 static hb_script_t |
| 111 hb_icu_get_script (hb_codepoint_t unicode) |
| 112 { |
| 113 UErrorCode status = U_ZERO_ERROR; |
| 114 UScriptCode scriptCode = uscript_getScript(unicode, &status); |
| 115 switch ((int) scriptCode) |
| 116 { |
| 117 #define CHECK_ICU_VERSION(major, minor) \ |
| 118 U_ICU_VERSION_MAJOR_NUM > (major) || (U_ICU_VERSION_MAJOR_NUM == (major)
&& U_ICU_VERSION_MINOR_NUM >= (minor)) |
| 119 #define MATCH_SCRIPT(C) case USCRIPT_##C: return HB_SCRIPT_##C |
| 120 #define MATCH_SCRIPT2(C1, C2) case USCRIPT_##C1: return HB_SCRIPT_##C2 |
| 121 MATCH_SCRIPT (INVALID_CODE); |
| 122 MATCH_SCRIPT (COMMON); /* Zyyy */ |
| 123 MATCH_SCRIPT (INHERITED); /* Qaai */ |
| 124 MATCH_SCRIPT (ARABIC); /* Arab */ |
| 125 MATCH_SCRIPT (ARMENIAN); /* Armn */ |
| 126 MATCH_SCRIPT (BENGALI); /* Beng */ |
| 127 MATCH_SCRIPT (BOPOMOFO); /* Bopo */ |
| 128 MATCH_SCRIPT (CHEROKEE); /* Cher */ |
| 129 MATCH_SCRIPT (COPTIC); /* Qaac */ |
| 130 MATCH_SCRIPT (CYRILLIC); /* Cyrl (Cyrs) */ |
| 131 MATCH_SCRIPT (DESERET); /* Dsrt */ |
| 132 MATCH_SCRIPT (DEVANAGARI); /* Deva */ |
| 133 MATCH_SCRIPT (ETHIOPIC); /* Ethi */ |
| 134 MATCH_SCRIPT (GEORGIAN); /* Geor (Geon); Geoa) */ |
| 135 MATCH_SCRIPT (GOTHIC); /* Goth */ |
| 136 MATCH_SCRIPT (GREEK); /* Grek */ |
| 137 MATCH_SCRIPT (GUJARATI); /* Gujr */ |
| 138 MATCH_SCRIPT (GURMUKHI); /* Guru */ |
| 139 MATCH_SCRIPT (HAN); /* Hani */ |
| 140 MATCH_SCRIPT (HANGUL); /* Hang */ |
| 141 MATCH_SCRIPT (HEBREW); /* Hebr */ |
| 142 MATCH_SCRIPT (HIRAGANA); /* Hira */ |
| 143 MATCH_SCRIPT (KANNADA); /* Knda */ |
| 144 MATCH_SCRIPT (KATAKANA); /* Kana */ |
| 145 MATCH_SCRIPT (KHMER); /* Khmr */ |
| 146 MATCH_SCRIPT (LAO); /* Laoo */ |
| 147 MATCH_SCRIPT (LATIN); /* Latn (Latf); Latg) */ |
| 148 MATCH_SCRIPT (MALAYALAM); /* Mlym */ |
| 149 MATCH_SCRIPT (MONGOLIAN); /* Mong */ |
| 150 MATCH_SCRIPT (MYANMAR); /* Mymr */ |
| 151 MATCH_SCRIPT (OGHAM); /* Ogam */ |
| 152 MATCH_SCRIPT (OLD_ITALIC); /* Ital */ |
| 153 MATCH_SCRIPT (ORIYA); /* Orya */ |
| 154 MATCH_SCRIPT (RUNIC); /* Runr */ |
| 155 MATCH_SCRIPT (SINHALA); /* Sinh */ |
| 156 MATCH_SCRIPT (SYRIAC); /* Syrc (Syrj, Syrn); Syre) */ |
| 157 MATCH_SCRIPT (TAMIL); /* Taml */ |
| 158 MATCH_SCRIPT (TELUGU); /* Telu */ |
| 159 MATCH_SCRIPT (THAANA); /* Thaa */ |
| 160 MATCH_SCRIPT (THAI); /* Thai */ |
| 161 MATCH_SCRIPT (TIBETAN); /* Tibt */ |
| 162 MATCH_SCRIPT (CANADIAN_ABORIGINAL);/* Cans */ |
| 163 MATCH_SCRIPT (YI); /* Yiii */ |
| 164 MATCH_SCRIPT (TAGALOG); /* Tglg */ |
| 165 MATCH_SCRIPT (HANUNOO); /* Hano */ |
| 166 MATCH_SCRIPT (BUHID); /* Buhd */ |
| 167 MATCH_SCRIPT (TAGBANWA); /* Tagb */ |
| 168 |
| 169 /* Unicode-4.0 additions */ |
| 170 MATCH_SCRIPT (BRAILLE); /* Brai */ |
| 171 MATCH_SCRIPT (CYPRIOT); /* Cprt */ |
| 172 MATCH_SCRIPT (LIMBU); /* Limb */ |
| 173 MATCH_SCRIPT (OSMANYA); /* Osma */ |
| 174 MATCH_SCRIPT (SHAVIAN); /* Shaw */ |
| 175 MATCH_SCRIPT (LINEAR_B); /* Linb */ |
| 176 MATCH_SCRIPT (TAI_LE); /* Tale */ |
| 177 MATCH_SCRIPT (UGARITIC); /* Ugar */ |
| 178 |
| 179 /* Unicode-4.1 additions */ |
| 180 MATCH_SCRIPT (NEW_TAI_LUE); /* Talu */ |
| 181 MATCH_SCRIPT (BUGINESE); /* Bugi */ |
| 182 MATCH_SCRIPT (GLAGOLITIC); /* Glag */ |
| 183 MATCH_SCRIPT (TIFINAGH); /* Tfng */ |
| 184 MATCH_SCRIPT (SYLOTI_NAGRI); /* Sylo */ |
| 185 MATCH_SCRIPT (OLD_PERSIAN); /* Xpeo */ |
| 186 MATCH_SCRIPT (KHAROSHTHI); /* Khar */ |
| 187 |
| 188 /* Unicode-5.0 additions */ |
| 189 MATCH_SCRIPT (UNKNOWN); /* Zzzz */ |
| 190 MATCH_SCRIPT (BALINESE); /* Bali */ |
| 191 MATCH_SCRIPT (CUNEIFORM); /* Xsux */ |
| 192 MATCH_SCRIPT (PHOENICIAN); /* Phnx */ |
| 193 MATCH_SCRIPT (PHAGS_PA); /* Phag */ |
| 194 MATCH_SCRIPT (NKO); /* Nkoo */ |
| 195 |
| 196 /* Unicode-5.1 additions */ |
| 197 MATCH_SCRIPT (KAYAH_LI); /* Kali */ |
| 198 MATCH_SCRIPT (LEPCHA); /* Lepc */ |
| 199 MATCH_SCRIPT (REJANG); /* Rjng */ |
| 200 MATCH_SCRIPT (SUNDANESE); /* Sund */ |
| 201 MATCH_SCRIPT (SAURASHTRA); /* Saur */ |
| 202 MATCH_SCRIPT (CHAM); /* Cham */ |
| 203 MATCH_SCRIPT (OL_CHIKI); /* Olck */ |
| 204 MATCH_SCRIPT (VAI); /* Vaii */ |
| 205 MATCH_SCRIPT (CARIAN); /* Cari */ |
| 206 MATCH_SCRIPT (LYCIAN); /* Lyci */ |
| 207 MATCH_SCRIPT (LYDIAN); /* Lydi */ |
| 208 |
| 209 /* Unicode-5.2 additions */ |
| 210 MATCH_SCRIPT (AVESTAN); /* Avst */ |
| 211 #if CHECK_ICU_VERSION (4, 4) |
| 212 MATCH_SCRIPT (BAMUM); /* Bamu */ |
| 213 #endif |
| 214 MATCH_SCRIPT (EGYPTIAN_HIEROGLYPHS); /* Egyp */ |
| 215 MATCH_SCRIPT (IMPERIAL_ARAMAIC); /* Armi */ |
| 216 MATCH_SCRIPT (INSCRIPTIONAL_PAHLAVI); /* Phli */ |
| 217 MATCH_SCRIPT (INSCRIPTIONAL_PARTHIAN); /* Prti */ |
| 218 MATCH_SCRIPT (JAVANESE); /* Java */ |
| 219 MATCH_SCRIPT (KAITHI); /* Kthi */ |
| 220 MATCH_SCRIPT2(LANNA, TAI_THAM); /* Lana */ |
| 221 #if CHECK_ICU_VERSION (4, 4) |
| 222 MATCH_SCRIPT (LISU); /* Lisu */ |
| 223 #endif |
| 224 MATCH_SCRIPT2(MEITEI_MAYEK, MEETEI_MAYEK);/* Mtei */ |
| 225 #if CHECK_ICU_VERSION (4, 4) |
| 226 MATCH_SCRIPT (OLD_SOUTH_ARABIAN); /* Sarb */ |
| 227 #endif |
| 228 MATCH_SCRIPT2(ORKHON, OLD_TURKIC); /* Orkh */ |
| 229 MATCH_SCRIPT (SAMARITAN); /* Samr */ |
| 230 MATCH_SCRIPT (TAI_VIET); /* Tavt */ |
| 231 |
| 232 /* Unicode-6.0 additions */ |
| 233 MATCH_SCRIPT (BATAK); /* Batk */ |
| 234 MATCH_SCRIPT (BRAHMI); /* Brah */ |
| 235 MATCH_SCRIPT2(MANDAEAN, MANDAIC); /* Mand */ |
| 236 |
| 237 } |
| 238 return HB_SCRIPT_UNKNOWN; |
| 239 } |
| 240 |
| 241 static hb_unicode_funcs_t icu_ufuncs = { |
| 242 HB_REFERENCE_COUNT_INVALID, /* ref_count */ |
| 243 TRUE, /* immutable */ |
| 244 { |
| 245 hb_icu_get_general_category, |
| 246 hb_icu_get_combining_class, |
| 247 hb_icu_get_mirroring, |
| 248 hb_icu_get_script, |
| 249 hb_icu_get_eastasian_width |
| 250 } |
| 251 }; |
| 252 |
| 253 hb_unicode_funcs_t * |
| 254 hb_icu_get_unicode_funcs (void) |
| 255 { |
| 256 return &icu_ufuncs; |
| 257 } |
| 258 |
| 259 |
| 260 HB_END_DECLS |
OLD | NEW |