| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) | |
| 3 * | |
| 4 * This is part of HarfBuzz, an OpenType Layout engine library. | |
| 5 * | |
| 6 * Permission is hereby granted, without written agreement and without | |
| 7 * license or royalty fees, to use, copy, modify, and distribute this | |
| 8 * software and its documentation for any purpose, provided that the | |
| 9 * above copyright notice and the following two paragraphs appear in | |
| 10 * all copies of this software. | |
| 11 * | |
| 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | |
| 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | |
| 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | |
| 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | |
| 16 * DAMAGE. | |
| 17 * | |
| 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | |
| 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
| 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | |
| 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | |
| 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
| 23 */ | |
| 24 | |
| 25 #include "harfbuzz-shaper.h" | |
| 26 #include "harfbuzz-shaper-private.h" | |
| 27 | |
| 28 #include <assert.h> | |
| 29 #include <stdio.h> | |
| 30 | |
| 31 /* | |
| 32 // Vocabulary | |
| 33 // Base -> A consonant or an independent vowel in its full (not sub
script) form. It is the | |
| 34 // center of the syllable, it can be surrounded by coeng (s
ubscript) consonants, vowels, | |
| 35 // split vowels, signs... but there is only one base in a s
yllable, it has to be coded as | |
| 36 // the first character of the syllable. | |
| 37 // split vowel --> vowel that has two parts placed separately (e.g. Before
and after the consonant). | |
| 38 // Khmer language has five of them. Khmer split vowels eith
er have one part before the | |
| 39 // base and one after the base or they have a part before t
he base and a part above the base. | |
| 40 // The first part of all Khmer split vowels is the same cha
racter, identical to | |
| 41 // the glyph of Khmer dependent vowel SRA EI | |
| 42 // coeng --> modifier used in Khmer to construct coeng (subscript) consona
nts | |
| 43 // Differently than indian languages, the coeng modifies the con
sonant that follows it, | |
| 44 // not the one preceding it Each consonant has two forms, the b
ase form and the subscript form | |
| 45 // the base form is the normal one (using the consonants code-po
int), the subscript form is | |
| 46 // displayed when the combination coeng + consonant is encounter
ed. | |
| 47 // Consonant of type 1 -> A consonant which has a subscript form that only occupies space under a base consonant | |
| 48 // Consonant of type 2.-> Its subscript form occupies space under and befor
e the base (only one, RO) | |
| 49 // Consonant of Type 3 -> Its subscript form occupies space under and after
the base (KHO, CHHO, THHO, BA, YO, SA) | |
| 50 // Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds | |
| 51 // if it is attached to a consonant of the first serie
s or a consonant of the second series | |
| 52 // Most consonants have an equivalent in the other series, but some of them exist only in | |
| 53 // one series (for example SA). If we want to use the
consonant SA with a vowel sound that | |
| 54 // can only be done with a vowel sound that correspond
s to a vowel accompanying a consonant | |
| 55 // of the other series, then we need to use a consonan
t shifter: TRIISAP or MUSIKATOAN | |
| 56 // x17C9 and x17CA. TRIISAP changes a first series consonant to a second series sound and | |
| 57 // MUSIKATOAN a second series consonant to have a firs
t series vowel sound. | |
| 58 // Consonant shifters are both normally superscript marks, but, when they are followed by a | |
| 59 // superscript, they change shape and take the form of
subscript dependent vowel SRA U. | |
| 60 // If they are in the same syllable as a coeng consona
nt, Unicode 3.0 says that they | |
| 61 // should be typed before the coeng. Unicode 4.0 break
s the standard and says that it should | |
| 62 // be placed after the coeng consonant. | |
| 63 // Dependent vowel -> In khmer dependent vowels can be placed above, belo
w, before or after the base | |
| 64 // Each vowel has its own position. Only one vowel per
syllable is allowed. | |
| 65 // Signs -> Khmer has above signs and post signs. Only one abov
e sign and/or one post sign are | |
| 66 // Allowed in a syllable. | |
| 67 // | |
| 68 // | |
| 69 // order is important here! This order must be the same that is found in each
horizontal | |
| 70 // line in the statetable for Khmer (see khmerStateTable) . | |
| 71 */ | |
/*
 * Basic character classes. The numeric value of each class is also the
 * column index used when looking up a transition in khmerStateTable, so
 * the values and their order must stay in step with that table.
 */
enum KhmerCharClassValues {
    CC_RESERVED           =  0,
    CC_CONSONANT          =  1,  /* consonant of type 1, or an independent vowel */
    CC_CONSONANT2         =  2,  /* consonant of type 2 */
    CC_CONSONANT3         =  3,  /* consonant of type 3 */
    CC_ZERO_WIDTH_NJ_MARK =  4,  /* zero width non joiner (0x200C) */
    CC_CONSONANT_SHIFTER  =  5,
    CC_ROBAT              =  6,  /* special diacritic accent, handled separately in the state table */
    CC_COENG              =  7,  /* subscript consonant combining character */
    CC_DEPENDENT_VOWEL    =  8,
    CC_SIGN_ABOVE         =  9,
    CC_SIGN_AFTER         = 10,
    CC_ZERO_WIDTH_J_MARK  = 11,  /* zero width joiner */
    CC_COUNT              = 12   /* number of character classes (state table width) */
};
| 87 | |
| 88 | |
/*
 * Flag bits that are ORed onto a KhmerCharClassValues value.
 * The low 16 bits hold the class itself, bits 16-19 hold the position of
 * the character relative to the base, the high bits are shortcut flags.
 */
enum KhmerCharClassFlags {
    /* extracts the KhmerCharClassValues part */
    CF_CLASS_MASK    = 0x0000FFFF,

    /* position flags */
    CF_POS_AFTER     = 0x00010000,
    CF_POS_ABOVE     = 0x00020000,
    CF_POS_BELOW     = 0x00040000,
    CF_POS_BEFORE    = 0x00080000,
    CF_POS_MASK      = 0x000f0000,

    /* shortcut flags, used to speed up comparisons */
    CF_CONSONANT     = 0x01000000,
    CF_SPLIT_VOWEL   = 0x02000000,  /* split vowel: its first part is written in front of the syllable */
    CF_DOTTED_CIRCLE = 0x04000000,  /* a dotted circle is added when such a character starts a syllable */
    CF_COENG         = 0x08000000,
    CF_SHIFTER       = 0x10000000,
    CF_ABOVE_VOWEL   = 0x20000000
};
| 106 | |
| 107 | |
| 108 /* Characters that get referred to by name */ | |
/* Code points that the shaper refers to by name (listed in code point order) */
enum KhmerChar {
    C_RO           = 0x179A,
    C_VOWEL_AA     = 0x17B6,
    C_VOWEL_E      = 0x17C1,
    C_SIGN_NIKAHIT = 0x17C6,
    C_COENG        = 0x17D2,
    C_SIGN_ZWNJ    = 0x200C,
    C_SIGN_ZWJ     = 0x200D
};
| 118 | |
| 119 | |
| 120 /* | |
| 121 // simple classes, they are used in the statetable (in this file) to control th
e length of a syllable | |
| 122 // they are also used to know where a character should be placed (location in r
eference to the base character) | |
| 123 // and also to know if a character, when independently displayed, should be dis
played with a dotted-circle to | |
| 124 // indicate error in syllable construction | |
| 125 */ | |
| 126 enum { | |
| 127 _xx = CC_RESERVED, | |
| 128 _sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE, | |
| 129 _sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER, | |
| 130 _c1 = CC_CONSONANT | CF_CONSONANT, | |
| 131 _c2 = CC_CONSONANT2 | CF_CONSONANT, | |
| 132 _c3 = CC_CONSONANT3 | CF_CONSONANT, | |
| 133 _rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE, | |
| 134 _cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER, | |
| 135 _dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE, | |
| 136 _db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE, | |
| 137 _da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL, | |
| 138 _dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE, | |
| 139 _co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE, | |
| 140 | |
| 141 /* split vowel */ | |
| 142 _va = _da | CF_SPLIT_VOWEL, | |
| 143 _vr = _dr | CF_SPLIT_VOWEL | |
| 144 }; | |
| 145 | |
| 146 | |
/*
 * A character class as stored in the class table and returned by the
 * classifier: one KhmerCharClassValues value in the low bits, ORed with
 * any number of KhmerCharClassFlags.
 */
typedef unsigned long KhmerCharClass;
| 152 | |
| 153 | |
| 154 /* | |
| 155 // Character class tables | |
| 156 // _xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs... | |
| 157 // _sa Sign placed above the base | |
| 158 // _sp Sign placed after the base | |
| 159 // _c1 Consonant of type 1 or independent vowel (independent vowels behave as t
ype 1 consonants) | |
| 160 // _c2 Consonant of type 2 (only RO) | |
| 161 // _c3 Consonant of type 3 | |
| 162 // _rb Khmer sign robat u17CC. combining mark for subscript consonants | |
| 163 // _cs Consonant-shifter | |
| 164 // _dl Dependent vowel placed before the base (left of the base) | |
| 165 // _db Dependent vowel placed below the base | |
| 166 // _da Dependent vowel placed above the base | |
| 167 // _dr Dependent vowel placed behind the base (right of the base) | |
| 168 // _co Khmer combining mark COENG u17D2, combines with the consonant or indepen
dent vowel following | |
| 169 // it to create a subscript consonant or independent vowel | |
| 170 // _va Khmer split vowel in which the first part is before the base and the sec
ond one above the base | |
| 171 // _vr Khmer split vowel in which the first part is before the base and the sec
ond one behind (right of) the base | |
| 172 */ | |
| 173 static const KhmerCharClass khmerCharClasses[] = { | |
| 174 _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _
c1, /* 1780 - 178F */ | |
| 175 _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _
c3, /* 1790 - 179F */ | |
| 176 _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _
c1, /* 17A0 - 17AF */ | |
| 177 _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _
vr, /* 17B0 - 17BF */ | |
| 178 _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _
sa, /* 17C0 - 17CF */ | |
| 179 _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _
xx /* 17D0 - 17DF */ | |
| 180 }; | |
| 181 | |
/*
 * First and last code point covered by khmerCharClasses. Keep these two
 * values in step with the number of entries in that table.
 */
enum KhmerCharClassesRange {
    KhmerFirstChar = 0x1780,
    KhmerLastChar  = 0x17df
};
| 187 | |
| 188 /* | |
| 189 // Below we define how a character in the input string is either in the khmerCh
arClasses table | |
| 190 // (in which case we get its type back), a ZWJ or ZWNJ (two characters that may
appear | |
| 191 // within the syllable, but are not in the table) we also get their type back,
or an unknown object | |
| 192 // in which case we get _xx (CC_RESERVED) back | |
| 193 */ | |
| 194 static KhmerCharClass getKhmerCharClass(HB_UChar16 uc) | |
| 195 { | |
| 196 if (uc == C_SIGN_ZWJ) { | |
| 197 return CC_ZERO_WIDTH_J_MARK; | |
| 198 } | |
| 199 | |
| 200 if (uc == C_SIGN_ZWNJ) { | |
| 201 return CC_ZERO_WIDTH_NJ_MARK; | |
| 202 } | |
| 203 | |
| 204 if (uc < KhmerFirstChar || uc > KhmerLastChar) { | |
| 205 return CC_RESERVED; | |
| 206 } | |
| 207 | |
| 208 return khmerCharClasses[uc - KhmerFirstChar]; | |
| 209 } | |
| 210 | |
| 211 | |
| 212 /* | |
| 213 // The stateTable is used to calculate the end (the length) of a well | |
| 214 // formed Khmer Syllable. | |
| 215 // | |
| 216 // Each horizontal line is ordered exactly the same way as the values in KhmerC
lassTable | |
| 217 // CharClassValues. This coincidence of values allows the follow up of the tabl
e. | |
| 218 // | |
| 219 // Each line corresponds to a state, which does not necessarily need to be a ty
pe | |
| 220 // of component... for example, state 2 is a base, which is always a first character | |
| 221 // in the syllable, but the state could be produced by a consonant of any type when | |
| 222 // it is the first character that is analysed (in ground state). | |
| 223 // | |
| 224 // Differentiating 3 types of consonants is necessary in order to | |
| 225 // forbid the use of certain combinations, such as having a second | |
| 226 // coeng after a coeng RO, | |
| 227 // The inexistent possibility of having a type 3 after another type 3 is permit
ted, | |
| 228 // eliminating it would very much complicate the table, and it does not create
typing | |
| 229 // problems, as the case above. | |
| 230 // | |
| 231 // The table is quite complex, in order to limit the number of coeng consonants | |
| 232 // to 2 (by means of the table). | |
| 233 // | |
| 234 // There is a peculiarity, as far as Unicode is concerned: | |
| 235 // - The consonant-shifter is considered in two possible different | |
| 236 // locations, the one considered in Unicode 3.0 and the one considered in | |
| 237 // Unicode 4.0. (there is a backwards compatibility problem in this standard)
. | |
| 238 // | |
| 239 // | |
| 240 // xx independent character, such as a number, punctuation sign or non-khmer
char | |
| 241 // | |
| 242 // c1 Khmer consonant of type 1 or an independent vowel | |
| 243 // that is, a letter in which the subscript form is only under the | |
| 244 // base, not taking any space to the right or to the left | |
| 245 // | |
| 246 // c2 Khmer consonant of type 2, the coeng form takes space under | |
| 247 // and to the left of the base (only RO is of this type) | |
| 248 // | |
| 249 // c3 Khmer consonant of type 3. Its subscript form takes space under | |
| 250 // and to the right of the base. | |
| 251 // | |
| 252 // cs Khmer consonant shifter | |
| 253 // | |
| 254 // rb Khmer robat | |
| 255 // | |
| 256 // co coeng character (u17D2) | |
| 257 // | |
| 258 // dv dependent vowel (including split vowels, they are treated in the same
way). | |
| 259 // even if dv is not defined above, the component that is really tested f
or is | |
| 260 // KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent
vowels | |
| 261 // | |
| 262 // zwj Zero Width joiner | |
| 263 // | |
| 264 // zwnj Zero width non joiner | |
| 265 // | |
| 266 // sa above sign | |
| 267 // | |
| 268 // sp post sign | |
| 269 // | |
| 270 // there are lines with equal content but for an easier understanding | |
| 271 // (and maybe change in the future) we did not join them | |
| 272 */ | |
| 273 static const signed char khmerStateTable[][CC_COUNT] = | |
| 274 { | |
| 275 /* xx c1 c2 c3 zwnj cs rb co dv sa sp zwj */ | |
| 276 { 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, /* 0 - ground state */ | |
| 277 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 1 - exit state (or sig
n to the right of the syllable) */ | |
| 278 {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, /* 2 - Base consonant */ | |
| 279 {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, /* 3 - First ZWNJ before
a register shifter It can only be followed by a shifter or a vowel */ | |
| 280 {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, /* 4 - First register shi
fter */ | |
| 281 {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, /* 5 - Robat */ | |
| 282 {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, /* 6 - First Coeng */ | |
| 283 {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 7 - First consonant of
type 1 after coeng */ | |
| 284 {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, /* 8 - First consonant of
type 2 after coeng */ | |
| 285 {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 9 - First consonant or
type 3 after ceong */ | |
| 286 {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no r
egister shifter before) */ | |
| 287 {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 11 - Second coeng conso
nant (or ind. vowel) no register shifter before */ | |
| 288 {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before
a register shifter */ | |
| 289 {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 13 - Second register sh
ifter */ | |
| 290 {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel *
/ | |
| 291 {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel
*/ | |
| 292 {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, /* 16 - dependent vowel */ | |
| 293 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, /* 17 - sign above */ | |
| 294 {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */ | |
| 295 {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */ | |
| 296 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, /* 20 - dependent vowel af
ter a Robat */ | |
| 297 }; | |
| 298 | |
| 299 | |
/*
 * Debug tracing. Uncomment the define below to route KHDEBUG to qDebug.
 *
 * When tracing is off, KHDEBUG(...) expands to a conditional *expression*
 * that never evaluates the printf call. The previous "if(0) printf" form
 * was a statement, so an unbraced
 *     if (x) KHDEBUG(...); else ...
 * would silently attach the else to the hidden if(0). The expression form
 * keeps the printf format checking but has no such hazard.
 */
/* #define KHMER_DEBUG */
#ifdef KHMER_DEBUG
#define KHDEBUG qDebug
#else
#define KHDEBUG 1 ? (void)0 : (void)printf
#endif
| 306 | |
| 307 /* | |
| 308 // Given an input string of characters and a location in which to start looking | |
| 309 // calculate, using the state table, which one is the last character of the syl
lable | |
| 310 // that starts in the starting position. | |
| 311 */ | |
| 312 static int khmer_nextSyllableBoundary(const HB_UChar16 *s, int start, int end, H
B_Bool *invalid) | |
| 313 { | |
| 314 const HB_UChar16 *uc = s + start; | |
| 315 int state = 0; | |
| 316 int pos = start; | |
| 317 *invalid = FALSE; | |
| 318 | |
| 319 while (pos < end) { | |
| 320 KhmerCharClass charClass = getKhmerCharClass(*uc); | |
| 321 if (pos == start) { | |
| 322 *invalid = (charClass > 0) && ! (charClass & CF_CONSONANT); | |
| 323 } | |
| 324 state = khmerStateTable[state][charClass & CF_CLASS_MASK]; | |
| 325 | |
| 326 KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state, | |
| 327 charClass, *uc ); | |
| 328 | |
| 329 if (state < 0) { | |
| 330 break; | |
| 331 } | |
| 332 ++uc; | |
| 333 ++pos; | |
| 334 } | |
| 335 return pos; | |
| 336 } | |
| 337 | |
| 338 #ifndef NO_OPENTYPE | |
| 339 static const HB_OpenTypeFeature khmer_features[] = { | |
| 340 { HB_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty }, | |
| 341 { HB_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty }, | |
| 342 { HB_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty }, | |
| 343 { HB_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty }, | |
| 344 { HB_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty }, | |
| 345 { HB_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty }, | |
| 346 { HB_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty }, | |
| 347 { HB_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty }, | |
| 348 { HB_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty }, | |
| 349 { 0, 0 } | |
| 350 }; | |
| 351 #endif | |
| 352 | |
| 353 | |
| 354 static HB_Bool khmer_shape_syllable(HB_Bool openType, HB_ShaperItem *item) | |
| 355 { | |
| 356 /* KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length, | |
| 357 item->string->mid(item->from, item->length).toUtf8().data()); */ | |
| 358 | |
| 359 int len = 0; | |
| 360 int syllableEnd = item->item.pos + item->item.length; | |
| 361 unsigned short reordered[16]; | |
| 362 unsigned char properties[16]; | |
| 363 enum { | |
| 364 AboveForm = 0x01, | |
| 365 PreForm = 0x02, | |
| 366 PostForm = 0x04, | |
| 367 BelowForm = 0x08 | |
| 368 }; | |
| 369 #ifndef NO_OPENTYPE | |
| 370 const int availableGlyphs = item->num_glyphs; | |
| 371 #endif | |
| 372 int coengRo; | |
| 373 int i; | |
| 374 | |
| 375 /* according to the specs this is the max length one can get | |
| 376 ### the real value should be smaller */ | |
| 377 assert(item->item.length < 13); | |
| 378 | |
| 379 memset(properties, 0, 16*sizeof(unsigned char)); | |
| 380 | |
| 381 #ifdef KHMER_DEBUG | |
| 382 qDebug("original:"); | |
| 383 for (int i = from; i < syllableEnd; i++) { | |
| 384 qDebug(" %d: %4x", i, string[i]); | |
| 385 } | |
| 386 #endif | |
| 387 | |
| 388 /* | |
| 389 // write a pre vowel or the pre part of a split vowel first | |
| 390 // and look out for coeng + ro. RO is the only vowel of type 2, and | |
| 391 // therefore the only one that requires saving space before the base. | |
| 392 */ | |
| 393 coengRo = -1; /* There is no Coeng Ro, if found this value will change */ | |
| 394 for (i = item->item.pos; i < syllableEnd; i += 1) { | |
| 395 KhmerCharClass charClass = getKhmerCharClass(item->string[i]); | |
| 396 | |
| 397 /* if a split vowel, write the pre part. In Khmer the pre part | |
| 398 is the same for all split vowels, same glyph as pre vowel C_VOWEL_E *
/ | |
| 399 if (charClass & CF_SPLIT_VOWEL) { | |
| 400 reordered[len] = C_VOWEL_E; | |
| 401 properties[len] = PreForm; | |
| 402 ++len; | |
| 403 break; /* there can be only one vowel */ | |
| 404 } | |
| 405 /* if a vowel with pos before write it out */ | |
| 406 if (charClass & CF_POS_BEFORE) { | |
| 407 reordered[len] = item->string[i]; | |
| 408 properties[len] = PreForm; | |
| 409 ++len; | |
| 410 break; /* there can be only one vowel */ | |
| 411 } | |
| 412 /* look for coeng + ro and remember position | |
| 413 works because coeng + ro is always in front of a vowel (if there is a
vowel) | |
| 414 and because CC_CONSONANT2 is enough to identify it, as it is the only
consonant | |
| 415 with this flag */ | |
| 416 if ( (charClass & CF_COENG) && (i + 1 < syllableEnd) && | |
| 417 ( (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CON
SONANT2) ) { | |
| 418 coengRo = i; | |
| 419 } | |
| 420 } | |
| 421 | |
| 422 /* write coeng + ro if found */ | |
| 423 if (coengRo > -1) { | |
| 424 reordered[len] = C_COENG; | |
| 425 properties[len] = PreForm; | |
| 426 ++len; | |
| 427 reordered[len] = C_RO; | |
| 428 properties[len] = PreForm; | |
| 429 ++len; | |
| 430 } | |
| 431 | |
| 432 /* | |
| 433 shall we add a dotted circle? | |
| 434 If in the position in which the base should be (first char in the string)
there is | |
| 435 a character that has the Dotted circle flag (a character that cannot be a
base) | |
| 436 then write a dotted circle */ | |
| 437 if (getKhmerCharClass(item->string[item->item.pos]) & CF_DOTTED_CIRCLE) { | |
| 438 reordered[len] = C_DOTTED_CIRCLE; | |
| 439 ++len; | |
| 440 } | |
| 441 | |
| 442 /* copy what is left to the output, skipping before vowels and | |
| 443 coeng Ro if they are present */ | |
| 444 for (i = item->item.pos; i < syllableEnd; i += 1) { | |
| 445 HB_UChar16 uc = item->string[i]; | |
| 446 KhmerCharClass charClass = getKhmerCharClass(uc); | |
| 447 | |
| 448 /* skip a before vowel, it was already processed */ | |
| 449 if (charClass & CF_POS_BEFORE) { | |
| 450 continue; | |
| 451 } | |
| 452 | |
| 453 /* skip coeng + ro, it was already processed */ | |
| 454 if (i == coengRo) { | |
| 455 i += 1; | |
| 456 continue; | |
| 457 } | |
| 458 | |
| 459 switch (charClass & CF_POS_MASK) | |
| 460 { | |
| 461 case CF_POS_ABOVE : | |
| 462 reordered[len] = uc; | |
| 463 properties[len] = AboveForm; | |
| 464 ++len; | |
| 465 break; | |
| 466 | |
| 467 case CF_POS_AFTER : | |
| 468 reordered[len] = uc; | |
| 469 properties[len] = PostForm; | |
| 470 ++len; | |
| 471 break; | |
| 472 | |
| 473 case CF_POS_BELOW : | |
| 474 reordered[len] = uc; | |
| 475 properties[len] = BelowForm; | |
| 476 ++len; | |
| 477 break; | |
| 478 | |
| 479 default: | |
| 480 /* assign the correct flags to a coeng consonant | |
| 481 Consonants of type 3 are taged as Post forms and those type 1
as below forms */ | |
| 482 if ( (charClass & CF_COENG) && i + 1 < syllableEnd ) { | |
| 483 unsigned char property = (getKhmerCharClass(item->string[i+1
]) & CF_CLASS_MASK) == CC_CONSONANT3 ? | |
| 484 PostForm : BelowForm; | |
| 485 reordered[len] = uc; | |
| 486 properties[len] = property; | |
| 487 ++len; | |
| 488 i += 1; | |
| 489 reordered[len] = item->string[i]; | |
| 490 properties[len] = property; | |
| 491 ++len; | |
| 492 break; | |
| 493 } | |
| 494 | |
| 495 /* if a shifter is followed by an above vowel change the shifter
to below form, | |
| 496 an above vowel can have two possible positions i + 1 or i + 3 | |
| 497 (position i+1 corresponds to unicode 3, position i+3 to Unico
de 4) | |
| 498 and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT al
so for two | |
| 499 different positions, right after the shifter or after a vowel
(Unicode 4) */ | |
| 500 if ( (charClass & CF_SHIFTER) && (i + 1 < syllableEnd) ) { | |
| 501 if (getKhmerCharClass(item->string[i+1]) & CF_ABOVE_VOWEL )
{ | |
| 502 reordered[len] = uc; | |
| 503 properties[len] = BelowForm; | |
| 504 ++len; | |
| 505 break; | |
| 506 } | |
| 507 if (i + 2 < syllableEnd && | |
| 508 (item->string[i+1] == C_VOWEL_AA) && | |
| 509 (item->string[i+2] == C_SIGN_NIKAHIT) ) | |
| 510 { | |
| 511 reordered[len] = uc; | |
| 512 properties[len] = BelowForm; | |
| 513 ++len; | |
| 514 break; | |
| 515 } | |
| 516 if (i + 3 < syllableEnd && (getKhmerCharClass(item->string[i
+3]) & CF_ABOVE_VOWEL) ) { | |
| 517 reordered[len] = uc; | |
| 518 properties[len] = BelowForm; | |
| 519 ++len; | |
| 520 break; | |
| 521 } | |
| 522 if (i + 4 < syllableEnd && | |
| 523 (item->string[i+3] == C_VOWEL_AA) && | |
| 524 (item->string[i+4] == C_SIGN_NIKAHIT) ) | |
| 525 { | |
| 526 reordered[len] = uc; | |
| 527 properties[len] = BelowForm; | |
| 528 ++len; | |
| 529 break; | |
| 530 } | |
| 531 } | |
| 532 | |
| 533 /* default - any other characters */ | |
| 534 reordered[len] = uc; | |
| 535 ++len; | |
| 536 break; | |
| 537 } /* switch */ | |
| 538 } /* for */ | |
| 539 | |
| 540 if (!item->font->klass->convertStringToGlyphIndices(item->font, | |
| 541 reordered, len, | |
| 542 item->glyphs, &item->num
_glyphs, | |
| 543 item->item.bidiLevel % 2
)) | |
| 544 return FALSE; | |
| 545 | |
| 546 | |
| 547 KHDEBUG("after shaping: len=%d", len); | |
| 548 for (i = 0; i < len; i++) { | |
| 549 item->attributes[i].mark = FALSE; | |
| 550 item->attributes[i].clusterStart = FALSE; | |
| 551 item->attributes[i].justification = 0; | |
| 552 item->attributes[i].zeroWidth = FALSE; | |
| 553 KHDEBUG(" %d: %4x property=%x", i, reordered[i], properties[i]); | |
| 554 } | |
| 555 | |
| 556 /* now we have the syllable in the right order, and can start running it thr
ough open type. */ | |
| 557 | |
| 558 #ifndef NO_OPENTYPE | |
| 559 if (openType) { | |
| 560 hb_uint32 where[16]; | |
| 561 for (i = 0; i < len; ++i) { | |
| 562 where[i] = ~(PreSubstProperty | |
| 563 | BelowSubstProperty | |
| 564 | AboveSubstProperty | |
| 565 | PostSubstProperty | |
| 566 | CligProperty | |
| 567 | PositioningProperties); | |
| 568 if (properties[i] == PreForm) | |
| 569 where[i] &= ~PreFormProperty; | |
| 570 else if (properties[i] == BelowForm) | |
| 571 where[i] &= ~BelowFormProperty; | |
| 572 else if (properties[i] == AboveForm) | |
| 573 where[i] &= ~AboveFormProperty; | |
| 574 else if (properties[i] == PostForm) | |
| 575 where[i] &= ~PostFormProperty; | |
| 576 } | |
| 577 | |
| 578 HB_OpenTypeShape(item, where); | |
| 579 if (!HB_OpenTypePosition(item, availableGlyphs, /*doLogClusters*/FALSE)) | |
| 580 return FALSE; | |
| 581 } else | |
| 582 #endif | |
| 583 { | |
| 584 KHDEBUG("Not using openType"); | |
| 585 HB_HeuristicPosition(item); | |
| 586 } | |
| 587 | |
| 588 item->attributes[0].clusterStart = TRUE; | |
| 589 return TRUE; | |
| 590 } | |
| 591 | |
| 592 HB_Bool HB_KhmerShape(HB_ShaperItem *item) | |
| 593 { | |
| 594 HB_Bool openType = FALSE; | |
| 595 unsigned short *logClusters = item->log_clusters; | |
| 596 int i; | |
| 597 | |
| 598 HB_ShaperItem syllable = *item; | |
| 599 int first_glyph = 0; | |
| 600 | |
| 601 int sstart = item->item.pos; | |
| 602 int end = sstart + item->item.length; | |
| 603 | |
| 604 assert(item->item.script == HB_Script_Khmer); | |
| 605 | |
| 606 #ifndef NO_OPENTYPE | |
| 607 openType = HB_SelectScript(item, khmer_features); | |
| 608 #endif | |
| 609 | |
| 610 KHDEBUG("khmer_shape: from %d length %d", item->item.pos, item->item.length)
; | |
| 611 while (sstart < end) { | |
| 612 HB_Bool invalid; | |
| 613 int send = khmer_nextSyllableBoundary(item->string, sstart, end, &invali
d); | |
| 614 KHDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart, | |
| 615 invalid ? "TRUE" : "FALSE"); | |
| 616 syllable.item.pos = sstart; | |
| 617 syllable.item.length = send-sstart; | |
| 618 syllable.glyphs = item->glyphs + first_glyph; | |
| 619 syllable.attributes = item->attributes + first_glyph; | |
| 620 syllable.offsets = item->offsets + first_glyph; | |
| 621 syllable.advances = item->advances + first_glyph; | |
| 622 syllable.num_glyphs = item->num_glyphs - first_glyph; | |
| 623 if (!khmer_shape_syllable(openType, &syllable)) { | |
| 624 KHDEBUG("syllable shaping failed, syllable requests %d glyphs", syll
able.num_glyphs); | |
| 625 item->num_glyphs += syllable.num_glyphs; | |
| 626 return FALSE; | |
| 627 } | |
| 628 /* fix logcluster array */ | |
| 629 KHDEBUG("syllable:"); | |
| 630 for (i = first_glyph; i < first_glyph + (int)syllable.num_glyphs; ++i) | |
| 631 KHDEBUG(" %d -> glyph %x", i, item->glyphs[i]); | |
| 632 KHDEBUG(" logclusters:"); | |
| 633 for (i = sstart; i < send; ++i) { | |
| 634 KHDEBUG(" %d -> glyph %d", i, first_glyph); | |
| 635 logClusters[i-item->item.pos] = first_glyph; | |
| 636 } | |
| 637 sstart = send; | |
| 638 first_glyph += syllable.num_glyphs; | |
| 639 } | |
| 640 item->num_glyphs = first_glyph; | |
| 641 return TRUE; | |
| 642 } | |
| 643 | |
| 644 void HB_KhmerAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from
, hb_uint32 len, HB_CharAttributes *attributes) | |
| 645 { | |
| 646 int end = from + len; | |
| 647 const HB_UChar16 *uc = text + from; | |
| 648 hb_uint32 i = 0; | |
| 649 HB_UNUSED(script); | |
| 650 attributes += from; | |
| 651 while ( i < len ) { | |
| 652 HB_Bool invalid; | |
| 653 hb_uint32 boundary = khmer_nextSyllableBoundary( text, from+i, end, &inv
alid ) - from; | |
| 654 | |
| 655 attributes[i].charStop = TRUE; | |
| 656 | |
| 657 if ( boundary > len-1 ) boundary = len; | |
| 658 i++; | |
| 659 while ( i < boundary ) { | |
| 660 attributes[i].charStop = FALSE; | |
| 661 ++uc; | |
| 662 ++i; | |
| 663 } | |
| 664 assert( i == boundary ); | |
| 665 } | |
| 666 } | |
| 667 | |
| OLD | NEW |