third_party/harfbuzz/src/harfbuzz-khmer.c - Issue 12413010: Remove unused harfbuzz.

Side by Side Diff: third_party/harfbuzz/src/harfbuzz-khmer.c

Issue 12413010: Remove unused harfbuzz. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 /*

2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)

3 *

4 * This is part of HarfBuzz, an OpenType Layout engine library.

5 *

6 * Permission is hereby granted, without written agreement and without

7 * license or royalty fees, to use, copy, modify, and distribute this

8 * software and its documentation for any purpose, provided that the

9 * above copyright notice and the following two paragraphs appear in

10 * all copies of this software.

11 *

12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR

13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES

14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN

15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH

16 * DAMAGE.

17 *

18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,

19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS

21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO

22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

23 */

24

25 #include "harfbuzz-shaper.h"

26 #include "harfbuzz-shaper-private.h"

27

28 #include <assert.h>

29 #include <stdio.h>

30

/*
//  Vocabulary
//     Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the
//                     center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
//                     split vowels, signs... but there is only one base in a syllable, it has to be coded as
//                     the first character of the syllable.
//     split vowel --> vowel that has two parts placed separately (e.g. before and after the consonant).
//                     Khmer language has five of them. Khmer split vowels either have one part before the
//                     base and one after the base or they have a part before the base and a part above the base.
//                     The first part of all Khmer split vowels is the same character, identical to
//                     the glyph of Khmer dependent vowel SRA EI
//     coeng -->  modifier used in Khmer to construct coeng (subscript) consonants
//                Differently than Indian languages, the coeng modifies the consonant that follows it,
//                not the one preceding it. Each consonant has two forms, the base form and the subscript form;
//                the base form is the normal one (using the consonant's code point), the subscript form is
//                displayed when the combination coeng + consonant is encountered.
//     Consonant of type 1 -> A consonant which has a subscript form that only occupies space under a base consonant
//     Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
//     Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
//     Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
//                          if it is attached to a consonant of the first series or a consonant of the second series.
//                          Most consonants have an equivalent in the other series, but some of them exist only in
//                          one series (for example SA). If we want to use the consonant SA with a vowel sound that
//                          can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
//                          of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN,
//                          x17C9 and x17CA. TRIISAP changes a first series consonant to second series sound and
//                          MUSIKATOAN a second series consonant to have a first series vowel sound.
//                          Consonant shifters are both normally superscript marks, but, when they are followed by a
//                          superscript, they change shape and take the form of subscript dependent vowel SRA U.
//                          If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
//                          should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
//                          be placed after the coeng consonant.
//     Dependent vowel ->   In Khmer, dependent vowels can be placed above, below, before or after the base.
//                          Each vowel has its own position. Only one vowel per syllable is allowed.
//     Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign are
//                          allowed in a syllable.
//
//
//   order is important here! This order must be the same that is found in each horizontal
//   line in the statetable for Khmer (see khmerStateTable).
*/

/* Character class values. Each value is used directly as the column index
   into khmerStateTable, so the numbering must stay in step with the
   column order of that table. */
enum KhmerCharClassValues {
    CC_RESERVED             =  0,
    CC_CONSONANT            =  1, /* Consonant of type 1 or independent vowel */
    CC_CONSONANT2           =  2, /* Consonant of type 2 */
    CC_CONSONANT3           =  3, /* Consonant of type 3 */
    CC_ZERO_WIDTH_NJ_MARK   =  4, /* Zero width non-joiner character (0x200C) */
    CC_CONSONANT_SHIFTER    =  5,
    CC_ROBAT                =  6, /* Khmer special diacritic accent - treated differently in state table */
    CC_COENG                =  7, /* Subscript consonant combining character */
    CC_DEPENDENT_VOWEL      =  8,
    CC_SIGN_ABOVE           =  9,
    CC_SIGN_AFTER           = 10,
    CC_ZERO_WIDTH_J_MARK    = 11, /* Zero width joiner character */
    CC_COUNT                = 12  /* This is the number of character classes */
};

87

88

/* Flags ORed on top of a KhmerCharClassValues value. The low 16 bits
   (CF_CLASS_MASK) carry the class value itself; the position flags occupy
   bits 16-19 and the remaining flags sit in the top byte. */
enum KhmerCharClassFlags {
    CF_CLASS_MASK    = 0x0000FFFF,

    CF_CONSONANT     = 0x01000000,  /* flag to speed up comparing */
    CF_SPLIT_VOWEL   = 0x02000000,  /* flag for a split vowel -> the first part is added in front of the syllable */
    CF_DOTTED_CIRCLE = 0x04000000,  /* add a dotted circle if a character with this flag is the first in a syllable */
    CF_COENG         = 0x08000000,  /* flag to speed up comparing */
    CF_SHIFTER       = 0x10000000,  /* flag to speed up comparing */
    CF_ABOVE_VOWEL   = 0x20000000,  /* flag to speed up comparing */

    /* position flags */
    CF_POS_BEFORE    = 0x00080000,
    CF_POS_BELOW     = 0x00040000,
    CF_POS_ABOVE     = 0x00020000,
    CF_POS_AFTER     = 0x00010000,
    CF_POS_MASK      = 0x000f0000
};

106

107

/* Characters that get referred to by name (UTF-16 code units) */
enum KhmerChar {
    C_SIGN_ZWNJ     = 0x200C,
    C_SIGN_ZWJ      = 0x200D,
    C_RO            = 0x179A,
    C_VOWEL_AA      = 0x17B6,
    C_SIGN_NIKAHIT  = 0x17C6,
    C_VOWEL_E       = 0x17C1,
    C_COENG         = 0x17D2
};

118

119

120 /*

121 // simple classes, they are used in the statetable (in this file) to control th e length of a syllable

122 // they are also used to know where a character should be placed (location in r eference to the base character)

123 // and also to know if a character, when independently displayed, should be dis played with a dotted-circle to

124 // indicate error in syllable construction

125 */

126 enum {

127 _xx = CC_RESERVED,

128 _sa = CC_SIGN_ABOVE \| CF_DOTTED_CIRCLE \| CF_POS_ABOVE,

129 _sp = CC_SIGN_AFTER \| CF_DOTTED_CIRCLE\| CF_POS_AFTER,

130 _c1 = CC_CONSONANT \| CF_CONSONANT,

131 _c2 = CC_CONSONANT2 \| CF_CONSONANT,

132 _c3 = CC_CONSONANT3 \| CF_CONSONANT,

133 _rb = CC_ROBAT \| CF_POS_ABOVE \| CF_DOTTED_CIRCLE,

134 _cs = CC_CONSONANT_SHIFTER \| CF_DOTTED_CIRCLE \| CF_SHIFTER,

135 _dl = CC_DEPENDENT_VOWEL \| CF_POS_BEFORE \| CF_DOTTED_CIRCLE,

136 _db = CC_DEPENDENT_VOWEL \| CF_POS_BELOW \| CF_DOTTED_CIRCLE,

137 _da = CC_DEPENDENT_VOWEL \| CF_POS_ABOVE \| CF_DOTTED_CIRCLE \| CF_ABOVE_VOWEL,

138 _dr = CC_DEPENDENT_VOWEL \| CF_POS_AFTER \| CF_DOTTED_CIRCLE,

139 _co = CC_COENG \| CF_COENG \| CF_DOTTED_CIRCLE,

140

141 /* split vowel */

142 _va = _da \| CF_SPLIT_VOWEL,

143 _vr = _dr \| CF_SPLIT_VOWEL

144 };

145

146

147 /*

148 // Character class: a character class value

149 // ORed with character class flags.

150 */

151 typedef unsigned long KhmerCharClass;

152

153

154 /*

155 // Character class tables

156 // _xx character does not combine into syllable, such as numbers, puntuation ma rks, non-Khmer signs...

157 // _sa Sign placed above the base

158 // _sp Sign placed after the base

159 // _c1 Consonant of type 1 or independent vowel (independent vowels behave as t ype 1 consonants)

160 // _c2 Consonant of type 2 (only RO)

161 // _c3 Consonant of type 3

162 // _rb Khmer sign robat u17CC. combining mark for subscript consonants

163 // _cd Consonant-shifter

164 // _dl Dependent vowel placed before the base (left of the base)

165 // _db Dependent vowel placed below the base

166 // _da Dependent vowel placed above the base

167 // _dr Dependent vowel placed behind the base (right of the base)

168 // _co Khmer combining mark COENG u17D2, combines with the consonant or indepen dent vowel following

169 // it to create a subscript consonant or independent vowel

170 // _va Khmer split vowel in which the first part is before the base and the sec ond one above the base

171 // _vr Khmer split vowel in which the first part is before the base and the sec ond one behind (right of) the base

172 */

173 static const KhmerCharClass khmerCharClasses[] = {

174 _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _ c1, /* 1780 - 178F */

175 _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _ c3, /* 1790 - 179F */

176 _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _ c1, /* 17A0 - 17AF */

177 _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _ vr, /* 17B0 - 17BF */

178 _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _ sa, /* 17C0 - 17CF */

179 _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _ xx /* 17D0 - 17DF */

180 };

181

/* This enum must reflect the range of khmerCharClasses: the table holds one
   entry per code unit from KhmerFirstChar to KhmerLastChar inclusive. */
enum KhmerCharClassesRange {
    KhmerFirstChar = 0x1780,
    KhmerLastChar  = 0x17df
};

187

188 /*

189 // Below we define how a character in the input string is either in the khmerCh arClasses table

190 // (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear

191 // within the syllable, but are not in the table) we also get their type back, or an unknown object

192 // in which case we get _xx (CC_RESERVED) back

193 */

194 static KhmerCharClass getKhmerCharClass(HB_UChar16 uc)

195 {

196 if (uc == C_SIGN_ZWJ) {

197 return CC_ZERO_WIDTH_J_MARK;

198 }

199

200 if (uc == C_SIGN_ZWNJ) {

201 return CC_ZERO_WIDTH_NJ_MARK;

202 }

203

204 if (uc < KhmerFirstChar \|\| uc > KhmerLastChar) {

205 return CC_RESERVED;

206 }

207

208 return khmerCharClasses[uc - KhmerFirstChar];

209 }

210

211

212 /*

213 // The stateTable is used to calculate the end (the length) of a well

214 // formed Khmer Syllable.

215 //

216 // Each horizontal line is ordered exactly the same way as the values in KhmerC lassTable

217 // CharClassValues. This coincidence of values allows the follow up of the tabl e.

218 //

219 // Each line corresponds to a state, which does not necessarily need to be a ty pe

220 // of component... for example, state 2 is a base, with is always a first chara cter

221 // in the syllable, but the state could be produced a consonant of any type whe n

222 // it is the first character that is analysed (in ground state).

223 //

224 // Differentiating 3 types of consonants is necessary in order to

225 // forbid the use of certain combinations, such as having a second

226 // coeng after a coeng RO,

227 // The inexistent possibility of having a type 3 after another type 3 is permit ted,

228 // eliminating it would very much complicate the table, and it does not create typing

229 // problems, as the case above.

230 //

231 // The table is quite complex, in order to limit the number of coeng consonants

232 // to 2 (by means of the table).

233 //

234 // There a peculiarity, as far as Unicode is concerned:

235 // - The consonant-shifter is considered in two possible different

236 // locations, the one considered in Unicode 3.0 and the one considered in

237 // Unicode 4.0. (there is a backwards compatibility problem in this standard) .

238 //

239 //

240 // xx independent character, such as a number, punctuation sign or non-khmer char

241 //

242 // c1 Khmer consonant of type 1 or an independent vowel

243 // that is, a letter in which the subscript for is only under the

244 // base, not taking any space to the right or to the left

245 //

246 // c2 Khmer consonant of type 2, the coeng form takes space under

247 // and to the left of the base (only RO is of this type)

248 //

249 // c3 Khmer consonant of type 3. Its subscript form takes space under

250 // and to the right of the base.

251 //

252 // cs Khmer consonant shifter

253 //

254 // rb Khmer robat

255 //

256 // co coeng character (u17D2)

257 //

258 // dv dependent vowel (including split vowels, they are treated in the same way).

259 // even if dv is not defined above, the component that is really tested f or is

260 // KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels

261 //

262 // zwj Zero Width joiner

263 //

264 // zwnj Zero width non joiner

265 //

266 // sa above sign

267 //

268 // sp post sign

269 //

270 // there are lines with equal content but for an easier understanding

271 // (and maybe change in the future) we did not join them

272 */

273 static const signed char khmerStateTable[][CC_COUNT] =

274 {

275 /* xx c1 c2 c3 zwnj cs rb co dv sa sp zwj */

276 { 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, /* 0 - ground state */

277 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 1 - exit state (or sig n to the right of the syllable) */

278 {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, /* 2 - Base consonant */

279 {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, /* 3 - First ZWNJ before a register shifter It can only be followed by a shifter or a vowel */

280 {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, /* 4 - First register shi fter */

281 {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, /* 5 - Robat */

282 {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, /* 6 - First Coeng */

283 {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 7 - First consonant of type 1 after coeng */

284 {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, /* 8 - First consonant of type 2 after coeng */

285 {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 9 - First consonant or type 3 after ceong */

286 {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no r egister shifter before) */

287 {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 11 - Second coeng conso nant (or ind. vowel) no register shifter before */

288 {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */

289 {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 13 - Second register sh ifter */

290 {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel * /

291 {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */

292 {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, /* 16 - dependent vowel */

293 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, /* 17 - sign above */

294 {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */

295 {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */

296 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, /* 20 - dependent vowel af ter a Robat */

297 };

298

299

/* #define KHMER_DEBUG */

/*
 * Debug tracing. With KHMER_DEBUG undefined, KHDEBUG(...) expands to a
 * printf call sitting in a branch that is never taken, so the arguments are
 * still type-checked but nothing is printed.
 *
 * The disabled form is "if (1) {} else printf" rather than "if (0) printf":
 * the macro supplies its own else, so an "else" written after a KHDEBUG(...)
 * statement binds to the caller's "if" instead of being captured by the macro.
 */
#ifdef KHMER_DEBUG
#define KHDEBUG qDebug
#else
#define KHDEBUG if (1) {} else printf
#endif

306

307 /*

308 // Given an input string of characters and a location in which to start looking

309 // calculate, using the state table, which one is the last character of the syl lable

310 // that starts in the starting position.

311 */

312 static int khmer_nextSyllableBoundary(const HB_UChar16 s, int start, int end, H B_Bool invalid)

313 {

314 const HB_UChar16 *uc = s + start;

315 int state = 0;

316 int pos = start;

317 *invalid = FALSE;

318

319 while (pos < end) {

320 KhmerCharClass charClass = getKhmerCharClass(*uc);

321 if (pos == start) {

322 *invalid = (charClass > 0) && ! (charClass & CF_CONSONANT);

323 }

324 state = khmerStateTable[state][charClass & CF_CLASS_MASK];

325

326 KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state,

327 charClass, *uc );

328

329 if (state < 0) {

330 break;

331 }

332 ++uc;

333 ++pos;

334 }

335 return pos;

336 }

337

338 #ifndef NO_OPENTYPE

339 static const HB_OpenTypeFeature khmer_features[] = {

340 { HB_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty },

341 { HB_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty },

342 { HB_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty },

343 { HB_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty },

344 { HB_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty },

345 { HB_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty },

346 { HB_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty },

347 { HB_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty },

348 { HB_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty },

349 { 0, 0 }

350 };

351 #endif

352

353

354 static HB_Bool khmer_shape_syllable(HB_Bool openType, HB_ShaperItem *item)

355 {

356 /* KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length,

357 item->string->mid(item->from, item->length).toUtf8().data()); */

358

359 int len = 0;

360 int syllableEnd = item->item.pos + item->item.length;

361 unsigned short reordered[16];

362 unsigned char properties[16];

363 enum {

364 AboveForm = 0x01,

365 PreForm = 0x02,

366 PostForm = 0x04,

367 BelowForm = 0x08

368 };

369 #ifndef NO_OPENTYPE

370 const int availableGlyphs = item->num_glyphs;

371 #endif

372 int coengRo;

373 int i;

374

375 /* according to the specs this is the max length one can get

376 ### the real value should be smaller */

377 assert(item->item.length < 13);

378

379 memset(properties, 0, 16*sizeof(unsigned char));

380

381 #ifdef KHMER_DEBUG

382 qDebug("original:");

383 for (int i = from; i < syllableEnd; i++) {

384 qDebug(" %d: %4x", i, string[i]);

385 }

386 #endif

387

388 /*

389 // write a pre vowel or the pre part of a split vowel first

390 // and look out for coeng + ro. RO is the only vowel of type 2, and

391 // therefore the only one that requires saving space before the base.

392 */

393 coengRo = -1; /* There is no Coeng Ro, if found this value will change */

394 for (i = item->item.pos; i < syllableEnd; i += 1) {

395 KhmerCharClass charClass = getKhmerCharClass(item->string[i]);

396

397 /* if a split vowel, write the pre part. In Khmer the pre part

398 is the same for all split vowels, same glyph as pre vowel C_VOWEL_E * /

399 if (charClass & CF_SPLIT_VOWEL) {

400 reordered[len] = C_VOWEL_E;

401 properties[len] = PreForm;

402 ++len;

403 break; /* there can be only one vowel */

404 }

405 /* if a vowel with pos before write it out */

406 if (charClass & CF_POS_BEFORE) {

407 reordered[len] = item->string[i];

408 properties[len] = PreForm;

409 ++len;

410 break; /* there can be only one vowel */

411 }

412 /* look for coeng + ro and remember position

413 works because coeng + ro is always in front of a vowel (if there is a vowel)

414 and because CC_CONSONANT2 is enough to identify it, as it is the only consonant

415 with this flag */

416 if ( (charClass & CF_COENG) && (i + 1 < syllableEnd) &&

417 ( (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CON SONANT2) ) {

418 coengRo = i;

419 }

420 }

421

422 /* write coeng + ro if found */

423 if (coengRo > -1) {

424 reordered[len] = C_COENG;

425 properties[len] = PreForm;

426 ++len;

427 reordered[len] = C_RO;

428 properties[len] = PreForm;

429 ++len;

430 }

431

432 /*

433 shall we add a dotted circle?

434 If in the position in which the base should be (first char in the string) there is

435 a character that has the Dotted circle flag (a character that cannot be a base)

436 then write a dotted circle */

437 if (getKhmerCharClass(item->string[item->item.pos]) & CF_DOTTED_CIRCLE) {

438 reordered[len] = C_DOTTED_CIRCLE;

439 ++len;

440 }

441

442 /* copy what is left to the output, skipping before vowels and

443 coeng Ro if they are present */

444 for (i = item->item.pos; i < syllableEnd; i += 1) {

445 HB_UChar16 uc = item->string[i];

446 KhmerCharClass charClass = getKhmerCharClass(uc);

447

448 /* skip a before vowel, it was already processed */

449 if (charClass & CF_POS_BEFORE) {

450 continue;

451 }

452

453 /* skip coeng + ro, it was already processed */

454 if (i == coengRo) {

455 i += 1;

456 continue;

457 }

458

459 switch (charClass & CF_POS_MASK)

460 {

461 case CF_POS_ABOVE :

462 reordered[len] = uc;

463 properties[len] = AboveForm;

464 ++len;

465 break;

466

467 case CF_POS_AFTER :

468 reordered[len] = uc;

469 properties[len] = PostForm;

470 ++len;

471 break;

472

473 case CF_POS_BELOW :

474 reordered[len] = uc;

475 properties[len] = BelowForm;

476 ++len;

477 break;

478

479 default:

480 /* assign the correct flags to a coeng consonant

481 Consonants of type 3 are taged as Post forms and those type 1 as below forms */

482 if ( (charClass & CF_COENG) && i + 1 < syllableEnd ) {

483 unsigned char property = (getKhmerCharClass(item->string[i+1 ]) & CF_CLASS_MASK) == CC_CONSONANT3 ?

484 PostForm : BelowForm;

485 reordered[len] = uc;

486 properties[len] = property;

487 ++len;

488 i += 1;

489 reordered[len] = item->string[i];

490 properties[len] = property;

491 ++len;

492 break;

493 }

494

495 /* if a shifter is followed by an above vowel change the shifter to below form,

496 an above vowel can have two possible positions i + 1 or i + 3

497 (position i+1 corresponds to unicode 3, position i+3 to Unico de 4)

498 and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT al so for two

499 different positions, right after the shifter or after a vowel (Unicode 4) */

500 if ( (charClass & CF_SHIFTER) && (i + 1 < syllableEnd) ) {

501 if (getKhmerCharClass(item->string[i+1]) & CF_ABOVE_VOWEL ) {

502 reordered[len] = uc;

503 properties[len] = BelowForm;

504 ++len;

505 break;

506 }

507 if (i + 2 < syllableEnd &&

508 (item->string[i+1] == C_VOWEL_AA) &&

509 (item->string[i+2] == C_SIGN_NIKAHIT) )

510 {

511 reordered[len] = uc;

512 properties[len] = BelowForm;

513 ++len;

514 break;

515 }

516 if (i + 3 < syllableEnd && (getKhmerCharClass(item->string[i +3]) & CF_ABOVE_VOWEL) ) {

517 reordered[len] = uc;

518 properties[len] = BelowForm;

519 ++len;

520 break;

521 }

522 if (i + 4 < syllableEnd &&

523 (item->string[i+3] == C_VOWEL_AA) &&

524 (item->string[i+4] == C_SIGN_NIKAHIT) )

525 {

526 reordered[len] = uc;

527 properties[len] = BelowForm;

528 ++len;

529 break;

530 }

531 }

532

533 /* default - any other characters */

534 reordered[len] = uc;

535 ++len;

536 break;

537 } /* switch */

538 } /* for */

539

540 if (!item->font->klass->convertStringToGlyphIndices(item->font,

541 reordered, len,

542 item->glyphs, &item->num _glyphs,

543 item->item.bidiLevel % 2 ))

544 return FALSE;

545

546

547 KHDEBUG("after shaping: len=%d", len);

548 for (i = 0; i < len; i++) {

549 item->attributes[i].mark = FALSE;

550 item->attributes[i].clusterStart = FALSE;

551 item->attributes[i].justification = 0;

552 item->attributes[i].zeroWidth = FALSE;

553 KHDEBUG(" %d: %4x property=%x", i, reordered[i], properties[i]);

554 }

555

556 /* now we have the syllable in the right order, and can start running it thr ough open type. */

557

558 #ifndef NO_OPENTYPE

559 if (openType) {

560 hb_uint32 where[16];

561 for (i = 0; i < len; ++i) {

562 where[i] = ~(PreSubstProperty

563 \| BelowSubstProperty

564 \| AboveSubstProperty

565 \| PostSubstProperty

566 \| CligProperty

567 \| PositioningProperties);

568 if (properties[i] == PreForm)

569 where[i] &= ~PreFormProperty;

570 else if (properties[i] == BelowForm)

571 where[i] &= ~BelowFormProperty;

572 else if (properties[i] == AboveForm)

573 where[i] &= ~AboveFormProperty;

574 else if (properties[i] == PostForm)

575 where[i] &= ~PostFormProperty;

576 }

577

578 HB_OpenTypeShape(item, where);

579 if (!HB_OpenTypePosition(item, availableGlyphs, /doLogClusters/FALSE))

580 return FALSE;

581 } else

582 #endif

583 {

584 KHDEBUG("Not using openType");

585 HB_HeuristicPosition(item);

586 }

587

588 item->attributes[0].clusterStart = TRUE;

589 return TRUE;

590 }

591

592 HB_Bool HB_KhmerShape(HB_ShaperItem *item)

593 {

594 HB_Bool openType = FALSE;

595 unsigned short *logClusters = item->log_clusters;

596 int i;

597

598 HB_ShaperItem syllable = *item;

599 int first_glyph = 0;

600

601 int sstart = item->item.pos;

602 int end = sstart + item->item.length;

603

604 assert(item->item.script == HB_Script_Khmer);

605

606 #ifndef NO_OPENTYPE

607 openType = HB_SelectScript(item, khmer_features);

608 #endif

609

610 KHDEBUG("khmer_shape: from %d length %d", item->item.pos, item->item.length) ;

611 while (sstart < end) {

612 HB_Bool invalid;

613 int send = khmer_nextSyllableBoundary(item->string, sstart, end, &invali d);

614 KHDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,

615 invalid ? "TRUE" : "FALSE");

616 syllable.item.pos = sstart;

617 syllable.item.length = send-sstart;

618 syllable.glyphs = item->glyphs + first_glyph;

619 syllable.attributes = item->attributes + first_glyph;

620 syllable.offsets = item->offsets + first_glyph;

621 syllable.advances = item->advances + first_glyph;

622 syllable.num_glyphs = item->num_glyphs - first_glyph;

623 if (!khmer_shape_syllable(openType, &syllable)) {

624 KHDEBUG("syllable shaping failed, syllable requests %d glyphs", syll able.num_glyphs);

625 item->num_glyphs += syllable.num_glyphs;

626 return FALSE;

627 }

628 /* fix logcluster array */

629 KHDEBUG("syllable:");

630 for (i = first_glyph; i < first_glyph + (int)syllable.num_glyphs; ++i)

631 KHDEBUG(" %d -> glyph %x", i, item->glyphs[i]);

632 KHDEBUG(" logclusters:");

633 for (i = sstart; i < send; ++i) {

634 KHDEBUG(" %d -> glyph %d", i, first_glyph);

635 logClusters[i-item->item.pos] = first_glyph;

636 }

637 sstart = send;

638 first_glyph += syllable.num_glyphs;

639 }

640 item->num_glyphs = first_glyph;

641 return TRUE;

642 }

643

644 void HB_KhmerAttributes(HB_Script script, const HB_UChar16 text, hb_uint32 from , hb_uint32 len, HB_CharAttributes attributes)

645 {

646 int end = from + len;

647 const HB_UChar16 *uc = text + from;

648 hb_uint32 i = 0;

649 HB_UNUSED(script);

650 attributes += from;

651 while ( i < len ) {

652 HB_Bool invalid;

653 hb_uint32 boundary = khmer_nextSyllableBoundary( text, from+i, end, &inv alid ) - from;

654

655 attributes[i].charStop = TRUE;

656

657 if ( boundary > len-1 ) boundary = len;

658 i++;

659 while ( i < boundary ) {

660 attributes[i].charStop = FALSE;

661 ++uc;

662 ++i;

663 }

664 assert( i == boundary );

665 }

666 }

667

OLD	NEW

« no previous file with comments | « third_party/harfbuzz/src/harfbuzz-indic.cpp ('k') | third_party/harfbuzz/src/harfbuzz-myanmar.c » ('j') | no next file with comments »