third_party/WebKit/Source/platform/fonts/Character.cpp - Issue 1847853004: Move Character.h from platform/fonts to platform/text

Side by Side Diff: third_party/WebKit/Source/platform/fonts/Character.cpp

Issue 1847853004: Move Character.h from platform/fonts to platform/text (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: rebase again Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /*

2 * Copyright (C) 2014 Google Inc. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without

5 * modification, are permitted provided that the following conditions are

6 * met:

7 *

8 * * Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.

10 * * Redistributions in binary form must reproduce the above

11 * copyright notice, this list of conditions and the following disclaimer

12 * in the documentation and/or other materials provided with the

13 * distribution.

14 * * Neither the name of Google Inc. nor the names of its

15 * contributors may be used to endorse or promote products derived from

16 * this software without specific prior written permission.

17 *

18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

29 */

30

31 #include "platform/fonts/Character.h"

32

33 #include "wtf/StdLibExtras.h"

34 #include "wtf/text/StringBuilder.h"

35 #include <algorithm>

36 #include <unicode/uobject.h>

37 #include <unicode/uscript.h>

38

39 #if defined(USING_SYSTEM_ICU)

40 #include "platform/fonts/CharacterPropertyDataGenerator.h"

41 #include <unicode/uniset.h>

42 #else

43 #define MUTEX_H // Prevent compile failure of utrie2.h on Windows

44 #include <utrie2.h>

45 #endif

46

47 using namespace WTF;

48 using namespace Unicode;

49

50 namespace blink {

51

52 #if defined(USING_SYSTEM_ICU)

53 static icu::UnicodeSet* createUnicodeSet(

54 const UChar32* characters, size_t charactersCount,

55 const UChar32* ranges, size_t rangesCount)

56 {

57 icu::UnicodeSet* unicodeSet = new icu::UnicodeSet();

58 for (size_t i = 0; i < charactersCount; i++)

59 unicodeSet->add(characters[i]);

60 for (size_t i = 0; i < rangesCount; i += 2)

61 unicodeSet->add(ranges[i], ranges[i + 1]);

62 unicodeSet->freeze();

63 return unicodeSet;

64 }

65

// Expands to a createUnicodeSet() call over the tables |name|Array (single
// code points) and |name|Ranges (flattened ranges). Presumably both tables
// come from CharacterPropertyDataGenerator.h -- TODO confirm.
#define CREATE_UNICODE_SET(name) \
    createUnicodeSet( \
        name##Array, WTF_ARRAY_LENGTH(name##Array), \
        name##Ranges, WTF_ARRAY_LENGTH(name##Ranges))

// Function-body helper for the system-ICU build: creates the UnicodeSet for
// |name| on first use, keeps it in a function-local static, and returns
// whether it contains |c|. The first-use check is a plain pointer test with
// no locking.
#define RETURN_HAS_PROPERTY(c, name) \
    static icu::UnicodeSet* propertySet = nullptr; \
    if (!propertySet) \
        propertySet = CREATE_UNICODE_SET(name); \
    return propertySet->contains(c);

76 #else

77 // Freezed trie tree, see CharacterDataGenerator.cpp.

78 extern int32_t serializedCharacterDataSize;

79 extern uint8_t serializedCharacterData[];

80

81 static UTrie2* createTrie()

82 {

83 // Create a Trie from the value array.

84 UErrorCode error = U_ZERO_ERROR;

85 UTrie2* trie = utrie2_openFromSerialized(

86 UTrie2ValueBits::UTRIE2_16_VALUE_BITS,

87 serializedCharacterData, serializedCharacterDataSize,

88 nullptr, &error);

89 ASSERT(error == U_ZERO_ERROR);

90 return trie;

91 }

92

93 static bool hasProperty(UChar32 c, CharacterProperty property)

94 {

95 static UTrie2* trie = nullptr;

96 if (!trie)

97 trie = createTrie();

98 return UTRIE2_GET16(trie, c)

99 & static_cast<CharacterPropertyType>(property);

100 }

101

102 #define RETURN_HAS_PROPERTY(c, name) \

103 return hasProperty(c, CharacterProperty::name);

104 #endif

105

// Reports whether |value| falls inside one of the closed intervals stored in
// |intervalList| as a flat, ascending sequence {low0, high0, low1, high1, ...}.
template <class T, size_t size>
bool valueInIntervalList(const T (&intervalList)[size], const T& value)
{
    const T* const begin = intervalList;
    const T* const firstGreater = std::upper_bound(begin, begin + size, value);
    const size_t position = static_cast<size_t>(firstGreater - begin);

    // An odd position means the first element above |value| is an upper
    // bound, so low <= value < high for that interval.
    if (position % 2 == 1)
        return true;

    // At an even position |value| is only contained if it equals the upper
    // bound of the interval that ends just before |firstGreater|.
    return position != 0 && firstGreater[-1] == value;
}

115

116 CodePath Character::characterRangeCodePath(const UChar* characters, unsigned len )

117 {

118 static const UChar complexCodePathRanges[] = {

119 // U+02E5 through U+02E9 (Modifier Letters : Tone letters)

120 0x2E5, 0x2E9,

121 // U+0300 through U+036F Combining diacritical marks

122 0x300, 0x36F,

123 // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, ...

124 0x0591, 0x05BD,

125 // ... Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha

126 0x05BF, 0x05CF,

127 // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic ,

128 // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannad a,

129 // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar

130 0x0600, 0x109F,

131 // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left

132 // here if you precompose; Modern Korean will be precomposed as a result of step A)

133 0x1100, 0x11FF,

134 // U+135D through U+135F Ethiopic combining marks

135 0x135D, 0x135F,

136 // U+1780 through U+18AF Tagalog, Hanunoo, Buhid, Taghanwa,Khmer, Mongol ian

137 0x1700, 0x18AF,

138 // U+1900 through U+194F Limbu (Unicode 4.0)

139 0x1900, 0x194F,

140 // U+1980 through U+19DF New Tai Lue

141 0x1980, 0x19DF,

142 // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Ve dic

143 0x1A00, 0x1CFF,

144 // U+1DC0 through U+1DFF Comining diacritical mark supplement

145 0x1DC0, 0x1DFF,

146 // U+20D0 through U+20FF Combining marks for symbols

147 0x20D0, 0x20FF,

148 // U+2CEF through U+2CF1 Combining marks for Coptic

149 0x2CEF, 0x2CF1,

150 // U+302A through U+302F Ideographic and Hangul Tone marks

151 0x302A, 0x302F,

152 // Combining Katakana-Hiragana Voiced/Semi-voiced Sound Mark

153 0x3099, 0x309A,

154 // U+A67C through U+A67D Combining marks for old Cyrillic

155 0xA67C, 0xA67D,

156 // U+A6F0 through U+A6F1 Combining mark for Bamum

157 0xA6F0, 0xA6F1,

158 // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extende d,

159 // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Ma yek

160 0xA800, 0xABFF,

161 // U+D7B0 through U+D7FF Hangul Jamo Ext. B

162 0xD7B0, 0xD7FF,

163 // U+FE00 through U+FE0F Unicode variation selectors

164 0xFE00, 0xFE0F,

165 // U+FE20 through U+FE2F Combining half marks

166 0xFE20, 0xFE2F

167 };

168

169 CodePath result = SimplePath;

170 for (unsigned i = 0; i < len; i++) {

171 const UChar c = characters[i];

172

173 // Shortcut for common case

174 if (c < 0x2E5)

175 continue;

176

177 // Surrogate pairs

178 if (c > 0xD7FF && c <= 0xDBFF) {

179 if (i == len - 1)

180 continue;

181

182 UChar next = characters[++i];

183 if (!U16_IS_TRAIL(next))

184 continue;

185

186 UChar32 supplementaryCharacter = U16_GET_SUPPLEMENTARY(c, next);

187

188 if (supplementaryCharacter < 0x1F1E6) // U+1F1E6 through U+1F1FF Reg ional Indicator Symbols

189 continue;

190 if (supplementaryCharacter <= 0x1F1FF)

191 return ComplexPath;

192

193 // Emoji Fitzpatrick modifiers trigger upgrade to complex path for s haping them.

194 if (supplementaryCharacter < 0x1F3FB)

195 continue;

196 if (supplementaryCharacter <= 0x1F3FF)

197 return ComplexPath;

198

199 if (supplementaryCharacter == eyeCharacter)

200 return ComplexPath;

201

202 // Man and Woman Emojies,

203 // in order to support emoji joiner combinations for family and coup le pictographs.

204 // Compare http://unicode.org/reports/tr51/#Emoji_ZWJ_Sequences

205 if (supplementaryCharacter < 0x1F468)

206 continue;

207 if (supplementaryCharacter <= 0x1F469)

208 return ComplexPath;

209

210 if (supplementaryCharacter == leftSpeechBubbleCharacter)

211 return ComplexPath;

212

213 if (supplementaryCharacter < 0xE0100) // U+E0100 through U+E01EF Uni code variation selectors.

214 continue;

215 if (supplementaryCharacter <= 0xE01EF)

216 return ComplexPath;

217

218 // FIXME: Check for Brahmi (U+11000 block), Kaithi (U+11080 block) a nd other complex scripts

219 // in plane 1 or higher.

220

221 continue;

222 }

223

224 // Search for other Complex cases

225 if (valueInIntervalList(complexCodePathRanges, c))

226 return ComplexPath;

227 }

228

229 return result;

230 }

231

232 bool Character::isUprightInMixedVertical(UChar32 character)

233 {

234 RETURN_HAS_PROPERTY(character, isUprightInMixedVertical)

235 }

236

237 bool Character::isCJKIdeographOrSymbol(UChar32 c)

238 {

239 // Likely common case

240 if (c < 0x2C7)

241 return false;

242

243 RETURN_HAS_PROPERTY(c, isCJKIdeographOrSymbol)

244 }

245

246 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t le ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus tify)

247 {

248 unsigned count = 0;

249 if (textJustify == TextJustifyDistribute) {

250 isAfterExpansion = true;

251 return length;

252 }

253

254 if (direction == LTR) {

255 for (size_t i = 0; i < length; ++i) {

256 if (treatAsSpace(characters[i])) {

257 count++;

258 isAfterExpansion = true;

259 } else {

260 isAfterExpansion = false;

261 }

262 }

263 } else {

264 for (size_t i = length; i > 0; --i) {

265 if (treatAsSpace(characters[i - 1])) {

266 count++;

267 isAfterExpansion = true;

268 } else {

269 isAfterExpansion = false;

270 }

271 }

272 }

273

274 return count;

275 }

276

277 unsigned Character::expansionOpportunityCount(const UChar* characters, size_t le ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus tify)

278 {

279 unsigned count = 0;

280 if (direction == LTR) {

281 for (size_t i = 0; i < length; ++i) {

282 UChar32 character = characters[i];

283 if (treatAsSpace(character)) {

284 count++;

285 isAfterExpansion = true;

286 continue;

287 }

288 if (U16_IS_LEAD(character) && i + 1 < length && U16_IS_TRAIL(charact ers[i + 1])) {

289 character = U16_GET_SUPPLEMENTARY(character, characters[i + 1]);

290 i++;

291 }

292 if (textJustify == TextJustify::TextJustifyAuto && isCJKIdeographOrS ymbol(character)) {

293 if (!isAfterExpansion)

294 count++;

295 count++;

296 isAfterExpansion = true;

297 continue;

298 }

299 isAfterExpansion = false;

300 }

301 } else {

302 for (size_t i = length; i > 0; --i) {

303 UChar32 character = characters[i - 1];

304 if (treatAsSpace(character)) {

305 count++;

306 isAfterExpansion = true;

307 continue;

308 }

309 if (U16_IS_TRAIL(character) && i > 1 && U16_IS_LEAD(characters[i - 2 ])) {

310 character = U16_GET_SUPPLEMENTARY(characters[i - 2], character);

311 i--;

312 }

313 if (textJustify == TextJustify::TextJustifyAuto && isCJKIdeographOrS ymbol(character)) {

314 if (!isAfterExpansion)

315 count++;

316 count++;

317 isAfterExpansion = true;

318 continue;

319 }

320 isAfterExpansion = false;

321 }

322 }

323 return count;

324 }

325

326 bool Character::canReceiveTextEmphasis(UChar32 c)

327 {

328 CharCategory category = Unicode::category(c);

329 if (category & (Separator_Space \| Separator_Line \| Separator_Paragraph \| Oth er_NotAssigned \| Other_Control \| Other_Format))

330 return false;

331

332 // Additional word-separator characters listed in CSS Text Level 3 Editor's Draft 3 November 2010.

333 if (c == ethiopicWordspaceCharacter \|\| c == aegeanWordSeparatorLineCharacter \|\| c == aegeanWordSeparatorDotCharacter

334 \|\| c == ugariticWordDividerCharacter \|\| c == tibetanMarkIntersyllabicTsh egCharacter \|\| c == tibetanMarkDelimiterTshegBstarCharacter)

335 return false;

336

337 return true;

338 }

339

340 template <typename CharacterType>

341 static inline String normalizeSpacesInternal(const CharacterType* characters, un signed length)

342 {

343 StringBuilder normalized;

344 normalized.reserveCapacity(length);

345

346 for (unsigned i = 0; i < length; ++i)

347 normalized.append(Character::normalizeSpaces(characters[i]));

348

349 return normalized.toString();

350 }

351

352 String Character::normalizeSpaces(const LChar* characters, unsigned length)

353 {

354 return normalizeSpacesInternal(characters, length);

355 }

356

357 String Character::normalizeSpaces(const UChar* characters, unsigned length)

358 {

359 return normalizeSpacesInternal(characters, length);

360 }

361

362 bool Character::isCommonOrInheritedScript(UChar32 character)

363 {

364 UErrorCode status = U_ZERO_ERROR;

365 UScriptCode script = uscript_getScript(character, &status);

366 return U_SUCCESS(status) && (script == USCRIPT_COMMON \|\| script == USCRIPT_I NHERITED);

367 }

368

369 } // namespace blink

OLD	NEW