Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(71)

Side by Side Diff: third_party/WebKit/Source/platform/text/TextBreakIterator.cpp

Issue 1779693003: Fix emoji ZWJ and modifier sequence line breaking (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebased Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * (C) 1999 Lars Knoll (knoll@kde.org) 2 * (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserved. 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserved.
4 * Copyright (C) 2007-2009 Torch Mobile, Inc. 4 * Copyright (C) 2007-2009 Torch Mobile, Inc.
5 * Copyright (C) 2011 Google Inc. All rights reserved. 5 * Copyright (C) 2011 Google Inc. All rights reserved.
6 * 6 *
7 * This library is free software; you can redistribute it and/or 7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public 8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either 9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version. 10 * version 2 of the License, or (at your option) any later version.
11 * 11 *
12 * This library is distributed in the hope that it will be useful, 12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details. 15 * Library General Public License for more details.
16 * 16 *
17 * You should have received a copy of the GNU Library General Public License 17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to 18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA. 20 * Boston, MA 02110-1301, USA.
21 */ 21 */
22 22
23 #include "platform/text/TextBreakIterator.h" 23 #include "platform/text/TextBreakIterator.h"
24 24
25 #include "platform/fonts/Character.h"
25 #include "wtf/ASCIICType.h" 26 #include "wtf/ASCIICType.h"
26 #include "wtf/StdLibExtras.h" 27 #include "wtf/StdLibExtras.h"
27 #include "wtf/text/CharacterNames.h" 28 #include "wtf/text/CharacterNames.h"
28 29
29 namespace blink { 30 namespace blink {
30 31
31 unsigned numGraphemeClusters(const String& string) 32 unsigned numGraphemeClusters(const String& string)
32 { 33 {
33 unsigned stringLength = string.length(); 34 unsigned stringLength = string.length();
34 35
(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after
214 return tableRow[lineBreak / 8] & (1 << (lineBreak % 8)); 215 return tableRow[lineBreak / 8] & (1 << (lineBreak % 8));
215 } 216 }
216 return false; 217 return false;
217 } 218 }
218 219
219 inline bool needsLineBreakIterator(UChar ch) 220 inline bool needsLineBreakIterator(UChar ch)
220 { 221 {
221 return ch > asciiLineBreakTableLastChar && ch != noBreakSpaceCharacter; 222 return ch > asciiLineBreakTableLastChar && ch != noBreakSpaceCharacter;
222 } 223 }
223 224
225 // Customization for ICU line breaking behavior. This allows us to reject ICU
226 // line break suggestions which would split an emoji sequence.
227 // FIXME crbug.com/593260: Remove this customization once ICU implements this
228 // natively.
229 static bool isBreakValid(const UChar* buf, size_t length, size_t breakPos)
230 {
231 UChar32 codepoint;
232 size_t prevOffset = breakPos;
233 U16_PREV(buf, 0, prevOffset, codepoint);
234 uint32_t nextCodepoint;
235 size_t nextOffset = breakPos;
236 U16_NEXT(buf, nextOffset, length, nextCodepoint);
237
238 // Possible Emoji ZWJ sequence
239 if (codepoint == zeroWidthJoinerCharacter) {
240 if (nextCodepoint == 0x2764 // HEAVY BLACK HEART
241 || nextCodepoint == 0x1F466 // BOY
242 || nextCodepoint == 0x1F467 // GIRL
243 || nextCodepoint == 0x1F468 // MAN
244 || nextCodepoint == 0x1F469 // WOMAN
245 || nextCodepoint == 0x1F48B // KISS MARK
246 || nextCodepoint == 0x1F5E8) // LEFT SPEECH BUBBLE
247 {
248 return false;
249 }
250 }
251
252 // Possible emoji modifier sequence
253 // Proposed Rule LB30b from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf
254 // EB x EM
255 if (Character::isModifier(nextCodepoint)) {
256 if (codepoint == variationSelector16Character && prevOffset > 0) {
257 // Skip over emoji variation selector.
258 U16_PREV(buf, 0, prevOffset, codepoint);
259 }
260 if (Character::isEmojiModifierBase(codepoint)) {
261 return false;
262 }
263 }
264 return true;
265 }
266
267 // Trivial implementation to match possible template parameters in
268 // nextBreakablePosition. There are no emoji sequences in 8bit strings, so we
269 // accept all break opportunities.
270 static bool isBreakValid(const LChar*, size_t, size_t)
271 {
272 return true;
273 }
274
224 template<typename CharacterType, LineBreakType lineBreakType> 275 template<typename CharacterType, LineBreakType lineBreakType>
225 static inline int nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* str, unsigned length, int pos) 276 static inline int nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* str, unsigned length, int pos)
226 { 277 {
227 int len = static_cast<int>(length); 278 int len = static_cast<int>(length);
228 int nextBreak = -1; 279 int nextBreak = -1;
229 280
230 CharacterType lastLastCh = pos > 1 ? str[pos - 2] : static_cast<CharacterType>(lazyBreakIterator.secondToLastCharacter()); 281 CharacterType lastLastCh = pos > 1 ? str[pos - 2] : static_cast<CharacterType>(lazyBreakIterator.secondToLastCharacter());
231 CharacterType lastCh = pos > 0 ? str[pos - 1] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter()); 282 CharacterType lastCh = pos > 0 ? str[pos - 1] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
232 ULineBreak lastLineBreak; 283 ULineBreak lastLineBreak;
233 if (lineBreakType == LineBreakType::BreakAll) 284 if (lineBreakType == LineBreakType::BreakAll)
(...skipping 19 matching lines...) Expand all
253 if (i || priorContextLength) { 304 if (i || priorContextLength) {
254 TextBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength); 305 TextBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength);
255 if (breakIterator) { 306 if (breakIterator) {
256 nextBreak = breakIterator->following(i - 1 + priorContextLength); 307 nextBreak = breakIterator->following(i - 1 + priorContextLength);
257 if (nextBreak >= 0) { 308 if (nextBreak >= 0) {
258 nextBreak -= priorContextLength; 309 nextBreak -= priorContextLength;
259 } 310 }
260 } 311 }
261 } 312 }
262 } 313 }
263 if (i == nextBreak && !isBreakableSpace(lastCh)) 314 if (i == nextBreak && !isBreakableSpace(lastCh) && isBreakValid(str, length, i)) {
264 return i; 315 return i;
316 }
265 } 317 }
266 318
267 lastLastCh = lastCh; 319 lastLastCh = lastCh;
268 lastCh = ch; 320 lastCh = ch;
269 } 321 }
270 322
271 return len; 323 return len;
272 } 324 }
273 325
274 static inline bool shouldKeepAfter(UChar lastCh, UChar ch, UChar nextCh) 326 static inline bool shouldKeepAfter(UChar lastCh, UChar ch, UChar nextCh)
(...skipping 25 matching lines...) Expand all
300 if (i || priorContextLength) { 352 if (i || priorContextLength) {
301 TextBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength); 353 TextBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength);
302 if (breakIterator) { 354 if (breakIterator) {
303 nextBreak = breakIterator->following(i - 1 + priorContextLength); 355 nextBreak = breakIterator->following(i - 1 + priorContextLength);
304 if (nextBreak >= 0) { 356 if (nextBreak >= 0) {
305 nextBreak -= priorContextLength; 357 nextBreak -= priorContextLength;
306 } 358 }
307 } 359 }
308 } 360 }
309 } 361 }
310 if (i == nextBreak && !isBreakableSpace(lastCh)) 362 if (i == nextBreak && !isBreakableSpace(lastCh) && isBreakValid(str, length, i))
311 return i; 363 return i;
312 } 364 }
313 365
314 lastLastCh = lastCh; 366 lastLastCh = lastCh;
315 lastCh = ch; 367 lastCh = ch;
316 } 368 }
317 369
318 return len; 370 return len;
319 } 371 }
320 372
(...skipping 16 matching lines...) Expand all
337 } 389 }
338 390
339 int LazyLineBreakIterator::nextBreakablePositionKeepAll(int pos) 391 int LazyLineBreakIterator::nextBreakablePositionKeepAll(int pos)
340 { 392 {
341 if (m_string.is8Bit()) 393 if (m_string.is8Bit())
342 return nextBreakablePosition<LChar, LineBreakType::Normal>(*this, m_string.characters8(), m_string.length(), pos); 394 return nextBreakablePosition<LChar, LineBreakType::Normal>(*this, m_string.characters8(), m_string.length(), pos);
343 return nextBreakablePositionKeepAllInternal(*this, m_string.characters16(), m_string.length(), pos); 395 return nextBreakablePositionKeepAllInternal(*this, m_string.characters16(), m_string.length(), pos);
344 } 396 }
345 397
346 } // namespace blink 398 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698