third_party/WebKit/Source/platform/text/TextBreakIterator.cpp - Issue 2478383003: Remove isBreakValid for Emoji sequences

Side by Side Diff: third_party/WebKit/Source/platform/text/TextBreakIterator.cpp

Issue 2478383003: Remove isBreakValid for Emoji sequences (Closed)

Patch Set: Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * (C) 1999 Lars Knoll (knoll@kde.org)	2 * (C) 1999 Lars Knoll (knoll@kde.org)

3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights	3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights

4 * reserved.	4 * reserved.

5 * Copyright (C) 2007-2009 Torch Mobile, Inc.	5 * Copyright (C) 2007-2009 Torch Mobile, Inc.

6 * Copyright (C) 2011 Google Inc. All rights reserved.	6 * Copyright (C) 2011 Google Inc. All rights reserved.

7 *	7 *

8 * This library is free software; you can redistribute it and/or	8 * This library is free software; you can redistribute it and/or

9 * modify it under the terms of the GNU Library General Public	9 * modify it under the terms of the GNU Library General Public

10 * License as published by the Free Software Foundation; either	10 * License as published by the Free Software Foundation; either

(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
239 const unsigned char* tableRow = breakAllLineBreakClassTable[lastLineBreak];	239 const unsigned char* tableRow = breakAllLineBreakClassTable[lastLineBreak];

240 return tableRow[lineBreak / 8] & (1 << (lineBreak % 8));	240 return tableRow[lineBreak / 8] & (1 << (lineBreak % 8));

241 }	241 }

242 return false;	242 return false;

243 }	243 }

244	244

245 inline bool needsLineBreakIterator(UChar ch) {	245 inline bool needsLineBreakIterator(UChar ch) {

246 return ch > asciiLineBreakTableLastChar && ch != noBreakSpaceCharacter;	246 return ch > asciiLineBreakTableLastChar && ch != noBreakSpaceCharacter;

247 }	247 }

248	248

249 // Customization for ICU line breaking behavior. This allows us to reject ICU

250 // line break suggestions which would split an emoji sequence.

251 // FIXME crbug.com/593260: Remove this customization once ICU implements this

252 // natively.

253 static bool isBreakValid(const UChar* buf, size_t length, size_t breakPos) {

254 UChar32 codepoint;

255 size_t prevOffset = breakPos;

256 U16_PREV(buf, 0, prevOffset, codepoint);

257 uint32_t nextCodepoint;

258 size_t nextOffset = breakPos;

259 U16_NEXT(buf, nextOffset, length, nextCodepoint);

260

261 // Possible Emoji ZWJ sequence

262 if (codepoint == zeroWidthJoinerCharacter) {

263 if (nextCodepoint == 0x2764 // HEAVY BLACK HEART

264 || nextCodepoint == 0x1F466 // BOY

265 || nextCodepoint == 0x1F467 // GIRL

266 || nextCodepoint == 0x1F468 // MAN

267 || nextCodepoint == 0x1F469 // WOMAN

268 || nextCodepoint == 0x1F48B // KISS MARK

269 || nextCodepoint == 0x1F5E8) // LEFT SPEECH BUBBLE

270 {

271 return false;

272 }

273 }

274

275 // Possible emoji modifier sequence

276 // Proposed Rule LB30b from

277 // http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf

278 // EB x EM

279 if (Character::isModifier(nextCodepoint)) {

280 if (codepoint == variationSelector16Character && prevOffset > 0) {

281 // Skip over emoji variation selector.

282 U16_PREV(buf, 0, prevOffset, codepoint);

283 }

284 if (Character::isEmojiModifierBase(codepoint)) {

285 return false;

286 }

287 }

288 return true;

289 }

290

291 // Trivial implementation to match possible template parameters in

292 // nextBreakablePosition. There are no emoji sequences in 8bit strings, so we

293 // accept all break opportunities.

294 static bool isBreakValid(const LChar*, size_t, size_t) {

295 return true;

296 }

297

298 template <typename CharacterType, LineBreakType lineBreakType>	249 template <typename CharacterType, LineBreakType lineBreakType>

299 static inline int nextBreakablePosition(	250 static inline int nextBreakablePosition(

300 LazyLineBreakIterator& lazyBreakIterator,	251 LazyLineBreakIterator& lazyBreakIterator,

301 const CharacterType* str,	252 const CharacterType* str,

302 unsigned length,	253 unsigned length,

303 int pos) {	254 int pos) {

304 int len = static_cast<int>(length);	255 int len = static_cast<int>(length);

305 int nextBreak = -1;	256 int nextBreak = -1;

306	257

307 CharacterType lastLastCh =	258 CharacterType lastLastCh =

(...skipping 28 matching lines...) Expand all Loading...
336 TextBreakIterator* breakIterator =	287 TextBreakIterator* breakIterator =

337 lazyBreakIterator.get(priorContextLength);	288 lazyBreakIterator.get(priorContextLength);

338 if (breakIterator) {	289 if (breakIterator) {

339 nextBreak = breakIterator->following(i - 1 + priorContextLength);	290 nextBreak = breakIterator->following(i - 1 + priorContextLength);

340 if (nextBreak >= 0) {	291 if (nextBreak >= 0) {

341 nextBreak -= priorContextLength;	292 nextBreak -= priorContextLength;

342 }	293 }

343 }	294 }

344 }	295 }

345 }	296 }

346 if (i == nextBreak && !isBreakableSpace(lastCh) &&	297 if (i == nextBreak && !isBreakableSpace(lastCh))

347 isBreakValid(str, length, i)) {

348 return i;	298 return i;

349 }

350 }	299 }

351	300

352 lastLastCh = lastCh;	301 lastLastCh = lastCh;

353 lastCh = ch;	302 lastCh = ch;

354 }	303 }

355	304

356 return len;	305 return len;

357 }	306 }

358	307

359 static inline bool shouldKeepAfter(UChar lastCh, UChar ch, UChar nextCh) {	308 static inline bool shouldKeepAfter(UChar lastCh, UChar ch, UChar nextCh) {

(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
394 TextBreakIterator* breakIterator =	343 TextBreakIterator* breakIterator =

395 lazyBreakIterator.get(priorContextLength);	344 lazyBreakIterator.get(priorContextLength);

396 if (breakIterator) {	345 if (breakIterator) {

397 nextBreak = breakIterator->following(i - 1 + priorContextLength);	346 nextBreak = breakIterator->following(i - 1 + priorContextLength);

398 if (nextBreak >= 0) {	347 if (nextBreak >= 0) {

399 nextBreak -= priorContextLength;	348 nextBreak -= priorContextLength;

400 }	349 }

401 }	350 }

402 }	351 }

403 }	352 }

404 if (i == nextBreak && !isBreakableSpace(lastCh) &&	353 if (i == nextBreak && !isBreakableSpace(lastCh))

405 isBreakValid(str, length, i))

406 return i;	354 return i;

407 }	355 }

408	356

409 lastLastCh = lastCh;	357 lastLastCh = lastCh;

410 lastCh = ch;	358 lastCh = ch;

411 }	359 }

412	360

413 return len;	361 return len;

414 }	362 }

415	363

(...skipping 19 matching lines...) Expand all Loading...
435	383

436 int LazyLineBreakIterator::nextBreakablePositionKeepAll(int pos) {	384 int LazyLineBreakIterator::nextBreakablePositionKeepAll(int pos) {

437 if (m_string.is8Bit())	385 if (m_string.is8Bit())

438 return nextBreakablePosition<LChar, LineBreakType::Normal>(	386 return nextBreakablePosition<LChar, LineBreakType::Normal>(

439 *this, m_string.characters8(), m_string.length(), pos);	387 *this, m_string.characters8(), m_string.length(), pos);

440 return nextBreakablePositionKeepAllInternal(*this, m_string.characters16(),	388 return nextBreakablePositionKeepAllInternal(*this, m_string.characters16(),

441 m_string.length(), pos);	389 m_string.length(), pos);

442 }	390 }

443	391

444 } // namespace blink	392 } // namespace blink

OLD	NEW

« no previous file with comments | « no previous file | third_party/WebKit/Source/platform/text/TextBreakIteratorTest.cpp » ('j') | no next file with comments »