| OLD | NEW |
| 1 /* | 1 /* |
| 2 * (C) 1999 Lars Knoll (knoll@kde.org) | 2 * (C) 1999 Lars Knoll (knoll@kde.org) |
| 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserv
ed. | 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserv
ed. |
| 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. | 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. |
| 5 * Copyright (C) 2011 Google Inc. All rights reserved. | 5 * Copyright (C) 2011 Google Inc. All rights reserved. |
| 6 * | 6 * |
| 7 * This library is free software; you can redistribute it and/or | 7 * This library is free software; you can redistribute it and/or |
| 8 * modify it under the terms of the GNU Library General Public | 8 * modify it under the terms of the GNU Library General Public |
| 9 * License as published by the Free Software Foundation; either | 9 * License as published by the Free Software Foundation; either |
| 10 * version 2 of the License, or (at your option) any later version. | 10 * version 2 of the License, or (at your option) any later version. |
| 11 * | 11 * |
| 12 * This library is distributed in the hope that it will be useful, | 12 * This library is distributed in the hope that it will be useful, |
| 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 * Library General Public License for more details. | 15 * Library General Public License for more details. |
| 16 * | 16 * |
| 17 * You should have received a copy of the GNU Library General Public License | 17 * You should have received a copy of the GNU Library General Public License |
| 18 * along with this library; see the file COPYING.LIB. If not, write to | 18 * along with this library; see the file COPYING.LIB. If not, write to |
| 19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | 19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 20 * Boston, MA 02110-1301, USA. | 20 * Boston, MA 02110-1301, USA. |
| 21 */ | 21 */ |
| 22 | 22 |
| 23 #include "platform/text/TextBreakIterator.h" | 23 #include "platform/text/TextBreakIterator.h" |
| 24 | 24 |
| 25 #include "platform/fonts/Character.h" |
| 25 #include "wtf/ASCIICType.h" | 26 #include "wtf/ASCIICType.h" |
| 26 #include "wtf/StdLibExtras.h" | 27 #include "wtf/StdLibExtras.h" |
| 27 #include "wtf/text/CharacterNames.h" | 28 #include "wtf/text/CharacterNames.h" |
| 28 | 29 |
| 29 namespace blink { | 30 namespace blink { |
| 30 | 31 |
| 31 unsigned numGraphemeClusters(const String& string) | 32 unsigned numGraphemeClusters(const String& string) |
| 32 { | 33 { |
| 33 unsigned stringLength = string.length(); | 34 unsigned stringLength = string.length(); |
| 34 | 35 |
| (...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 214 return tableRow[lineBreak / 8] & (1 << (lineBreak % 8)); | 215 return tableRow[lineBreak / 8] & (1 << (lineBreak % 8)); |
| 215 } | 216 } |
| 216 return false; | 217 return false; |
| 217 } | 218 } |
| 218 | 219 |
| 219 inline bool needsLineBreakIterator(UChar ch) | 220 inline bool needsLineBreakIterator(UChar ch) |
| 220 { | 221 { |
| 221 return ch > asciiLineBreakTableLastChar && ch != noBreakSpaceCharacter; | 222 return ch > asciiLineBreakTableLastChar && ch != noBreakSpaceCharacter; |
| 222 } | 223 } |
| 223 | 224 |
| 225 // Customization for ICU line breaking behavior. This allows us to reject ICU |
| 226 // line break suggestions which would split an emoji sequence. |
| 227 // FIXME crbug.com/593260: Remove this customization once ICU implements this |
| 228 // natively. |
| 229 static bool isBreakValid(const UChar* buf, size_t length, size_t breakPos) |
| 230 { |
| 231 UChar32 codepoint; |
| 232 size_t prevOffset = breakPos; |
| 233 U16_PREV(buf, 0, prevOffset, codepoint); |
| 234 uint32_t nextCodepoint; |
| 235 size_t nextOffset = breakPos; |
| 236 U16_NEXT(buf, nextOffset, length, nextCodepoint); |
| 237 |
| 238 // Possible Emoji ZWJ sequence |
| 239 if (codepoint == zeroWidthJoinerCharacter) { |
| 240 if (nextCodepoint == 0x2764 // HEAVY BLACK HEART |
| 241 || nextCodepoint == 0x1F466 // BOY |
| 242 || nextCodepoint == 0x1F467 // GIRL |
| 243 || nextCodepoint == 0x1F468 // MAN |
| 244 || nextCodepoint == 0x1F469 // WOMAN |
| 245 || nextCodepoint == 0x1F48B // KISS MARK |
| 246 || nextCodepoint == 0x1F5E8) // LEFT SPEECH BUBBLE |
| 247 { |
| 248 return false; |
| 249 } |
| 250 } |
| 251 |
| 252 // Possible emoji modifier sequence |
| 253 // Proposed Rule LB30b from http://www.unicode.org/L2/L2016/16011r3-break-pr
op-emoji.pdf |
| 254 // EB x EM |
| 255 if (Character::isModifier(nextCodepoint)) { |
| 256 if (codepoint == variationSelector16Character && prevOffset > 0) { |
| 257 // Skip over emoji variation selector. |
| 258 U16_PREV(buf, 0, prevOffset, codepoint); |
| 259 } |
| 260 if (Character::isEmojiModifierBase(codepoint)) { |
| 261 return false; |
| 262 } |
| 263 } |
| 264 return true; |
| 265 } |
| 266 |
| 267 // Trivial implementation to match possible template parameters in |
| 268 // nextBreakablePosition. There are no emoji sequences in 8bit strings, so we |
| 269 // accept all break opportunities. |
| 270 static bool isBreakValid(const LChar*, size_t, size_t) |
| 271 { |
| 272 return true; |
| 273 } |
| 274 |
| 224 template<typename CharacterType, LineBreakType lineBreakType> | 275 template<typename CharacterType, LineBreakType lineBreakType> |
| 225 static inline int nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator
, const CharacterType* str, unsigned length, int pos) | 276 static inline int nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator
, const CharacterType* str, unsigned length, int pos) |
| 226 { | 277 { |
| 227 int len = static_cast<int>(length); | 278 int len = static_cast<int>(length); |
| 228 int nextBreak = -1; | 279 int nextBreak = -1; |
| 229 | 280 |
| 230 CharacterType lastLastCh = pos > 1 ? str[pos - 2] : static_cast<CharacterTyp
e>(lazyBreakIterator.secondToLastCharacter()); | 281 CharacterType lastLastCh = pos > 1 ? str[pos - 2] : static_cast<CharacterTyp
e>(lazyBreakIterator.secondToLastCharacter()); |
| 231 CharacterType lastCh = pos > 0 ? str[pos - 1] : static_cast<CharacterType>(l
azyBreakIterator.lastCharacter()); | 282 CharacterType lastCh = pos > 0 ? str[pos - 1] : static_cast<CharacterType>(l
azyBreakIterator.lastCharacter()); |
| 232 ULineBreak lastLineBreak; | 283 ULineBreak lastLineBreak; |
| 233 if (lineBreakType == LineBreakType::BreakAll) | 284 if (lineBreakType == LineBreakType::BreakAll) |
| (...skipping 19 matching lines...) Expand all Loading... |
| 253 if (i || priorContextLength) { | 304 if (i || priorContextLength) { |
| 254 TextBreakIterator* breakIterator = lazyBreakIterator.get(pri
orContextLength); | 305 TextBreakIterator* breakIterator = lazyBreakIterator.get(pri
orContextLength); |
| 255 if (breakIterator) { | 306 if (breakIterator) { |
| 256 nextBreak = breakIterator->following(i - 1 + priorContex
tLength); | 307 nextBreak = breakIterator->following(i - 1 + priorContex
tLength); |
| 257 if (nextBreak >= 0) { | 308 if (nextBreak >= 0) { |
| 258 nextBreak -= priorContextLength; | 309 nextBreak -= priorContextLength; |
| 259 } | 310 } |
| 260 } | 311 } |
| 261 } | 312 } |
| 262 } | 313 } |
| 263 if (i == nextBreak && !isBreakableSpace(lastCh)) | 314 if (i == nextBreak && !isBreakableSpace(lastCh) && isBreakValid(str,
length, i)) { |
| 264 return i; | 315 return i; |
| 316 } |
| 265 } | 317 } |
| 266 | 318 |
| 267 lastLastCh = lastCh; | 319 lastLastCh = lastCh; |
| 268 lastCh = ch; | 320 lastCh = ch; |
| 269 } | 321 } |
| 270 | 322 |
| 271 return len; | 323 return len; |
| 272 } | 324 } |
| 273 | 325 |
| 274 static inline bool shouldKeepAfter(UChar lastCh, UChar ch, UChar nextCh) | 326 static inline bool shouldKeepAfter(UChar lastCh, UChar ch, UChar nextCh) |
| (...skipping 25 matching lines...) Expand all Loading... |
| 300 if (i || priorContextLength) { | 352 if (i || priorContextLength) { |
| 301 TextBreakIterator* breakIterator = lazyBreakIterator.get(pri
orContextLength); | 353 TextBreakIterator* breakIterator = lazyBreakIterator.get(pri
orContextLength); |
| 302 if (breakIterator) { | 354 if (breakIterator) { |
| 303 nextBreak = breakIterator->following(i - 1 + priorContex
tLength); | 355 nextBreak = breakIterator->following(i - 1 + priorContex
tLength); |
| 304 if (nextBreak >= 0) { | 356 if (nextBreak >= 0) { |
| 305 nextBreak -= priorContextLength; | 357 nextBreak -= priorContextLength; |
| 306 } | 358 } |
| 307 } | 359 } |
| 308 } | 360 } |
| 309 } | 361 } |
| 310 if (i == nextBreak && !isBreakableSpace(lastCh)) | 362 if (i == nextBreak && !isBreakableSpace(lastCh) && isBreakValid(str,
length, i)) |
| 311 return i; | 363 return i; |
| 312 } | 364 } |
| 313 | 365 |
| 314 lastLastCh = lastCh; | 366 lastLastCh = lastCh; |
| 315 lastCh = ch; | 367 lastCh = ch; |
| 316 } | 368 } |
| 317 | 369 |
| 318 return len; | 370 return len; |
| 319 } | 371 } |
| 320 | 372 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 337 } | 389 } |
| 338 | 390 |
| 339 int LazyLineBreakIterator::nextBreakablePositionKeepAll(int pos) | 391 int LazyLineBreakIterator::nextBreakablePositionKeepAll(int pos) |
| 340 { | 392 { |
| 341 if (m_string.is8Bit()) | 393 if (m_string.is8Bit()) |
| 342 return nextBreakablePosition<LChar, LineBreakType::Normal>(*this, m_stri
ng.characters8(), m_string.length(), pos); | 394 return nextBreakablePosition<LChar, LineBreakType::Normal>(*this, m_stri
ng.characters8(), m_string.length(), pos); |
| 343 return nextBreakablePositionKeepAllInternal(*this, m_string.characters16(),
m_string.length(), pos); | 395 return nextBreakablePositionKeepAllInternal(*this, m_string.characters16(),
m_string.length(), pos); |
| 344 } | 396 } |
| 345 | 397 |
| 346 } // namespace blink | 398 } // namespace blink |
| OLD | NEW |