 Chromium Code Reviews
 Chromium Code Reviews Issue 1525993005:
  Change CachingWordShapeIterator to delimit CJK characters  (Closed) 
  Base URL: https://chromium.googlesource.com/chromium/src.git@master
    
  
    Issue 1525993005:
  Change CachingWordShapeIterator to delimit CJK characters  (Closed) 
  Base URL: https://chromium.googlesource.com/chromium/src.git@master

| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 * Copyright (C) 2015 Google Inc. All rights reserved. | 2 * Copyright (C) 2015 Google Inc. All rights reserved. | 
| 3 * | 3 * | 
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without | 
| 5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions | 
| 6 * are met: | 6 * are met: | 
| 7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright | 
| 8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. | 
| 9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright | 
| 10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the | 
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 105 m_startIndex = end; | 105 m_startIndex = end; | 
| 106 return TextRun::Range(start, end - start); | 106 return TextRun::Range(start, end - start); | 
| 107 } | 107 } | 
| 108 | 108 | 
| 109 TextRun::Range nextWordRange() | 109 TextRun::Range nextWordRange() | 
| 110 { | 110 { | 
| 111 const unsigned length = m_textRun.length(); | 111 const unsigned length = m_textRun.length(); | 
| 112 if (m_startIndex >= length) | 112 if (m_startIndex >= length) | 
| 113 return TextRun::Range(0, 0); | 113 return TextRun::Range(0, 0); | 
| 114 | 114 | 
| 115 if (m_textRun[m_startIndex] == spaceCharacter | 115 if (m_startIndex + 1u == length | 
| 116 || m_textRun[m_startIndex] == spaceCharacter | |
| 116 || m_textRun[m_startIndex] == tabulationCharacter) { | 117 || m_textRun[m_startIndex] == tabulationCharacter) { | 
| 117 return TextRun::Range(m_startIndex++, 1); | 118 return TextRun::Range(m_startIndex++, 1); | 
| 118 } | 119 } | 
| 119 | 120 | 
| 121 // Delimit every CJK character because these scripts do not delimit | |
| 
drott
2015/12/21 10:36:01
AFAICS we have now three modes in this file: Separ
 
kojii
2015/12/21 11:59:56
I think that's not the correct way to see the opti
 | |
| 122 // words by spaces. | |
| 123 if (!m_textRun.is8Bit()) { | |
| 124 UChar32 ch; | |
| 125 unsigned end = m_startIndex; | |
| 126 U16_NEXT(m_textRun.characters16(), end, length, ch); | |
| 127 if (Character::isCJKIdeographOrSymbol(ch)) | |
| 128 return nextWordRangeForCJK(end, ch); | |
| 129 } | |
| 130 | |
| 120 for (unsigned i = m_startIndex + 1; ; i++) { | 131 for (unsigned i = m_startIndex + 1; ; i++) { | 
| 121 if (i == length || m_textRun[i] == spaceCharacter | 132 if (i == length || m_textRun[i] == spaceCharacter | 
| 122 || m_textRun[i] == tabulationCharacter) { | 133 || m_textRun[i] == tabulationCharacter | 
| 134 || (!m_textRun.is8Bit() && Character::isCJKIdeographOrSymbol(m_textRun[i]))) { | |
| 123 return nextRangeToEndOffset(i); | 135 return nextRangeToEndOffset(i); | 
| 124 } | 136 } | 
| 125 } | 137 } | 
| 126 } | 138 } | 
| 127 | 139 | 
| 140 TextRun::Range nextWordRangeForCJK(unsigned end, UChar32 ch) | |
| 141 { | |
| 142 const unsigned length = m_textRun.length(); | |
| 143 bool hasAnySpecificScript = !Character::isCommonOrInheritedScript(ch); | |
| 144 for (unsigned i = end; i < length; end = i) { | |
| 145 U16_NEXT(m_textRun.characters16(), i, length, ch); | |
| 146 // Keep Marks and Modifiers together. | |
| 147 if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK)) | |
| 148 continue; | |
| 149 // Avoid delimiting COMMON/INHERITED alone, which makes harder to | |
| 150 // identify the script. | |
| 151 if (Character::isCJKIdeographOrSymbol(ch)) { | |
| 152 if (Character::isCommonOrInheritedScript(ch)) | |
| 153 continue; | |
| 154 if (!hasAnySpecificScript) { | |
| 155 hasAnySpecificScript = true; | |
| 156 continue; | |
| 157 } | |
| 158 } | |
| 159 return nextRangeToEndOffset(end); | |
| 160 } | |
| 161 return nextRangeToEndOffset(length); | |
| 162 } | |
| 163 | |
| 128 TextRun::Range nextRangeUntil(UChar ch) | 164 TextRun::Range nextRangeUntil(UChar ch) | 
| 129 { | 165 { | 
| 130 const unsigned length = m_textRun.length(); | 166 const unsigned length = m_textRun.length(); | 
| 131 ASSERT(m_startIndex < length); | 167 ASSERT(m_startIndex < length); | 
| 132 for (unsigned i = m_startIndex + 1; ; i++) { | 168 for (unsigned i = m_startIndex + 1; ; i++) { | 
| 133 if (i == length || m_textRun[i] == ch) | 169 if (i == length || m_textRun[i] == ch) | 
| 134 return nextRangeToEndOffset(i); | 170 return nextRangeToEndOffset(i); | 
| 135 } | 171 } | 
| 136 } | 172 } | 
| 137 | 173 | 
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 192 const Font* m_font; | 228 const Font* m_font; | 
| 193 float m_widthSoFar; // Used only when allowTabs() | 229 float m_widthSoFar; // Used only when allowTabs() | 
| 194 unsigned m_startIndex : 30; | 230 unsigned m_startIndex : 30; | 
| 195 unsigned m_wordResultCachable : 1; | 231 unsigned m_wordResultCachable : 1; | 
| 196 unsigned m_shapeByWord : 1; | 232 unsigned m_shapeByWord : 1; | 
| 197 }; | 233 }; | 
| 198 | 234 | 
| 199 } // namespace blink | 235 } // namespace blink | 
| 200 | 236 | 
| 201 #endif // CachingWordShapeIterator_h | 237 #endif // CachingWordShapeIterator_h | 
| OLD | NEW |