Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(137)

Side by Side Diff: Source/platform/fonts/ScriptRunIterator.cpp

Issue 1323513006: Upstream ScriptRunIterator for segmenting text runs by script (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Additional review comments addressed, new linkage attempt for kMaxScripts constant Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "config.h"
6 #include "ScriptRunIterator.h"
7
8 #include "platform/Logging.h"
9 #include "wtf/Threading.h"
10
11 #include <ubidi_props.h>
12
13 namespace blink {
14
15 typedef ScriptData::PairedBracketType PairedBracketType;
16
17 const int ScriptData::kMaxScriptCount = 20;
18
19 ScriptData::~ScriptData()
20 {
21 }
22
23 void ICUScriptData::getScripts(UChar32 ch, Vector<UScriptCode>& dst) const
24 {
25
26 UErrorCode status = U_ZERO_ERROR;
27 // Leave room to insert primary script. It's not strictly necessary but
28 // it ensures that the result won't ever be greater than kMaxScriptCount,
29 // which some client someday might expect.
30 dst.resize(kMaxScriptCount - 1);
31 // Note, ICU convention is to return the number of available items
32 // regardless of the capacity passed to the call. So count can be greater
33 // than dst->size(), if a later version of the unicode data has more
34 // than kMaxScriptCount items.
35 int count = uscript_getScriptExtensions(
36 ch, &dst[0], dst.size(), &status);
37 if (status == U_BUFFER_OVERFLOW_ERROR) {
38 // Allow this, we'll just use what we have.
39 WTF_LOG_ERROR("Exceeded maximum script count of %d for 0x%x", kMaxScript Count, ch);
40 count = dst.size();
41 status = U_ZERO_ERROR;
42 }
43 UScriptCode primaryScript = uscript_getScript(ch, &status);
44
45 if (U_FAILURE(status)) {
46 WTF_LOG_ERROR("Could not get icu script data: %d for 0x%x", status, ch);
47 dst.clear();
48 return;
49 }
50
51 dst.resize(count);
52
53 if (primaryScript == dst.at(0)) {
54 // Only one script (might be common or inherited -- these are never in
55 // the extensions unless they're the only script), or extensions are in
56 // priority order already.
57 return;
58 }
59
60 if (primaryScript != USCRIPT_INHERITED
61 && primaryScript != USCRIPT_COMMON
62 && primaryScript != USCRIPT_INVALID_CODE) {
63 // Not common or primary, with extensions that are not in order. We know
64 // the primary, so we insert it at the front and swap the previous front
65 // to somewhere else in the list.
66 auto it = std::find(dst.begin() + 1, dst.end(), primaryScript);
67 if (it == dst.end()) {
68 dst.append(primaryScript);
69 }
70 std::swap(*dst.begin(), *it);
71 return;
72 }
73
74 if (primaryScript == USCRIPT_COMMON) {
75 if (count == 1) {
76 // Common with a preferred script. Keep common at head.
77 dst.prepend(primaryScript);
78 return;
79 }
80
81 // Ignore common. Find the preferred script of the multiple scripts that
82 // remain, and ensure it is at the head. Just keep swapping them in,
83 // there aren't likely to be many.
84 for (size_t i = 1; i < dst.size(); ++i) {
85 if (dst.at(0) == USCRIPT_LATIN || dst.at(i) < dst.at(0)) {
86 std::swap(dst.at(0), dst.at(i));
87 }
88 }
89 return;
90 }
91
92 // The primary is inherited, and there are other scripts. Put inherited at
93 // the front, the true primary next, and then the others in random order.
94 dst.append(dst.at(0));
95 dst.at(0) = primaryScript;
96 for (size_t i = 2; i < dst.size(); ++i) {
97 if (dst.at(1) == USCRIPT_LATIN || dst.at(i) < dst.at(1)) {
98 std::swap(dst.at(1), dst.at(i));
99 }
100 }
101 }
102
103 UChar32 ICUScriptData::getPairedBracket(UChar32 ch) const
104 {
105 return u_getBidiPairedBracket(ch);
106 }
107
108 PairedBracketType ICUScriptData::getPairedBracketType(UChar32 ch) const
109 {
110 return static_cast<PairedBracketType>(
111 u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE));
112 }
113
114 const ICUScriptData* ICUScriptData::instance()
115 {
116 AtomicallyInitializedStaticReference(const ICUScriptData, icuScriptDataInsta nce, (new ICUScriptData()));
117 return &icuScriptDataInstance;
118 }
119
120 ScriptRunIterator::ScriptRunIterator(const UChar* text, size_t length, const Scr iptData* data)
121 : m_text(text)
122 , m_length(length)
123 , m_bracketsFixupDepth(0)
124 // The initial value of m_aheadCharacter is not used.
125 , m_aheadCharacter(0)
126 , m_aheadPos(0)
127 , m_commonPreferred(USCRIPT_COMMON)
128 , m_scriptData(data)
129 {
130 ASSERT(text);
131 ASSERT(data);
132
133 if (m_aheadPos < m_length) {
134 m_currentSet.clear();
135 // Priming the m_currentSet with USCRIPT_COMMON here so that the first
136 // resolution between m_currentSet and m_nextSet in mergeSets() leads to
137 // chosing the script of the first consumed character.
138 m_currentSet.append(USCRIPT_COMMON);
139 U16_NEXT(m_text, m_aheadPos, m_length, m_aheadCharacter);
140 m_scriptData->getScripts(m_aheadCharacter, m_aheadSet);
141 }
142 }
143
144 ScriptRunIterator::ScriptRunIterator(const UChar* text, size_t length)
145 : ScriptRunIterator(text, length, ICUScriptData::instance())
146 {
147 }
148
149 bool ScriptRunIterator::consume(unsigned& limit, UScriptCode& script)
150 {
151 if (m_currentSet.isEmpty()) {
152 return false;
153 }
154
155 size_t pos;
156 UChar32 ch;
157 while (fetch(&pos, &ch)) {
158 PairedBracketType pairedType = m_scriptData->getPairedBracketType(ch);
159 switch (pairedType) {
160 case PairedBracketType::BracketTypeOpen:
161 openBracket(ch);
162 break;
163 case PairedBracketType::BracketTypeClose:
164 closeBracket(ch);
165 break;
166 default:
167 break;
168 }
169 if (!mergeSets()) {
170 limit = pos;
171 script = resolveCurrentScript();
172 fixupStack(script);
173 m_currentSet = m_nextSet;
174 return true;
175 }
176 }
177
178 limit = m_length;
179 script = resolveCurrentScript();
180 m_currentSet.clear();
181 return true;
182 }
183
184 void ScriptRunIterator::openBracket(UChar32 ch)
185 {
186 if (m_brackets.size() == kMaxBrackets) {
187 m_brackets.removeFirst();
188 if (m_bracketsFixupDepth == kMaxBrackets) {
189 --m_bracketsFixupDepth;
190 }
191 }
192 m_brackets.append(BracketRec({ ch, USCRIPT_COMMON }));
193 ++m_bracketsFixupDepth;
194 }
195
196 void ScriptRunIterator::closeBracket(UChar32 ch)
197 {
198 if (m_brackets.size() > 0) {
199 UChar32 target = m_scriptData->getPairedBracket(ch);
200 for (auto it = m_brackets.rbegin(); it != m_brackets.rend(); ++it) {
201 if (it->ch == target) {
202 // Have a match, use open paren's resolved script.
203 UScriptCode script = it->script;
204 m_nextSet.clear();
205 m_nextSet.append(script);
206
207 // And pop stack to this point.
208 int numPopped = std::distance(m_brackets.rbegin(), it);
209 // TODO: No resize operation in WTF::Deque?
210 for (int i = 0; i < numPopped; ++i)
211 m_brackets.removeLast();
212 m_bracketsFixupDepth = std::max(static_cast<size_t>(0),
213 m_bracketsFixupDepth - numPopped);
214 return;
215 }
216 }
217 }
218 // leave stack alone, no match
219 }
220
221 // Keep items in m_currentSet that are in m_nextSet.
222 //
223 // If the sets are disjoint, return false and leave m_currentSet unchanged. Else
224 // return true and make current set the intersection. Make sure to maintain
225 // current priority script as priority if it remains, else retain next priority
226 // script if it remains.
227 //
228 // Also maintain a common preferred script. If current and next are both
229 // common, and there is no common preferred script and next has a preferred
230 // script, set the common preferred script to that of next.
231 bool ScriptRunIterator::mergeSets()
232 {
233 if (m_nextSet.isEmpty() || m_currentSet.isEmpty()) {
234 return false;
235 }
236
237 auto currentSetIt = m_currentSet.begin();
238 auto currentEnd = m_currentSet.end();
239 // Most of the time, this is the only one.
240 // Advance the current iterator, we won't need to check it again later.
241 UScriptCode priorityScript = *currentSetIt++;
242
243 // If next is common or inherited, the only thing that might change
244 // is the common preferred script.
245 if (m_nextSet.at(0) <= USCRIPT_INHERITED) {
246 if (m_nextSet.size() == 2 && priorityScript <= USCRIPT_INHERITED && m_co mmonPreferred == USCRIPT_COMMON) {
247 m_commonPreferred = m_nextSet.at(1);
248 }
249 return true;
250 }
251
252 // If current is common or inherited, use the next script set.
253 if (priorityScript <= USCRIPT_INHERITED) {
254 m_currentSet = m_nextSet;
255 return true;
256 }
257
258 // Neither is common or inherited. If current is a singleton,
259 // just see if it exists in the next set. This is the common case.
260 auto next_it = m_nextSet.begin();
261 auto next_end = m_nextSet.end();
262 if (currentSetIt == currentEnd) {
263 return std::find(next_it, next_end, priorityScript) != next_end;
264 }
265
266 // Establish the priority script, if we have one.
267 // First try current priority script.
268 bool havePriority = std::find(next_it, next_end, priorityScript)
269 != next_end;
270 if (!havePriority) {
271 // So try next priority script.
272 // Skip the first current script, we already know it's not there.
273 // Advance the next iterator, later we won't need to check it again.
274 priorityScript = *next_it++;
275 havePriority = std::find(currentSetIt, currentEnd, priorityScript) != cu rrentEnd;
276 }
277
278 // Note that we can never write more scripts into the current vector than
279 // it already contains, so currentWriteIt won't ever exceed the size/capacit y.
280 auto currentWriteIt = m_currentSet.begin();
281 if (havePriority) {
282 // keep the priority script.
283 *currentWriteIt++ = priorityScript;
284 }
285
286 if (next_it != next_end) {
287 // Iterate over the remaining current scripts, and keep them if
288 // they occur in the remaining next scripts.
289 while (currentSetIt != currentEnd) {
290 UScriptCode sc = *currentSetIt++;
291 if (std::find(next_it, next_end, sc) != next_end) {
292 *currentWriteIt++ = sc;
293 }
294 }
295 }
296
297 // Only change current if the run continues.
298 int written = std::distance(m_currentSet.begin(), currentWriteIt);
299 if (written > 0) {
300 m_currentSet.resize(written);
301 return true;
302 }
303 return false;
304 }
305
306 // When we hit the end of the run, and resolve the script, we now know the
307 // resolved script of any open bracket that was pushed on the stack since
308 // the start of the run. Fixup depth records how many of these there
309 // were. We've maintained this count during pushes, and taken care to
310 // adjust it if the stack got overfull and open brackets were pushed off
311 // the bottom. This sets the script of the fixup_depth topmost entries of the
312 // stack to the resolved script.
313 void ScriptRunIterator::fixupStack(UScriptCode resolvedScript)
314 {
315 if (m_bracketsFixupDepth > 0) {
316 if (m_bracketsFixupDepth > m_brackets.size()) {
317 // Should never happen unless someone breaks the code.
318 WTF_LOG_ERROR("Brackets fixup depth exceeds size of bracket vector." );
319 m_bracketsFixupDepth = m_brackets.size();
320 }
321 auto it = m_brackets.rbegin();
322 for (size_t i = 0; i < m_bracketsFixupDepth; ++i) {
323 it->script = resolvedScript;
324 ++it;
325 }
326 m_bracketsFixupDepth = 0;
327 }
328 }
329
330 bool ScriptRunIterator::fetch(size_t* pos, UChar32* ch)
331 {
332 if (m_aheadPos > m_length) {
333 return false;
334 }
335 *pos = m_aheadPos - (m_aheadCharacter >= 0x10000 ? 2 : 1);
336 *ch = m_aheadCharacter;
337
338 m_nextSet.swap(m_aheadSet);
339 if (m_aheadPos == m_length) {
340 // No more data to fetch, but last character still needs to be
341 // processed. Advance m_aheadPos so that next time we will know
342 // this has been done.
343 m_aheadPos++;
344 return true;
345 }
346
347 U16_NEXT(m_text, m_aheadPos, m_length, m_aheadCharacter);
348 m_scriptData->getScripts(m_aheadCharacter, m_aheadSet);
349 if (m_aheadSet.isEmpty()) {
350 // No scripts for this character. This has already been logged, so
351 // we just terminate processing this text.
352 return false;
353 }
354 if (m_aheadSet[0] == USCRIPT_INHERITED && m_aheadSet.size() > 1) {
355 if (m_nextSet[0] == USCRIPT_COMMON) {
356 // Overwrite the next set with the non-inherited portion of the set.
357 m_nextSet = m_aheadSet;
358 m_nextSet.remove(0);
359 // Discard the remaining values, we'll inherit.
360 m_aheadSet.resize(1);
361 } else {
362 // Else, this applies to anything.
363 m_aheadSet.resize(1);
364 }
365 }
366 return true;
367 }
368
369 UScriptCode ScriptRunIterator::resolveCurrentScript() const
370 {
371 UScriptCode result = m_currentSet.at(0);
372 return result == USCRIPT_COMMON ? m_commonPreferred : result;
373 }
374
375 } // namespace blink
OLDNEW
« no previous file with comments | « Source/platform/fonts/ScriptRunIterator.h ('k') | Source/platform/fonts/ScriptRunIteratorTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698