Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: Source/platform/fonts/ScriptRunIterator.cpp

Issue 1323513006: Upstream ScriptRunIterator for segmenting text runs by script (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Hex escapes seem to be the safest cross-platform bet Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "config.h"
6 #include "ScriptRunIterator.h"
7
8 #include "platform/Logging.h"
9 #include "wtf/Threading.h"
10
11 #include <ubidi_props.h>
12
13 namespace blink {
14
15 typedef ScriptData::PairedBracketType PairedBracketType;
16
17 ScriptData::~ScriptData()
18 {
19 }
20
21 const int ScriptData::kMaxScriptCount;
22
23 void ICUScriptData::getScripts(UChar32 ch, Vector<UScriptCode>& dst) const
24 {
25
26 UErrorCode status = U_ZERO_ERROR;
27 // Leave room to insert primary script. It's not strictly necessary but
28 // it ensures that the result won't ever be greater than kMaxScriptCount,
29 // which some client someday might expect.
30 dst.resize(kMaxScriptCount - 1);
31 // Note, ICU convention is to return the number of available items
32 // regardless of the capacity passed to the call. So count can be greater
33 // than dst->size(), if a later version of the unicode data has more
34 // than kMaxScriptCount items.
35 int count = uscript_getScriptExtensions(
36 ch, &dst[0], dst.size(), &status);
37 if (status == U_BUFFER_OVERFLOW_ERROR) {
38 // Allow this, we'll just use what we have.
39 WTF_LOG_ERROR("Exceeded maximum script count of %d for 0x%x", kMaxScript Count, ch);
40 count = dst.size();
41 status = U_ZERO_ERROR;
42 }
43 UScriptCode primaryScript = uscript_getScript(ch, &status);
44
45 if (U_FAILURE(status)) {
46 WTF_LOG_ERROR("Could not get icu script data: %d for 0x%x", status, ch);
47 dst.clear();
48 return;
49 }
50
51 dst.resize(count);
52
53 if (primaryScript == dst.at(0)) {
54 // Only one script (might be common or inherited -- these are never in
55 // the extensions unless they're the only script), or extensions are in
56 // priority order already.
57 return;
58 }
59
60 if (primaryScript > USCRIPT_INHERITED) {
behdad_google 2015/09/01 11:16:36 This relies on order of USCRIPT values. Can you r
61 // Not common or primary, with extensions that are not in order. We know
62 // the primary, so we insert it at the front and swap the previous front
63 // to somewhere else in the list.
64 auto it = std::find(dst.begin() + 1, dst.end(), primaryScript);
65 if (it == dst.end()) {
66 dst.append(primaryScript);
67 }
68 std::swap(*dst.begin(), *it);
69 return;
70 }
71
72 if (primaryScript == USCRIPT_COMMON) {
73 if (count == 1) {
74 // Common with a preferred script. Keep common at head.
75 dst.prepend(primaryScript);
76 return;
77 }
78
79 // Ignore common. Find the preferred script of the multiple scripts that
80 // remain, and ensure it is at the head. Just keep swapping them in,
81 // there aren't likely to be many.
82 for (size_t i = 1; i < dst.size(); ++i) {
83 if (dst.at(0) == USCRIPT_LATIN || dst.at(i) < dst.at(0)) {
behdad_google 2015/09/01 11:16:36 Note to self. This algorithm uses the order that
84 std::swap(dst.at(0), dst.at(i));
85 }
86 }
87 return;
88 }
89
90 // The primary is inherited, and there are other scripts. Put inherited at
91 // the front, the true primary next, and then the others in random order.
92 dst.append(dst.at(0));
93 dst.at(0) = primaryScript;
94 for (size_t i = 2; i < dst.size(); ++i) {
95 if (dst.at(1) == USCRIPT_LATIN || dst.at(i) < dst.at(1)) {
96 std::swap(dst.at(1), dst.at(i));
97 }
98 }
99 }
100
101 UChar32 ICUScriptData::getPairedBracket(UChar32 ch) const
102 {
103 return u_getBidiPairedBracket(ch);
104 }
105
106 PairedBracketType ICUScriptData::getPairedBracketType(UChar32 ch) const
107 {
108 return static_cast<PairedBracketType>(
109 u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE));
110 }
111
112 const ICUScriptData* ICUScriptData::instance()
113 {
114 AtomicallyInitializedStaticReference(const ICUScriptData, icuScriptDataInsta nce, (new ICUScriptData()));
115 return &icuScriptDataInstance;
116 }
117
118 ScriptRunIterator::ScriptRunIterator(const UChar* text, size_t length, const Scr iptData* data)
119 : m_text(text)
120 , m_length(length)
121 , m_bracketsFixupDepth(0)
122 // The initial value of m_aheadCharacter is not used.
123 , m_aheadCharacter(0)
124 , m_aheadPos(0)
125 , m_commonPreferred(USCRIPT_COMMON)
126 , m_scriptData(data)
127 {
128 ASSERT(text);
129 ASSERT(data);
130
131 if (m_aheadPos < m_length) {
132 m_currentSet.clear();
133 m_currentSet.append(USCRIPT_COMMON);
134 U16_NEXT(m_text, m_aheadPos, m_length, m_aheadCharacter);
135 m_scriptData->getScripts(m_aheadCharacter, m_aheadSet);
136 }
137 }
138
139 ScriptRunIterator::ScriptRunIterator(const UChar* text, size_t length)
140 : ScriptRunIterator(text, length, ICUScriptData::instance())
141 {
142 }
143
144 bool ScriptRunIterator::consume(unsigned& limit, UScriptCode& script)
145 {
146 if (m_currentSet.isEmpty()) {
147 return false;
148 }
149
150 size_t pos;
151 UChar32 ch;
152 while (fetch(&pos, &ch)) {
153 PairedBracketType pairedType = m_scriptData->getPairedBracketType(ch);
154 switch (pairedType) {
155 case PairedBracketType::BracketTypeOpen:
156 openBracket(ch);
157 break;
158 case PairedBracketType::BracketTypeClose:
159 closeBracket(ch);
160 break;
161 default:
162 break;
163 }
164 if (!mergeSets()) {
165 limit = pos;
166 script = resolveCurrentScript();
167 fixupStack(script);
168 m_currentSet = m_nextSet;
169 return true;
170 }
171 }
172
173 limit = m_length;
174 script = resolveCurrentScript();
175 m_currentSet.clear();
176 return true;
177 }
178
179 void ScriptRunIterator::openBracket(UChar32 ch)
180 {
181 if (m_brackets.size() == kMaxBrackets) {
182 m_brackets.removeFirst();
183 if (m_bracketsFixupDepth == kMaxBrackets) {
184 --m_bracketsFixupDepth;
185 }
186 }
187 m_brackets.append(BracketRec({ ch, USCRIPT_COMMON }));
188 ++m_bracketsFixupDepth;
189 }
190
191 void ScriptRunIterator::closeBracket(UChar32 ch)
192 {
193 if (m_brackets.size() > 0) {
194 UChar32 target = m_scriptData->getPairedBracket(ch);
195 for (auto it = m_brackets.rbegin(); it != m_brackets.rend(); ++it) {
196 if (it->ch == target) {
197 // Have a match, use open paren's resolved script.
198 UScriptCode script = it->script;
199 m_nextSet.clear();
200 m_nextSet.append(script);
201
202 // And pop stack to this point.
203 int numPopped = std::distance(m_brackets.rbegin(), it);
204 // TODO: No resize operation in WTF::Deque?
205 for (int i = 0; i < numPopped; ++i)
206 m_brackets.removeLast();
207 m_bracketsFixupDepth = std::max(static_cast<size_t>(0),
208 m_bracketsFixupDepth - numPopped);
209 return;
210 }
211 }
212 }
213 // leave stack alone, no match
214 }
215
216 // Keep items in m_currentSet that are in m_nextSet.
217 //
218 // If the sets are disjoint, return false and leave m_currentSet unchanged. Else
219 // return true and make current set the intersection. Make sure to maintain
220 // current priority script as priority if it remains, else retain next priority
221 // script if it remains.
222 //
223 // Also maintain a common preferred script. If current and next are both
224 // common, and there is no common preferred script and next has a preferred
225 // script, set the common preferred script to that of next.
226 bool ScriptRunIterator::mergeSets()
227 {
228 if (m_nextSet.isEmpty() || m_currentSet.isEmpty()) {
229 return false;
230 }
231
232 auto currentSetIt = m_currentSet.begin();
233 auto currentEnd = m_currentSet.end();
234 // Most of the time, this is the only one.
235 // Advance the current iterator, we won't need to check it again later.
236 UScriptCode priorityScript = *currentSetIt++;
237
238 // If next is common or inherited, the only thing that might change
239 // is the common preferred script.
240 if (m_nextSet.at(0) <= USCRIPT_INHERITED) {
241 if (m_nextSet.size() == 2 && priorityScript <= USCRIPT_INHERITED && m_co mmonPreferred == USCRIPT_COMMON) {
242 m_commonPreferred = m_nextSet.at(1);
243 }
244 return true;
245 }
246
247 // If current is common or inherited, use the next script set.
248 if (priorityScript <= USCRIPT_INHERITED) {
249 m_currentSet = m_nextSet;
250 return true;
251 }
252
253 // Neither is common or inherited. If current is a singleton,
254 // just see if it exists in the next set. This is the common case.
255 auto next_it = m_nextSet.begin();
256 auto next_end = m_nextSet.end();
257 if (currentSetIt == currentEnd) {
258 return std::find(next_it, next_end, priorityScript) != next_end;
259 }
260
261 // Establish the priority script, if we have one.
262 // First try current priority script.
263 bool havePriority = std::find(next_it, next_end, priorityScript)
264 != next_end;
265 if (!havePriority) {
266 // So try next priority script.
267 // Skip the first current script, we already know it's not there.
268 // Advance the next iterator, later we won't need to check it again.
269 priorityScript = *next_it++;
270 havePriority = std::find(currentSetIt, currentEnd, priorityScript) != cu rrentEnd;
271 }
272
273 // Note that we can never write more scripts into the current vector than
274 // it already contains, so currentWriteIt won't ever exceed the size/capacit y.
275 auto currentWriteIt = m_currentSet.begin();
276 if (havePriority) {
277 // keep the priority script.
278 *currentWriteIt++ = priorityScript;
279 }
280
281 if (next_it != next_end) {
282 // Iterate over the remaining current scripts, and keep them if
283 // they occur in the remaining next scripts.
284 while (currentSetIt != currentEnd) {
285 UScriptCode sc = *currentSetIt++;
286 if (std::find(next_it, next_end, sc) != next_end) {
287 *currentWriteIt++ = sc;
288 }
289 }
290 }
291
292 // Only change current if the run continues.
293 int written = std::distance(m_currentSet.begin(), currentWriteIt);
294 if (written > 0) {
295 m_currentSet.resize(written);
296 return true;
297 }
298 return false;
299 }
300
301 // When we hit the end of the run, and resolve the script, we now know the
302 // resolved script of any open bracket that was pushed on the stack since
303 // the start of the run. Fixup depth records how many of these there
304 // were. We've maintained this count during pushes, and taken care to
305 // adjust it if the stack got overfull and open brackets were pushed off
306 // the bottom. This sets the script of the fixup_depth topmost entries of the
307 // stack to the resolved script.
308 void ScriptRunIterator::fixupStack(UScriptCode resolvedScript)
309 {
310 if (m_bracketsFixupDepth > 0) {
311 if (m_bracketsFixupDepth > m_brackets.size()) {
312 // Should never happen unless someone breaks the code.
313 WTF_LOG_ERROR("Brackets fixup depth exceeds size of bracket vector." );
314 m_bracketsFixupDepth = m_brackets.size();
315 }
316 auto it = m_brackets.rbegin();
317 for (size_t i = 0; i < m_bracketsFixupDepth; ++i) {
318 it->script = resolvedScript;
319 ++it;
320 }
321 m_bracketsFixupDepth = 0;
322 }
323 }
324
325 bool ScriptRunIterator::fetch(size_t* pos, UChar32* ch)
326 {
327 if (m_aheadPos > m_length) {
328 return false;
329 }
330 *pos = m_aheadPos - (m_aheadCharacter >= 0x10000 ? 2 : 1);
331 *ch = m_aheadCharacter;
332
333 m_nextSet.swap(m_aheadSet);
334 if (m_aheadPos == m_length) {
335 // No more data to fetch, but last character still needs to be
336 // processed. Advance m_aheadPos so that next time we will know
337 // this has been done.
338 m_aheadPos++;
339 return true;
340 }
341
342 U16_NEXT(m_text, m_aheadPos, m_length, m_aheadCharacter);
343 m_scriptData->getScripts(m_aheadCharacter, m_aheadSet);
344 if (m_aheadSet.isEmpty()) {
345 // No scripts for this character. This has already been logged, so
346 // we just terminate processing this text.
347 return false;
348 }
349 if (m_aheadSet[0] == USCRIPT_INHERITED && m_aheadSet.size() > 1) {
350 if (m_nextSet[0] == USCRIPT_COMMON) {
351 // Overwrite the next set with the non-inherited portion of the set.
352 m_nextSet = m_aheadSet;
353 m_nextSet.remove(0);
354 // Discard the remaining values, we'll inherit.
355 m_aheadSet.resize(1);
356 } else {
357 // Else, this applies to anything.
358 m_aheadSet.resize(1);
359 }
360 }
361 return true;
362 }
363
364 UScriptCode ScriptRunIterator::resolveCurrentScript() const
365 {
366 UScriptCode result = m_currentSet.at(0);
367 return result == USCRIPT_COMMON ? m_commonPreferred : result;
368 }
369
370 } // namespace blink
OLDNEW
« no previous file with comments | « Source/platform/fonts/ScriptRunIterator.h ('k') | Source/platform/fonts/ScriptRunIteratorTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698