OLD | NEW |
---|---|
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "core/src/fpdftext/text_int.h" | |
8 | |
7 #include <algorithm> | 9 #include <algorithm> |
8 #include <cctype> | 10 #include <cctype> |
9 #include <cwctype> | 11 #include <cwctype> |
10 #include <memory> | 12 #include <memory> |
11 | 13 |
12 #include "core/include/fpdfapi/fpdf_module.h" | 14 #include "core/include/fpdfapi/fpdf_module.h" |
13 #include "core/include/fpdfapi/fpdf_page.h" | 15 #include "core/include/fpdfapi/fpdf_page.h" |
14 #include "core/include/fpdfapi/fpdf_pageobj.h" | 16 #include "core/include/fpdfapi/fpdf_pageobj.h" |
15 #include "core/include/fpdfapi/fpdf_resource.h" | 17 #include "core/include/fpdfapi/fpdf_resource.h" |
16 #include "core/include/fpdftext/fpdf_text.h" | 18 #include "core/include/fpdftext/fpdf_text.h" |
17 #include "core/include/fxcrt/fx_bidi.h" | 19 #include "core/include/fxcrt/fx_bidi.h" |
18 #include "core/include/fxcrt/fx_ext.h" | 20 #include "core/include/fxcrt/fx_ext.h" |
19 #include "core/include/fxcrt/fx_ucd.h" | 21 #include "core/include/fxcrt/fx_ucd.h" |
20 #include "text_int.h" | 22 #include "third_party/base/stl_util.h" |
21 | 23 |
22 namespace { | 24 namespace { |
23 | 25 |
24 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) { | 26 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) { |
25 if (curChar < 255) { | 27 if (curChar < 255) { |
26 return FALSE; | 28 return FALSE; |
27 } | 29 } |
28 if ((curChar >= 0x0600 && curChar <= 0x06FF) || | 30 if ((curChar >= 0x0600 && curChar <= 0x06FF) || |
29 (curChar >= 0xFE70 && curChar <= 0xFEFF) || | 31 (curChar >= 0xFE70 && curChar <= 0xFEFF) || |
30 (curChar >= 0xFB50 && curChar <= 0xFDFF) || | 32 (curChar >= 0xFB50 && curChar <= 0xFDFF) || |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
104 #define TEXT_LINEFEED_CHAR L'\n' | 106 #define TEXT_LINEFEED_CHAR L'\n' |
105 #define TEXT_RETURN_CHAR L'\r' | 107 #define TEXT_RETURN_CHAR L'\r' |
106 #define TEXT_EMPTY L"" | 108 #define TEXT_EMPTY L"" |
107 #define TEXT_BLANK L" " | 109 #define TEXT_BLANK L" " |
108 #define TEXT_RETURN_LINEFEED L"\r\n" | 110 #define TEXT_RETURN_LINEFEED L"\r\n" |
109 #define TEXT_LINEFEED L"\n" | 111 #define TEXT_LINEFEED L"\n" |
110 #define TEXT_CHARRATIO_GAPDELTA 0.070 | 112 #define TEXT_CHARRATIO_GAPDELTA 0.070 |
111 | 113 |
112 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) | 114 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) |
113 : m_pPage(pPage), | 115 : m_pPage(pPage), |
114 m_charList(512), | |
115 m_TempCharList(50), | |
116 m_parserflag(flags), | 116 m_parserflag(flags), |
117 m_pPreTextObj(nullptr), | 117 m_pPreTextObj(nullptr), |
118 m_bIsParsed(false), | 118 m_bIsParsed(false), |
119 m_TextlineDir(-1), | 119 m_TextlineDir(-1), |
120 m_CurlineRect(0, 0, 0, 0) { | 120 m_CurlineRect(0, 0, 0, 0) { |
121 m_TextBuf.EstimateSize(0, 10240); | 121 m_TextBuf.EstimateSize(0, 10240); |
122 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 122 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
123 (int)pPage->GetPageHeight(), 0); | 123 (int)pPage->GetPageHeight(), 0); |
124 } | 124 } |
125 | 125 |
(...skipping 14 matching lines...) Expand all Loading... | |
140 default: | 140 default: |
141 return false; | 141 return false; |
142 } | 142 } |
143 } | 143 } |
144 FX_BOOL CPDF_TextPage::ParseTextPage() { | 144 FX_BOOL CPDF_TextPage::ParseTextPage() { |
145 m_bIsParsed = false; | 145 m_bIsParsed = false; |
146 if (!m_pPage) | 146 if (!m_pPage) |
147 return FALSE; | 147 return FALSE; |
148 | 148 |
149 m_TextBuf.Clear(); | 149 m_TextBuf.Clear(); |
150 m_charList.RemoveAll(); | 150 m_CharList.clear(); |
151 m_pPreTextObj = NULL; | 151 m_pPreTextObj = NULL; |
152 ProcessObject(); | 152 ProcessObject(); |
153 m_bIsParsed = true; | 153 m_bIsParsed = true; |
154 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 154 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
155 m_CharIndex.RemoveAll(); | 155 m_CharIndex.RemoveAll(); |
156 int nCount = m_charList.GetSize(); | 156 int nCount = pdfium::CollectionSize<int>(m_CharList); |
157 if (nCount) { | 157 if (nCount) { |
158 m_CharIndex.Add(0); | 158 m_CharIndex.Add(0); |
159 } | 159 } |
160 for (int i = 0; i < nCount; i++) { | 160 for (int i = 0; i < nCount; i++) { |
161 int indexSize = m_CharIndex.GetSize(); | 161 int indexSize = m_CharIndex.GetSize(); |
162 FX_BOOL bNormal = FALSE; | 162 FX_BOOL bNormal = FALSE; |
163 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); | 163 PAGECHAR_INFO charinfo = m_CharList[i]; |
Lei Zhang
2016/01/25 22:11:16
improvement over original code: const ref?
Tom Sepez
2016/01/25 23:04:47
Done.
| |
164 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 164 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
165 bNormal = TRUE; | 165 bNormal = TRUE; |
166 } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) { | 166 } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) { |
167 bNormal = FALSE; | 167 bNormal = FALSE; |
168 } else { | 168 } else { |
169 bNormal = TRUE; | 169 bNormal = TRUE; |
170 } | 170 } |
171 if (bNormal) { | 171 if (bNormal) { |
172 if (indexSize % 2) { | 172 if (indexSize % 2) { |
173 m_CharIndex.Add(1); | 173 m_CharIndex.Add(1); |
(...skipping 19 matching lines...) Expand all Loading... | |
193 if (indexSize % 2) { | 193 if (indexSize % 2) { |
194 m_CharIndex.RemoveAt(indexSize - 1); | 194 m_CharIndex.RemoveAt(indexSize - 1); |
195 } | 195 } |
196 } | 196 } |
197 return TRUE; | 197 return TRUE; |
198 } | 198 } |
199 int CPDF_TextPage::CountChars() const { | 199 int CPDF_TextPage::CountChars() const { |
200 if (m_ParseOptions.m_bGetCharCodeOnly) { | 200 if (m_ParseOptions.m_bGetCharCodeOnly) { |
201 return m_TextBuf.GetSize(); | 201 return m_TextBuf.GetSize(); |
202 } | 202 } |
203 return m_charList.GetSize(); | 203 return pdfium::CollectionSize<int>(m_CharList); |
204 } | 204 } |
205 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { | 205 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { |
206 int indexSize = m_CharIndex.GetSize(); | 206 int indexSize = m_CharIndex.GetSize(); |
207 int count = 0; | 207 int count = 0; |
208 for (int i = 0; i < indexSize; i += 2) { | 208 for (int i = 0; i < indexSize; i += 2) { |
209 count += m_CharIndex.GetAt(i + 1); | 209 count += m_CharIndex.GetAt(i + 1); |
210 if (count > TextIndex) { | 210 if (count > TextIndex) { |
211 return TextIndex - count + m_CharIndex.GetAt(i + 1) + | 211 return TextIndex - count + m_CharIndex.GetAt(i + 1) + |
212 m_CharIndex.GetAt(i); | 212 m_CharIndex.GetAt(i); |
213 } | 213 } |
(...skipping 20 matching lines...) Expand all Loading... | |
234 CFX_RectArray& rectArray) const { | 234 CFX_RectArray& rectArray) const { |
235 if (m_ParseOptions.m_bGetCharCodeOnly) { | 235 if (m_ParseOptions.m_bGetCharCodeOnly) { |
236 return; | 236 return; |
237 } | 237 } |
238 if (start < 0 || nCount == 0) { | 238 if (start < 0 || nCount == 0) { |
239 return; | 239 return; |
240 } | 240 } |
241 if (!m_bIsParsed) { | 241 if (!m_bIsParsed) { |
242 return; | 242 return; |
243 } | 243 } |
244 PAGECHAR_INFO info_curchar; | |
245 CPDF_TextObject* pCurObj = NULL; | 244 CPDF_TextObject* pCurObj = NULL; |
246 CFX_FloatRect rect; | 245 CFX_FloatRect rect; |
247 int curPos = start; | 246 int curPos = start; |
248 FX_BOOL flagNewRect = TRUE; | 247 FX_BOOL flagNewRect = TRUE; |
249 if (nCount + start > m_charList.GetSize() || nCount == -1) { | 248 if (nCount + start > pdfium::CollectionSize<int>(m_CharList) || |
250 nCount = m_charList.GetSize() - start; | 249 nCount == -1) { |
250 nCount = pdfium::CollectionSize<int>(m_CharList) - start; | |
251 } | 251 } |
252 while (nCount--) { | 252 while (nCount--) { |
253 info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(curPos++); | 253 PAGECHAR_INFO info_curchar = m_CharList[curPos++]; |
254 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 254 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
255 continue; | 255 continue; |
256 } | 256 } |
257 if (info_curchar.m_CharBox.Width() < 0.01 || | 257 if (info_curchar.m_CharBox.Width() < 0.01 || |
258 info_curchar.m_CharBox.Height() < 0.01) { | 258 info_curchar.m_CharBox.Height() < 0.01) { |
259 continue; | 259 continue; |
260 } | 260 } |
261 if (!pCurObj) { | 261 if (!pCurObj) { |
262 pCurObj = info_curchar.m_pTextObj; | 262 pCurObj = info_curchar.m_pTextObj; |
263 } | 263 } |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
309 } | 309 } |
310 if (rect.top < info_curchar.m_CharBox.top) { | 310 if (rect.top < info_curchar.m_CharBox.top) { |
311 rect.top = info_curchar.m_CharBox.top; | 311 rect.top = info_curchar.m_CharBox.top; |
312 } | 312 } |
313 if (rect.bottom > info_curchar.m_CharBox.bottom) { | 313 if (rect.bottom > info_curchar.m_CharBox.bottom) { |
314 rect.bottom = info_curchar.m_CharBox.bottom; | 314 rect.bottom = info_curchar.m_CharBox.bottom; |
315 } | 315 } |
316 } | 316 } |
317 } | 317 } |
318 rectArray.Add(rect); | 318 rectArray.Add(rect); |
319 return; | |
320 } | 319 } |
321 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, | 320 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, |
322 FX_FLOAT xTolerance, | 321 FX_FLOAT xTolerance, |
323 FX_FLOAT yTolerance) const { | 322 FX_FLOAT yTolerance) const { |
324 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 323 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
325 return -3; | 324 return -3; |
326 | 325 |
327 int pos = 0; | 326 int pos = 0; |
328 int NearPos = -1; | 327 int NearPos = -1; |
329 double xdif = 5000, ydif = 5000; | 328 double xdif = 5000; |
330 while (pos < m_charList.GetSize()) { | 329 double ydif = 5000; |
331 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); | 330 while (pos < pdfium::CollectionSize<int>(m_CharList)) { |
331 PAGECHAR_INFO charinfo = m_CharList[pos]; | |
332 CFX_FloatRect charrect = charinfo.m_CharBox; | 332 CFX_FloatRect charrect = charinfo.m_CharBox; |
333 if (charrect.Contains(point.x, point.y)) { | 333 if (charrect.Contains(point.x, point.y)) { |
334 break; | 334 break; |
335 } | 335 } |
336 if (xTolerance > 0 || yTolerance > 0) { | 336 if (xTolerance > 0 || yTolerance > 0) { |
337 CFX_FloatRect charRectExt; | 337 CFX_FloatRect charRectExt; |
338 charrect.Normalize(); | 338 charrect.Normalize(); |
339 charRectExt.left = charrect.left - xTolerance / 2; | 339 charRectExt.left = charrect.left - xTolerance / 2; |
340 charRectExt.right = charrect.right + xTolerance / 2; | 340 charRectExt.right = charrect.right + xTolerance / 2; |
341 charRectExt.top = charrect.top + yTolerance / 2; | 341 charRectExt.top = charrect.top + yTolerance / 2; |
(...skipping 10 matching lines...) Expand all Loading... | |
352 : FXSYS_fabs(point.y - charrect.top); | 352 : FXSYS_fabs(point.y - charrect.top); |
353 if (curYdif + curXdif < xdif + ydif) { | 353 if (curYdif + curXdif < xdif + ydif) { |
354 ydif = curYdif; | 354 ydif = curYdif; |
355 xdif = curXdif; | 355 xdif = curXdif; |
356 NearPos = pos; | 356 NearPos = pos; |
357 } | 357 } |
358 } | 358 } |
359 } | 359 } |
360 ++pos; | 360 ++pos; |
361 } | 361 } |
362 if (pos >= m_charList.GetSize()) { | 362 return pos < pdfium::CollectionSize<int>(m_CharList) ? pos : NearPos; |
363 pos = NearPos; | |
364 } | |
365 return pos; | |
366 } | 363 } |
364 | |
367 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { | 365 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { |
366 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | |
367 return CFX_WideString(); | |
368 | |
369 FX_FLOAT posy = 0; | |
370 bool IsContainPreChar = false; | |
371 bool IsAddLineFeed = false; | |
368 CFX_WideString strText; | 372 CFX_WideString strText; |
369 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 373 for (const auto& charinfo : m_CharList) { |
370 return strText; | |
371 | |
372 int nCount = m_charList.GetSize(); | |
373 int pos = 0; | |
374 FX_FLOAT posy = 0; | |
375 FX_BOOL IsContainPreChar = FALSE; | |
376 FX_BOOL ISAddLineFeed = FALSE; | |
377 while (pos < nCount) { | |
378 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | |
379 if (IsRectIntersect(rect, charinfo.m_CharBox)) { | 374 if (IsRectIntersect(rect, charinfo.m_CharBox)) { |
380 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && | 375 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && |
381 ISAddLineFeed) { | 376 IsAddLineFeed) { |
382 posy = charinfo.m_OriginY; | 377 posy = charinfo.m_OriginY; |
383 if (strText.GetLength() > 0) { | 378 if (strText.GetLength() > 0) { |
384 strText += L"\r\n"; | 379 strText += L"\r\n"; |
385 } | 380 } |
386 } | 381 } |
387 IsContainPreChar = TRUE; | 382 IsContainPreChar = true; |
388 ISAddLineFeed = FALSE; | 383 IsAddLineFeed = false; |
389 if (charinfo.m_Unicode) { | 384 if (charinfo.m_Unicode) { |
390 strText += charinfo.m_Unicode; | 385 strText += charinfo.m_Unicode; |
391 } | 386 } |
392 } else if (charinfo.m_Unicode == 32) { | 387 } else if (charinfo.m_Unicode == 32) { |
393 if (IsContainPreChar && charinfo.m_Unicode) { | 388 if (IsContainPreChar && charinfo.m_Unicode) { |
394 strText += charinfo.m_Unicode; | 389 strText += charinfo.m_Unicode; |
395 IsContainPreChar = FALSE; | 390 IsContainPreChar = false; |
396 ISAddLineFeed = FALSE; | 391 IsAddLineFeed = false; |
397 } | 392 } |
398 } else { | 393 } else { |
399 IsContainPreChar = FALSE; | 394 IsContainPreChar = false; |
400 ISAddLineFeed = TRUE; | 395 IsAddLineFeed = true; |
401 } | 396 } |
402 } | 397 } |
403 return strText; | 398 return strText; |
404 } | 399 } |
405 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, | 400 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, |
406 CFX_RectArray& resRectArray) const { | 401 CFX_RectArray& resRectArray) const { |
407 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 402 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
408 return; | 403 return; |
409 | 404 |
410 CFX_FloatRect curRect; | 405 CFX_FloatRect curRect; |
411 FX_BOOL flagNewRect = TRUE; | 406 bool flagNewRect = true; |
412 CPDF_TextObject* pCurObj = NULL; | 407 CPDF_TextObject* pCurObj = nullptr; |
413 int nCount = m_charList.GetSize(); | 408 for (auto info_curchar : m_CharList) { |
414 int pos = 0; | |
415 while (pos < nCount) { | |
416 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | |
417 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 409 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
418 continue; | 410 continue; |
419 } | 411 } |
420 if (IsRectIntersect(rect, info_curchar.m_CharBox)) { | 412 if (!IsRectIntersect(rect, info_curchar.m_CharBox)) { |
421 if (!pCurObj) { | 413 continue; |
422 pCurObj = info_curchar.m_pTextObj; | 414 } |
423 } | 415 if (!pCurObj) { |
424 if (pCurObj != info_curchar.m_pTextObj) { | 416 pCurObj = info_curchar.m_pTextObj; |
425 resRectArray.Add(curRect); | 417 } |
426 pCurObj = info_curchar.m_pTextObj; | 418 if (pCurObj != info_curchar.m_pTextObj) { |
427 flagNewRect = TRUE; | 419 resRectArray.Add(curRect); |
428 } | 420 pCurObj = info_curchar.m_pTextObj; |
429 if (flagNewRect) { | 421 flagNewRect = true; |
430 curRect = info_curchar.m_CharBox; | 422 } |
431 flagNewRect = FALSE; | 423 if (flagNewRect) { |
432 curRect.Normalize(); | 424 curRect = info_curchar.m_CharBox; |
433 } else { | 425 curRect.Normalize(); |
434 info_curchar.m_CharBox.Normalize(); | 426 flagNewRect = false; |
435 if (curRect.left > info_curchar.m_CharBox.left) { | 427 } else { |
436 curRect.left = info_curchar.m_CharBox.left; | 428 info_curchar.m_CharBox.Normalize(); |
437 } | 429 curRect.left = std::min(curRect.left, info_curchar.m_CharBox.left); |
438 if (curRect.right < info_curchar.m_CharBox.right) { | 430 curRect.bottom = std::min(curRect.bottom, info_curchar.m_CharBox.bottom); |
439 curRect.right = info_curchar.m_CharBox.right; | 431 curRect.right = std::max(curRect.right, info_curchar.m_CharBox.right); |
440 } | 432 curRect.top = std::max(curRect.top, info_curchar.m_CharBox.top); |
441 if (curRect.top < info_curchar.m_CharBox.top) { | |
442 curRect.top = info_curchar.m_CharBox.top; | |
443 } | |
444 if (curRect.bottom > info_curchar.m_CharBox.bottom) { | |
445 curRect.bottom = info_curchar.m_CharBox.bottom; | |
446 } | |
447 } | |
448 } | 433 } |
449 } | 434 } |
450 resRectArray.Add(curRect); | 435 resRectArray.Add(curRect); |
451 return; | |
452 } | 436 } |
453 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, | 437 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, |
454 FX_FLOAT y, | 438 FX_FLOAT y, |
455 FX_FLOAT xTolerance, | 439 FX_FLOAT xTolerance, |
456 FX_FLOAT yTolerance) const { | 440 FX_FLOAT yTolerance) const { |
457 if (m_ParseOptions.m_bGetCharCodeOnly) { | 441 if (m_ParseOptions.m_bGetCharCodeOnly) { |
458 return -3; | 442 return -3; |
459 } | 443 } |
460 CPDF_Point point(x, y); | 444 CPDF_Point point(x, y); |
461 return GetIndexAtPos(point, xTolerance, yTolerance); | 445 return GetIndexAtPos(point, xTolerance, yTolerance); |
462 } | 446 } |
463 | 447 |
464 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { | 448 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { |
465 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 449 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
466 return; | 450 return; |
467 | 451 |
468 if (index < 0 || index >= m_charList.GetSize()) | 452 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList)) |
469 return; | 453 return; |
470 | 454 |
471 const PAGECHAR_INFO* charinfo = | 455 const PAGECHAR_INFO* charinfo = &m_CharList[index]; |
Lei Zhang
2016/01/25 22:11:16
Also const ref?
Tom Sepez
2016/01/25 23:04:47
Done.
| |
472 static_cast<PAGECHAR_INFO*>(m_charList.GetAt(index)); | |
473 info->m_Charcode = charinfo->m_CharCode; | 456 info->m_Charcode = charinfo->m_CharCode; |
474 info->m_OriginX = charinfo->m_OriginX; | 457 info->m_OriginX = charinfo->m_OriginX; |
475 info->m_OriginY = charinfo->m_OriginY; | 458 info->m_OriginY = charinfo->m_OriginY; |
476 info->m_Unicode = charinfo->m_Unicode; | 459 info->m_Unicode = charinfo->m_Unicode; |
477 info->m_Flag = charinfo->m_Flag; | 460 info->m_Flag = charinfo->m_Flag; |
478 info->m_CharBox = charinfo->m_CharBox; | 461 info->m_CharBox = charinfo->m_CharBox; |
479 info->m_pTextObj = charinfo->m_pTextObj; | 462 info->m_pTextObj = charinfo->m_pTextObj; |
480 if (charinfo->m_pTextObj && charinfo->m_pTextObj->GetFont()) { | 463 if (charinfo->m_pTextObj && charinfo->m_pTextObj->GetFont()) { |
481 info->m_FontSize = charinfo->m_pTextObj->GetFontSize(); | 464 info->m_FontSize = charinfo->m_pTextObj->GetFontSize(); |
482 } else { | 465 } else { |
483 info->m_FontSize = kDefaultFontSize; | 466 info->m_FontSize = kDefaultFontSize; |
484 } | 467 } |
485 info->m_Matrix.Copy(charinfo->m_Matrix); | 468 info->m_Matrix.Copy(charinfo->m_Matrix); |
486 } | 469 } |
487 | 470 |
488 void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, | 471 void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, |
489 int32_t& nCount) const { | 472 int32_t& nCount) const { |
490 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start); | 473 PAGECHAR_INFO charinfo = m_CharList[start]; |
491 PAGECHAR_INFO charinfo2 = | 474 PAGECHAR_INFO charinfo2 = m_CharList[start + nCount - 1]; |
492 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1); | |
493 if (FPDFTEXT_CHAR_PIECE != charinfo.m_Flag && | 475 if (FPDFTEXT_CHAR_PIECE != charinfo.m_Flag && |
494 FPDFTEXT_CHAR_PIECE != charinfo2.m_Flag) { | 476 FPDFTEXT_CHAR_PIECE != charinfo2.m_Flag) { |
495 return; | 477 return; |
496 } | 478 } |
497 if (FPDFTEXT_CHAR_PIECE == charinfo.m_Flag) { | 479 if (FPDFTEXT_CHAR_PIECE == charinfo.m_Flag) { |
498 PAGECHAR_INFO charinfo1 = charinfo; | 480 PAGECHAR_INFO charinfo1 = charinfo; |
499 int startIndex = start; | 481 int startIndex = start; |
Tom Sepez
2016/01/23 00:07:56
nit: someday, someone should make this loop sane.
| |
500 while (FPDFTEXT_CHAR_PIECE == charinfo1.m_Flag && | 482 while (FPDFTEXT_CHAR_PIECE == charinfo1.m_Flag && |
501 charinfo1.m_Index == charinfo.m_Index) { | 483 charinfo1.m_Index == charinfo.m_Index) { |
502 startIndex--; | 484 startIndex--; |
503 if (startIndex < 0) { | 485 if (startIndex < 0) { |
504 break; | 486 break; |
505 } | 487 } |
506 charinfo1 = *(PAGECHAR_INFO*)m_charList.GetAt(startIndex); | 488 charinfo1 = m_CharList[startIndex]; |
507 } | 489 } |
508 startIndex++; | 490 startIndex++; |
Tom Sepez
2016/01/23 00:07:56
nit: uh huh, postincrement then assign then never
| |
509 start = startIndex; | 491 start = startIndex; |
510 } | 492 } |
511 if (FPDFTEXT_CHAR_PIECE == charinfo2.m_Flag) { | 493 if (FPDFTEXT_CHAR_PIECE == charinfo2.m_Flag) { |
512 PAGECHAR_INFO charinfo3 = charinfo2; | 494 PAGECHAR_INFO charinfo3 = charinfo2; |
513 int endIndex = start + nCount - 1; | 495 int endIndex = start + nCount - 1; |
514 while (FPDFTEXT_CHAR_PIECE == charinfo3.m_Flag && | 496 while (FPDFTEXT_CHAR_PIECE == charinfo3.m_Flag && |
515 charinfo3.m_Index == charinfo2.m_Index) { | 497 charinfo3.m_Index == charinfo2.m_Index) { |
516 endIndex++; | 498 endIndex++; |
517 if (endIndex >= m_charList.GetSize()) { | 499 if (endIndex >= pdfium::CollectionSize<int>(m_CharList)) { |
518 break; | 500 break; |
519 } | 501 } |
520 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex); | 502 charinfo3 = m_CharList[endIndex]; |
521 } | 503 } |
522 endIndex--; | 504 endIndex--; |
Tom Sepez
2016/01/23 00:07:56
nit: then postdecrement, then add 1 on the next line.
| |
523 nCount = endIndex - start + 1; | 505 nCount = endIndex - start + 1; |
524 } | 506 } |
525 } | 507 } |
526 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { | 508 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { |
527 if (!m_bIsParsed || nCount == 0) | 509 if (!m_bIsParsed || nCount == 0) |
528 return L""; | 510 return L""; |
529 | 511 |
530 if (start < 0) | 512 if (start < 0) |
531 start = 0; | 513 start = 0; |
532 | 514 |
533 if (nCount == -1) { | 515 if (nCount == -1) { |
534 nCount = m_charList.GetSize() - start; | 516 nCount = pdfium::CollectionSize<int>(m_CharList) - start; |
535 return m_TextBuf.GetWideString().Mid(start, | 517 return m_TextBuf.GetWideString().Mid(start, |
536 m_TextBuf.GetWideString().GetLength()); | 518 m_TextBuf.GetWideString().GetLength()); |
537 } | 519 } |
538 if (nCount <= 0 || m_charList.GetSize() <= 0) { | 520 if (nCount <= 0 || m_CharList.empty()) { |
539 return L""; | 521 return L""; |
540 } | 522 } |
541 if (nCount + start > m_charList.GetSize() - 1) { | 523 if (nCount + start > pdfium::CollectionSize<int>(m_CharList) - 1) { |
542 nCount = m_charList.GetSize() - start; | 524 nCount = pdfium::CollectionSize<int>(m_CharList) - start; |
543 } | 525 } |
544 if (nCount <= 0) { | 526 if (nCount <= 0) { |
545 return L""; | 527 return L""; |
546 } | 528 } |
547 CheckMarkedContentObject(start, nCount); | 529 CheckMarkedContentObject(start, nCount); |
548 int startindex = 0; | 530 int startindex = 0; |
549 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start); | 531 PAGECHAR_INFO charinfo = m_CharList[start]; |
550 int startOffset = 0; | 532 int startOffset = 0; |
551 while (charinfo.m_Index == -1) { | 533 while (charinfo.m_Index == -1) { |
552 startOffset++; | 534 startOffset++; |
553 if (startOffset > nCount || start + startOffset >= m_charList.GetSize()) { | 535 if (startOffset > nCount || |
536 start + startOffset >= pdfium::CollectionSize<int>(m_CharList)) { | |
554 return L""; | 537 return L""; |
555 } | 538 } |
556 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + startOffset); | 539 charinfo = m_CharList[start + startOffset]; |
557 } | 540 } |
558 startindex = charinfo.m_Index; | 541 startindex = charinfo.m_Index; |
559 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1); | 542 charinfo = m_CharList[start + nCount - 1]; |
560 int nCountOffset = 0; | 543 int nCountOffset = 0; |
561 while (charinfo.m_Index == -1) { | 544 while (charinfo.m_Index == -1) { |
562 nCountOffset++; | 545 nCountOffset++; |
563 if (nCountOffset >= nCount) { | 546 if (nCountOffset >= nCount) { |
564 return L""; | 547 return L""; |
565 } | 548 } |
566 charinfo = | 549 charinfo = m_CharList[start + nCount - nCountOffset - 1]; |
567 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); | |
568 } | 550 } |
569 nCount = start + nCount - nCountOffset - startindex; | 551 nCount = start + nCount - nCountOffset - startindex; |
570 if (nCount <= 0) { | 552 if (nCount <= 0) { |
571 return L""; | 553 return L""; |
572 } | 554 } |
573 return m_TextBuf.GetWideString().Mid(startindex, nCount); | 555 return m_TextBuf.GetWideString().Mid(startindex, nCount); |
574 } | 556 } |
575 int CPDF_TextPage::CountRects(int start, int nCount) { | 557 int CPDF_TextPage::CountRects(int start, int nCount) { |
576 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0) | 558 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0) |
577 return -1; | 559 return -1; |
578 | 560 |
579 if (nCount == -1 || nCount + start > m_charList.GetSize()) { | 561 if (nCount == -1 || |
580 nCount = m_charList.GetSize() - start; | 562 nCount + start > pdfium::CollectionSize<int>(m_CharList)) { |
563 nCount = pdfium::CollectionSize<int>(m_CharList) - start; | |
581 } | 564 } |
582 m_SelRects.RemoveAll(); | 565 m_SelRects.RemoveAll(); |
583 GetRectArray(start, nCount, m_SelRects); | 566 GetRectArray(start, nCount, m_SelRects); |
584 return m_SelRects.GetSize(); | 567 return m_SelRects.GetSize(); |
585 } | 568 } |
586 void CPDF_TextPage::GetRect(int rectIndex, | 569 void CPDF_TextPage::GetRect(int rectIndex, |
587 FX_FLOAT& left, | 570 FX_FLOAT& left, |
588 FX_FLOAT& top, | 571 FX_FLOAT& top, |
589 FX_FLOAT& right, | 572 FX_FLOAT& right, |
590 FX_FLOAT& bottom) const { | 573 FX_FLOAT& bottom) const { |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
671 return GetBaselineRotate(rect, Rotate); | 654 return GetBaselineRotate(rect, Rotate); |
672 } | 655 } |
673 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, | 656 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
674 FX_FLOAT top, | 657 FX_FLOAT top, |
675 FX_FLOAT right, | 658 FX_FLOAT right, |
676 FX_FLOAT bottom, | 659 FX_FLOAT bottom, |
677 FX_BOOL bContains) { | 660 FX_BOOL bContains) { |
678 if (m_ParseOptions.m_bGetCharCodeOnly) | 661 if (m_ParseOptions.m_bGetCharCodeOnly) |
679 return -1; | 662 return -1; |
680 | 663 |
681 m_Segment.RemoveAll(); | 664 m_Segments.RemoveAll(); |
682 if (!m_bIsParsed) | 665 if (!m_bIsParsed) |
683 return -1; | 666 return -1; |
684 | 667 |
685 CFX_FloatRect rect(left, bottom, right, top); | 668 CFX_FloatRect rect(left, bottom, right, top); |
686 rect.Normalize(); | 669 rect.Normalize(); |
687 int nCount = m_charList.GetSize(); | 670 |
688 int pos = 0; | |
689 FPDF_SEGMENT segment; | 671 FPDF_SEGMENT segment; |
690 segment.m_Start = 0; | 672 segment.m_Start = 0; |
691 segment.m_nCount = 0; | 673 segment.m_nCount = 0; |
674 | |
675 int pos = 0; | |
692 int segmentStatus = 0; | 676 int segmentStatus = 0; |
693 FX_BOOL IsContainPreChar = FALSE; | 677 FX_BOOL IsContainPreChar = FALSE; |
694 while (pos < nCount) { | 678 for (const auto& charinfo : m_CharList) { |
695 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos); | |
696 if (bContains && rect.Contains(charinfo.m_CharBox)) { | 679 if (bContains && rect.Contains(charinfo.m_CharBox)) { |
697 if (segmentStatus == 0 || segmentStatus == 2) { | 680 if (segmentStatus == 0 || segmentStatus == 2) { |
698 segment.m_Start = pos; | 681 segment.m_Start = pos; |
699 segment.m_nCount = 1; | 682 segment.m_nCount = 1; |
700 segmentStatus = 1; | 683 segmentStatus = 1; |
701 } else if (segmentStatus == 1) { | 684 } else if (segmentStatus == 1) { |
702 segment.m_nCount++; | 685 segment.m_nCount++; |
703 } | 686 } |
704 IsContainPreChar = TRUE; | 687 IsContainPreChar = TRUE; |
705 } else if (!bContains && | 688 } else if (!bContains && |
(...skipping 13 matching lines...) Expand all Loading... | |
719 segment.m_Start = pos; | 702 segment.m_Start = pos; |
720 segment.m_nCount = 1; | 703 segment.m_nCount = 1; |
721 segmentStatus = 1; | 704 segmentStatus = 1; |
722 } else if (segmentStatus == 1) { | 705 } else if (segmentStatus == 1) { |
723 segment.m_nCount++; | 706 segment.m_nCount++; |
724 } | 707 } |
725 IsContainPreChar = FALSE; | 708 IsContainPreChar = FALSE; |
726 } else { | 709 } else { |
727 if (segmentStatus == 1) { | 710 if (segmentStatus == 1) { |
728 segmentStatus = 2; | 711 segmentStatus = 2; |
729 m_Segment.Add(segment); | 712 m_Segments.Add(segment); |
730 segment.m_Start = 0; | 713 segment.m_Start = 0; |
731 segment.m_nCount = 0; | 714 segment.m_nCount = 0; |
732 } | 715 } |
733 } | 716 } |
734 } else { | 717 } else { |
735 if (segmentStatus == 1) { | 718 if (segmentStatus == 1) { |
736 segmentStatus = 2; | 719 segmentStatus = 2; |
737 m_Segment.Add(segment); | 720 m_Segments.Add(segment); |
738 segment.m_Start = 0; | 721 segment.m_Start = 0; |
739 segment.m_nCount = 0; | 722 segment.m_nCount = 0; |
740 } | 723 } |
741 IsContainPreChar = FALSE; | 724 IsContainPreChar = FALSE; |
742 } | 725 } |
743 pos++; | 726 pos++; |
744 } | 727 } |
745 if (segmentStatus == 1) { | 728 if (segmentStatus == 1) { |
746 segmentStatus = 2; | 729 segmentStatus = 2; |
747 m_Segment.Add(segment); | 730 m_Segments.Add(segment); |
748 segment.m_Start = 0; | 731 segment.m_Start = 0; |
749 segment.m_nCount = 0; | 732 segment.m_nCount = 0; |
750 } | 733 } |
751 return m_Segment.GetSize(); | 734 return m_Segments.GetSize(); |
752 } | 735 } |
753 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { | 736 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { |
754 if (m_ParseOptions.m_bGetCharCodeOnly) { | 737 if (m_ParseOptions.m_bGetCharCodeOnly) { |
755 return; | 738 return; |
756 } | 739 } |
757 if (index < 0 || index >= m_Segment.GetSize()) { | 740 if (index < 0 || index >= m_Segments.GetSize()) { |
758 return; | 741 return; |
759 } | 742 } |
760 start = m_Segment.GetAt(index).m_Start; | 743 start = m_Segments.GetAt(index).m_Start; |
761 count = m_Segment.GetAt(index).m_nCount; | 744 count = m_Segments.GetAt(index).m_nCount; |
762 } | 745 } |
763 int CPDF_TextPage::GetWordBreak(int index, int direction) const { | 746 int CPDF_TextPage::GetWordBreak(int index, int direction) const { |
764 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 747 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
765 return -1; | 748 return -1; |
766 | 749 |
767 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) | 750 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) |
768 return -1; | 751 return -1; |
769 | 752 |
770 if (index < 0 || index >= m_charList.GetSize()) | 753 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList)) |
771 return -1; | 754 return -1; |
772 | 755 |
773 PAGECHAR_INFO charinfo; | 756 PAGECHAR_INFO charinfo = m_CharList[index]; |
Lei Zhang
2016/01/25 22:11:15
const ref
Tom Sepez
2016/01/25 23:04:47
Done.
| |
774 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | |
775 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 757 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
776 return index; | 758 return index; |
777 } | 759 } |
778 if (!IsLetter(charinfo.m_Unicode)) { | 760 if (!IsLetter(charinfo.m_Unicode)) { |
779 return index; | 761 return index; |
780 } | 762 } |
781 int breakPos = index; | 763 int breakPos = index; |
782 if (direction == FPDFTEXT_LEFT) { | 764 if (direction == FPDFTEXT_LEFT) { |
783 while (--breakPos > 0) { | 765 while (--breakPos > 0) { |
784 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); | 766 if (!IsLetter(m_CharList[breakPos].m_Unicode)) |
785 if (!IsLetter(charinfo.m_Unicode)) { | 767 break; |
786 return breakPos; | |
787 } | |
788 } | 768 } |
789 } else if (direction == FPDFTEXT_RIGHT) { | 769 } else if (direction == FPDFTEXT_RIGHT) { |
790 while (++breakPos < m_charList.GetSize()) { | 770 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { |
791 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); | 771 if (!IsLetter(m_CharList[breakPos].m_Unicode)) |
792 if (!IsLetter(charinfo.m_Unicode)) { | 772 break; |
793 return breakPos; | |
794 } | |
795 } | 773 } |
796 } | 774 } |
797 return breakPos; | 775 return breakPos; |
798 } | 776 } |
799 int32_t CPDF_TextPage::FindTextlineFlowDirection() { | 777 int32_t CPDF_TextPage::FindTextlineFlowDirection() { |
800 if (!m_pPage) { | 778 if (!m_pPage) { |
801 return -1; | 779 return -1; |
802 } | 780 } |
803 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); | 781 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); |
804 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); | 782 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); |
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
974 } | 952 } |
975 } | 953 } |
976 return w; | 954 return w; |
977 } | 955 } |
978 void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { | 956 void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { |
979 int32_t start, count; | 957 int32_t start, count; |
980 CFX_BidiChar::Direction ret = pBidi->GetBidiInfo(&start, &count); | 958 CFX_BidiChar::Direction ret = pBidi->GetBidiInfo(&start, &count); |
981 if (ret == CFX_BidiChar::RIGHT) { | 959 if (ret == CFX_BidiChar::RIGHT) { |
982 for (int i = start + count - 1; i >= start; i--) { | 960 for (int i = start + count - 1; i >= start; i--) { |
983 m_TextBuf.AppendChar(str.GetAt(i)); | 961 m_TextBuf.AppendChar(str.GetAt(i)); |
984 m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i)); | 962 m_CharList.push_back(m_TempCharList[i]); |
985 } | 963 } |
986 } else { | 964 } else { |
987 int end = start + count; | 965 int end = start + count; |
988 for (int i = start; i < end; i++) { | 966 for (int i = start; i < end; i++) { |
989 m_TextBuf.AppendChar(str.GetAt(i)); | 967 m_TextBuf.AppendChar(str.GetAt(i)); |
990 m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i)); | 968 m_CharList.push_back(m_TempCharList[i]); |
991 } | 969 } |
992 } | 970 } |
993 } | 971 } |
994 void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) { | 972 void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) { |
995 PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i); | 973 PAGECHAR_INFO Info = m_TempCharList[i]; |
Lei Zhang
2016/01/25 22:11:15
Would you mind renaming |Info| and |Info2| while y
Tom Sepez
2016/01/25 23:04:47
As in starts with lower case? sure.
| |
996 FX_WCHAR wChar = str.GetAt(i); | 974 FX_WCHAR wChar = str.GetAt(i); |
997 if (!IsControlChar(Info)) { | 975 if (!IsControlChar(Info)) { |
998 Info.m_Index = m_TextBuf.GetLength(); | 976 Info.m_Index = m_TextBuf.GetLength(); |
999 if (wChar >= 0xFB00 && wChar <= 0xFB06) { | 977 if (wChar >= 0xFB00 && wChar <= 0xFB06) { |
1000 FX_WCHAR* pDst = NULL; | 978 FX_WCHAR* pDst = NULL; |
1001 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 979 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
1002 if (nCount >= 1) { | 980 if (nCount >= 1) { |
1003 pDst = FX_Alloc(FX_WCHAR, nCount); | 981 pDst = FX_Alloc(FX_WCHAR, nCount); |
1004 FX_Unicode_GetNormalization(wChar, pDst); | 982 FX_Unicode_GetNormalization(wChar, pDst); |
1005 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 983 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
1006 PAGECHAR_INFO Info2 = Info; | 984 PAGECHAR_INFO Info2 = Info; |
1007 Info2.m_Unicode = pDst[nIndex]; | 985 Info2.m_Unicode = pDst[nIndex]; |
1008 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 986 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
1009 m_TextBuf.AppendChar(Info2.m_Unicode); | 987 m_TextBuf.AppendChar(Info2.m_Unicode); |
1010 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 988 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
1011 m_charList.Add(Info2); | 989 m_CharList.push_back(Info2); |
1012 } | 990 } |
1013 } | 991 } |
1014 FX_Free(pDst); | 992 FX_Free(pDst); |
1015 return; | 993 return; |
1016 } | 994 } |
1017 } | 995 } |
1018 m_TextBuf.AppendChar(wChar); | 996 m_TextBuf.AppendChar(wChar); |
1019 } else { | 997 } else { |
1020 Info.m_Index = -1; | 998 Info.m_Index = -1; |
1021 } | 999 } |
1022 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 1000 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
1023 m_charList.Add(Info); | 1001 m_CharList.push_back(Info); |
1024 } | 1002 } |
1025 } | 1003 } |
1026 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) { | 1004 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) { |
1027 PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i); | 1005 PAGECHAR_INFO Info = m_TempCharList[i]; |
Lei Zhang
2016/01/25 22:11:16
Ditto.
| |
1028 if (!IsControlChar(Info)) { | 1006 if (!IsControlChar(Info)) { |
1029 Info.m_Index = m_TextBuf.GetLength(); | 1007 Info.m_Index = m_TextBuf.GetLength(); |
1030 FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE); | 1008 FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE); |
1031 FX_WCHAR* pDst = NULL; | 1009 FX_WCHAR* pDst = NULL; |
1032 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 1010 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
1033 if (nCount >= 1) { | 1011 if (nCount >= 1) { |
1034 pDst = FX_Alloc(FX_WCHAR, nCount); | 1012 pDst = FX_Alloc(FX_WCHAR, nCount); |
1035 FX_Unicode_GetNormalization(wChar, pDst); | 1013 FX_Unicode_GetNormalization(wChar, pDst); |
1036 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 1014 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
1037 PAGECHAR_INFO Info2 = Info; | 1015 PAGECHAR_INFO Info2 = Info; |
1038 Info2.m_Unicode = pDst[nIndex]; | 1016 Info2.m_Unicode = pDst[nIndex]; |
1039 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 1017 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
1040 m_TextBuf.AppendChar(Info2.m_Unicode); | 1018 m_TextBuf.AppendChar(Info2.m_Unicode); |
1041 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 1019 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
1042 m_charList.Add(Info2); | 1020 m_CharList.push_back(Info2); |
1043 } | 1021 } |
1044 } | 1022 } |
1045 FX_Free(pDst); | 1023 FX_Free(pDst); |
1046 return; | 1024 return; |
1047 } | 1025 } |
1048 Info.m_Unicode = wChar; | 1026 Info.m_Unicode = wChar; |
1049 m_TextBuf.AppendChar(Info.m_Unicode); | 1027 m_TextBuf.AppendChar(Info.m_Unicode); |
1050 } else { | 1028 } else { |
1051 Info.m_Index = -1; | 1029 Info.m_Index = -1; |
1052 } | 1030 } |
1053 if (!m_ParseOptions.m_bGetCharCodeOnly) { | 1031 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
1054 m_charList.Add(Info); | 1032 m_CharList.push_back(Info); |
1055 } | 1033 } |
1056 } | 1034 } |
1057 void CPDF_TextPage::CloseTempLine() { | 1035 void CPDF_TextPage::CloseTempLine() { |
1058 int count1 = m_TempCharList.GetSize(); | 1036 if (m_TempCharList.empty()) { |
1059 if (count1 <= 0) { | |
1060 return; | 1037 return; |
1061 } | 1038 } |
1062 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); | 1039 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); |
1063 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1040 CFX_WideString str = m_TempTextBuf.GetWideString(); |
1064 CFX_WordArray order; | 1041 CFX_WordArray order; |
1065 FX_BOOL bR2L = FALSE; | 1042 FX_BOOL bR2L = FALSE; |
1066 int32_t start = 0, count = 0; | 1043 int32_t start = 0, count = 0; |
1067 int nR2L = 0, nL2R = 0; | 1044 int nR2L = 0, nL2R = 0; |
1068 FX_BOOL bPrevSpace = FALSE; | 1045 FX_BOOL bPrevSpace = FALSE; |
1069 for (int i = 0; i < str.GetLength(); i++) { | 1046 for (int i = 0; i < str.GetLength(); i++) { |
1070 if (str.GetAt(i) == 32) { | 1047 if (str.GetAt(i) == 32) { |
1071 if (bPrevSpace) { | 1048 if (bPrevSpace) { |
1072 m_TempTextBuf.Delete(i, 1); | 1049 m_TempTextBuf.Delete(i, 1); |
1073 m_TempCharList.Delete(i); | 1050 m_TempCharList.erase(m_TempCharList.begin() + i); |
1074 str.Delete(i); | 1051 str.Delete(i); |
1075 count1--; | |
1076 i--; | 1052 i--; |
1077 continue; | 1053 continue; |
1078 } | 1054 } |
1079 bPrevSpace = TRUE; | 1055 bPrevSpace = TRUE; |
1080 } else { | 1056 } else { |
1081 bPrevSpace = FALSE; | 1057 bPrevSpace = FALSE; |
1082 } | 1058 } |
1083 if (pBidiChar->AppendChar(str.GetAt(i))) { | 1059 if (pBidiChar->AppendChar(str.GetAt(i))) { |
1084 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | 1060 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); |
1085 order.Add(start); | 1061 order.Add(start); |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1166 break; | 1142 break; |
1167 } else { | 1143 } else { |
1168 j += 3; | 1144 j += 3; |
1169 } | 1145 } |
1170 } | 1146 } |
1171 if (j == 3) { | 1147 if (j == 3) { |
1172 i = -3; | 1148 i = -3; |
1173 bL2R = TRUE; | 1149 bL2R = TRUE; |
1174 continue; | 1150 continue; |
1175 } | 1151 } |
1176 int end = m_TempCharList.GetSize() - 1; | 1152 int end = pdfium::CollectionSize<int>(m_TempCharList) - 1; |
1177 if (j < count) { | 1153 if (j < count) { |
1178 end = order.GetAt(j) - 1; | 1154 end = order.GetAt(j) - 1; |
1179 } | 1155 } |
1180 i = j - 3; | 1156 i = j - 3; |
1181 for (int n = end; n >= start; n--) { | 1157 for (int n = end; n >= start; n--) { |
1182 AddCharInfoByRLDirection(str, n); | 1158 AddCharInfoByRLDirection(str, n); |
1183 } | 1159 } |
1184 } else { | 1160 } else { |
1185 int end = start + count1; | 1161 int end = start + count1; |
1186 for (int n = start; n < end; n++) { | 1162 for (int n = start; n < end; n++) { |
1187 AddCharInfoByLRDirection(str, n); | 1163 AddCharInfoByLRDirection(str, n); |
1188 } | 1164 } |
1189 } | 1165 } |
1190 } | 1166 } |
1191 } | 1167 } |
1192 order.RemoveAll(); | 1168 order.RemoveAll(); |
1193 m_TempCharList.RemoveAll(); | 1169 m_TempCharList.clear(); |
1194 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength()); | 1170 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength()); |
1195 } | 1171 } |
1196 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj, | 1172 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj, |
1197 const CFX_Matrix& formMatrix, | 1173 const CFX_Matrix& formMatrix, |
1198 FX_POSITION ObjPos) { | 1174 FX_POSITION ObjPos) { |
1199 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, | 1175 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, |
1200 pTextObj->m_Top); | 1176 pTextObj->m_Top); |
1201 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { | 1177 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { |
1202 return; | 1178 return; |
1203 } | 1179 } |
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1403 charinfo.m_Unicode = wChar; | 1379 charinfo.m_Unicode = wChar; |
1404 charinfo.m_CharCode = pFont->CharCodeFromUnicode(wChar); | 1380 charinfo.m_CharCode = pFont->CharCodeFromUnicode(wChar); |
1405 charinfo.m_Flag = FPDFTEXT_CHAR_PIECE; | 1381 charinfo.m_Flag = FPDFTEXT_CHAR_PIECE; |
1406 charinfo.m_pTextObj = pTextObj; | 1382 charinfo.m_pTextObj = pTextObj; |
1407 charinfo.m_CharBox.top = charBox.top; | 1383 charinfo.m_CharBox.top = charBox.top; |
1408 charinfo.m_CharBox.left = charBox.left; | 1384 charinfo.m_CharBox.left = charBox.left; |
1409 charinfo.m_CharBox.right = charBox.right; | 1385 charinfo.m_CharBox.right = charBox.right; |
1410 charinfo.m_CharBox.bottom = charBox.bottom; | 1386 charinfo.m_CharBox.bottom = charBox.bottom; |
1411 charinfo.m_Matrix.Copy(matrix); | 1387 charinfo.m_Matrix.Copy(matrix); |
1412 m_TempTextBuf.AppendChar(wChar); | 1388 m_TempTextBuf.AppendChar(wChar); |
1413 m_TempCharList.Add(charinfo); | 1389 m_TempCharList.push_back(charinfo); |
1414 } | 1390 } |
1415 } | 1391 } |
1416 void CPDF_TextPage::FindPreviousTextObject(void) { | 1392 void CPDF_TextPage::FindPreviousTextObject() { |
1417 if (m_TempCharList.GetSize() < 1 && m_charList.GetSize() < 1) { | 1393 if (m_TempCharList.empty() && m_CharList.empty()) |
1418 return; | 1394 return; |
1419 } | 1395 |
1420 PAGECHAR_INFO preChar; | 1396 PAGECHAR_INFO preChar = |
1421 if (m_TempCharList.GetSize() >= 1) { | 1397 m_TempCharList.empty() ? m_CharList.back() : m_TempCharList.back(); |
1422 preChar = | 1398 |
1423 *(PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - 1); | 1399 if (preChar.m_pTextObj) |
1424 } else { | |
1425 preChar = *(PAGECHAR_INFO*)m_charList.GetAt(m_charList.GetSize() - 1); | |
1426 } | |
1427 if (preChar.m_pTextObj) { | |
1428 m_pPreTextObj = preChar.m_pTextObj; | 1400 m_pPreTextObj = preChar.m_pTextObj; |
1429 } | |
1430 } | 1401 } |
1431 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, | 1402 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, |
1432 int32_t iBufStartAppend) { | 1403 int32_t iBufStartAppend) { |
1433 int32_t i, j; | 1404 int32_t i = iCharListStartAppend; |
1434 i = iCharListStartAppend; | 1405 int32_t j = pdfium::CollectionSize<int32_t>(m_TempCharList) - 1; |
1435 j = m_TempCharList.GetSize() - 1; | |
1436 for (; i < j; i++, j--) { | 1406 for (; i < j; i++, j--) { |
1437 std::swap(m_TempCharList[i], m_TempCharList[j]); | 1407 std::swap(m_TempCharList[i], m_TempCharList[j]); |
1438 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); | 1408 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); |
1439 } | 1409 } |
1440 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer(); | 1410 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer(); |
1441 i = iBufStartAppend; | 1411 i = iBufStartAppend; |
1442 j = m_TempTextBuf.GetLength() - 1; | 1412 j = m_TempTextBuf.GetLength() - 1; |
1443 for (; i < j; i++, j--) { | 1413 for (; i < j; i++, j--) { |
1444 std::swap(pTempBuffer[i], pTempBuffer[j]); | 1414 std::swap(pTempBuffer[i], pTempBuffer[j]); |
1445 } | 1415 } |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1512 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, | 1482 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, |
1513 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top)); | 1483 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top)); |
1514 } | 1484 } |
1515 PAGECHAR_INFO generateChar; | 1485 PAGECHAR_INFO generateChar; |
1516 if (result == 1) { | 1486 if (result == 1) { |
1517 if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) { | 1487 if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) { |
1518 if (!formMatrix.IsIdentity()) { | 1488 if (!formMatrix.IsIdentity()) { |
1519 generateChar.m_Matrix.Copy(formMatrix); | 1489 generateChar.m_Matrix.Copy(formMatrix); |
1520 } | 1490 } |
1521 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); | 1491 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); |
1522 m_TempCharList.Add(generateChar); | 1492 m_TempCharList.push_back(generateChar); |
1523 } | 1493 } |
1524 } else if (result == 2) { | 1494 } else if (result == 2) { |
1525 CloseTempLine(); | 1495 CloseTempLine(); |
1526 if (m_TextBuf.GetSize()) { | 1496 if (m_TextBuf.GetSize()) { |
1527 if (m_ParseOptions.m_bGetCharCodeOnly) { | 1497 if (m_ParseOptions.m_bGetCharCodeOnly) { |
1528 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); | 1498 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); |
1529 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); | 1499 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); |
1530 } else { | 1500 } else { |
1531 if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { | 1501 if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { |
1532 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); | 1502 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); |
1533 if (!formMatrix.IsIdentity()) { | 1503 if (!formMatrix.IsIdentity()) { |
1534 generateChar.m_Matrix.Copy(formMatrix); | 1504 generateChar.m_Matrix.Copy(formMatrix); |
1535 } | 1505 } |
1536 m_charList.Add(generateChar); | 1506 m_CharList.push_back(generateChar); |
1537 } | 1507 } |
1538 if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { | 1508 if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { |
1539 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); | 1509 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); |
1540 if (!formMatrix.IsIdentity()) { | 1510 if (!formMatrix.IsIdentity()) { |
1541 generateChar.m_Matrix.Copy(formMatrix); | 1511 generateChar.m_Matrix.Copy(formMatrix); |
1542 } | 1512 } |
1543 m_charList.Add(generateChar); | 1513 m_CharList.push_back(generateChar); |
1544 } | 1514 } |
1545 } | 1515 } |
1546 } | 1516 } |
1547 } else if (result == 3 && !m_ParseOptions.m_bOutputHyphen) { | 1517 } else if (result == 3 && !m_ParseOptions.m_bOutputHyphen) { |
1548 int32_t nChars = pTextObj->CountChars(); | 1518 int32_t nChars = pTextObj->CountChars(); |
1549 if (nChars == 1) { | 1519 if (nChars == 1) { |
1550 CPDF_TextObjectItem item; | 1520 CPDF_TextObjectItem item; |
1551 pTextObj->GetCharInfo(0, &item); | 1521 pTextObj->GetCharInfo(0, &item); |
1552 CFX_WideString wstrItem = | 1522 CFX_WideString wstrItem = |
1553 pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); | 1523 pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); |
1554 if (wstrItem.IsEmpty()) { | 1524 if (wstrItem.IsEmpty()) { |
1555 wstrItem += (FX_WCHAR)item.m_CharCode; | 1525 wstrItem += (FX_WCHAR)item.m_CharCode; |
1556 } | 1526 } |
1557 FX_WCHAR curChar = wstrItem.GetAt(0); | 1527 FX_WCHAR curChar = wstrItem.GetAt(0); |
1558 if (0x2D == curChar || 0xAD == curChar) { | 1528 if (0x2D == curChar || 0xAD == curChar) { |
1559 return; | 1529 return; |
1560 } | 1530 } |
1561 } | 1531 } |
1562 while (m_TempTextBuf.GetSize() > 0 && | 1532 while (m_TempTextBuf.GetSize() > 0 && |
1563 m_TempTextBuf.GetWideString().GetAt(m_TempTextBuf.GetLength() - | 1533 m_TempTextBuf.GetWideString().GetAt(m_TempTextBuf.GetLength() - |
1564 1) == 0x20) { | 1534 1) == 0x20) { |
1565 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1535 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
1566 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); | 1536 m_TempCharList.pop_back(); |
1567 } | 1537 } |
1568 PAGECHAR_INFO* cha = | 1538 PAGECHAR_INFO* cha = &m_TempCharList.back(); |
Lei Zhang
2016/01/25 22:11:16
rename |cha| ?
Tom Sepez
2016/01/25 23:04:47
Done.
| |
1569 (PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - 1); | |
1570 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1539 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
1571 cha->m_Unicode = 0x2; | 1540 cha->m_Unicode = 0x2; |
1572 cha->m_Flag = FPDFTEXT_CHAR_HYPHEN; | 1541 cha->m_Flag = FPDFTEXT_CHAR_HYPHEN; |
1573 m_TempTextBuf.AppendChar(0xfffe); | 1542 m_TempTextBuf.AppendChar(0xfffe); |
1574 } | 1543 } |
1575 } else { | 1544 } else { |
1576 m_CurlineRect = | 1545 m_CurlineRect = |
1577 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, | 1546 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, |
1578 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); | 1547 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); |
1579 } | 1548 } |
1580 if (FPDFTEXT_MC_DELAY == bPreMKC) { | 1549 if (FPDFTEXT_MC_DELAY == bPreMKC) { |
1581 ProcessMarkedContent(Obj); | 1550 ProcessMarkedContent(Obj); |
1582 m_pPreTextObj = pTextObj; | 1551 m_pPreTextObj = pTextObj; |
1583 m_perMatrix.Copy(formMatrix); | 1552 m_perMatrix.Copy(formMatrix); |
1584 return; | 1553 return; |
1585 } | 1554 } |
1586 m_pPreTextObj = pTextObj; | 1555 m_pPreTextObj = pTextObj; |
1587 m_perMatrix.Copy(formMatrix); | 1556 m_perMatrix.Copy(formMatrix); |
1588 int nItems = pTextObj->CountItems(); | 1557 int nItems = pTextObj->CountItems(); |
1589 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); | 1558 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); |
1590 | 1559 |
1591 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); | 1560 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); |
1592 const FX_BOOL bIsBidiAndMirrorInverse = | 1561 const FX_BOOL bIsBidiAndMirrorInverse = |
1593 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; | 1562 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; |
1594 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); | 1563 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); |
1595 int32_t iCharListStartAppend = m_TempCharList.GetSize(); | 1564 int32_t iCharListStartAppend = |
1565 pdfium::CollectionSize<int32_t>(m_TempCharList); | |
1596 | 1566 |
1597 FX_FLOAT spacing = 0; | 1567 FX_FLOAT spacing = 0; |
1598 for (int i = 0; i < nItems; i++) { | 1568 for (int i = 0; i < nItems; i++) { |
1599 CPDF_TextObjectItem item; | 1569 CPDF_TextObjectItem item; |
1600 PAGECHAR_INFO charinfo; | 1570 PAGECHAR_INFO charinfo; |
1601 charinfo.m_OriginX = 0; | 1571 charinfo.m_OriginX = 0; |
1602 charinfo.m_OriginY = 0; | 1572 charinfo.m_OriginY = 0; |
1603 pTextObj->GetItemInfo(i, &item); | 1573 pTextObj->GetItemInfo(i, &item); |
1604 if (item.m_CharCode == (FX_DWORD)-1) { | 1574 if (item.m_CharCode == (FX_DWORD)-1) { |
1605 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1575 CFX_WideString str = m_TempTextBuf.GetWideString(); |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1647 charinfo.m_pTextObj = pTextObj; | 1617 charinfo.m_pTextObj = pTextObj; |
1648 charinfo.m_Index = m_TextBuf.GetLength(); | 1618 charinfo.m_Index = m_TextBuf.GetLength(); |
1649 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); | 1619 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); |
1650 charinfo.m_CharCode = -1; | 1620 charinfo.m_CharCode = -1; |
1651 charinfo.m_Matrix.Copy(formMatrix); | 1621 charinfo.m_Matrix.Copy(formMatrix); |
1652 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, | 1622 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, |
1653 charinfo.m_OriginY); | 1623 charinfo.m_OriginY); |
1654 charinfo.m_CharBox = | 1624 charinfo.m_CharBox = |
1655 CFX_FloatRect(charinfo.m_OriginX, charinfo.m_OriginY, | 1625 CFX_FloatRect(charinfo.m_OriginX, charinfo.m_OriginY, |
1656 charinfo.m_OriginX, charinfo.m_OriginY); | 1626 charinfo.m_OriginX, charinfo.m_OriginY); |
1657 m_TempCharList.Add(charinfo); | 1627 m_TempCharList.push_back(charinfo); |
1658 } | 1628 } |
1659 if (item.m_CharCode == (FX_DWORD)-1) { | 1629 if (item.m_CharCode == (FX_DWORD)-1) { |
1660 continue; | 1630 continue; |
1661 } | 1631 } |
1662 } | 1632 } |
1663 spacing = 0; | 1633 spacing = 0; |
1664 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); | 1634 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); |
1665 FX_BOOL bNoUnicode = FALSE; | 1635 FX_BOOL bNoUnicode = FALSE; |
1666 FX_WCHAR wChar = wstrItem.GetAt(0); | 1636 FX_WCHAR wChar = wstrItem.GetAt(0); |
1667 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { | 1637 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1699 charinfo.m_CharBox.bottom + pTextObj->GetFontSize(); | 1669 charinfo.m_CharBox.bottom + pTextObj->GetFontSize(); |
1700 } | 1670 } |
1701 if (fabsf(charinfo.m_CharBox.right - charinfo.m_CharBox.left) < 0.01f) { | 1671 if (fabsf(charinfo.m_CharBox.right - charinfo.m_CharBox.left) < 0.01f) { |
1702 charinfo.m_CharBox.right = | 1672 charinfo.m_CharBox.right = |
1703 charinfo.m_CharBox.left + pTextObj->GetCharWidth(charinfo.m_CharCode); | 1673 charinfo.m_CharBox.left + pTextObj->GetCharWidth(charinfo.m_CharCode); |
1704 } | 1674 } |
1705 matrix.TransformRect(charinfo.m_CharBox); | 1675 matrix.TransformRect(charinfo.m_CharBox); |
1706 charinfo.m_Matrix.Copy(matrix); | 1676 charinfo.m_Matrix.Copy(matrix); |
1707 if (wstrItem.IsEmpty()) { | 1677 if (wstrItem.IsEmpty()) { |
1708 charinfo.m_Unicode = 0; | 1678 charinfo.m_Unicode = 0; |
1709 m_TempCharList.Add(charinfo); | 1679 m_TempCharList.push_back(charinfo); |
1710 m_TempTextBuf.AppendChar(0xfffe); | 1680 m_TempTextBuf.AppendChar(0xfffe); |
1711 continue; | 1681 continue; |
1712 } else { | 1682 } else { |
1713 int nTotal = wstrItem.GetLength(); | 1683 int nTotal = wstrItem.GetLength(); |
1714 FX_BOOL bDel = FALSE; | 1684 FX_BOOL bDel = FALSE; |
1715 const int count = std::min(m_TempCharList.GetSize(), 7); | 1685 const int count = |
1686 std::min(pdfium::CollectionSize<int>(m_TempCharList), 7); | |
1716 FX_FLOAT threshold = charinfo.m_Matrix.TransformXDistance( | 1687 FX_FLOAT threshold = charinfo.m_Matrix.TransformXDistance( |
1717 (FX_FLOAT)TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize()); | 1688 (FX_FLOAT)TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize()); |
1718 for (int n = m_TempCharList.GetSize(); | 1689 for (int n = pdfium::CollectionSize<int>(m_TempCharList); |
1719 n > m_TempCharList.GetSize() - count; n--) { | 1690 n > pdfium::CollectionSize<int>(m_TempCharList) - count; n--) { |
1720 PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt(n - 1); | 1691 PAGECHAR_INFO* charinfo1 = &m_TempCharList[n - 1]; |
Lei Zhang
2016/01/25 22:11:15
const ref instead?
Tom Sepez
2016/01/25 23:04:47
Done.
| |
1721 if (charinfo1->m_CharCode == charinfo.m_CharCode && | 1692 if (charinfo1->m_CharCode == charinfo.m_CharCode && |
1722 charinfo1->m_pTextObj->GetFont() == | 1693 charinfo1->m_pTextObj->GetFont() == |
1723 charinfo.m_pTextObj->GetFont() && | 1694 charinfo.m_pTextObj->GetFont() && |
1724 FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) < threshold && | 1695 FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) < threshold && |
1725 FXSYS_fabs(charinfo1->m_OriginY - charinfo.m_OriginY) < threshold) { | 1696 FXSYS_fabs(charinfo1->m_OriginY - charinfo.m_OriginY) < threshold) { |
1726 bDel = TRUE; | 1697 bDel = TRUE; |
1727 break; | 1698 break; |
1728 } | 1699 } |
1729 } | 1700 } |
1730 if (!bDel) { | 1701 if (!bDel) { |
1731 for (int nIndex = 0; nIndex < nTotal; nIndex++) { | 1702 for (int nIndex = 0; nIndex < nTotal; nIndex++) { |
1732 charinfo.m_Unicode = wstrItem.GetAt(nIndex); | 1703 charinfo.m_Unicode = wstrItem.GetAt(nIndex); |
1733 if (charinfo.m_Unicode) { | 1704 if (charinfo.m_Unicode) { |
1734 charinfo.m_Index = m_TextBuf.GetLength(); | 1705 charinfo.m_Index = m_TextBuf.GetLength(); |
1735 m_TempTextBuf.AppendChar(charinfo.m_Unicode); | 1706 m_TempTextBuf.AppendChar(charinfo.m_Unicode); |
1736 } else { | 1707 } else { |
1737 m_TempTextBuf.AppendChar(0xfffe); | 1708 m_TempTextBuf.AppendChar(0xfffe); |
1738 } | 1709 } |
1739 m_TempCharList.Add(charinfo); | 1710 m_TempCharList.push_back(charinfo); |
1740 } | 1711 } |
1741 } else if (i == 0) { | 1712 } else if (i == 0) { |
1742 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1713 CFX_WideString str = m_TempTextBuf.GetWideString(); |
1743 if (!str.IsEmpty() && | 1714 if (!str.IsEmpty() && |
1744 str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { | 1715 str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { |
1745 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1716 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
1746 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); | 1717 m_TempCharList.pop_back(); |
1747 } | 1718 } |
1748 } | 1719 } |
1749 } | 1720 } |
1750 } | 1721 } |
1751 if (bIsBidiAndMirrorInverse) { | 1722 if (bIsBidiAndMirrorInverse) { |
1752 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); | 1723 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); |
1753 } | 1724 } |
1754 } | 1725 } |
1755 int32_t CPDF_TextPage::GetTextObjectWritingMode( | 1726 int32_t CPDF_TextPage::GetTextObjectWritingMode( |
1756 const CPDF_TextObject* pTextObj) { | 1727 const CPDF_TextObject* pTextObj) { |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1795 if (0x2D == wcTmp || 0xAD == wcTmp) { | 1766 if (0x2D == wcTmp || 0xAD == wcTmp) { |
1796 if (--nIndex > 0) { | 1767 if (--nIndex > 0) { |
1797 FX_WCHAR preChar = strCurText.GetAt((nIndex)); | 1768 FX_WCHAR preChar = strCurText.GetAt((nIndex)); |
1798 if (((preChar >= L'A' && preChar <= L'Z') || | 1769 if (((preChar >= L'A' && preChar <= L'Z') || |
1799 (preChar >= L'a' && preChar <= L'z')) && | 1770 (preChar >= L'a' && preChar <= L'z')) && |
1800 ((curChar >= L'A' && curChar <= L'Z') || | 1771 ((curChar >= L'A' && curChar <= L'Z') || |
1801 (curChar >= L'a' && curChar <= L'z'))) { | 1772 (curChar >= L'a' && curChar <= L'z'))) { |
1802 return TRUE; | 1773 return TRUE; |
1803 } | 1774 } |
1804 } | 1775 } |
1805 int size = m_TempCharList.GetSize(); | |
1806 PAGECHAR_INFO preChar; | 1776 PAGECHAR_INFO preChar; |
Lei Zhang
2016/01/25 22:11:16
const PAGECHAR_INFO* ?
Tom Sepez
2016/01/25 23:04:47
Const ptr, also renamed since preChar is also a wc
| |
1807 if (size) { | 1777 if (!m_TempCharList.empty()) { |
1808 preChar = (PAGECHAR_INFO)m_TempCharList[size - 1]; | 1778 preChar = m_TempCharList.back(); |
1779 } else if (!m_CharList.empty()) { | |
1780 preChar = m_CharList.back(); | |
1809 } else { | 1781 } else { |
1810 size = m_charList.GetSize(); | 1782 return FALSE; |
1811 if (size == 0) { | |
1812 return FALSE; | |
1813 } | |
1814 preChar = (PAGECHAR_INFO)m_charList[size - 1]; | |
1815 } | 1783 } |
1816 if (FPDFTEXT_CHAR_PIECE == preChar.m_Flag && | 1784 if (FPDFTEXT_CHAR_PIECE == preChar.m_Flag && |
1817 (0xAD == preChar.m_Unicode || 0x2D == preChar.m_Unicode)) { | 1785 (0xAD == preChar.m_Unicode || 0x2D == preChar.m_Unicode)) { |
1818 return TRUE; | 1786 return TRUE; |
1819 } | 1787 } |
1820 } | 1788 } |
1821 return FALSE; | 1789 return FALSE; |
1822 } | 1790 } |
1823 int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, | 1791 int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, |
1824 const CFX_Matrix& formMatrix) { | 1792 const CFX_Matrix& formMatrix) { |
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1981 if (!pTextObj1 || !pTextObj2) { | 1949 if (!pTextObj1 || !pTextObj2) { |
1982 return FALSE; | 1950 return FALSE; |
1983 } | 1951 } |
1984 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, | 1952 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, |
1985 pTextObj2->m_Right, pTextObj2->m_Top); | 1953 pTextObj2->m_Right, pTextObj2->m_Top); |
1986 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, | 1954 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, |
1987 pTextObj1->m_Right, pTextObj1->m_Top); | 1955 pTextObj1->m_Right, pTextObj1->m_Top); |
1988 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty() && | 1956 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty() && |
1989 !m_ParseOptions.m_bGetCharCodeOnly) { | 1957 !m_ParseOptions.m_bGetCharCodeOnly) { |
1990 FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left); | 1958 FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left); |
1991 int nCount = m_charList.GetSize(); | 1959 size_t nCount = m_CharList.size(); |
1992 if (nCount >= 2) { | 1960 if (nCount >= 2) { |
1993 PAGECHAR_INFO perCharTemp = (PAGECHAR_INFO)m_charList[nCount - 2]; | 1961 PAGECHAR_INFO perCharTemp = m_CharList[nCount - 2]; |
1994 FX_FLOAT dbSpace = perCharTemp.m_CharBox.Width(); | 1962 FX_FLOAT dbSpace = perCharTemp.m_CharBox.Width(); |
1995 if (dbXdif > dbSpace) { | 1963 if (dbXdif > dbSpace) { |
1996 return FALSE; | 1964 return FALSE; |
1997 } | 1965 } |
1998 } | 1966 } |
1999 } | 1967 } |
2000 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { | 1968 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { |
2001 rcPreObj.Intersect(rcCurObj); | 1969 rcPreObj.Intersect(rcCurObj); |
2002 if (rcPreObj.IsEmpty()) { | 1970 if (rcPreObj.IsEmpty()) { |
2003 return FALSE; | 1971 return FALSE; |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2054 } | 2022 } |
2055 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { | 2023 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { |
2056 return TRUE; | 2024 return TRUE; |
2057 } | 2025 } |
2058 i++; | 2026 i++; |
2059 } | 2027 } |
2060 return FALSE; | 2028 return FALSE; |
2061 } | 2029 } |
2062 | 2030 |
2063 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) { | 2031 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) { |
2064 int size = m_TempCharList.GetSize(); | |
2065 PAGECHAR_INFO preChar; | 2032 PAGECHAR_INFO preChar; |
Lei Zhang
2016/01/25 22:11:15
Ditto, const PAGECHAR_INFO*
| |
2066 if (size) { | 2033 if (!m_TempCharList.empty()) { |
2067 preChar = (PAGECHAR_INFO)m_TempCharList[size - 1]; | 2034 preChar = m_TempCharList.back(); |
2035 } else if (!m_CharList.empty()) { | |
2036 preChar = m_CharList.back(); | |
2068 } else { | 2037 } else { |
2069 size = m_charList.GetSize(); | 2038 return FALSE; |
2070 if (size == 0) { | |
2071 return FALSE; | |
2072 } | |
2073 preChar = (PAGECHAR_INFO)m_charList[size - 1]; | |
2074 } | 2039 } |
2075 info.m_Index = m_TextBuf.GetLength(); | 2040 info.m_Index = m_TextBuf.GetLength(); |
2076 info.m_Unicode = unicode; | 2041 info.m_Unicode = unicode; |
2077 info.m_pTextObj = NULL; | 2042 info.m_pTextObj = NULL; |
2078 info.m_CharCode = -1; | 2043 info.m_CharCode = -1; |
2079 info.m_Flag = FPDFTEXT_CHAR_GENERATED; | 2044 info.m_Flag = FPDFTEXT_CHAR_GENERATED; |
2080 int preWidth = 0; | 2045 int preWidth = 0; |
2081 if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1) | 2046 if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1) |
2082 preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont()); | 2047 preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont()); |
2083 | 2048 |
(...skipping 634 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2718 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2683 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2719 return; | 2684 return; |
2720 } | 2685 } |
2721 CPDF_LinkExt* link = NULL; | 2686 CPDF_LinkExt* link = NULL; |
2722 link = m_LinkList.GetAt(index); | 2687 link = m_LinkList.GetAt(index); |
2723 if (!link) { | 2688 if (!link) { |
2724 return; | 2689 return; |
2725 } | 2690 } |
2726 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2691 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2727 } | 2692 } |
OLD | NEW |