Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(268)

Side by Side Diff: core/src/fpdftext/fpdf_text_int.cpp

Issue 1695633004: Fix the way to access marked content. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: address comments Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/src/fpdftext/text_int.h" 7 #include "core/src/fpdftext/text_int.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <cctype> 10 #include <cctype>
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
127 case 0x94: 127 case 0x94:
128 case 0x96: 128 case 0x96:
129 case 0x97: 129 case 0x97:
130 case 0x98: 130 case 0x98:
131 case 0xfffe: 131 case 0xfffe:
132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; 132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN;
133 default: 133 default:
134 return false; 134 return false;
135 } 135 }
136 } 136 }
137
137 FX_BOOL CPDF_TextPage::ParseTextPage() { 138 FX_BOOL CPDF_TextPage::ParseTextPage() {
138 m_bIsParsed = false; 139 m_bIsParsed = false;
139 if (!m_pPage) 140 if (!m_pPage)
140 return FALSE; 141 return FALSE;
141 142
142 m_TextBuf.Clear(); 143 m_TextBuf.Clear();
143 m_CharList.clear(); 144 m_CharList.clear();
144 m_pPreTextObj = NULL; 145 m_pPreTextObj = NULL;
145 ProcessObject(); 146 ProcessObject();
146 m_bIsParsed = true; 147 m_bIsParsed = true;
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
179 m_CharIndex.push_back(i + 1); 180 m_CharIndex.push_back(i + 1);
180 } 181 }
181 } 182 }
182 } 183 }
183 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); 184 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
184 if (indexSize % 2) { 185 if (indexSize % 2) {
185 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); 186 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1);
186 } 187 }
187 return TRUE; 188 return TRUE;
188 } 189 }
190
189 int CPDF_TextPage::CountChars() const { 191 int CPDF_TextPage::CountChars() const {
190 return pdfium::CollectionSize<int>(m_CharList); 192 return pdfium::CollectionSize<int>(m_CharList);
191 } 193 }
194
192 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { 195 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const {
193 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); 196 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
194 int count = 0; 197 int count = 0;
195 for (int i = 0; i < indexSize; i += 2) { 198 for (int i = 0; i < indexSize; i += 2) {
196 count += m_CharIndex[i + 1]; 199 count += m_CharIndex[i + 1];
197 if (count > TextIndex) 200 if (count > TextIndex)
198 return TextIndex - count + m_CharIndex[i + 1] + m_CharIndex[i]; 201 return TextIndex - count + m_CharIndex[i + 1] + m_CharIndex[i];
199 } 202 }
200 return -1; 203 return -1;
201 } 204 }
205
202 int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const { 206 int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const {
203 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); 207 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
204 int count = 0; 208 int count = 0;
205 for (int i = 0; i < indexSize; i += 2) { 209 for (int i = 0; i < indexSize; i += 2) {
206 count += m_CharIndex[i + 1]; 210 count += m_CharIndex[i + 1];
207 if (m_CharIndex[i + 1] + m_CharIndex[i] > CharIndex) { 211 if (m_CharIndex[i + 1] + m_CharIndex[i] > CharIndex) {
208 if (CharIndex - m_CharIndex[i] < 0) 212 if (CharIndex - m_CharIndex[i] < 0)
209 return -1; 213 return -1;
210 214
211 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1]; 215 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1];
212 } 216 }
213 } 217 }
214 return -1; 218 return -1;
215 } 219 }
220
216 void CPDF_TextPage::GetRectArray(int start, 221 void CPDF_TextPage::GetRectArray(int start,
217 int nCount, 222 int nCount,
218 CFX_RectArray& rectArray) const { 223 CFX_RectArray& rectArray) const {
219 if (start < 0 || nCount == 0) { 224 if (start < 0 || nCount == 0) {
220 return; 225 return;
221 } 226 }
222 if (!m_bIsParsed) { 227 if (!m_bIsParsed) {
223 return; 228 return;
224 } 229 }
225 CPDF_TextObject* pCurObj = NULL; 230 CPDF_TextObject* pCurObj = NULL;
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
291 if (rect.top < info_curchar.m_CharBox.top) { 296 if (rect.top < info_curchar.m_CharBox.top) {
292 rect.top = info_curchar.m_CharBox.top; 297 rect.top = info_curchar.m_CharBox.top;
293 } 298 }
294 if (rect.bottom > info_curchar.m_CharBox.bottom) { 299 if (rect.bottom > info_curchar.m_CharBox.bottom) {
295 rect.bottom = info_curchar.m_CharBox.bottom; 300 rect.bottom = info_curchar.m_CharBox.bottom;
296 } 301 }
297 } 302 }
298 } 303 }
299 rectArray.Add(rect); 304 rectArray.Add(rect);
300 } 305 }
306
301 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, 307 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
302 FX_FLOAT xTolerance, 308 FX_FLOAT xTolerance,
303 FX_FLOAT yTolerance) const { 309 FX_FLOAT yTolerance) const {
304 if (!m_bIsParsed) 310 if (!m_bIsParsed)
305 return -3; 311 return -3;
306 312
307 int pos = 0; 313 int pos = 0;
308 int NearPos = -1; 314 int NearPos = -1;
309 double xdif = 5000; 315 double xdif = 5000;
310 double ydif = 5000; 316 double ydif = 5000;
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
371 IsContainPreChar = false; 377 IsContainPreChar = false;
372 IsAddLineFeed = false; 378 IsAddLineFeed = false;
373 } 379 }
374 } else { 380 } else {
375 IsContainPreChar = false; 381 IsContainPreChar = false;
376 IsAddLineFeed = true; 382 IsAddLineFeed = true;
377 } 383 }
378 } 384 }
379 return strText; 385 return strText;
380 } 386 }
387
381 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, 388 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect,
382 CFX_RectArray& resRectArray) const { 389 CFX_RectArray& resRectArray) const {
383 if (!m_bIsParsed) 390 if (!m_bIsParsed)
384 return; 391 return;
385 392
386 CFX_FloatRect curRect; 393 CFX_FloatRect curRect;
387 bool flagNewRect = true; 394 bool flagNewRect = true;
388 CPDF_TextObject* pCurObj = nullptr; 395 CPDF_TextObject* pCurObj = nullptr;
389 for (auto info_curchar : m_CharList) { 396 for (auto info_curchar : m_CharList) {
390 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { 397 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {
(...skipping 17 matching lines...) Expand all
408 } else { 415 } else {
409 info_curchar.m_CharBox.Normalize(); 416 info_curchar.m_CharBox.Normalize();
410 curRect.left = std::min(curRect.left, info_curchar.m_CharBox.left); 417 curRect.left = std::min(curRect.left, info_curchar.m_CharBox.left);
411 curRect.bottom = std::min(curRect.bottom, info_curchar.m_CharBox.bottom); 418 curRect.bottom = std::min(curRect.bottom, info_curchar.m_CharBox.bottom);
412 curRect.right = std::max(curRect.right, info_curchar.m_CharBox.right); 419 curRect.right = std::max(curRect.right, info_curchar.m_CharBox.right);
413 curRect.top = std::max(curRect.top, info_curchar.m_CharBox.top); 420 curRect.top = std::max(curRect.top, info_curchar.m_CharBox.top);
414 } 421 }
415 } 422 }
416 resRectArray.Add(curRect); 423 resRectArray.Add(curRect);
417 } 424 }
425
418 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, 426 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x,
419 FX_FLOAT y, 427 FX_FLOAT y,
420 FX_FLOAT xTolerance, 428 FX_FLOAT xTolerance,
421 FX_FLOAT yTolerance) const { 429 FX_FLOAT yTolerance) const {
422 CPDF_Point point(x, y); 430 CPDF_Point point(x, y);
423 return GetIndexAtPos(point, xTolerance, yTolerance); 431 return GetIndexAtPos(point, xTolerance, yTolerance);
424 } 432 }
425 433
426 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { 434 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const {
427 if (!m_bIsParsed) 435 if (!m_bIsParsed)
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
476 endIndex++; 484 endIndex++;
477 if (endIndex >= pdfium::CollectionSize<int>(m_CharList)) { 485 if (endIndex >= pdfium::CollectionSize<int>(m_CharList)) {
478 break; 486 break;
479 } 487 }
480 charinfo3 = m_CharList[endIndex]; 488 charinfo3 = m_CharList[endIndex];
481 } 489 }
482 endIndex--; 490 endIndex--;
483 nCount = endIndex - start + 1; 491 nCount = endIndex - start + 1;
484 } 492 }
485 } 493 }
494
486 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { 495 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
487 if (!m_bIsParsed || nCount == 0) 496 if (!m_bIsParsed || nCount == 0)
488 return L""; 497 return L"";
489 498
490 if (start < 0) 499 if (start < 0)
491 start = 0; 500 start = 0;
492 501
493 if (nCount == -1) { 502 if (nCount == -1) {
494 nCount = pdfium::CollectionSize<int>(m_CharList) - start; 503 nCount = pdfium::CollectionSize<int>(m_CharList) - start;
495 return m_TextBuf.GetWideString().Mid(start, 504 return m_TextBuf.GetWideString().Mid(start,
(...skipping 29 matching lines...) Expand all
525 return L""; 534 return L"";
526 } 535 }
527 charinfo = m_CharList[start + nCount - nCountOffset - 1]; 536 charinfo = m_CharList[start + nCount - nCountOffset - 1];
528 } 537 }
529 nCount = start + nCount - nCountOffset - startindex; 538 nCount = start + nCount - nCountOffset - startindex;
530 if (nCount <= 0) { 539 if (nCount <= 0) {
531 return L""; 540 return L"";
532 } 541 }
533 return m_TextBuf.GetWideString().Mid(startindex, nCount); 542 return m_TextBuf.GetWideString().Mid(startindex, nCount);
534 } 543 }
544
535 int CPDF_TextPage::CountRects(int start, int nCount) { 545 int CPDF_TextPage::CountRects(int start, int nCount) {
536 if (!m_bIsParsed || start < 0) 546 if (!m_bIsParsed || start < 0)
537 return -1; 547 return -1;
538 548
539 if (nCount == -1 || 549 if (nCount == -1 ||
540 nCount + start > pdfium::CollectionSize<int>(m_CharList)) { 550 nCount + start > pdfium::CollectionSize<int>(m_CharList)) {
541 nCount = pdfium::CollectionSize<int>(m_CharList) - start; 551 nCount = pdfium::CollectionSize<int>(m_CharList) - start;
542 } 552 }
543 m_SelRects.RemoveAll(); 553 m_SelRects.RemoveAll();
544 GetRectArray(start, nCount, m_SelRects); 554 GetRectArray(start, nCount, m_SelRects);
545 return m_SelRects.GetSize(); 555 return m_SelRects.GetSize();
546 } 556 }
557
547 void CPDF_TextPage::GetRect(int rectIndex, 558 void CPDF_TextPage::GetRect(int rectIndex,
548 FX_FLOAT& left, 559 FX_FLOAT& left,
549 FX_FLOAT& top, 560 FX_FLOAT& top,
550 FX_FLOAT& right, 561 FX_FLOAT& right,
551 FX_FLOAT& bottom) const { 562 FX_FLOAT& bottom) const {
552 if (!m_bIsParsed) 563 if (!m_bIsParsed)
553 return; 564 return;
554 565
555 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) 566 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
556 return; 567 return;
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
618 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { 629 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {
619 if (!m_bIsParsed) 630 if (!m_bIsParsed)
620 return FALSE; 631 return FALSE;
621 632
622 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) 633 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
623 return FALSE; 634 return FALSE;
624 635
625 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); 636 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
626 return GetBaselineRotate(rect, Rotate); 637 return GetBaselineRotate(rect, Rotate);
627 } 638 }
639
628 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, 640 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
629 FX_FLOAT top, 641 FX_FLOAT top,
630 FX_FLOAT right, 642 FX_FLOAT right,
631 FX_FLOAT bottom, 643 FX_FLOAT bottom,
632 FX_BOOL bContains) { 644 FX_BOOL bContains) {
633 m_Segments.RemoveAll(); 645 m_Segments.RemoveAll();
634 if (!m_bIsParsed) 646 if (!m_bIsParsed)
635 return -1; 647 return -1;
636 648
637 CFX_FloatRect rect(left, bottom, right, top); 649 CFX_FloatRect rect(left, bottom, right, top);
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
695 pos++; 707 pos++;
696 } 708 }
697 if (segmentStatus == 1) { 709 if (segmentStatus == 1) {
698 segmentStatus = 2; 710 segmentStatus = 2;
699 m_Segments.Add(segment); 711 m_Segments.Add(segment);
700 segment.m_Start = 0; 712 segment.m_Start = 0;
701 segment.m_nCount = 0; 713 segment.m_nCount = 0;
702 } 714 }
703 return m_Segments.GetSize(); 715 return m_Segments.GetSize();
704 } 716 }
717
705 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { 718 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
706 if (index < 0 || index >= m_Segments.GetSize()) { 719 if (index < 0 || index >= m_Segments.GetSize()) {
707 return; 720 return;
708 } 721 }
709 start = m_Segments.GetAt(index).m_Start; 722 start = m_Segments.GetAt(index).m_Start;
710 count = m_Segments.GetAt(index).m_nCount; 723 count = m_Segments.GetAt(index).m_nCount;
711 } 724 }
725
712 int CPDF_TextPage::GetWordBreak(int index, int direction) const { 726 int CPDF_TextPage::GetWordBreak(int index, int direction) const {
713 if (!m_bIsParsed) 727 if (!m_bIsParsed)
714 return -1; 728 return -1;
715 729
716 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) 730 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT)
717 return -1; 731 return -1;
718 732
719 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList)) 733 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList))
720 return -1; 734 return -1;
721 735
(...skipping 11 matching lines...) Expand all
733 break; 747 break;
734 } 748 }
735 } else if (direction == FPDFTEXT_RIGHT) { 749 } else if (direction == FPDFTEXT_RIGHT) {
736 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { 750 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) {
737 if (!IsLetter(m_CharList[breakPos].m_Unicode)) 751 if (!IsLetter(m_CharList[breakPos].m_Unicode))
738 break; 752 break;
739 } 753 }
740 } 754 }
741 return breakPos; 755 return breakPos;
742 } 756 }
757
743 int32_t CPDF_TextPage::FindTextlineFlowDirection() { 758 int32_t CPDF_TextPage::FindTextlineFlowDirection() {
744 if (!m_pPage) { 759 if (!m_pPage) {
745 return -1; 760 return -1;
746 } 761 }
747 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); 762 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth();
748 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); 763 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight();
749 std::vector<uint8_t> nHorizontalMask(nPageWidth); 764 std::vector<uint8_t> nHorizontalMask(nPageWidth);
750 std::vector<uint8_t> nVerticalMask(nPageHeight); 765 std::vector<uint8_t> nVerticalMask(nPageHeight);
751 uint8_t* pDataH = nHorizontalMask.data(); 766 uint8_t* pDataH = nHorizontalMask.data();
752 uint8_t* pDataV = nVerticalMask.data(); 767 uint8_t* pDataV = nVerticalMask.data();
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
830 return 0; 845 return 0;
831 } 846 }
832 if (nSumH - nSumV > 0.0f) { 847 if (nSumH - nSumV > 0.0f) {
833 return 0; 848 return 0;
834 } 849 }
835 if (nSumV - nSumH > 0.0f) { 850 if (nSumV - nSumH > 0.0f) {
836 return 1; 851 return 1;
837 } 852 }
838 return -1; 853 return -1;
839 } 854 }
855
840 void CPDF_TextPage::ProcessObject() { 856 void CPDF_TextPage::ProcessObject() {
841 CPDF_PageObject* pPageObj = NULL; 857 CPDF_PageObject* pPageObj = NULL;
842 if (!m_pPage) { 858 if (!m_pPage) {
843 return; 859 return;
844 } 860 }
845 FX_POSITION pos; 861 FX_POSITION pos;
846 pos = m_pPage->GetFirstObjectPosition(); 862 pos = m_pPage->GetFirstObjectPosition();
847 if (!pos) { 863 if (!pos) {
848 return; 864 return;
849 } 865 }
(...skipping 13 matching lines...) Expand all
863 } 879 }
864 pPageObj = NULL; 880 pPageObj = NULL;
865 } 881 }
866 int count = m_LineObj.GetSize(); 882 int count = m_LineObj.GetSize();
867 for (int i = 0; i < count; i++) { 883 for (int i = 0; i < count; i++) {
868 ProcessTextObject(m_LineObj.GetAt(i)); 884 ProcessTextObject(m_LineObj.GetAt(i));
869 } 885 }
870 m_LineObj.RemoveAll(); 886 m_LineObj.RemoveAll();
871 CloseTempLine(); 887 CloseTempLine();
872 } 888 }
889
873 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, 890 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj,
874 const CFX_Matrix& formMatrix) { 891 const CFX_Matrix& formMatrix) {
875 CPDF_PageObject* pPageObj = NULL; 892 CPDF_PageObject* pPageObj = NULL;
876 FX_POSITION pos; 893 FX_POSITION pos;
877 if (!pFormObj) { 894 if (!pFormObj) {
878 return; 895 return;
879 } 896 }
880 pos = pFormObj->m_pForm->GetFirstObjectPosition(); 897 pos = pFormObj->m_pForm->GetFirstObjectPosition();
881 if (!pos) { 898 if (!pos) {
882 return; 899 return;
883 } 900 }
884 CFX_Matrix curFormMatrix; 901 CFX_Matrix curFormMatrix;
885 curFormMatrix.Copy(pFormObj->m_FormMatrix); 902 curFormMatrix.Copy(pFormObj->m_FormMatrix);
886 curFormMatrix.Concat(formMatrix); 903 curFormMatrix.Concat(formMatrix);
887 while (pos) { 904 while (pos) {
888 pPageObj = pFormObj->m_pForm->GetNextObject(pos); 905 pPageObj = pFormObj->m_pForm->GetNextObject(pos);
889 if (pPageObj) { 906 if (pPageObj) {
890 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { 907 if (pPageObj->m_Type == CPDF_PageObject::TEXT) {
891 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); 908 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos);
892 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { 909 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) {
893 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); 910 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix);
894 } 911 }
895 } 912 }
896 pPageObj = NULL; 913 pPageObj = NULL;
897 } 914 }
898 } 915 }
916
899 int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const { 917 int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const {
900 if (charCode == -1) { 918 if (charCode == -1) {
901 return 0; 919 return 0;
902 } 920 }
903 int w = pFont->GetCharWidthF(charCode); 921 int w = pFont->GetCharWidthF(charCode);
904 if (w == 0) { 922 if (w == 0) {
905 CFX_ByteString str; 923 CFX_ByteString str;
906 pFont->AppendChar(str, charCode); 924 pFont->AppendChar(str, charCode);
907 w = pFont->GetStringWidth(str, 1); 925 w = pFont->GetStringWidth(str, 1);
908 if (w == 0) { 926 if (w == 0) {
909 FX_RECT BBox; 927 FX_RECT BBox;
910 pFont->GetCharBBox(charCode, BBox); 928 pFont->GetCharBBox(charCode, BBox);
911 w = BBox.right - BBox.left; 929 w = BBox.right - BBox.left;
912 } 930 }
913 } 931 }
914 return w; 932 return w;
915 } 933 }
934
916 void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { 935 void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) {
917 CFX_BidiChar::Segment seg = pBidi->GetSegmentInfo(); 936 CFX_BidiChar::Segment seg = pBidi->GetSegmentInfo();
918 if (seg.direction == CFX_BidiChar::RIGHT) { 937 if (seg.direction == CFX_BidiChar::RIGHT) {
919 for (int i = seg.start + seg.count; i > seg.start; i--) { 938 for (int i = seg.start + seg.count; i > seg.start; i--) {
920 m_TextBuf.AppendChar(str.GetAt(i - i)); 939 m_TextBuf.AppendChar(str.GetAt(i - i));
921 m_CharList.push_back(m_TempCharList[i - 1]); 940 m_CharList.push_back(m_TempCharList[i - 1]);
922 } 941 }
923 } else { 942 } else {
924 for (int i = seg.start; i < seg.start + seg.count; i++) { 943 for (int i = seg.start; i < seg.start + seg.count; i++) {
925 m_TextBuf.AppendChar(str.GetAt(i)); 944 m_TextBuf.AppendChar(str.GetAt(i));
926 m_CharList.push_back(m_TempCharList[i]); 945 m_CharList.push_back(m_TempCharList[i]);
927 } 946 }
928 } 947 }
929 } 948 }
949
930 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, 950 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
931 PAGECHAR_INFO info) { 951 PAGECHAR_INFO info) {
932 if (!IsControlChar(info)) { 952 if (!IsControlChar(info)) {
933 info.m_Index = m_TextBuf.GetLength(); 953 info.m_Index = m_TextBuf.GetLength();
934 if (wChar >= 0xFB00 && wChar <= 0xFB06) { 954 if (wChar >= 0xFB00 && wChar <= 0xFB06) {
935 FX_WCHAR* pDst = NULL; 955 FX_WCHAR* pDst = NULL;
936 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); 956 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
937 if (nCount >= 1) { 957 if (nCount >= 1) {
938 pDst = FX_Alloc(FX_WCHAR, nCount); 958 pDst = FX_Alloc(FX_WCHAR, nCount);
939 FX_Unicode_GetNormalization(wChar, pDst); 959 FX_Unicode_GetNormalization(wChar, pDst);
940 for (int nIndex = 0; nIndex < nCount; nIndex++) { 960 for (int nIndex = 0; nIndex < nCount; nIndex++) {
941 PAGECHAR_INFO info2 = info; 961 PAGECHAR_INFO info2 = info;
942 info2.m_Unicode = pDst[nIndex]; 962 info2.m_Unicode = pDst[nIndex];
943 info2.m_Flag = FPDFTEXT_CHAR_PIECE; 963 info2.m_Flag = FPDFTEXT_CHAR_PIECE;
944 m_TextBuf.AppendChar(info2.m_Unicode); 964 m_TextBuf.AppendChar(info2.m_Unicode);
945 m_CharList.push_back(info2); 965 m_CharList.push_back(info2);
946 } 966 }
947 FX_Free(pDst); 967 FX_Free(pDst);
948 return; 968 return;
949 } 969 }
950 } 970 }
951 m_TextBuf.AppendChar(wChar); 971 m_TextBuf.AppendChar(wChar);
952 } else { 972 } else {
953 info.m_Index = -1; 973 info.m_Index = -1;
954 } 974 }
955 m_CharList.push_back(info); 975 m_CharList.push_back(info);
956 } 976 }
977
957 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, 978 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
958 PAGECHAR_INFO info) { 979 PAGECHAR_INFO info) {
959 if (!IsControlChar(info)) { 980 if (!IsControlChar(info)) {
960 info.m_Index = m_TextBuf.GetLength(); 981 info.m_Index = m_TextBuf.GetLength();
961 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); 982 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
962 FX_WCHAR* pDst = NULL; 983 FX_WCHAR* pDst = NULL;
963 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); 984 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
964 if (nCount >= 1) { 985 if (nCount >= 1) {
965 pDst = FX_Alloc(FX_WCHAR, nCount); 986 pDst = FX_Alloc(FX_WCHAR, nCount);
966 FX_Unicode_GetNormalization(wChar, pDst); 987 FX_Unicode_GetNormalization(wChar, pDst);
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
1099 } 1120 }
1100 if (i < 0) { 1121 if (i < 0) {
1101 m_LineObj.InsertAt(0, Obj); 1122 m_LineObj.InsertAt(0, Obj);
1102 } 1123 }
1103 } 1124 }
1104 1125
1105 int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { 1126 int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
1106 CPDF_TextObject* pTextObj = Obj.m_pTextObj; 1127 CPDF_TextObject* pTextObj = Obj.m_pTextObj;
1107 CPDF_ContentMarkData* pMarkData = 1128 CPDF_ContentMarkData* pMarkData =
1108 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); 1129 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject();
1109 if (!pMarkData) { 1130 if (!pMarkData)
1110 return FPDFTEXT_MC_PASS; 1131 return FPDFTEXT_MC_PASS;
1111 } 1132
1112 int nContentMark = pMarkData->CountItems(); 1133 int nContentMark = pMarkData->CountItems();
1113 if (nContentMark < 1) { 1134 if (nContentMark < 1)
1114 return FPDFTEXT_MC_PASS; 1135 return FPDFTEXT_MC_PASS;
1115 }
1116 CFX_WideString actText; 1136 CFX_WideString actText;
1117 FX_BOOL bExist = FALSE; 1137 FX_BOOL bExist = FALSE;
1118 CPDF_Dictionary* pDict = NULL; 1138 CPDF_Dictionary* pDict = NULL;
1119 int n = 0; 1139 int n = 0;
1120 for (n = 0; n < nContentMark; n++) { 1140 for (n = 0; n < nContentMark; n++) {
1121 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); 1141 CPDF_ContentMarkItem& item = pMarkData->GetItem(n);
1122 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); 1142 if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None)
1123 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); 1143 continue;
1144 pDict = item.GetParam();
1124 CPDF_String* temp = 1145 CPDF_String* temp =
1125 ToString(pDict ? pDict->GetElement("ActualText") : nullptr); 1146 ToString(pDict ? pDict->GetElement("ActualText") : nullptr);
1126 if (temp) { 1147 if (temp) {
1127 bExist = TRUE; 1148 bExist = TRUE;
1128 actText = temp->GetUnicodeText(); 1149 actText = temp->GetUnicodeText();
1129 } 1150 }
1130 } 1151 }
1131 if (!bExist) { 1152 if (!bExist)
1132 return FPDFTEXT_MC_PASS; 1153 return FPDFTEXT_MC_PASS;
1133 } 1154
1134 if (m_pPreTextObj) { 1155 if (m_pPreTextObj) {
1135 if (CPDF_ContentMarkData* pPreMarkData = 1156 CPDF_ContentMarkData* pPreMarkData =
1136 (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject()) { 1157 (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject();
1137 if (pPreMarkData->CountItems() == n) { 1158 if (pPreMarkData && pPreMarkData->CountItems() == n &&
1138 CPDF_ContentMarkItem& item = pPreMarkData->GetItem(n - 1); 1159 pDict == pPreMarkData->GetItem(n - 1).GetParam()) {
1139 if (pDict == item.GetParam()) { 1160 return FPDFTEXT_MC_DONE;
1140 return FPDFTEXT_MC_DONE;
1141 }
1142 }
1143 } 1161 }
1144 } 1162 }
1163 FX_STRSIZE nItems = actText.GetLength();
1164 if (nItems < 1)
1165 return FPDFTEXT_MC_PASS;
1166
1145 CPDF_Font* pFont = pTextObj->GetFont(); 1167 CPDF_Font* pFont = pTextObj->GetFont();
1146 FX_STRSIZE nItems = actText.GetLength();
1147 if (nItems < 1) {
1148 return FPDFTEXT_MC_PASS;
1149 }
1150 bExist = FALSE; 1168 bExist = FALSE;
1151 for (FX_STRSIZE i = 0; i < nItems; i++) { 1169 for (FX_STRSIZE i = 0; i < nItems; i++) {
1152 FX_WCHAR wChar = actText.GetAt(i); 1170 if (pFont->CharCodeFromUnicode(actText.GetAt(i)) != -1) {
1153 if (-1 == pFont->CharCodeFromUnicode(wChar)) {
1154 continue;
1155 } else {
1156 bExist = TRUE; 1171 bExist = TRUE;
1157 break; 1172 break;
1158 } 1173 }
1159 } 1174 }
1160 if (!bExist) { 1175 if (!bExist)
1161 return FPDFTEXT_MC_PASS; 1176 return FPDFTEXT_MC_PASS;
1162 } 1177
1163 bExist = FALSE; 1178 bExist = FALSE;
1164 for (FX_STRSIZE i = 0; i < nItems; i++) { 1179 for (FX_STRSIZE i = 0; i < nItems; i++) {
1165 FX_WCHAR wChar = actText.GetAt(i); 1180 FX_WCHAR wChar = actText.GetAt(i);
1166 if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar))) { 1181 if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar))) {
1167 bExist = TRUE; 1182 bExist = TRUE;
1168 break; 1183 break;
1169 } 1184 }
1170 } 1185 }
1171 if (!bExist) { 1186 if (!bExist)
1172 return FPDFTEXT_MC_DONE; 1187 return FPDFTEXT_MC_DONE;
1173 } 1188
1174 return FPDFTEXT_MC_DELAY; 1189 return FPDFTEXT_MC_DELAY;
1175 } 1190 }
1191
1176 void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { 1192 void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) {
1177 CPDF_TextObject* pTextObj = Obj.m_pTextObj; 1193 CPDF_TextObject* pTextObj = Obj.m_pTextObj;
1178 CPDF_ContentMarkData* pMarkData = 1194 CPDF_ContentMarkData* pMarkData =
1179 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); 1195 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject();
1180 if (!pMarkData) { 1196 if (!pMarkData)
1181 return; 1197 return;
1182 } 1198
1183 int nContentMark = pMarkData->CountItems(); 1199 int nContentMark = pMarkData->CountItems();
1184 if (nContentMark < 1) { 1200 if (nContentMark < 1)
1185 return; 1201 return;
1186 }
1187 CFX_WideString actText; 1202 CFX_WideString actText;
1188 CPDF_Dictionary* pDict = NULL; 1203 CPDF_Dictionary* pDict = NULL;
1189 int n = 0; 1204 for (int n = 0; n < nContentMark; n++) {
1190 for (n = 0; n < nContentMark; n++) {
1191 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); 1205 CPDF_ContentMarkItem& item = pMarkData->GetItem(n);
1192 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); 1206 if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None)
1193 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); 1207 continue;
1194 CPDF_String* temp = 1208 pDict = item.GetParam();
1195 ToString(pDict ? pDict->GetElement("ActualText") : nullptr); 1209 if (pDict)
1196 if (temp) { 1210 actText = pDict->GetUnicodeTextBy("ActualText");
1197 actText = temp->GetUnicodeText();
1198 }
1199 } 1211 }
1200 FX_STRSIZE nItems = actText.GetLength(); 1212 FX_STRSIZE nItems = actText.GetLength();
1201 if (nItems < 1) { 1213 if (nItems < 1)
1202 return; 1214 return;
1203 } 1215
1204 CPDF_Font* pFont = pTextObj->GetFont(); 1216 CPDF_Font* pFont = pTextObj->GetFont();
1205 CFX_Matrix formMatrix = Obj.m_formMatrix; 1217 CFX_Matrix formMatrix = Obj.m_formMatrix;
1206 CFX_Matrix matrix; 1218 CFX_Matrix matrix;
1207 pTextObj->GetTextMatrix(&matrix); 1219 pTextObj->GetTextMatrix(&matrix);
1208 matrix.Concat(formMatrix); 1220 matrix.Concat(formMatrix);
1209 FX_FLOAT fPosX = pTextObj->GetPosX(); 1221 FX_FLOAT fPosX = pTextObj->GetPosX();
1210 FX_FLOAT fPosY = pTextObj->GetPosY(); 1222 FX_FLOAT fPosY = pTextObj->GetPosY();
1211 int nCharInfoIndex = m_TextBuf.GetLength(); 1223 int nCharInfoIndex = m_TextBuf.GetLength();
1212 CFX_FloatRect charBox; 1224 CFX_FloatRect charBox;
1213 charBox.top = pTextObj->m_Top; 1225 charBox.top = pTextObj->m_Top;
(...skipping 18 matching lines...) Expand all
1232 charinfo.m_pTextObj = pTextObj; 1244 charinfo.m_pTextObj = pTextObj;
1233 charinfo.m_CharBox.top = charBox.top; 1245 charinfo.m_CharBox.top = charBox.top;
1234 charinfo.m_CharBox.left = charBox.left; 1246 charinfo.m_CharBox.left = charBox.left;
1235 charinfo.m_CharBox.right = charBox.right; 1247 charinfo.m_CharBox.right = charBox.right;
1236 charinfo.m_CharBox.bottom = charBox.bottom; 1248 charinfo.m_CharBox.bottom = charBox.bottom;
1237 charinfo.m_Matrix.Copy(matrix); 1249 charinfo.m_Matrix.Copy(matrix);
1238 m_TempTextBuf.AppendChar(wChar); 1250 m_TempTextBuf.AppendChar(wChar);
1239 m_TempCharList.push_back(charinfo); 1251 m_TempCharList.push_back(charinfo);
1240 } 1252 }
1241 } 1253 }
1254
1242 void CPDF_TextPage::FindPreviousTextObject() { 1255 void CPDF_TextPage::FindPreviousTextObject() {
1243 if (m_TempCharList.empty() && m_CharList.empty()) 1256 if (m_TempCharList.empty() && m_CharList.empty())
1244 return; 1257 return;
1245 1258
1246 PAGECHAR_INFO preChar = 1259 PAGECHAR_INFO preChar =
1247 m_TempCharList.empty() ? m_CharList.back() : m_TempCharList.back(); 1260 m_TempCharList.empty() ? m_CharList.back() : m_TempCharList.back();
1248 1261
1249 if (preChar.m_pTextObj) 1262 if (preChar.m_pTextObj)
1250 m_pPreTextObj = preChar.m_pTextObj; 1263 m_pPreTextObj = preChar.m_pTextObj;
1251 } 1264 }
(...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after
1542 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); 1555 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
1543 m_TempCharList.pop_back(); 1556 m_TempCharList.pop_back();
1544 } 1557 }
1545 } 1558 }
1546 } 1559 }
1547 } 1560 }
1548 if (bIsBidiAndMirrorInverse) { 1561 if (bIsBidiAndMirrorInverse) {
1549 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); 1562 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend);
1550 } 1563 }
1551 } 1564 }
1565
1552 int32_t CPDF_TextPage::GetTextObjectWritingMode( 1566 int32_t CPDF_TextPage::GetTextObjectWritingMode(
1553 const CPDF_TextObject* pTextObj) { 1567 const CPDF_TextObject* pTextObj) {
1554 int32_t nChars = pTextObj->CountChars(); 1568 int32_t nChars = pTextObj->CountChars();
1555 if (nChars == 1) { 1569 if (nChars == 1) {
1556 return m_TextlineDir; 1570 return m_TextlineDir;
1557 } 1571 }
1558 CPDF_TextObjectItem first, last; 1572 CPDF_TextObjectItem first, last;
1559 pTextObj->GetCharInfo(0, &first); 1573 pTextObj->GetCharInfo(0, &first);
1560 pTextObj->GetCharInfo(nChars - 1, &last); 1574 pTextObj->GetCharInfo(nChars - 1, &last);
1561 CFX_Matrix textMatrix; 1575 CFX_Matrix textMatrix;
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
1607 } else { 1621 } else {
1608 return FALSE; 1622 return FALSE;
1609 } 1623 }
1610 if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag && 1624 if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag &&
1611 (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) { 1625 (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) {
1612 return TRUE; 1626 return TRUE;
1613 } 1627 }
1614 } 1628 }
1615 return FALSE; 1629 return FALSE;
1616 } 1630 }
1631
1617 int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, 1632 int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
1618 const CFX_Matrix& formMatrix) { 1633 const CFX_Matrix& formMatrix) {
1619 FindPreviousTextObject(); 1634 FindPreviousTextObject();
1620 FX_BOOL bNewline = FALSE; 1635 FX_BOOL bNewline = FALSE;
1621 int WritingMode = GetTextObjectWritingMode(pObj); 1636 int WritingMode = GetTextObjectWritingMode(pObj);
1622 if (WritingMode == -1) { 1637 if (WritingMode == -1) {
1623 WritingMode = GetTextObjectWritingMode(m_pPreTextObj); 1638 WritingMode = GetTextObjectWritingMode(m_pPreTextObj);
1624 } 1639 }
1625 CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, 1640 CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right,
1626 pObj->m_Top); 1641 pObj->m_Top);
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after
1763 return 1; 1778 return 1;
1764 } 1779 }
1765 if ((x - last_pos - last_width) > this_width || 1780 if ((x - last_pos - last_width) > this_width ||
1766 (x - last_pos - this_width) > last_width) { 1781 (x - last_pos - this_width) > last_width) {
1767 return 1; 1782 return 1;
1768 } 1783 }
1769 } 1784 }
1770 } 1785 }
1771 return 0; 1786 return 0;
1772 } 1787 }
1788
1773 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, 1789 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
1774 CPDF_TextObject* pTextObj2) { 1790 CPDF_TextObject* pTextObj2) {
1775 if (!pTextObj1 || !pTextObj2) { 1791 if (!pTextObj1 || !pTextObj2) {
1776 return FALSE; 1792 return FALSE;
1777 } 1793 }
1778 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, 1794 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom,
1779 pTextObj2->m_Right, pTextObj2->m_Top); 1795 pTextObj2->m_Right, pTextObj2->m_Top);
1780 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, 1796 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom,
1781 pTextObj1->m_Right, pTextObj1->m_Top); 1797 pTextObj1->m_Right, pTextObj1->m_Top);
1782 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { 1798 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) {
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1820 GetCharWidth(itemPer.m_CharCode, pTextObj2->GetFont()) * 1836 GetCharWidth(itemPer.m_CharCode, pTextObj2->GetFont()) *
1821 pTextObj2->GetFontSize() / 1000 * 0.9 || 1837 pTextObj2->GetFontSize() / 1000 * 0.9 ||
1822 FXSYS_fabs(pTextObj1->GetPosY() - pTextObj2->GetPosY()) > 1838 FXSYS_fabs(pTextObj1->GetPosY() - pTextObj2->GetPosY()) >
1823 std::max(std::max(rcPreObj.Height(), rcPreObj.Width()), 1839 std::max(std::max(rcPreObj.Height(), rcPreObj.Width()),
1824 pTextObj2->GetFontSize()) / 1840 pTextObj2->GetFontSize()) /
1825 8) { 1841 8) {
1826 return FALSE; 1842 return FALSE;
1827 } 1843 }
1828 return TRUE; 1844 return TRUE;
1829 } 1845 }
1846
1830 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, 1847 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
1831 FX_POSITION ObjPos) { 1848 FX_POSITION ObjPos) {
1832 if (!pTextObj) { 1849 if (!pTextObj) {
1833 return FALSE; 1850 return FALSE;
1834 } 1851 }
1835 int i = 0; 1852 int i = 0;
1836 if (!ObjPos) { 1853 if (!ObjPos) {
1837 ObjPos = m_pPage->GetLastObjectPosition(); 1854 ObjPos = m_pPage->GetLastObjectPosition();
1838 } 1855 }
1839 CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos); 1856 CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos);
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
1883 info.m_OriginY); 1900 info.m_OriginY);
1884 return TRUE; 1901 return TRUE;
1885 } 1902 }
1886 1903
1887 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, 1904 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1,
1888 const CFX_FloatRect& rect2) { 1905 const CFX_FloatRect& rect2) {
1889 CFX_FloatRect rect = rect1; 1906 CFX_FloatRect rect = rect1;
1890 rect.Intersect(rect2); 1907 rect.Intersect(rect2);
1891 return !rect.IsEmpty(); 1908 return !rect.IsEmpty();
1892 } 1909 }
1910
1893 FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { 1911 FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) {
1894 if (unicode < L'A') { 1912 if (unicode < L'A') {
1895 return FALSE; 1913 return FALSE;
1896 } 1914 }
1897 if (unicode > L'Z' && unicode < L'a') { 1915 if (unicode > L'Z' && unicode < L'a') {
1898 return FALSE; 1916 return FALSE;
1899 } 1917 }
1900 if (unicode > L'z') { 1918 if (unicode > L'z') {
1901 return FALSE; 1919 return FALSE;
1902 } 1920 }
1903 return TRUE; 1921 return TRUE;
1904 } 1922 }
1923
1905 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) 1924 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage)
1906 : m_pTextPage(pTextPage), 1925 : m_pTextPage(pTextPage),
1907 m_flags(0), 1926 m_flags(0),
1908 m_findNextStart(-1), 1927 m_findNextStart(-1),
1909 m_findPreStart(-1), 1928 m_findPreStart(-1),
1910 m_bMatchCase(FALSE), 1929 m_bMatchCase(FALSE),
1911 m_bMatchWholeWord(FALSE), 1930 m_bMatchWholeWord(FALSE),
1912 m_resStart(0), 1931 m_resStart(0),
1913 m_resEnd(-1), 1932 m_resEnd(-1),
1914 m_IsFind(FALSE) { 1933 m_IsFind(FALSE) {
(...skipping 24 matching lines...) Expand all
1939 } else { 1958 } else {
1940 m_CharIndex.push_back(i + 1); 1959 m_CharIndex.push_back(i + 1);
1941 } 1960 }
1942 } 1961 }
1943 } 1962 }
1944 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); 1963 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
1945 if (indexSize % 2) { 1964 if (indexSize % 2) {
1946 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); 1965 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1);
1947 } 1966 }
1948 } 1967 }
1968
1949 int CPDF_TextPageFind::GetCharIndex(int index) const { 1969 int CPDF_TextPageFind::GetCharIndex(int index) const {
1950 return m_pTextPage->CharIndexFromTextIndex(index); 1970 return m_pTextPage->CharIndexFromTextIndex(index);
1951 } 1971 }
1972
1952 FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, 1973 FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat,
1953 int flags, 1974 int flags,
1954 int startPos) { 1975 int startPos) {
1955 if (!m_pTextPage) { 1976 if (!m_pTextPage) {
1956 return FALSE; 1977 return FALSE;
1957 } 1978 }
1958 if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) { 1979 if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) {
1959 m_strText = m_pTextPage->GetPageText(); 1980 m_strText = m_pTextPage->GetPageText();
1960 } 1981 }
1961 CFX_WideString findwhatStr = findwhat; 1982 CFX_WideString findwhatStr = findwhat;
(...skipping 30 matching lines...) Expand all
1992 m_csFindWhatArray.Add(findwhatStr); 2013 m_csFindWhatArray.Add(findwhatStr);
1993 } 2014 }
1994 if (m_csFindWhatArray.GetSize() <= 0) { 2015 if (m_csFindWhatArray.GetSize() <= 0) {
1995 return FALSE; 2016 return FALSE;
1996 } 2017 }
1997 m_IsFind = TRUE; 2018 m_IsFind = TRUE;
1998 m_resStart = 0; 2019 m_resStart = 0;
1999 m_resEnd = -1; 2020 m_resEnd = -1;
2000 return TRUE; 2021 return TRUE;
2001 } 2022 }
2023
2002 FX_BOOL CPDF_TextPageFind::FindNext() { 2024 FX_BOOL CPDF_TextPageFind::FindNext() {
2003 if (!m_pTextPage) { 2025 if (!m_pTextPage) {
2004 return FALSE; 2026 return FALSE;
2005 } 2027 }
2006 m_resArray.RemoveAll(); 2028 m_resArray.RemoveAll();
2007 if (m_findNextStart == -1) { 2029 if (m_findNextStart == -1) {
2008 return FALSE; 2030 return FALSE;
2009 } 2031 }
2010 if (m_strText.IsEmpty()) { 2032 if (m_strText.IsEmpty()) {
2011 m_IsFind = FALSE; 2033 m_IsFind = FALSE;
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
2099 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray); 2121 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray);
2100 if (m_flags & FPDFTEXT_CONSECUTIVE) { 2122 if (m_flags & FPDFTEXT_CONSECUTIVE) {
2101 m_findNextStart = m_resStart + 1; 2123 m_findNextStart = m_resStart + 1;
2102 m_findPreStart = m_resEnd - 1; 2124 m_findPreStart = m_resEnd - 1;
2103 } else { 2125 } else {
2104 m_findNextStart = m_resEnd + 1; 2126 m_findNextStart = m_resEnd + 1;
2105 m_findPreStart = m_resStart - 1; 2127 m_findPreStart = m_resStart - 1;
2106 } 2128 }
2107 return m_IsFind; 2129 return m_IsFind;
2108 } 2130 }
2131
2109 FX_BOOL CPDF_TextPageFind::FindPrev() { 2132 FX_BOOL CPDF_TextPageFind::FindPrev() {
2110 if (!m_pTextPage) { 2133 if (!m_pTextPage) {
2111 return FALSE; 2134 return FALSE;
2112 } 2135 }
2113 m_resArray.RemoveAll(); 2136 m_resArray.RemoveAll();
2114 if (m_strText.IsEmpty() || m_findPreStart < 0) { 2137 if (m_strText.IsEmpty() || m_findPreStart < 0) {
2115 m_IsFind = FALSE; 2138 m_IsFind = FALSE;
2116 return m_IsFind; 2139 return m_IsFind;
2117 } 2140 }
2118 CPDF_TextPageFind findEngine(m_pTextPage); 2141 CPDF_TextPageFind findEngine(m_pTextPage);
(...skipping 25 matching lines...) Expand all
2144 m_pTextPage->GetRectArray(order, MatchedCount, m_resArray); 2167 m_pTextPage->GetRectArray(order, MatchedCount, m_resArray);
2145 if (m_flags & FPDFTEXT_CONSECUTIVE) { 2168 if (m_flags & FPDFTEXT_CONSECUTIVE) {
2146 m_findNextStart = m_resStart + 1; 2169 m_findNextStart = m_resStart + 1;
2147 m_findPreStart = m_resEnd - 1; 2170 m_findPreStart = m_resEnd - 1;
2148 } else { 2171 } else {
2149 m_findNextStart = m_resEnd + 1; 2172 m_findNextStart = m_resEnd + 1;
2150 m_findPreStart = m_resStart - 1; 2173 m_findPreStart = m_resStart - 1;
2151 } 2174 }
2152 return m_IsFind; 2175 return m_IsFind;
2153 } 2176 }
2177
2154 void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { 2178 void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
2155 if (findwhat.IsEmpty()) { 2179 if (findwhat.IsEmpty()) {
2156 return; 2180 return;
2157 } 2181 }
2158 int index = 0; 2182 int index = 0;
2159 while (1) { 2183 while (1) {
2160 CFX_WideString csWord = TEXT_EMPTY; 2184 CFX_WideString csWord = TEXT_EMPTY;
2161 int ret = 2185 int ret =
2162 ExtractSubString(csWord, findwhat.c_str(), index, TEXT_BLANK_CHAR); 2186 ExtractSubString(csWord, findwhat.c_str(), index, TEXT_BLANK_CHAR);
2163 if (csWord.IsEmpty()) { 2187 if (csWord.IsEmpty()) {
(...skipping 28 matching lines...) Expand all
2192 continue; 2216 continue;
2193 } 2217 }
2194 pos++; 2218 pos++;
2195 } 2219 }
2196 if (!csWord.IsEmpty()) { 2220 if (!csWord.IsEmpty()) {
2197 m_csFindWhatArray.Add(csWord); 2221 m_csFindWhatArray.Add(csWord);
2198 } 2222 }
2199 index++; 2223 index++;
2200 } 2224 }
2201 } 2225 }
2226
2202 FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, 2227 FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText,
2203 int startPos, 2228 int startPos,
2204 int endPos) { 2229 int endPos) {
2205 FX_WCHAR char_left = 0; 2230 FX_WCHAR char_left = 0;
2206 FX_WCHAR char_right = 0; 2231 FX_WCHAR char_right = 0;
2207 int char_count = endPos - startPos + 1; 2232 int char_count = endPos - startPos + 1;
2208 if (char_count < 1) { 2233 if (char_count < 1) {
2209 return FALSE; 2234 return FALSE;
2210 } 2235 }
2211 if (char_count == 1 && csPageText.GetAt(startPos) > 255) { 2236 if (char_count == 1 && csPageText.GetAt(startPos) > 255) {
(...skipping 26 matching lines...) Expand all
2238 char_left <= L'9') { 2263 char_left <= L'9') {
2239 return FALSE; 2264 return FALSE;
2240 } 2265 }
2241 if (csPageText.GetAt(endPos) >= L'0' && csPageText.GetAt(endPos) <= L'9' && 2266 if (csPageText.GetAt(endPos) >= L'0' && csPageText.GetAt(endPos) <= L'9' &&
2242 char_right >= L'0' && char_right <= L'9') { 2267 char_right >= L'0' && char_right <= L'9') {
2243 return FALSE; 2268 return FALSE;
2244 } 2269 }
2245 } 2270 }
2246 return TRUE; 2271 return TRUE;
2247 } 2272 }
2273
2248 FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, 2274 FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString,
2249 const FX_WCHAR* lpszFullString, 2275 const FX_WCHAR* lpszFullString,
2250 int iSubString, 2276 int iSubString,
2251 FX_WCHAR chSep) { 2277 FX_WCHAR chSep) {
2252 if (!lpszFullString) { 2278 if (!lpszFullString) {
2253 return FALSE; 2279 return FALSE;
2254 } 2280 }
2255 while (iSubString--) { 2281 while (iSubString--) {
2256 lpszFullString = FXSYS_wcschr(lpszFullString, chSep); 2282 lpszFullString = FXSYS_wcschr(lpszFullString, chSep);
2257 if (!lpszFullString) { 2283 if (!lpszFullString) {
2258 rString.Empty(); 2284 rString.Empty();
2259 return FALSE; 2285 return FALSE;
2260 } 2286 }
2261 lpszFullString++; 2287 lpszFullString++;
2262 while (*lpszFullString == chSep) { 2288 while (*lpszFullString == chSep) {
2263 lpszFullString++; 2289 lpszFullString++;
2264 } 2290 }
2265 } 2291 }
2266 const FX_WCHAR* lpchEnd = FXSYS_wcschr(lpszFullString, chSep); 2292 const FX_WCHAR* lpchEnd = FXSYS_wcschr(lpszFullString, chSep);
2267 int nLen = lpchEnd ? (int)(lpchEnd - lpszFullString) 2293 int nLen = lpchEnd ? (int)(lpchEnd - lpszFullString)
2268 : (int)FXSYS_wcslen(lpszFullString); 2294 : (int)FXSYS_wcslen(lpszFullString);
2269 ASSERT(nLen >= 0); 2295 ASSERT(nLen >= 0);
2270 FXSYS_memcpy(rString.GetBuffer(nLen), lpszFullString, 2296 FXSYS_memcpy(rString.GetBuffer(nLen), lpszFullString,
2271 nLen * sizeof(FX_WCHAR)); 2297 nLen * sizeof(FX_WCHAR));
2272 rString.ReleaseBuffer(); 2298 rString.ReleaseBuffer();
2273 return TRUE; 2299 return TRUE;
2274 } 2300 }
2301
2275 CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) { 2302 CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) {
2276 CFX_WideString str2; 2303 CFX_WideString str2;
2277 str2.Empty(); 2304 str2.Empty();
2278 int nlen = str.GetLength(); 2305 int nlen = str.GetLength();
2279 for (int i = nlen - 1; i >= 0; i--) { 2306 for (int i = nlen - 1; i >= 0; i--) {
2280 str2 += str.GetAt(i); 2307 str2 += str.GetAt(i);
2281 } 2308 }
2282 return str2; 2309 return str2;
2283 } 2310 }
2311
2284 void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const { 2312 void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const {
2285 rects.Copy(m_resArray); 2313 rects.Copy(m_resArray);
2286 } 2314 }
2315
2287 int CPDF_TextPageFind::GetCurOrder() const { 2316 int CPDF_TextPageFind::GetCurOrder() const {
2288 return GetCharIndex(m_resStart); 2317 return GetCharIndex(m_resStart);
2289 } 2318 }
2319
2290 int CPDF_TextPageFind::GetMatchedCount() const { 2320 int CPDF_TextPageFind::GetMatchedCount() const {
2291 int resStart = GetCharIndex(m_resStart); 2321 int resStart = GetCharIndex(m_resStart);
2292 int resEnd = GetCharIndex(m_resEnd); 2322 int resEnd = GetCharIndex(m_resEnd);
2293 return resEnd - resStart + 1; 2323 return resEnd - resStart + 1;
2294 } 2324 }
2295 2325
2296 CPDF_LinkExtract::CPDF_LinkExtract() 2326 CPDF_LinkExtract::CPDF_LinkExtract()
2297 : m_pTextPage(nullptr), m_bIsParsed(false) { 2327 : m_pTextPage(nullptr), m_bIsParsed(false) {
2298 } 2328 }
2299 2329
(...skipping 18 matching lines...) Expand all
2318 2348
2319 void CPDF_LinkExtract::DeleteLinkList() { 2349 void CPDF_LinkExtract::DeleteLinkList() {
2320 while (m_LinkList.GetSize()) { 2350 while (m_LinkList.GetSize()) {
2321 CPDF_LinkExt* linkinfo = NULL; 2351 CPDF_LinkExt* linkinfo = NULL;
2322 linkinfo = m_LinkList.GetAt(0); 2352 linkinfo = m_LinkList.GetAt(0);
2323 m_LinkList.RemoveAt(0); 2353 m_LinkList.RemoveAt(0);
2324 delete linkinfo; 2354 delete linkinfo;
2325 } 2355 }
2326 m_LinkList.RemoveAll(); 2356 m_LinkList.RemoveAll();
2327 } 2357 }
2358
2328 int CPDF_LinkExtract::CountLinks() const { 2359 int CPDF_LinkExtract::CountLinks() const {
2329 if (!m_bIsParsed) { 2360 if (!m_bIsParsed) {
2330 return -1; 2361 return -1;
2331 } 2362 }
2332 return m_LinkList.GetSize(); 2363 return m_LinkList.GetSize();
2333 } 2364 }
2365
2334 void CPDF_LinkExtract::ParseLink() { 2366 void CPDF_LinkExtract::ParseLink() {
2335 int start = 0, pos = 0; 2367 int start = 0, pos = 0;
2336 int TotalChar = m_pTextPage->CountChars(); 2368 int TotalChar = m_pTextPage->CountChars();
2337 while (pos < TotalChar) { 2369 while (pos < TotalChar) {
2338 FPDF_CHAR_INFO pageChar; 2370 FPDF_CHAR_INFO pageChar;
2339 m_pTextPage->GetCharInfo(pos, &pageChar); 2371 m_pTextPage->GetCharInfo(pos, &pageChar);
2340 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || 2372 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 ||
2341 pos == TotalChar - 1) { 2373 pos == TotalChar - 1) {
2342 int nCount = pos - start; 2374 int nCount = pos - start;
2343 if (pos == TotalChar - 1) { 2375 if (pos == TotalChar - 1) {
(...skipping 15 matching lines...) Expand all
2359 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { 2391 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) {
2360 AppendToLinkList(start, nCount, strBeCheck); 2392 AppendToLinkList(start, nCount, strBeCheck);
2361 } 2393 }
2362 } 2394 }
2363 start = ++pos; 2395 start = ++pos;
2364 } else { 2396 } else {
2365 pos++; 2397 pos++;
2366 } 2398 }
2367 } 2399 }
2368 } 2400 }
2401
2369 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { 2402 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
2370 CFX_WideString str = strBeCheck; 2403 CFX_WideString str = strBeCheck;
2371 str.MakeLower(); 2404 str.MakeLower();
2372 if (str.Find(L"http://www.") != -1) { 2405 if (str.Find(L"http://www.") != -1) {
2373 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); 2406 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));
2374 return TRUE; 2407 return TRUE;
2375 } 2408 }
2376 if (str.Find(L"http://") != -1) { 2409 if (str.Find(L"http://") != -1) {
2377 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); 2410 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));
2378 return TRUE; 2411 return TRUE;
2379 } 2412 }
2380 if (str.Find(L"https://www.") != -1) { 2413 if (str.Find(L"https://www.") != -1) {
2381 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); 2414 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));
2382 return TRUE; 2415 return TRUE;
2383 } 2416 }
2384 if (str.Find(L"https://") != -1) { 2417 if (str.Find(L"https://") != -1) {
2385 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); 2418 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));
2386 return TRUE; 2419 return TRUE;
2387 } 2420 }
2388 if (str.Find(L"www.") != -1) { 2421 if (str.Find(L"www.") != -1) {
2389 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); 2422 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));
2390 strBeCheck = L"http://" + strBeCheck; 2423 strBeCheck = L"http://" + strBeCheck;
2391 return TRUE; 2424 return TRUE;
2392 } 2425 }
2393 return FALSE; 2426 return FALSE;
2394 } 2427 }
2428
2395 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { 2429 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
2396 int aPos = str.Find(L'@'); 2430 int aPos = str.Find(L'@');
2397 // Invalid when no '@'. 2431 // Invalid when no '@'.
2398 if (aPos < 1) { 2432 if (aPos < 1) {
2399 return FALSE; 2433 return FALSE;
2400 } 2434 }
2401 2435
2402 // Check the local part. 2436 // Check the local part.
2403 int pPos = aPos; // Used to track the position of '@' or '.'. 2437 int pPos = aPos; // Used to track the position of '@' or '.'.
2404 for (int i = aPos - 1; i >= 0; i--) { 2438 for (int i = aPos - 1; i >= 0; i--) {
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
2489 return; 2523 return;
2490 } 2524 }
2491 CPDF_LinkExt* link = NULL; 2525 CPDF_LinkExt* link = NULL;
2492 link = m_LinkList.GetAt(index); 2526 link = m_LinkList.GetAt(index);
2493 if (!link) { 2527 if (!link) {
2494 return; 2528 return;
2495 } 2529 }
2496 start = link->m_Start; 2530 start = link->m_Start;
2497 count = link->m_Count; 2531 count = link->m_Count;
2498 } 2532 }
2533
2499 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { 2534 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const {
2500 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { 2535 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2501 return; 2536 return;
2502 } 2537 }
2503 CPDF_LinkExt* link = NULL; 2538 CPDF_LinkExt* link = NULL;
2504 link = m_LinkList.GetAt(index); 2539 link = m_LinkList.GetAt(index);
2505 if (!link) { 2540 if (!link) {
2506 return; 2541 return;
2507 } 2542 }
2508 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2543 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2509 } 2544 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698