Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "core/src/fpdftext/text_int.h" | 7 #include "core/src/fpdftext/text_int.h" |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <cctype> | 10 #include <cctype> |
| (...skipping 116 matching lines...) | (...skipping 116 matching lines...) |
| 127 case 0x94: | 127 case 0x94: |
| 128 case 0x96: | 128 case 0x96: |
| 129 case 0x97: | 129 case 0x97: |
| 130 case 0x98: | 130 case 0x98: |
| 131 case 0xfffe: | 131 case 0xfffe: |
| 132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; | 132 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; |
| 133 default: | 133 default: |
| 134 return false; | 134 return false; |
| 135 } | 135 } |
| 136 } | 136 } |
| | 137 |
| 137 FX_BOOL CPDF_TextPage::ParseTextPage() { | 138 FX_BOOL CPDF_TextPage::ParseTextPage() { |
| 138 m_bIsParsed = false; | 139 m_bIsParsed = false; |
| 139 if (!m_pPage) | 140 if (!m_pPage) |
| 140 return FALSE; | 141 return FALSE; |
| 141 | 142 |
| 142 m_TextBuf.Clear(); | 143 m_TextBuf.Clear(); |
| 143 m_CharList.clear(); | 144 m_CharList.clear(); |
| 144 m_pPreTextObj = NULL; | 145 m_pPreTextObj = NULL; |
| 145 ProcessObject(); | 146 ProcessObject(); |
| 146 m_bIsParsed = true; | 147 m_bIsParsed = true; |
| (...skipping 32 matching lines...) | (...skipping 32 matching lines...) |
| 179 m_CharIndex.push_back(i + 1); | 180 m_CharIndex.push_back(i + 1); |
| 180 } | 181 } |
| 181 } | 182 } |
| 182 } | 183 } |
| 183 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 184 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
| 184 if (indexSize % 2) { | 185 if (indexSize % 2) { |
| 185 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); | 186 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); |
| 186 } | 187 } |
| 187 return TRUE; | 188 return TRUE; |
| 188 } | 189 } |
| | 190 |
| 189 int CPDF_TextPage::CountChars() const { | 191 int CPDF_TextPage::CountChars() const { |
| 190 return pdfium::CollectionSize<int>(m_CharList); | 192 return pdfium::CollectionSize<int>(m_CharList); |
| 191 } | 193 } |
| | 194 |
| 192 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { | 195 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { |
| 193 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 196 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
| 194 int count = 0; | 197 int count = 0; |
| 195 for (int i = 0; i < indexSize; i += 2) { | 198 for (int i = 0; i < indexSize; i += 2) { |
| 196 count += m_CharIndex[i + 1]; | 199 count += m_CharIndex[i + 1]; |
| 197 if (count > TextIndex) | 200 if (count > TextIndex) |
| 198 return TextIndex - count + m_CharIndex[i + 1] + m_CharIndex[i]; | 201 return TextIndex - count + m_CharIndex[i + 1] + m_CharIndex[i]; |
| 199 } | 202 } |
| 200 return -1; | 203 return -1; |
| 201 } | 204 } |
| | 205 |
| 202 int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const { | 206 int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const { |
| 203 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 207 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
| 204 int count = 0; | 208 int count = 0; |
| 205 for (int i = 0; i < indexSize; i += 2) { | 209 for (int i = 0; i < indexSize; i += 2) { |
| 206 count += m_CharIndex[i + 1]; | 210 count += m_CharIndex[i + 1]; |
| 207 if (m_CharIndex[i + 1] + m_CharIndex[i] > CharIndex) { | 211 if (m_CharIndex[i + 1] + m_CharIndex[i] > CharIndex) { |
| 208 if (CharIndex - m_CharIndex[i] < 0) | 212 if (CharIndex - m_CharIndex[i] < 0) |
| 209 return -1; | 213 return -1; |
| 210 | 214 |
| 211 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1]; | 215 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1]; |
| 212 } | 216 } |
| 213 } | 217 } |
| 214 return -1; | 218 return -1; |
| 215 } | 219 } |
| | 220 |
| 216 void CPDF_TextPage::GetRectArray(int start, | 221 void CPDF_TextPage::GetRectArray(int start, |
| 217 int nCount, | 222 int nCount, |
| 218 CFX_RectArray& rectArray) const { | 223 CFX_RectArray& rectArray) const { |
| 219 if (start < 0 || nCount == 0) { | 224 if (start < 0 || nCount == 0) { |
| 220 return; | 225 return; |
| 221 } | 226 } |
| 222 if (!m_bIsParsed) { | 227 if (!m_bIsParsed) { |
| 223 return; | 228 return; |
| 224 } | 229 } |
| 225 CPDF_TextObject* pCurObj = NULL; | 230 CPDF_TextObject* pCurObj = NULL; |
| (...skipping 65 matching lines...) | (...skipping 65 matching lines...) |
| 291 if (rect.top < info_curchar.m_CharBox.top) { | 296 if (rect.top < info_curchar.m_CharBox.top) { |
| 292 rect.top = info_curchar.m_CharBox.top; | 297 rect.top = info_curchar.m_CharBox.top; |
| 293 } | 298 } |
| 294 if (rect.bottom > info_curchar.m_CharBox.bottom) { | 299 if (rect.bottom > info_curchar.m_CharBox.bottom) { |
| 295 rect.bottom = info_curchar.m_CharBox.bottom; | 300 rect.bottom = info_curchar.m_CharBox.bottom; |
| 296 } | 301 } |
| 297 } | 302 } |
| 298 } | 303 } |
| 299 rectArray.Add(rect); | 304 rectArray.Add(rect); |
| 300 } | 305 } |
| | 306 |
| 301 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, | 307 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, |
| 302 FX_FLOAT xTolerance, | 308 FX_FLOAT xTolerance, |
| 303 FX_FLOAT yTolerance) const { | 309 FX_FLOAT yTolerance) const { |
| 304 if (!m_bIsParsed) | 310 if (!m_bIsParsed) |
| 305 return -3; | 311 return -3; |
| 306 | 312 |
| 307 int pos = 0; | 313 int pos = 0; |
| 308 int NearPos = -1; | 314 int NearPos = -1; |
| 309 double xdif = 5000; | 315 double xdif = 5000; |
| 310 double ydif = 5000; | 316 double ydif = 5000; |
| (...skipping 60 matching lines...) | (...skipping 60 matching lines...) |
| 371 IsContainPreChar = false; | 377 IsContainPreChar = false; |
| 372 IsAddLineFeed = false; | 378 IsAddLineFeed = false; |
| 373 } | 379 } |
| 374 } else { | 380 } else { |
| 375 IsContainPreChar = false; | 381 IsContainPreChar = false; |
| 376 IsAddLineFeed = true; | 382 IsAddLineFeed = true; |
| 377 } | 383 } |
| 378 } | 384 } |
| 379 return strText; | 385 return strText; |
| 380 } | 386 } |
| | 387 |
| 381 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, | 388 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, |
| 382 CFX_RectArray& resRectArray) const { | 389 CFX_RectArray& resRectArray) const { |
| 383 if (!m_bIsParsed) | 390 if (!m_bIsParsed) |
| 384 return; | 391 return; |
| 385 | 392 |
| 386 CFX_FloatRect curRect; | 393 CFX_FloatRect curRect; |
| 387 bool flagNewRect = true; | 394 bool flagNewRect = true; |
| 388 CPDF_TextObject* pCurObj = nullptr; | 395 CPDF_TextObject* pCurObj = nullptr; |
| 389 for (auto info_curchar : m_CharList) { | 396 for (auto info_curchar : m_CharList) { |
| 390 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 397 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
| (...skipping 17 matching lines...) | (...skipping 17 matching lines...) |
| 408 } else { | 415 } else { |
| 409 info_curchar.m_CharBox.Normalize(); | 416 info_curchar.m_CharBox.Normalize(); |
| 410 curRect.left = std::min(curRect.left, info_curchar.m_CharBox.left); | 417 curRect.left = std::min(curRect.left, info_curchar.m_CharBox.left); |
| 411 curRect.bottom = std::min(curRect.bottom, info_curchar.m_CharBox.bottom); | 418 curRect.bottom = std::min(curRect.bottom, info_curchar.m_CharBox.bottom); |
| 412 curRect.right = std::max(curRect.right, info_curchar.m_CharBox.right); | 419 curRect.right = std::max(curRect.right, info_curchar.m_CharBox.right); |
| 413 curRect.top = std::max(curRect.top, info_curchar.m_CharBox.top); | 420 curRect.top = std::max(curRect.top, info_curchar.m_CharBox.top); |
| 414 } | 421 } |
| 415 } | 422 } |
| 416 resRectArray.Add(curRect); | 423 resRectArray.Add(curRect); |
| 417 } | 424 } |
| | 425 |
| 418 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, | 426 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, |
| 419 FX_FLOAT y, | 427 FX_FLOAT y, |
| 420 FX_FLOAT xTolerance, | 428 FX_FLOAT xTolerance, |
| 421 FX_FLOAT yTolerance) const { | 429 FX_FLOAT yTolerance) const { |
| 422 CPDF_Point point(x, y); | 430 CPDF_Point point(x, y); |
| 423 return GetIndexAtPos(point, xTolerance, yTolerance); | 431 return GetIndexAtPos(point, xTolerance, yTolerance); |
| 424 } | 432 } |
| 425 | 433 |
| 426 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { | 434 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { |
| 427 if (!m_bIsParsed) | 435 if (!m_bIsParsed) |
| (...skipping 48 matching lines...) | (...skipping 48 matching lines...) |
| 476 endIndex++; | 484 endIndex++; |
| 477 if (endIndex >= pdfium::CollectionSize<int>(m_CharList)) { | 485 if (endIndex >= pdfium::CollectionSize<int>(m_CharList)) { |
| 478 break; | 486 break; |
| 479 } | 487 } |
| 480 charinfo3 = m_CharList[endIndex]; | 488 charinfo3 = m_CharList[endIndex]; |
| 481 } | 489 } |
| 482 endIndex--; | 490 endIndex--; |
| 483 nCount = endIndex - start + 1; | 491 nCount = endIndex - start + 1; |
| 484 } | 492 } |
| 485 } | 493 } |
| | 494 |
| 486 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { | 495 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { |
| 487 if (!m_bIsParsed || nCount == 0) | 496 if (!m_bIsParsed || nCount == 0) |
| 488 return L""; | 497 return L""; |
| 489 | 498 |
| 490 if (start < 0) | 499 if (start < 0) |
| 491 start = 0; | 500 start = 0; |
| 492 | 501 |
| 493 if (nCount == -1) { | 502 if (nCount == -1) { |
| 494 nCount = pdfium::CollectionSize<int>(m_CharList) - start; | 503 nCount = pdfium::CollectionSize<int>(m_CharList) - start; |
| 495 return m_TextBuf.GetWideString().Mid(start, | 504 return m_TextBuf.GetWideString().Mid(start, |
| (...skipping 29 matching lines...) | (...skipping 29 matching lines...) |
| 525 return L""; | 534 return L""; |
| 526 } | 535 } |
| 527 charinfo = m_CharList[start + nCount - nCountOffset - 1]; | 536 charinfo = m_CharList[start + nCount - nCountOffset - 1]; |
| 528 } | 537 } |
| 529 nCount = start + nCount - nCountOffset - startindex; | 538 nCount = start + nCount - nCountOffset - startindex; |
| 530 if (nCount <= 0) { | 539 if (nCount <= 0) { |
| 531 return L""; | 540 return L""; |
| 532 } | 541 } |
| 533 return m_TextBuf.GetWideString().Mid(startindex, nCount); | 542 return m_TextBuf.GetWideString().Mid(startindex, nCount); |
| 534 } | 543 } |
| | 544 |
| 535 int CPDF_TextPage::CountRects(int start, int nCount) { | 545 int CPDF_TextPage::CountRects(int start, int nCount) { |
| 536 if (!m_bIsParsed || start < 0) | 546 if (!m_bIsParsed || start < 0) |
| 537 return -1; | 547 return -1; |
| 538 | 548 |
| 539 if (nCount == -1 || | 549 if (nCount == -1 || |
| 540 nCount + start > pdfium::CollectionSize<int>(m_CharList)) { | 550 nCount + start > pdfium::CollectionSize<int>(m_CharList)) { |
| 541 nCount = pdfium::CollectionSize<int>(m_CharList) - start; | 551 nCount = pdfium::CollectionSize<int>(m_CharList) - start; |
| 542 } | 552 } |
| 543 m_SelRects.RemoveAll(); | 553 m_SelRects.RemoveAll(); |
| 544 GetRectArray(start, nCount, m_SelRects); | 554 GetRectArray(start, nCount, m_SelRects); |
| 545 return m_SelRects.GetSize(); | 555 return m_SelRects.GetSize(); |
| 546 } | 556 } |
| | 557 |
| 547 void CPDF_TextPage::GetRect(int rectIndex, | 558 void CPDF_TextPage::GetRect(int rectIndex, |
| 548 FX_FLOAT& left, | 559 FX_FLOAT& left, |
| 549 FX_FLOAT& top, | 560 FX_FLOAT& top, |
| 550 FX_FLOAT& right, | 561 FX_FLOAT& right, |
| 551 FX_FLOAT& bottom) const { | 562 FX_FLOAT& bottom) const { |
| 552 if (!m_bIsParsed) | 563 if (!m_bIsParsed) |
| 553 return; | 564 return; |
| 554 | 565 |
| 555 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) | 566 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) |
| 556 return; | 567 return; |
| (...skipping 61 matching lines...) | (...skipping 61 matching lines...) |
| 618 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { | 629 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { |
| 619 if (!m_bIsParsed) | 630 if (!m_bIsParsed) |
| 620 return FALSE; | 631 return FALSE; |
| 621 | 632 |
| 622 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) | 633 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) |
| 623 return FALSE; | 634 return FALSE; |
| 624 | 635 |
| 625 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); | 636 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); |
| 626 return GetBaselineRotate(rect, Rotate); | 637 return GetBaselineRotate(rect, Rotate); |
| 627 } | 638 } |
| | 639 |
| 628 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, | 640 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
| 629 FX_FLOAT top, | 641 FX_FLOAT top, |
| 630 FX_FLOAT right, | 642 FX_FLOAT right, |
| 631 FX_FLOAT bottom, | 643 FX_FLOAT bottom, |
| 632 FX_BOOL bContains) { | 644 FX_BOOL bContains) { |
| 633 m_Segments.RemoveAll(); | 645 m_Segments.RemoveAll(); |
| 634 if (!m_bIsParsed) | 646 if (!m_bIsParsed) |
| 635 return -1; | 647 return -1; |
| 636 | 648 |
| 637 CFX_FloatRect rect(left, bottom, right, top); | 649 CFX_FloatRect rect(left, bottom, right, top); |
| (...skipping 57 matching lines...) | (...skipping 57 matching lines...) |
| 695 pos++; | 707 pos++; |
| 696 } | 708 } |
| 697 if (segmentStatus == 1) { | 709 if (segmentStatus == 1) { |
| 698 segmentStatus = 2; | 710 segmentStatus = 2; |
| 699 m_Segments.Add(segment); | 711 m_Segments.Add(segment); |
| 700 segment.m_Start = 0; | 712 segment.m_Start = 0; |
| 701 segment.m_nCount = 0; | 713 segment.m_nCount = 0; |
| 702 } | 714 } |
| 703 return m_Segments.GetSize(); | 715 return m_Segments.GetSize(); |
| 704 } | 716 } |
| | 717 |
| 705 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { | 718 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { |
| 706 if (index < 0 || index >= m_Segments.GetSize()) { | 719 if (index < 0 || index >= m_Segments.GetSize()) { |
| 707 return; | 720 return; |
| 708 } | 721 } |
| 709 start = m_Segments.GetAt(index).m_Start; | 722 start = m_Segments.GetAt(index).m_Start; |
| 710 count = m_Segments.GetAt(index).m_nCount; | 723 count = m_Segments.GetAt(index).m_nCount; |
| 711 } | 724 } |
| | 725 |
| 712 int CPDF_TextPage::GetWordBreak(int index, int direction) const { | 726 int CPDF_TextPage::GetWordBreak(int index, int direction) const { |
| 713 if (!m_bIsParsed) | 727 if (!m_bIsParsed) |
| 714 return -1; | 728 return -1; |
| 715 | 729 |
| 716 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) | 730 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) |
| 717 return -1; | 731 return -1; |
| 718 | 732 |
| 719 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList)) | 733 if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList)) |
| 720 return -1; | 734 return -1; |
| 721 | 735 |
| (...skipping 11 matching lines...) | (...skipping 11 matching lines...) |
| 733 break; | 747 break; |
| 734 } | 748 } |
| 735 } else if (direction == FPDFTEXT_RIGHT) { | 749 } else if (direction == FPDFTEXT_RIGHT) { |
| 736 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { | 750 while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) { |
| 737 if (!IsLetter(m_CharList[breakPos].m_Unicode)) | 751 if (!IsLetter(m_CharList[breakPos].m_Unicode)) |
| 738 break; | 752 break; |
| 739 } | 753 } |
| 740 } | 754 } |
| 741 return breakPos; | 755 return breakPos; |
| 742 } | 756 } |
| | 757 |
| 743 int32_t CPDF_TextPage::FindTextlineFlowDirection() { | 758 int32_t CPDF_TextPage::FindTextlineFlowDirection() { |
| 744 if (!m_pPage) { | 759 if (!m_pPage) { |
| 745 return -1; | 760 return -1; |
| 746 } | 761 } |
| 747 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); | 762 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); |
| 748 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); | 763 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); |
| 749 std::vector<uint8_t> nHorizontalMask(nPageWidth); | 764 std::vector<uint8_t> nHorizontalMask(nPageWidth); |
| 750 std::vector<uint8_t> nVerticalMask(nPageHeight); | 765 std::vector<uint8_t> nVerticalMask(nPageHeight); |
| 751 uint8_t* pDataH = nHorizontalMask.data(); | 766 uint8_t* pDataH = nHorizontalMask.data(); |
| 752 uint8_t* pDataV = nVerticalMask.data(); | 767 uint8_t* pDataV = nVerticalMask.data(); |
| (...skipping 77 matching lines...) | (...skipping 77 matching lines...) |
| 830 return 0; | 845 return 0; |
| 831 } | 846 } |
| 832 if (nSumH - nSumV > 0.0f) { | 847 if (nSumH - nSumV > 0.0f) { |
| 833 return 0; | 848 return 0; |
| 834 } | 849 } |
| 835 if (nSumV - nSumH > 0.0f) { | 850 if (nSumV - nSumH > 0.0f) { |
| 836 return 1; | 851 return 1; |
| 837 } | 852 } |
| 838 return -1; | 853 return -1; |
| 839 } | 854 } |
| | 855 |
| 840 void CPDF_TextPage::ProcessObject() { | 856 void CPDF_TextPage::ProcessObject() { |
| 841 CPDF_PageObject* pPageObj = NULL; | 857 CPDF_PageObject* pPageObj = NULL; |
| 842 if (!m_pPage) { | 858 if (!m_pPage) { |
| 843 return; | 859 return; |
| 844 } | 860 } |
| 845 FX_POSITION pos; | 861 FX_POSITION pos; |
| 846 pos = m_pPage->GetFirstObjectPosition(); | 862 pos = m_pPage->GetFirstObjectPosition(); |
| 847 if (!pos) { | 863 if (!pos) { |
| 848 return; | 864 return; |
| 849 } | 865 } |
| (...skipping 13 matching lines...) | (...skipping 13 matching lines...) |
| 863 } | 879 } |
| 864 pPageObj = NULL; | 880 pPageObj = NULL; |
| 865 } | 881 } |
| 866 int count = m_LineObj.GetSize(); | 882 int count = m_LineObj.GetSize(); |
| 867 for (int i = 0; i < count; i++) { | 883 for (int i = 0; i < count; i++) { |
| 868 ProcessTextObject(m_LineObj.GetAt(i)); | 884 ProcessTextObject(m_LineObj.GetAt(i)); |
| 869 } | 885 } |
| 870 m_LineObj.RemoveAll(); | 886 m_LineObj.RemoveAll(); |
| 871 CloseTempLine(); | 887 CloseTempLine(); |
| 872 } | 888 } |
| | 889 |
| 873 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, | 890 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, |
| 874 const CFX_Matrix& formMatrix) { | 891 const CFX_Matrix& formMatrix) { |
| 875 CPDF_PageObject* pPageObj = NULL; | 892 CPDF_PageObject* pPageObj = NULL; |
| 876 FX_POSITION pos; | 893 FX_POSITION pos; |
| 877 if (!pFormObj) { | 894 if (!pFormObj) { |
| 878 return; | 895 return; |
| 879 } | 896 } |
| 880 pos = pFormObj->m_pForm->GetFirstObjectPosition(); | 897 pos = pFormObj->m_pForm->GetFirstObjectPosition(); |
| 881 if (!pos) { | 898 if (!pos) { |
| 882 return; | 899 return; |
| 883 } | 900 } |
| 884 CFX_Matrix curFormMatrix; | 901 CFX_Matrix curFormMatrix; |
| 885 curFormMatrix.Copy(pFormObj->m_FormMatrix); | 902 curFormMatrix.Copy(pFormObj->m_FormMatrix); |
| 886 curFormMatrix.Concat(formMatrix); | 903 curFormMatrix.Concat(formMatrix); |
| 887 while (pos) { | 904 while (pos) { |
| 888 pPageObj = pFormObj->m_pForm->GetNextObject(pos); | 905 pPageObj = pFormObj->m_pForm->GetNextObject(pos); |
| 889 if (pPageObj) { | 906 if (pPageObj) { |
| 890 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { | 907 if (pPageObj->m_Type == CPDF_PageObject::TEXT) { |
| 891 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); | 908 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); |
| 892 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { | 909 } else if (pPageObj->m_Type == CPDF_PageObject::FORM) { |
| 893 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); | 910 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); |
| 894 } | 911 } |
| 895 } | 912 } |
| 896 pPageObj = NULL; | 913 pPageObj = NULL; |
| 897 } | 914 } |
| 898 } | 915 } |
| | 916 |
| 899 int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const { | 917 int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const { |
| 900 if (charCode == -1) { | 918 if (charCode == -1) { |
| 901 return 0; | 919 return 0; |
| 902 } | 920 } |
| 903 int w = pFont->GetCharWidthF(charCode); | 921 int w = pFont->GetCharWidthF(charCode); |
| 904 if (w == 0) { | 922 if (w == 0) { |
| 905 CFX_ByteString str; | 923 CFX_ByteString str; |
| 906 pFont->AppendChar(str, charCode); | 924 pFont->AppendChar(str, charCode); |
| 907 w = pFont->GetStringWidth(str, 1); | 925 w = pFont->GetStringWidth(str, 1); |
| 908 if (w == 0) { | 926 if (w == 0) { |
| 909 FX_RECT BBox; | 927 FX_RECT BBox; |
| 910 pFont->GetCharBBox(charCode, BBox); | 928 pFont->GetCharBBox(charCode, BBox); |
| 911 w = BBox.right - BBox.left; | 929 w = BBox.right - BBox.left; |
| 912 } | 930 } |
| 913 } | 931 } |
| 914 return w; | 932 return w; |
| 915 } | 933 } |
| | 934 |
| 916 void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { | 935 void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { |
| 917 CFX_BidiChar::Segment seg = pBidi->GetSegmentInfo(); | 936 CFX_BidiChar::Segment seg = pBidi->GetSegmentInfo(); |
| 918 if (seg.direction == CFX_BidiChar::RIGHT) { | 937 if (seg.direction == CFX_BidiChar::RIGHT) { |
| 919 for (int i = seg.start + seg.count; i > seg.start; i--) { | 938 for (int i = seg.start + seg.count; i > seg.start; i--) { |
| 920 m_TextBuf.AppendChar(str.GetAt(i - i)); | 939 m_TextBuf.AppendChar(str.GetAt(i - i)); |
| 921 m_CharList.push_back(m_TempCharList[i - 1]); | 940 m_CharList.push_back(m_TempCharList[i - 1]); |
| 922 } | 941 } |
| 923 } else { | 942 } else { |
| 924 for (int i = seg.start; i < seg.start + seg.count; i++) { | 943 for (int i = seg.start; i < seg.start + seg.count; i++) { |
| 925 m_TextBuf.AppendChar(str.GetAt(i)); | 944 m_TextBuf.AppendChar(str.GetAt(i)); |
| 926 m_CharList.push_back(m_TempCharList[i]); | 945 m_CharList.push_back(m_TempCharList[i]); |
| 927 } | 946 } |
| 928 } | 947 } |
| 929 } | 948 } |
| | 949 |
| 930 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, | 950 void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, |
| 931 PAGECHAR_INFO info) { | 951 PAGECHAR_INFO info) { |
| 932 if (!IsControlChar(info)) { | 952 if (!IsControlChar(info)) { |
| 933 info.m_Index = m_TextBuf.GetLength(); | 953 info.m_Index = m_TextBuf.GetLength(); |
| 934 if (wChar >= 0xFB00 && wChar <= 0xFB06) { | 954 if (wChar >= 0xFB00 && wChar <= 0xFB06) { |
| 935 FX_WCHAR* pDst = NULL; | 955 FX_WCHAR* pDst = NULL; |
| 936 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 956 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
| 937 if (nCount >= 1) { | 957 if (nCount >= 1) { |
| 938 pDst = FX_Alloc(FX_WCHAR, nCount); | 958 pDst = FX_Alloc(FX_WCHAR, nCount); |
| 939 FX_Unicode_GetNormalization(wChar, pDst); | 959 FX_Unicode_GetNormalization(wChar, pDst); |
| 940 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 960 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| 941 PAGECHAR_INFO info2 = info; | 961 PAGECHAR_INFO info2 = info; |
| 942 info2.m_Unicode = pDst[nIndex]; | 962 info2.m_Unicode = pDst[nIndex]; |
| 943 info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 963 info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
| 944 m_TextBuf.AppendChar(info2.m_Unicode); | 964 m_TextBuf.AppendChar(info2.m_Unicode); |
| 945 m_CharList.push_back(info2); | 965 m_CharList.push_back(info2); |
| 946 } | 966 } |
| 947 FX_Free(pDst); | 967 FX_Free(pDst); |
| 948 return; | 968 return; |
| 949 } | 969 } |
| 950 } | 970 } |
| 951 m_TextBuf.AppendChar(wChar); | 971 m_TextBuf.AppendChar(wChar); |
| 952 } else { | 972 } else { |
| 953 info.m_Index = -1; | 973 info.m_Index = -1; |
| 954 } | 974 } |
| 955 m_CharList.push_back(info); | 975 m_CharList.push_back(info); |
| 956 } | 976 } |
| | 977 |
| 957 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, | 978 void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, |
| 958 PAGECHAR_INFO info) { | 979 PAGECHAR_INFO info) { |
| 959 if (!IsControlChar(info)) { | 980 if (!IsControlChar(info)) { |
| 960 info.m_Index = m_TextBuf.GetLength(); | 981 info.m_Index = m_TextBuf.GetLength(); |
| 961 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); | 982 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); |
| 962 FX_WCHAR* pDst = NULL; | 983 FX_WCHAR* pDst = NULL; |
| 963 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 984 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
| 964 if (nCount >= 1) { | 985 if (nCount >= 1) { |
| 965 pDst = FX_Alloc(FX_WCHAR, nCount); | 986 pDst = FX_Alloc(FX_WCHAR, nCount); |
| 966 FX_Unicode_GetNormalization(wChar, pDst); | 987 FX_Unicode_GetNormalization(wChar, pDst); |
| (...skipping 132 matching lines...) | (...skipping 132 matching lines...) |
| 1099 } | 1120 } |
| 1100 if (i < 0) { | 1121 if (i < 0) { |
| 1101 m_LineObj.InsertAt(0, Obj); | 1122 m_LineObj.InsertAt(0, Obj); |
| 1102 } | 1123 } |
| 1103 } | 1124 } |
| 1104 | 1125 |
| 1105 int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { | 1126 int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { |
| 1106 CPDF_TextObject* pTextObj = Obj.m_pTextObj; | 1127 CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
| 1107 CPDF_ContentMarkData* pMarkData = | 1128 CPDF_ContentMarkData* pMarkData = |
| 1108 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); | 1129 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); |
| 1109 if (!pMarkData) { | 1130 if (!pMarkData) |
| 1110 return FPDFTEXT_MC_PASS; | 1131 return FPDFTEXT_MC_PASS; |
| 1111 } | 1132 |
| 1112 int nContentMark = pMarkData->CountItems(); | 1133 int nContentMark = pMarkData->CountItems(); |
| 1113 if (nContentMark < 1) { | 1134 if (nContentMark < 1) |
| 1114 return FPDFTEXT_MC_PASS; | 1135 return FPDFTEXT_MC_PASS; |
| 1115 } | |
| 1116 CFX_WideString actText; | 1136 CFX_WideString actText; |
| 1117 FX_BOOL bExist = FALSE; | 1137 FX_BOOL bExist = FALSE; |
| 1118 CPDF_Dictionary* pDict = NULL; | 1138 CPDF_Dictionary* pDict = NULL; |
| 1119 int n = 0; | 1139 int n = 0; |
| 1120 for (n = 0; n < nContentMark; n++) { | 1140 for (n = 0; n < nContentMark; n++) { |
| 1121 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); | 1141 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); |
| 1122 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); | 1142 if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None) |
| | 1143 continue; |
| 1123 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); | 1144 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); |
| | **Lei Zhang** (2016/02/12 23:53:27): BTW, can we change GetParam() to return a CPDF_Obj… |
| | **Wei Li** (2016/02/13 01:29:18): Good idea. Now the code looks much cleaner. :) |
| 1124 CPDF_String* temp = | 1145 CPDF_String* temp = |
| 1125 ToString(pDict ? pDict->GetElement("ActualText") : nullptr); | 1146 ToString(pDict ? pDict->GetElement("ActualText") : nullptr); |
| 1126 if (temp) { | 1147 if (temp) { |
| 1127 bExist = TRUE; | 1148 bExist = TRUE; |
| 1128 actText = temp->GetUnicodeText(); | 1149 actText = temp->GetUnicodeText(); |
| 1129 } | 1150 } |
| 1130 } | 1151 } |
| 1131 if (!bExist) { | 1152 if (!bExist) |
| 1132 return FPDFTEXT_MC_PASS; | 1153 return FPDFTEXT_MC_PASS; |
| 1133 } | 1154 |
| 1134 if (m_pPreTextObj) { | 1155 if (m_pPreTextObj) { |
| 1135 if (CPDF_ContentMarkData* pPreMarkData = | 1156 CPDF_ContentMarkData* pPreMarkData = |
| 1136 (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject()) { | 1157 (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject(); |
| 1137 if (pPreMarkData->CountItems() == n) { | 1158 if (pPreMarkData && pPreMarkData->CountItems() == n && |
| 1138 CPDF_ContentMarkItem& item = pPreMarkData->GetItem(n - 1); | 1159 pDict == pPreMarkData->GetItem(n - 1).GetParam()) { |
| 1139 if (pDict == item.GetParam()) { | 1160 return FPDFTEXT_MC_DONE; |
| 1140 return FPDFTEXT_MC_DONE; | |
| 1141 } | |
| 1142 } | |
| 1143 } | 1161 } |
| 1144 } | 1162 } |
| | 1163 FX_STRSIZE nItems = actText.GetLength(); |
| | 1164 if (nItems < 1) |
| | 1165 return FPDFTEXT_MC_PASS; |
| | 1166 |
| 1145 CPDF_Font* pFont = pTextObj->GetFont(); | 1167 CPDF_Font* pFont = pTextObj->GetFont(); |
| 1146 FX_STRSIZE nItems = actText.GetLength(); | |
| 1147 if (nItems < 1) { | |
| 1148 return FPDFTEXT_MC_PASS; | |
| 1149 } | |
| 1150 bExist = FALSE; | 1168 bExist = FALSE; |
| 1151 for (FX_STRSIZE i = 0; i < nItems; i++) { | 1169 for (FX_STRSIZE i = 0; i < nItems; i++) { |
| 1152 FX_WCHAR wChar = actText.GetAt(i); | 1170 if (-1 != pFont->CharCodeFromUnicode(actText.GetAt(i))) { |
| | **Lei Zhang** (2016/02/12 23:53:27): Flip this to foo != -1 while we are here? |
| | **Wei Li** (2016/02/13 01:29:18): Done. |
| 1153 if (-1 == pFont->CharCodeFromUnicode(wChar)) { | |
| 1154 continue; | |
| 1155 } else { | |
| 1156 bExist = TRUE; | 1171 bExist = TRUE; |
| 1157 break; | 1172 break; |
| 1158 } | 1173 } |
| 1159 } | 1174 } |
| 1160 if (!bExist) { | 1175 if (!bExist) |
| 1161 return FPDFTEXT_MC_PASS; | 1176 return FPDFTEXT_MC_PASS; |
| 1162 } | 1177 |
| 1163 bExist = FALSE; | 1178 bExist = FALSE; |
| 1164 for (FX_STRSIZE i = 0; i < nItems; i++) { | 1179 for (FX_STRSIZE i = 0; i < nItems; i++) { |
| 1165 FX_WCHAR wChar = actText.GetAt(i); | 1180 FX_WCHAR wChar = actText.GetAt(i); |
| 1166 if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar))) { | 1181 if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar))) { |
| 1167 bExist = TRUE; | 1182 bExist = TRUE; |
| 1168 break; | 1183 break; |
| 1169 } | 1184 } |
| 1170 } | 1185 } |
| 1171 if (!bExist) { | 1186 if (!bExist) |
| 1172 return FPDFTEXT_MC_DONE; | 1187 return FPDFTEXT_MC_DONE; |
| 1173 } | 1188 |
| 1174 return FPDFTEXT_MC_DELAY; | 1189 return FPDFTEXT_MC_DELAY; |
| 1175 } | 1190 } |
| | 1191 |
| 1176 void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { | 1192 void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { |
| 1177 CPDF_TextObject* pTextObj = Obj.m_pTextObj; | 1193 CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
| 1178 CPDF_ContentMarkData* pMarkData = | 1194 CPDF_ContentMarkData* pMarkData = |
| 1179 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); | 1195 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); |
| 1180 if (!pMarkData) { | 1196 if (!pMarkData) |
| 1181 return; | 1197 return; |
| 1182 } | 1198 |
| 1183 int nContentMark = pMarkData->CountItems(); | 1199 int nContentMark = pMarkData->CountItems(); |
| 1184 if (nContentMark < 1) { | 1200 if (nContentMark < 1) |
| 1185 return; | 1201 return; |
| 1186 } | |
| 1187 CFX_WideString actText; | 1202 CFX_WideString actText; |
| 1188 CPDF_Dictionary* pDict = NULL; | 1203 CPDF_Dictionary* pDict = NULL; |
| 1189 int n = 0; | 1204 for (int n = 0; n < nContentMark; n++) { |
| 1190 for (n = 0; n < nContentMark; n++) { | |
| 1191 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); | 1205 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); |
| 1192 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); | 1206 if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None) |
| | 1207 continue; |
| 1193 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); | 1208 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); |
| 1194 CPDF_String* temp = | 1209 if (pDict) |
| 1195 ToString(pDict ? pDict->GetElement("ActualText") : nullptr); | 1210 actText = pDict->GetUnicodeTextBy("ActualText"); |
| 1196 if (temp) { | |
| 1197 actText = temp->GetUnicodeText(); | |
| 1198 } | |
| 1199 } | 1211 } |
| 1200 FX_STRSIZE nItems = actText.GetLength(); | 1212 FX_STRSIZE nItems = actText.GetLength(); |
| 1201 if (nItems < 1) { | 1213 if (nItems < 1) |
| 1202 return; | 1214 return; |
| 1203 } | 1215 |
| 1204 CPDF_Font* pFont = pTextObj->GetFont(); | 1216 CPDF_Font* pFont = pTextObj->GetFont(); |
| 1205 CFX_Matrix formMatrix = Obj.m_formMatrix; | 1217 CFX_Matrix formMatrix = Obj.m_formMatrix; |
| 1206 CFX_Matrix matrix; | 1218 CFX_Matrix matrix; |
| 1207 pTextObj->GetTextMatrix(&matrix); | 1219 pTextObj->GetTextMatrix(&matrix); |
| 1208 matrix.Concat(formMatrix); | 1220 matrix.Concat(formMatrix); |
| 1209 FX_FLOAT fPosX = pTextObj->GetPosX(); | 1221 FX_FLOAT fPosX = pTextObj->GetPosX(); |
| 1210 FX_FLOAT fPosY = pTextObj->GetPosY(); | 1222 FX_FLOAT fPosY = pTextObj->GetPosY(); |
| 1211 int nCharInfoIndex = m_TextBuf.GetLength(); | 1223 int nCharInfoIndex = m_TextBuf.GetLength(); |
| 1212 CFX_FloatRect charBox; | 1224 CFX_FloatRect charBox; |
| 1213 charBox.top = pTextObj->m_Top; | 1225 charBox.top = pTextObj->m_Top; |
| (...skipping 18 matching lines...) Expand all Loading... | |
| 1232 charinfo.m_pTextObj = pTextObj; | 1244 charinfo.m_pTextObj = pTextObj; |
| 1233 charinfo.m_CharBox.top = charBox.top; | 1245 charinfo.m_CharBox.top = charBox.top; |
| 1234 charinfo.m_CharBox.left = charBox.left; | 1246 charinfo.m_CharBox.left = charBox.left; |
| 1235 charinfo.m_CharBox.right = charBox.right; | 1247 charinfo.m_CharBox.right = charBox.right; |
| 1236 charinfo.m_CharBox.bottom = charBox.bottom; | 1248 charinfo.m_CharBox.bottom = charBox.bottom; |
| 1237 charinfo.m_Matrix.Copy(matrix); | 1249 charinfo.m_Matrix.Copy(matrix); |
| 1238 m_TempTextBuf.AppendChar(wChar); | 1250 m_TempTextBuf.AppendChar(wChar); |
| 1239 m_TempCharList.push_back(charinfo); | 1251 m_TempCharList.push_back(charinfo); |
| 1240 } | 1252 } |
| 1241 } | 1253 } |
| 1254 | |
| 1242 void CPDF_TextPage::FindPreviousTextObject() { | 1255 void CPDF_TextPage::FindPreviousTextObject() { |
| 1243 if (m_TempCharList.empty() && m_CharList.empty()) | 1256 if (m_TempCharList.empty() && m_CharList.empty()) |
| 1244 return; | 1257 return; |
| 1245 | 1258 |
| 1246 PAGECHAR_INFO preChar = | 1259 PAGECHAR_INFO preChar = |
| 1247 m_TempCharList.empty() ? m_CharList.back() : m_TempCharList.back(); | 1260 m_TempCharList.empty() ? m_CharList.back() : m_TempCharList.back(); |
| 1248 | 1261 |
| 1249 if (preChar.m_pTextObj) | 1262 if (preChar.m_pTextObj) |
| 1250 m_pPreTextObj = preChar.m_pTextObj; | 1263 m_pPreTextObj = preChar.m_pTextObj; |
| 1251 } | 1264 } |
| (...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1542 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1555 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
| 1543 m_TempCharList.pop_back(); | 1556 m_TempCharList.pop_back(); |
| 1544 } | 1557 } |
| 1545 } | 1558 } |
| 1546 } | 1559 } |
| 1547 } | 1560 } |
| 1548 if (bIsBidiAndMirrorInverse) { | 1561 if (bIsBidiAndMirrorInverse) { |
| 1549 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); | 1562 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); |
| 1550 } | 1563 } |
| 1551 } | 1564 } |
| 1565 | |
| 1552 int32_t CPDF_TextPage::GetTextObjectWritingMode( | 1566 int32_t CPDF_TextPage::GetTextObjectWritingMode( |
| 1553 const CPDF_TextObject* pTextObj) { | 1567 const CPDF_TextObject* pTextObj) { |
| 1554 int32_t nChars = pTextObj->CountChars(); | 1568 int32_t nChars = pTextObj->CountChars(); |
| 1555 if (nChars == 1) { | 1569 if (nChars == 1) { |
| 1556 return m_TextlineDir; | 1570 return m_TextlineDir; |
| 1557 } | 1571 } |
| 1558 CPDF_TextObjectItem first, last; | 1572 CPDF_TextObjectItem first, last; |
| 1559 pTextObj->GetCharInfo(0, &first); | 1573 pTextObj->GetCharInfo(0, &first); |
| 1560 pTextObj->GetCharInfo(nChars - 1, &last); | 1574 pTextObj->GetCharInfo(nChars - 1, &last); |
| 1561 CFX_Matrix textMatrix; | 1575 CFX_Matrix textMatrix; |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1607 } else { | 1621 } else { |
| 1608 return FALSE; | 1622 return FALSE; |
| 1609 } | 1623 } |
| 1610 if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag && | 1624 if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag && |
| 1611 (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) { | 1625 (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) { |
| 1612 return TRUE; | 1626 return TRUE; |
| 1613 } | 1627 } |
| 1614 } | 1628 } |
| 1615 return FALSE; | 1629 return FALSE; |
| 1616 } | 1630 } |
| 1631 | |
| 1617 int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, | 1632 int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, |
| 1618 const CFX_Matrix& formMatrix) { | 1633 const CFX_Matrix& formMatrix) { |
| 1619 FindPreviousTextObject(); | 1634 FindPreviousTextObject(); |
| 1620 FX_BOOL bNewline = FALSE; | 1635 FX_BOOL bNewline = FALSE; |
| 1621 int WritingMode = GetTextObjectWritingMode(pObj); | 1636 int WritingMode = GetTextObjectWritingMode(pObj); |
| 1622 if (WritingMode == -1) { | 1637 if (WritingMode == -1) { |
| 1623 WritingMode = GetTextObjectWritingMode(m_pPreTextObj); | 1638 WritingMode = GetTextObjectWritingMode(m_pPreTextObj); |
| 1624 } | 1639 } |
| 1625 CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, | 1640 CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, |
| 1626 pObj->m_Top); | 1641 pObj->m_Top); |
| (...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1763 return 1; | 1778 return 1; |
| 1764 } | 1779 } |
| 1765 if ((x - last_pos - last_width) > this_width || | 1780 if ((x - last_pos - last_width) > this_width || |
| 1766 (x - last_pos - this_width) > last_width) { | 1781 (x - last_pos - this_width) > last_width) { |
| 1767 return 1; | 1782 return 1; |
| 1768 } | 1783 } |
| 1769 } | 1784 } |
| 1770 } | 1785 } |
| 1771 return 0; | 1786 return 0; |
| 1772 } | 1787 } |
| 1788 | |
| 1773 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, | 1789 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, |
| 1774 CPDF_TextObject* pTextObj2) { | 1790 CPDF_TextObject* pTextObj2) { |
| 1775 if (!pTextObj1 || !pTextObj2) { | 1791 if (!pTextObj1 || !pTextObj2) { |
| 1776 return FALSE; | 1792 return FALSE; |
| 1777 } | 1793 } |
| 1778 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, | 1794 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, |
| 1779 pTextObj2->m_Right, pTextObj2->m_Top); | 1795 pTextObj2->m_Right, pTextObj2->m_Top); |
| 1780 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, | 1796 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, |
| 1781 pTextObj1->m_Right, pTextObj1->m_Top); | 1797 pTextObj1->m_Right, pTextObj1->m_Top); |
| 1782 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { | 1798 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1820 GetCharWidth(itemPer.m_CharCode, pTextObj2->GetFont()) * | 1836 GetCharWidth(itemPer.m_CharCode, pTextObj2->GetFont()) * |
| 1821 pTextObj2->GetFontSize() / 1000 * 0.9 || | 1837 pTextObj2->GetFontSize() / 1000 * 0.9 || |
| 1822 FXSYS_fabs(pTextObj1->GetPosY() - pTextObj2->GetPosY()) > | 1838 FXSYS_fabs(pTextObj1->GetPosY() - pTextObj2->GetPosY()) > |
| 1823 std::max(std::max(rcPreObj.Height(), rcPreObj.Width()), | 1839 std::max(std::max(rcPreObj.Height(), rcPreObj.Width()), |
| 1824 pTextObj2->GetFontSize()) / | 1840 pTextObj2->GetFontSize()) / |
| 1825 8) { | 1841 8) { |
| 1826 return FALSE; | 1842 return FALSE; |
| 1827 } | 1843 } |
| 1828 return TRUE; | 1844 return TRUE; |
| 1829 } | 1845 } |
| 1846 | |
| 1830 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, | 1847 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, |
| 1831 FX_POSITION ObjPos) { | 1848 FX_POSITION ObjPos) { |
| 1832 if (!pTextObj) { | 1849 if (!pTextObj) { |
| 1833 return FALSE; | 1850 return FALSE; |
| 1834 } | 1851 } |
| 1835 int i = 0; | 1852 int i = 0; |
| 1836 if (!ObjPos) { | 1853 if (!ObjPos) { |
| 1837 ObjPos = m_pPage->GetLastObjectPosition(); | 1854 ObjPos = m_pPage->GetLastObjectPosition(); |
| 1838 } | 1855 } |
| 1839 CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos); | 1856 CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos); |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1883 info.m_OriginY); | 1900 info.m_OriginY); |
| 1884 return TRUE; | 1901 return TRUE; |
| 1885 } | 1902 } |
| 1886 | 1903 |
| 1887 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, | 1904 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, |
| 1888 const CFX_FloatRect& rect2) { | 1905 const CFX_FloatRect& rect2) { |
| 1889 CFX_FloatRect rect = rect1; | 1906 CFX_FloatRect rect = rect1; |
| 1890 rect.Intersect(rect2); | 1907 rect.Intersect(rect2); |
| 1891 return !rect.IsEmpty(); | 1908 return !rect.IsEmpty(); |
| 1892 } | 1909 } |
| 1910 | |
| 1893 FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { | 1911 FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { |
| 1894 if (unicode < L'A') { | 1912 if (unicode < L'A') { |
| 1895 return FALSE; | 1913 return FALSE; |
| 1896 } | 1914 } |
| 1897 if (unicode > L'Z' && unicode < L'a') { | 1915 if (unicode > L'Z' && unicode < L'a') { |
| 1898 return FALSE; | 1916 return FALSE; |
| 1899 } | 1917 } |
| 1900 if (unicode > L'z') { | 1918 if (unicode > L'z') { |
| 1901 return FALSE; | 1919 return FALSE; |
| 1902 } | 1920 } |
| 1903 return TRUE; | 1921 return TRUE; |
| 1904 } | 1922 } |
| 1923 | |
| 1905 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) | 1924 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) |
| 1906 : m_pTextPage(pTextPage), | 1925 : m_pTextPage(pTextPage), |
| 1907 m_flags(0), | 1926 m_flags(0), |
| 1908 m_findNextStart(-1), | 1927 m_findNextStart(-1), |
| 1909 m_findPreStart(-1), | 1928 m_findPreStart(-1), |
| 1910 m_bMatchCase(FALSE), | 1929 m_bMatchCase(FALSE), |
| 1911 m_bMatchWholeWord(FALSE), | 1930 m_bMatchWholeWord(FALSE), |
| 1912 m_resStart(0), | 1931 m_resStart(0), |
| 1913 m_resEnd(-1), | 1932 m_resEnd(-1), |
| 1914 m_IsFind(FALSE) { | 1933 m_IsFind(FALSE) { |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 1939 } else { | 1958 } else { |
| 1940 m_CharIndex.push_back(i + 1); | 1959 m_CharIndex.push_back(i + 1); |
| 1941 } | 1960 } |
| 1942 } | 1961 } |
| 1943 } | 1962 } |
| 1944 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); | 1963 int indexSize = pdfium::CollectionSize<int>(m_CharIndex); |
| 1945 if (indexSize % 2) { | 1964 if (indexSize % 2) { |
| 1946 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); | 1965 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); |
| 1947 } | 1966 } |
| 1948 } | 1967 } |
| 1968 | |
| 1949 int CPDF_TextPageFind::GetCharIndex(int index) const { | 1969 int CPDF_TextPageFind::GetCharIndex(int index) const { |
| 1950 return m_pTextPage->CharIndexFromTextIndex(index); | 1970 return m_pTextPage->CharIndexFromTextIndex(index); |
| 1951 } | 1971 } |
| 1972 | |
| 1952 FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, | 1973 FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, |
| 1953 int flags, | 1974 int flags, |
| 1954 int startPos) { | 1975 int startPos) { |
| 1955 if (!m_pTextPage) { | 1976 if (!m_pTextPage) { |
| 1956 return FALSE; | 1977 return FALSE; |
| 1957 } | 1978 } |
| 1958 if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) { | 1979 if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) { |
| 1959 m_strText = m_pTextPage->GetPageText(); | 1980 m_strText = m_pTextPage->GetPageText(); |
| 1960 } | 1981 } |
| 1961 CFX_WideString findwhatStr = findwhat; | 1982 CFX_WideString findwhatStr = findwhat; |
| (...skipping 30 matching lines...) Expand all Loading... | |
| 1992 m_csFindWhatArray.Add(findwhatStr); | 2013 m_csFindWhatArray.Add(findwhatStr); |
| 1993 } | 2014 } |
| 1994 if (m_csFindWhatArray.GetSize() <= 0) { | 2015 if (m_csFindWhatArray.GetSize() <= 0) { |
| 1995 return FALSE; | 2016 return FALSE; |
| 1996 } | 2017 } |
| 1997 m_IsFind = TRUE; | 2018 m_IsFind = TRUE; |
| 1998 m_resStart = 0; | 2019 m_resStart = 0; |
| 1999 m_resEnd = -1; | 2020 m_resEnd = -1; |
| 2000 return TRUE; | 2021 return TRUE; |
| 2001 } | 2022 } |
| 2023 | |
| 2002 FX_BOOL CPDF_TextPageFind::FindNext() { | 2024 FX_BOOL CPDF_TextPageFind::FindNext() { |
| 2003 if (!m_pTextPage) { | 2025 if (!m_pTextPage) { |
| 2004 return FALSE; | 2026 return FALSE; |
| 2005 } | 2027 } |
| 2006 m_resArray.RemoveAll(); | 2028 m_resArray.RemoveAll(); |
| 2007 if (m_findNextStart == -1) { | 2029 if (m_findNextStart == -1) { |
| 2008 return FALSE; | 2030 return FALSE; |
| 2009 } | 2031 } |
| 2010 if (m_strText.IsEmpty()) { | 2032 if (m_strText.IsEmpty()) { |
| 2011 m_IsFind = FALSE; | 2033 m_IsFind = FALSE; |
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2099 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray); | 2121 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray); |
| 2100 if (m_flags & FPDFTEXT_CONSECUTIVE) { | 2122 if (m_flags & FPDFTEXT_CONSECUTIVE) { |
| 2101 m_findNextStart = m_resStart + 1; | 2123 m_findNextStart = m_resStart + 1; |
| 2102 m_findPreStart = m_resEnd - 1; | 2124 m_findPreStart = m_resEnd - 1; |
| 2103 } else { | 2125 } else { |
| 2104 m_findNextStart = m_resEnd + 1; | 2126 m_findNextStart = m_resEnd + 1; |
| 2105 m_findPreStart = m_resStart - 1; | 2127 m_findPreStart = m_resStart - 1; |
| 2106 } | 2128 } |
| 2107 return m_IsFind; | 2129 return m_IsFind; |
| 2108 } | 2130 } |
| 2131 | |
| 2109 FX_BOOL CPDF_TextPageFind::FindPrev() { | 2132 FX_BOOL CPDF_TextPageFind::FindPrev() { |
| 2110 if (!m_pTextPage) { | 2133 if (!m_pTextPage) { |
| 2111 return FALSE; | 2134 return FALSE; |
| 2112 } | 2135 } |
| 2113 m_resArray.RemoveAll(); | 2136 m_resArray.RemoveAll(); |
| 2114 if (m_strText.IsEmpty() || m_findPreStart < 0) { | 2137 if (m_strText.IsEmpty() || m_findPreStart < 0) { |
| 2115 m_IsFind = FALSE; | 2138 m_IsFind = FALSE; |
| 2116 return m_IsFind; | 2139 return m_IsFind; |
| 2117 } | 2140 } |
| 2118 CPDF_TextPageFind findEngine(m_pTextPage); | 2141 CPDF_TextPageFind findEngine(m_pTextPage); |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 2144 m_pTextPage->GetRectArray(order, MatchedCount, m_resArray); | 2167 m_pTextPage->GetRectArray(order, MatchedCount, m_resArray); |
| 2145 if (m_flags & FPDFTEXT_CONSECUTIVE) { | 2168 if (m_flags & FPDFTEXT_CONSECUTIVE) { |
| 2146 m_findNextStart = m_resStart + 1; | 2169 m_findNextStart = m_resStart + 1; |
| 2147 m_findPreStart = m_resEnd - 1; | 2170 m_findPreStart = m_resEnd - 1; |
| 2148 } else { | 2171 } else { |
| 2149 m_findNextStart = m_resEnd + 1; | 2172 m_findNextStart = m_resEnd + 1; |
| 2150 m_findPreStart = m_resStart - 1; | 2173 m_findPreStart = m_resStart - 1; |
| 2151 } | 2174 } |
| 2152 return m_IsFind; | 2175 return m_IsFind; |
| 2153 } | 2176 } |
| 2177 | |
| 2154 void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { | 2178 void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { |
| 2155 if (findwhat.IsEmpty()) { | 2179 if (findwhat.IsEmpty()) { |
| 2156 return; | 2180 return; |
| 2157 } | 2181 } |
| 2158 int index = 0; | 2182 int index = 0; |
| 2159 while (1) { | 2183 while (1) { |
| 2160 CFX_WideString csWord = TEXT_EMPTY; | 2184 CFX_WideString csWord = TEXT_EMPTY; |
| 2161 int ret = | 2185 int ret = |
| 2162 ExtractSubString(csWord, findwhat.c_str(), index, TEXT_BLANK_CHAR); | 2186 ExtractSubString(csWord, findwhat.c_str(), index, TEXT_BLANK_CHAR); |
| 2163 if (csWord.IsEmpty()) { | 2187 if (csWord.IsEmpty()) { |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 2192 continue; | 2216 continue; |
| 2193 } | 2217 } |
| 2194 pos++; | 2218 pos++; |
| 2195 } | 2219 } |
| 2196 if (!csWord.IsEmpty()) { | 2220 if (!csWord.IsEmpty()) { |
| 2197 m_csFindWhatArray.Add(csWord); | 2221 m_csFindWhatArray.Add(csWord); |
| 2198 } | 2222 } |
| 2199 index++; | 2223 index++; |
| 2200 } | 2224 } |
| 2201 } | 2225 } |
| 2226 | |
| 2202 FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, | 2227 FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, |
| 2203 int startPos, | 2228 int startPos, |
| 2204 int endPos) { | 2229 int endPos) { |
| 2205 FX_WCHAR char_left = 0; | 2230 FX_WCHAR char_left = 0; |
| 2206 FX_WCHAR char_right = 0; | 2231 FX_WCHAR char_right = 0; |
| 2207 int char_count = endPos - startPos + 1; | 2232 int char_count = endPos - startPos + 1; |
| 2208 if (char_count < 1) { | 2233 if (char_count < 1) { |
| 2209 return FALSE; | 2234 return FALSE; |
| 2210 } | 2235 } |
| 2211 if (char_count == 1 && csPageText.GetAt(startPos) > 255) { | 2236 if (char_count == 1 && csPageText.GetAt(startPos) > 255) { |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 2238 char_left <= L'9') { | 2263 char_left <= L'9') { |
| 2239 return FALSE; | 2264 return FALSE; |
| 2240 } | 2265 } |
| 2241 if (csPageText.GetAt(endPos) >= L'0' && csPageText.GetAt(endPos) <= L'9' && | 2266 if (csPageText.GetAt(endPos) >= L'0' && csPageText.GetAt(endPos) <= L'9' && |
| 2242 char_right >= L'0' && char_right <= L'9') { | 2267 char_right >= L'0' && char_right <= L'9') { |
| 2243 return FALSE; | 2268 return FALSE; |
| 2244 } | 2269 } |
| 2245 } | 2270 } |
| 2246 return TRUE; | 2271 return TRUE; |
| 2247 } | 2272 } |
| 2273 | |
| 2248 FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, | 2274 FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, |
| 2249 const FX_WCHAR* lpszFullString, | 2275 const FX_WCHAR* lpszFullString, |
| 2250 int iSubString, | 2276 int iSubString, |
| 2251 FX_WCHAR chSep) { | 2277 FX_WCHAR chSep) { |
| 2252 if (!lpszFullString) { | 2278 if (!lpszFullString) { |
| 2253 return FALSE; | 2279 return FALSE; |
| 2254 } | 2280 } |
| 2255 while (iSubString--) { | 2281 while (iSubString--) { |
| 2256 lpszFullString = FXSYS_wcschr(lpszFullString, chSep); | 2282 lpszFullString = FXSYS_wcschr(lpszFullString, chSep); |
| 2257 if (!lpszFullString) { | 2283 if (!lpszFullString) { |
| 2258 rString.Empty(); | 2284 rString.Empty(); |
| 2259 return FALSE; | 2285 return FALSE; |
| 2260 } | 2286 } |
| 2261 lpszFullString++; | 2287 lpszFullString++; |
| 2262 while (*lpszFullString == chSep) { | 2288 while (*lpszFullString == chSep) { |
| 2263 lpszFullString++; | 2289 lpszFullString++; |
| 2264 } | 2290 } |
| 2265 } | 2291 } |
| 2266 const FX_WCHAR* lpchEnd = FXSYS_wcschr(lpszFullString, chSep); | 2292 const FX_WCHAR* lpchEnd = FXSYS_wcschr(lpszFullString, chSep); |
| 2267 int nLen = lpchEnd ? (int)(lpchEnd - lpszFullString) | 2293 int nLen = lpchEnd ? (int)(lpchEnd - lpszFullString) |
| 2268 : (int)FXSYS_wcslen(lpszFullString); | 2294 : (int)FXSYS_wcslen(lpszFullString); |
| 2269 ASSERT(nLen >= 0); | 2295 ASSERT(nLen >= 0); |
| 2270 FXSYS_memcpy(rString.GetBuffer(nLen), lpszFullString, | 2296 FXSYS_memcpy(rString.GetBuffer(nLen), lpszFullString, |
| 2271 nLen * sizeof(FX_WCHAR)); | 2297 nLen * sizeof(FX_WCHAR)); |
| 2272 rString.ReleaseBuffer(); | 2298 rString.ReleaseBuffer(); |
| 2273 return TRUE; | 2299 return TRUE; |
| 2274 } | 2300 } |
| 2301 | |
| 2275 CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) { | 2302 CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) { |
| 2276 CFX_WideString str2; | 2303 CFX_WideString str2; |
| 2277 str2.Empty(); | 2304 str2.Empty(); |
| 2278 int nlen = str.GetLength(); | 2305 int nlen = str.GetLength(); |
| 2279 for (int i = nlen - 1; i >= 0; i--) { | 2306 for (int i = nlen - 1; i >= 0; i--) { |
| 2280 str2 += str.GetAt(i); | 2307 str2 += str.GetAt(i); |
| 2281 } | 2308 } |
| 2282 return str2; | 2309 return str2; |
| 2283 } | 2310 } |
| 2311 | |
| 2284 void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const { | 2312 void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const { |
| 2285 rects.Copy(m_resArray); | 2313 rects.Copy(m_resArray); |
| 2286 } | 2314 } |
| 2315 | |
| 2287 int CPDF_TextPageFind::GetCurOrder() const { | 2316 int CPDF_TextPageFind::GetCurOrder() const { |
| 2288 return GetCharIndex(m_resStart); | 2317 return GetCharIndex(m_resStart); |
| 2289 } | 2318 } |
| 2319 | |
| 2290 int CPDF_TextPageFind::GetMatchedCount() const { | 2320 int CPDF_TextPageFind::GetMatchedCount() const { |
| 2291 int resStart = GetCharIndex(m_resStart); | 2321 int resStart = GetCharIndex(m_resStart); |
| 2292 int resEnd = GetCharIndex(m_resEnd); | 2322 int resEnd = GetCharIndex(m_resEnd); |
| 2293 return resEnd - resStart + 1; | 2323 return resEnd - resStart + 1; |
| 2294 } | 2324 } |
| 2295 | 2325 |
| 2296 CPDF_LinkExtract::CPDF_LinkExtract() | 2326 CPDF_LinkExtract::CPDF_LinkExtract() |
| 2297 : m_pTextPage(nullptr), m_bIsParsed(false) { | 2327 : m_pTextPage(nullptr), m_bIsParsed(false) { |
| 2298 } | 2328 } |
| 2299 | 2329 |
| (...skipping 18 matching lines...) Expand all Loading... | |
| 2318 | 2348 |
| 2319 void CPDF_LinkExtract::DeleteLinkList() { | 2349 void CPDF_LinkExtract::DeleteLinkList() { |
| 2320 while (m_LinkList.GetSize()) { | 2350 while (m_LinkList.GetSize()) { |
| 2321 CPDF_LinkExt* linkinfo = NULL; | 2351 CPDF_LinkExt* linkinfo = NULL; |
| 2322 linkinfo = m_LinkList.GetAt(0); | 2352 linkinfo = m_LinkList.GetAt(0); |
| 2323 m_LinkList.RemoveAt(0); | 2353 m_LinkList.RemoveAt(0); |
| 2324 delete linkinfo; | 2354 delete linkinfo; |
| 2325 } | 2355 } |
| 2326 m_LinkList.RemoveAll(); | 2356 m_LinkList.RemoveAll(); |
| 2327 } | 2357 } |
| 2358 | |
| 2328 int CPDF_LinkExtract::CountLinks() const { | 2359 int CPDF_LinkExtract::CountLinks() const { |
| 2329 if (!m_bIsParsed) { | 2360 if (!m_bIsParsed) { |
| 2330 return -1; | 2361 return -1; |
| 2331 } | 2362 } |
| 2332 return m_LinkList.GetSize(); | 2363 return m_LinkList.GetSize(); |
| 2333 } | 2364 } |
| 2365 | |
| 2334 void CPDF_LinkExtract::ParseLink() { | 2366 void CPDF_LinkExtract::ParseLink() { |
| 2335 int start = 0, pos = 0; | 2367 int start = 0, pos = 0; |
| 2336 int TotalChar = m_pTextPage->CountChars(); | 2368 int TotalChar = m_pTextPage->CountChars(); |
| 2337 while (pos < TotalChar) { | 2369 while (pos < TotalChar) { |
| 2338 FPDF_CHAR_INFO pageChar; | 2370 FPDF_CHAR_INFO pageChar; |
| 2339 m_pTextPage->GetCharInfo(pos, &pageChar); | 2371 m_pTextPage->GetCharInfo(pos, &pageChar); |
| 2340 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || | 2372 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || |
| 2341 pos == TotalChar - 1) { | 2373 pos == TotalChar - 1) { |
| 2342 int nCount = pos - start; | 2374 int nCount = pos - start; |
| 2343 if (pos == TotalChar - 1) { | 2375 if (pos == TotalChar - 1) { |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 2359 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { | 2391 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { |
| 2360 AppendToLinkList(start, nCount, strBeCheck); | 2392 AppendToLinkList(start, nCount, strBeCheck); |
| 2361 } | 2393 } |
| 2362 } | 2394 } |
| 2363 start = ++pos; | 2395 start = ++pos; |
| 2364 } else { | 2396 } else { |
| 2365 pos++; | 2397 pos++; |
| 2366 } | 2398 } |
| 2367 } | 2399 } |
| 2368 } | 2400 } |
| 2401 | |
| 2369 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { | 2402 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { |
| 2370 CFX_WideString str = strBeCheck; | 2403 CFX_WideString str = strBeCheck; |
| 2371 str.MakeLower(); | 2404 str.MakeLower(); |
| 2372 if (str.Find(L"http://www.") != -1) { | 2405 if (str.Find(L"http://www.") != -1) { |
| 2373 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); | 2406 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); |
| 2374 return TRUE; | 2407 return TRUE; |
| 2375 } | 2408 } |
| 2376 if (str.Find(L"http://") != -1) { | 2409 if (str.Find(L"http://") != -1) { |
| 2377 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); | 2410 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); |
| 2378 return TRUE; | 2411 return TRUE; |
| 2379 } | 2412 } |
| 2380 if (str.Find(L"https://www.") != -1) { | 2413 if (str.Find(L"https://www.") != -1) { |
| 2381 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); | 2414 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); |
| 2382 return TRUE; | 2415 return TRUE; |
| 2383 } | 2416 } |
| 2384 if (str.Find(L"https://") != -1) { | 2417 if (str.Find(L"https://") != -1) { |
| 2385 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); | 2418 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); |
| 2386 return TRUE; | 2419 return TRUE; |
| 2387 } | 2420 } |
| 2388 if (str.Find(L"www.") != -1) { | 2421 if (str.Find(L"www.") != -1) { |
| 2389 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); | 2422 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); |
| 2390 strBeCheck = L"http://" + strBeCheck; | 2423 strBeCheck = L"http://" + strBeCheck; |
| 2391 return TRUE; | 2424 return TRUE; |
| 2392 } | 2425 } |
| 2393 return FALSE; | 2426 return FALSE; |
| 2394 } | 2427 } |
| 2428 | |
| 2395 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { | 2429 bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { |
| 2396 int aPos = str.Find(L'@'); | 2430 int aPos = str.Find(L'@'); |
| 2397 // Invalid when no '@'. | 2431 // Invalid when no '@'. |
| 2398 if (aPos < 1) { | 2432 if (aPos < 1) { |
| 2399 return FALSE; | 2433 return FALSE; |
| 2400 } | 2434 } |
| 2401 | 2435 |
| 2402 // Check the local part. | 2436 // Check the local part. |
| 2403 int pPos = aPos; // Used to track the position of '@' or '.'. | 2437 int pPos = aPos; // Used to track the position of '@' or '.'. |
| 2404 for (int i = aPos - 1; i >= 0; i--) { | 2438 for (int i = aPos - 1; i >= 0; i--) { |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2489 return; | 2523 return; |
| 2490 } | 2524 } |
| 2491 CPDF_LinkExt* link = NULL; | 2525 CPDF_LinkExt* link = NULL; |
| 2492 link = m_LinkList.GetAt(index); | 2526 link = m_LinkList.GetAt(index); |
| 2493 if (!link) { | 2527 if (!link) { |
| 2494 return; | 2528 return; |
| 2495 } | 2529 } |
| 2496 start = link->m_Start; | 2530 start = link->m_Start; |
| 2497 count = link->m_Count; | 2531 count = link->m_Count; |
| 2498 } | 2532 } |
| 2533 | |
| 2499 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { | 2534 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { |
| 2500 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2535 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2501 return; | 2536 return; |
| 2502 } | 2537 } |
| 2503 CPDF_LinkExt* link = NULL; | 2538 CPDF_LinkExt* link = NULL; |
| 2504 link = m_LinkList.GetAt(index); | 2539 link = m_LinkList.GetAt(index); |
| 2505 if (!link) { | 2540 if (!link) { |
| 2506 return; | 2541 return; |
| 2507 } | 2542 } |
| 2508 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2543 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
| 2509 } | 2544 } |
| OLD | NEW |