| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <cctype> | 8 #include <cctype> |
| 9 #include <cwctype> | 9 #include <cwctype> |
| 10 #include <memory> | 10 #include <memory> |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 70 bAllChar = FALSE; | 70 bAllChar = FALSE; |
| 71 } | 71 } |
| 72 } | 72 } |
| 73 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { | 73 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { |
| 74 baseSpace = 0.0; | 74 baseSpace = 0.0; |
| 75 } | 75 } |
| 76 } | 76 } |
| 77 return baseSpace; | 77 return baseSpace; |
| 78 } | 78 } |
| 79 | 79 |
| 80 const FX_FLOAT kDefaultFontSize = 1.0f; |
| 81 |
| 80 } // namespace | 82 } // namespace |
| 81 | 83 |
| 82 CPDFText_ParseOptions::CPDFText_ParseOptions() | 84 CPDFText_ParseOptions::CPDFText_ParseOptions() |
| 83 : m_bGetCharCodeOnly(FALSE), | 85 : m_bGetCharCodeOnly(FALSE), |
| 84 m_bNormalizeObjs(TRUE), | 86 m_bNormalizeObjs(TRUE), |
| 85 m_bOutputHyphen(FALSE) {} | 87 m_bOutputHyphen(FALSE) {} |
| 86 | 88 |
| 87 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, | 89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, |
| 88 int flags) { | 90 int flags) { |
| 89 return new CPDF_TextPage(pPage, flags); | 91 return new CPDF_TextPage(pPage, flags); |
| (...skipping 361 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 451 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, | 453 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, |
| 452 FX_FLOAT y, | 454 FX_FLOAT y, |
| 453 FX_FLOAT xTolerance, | 455 FX_FLOAT xTolerance, |
| 454 FX_FLOAT yTolerance) const { | 456 FX_FLOAT yTolerance) const { |
| 455 if (m_ParseOptions.m_bGetCharCodeOnly) { | 457 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 456 return -3; | 458 return -3; |
| 457 } | 459 } |
| 458 CPDF_Point point(x, y); | 460 CPDF_Point point(x, y); |
| 459 return GetIndexAtPos(point, xTolerance, yTolerance); | 461 return GetIndexAtPos(point, xTolerance, yTolerance); |
| 460 } | 462 } |
| 461 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { | 463 |
| 464 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { |
| 462 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 465 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
| 463 return; | 466 return; |
| 464 | 467 |
| 465 if (index < 0 || index >= m_charList.GetSize()) | 468 if (index < 0 || index >= m_charList.GetSize()) |
| 466 return; | 469 return; |
| 467 | 470 |
| 468 PAGECHAR_INFO charinfo; | 471 const PAGECHAR_INFO* charinfo = |
| 469 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 472 static_cast<PAGECHAR_INFO*>(m_charList.GetAt(index)); |
| 470 info.m_Charcode = charinfo.m_CharCode; | 473 info->m_Charcode = charinfo->m_CharCode; |
| 471 info.m_OriginX = charinfo.m_OriginX; | 474 info->m_OriginX = charinfo->m_OriginX; |
| 472 info.m_OriginY = charinfo.m_OriginY; | 475 info->m_OriginY = charinfo->m_OriginY; |
| 473 info.m_Unicode = charinfo.m_Unicode; | 476 info->m_Unicode = charinfo->m_Unicode; |
| 474 info.m_Flag = charinfo.m_Flag; | 477 info->m_Flag = charinfo->m_Flag; |
| 475 info.m_CharBox = charinfo.m_CharBox; | 478 info->m_CharBox = charinfo->m_CharBox; |
| 476 info.m_pTextObj = charinfo.m_pTextObj; | 479 info->m_pTextObj = charinfo->m_pTextObj; |
| 477 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { | 480 if (charinfo->m_pTextObj && charinfo->m_pTextObj->GetFont()) { |
| 478 info.m_FontSize = charinfo.m_pTextObj->GetFontSize(); | 481 info->m_FontSize = charinfo->m_pTextObj->GetFontSize(); |
| 482 } else { |
| 483 info->m_FontSize = kDefaultFontSize; |
| 479 } | 484 } |
| 480 info.m_Matrix.Copy(charinfo.m_Matrix); | 485 info->m_Matrix.Copy(charinfo->m_Matrix); |
| 481 return; | |
| 482 } | 486 } |
| 487 |
| 483 void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, | 488 void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, |
| 484 int32_t& nCount) const { | 489 int32_t& nCount) const { |
| 485 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start); | 490 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start); |
| 486 PAGECHAR_INFO charinfo2 = | 491 PAGECHAR_INFO charinfo2 = |
| 487 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1); | 492 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1); |
| 488 if (FPDFTEXT_CHAR_PIECE != charinfo.m_Flag && | 493 if (FPDFTEXT_CHAR_PIECE != charinfo.m_Flag && |
| 489 FPDFTEXT_CHAR_PIECE != charinfo2.m_Flag) { | 494 FPDFTEXT_CHAR_PIECE != charinfo2.m_Flag) { |
| 490 return; | 495 return; |
| 491 } | 496 } |
| 492 if (FPDFTEXT_CHAR_PIECE == charinfo.m_Flag) { | 497 if (FPDFTEXT_CHAR_PIECE == charinfo.m_Flag) { |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 587 return; | 592 return; |
| 588 | 593 |
| 589 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) | 594 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) |
| 590 return; | 595 return; |
| 591 | 596 |
| 592 left = m_SelRects.GetAt(rectIndex).left; | 597 left = m_SelRects.GetAt(rectIndex).left; |
| 593 top = m_SelRects.GetAt(rectIndex).top; | 598 top = m_SelRects.GetAt(rectIndex).top; |
| 594 right = m_SelRects.GetAt(rectIndex).right; | 599 right = m_SelRects.GetAt(rectIndex).right; |
| 595 bottom = m_SelRects.GetAt(rectIndex).bottom; | 600 bottom = m_SelRects.GetAt(rectIndex).bottom; |
| 596 } | 601 } |
| 602 |
| 597 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { | 603 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { |
| 598 if (m_ParseOptions.m_bGetCharCodeOnly) { | 604 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 599 return FALSE; | 605 return FALSE; |
| 600 } | 606 } |
| 601 if (end == start) { | 607 if (end == start) { |
| 602 return FALSE; | 608 return FALSE; |
| 603 } | 609 } |
| 604 FX_FLOAT dx, dy; | 610 FPDF_CHAR_INFO info_start; |
| 605 FPDF_CHAR_INFO info1, info2; | 611 FPDF_CHAR_INFO info_end; |
| 606 GetCharInfo(start, info1); | 612 GetCharInfo(start, &info_start); |
| 607 GetCharInfo(end, info2); | 613 GetCharInfo(end, &info_end); |
| 608 while (info2.m_CharBox.Width() == 0 || info2.m_CharBox.Height() == 0) { | 614 while (info_end.m_CharBox.Width() == 0 || info_end.m_CharBox.Height() == 0) { |
| 609 end--; | 615 if (--end <= start) |
| 610 if (end <= start) { | |
| 611 return FALSE; | 616 return FALSE; |
| 612 } | 617 |
| 613 GetCharInfo(end, info2); | 618 GetCharInfo(end, &info_end); |
| 614 } | 619 } |
| 615 dx = (info2.m_OriginX - info1.m_OriginX); | 620 FX_FLOAT dx = (info_end.m_OriginX - info_start.m_OriginX); |
| 616 dy = (info2.m_OriginY - info1.m_OriginY); | 621 FX_FLOAT dy = (info_end.m_OriginY - info_start.m_OriginY); |
| 617 if (dx == 0) { | 622 if (dx == 0) { |
| 618 if (dy > 0) { | 623 if (dy > 0) { |
| 619 Rotate = 90; | 624 Rotate = 90; |
| 620 } else if (dy < 0) { | 625 } else if (dy < 0) { |
| 621 Rotate = 270; | 626 Rotate = 270; |
| 622 } else { | 627 } else { |
| 623 Rotate = 0; | 628 Rotate = 0; |
| 624 } | 629 } |
| 625 } else { | 630 } else { |
| 626 float a = FXSYS_atan2(dy, dx); | 631 float a = FXSYS_atan2(dy, dx); |
| 627 Rotate = (int)(a * 180 / FX_PI + 0.5); | 632 Rotate = (int)(a * 180 / FX_PI + 0.5); |
| 628 } | 633 } |
| 629 if (Rotate < 0) { | 634 if (Rotate < 0) { |
| 630 Rotate = -Rotate; | 635 Rotate = -Rotate; |
| 631 } else if (Rotate > 0) { | 636 } else if (Rotate > 0) { |
| 632 Rotate = 360 - Rotate; | 637 Rotate = 360 - Rotate; |
| 633 } | 638 } |
| 634 return TRUE; | 639 return TRUE; |
| 635 } | 640 } |
| 641 |
| 636 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, | 642 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, |
| 637 int& Rotate) { | 643 int& Rotate) { |
| 638 if (m_ParseOptions.m_bGetCharCodeOnly) { | 644 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 639 return FALSE; | 645 return FALSE; |
| 640 } | 646 } |
| 641 int start, end, count, | 647 int start, end, count, |
| 642 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, | 648 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, |
| 643 TRUE); | 649 TRUE); |
| 644 if (n < 1) { | 650 if (n < 1) { |
| 645 return FALSE; | 651 return FALSE; |
| (...skipping 1400 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2046 if (pObj->m_Type != PDFPAGE_TEXT) { | 2052 if (pObj->m_Type != PDFPAGE_TEXT) { |
| 2047 continue; | 2053 continue; |
| 2048 } | 2054 } |
| 2049 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { | 2055 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { |
| 2050 return TRUE; | 2056 return TRUE; |
| 2051 } | 2057 } |
| 2052 i++; | 2058 i++; |
| 2053 } | 2059 } |
| 2054 return FALSE; | 2060 return FALSE; |
| 2055 } | 2061 } |
| 2062 |
| 2056 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) { | 2063 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) { |
| 2057 int size = m_TempCharList.GetSize(); | 2064 int size = m_TempCharList.GetSize(); |
| 2058 PAGECHAR_INFO preChar; | 2065 PAGECHAR_INFO preChar; |
| 2059 if (size) { | 2066 if (size) { |
| 2060 preChar = (PAGECHAR_INFO)m_TempCharList[size - 1]; | 2067 preChar = (PAGECHAR_INFO)m_TempCharList[size - 1]; |
| 2061 } else { | 2068 } else { |
| 2062 size = m_charList.GetSize(); | 2069 size = m_charList.GetSize(); |
| 2063 if (size == 0) { | 2070 if (size == 0) { |
| 2064 return FALSE; | 2071 return FALSE; |
| 2065 } | 2072 } |
| 2066 preChar = (PAGECHAR_INFO)m_charList[size - 1]; | 2073 preChar = (PAGECHAR_INFO)m_charList[size - 1]; |
| 2067 } | 2074 } |
| 2068 info.m_Index = m_TextBuf.GetLength(); | 2075 info.m_Index = m_TextBuf.GetLength(); |
| 2069 info.m_Unicode = unicode; | 2076 info.m_Unicode = unicode; |
| 2070 info.m_pTextObj = NULL; | 2077 info.m_pTextObj = NULL; |
| 2071 info.m_CharCode = -1; | 2078 info.m_CharCode = -1; |
| 2072 info.m_Flag = FPDFTEXT_CHAR_GENERATED; | 2079 info.m_Flag = FPDFTEXT_CHAR_GENERATED; |
| 2073 int preWidth = 0; | 2080 int preWidth = 0; |
| 2074 if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1) { | 2081 if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1) |
| 2075 preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont()); | 2082 preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont()); |
| 2076 } | 2083 |
| 2077 FX_FLOAT fs = 0; | 2084 FX_FLOAT fFontSize = preChar.m_pTextObj ? preChar.m_pTextObj->GetFontSize() |
| 2078 if (preChar.m_pTextObj) { | 2085 : preChar.m_CharBox.Height(); |
| 2079 fs = preChar.m_pTextObj->GetFontSize(); | 2086 if (!fFontSize) |
| 2080 } else { | 2087 fFontSize = kDefaultFontSize; |
| 2081 fs = preChar.m_CharBox.Height(); | 2088 |
| 2082 } | 2089 info.m_OriginX = preChar.m_OriginX + preWidth * (fFontSize) / 1000; |
| 2083 if (!fs) { | |
| 2084 fs = 1; | |
| 2085 } | |
| 2086 info.m_OriginX = preChar.m_OriginX + preWidth * (fs) / 1000; | |
| 2087 info.m_OriginY = preChar.m_OriginY; | 2090 info.m_OriginY = preChar.m_OriginY; |
| 2088 info.m_CharBox = CFX_FloatRect(info.m_OriginX, info.m_OriginY, info.m_OriginX, | 2091 info.m_CharBox = CFX_FloatRect(info.m_OriginX, info.m_OriginY, info.m_OriginX, |
| 2089 info.m_OriginY); | 2092 info.m_OriginY); |
| 2090 return TRUE; | 2093 return TRUE; |
| 2091 } | 2094 } |
| 2095 |
| 2092 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, | 2096 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, |
| 2093 const CFX_FloatRect& rect2) { | 2097 const CFX_FloatRect& rect2) { |
| 2094 CFX_FloatRect rect = rect1; | 2098 CFX_FloatRect rect = rect1; |
| 2095 rect.Intersect(rect2); | 2099 rect.Intersect(rect2); |
| 2096 return !rect.IsEmpty(); | 2100 return !rect.IsEmpty(); |
| 2097 } | 2101 } |
| 2098 FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { | 2102 FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { |
| 2099 if (unicode < L'A') { | 2103 if (unicode < L'A') { |
| 2100 return FALSE; | 2104 return FALSE; |
| 2101 } | 2105 } |
| (...skipping 15 matching lines...) Expand all Loading... |
| 2117 m_resStart(0), | 2121 m_resStart(0), |
| 2118 m_resEnd(-1), | 2122 m_resEnd(-1), |
| 2119 m_IsFind(FALSE) { | 2123 m_IsFind(FALSE) { |
| 2120 m_strText = m_pTextPage->GetPageText(); | 2124 m_strText = m_pTextPage->GetPageText(); |
| 2121 int nCount = pTextPage->CountChars(); | 2125 int nCount = pTextPage->CountChars(); |
| 2122 if (nCount) { | 2126 if (nCount) { |
| 2123 m_CharIndex.Add(0); | 2127 m_CharIndex.Add(0); |
| 2124 } | 2128 } |
| 2125 for (int i = 0; i < nCount; i++) { | 2129 for (int i = 0; i < nCount; i++) { |
| 2126 FPDF_CHAR_INFO info; | 2130 FPDF_CHAR_INFO info; |
| 2127 pTextPage->GetCharInfo(i, info); | 2131 pTextPage->GetCharInfo(i, &info); |
| 2128 int indexSize = m_CharIndex.GetSize(); | 2132 int indexSize = m_CharIndex.GetSize(); |
| 2129 if (info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) { | 2133 if (info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) { |
| 2130 if (indexSize % 2) { | 2134 if (indexSize % 2) { |
| 2131 m_CharIndex.Add(1); | 2135 m_CharIndex.Add(1); |
| 2132 } else { | 2136 } else { |
| 2133 if (indexSize <= 0) { | 2137 if (indexSize <= 0) { |
| 2134 continue; | 2138 continue; |
| 2135 } | 2139 } |
| 2136 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1); | 2140 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1); |
| 2137 } | 2141 } |
| (...skipping 405 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2543 if (!m_bIsParsed) { | 2547 if (!m_bIsParsed) { |
| 2544 return -1; | 2548 return -1; |
| 2545 } | 2549 } |
| 2546 return m_LinkList.GetSize(); | 2550 return m_LinkList.GetSize(); |
| 2547 } | 2551 } |
| 2548 void CPDF_LinkExtract::ParseLink() { | 2552 void CPDF_LinkExtract::ParseLink() { |
| 2549 int start = 0, pos = 0; | 2553 int start = 0, pos = 0; |
| 2550 int TotalChar = m_pTextPage->CountChars(); | 2554 int TotalChar = m_pTextPage->CountChars(); |
| 2551 while (pos < TotalChar) { | 2555 while (pos < TotalChar) { |
| 2552 FPDF_CHAR_INFO pageChar; | 2556 FPDF_CHAR_INFO pageChar; |
| 2553 m_pTextPage->GetCharInfo(pos, pageChar); | 2557 m_pTextPage->GetCharInfo(pos, &pageChar); |
| 2554 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || | 2558 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || |
| 2555 pos == TotalChar - 1) { | 2559 pos == TotalChar - 1) { |
| 2556 int nCount = pos - start; | 2560 int nCount = pos - start; |
| 2557 if (pos == TotalChar - 1) { | 2561 if (pos == TotalChar - 1) { |
| 2558 nCount++; | 2562 nCount++; |
| 2559 } | 2563 } |
| 2560 CFX_WideString strBeCheck; | 2564 CFX_WideString strBeCheck; |
| 2561 strBeCheck = m_pTextPage->GetPageText(start, nCount); | 2565 strBeCheck = m_pTextPage->GetPageText(start, nCount); |
| 2562 if (strBeCheck.GetLength() > 5) { | 2566 if (strBeCheck.GetLength() > 5) { |
| 2563 while (strBeCheck.GetLength() > 0) { | 2567 while (strBeCheck.GetLength() > 0) { |
| (...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2714 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2718 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2715 return; | 2719 return; |
| 2716 } | 2720 } |
| 2717 CPDF_LinkExt* link = NULL; | 2721 CPDF_LinkExt* link = NULL; |
| 2718 link = m_LinkList.GetAt(index); | 2722 link = m_LinkList.GetAt(index); |
| 2719 if (!link) { | 2723 if (!link) { |
| 2720 return; | 2724 return; |
| 2721 } | 2725 } |
| 2722 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2726 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
| 2723 } | 2727 } |
| OLD | NEW |