Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include <ctype.h> | 7 #include <ctype.h> |
| 8 #include <algorithm> | 8 #include <algorithm> |
| 9 | 9 |
| 10 #include "../../../third_party/base/nonstd_unique_ptr.h" | 10 #include "../../../third_party/base/nonstd_unique_ptr.h" |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 74 } | 74 } |
| 75 return baseSpace; | 75 return baseSpace; |
| 76 } | 76 } |
| 77 | 77 |
| 78 } // namespace | 78 } // namespace |
| 79 | 79 |
| 80 CPDFText_ParseOptions::CPDFText_ParseOptions() | 80 CPDFText_ParseOptions::CPDFText_ParseOptions() |
| 81 : m_bGetCharCodeOnly(FALSE), | 81 : m_bGetCharCodeOnly(FALSE), |
| 82 m_bNormalizeObjs(TRUE), | 82 m_bNormalizeObjs(TRUE), |
| 83 m_bOutputHyphen(FALSE) {} | 83 m_bOutputHyphen(FALSE) {} |
| 84 #ifndef PDF_ENABLE_XFA | |
| 84 | 85 |
| 86 #else | |
| 87 IPDF_TextPage* IPDF_TextPage::CreateTextPage( | |
| 88 const CPDF_Page* pPage, | |
| 89 CPDFText_ParseOptions ParserOptions) { | |
| 90 return new CPDF_TextPage(pPage, ParserOptions); | |
| 91 } | |
| 92 #endif | |
| 85 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, | 93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, |
| 86 int flags) { | 94 int flags) { |
| 87 return new CPDF_TextPage(pPage, flags); | 95 return new CPDF_TextPage(pPage, flags); |
| 88 } | 96 } |
| 97 #ifndef PDF_ENABLE_XFA | |
| 89 | 98 |
| 99 #else | |
| 100 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, | |
| 101 int flags) { | |
| 102 return new CPDF_TextPage(pObjs, flags); | |
| 103 } | |
| 104 #endif | |
| 90 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( | 105 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( |
| 91 const IPDF_TextPage* pTextPage) { | 106 const IPDF_TextPage* pTextPage) { |
| 92 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; | 107 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; |
| 93 } | 108 } |
| 109 #ifndef PDF_ENABLE_XFA | |
| 94 | 110 |
| 111 #endif | |
| 95 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { | 112 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { |
| 96 return new CPDF_LinkExtract(); | 113 return new CPDF_LinkExtract(); |
| 97 } | 114 } |
| 115 #ifndef PDF_ENABLE_XFA | |
| 98 | 116 |
| 117 #endif | |
| 99 #define TEXT_BLANK_CHAR L' ' | 118 #define TEXT_BLANK_CHAR L' ' |
| 100 #define TEXT_LINEFEED_CHAR L'\n' | 119 #define TEXT_LINEFEED_CHAR L'\n' |
| 101 #define TEXT_RETURN_CHAR L'\r' | 120 #define TEXT_RETURN_CHAR L'\r' |
| 102 #define TEXT_EMPTY L"" | 121 #define TEXT_EMPTY L"" |
| 103 #define TEXT_BLANK L" " | 122 #define TEXT_BLANK L" " |
| 104 #define TEXT_RETURN_LINEFEED L"\r\n" | 123 #define TEXT_RETURN_LINEFEED L"\r\n" |
| 105 #define TEXT_LINEFEED L"\n" | 124 #define TEXT_LINEFEED L"\n" |
| 106 #define TEXT_CHARRATIO_GAPDELTA 0.070 | 125 #define TEXT_CHARRATIO_GAPDELTA 0.070 |
| 107 | 126 |
| 108 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) | 127 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) |
| 109 : m_pPage(pPage), | 128 : m_pPage(pPage), |
| 110 m_charList(512), | 129 m_charList(512), |
| 111 m_TempCharList(50), | 130 m_TempCharList(50), |
| 112 m_parserflag(flags), | 131 m_parserflag(flags), |
| 113 m_pPreTextObj(nullptr), | 132 m_pPreTextObj(nullptr), |
| 114 m_bIsParsed(false), | 133 m_bIsParsed(false), |
| 115 m_TextlineDir(-1), | 134 m_TextlineDir(-1), |
| 116 m_CurlineRect(0, 0, 0, 0) { | 135 m_CurlineRect(0, 0, 0, 0) { |
| 117 m_TextBuf.EstimateSize(0, 10240); | 136 m_TextBuf.EstimateSize(0, 10240); |
| 118 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 137 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
| 119 (int)pPage->GetPageHeight(), 0); | 138 (int)pPage->GetPageHeight(), 0); |
| 120 } | 139 } |
| 121 | 140 |
| 141 #ifdef PDF_ENABLE_XFA | |
| 142 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, | |
| 143 CPDFText_ParseOptions ParserOptions) | |
| 144 : m_ParseOptions(ParserOptions), | |
| 145 m_pPage(pPage), | |
| 146 m_charList(512), | |
| 147 m_TempCharList(50), | |
| 148 m_parserflag(0), | |
| 149 m_pPreTextObj(nullptr), | |
| 150 m_bIsParsed(false), | |
| 151 m_TextlineDir(-1), | |
| 152 m_CurlineRect(0, 0, 0, 0) { | |
| 153 m_TextBuf.EstimateSize(0, 10240); | |
| 154 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | |
| 155 (int)pPage->GetPageHeight(), 0); | |
| 156 } | |
| 157 | |
| 158 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags) | |
| 159 : m_pPage(pPage), | |
| 160 m_charList(512), | |
| 161 m_TempCharList(50), | |
| 162 m_parserflag(flags), | |
| 163 m_pPreTextObj(nullptr), | |
| 164 m_bIsParsed(false), | |
| 165 m_TextlineDir(-1), | |
| 166 m_CurlineRect(0, 0, 0, 0) { | |
| 167 m_TextBuf.EstimateSize(0, 10240); | |
| 168 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); | |
| 169 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); | |
| 170 } | |
| 171 #endif | |
| 122 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { | 172 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { |
| 123 m_ParseOptions.m_bNormalizeObjs = bNormalize; | 173 m_ParseOptions.m_bNormalizeObjs = bNormalize; |
| 124 } | 174 } |
| 125 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { | 175 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { |
| 126 switch (charInfo.m_Unicode) { | 176 switch (charInfo.m_Unicode) { |
| 127 case 0x2: | 177 case 0x2: |
| 128 case 0x3: | 178 case 0x3: |
| 129 case 0x93: | 179 case 0x93: |
| 130 case 0x94: | 180 case 0x94: |
| 131 case 0x96: | 181 case 0x96: |
| (...skipping 1217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1349 int nContentMark = pMarkData->CountItems(); | 1399 int nContentMark = pMarkData->CountItems(); |
| 1350 if (nContentMark < 1) { | 1400 if (nContentMark < 1) { |
| 1351 return; | 1401 return; |
| 1352 } | 1402 } |
| 1353 CFX_WideString actText; | 1403 CFX_WideString actText; |
| 1354 CPDF_Dictionary* pDict = NULL; | 1404 CPDF_Dictionary* pDict = NULL; |
| 1355 int n = 0; | 1405 int n = 0; |
| 1356 for (n = 0; n < nContentMark; n++) { | 1406 for (n = 0; n < nContentMark; n++) { |
| 1357 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); | 1407 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); |
| 1358 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); | 1408 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); |
| 1409 #ifdef PDF_ENABLE_XFA | |
| 1410 | |
| 1411 #endif | |
| 1359 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); | 1412 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); |
| 1360 CPDF_String* temp = | 1413 CPDF_String* temp = |
| 1361 ToString(pDict ? pDict->GetElement(FX_BSTRC("ActualText")) : nullptr); | 1414 ToString(pDict ? pDict->GetElement(FX_BSTRC("ActualText")) : nullptr); |
| 1362 if (temp) { | 1415 if (temp) { |
| 1363 actText = temp->GetUnicodeText(); | 1416 actText = temp->GetUnicodeText(); |
| 1364 } | 1417 } |
| 1365 } | 1418 } |
| 1366 FX_STRSIZE nItems = actText.GetLength(); | 1419 FX_STRSIZE nItems = actText.GetLength(); |
| 1367 if (nItems < 1) { | 1420 if (nItems < 1) { |
| 1368 return; | 1421 return; |
| (...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1517 ProcessMarkedContent(Obj); | 1570 ProcessMarkedContent(Obj); |
| 1518 m_pPreTextObj = pTextObj; | 1571 m_pPreTextObj = pTextObj; |
| 1519 m_perMatrix.Copy(formMatrix); | 1572 m_perMatrix.Copy(formMatrix); |
| 1520 return; | 1573 return; |
| 1521 } | 1574 } |
| 1522 m_pPreTextObj = pTextObj; | 1575 m_pPreTextObj = pTextObj; |
| 1523 m_perMatrix.Copy(formMatrix); | 1576 m_perMatrix.Copy(formMatrix); |
| 1524 int nItems = pTextObj->CountItems(); | 1577 int nItems = pTextObj->CountItems(); |
| 1525 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); | 1578 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); |
| 1526 | 1579 |
| 1580 #ifndef PDF_ENABLE_XFA | |
| 1527 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); | 1581 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); |
| 1528 const FX_BOOL bIsBidiAndMirrorInverse = | 1582 const FX_BOOL bIsBidiAndMirrorInverse = |
| 1583 #else | |
| 1584 FX_BOOL bIsBidiAndMirrosInverse = FALSE; | |
|
> Review comment by Lei Zhang (2015/10/30 06:15:30): Weird that this file has this discrepancy.
| |
| 1585 CFX_BidiChar* BidiChar = new CFX_BidiChar; | |
| 1586 int32_t nR2L = 0; | |
| 1587 int32_t nL2R = 0; | |
| 1588 int32_t start = 0, count = 0; | |
| 1589 CPDF_TextObjectItem item; | |
| 1590 for (int32_t i = 0; i < nItems; i++) { | |
| 1591 pTextObj->GetItemInfo(i, &item); | |
| 1592 if (item.m_CharCode == (FX_DWORD)-1) { | |
| 1593 continue; | |
| 1594 } | |
| 1595 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); | |
| 1596 FX_WCHAR wChar = wstrItem.GetAt(0); | |
| 1597 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { | |
| 1598 wChar = (FX_WCHAR)item.m_CharCode; | |
| 1599 } | |
| 1600 if (!wChar) { | |
| 1601 continue; | |
| 1602 } | |
| 1603 if (BidiChar && BidiChar->AppendChar(wChar)) { | |
| 1604 CFX_BidiChar::Direction ret = BidiChar->GetBidiInfo(&start, &count); | |
| 1605 if (ret == CFX_BidiChar::RIGHT) { | |
| 1606 nR2L++; | |
| 1607 } else if (ret == CFX_BidiChar::LEFT) { | |
| 1608 nL2R++; | |
| 1609 } | |
| 1610 } | |
| 1611 } | |
| 1612 if (BidiChar && BidiChar->EndChar()) { | |
| 1613 CFX_BidiChar::Direction ret = BidiChar->GetBidiInfo(&start, &count); | |
| 1614 if (ret == CFX_BidiChar::RIGHT) { | |
| 1615 nR2L++; | |
| 1616 } else if (ret == CFX_BidiChar::LEFT) { | |
| 1617 nL2R++; | |
| 1618 } | |
| 1619 } | |
| 1620 FX_BOOL bR2L = FALSE; | |
| 1621 if (nR2L > 0 && nR2L >= nL2R) { | |
| 1622 bR2L = TRUE; | |
| 1623 } | |
| 1624 bIsBidiAndMirrosInverse = | |
| 1625 #endif | |
| 1529 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; | 1626 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; |
| 1530 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); | 1627 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); |
| 1531 int32_t iCharListStartAppend = m_TempCharList.GetSize(); | 1628 int32_t iCharListStartAppend = m_TempCharList.GetSize(); |
| 1532 | 1629 |
| 1533 FX_FLOAT spacing = 0; | 1630 FX_FLOAT spacing = 0; |
| 1534 for (int i = 0; i < nItems; i++) { | 1631 for (int i = 0; i < nItems; i++) { |
| 1535 CPDF_TextObjectItem item; | 1632 CPDF_TextObjectItem item; |
| 1536 PAGECHAR_INFO charinfo; | 1633 PAGECHAR_INFO charinfo; |
| 1537 charinfo.m_OriginX = 0; | 1634 charinfo.m_OriginX = 0; |
| 1538 charinfo.m_OriginY = 0; | 1635 charinfo.m_OriginY = 0; |
| (...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1677 } else if (i == 0) { | 1774 } else if (i == 0) { |
| 1678 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1775 CFX_WideString str = m_TempTextBuf.GetWideString(); |
| 1679 if (!str.IsEmpty() && | 1776 if (!str.IsEmpty() && |
| 1680 str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { | 1777 str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { |
| 1681 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1778 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
| 1682 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); | 1779 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); |
| 1683 } | 1780 } |
| 1684 } | 1781 } |
| 1685 } | 1782 } |
| 1686 } | 1783 } |
| 1784 #ifndef PDF_ENABLE_XFA | |
| 1687 if (bIsBidiAndMirrorInverse) { | 1785 if (bIsBidiAndMirrorInverse) { |
| 1688 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); | 1786 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); |
| 1689 } | 1787 } |
| 1690 } | 1788 } |
| 1691 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, | 1789 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, |
| 1692 int32_t iBufStartAppend) { | 1790 int32_t iBufStartAppend) { |
| 1693 int32_t i, j; | 1791 int32_t i, j; |
| 1694 i = iCharListStartAppend; | 1792 i = iCharListStartAppend; |
| 1695 j = m_TempCharList.GetSize() - 1; | 1793 j = m_TempCharList.GetSize() - 1; |
| 1696 for (; i < j; i++, j--) { | 1794 for (; i < j; i++, j--) { |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 1725 if (!wChar) { | 1823 if (!wChar) { |
| 1726 continue; | 1824 continue; |
| 1727 } | 1825 } |
| 1728 if (pBidiChar->AppendChar(wChar)) { | 1826 if (pBidiChar->AppendChar(wChar)) { |
| 1729 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | 1827 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); |
| 1730 if (ret == CFX_BidiChar::RIGHT) { | 1828 if (ret == CFX_BidiChar::RIGHT) { |
| 1731 nR2L++; | 1829 nR2L++; |
| 1732 } else if (ret == CFX_BidiChar::LEFT) { | 1830 } else if (ret == CFX_BidiChar::LEFT) { |
| 1733 nL2R++; | 1831 nL2R++; |
| 1734 } | 1832 } |
| 1833 #else | |
| 1834 if (bIsBidiAndMirrosInverse) { | |
| 1835 int32_t i, j; | |
| 1836 i = iCharListStartAppend; | |
| 1837 j = m_TempCharList.GetSize() - 1; | |
| 1838 for (; i < j; i++, j--) { | |
| 1839 std::swap(m_TempCharList[i], m_TempCharList[j]); | |
| 1840 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); | |
| 1841 #endif | |
| 1735 } | 1842 } |
| 1843 #ifndef PDF_ENABLE_XFA | |
| 1736 } | 1844 } |
| 1737 if (pBidiChar->EndChar()) { | 1845 if (pBidiChar->EndChar()) { |
| 1738 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | 1846 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); |
| 1739 if (ret == CFX_BidiChar::RIGHT) { | 1847 if (ret == CFX_BidiChar::RIGHT) { |
| 1740 nR2L++; | 1848 nR2L++; |
| 1741 } else if (ret == CFX_BidiChar::LEFT) { | 1849 } else if (ret == CFX_BidiChar::LEFT) { |
| 1742 nL2R++; | 1850 nL2R++; |
| 1851 #else | |
| 1852 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer(); | |
| 1853 i = iBufStartAppend; | |
| 1854 j = m_TempTextBuf.GetLength() - 1; | |
| 1855 for (; i < j; i++, j--) { | |
| 1856 std::swap(pTempBuffer[i], pTempBuffer[j]); | |
| 1857 #endif | |
| 1743 } | 1858 } |
| 1744 } | 1859 } |
| 1860 #ifndef PDF_ENABLE_XFA | |
| 1745 return (nR2L > 0 && nR2L >= nL2R); | 1861 return (nR2L > 0 && nR2L >= nL2R); |
| 1862 #endif | |
| 1746 } | 1863 } |
| 1747 int32_t CPDF_TextPage::GetTextObjectWritingMode( | 1864 int32_t CPDF_TextPage::GetTextObjectWritingMode( |
| 1748 const CPDF_TextObject* pTextObj) { | 1865 const CPDF_TextObject* pTextObj) { |
| 1749 int32_t nChars = pTextObj->CountChars(); | 1866 int32_t nChars = pTextObj->CountChars(); |
| 1750 if (nChars == 1) { | 1867 if (nChars == 1) { |
| 1751 return m_TextlineDir; | 1868 return m_TextlineDir; |
| 1752 } | 1869 } |
| 1753 CPDF_TextObjectItem first, last; | 1870 CPDF_TextObjectItem first, last; |
| 1754 pTextObj->GetCharInfo(0, &first); | 1871 pTextObj->GetCharInfo(0, &first); |
| 1755 pTextObj->GetCharInfo(nChars - 1, &last); | 1872 pTextObj->GetCharInfo(nChars - 1, &last); |
| (...skipping 970 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2726 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2843 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
| 2727 return; | 2844 return; |
| 2728 } | 2845 } |
| 2729 CPDF_LinkExt* link = NULL; | 2846 CPDF_LinkExt* link = NULL; |
| 2730 link = m_LinkList.GetAt(index); | 2847 link = m_LinkList.GetAt(index); |
| 2731 if (!link) { | 2848 if (!link) { |
| 2732 return; | 2849 return; |
| 2733 } | 2850 } |
| 2734 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2851 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
| 2735 } | 2852 } |
| OLD | NEW |