OLD | NEW |
---|---|
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <ctype.h> | 7 #include <ctype.h> |
8 #include <algorithm> | 8 #include <algorithm> |
9 | 9 |
10 #include "../../../third_party/base/nonstd_unique_ptr.h" | 10 #include "../../../third_party/base/nonstd_unique_ptr.h" |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
74 } | 74 } |
75 return baseSpace; | 75 return baseSpace; |
76 } | 76 } |
77 | 77 |
78 } // namespace | 78 } // namespace |
79 | 79 |
80 CPDFText_ParseOptions::CPDFText_ParseOptions() | 80 CPDFText_ParseOptions::CPDFText_ParseOptions() |
81 : m_bGetCharCodeOnly(FALSE), | 81 : m_bGetCharCodeOnly(FALSE), |
82 m_bNormalizeObjs(TRUE), | 82 m_bNormalizeObjs(TRUE), |
83 m_bOutputHyphen(FALSE) {} | 83 m_bOutputHyphen(FALSE) {} |
84 #ifndef PDF_ENABLE_XFA | |
84 | 85 |
86 #else | |
87 IPDF_TextPage* IPDF_TextPage::CreateTextPage( | |
88 const CPDF_Page* pPage, | |
89 CPDFText_ParseOptions ParserOptions) { | |
90 return new CPDF_TextPage(pPage, ParserOptions); | |
91 } | |
92 #endif | |
85 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, | 93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, |
86 int flags) { | 94 int flags) { |
87 return new CPDF_TextPage(pPage, flags); | 95 return new CPDF_TextPage(pPage, flags); |
88 } | 96 } |
97 #ifndef PDF_ENABLE_XFA | |
89 | 98 |
99 #else | |
100 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, | |
101 int flags) { | |
102 return new CPDF_TextPage(pObjs, flags); | |
103 } | |
104 #endif | |
90 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( | 105 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( |
91 const IPDF_TextPage* pTextPage) { | 106 const IPDF_TextPage* pTextPage) { |
92 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; | 107 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; |
93 } | 108 } |
109 #ifndef PDF_ENABLE_XFA | |
94 | 110 |
111 #endif | |
95 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { | 112 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { |
96 return new CPDF_LinkExtract(); | 113 return new CPDF_LinkExtract(); |
97 } | 114 } |
115 #ifndef PDF_ENABLE_XFA | |
98 | 116 |
117 #endif | |
99 #define TEXT_BLANK_CHAR L' ' | 118 #define TEXT_BLANK_CHAR L' ' |
100 #define TEXT_LINEFEED_CHAR L'\n' | 119 #define TEXT_LINEFEED_CHAR L'\n' |
101 #define TEXT_RETURN_CHAR L'\r' | 120 #define TEXT_RETURN_CHAR L'\r' |
102 #define TEXT_EMPTY L"" | 121 #define TEXT_EMPTY L"" |
103 #define TEXT_BLANK L" " | 122 #define TEXT_BLANK L" " |
104 #define TEXT_RETURN_LINEFEED L"\r\n" | 123 #define TEXT_RETURN_LINEFEED L"\r\n" |
105 #define TEXT_LINEFEED L"\n" | 124 #define TEXT_LINEFEED L"\n" |
106 #define TEXT_CHARRATIO_GAPDELTA 0.070 | 125 #define TEXT_CHARRATIO_GAPDELTA 0.070 |
107 | 126 |
108 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) | 127 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) |
109 : m_pPage(pPage), | 128 : m_pPage(pPage), |
110 m_charList(512), | 129 m_charList(512), |
111 m_TempCharList(50), | 130 m_TempCharList(50), |
112 m_parserflag(flags), | 131 m_parserflag(flags), |
113 m_pPreTextObj(nullptr), | 132 m_pPreTextObj(nullptr), |
114 m_bIsParsed(false), | 133 m_bIsParsed(false), |
115 m_TextlineDir(-1), | 134 m_TextlineDir(-1), |
116 m_CurlineRect(0, 0, 0, 0) { | 135 m_CurlineRect(0, 0, 0, 0) { |
117 m_TextBuf.EstimateSize(0, 10240); | 136 m_TextBuf.EstimateSize(0, 10240); |
118 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | 137 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
119 (int)pPage->GetPageHeight(), 0); | 138 (int)pPage->GetPageHeight(), 0); |
120 } | 139 } |
121 | 140 |
141 #ifdef PDF_ENABLE_XFA | |
142 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, | |
143 CPDFText_ParseOptions ParserOptions) | |
144 : m_ParseOptions(ParserOptions), | |
145 m_pPage(pPage), | |
146 m_charList(512), | |
147 m_TempCharList(50), | |
148 m_parserflag(0), | |
149 m_pPreTextObj(nullptr), | |
150 m_bIsParsed(false), | |
151 m_TextlineDir(-1), | |
152 m_CurlineRect(0, 0, 0, 0) { | |
153 m_TextBuf.EstimateSize(0, 10240); | |
154 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), | |
155 (int)pPage->GetPageHeight(), 0); | |
156 } | |
157 | |
158 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags) | |
159 : m_pPage(pPage), | |
160 m_charList(512), | |
161 m_TempCharList(50), | |
162 m_parserflag(flags), | |
163 m_pPreTextObj(nullptr), | |
164 m_bIsParsed(false), | |
165 m_TextlineDir(-1), | |
166 m_CurlineRect(0, 0, 0, 0) { | |
167 m_TextBuf.EstimateSize(0, 10240); | |
168 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); | |
169 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); | |
170 } | |
171 #endif | |
122 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { | 172 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { |
123 m_ParseOptions.m_bNormalizeObjs = bNormalize; | 173 m_ParseOptions.m_bNormalizeObjs = bNormalize; |
124 } | 174 } |
125 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { | 175 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { |
126 switch (charInfo.m_Unicode) { | 176 switch (charInfo.m_Unicode) { |
127 case 0x2: | 177 case 0x2: |
128 case 0x3: | 178 case 0x3: |
129 case 0x93: | 179 case 0x93: |
130 case 0x94: | 180 case 0x94: |
131 case 0x96: | 181 case 0x96: |
(...skipping 1217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1349 int nContentMark = pMarkData->CountItems(); | 1399 int nContentMark = pMarkData->CountItems(); |
1350 if (nContentMark < 1) { | 1400 if (nContentMark < 1) { |
1351 return; | 1401 return; |
1352 } | 1402 } |
1353 CFX_WideString actText; | 1403 CFX_WideString actText; |
1354 CPDF_Dictionary* pDict = NULL; | 1404 CPDF_Dictionary* pDict = NULL; |
1355 int n = 0; | 1405 int n = 0; |
1356 for (n = 0; n < nContentMark; n++) { | 1406 for (n = 0; n < nContentMark; n++) { |
1357 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); | 1407 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); |
1358 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); | 1408 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); |
1409 #ifdef PDF_ENABLE_XFA | |
1410 | |
1411 #endif | |
1359 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); | 1412 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); |
1360 CPDF_String* temp = | 1413 CPDF_String* temp = |
1361 ToString(pDict ? pDict->GetElement(FX_BSTRC("ActualText")) : nullptr); | 1414 ToString(pDict ? pDict->GetElement(FX_BSTRC("ActualText")) : nullptr); |
1362 if (temp) { | 1415 if (temp) { |
1363 actText = temp->GetUnicodeText(); | 1416 actText = temp->GetUnicodeText(); |
1364 } | 1417 } |
1365 } | 1418 } |
1366 FX_STRSIZE nItems = actText.GetLength(); | 1419 FX_STRSIZE nItems = actText.GetLength(); |
1367 if (nItems < 1) { | 1420 if (nItems < 1) { |
1368 return; | 1421 return; |
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1517 ProcessMarkedContent(Obj); | 1570 ProcessMarkedContent(Obj); |
1518 m_pPreTextObj = pTextObj; | 1571 m_pPreTextObj = pTextObj; |
1519 m_perMatrix.Copy(formMatrix); | 1572 m_perMatrix.Copy(formMatrix); |
1520 return; | 1573 return; |
1521 } | 1574 } |
1522 m_pPreTextObj = pTextObj; | 1575 m_pPreTextObj = pTextObj; |
1523 m_perMatrix.Copy(formMatrix); | 1576 m_perMatrix.Copy(formMatrix); |
1524 int nItems = pTextObj->CountItems(); | 1577 int nItems = pTextObj->CountItems(); |
1525 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); | 1578 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); |
1526 | 1579 |
1580 #ifndef PDF_ENABLE_XFA | |
1527 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); | 1581 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); |
1528 const FX_BOOL bIsBidiAndMirrorInverse = | 1582 const FX_BOOL bIsBidiAndMirrorInverse = |
1583 #else | |
1584 FX_BOOL bIsBidiAndMirrosInverse = FALSE; | |
Lei Zhang
2015/10/30 06:15:30
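Weird that this file has this discrepancy: the non-XFA branch calls the IsRightToLeft() and SwapTempTextBuf() helpers, while the XFA branch inlines what looks like the equivalent bidi-detection and buffer-swap logic.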
| |
1585 CFX_BidiChar* BidiChar = new CFX_BidiChar; | |
1586 int32_t nR2L = 0; | |
1587 int32_t nL2R = 0; | |
1588 int32_t start = 0, count = 0; | |
1589 CPDF_TextObjectItem item; | |
1590 for (int32_t i = 0; i < nItems; i++) { | |
1591 pTextObj->GetItemInfo(i, &item); | |
1592 if (item.m_CharCode == (FX_DWORD)-1) { | |
1593 continue; | |
1594 } | |
1595 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); | |
1596 FX_WCHAR wChar = wstrItem.GetAt(0); | |
1597 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { | |
1598 wChar = (FX_WCHAR)item.m_CharCode; | |
1599 } | |
1600 if (!wChar) { | |
1601 continue; | |
1602 } | |
1603 if (BidiChar && BidiChar->AppendChar(wChar)) { | |
1604 CFX_BidiChar::Direction ret = BidiChar->GetBidiInfo(&start, &count); | |
1605 if (ret == CFX_BidiChar::RIGHT) { | |
1606 nR2L++; | |
1607 } else if (ret == CFX_BidiChar::LEFT) { | |
1608 nL2R++; | |
1609 } | |
1610 } | |
1611 } | |
1612 if (BidiChar && BidiChar->EndChar()) { | |
1613 CFX_BidiChar::Direction ret = BidiChar->GetBidiInfo(&start, &count); | |
1614 if (ret == CFX_BidiChar::RIGHT) { | |
1615 nR2L++; | |
1616 } else if (ret == CFX_BidiChar::LEFT) { | |
1617 nL2R++; | |
1618 } | |
1619 } | |
1620 FX_BOOL bR2L = FALSE; | |
1621 if (nR2L > 0 && nR2L >= nL2R) { | |
1622 bR2L = TRUE; | |
1623 } | |
1624 bIsBidiAndMirrosInverse = | |
1625 #endif | |
1529 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; | 1626 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; |
1530 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); | 1627 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); |
1531 int32_t iCharListStartAppend = m_TempCharList.GetSize(); | 1628 int32_t iCharListStartAppend = m_TempCharList.GetSize(); |
1532 | 1629 |
1533 FX_FLOAT spacing = 0; | 1630 FX_FLOAT spacing = 0; |
1534 for (int i = 0; i < nItems; i++) { | 1631 for (int i = 0; i < nItems; i++) { |
1535 CPDF_TextObjectItem item; | 1632 CPDF_TextObjectItem item; |
1536 PAGECHAR_INFO charinfo; | 1633 PAGECHAR_INFO charinfo; |
1537 charinfo.m_OriginX = 0; | 1634 charinfo.m_OriginX = 0; |
1538 charinfo.m_OriginY = 0; | 1635 charinfo.m_OriginY = 0; |
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1677 } else if (i == 0) { | 1774 } else if (i == 0) { |
1678 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1775 CFX_WideString str = m_TempTextBuf.GetWideString(); |
1679 if (!str.IsEmpty() && | 1776 if (!str.IsEmpty() && |
1680 str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { | 1777 str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { |
1681 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1778 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
1682 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); | 1779 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); |
1683 } | 1780 } |
1684 } | 1781 } |
1685 } | 1782 } |
1686 } | 1783 } |
1784 #ifndef PDF_ENABLE_XFA | |
1687 if (bIsBidiAndMirrorInverse) { | 1785 if (bIsBidiAndMirrorInverse) { |
1688 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); | 1786 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); |
1689 } | 1787 } |
1690 } | 1788 } |
1691 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, | 1789 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, |
1692 int32_t iBufStartAppend) { | 1790 int32_t iBufStartAppend) { |
1693 int32_t i, j; | 1791 int32_t i, j; |
1694 i = iCharListStartAppend; | 1792 i = iCharListStartAppend; |
1695 j = m_TempCharList.GetSize() - 1; | 1793 j = m_TempCharList.GetSize() - 1; |
1696 for (; i < j; i++, j--) { | 1794 for (; i < j; i++, j--) { |
(...skipping 28 matching lines...) Expand all Loading... | |
1725 if (!wChar) { | 1823 if (!wChar) { |
1726 continue; | 1824 continue; |
1727 } | 1825 } |
1728 if (pBidiChar->AppendChar(wChar)) { | 1826 if (pBidiChar->AppendChar(wChar)) { |
1729 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | 1827 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); |
1730 if (ret == CFX_BidiChar::RIGHT) { | 1828 if (ret == CFX_BidiChar::RIGHT) { |
1731 nR2L++; | 1829 nR2L++; |
1732 } else if (ret == CFX_BidiChar::LEFT) { | 1830 } else if (ret == CFX_BidiChar::LEFT) { |
1733 nL2R++; | 1831 nL2R++; |
1734 } | 1832 } |
1833 #else | |
1834 if (bIsBidiAndMirrosInverse) { | |
1835 int32_t i, j; | |
1836 i = iCharListStartAppend; | |
1837 j = m_TempCharList.GetSize() - 1; | |
1838 for (; i < j; i++, j--) { | |
1839 std::swap(m_TempCharList[i], m_TempCharList[j]); | |
1840 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); | |
1841 #endif | |
1735 } | 1842 } |
1843 #ifndef PDF_ENABLE_XFA | |
1736 } | 1844 } |
1737 if (pBidiChar->EndChar()) { | 1845 if (pBidiChar->EndChar()) { |
1738 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | 1846 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); |
1739 if (ret == CFX_BidiChar::RIGHT) { | 1847 if (ret == CFX_BidiChar::RIGHT) { |
1740 nR2L++; | 1848 nR2L++; |
1741 } else if (ret == CFX_BidiChar::LEFT) { | 1849 } else if (ret == CFX_BidiChar::LEFT) { |
1742 nL2R++; | 1850 nL2R++; |
1851 #else | |
1852 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer(); | |
1853 i = iBufStartAppend; | |
1854 j = m_TempTextBuf.GetLength() - 1; | |
1855 for (; i < j; i++, j--) { | |
1856 std::swap(pTempBuffer[i], pTempBuffer[j]); | |
1857 #endif | |
1743 } | 1858 } |
1744 } | 1859 } |
1860 #ifndef PDF_ENABLE_XFA | |
1745 return (nR2L > 0 && nR2L >= nL2R); | 1861 return (nR2L > 0 && nR2L >= nL2R); |
1862 #endif | |
1746 } | 1863 } |
1747 int32_t CPDF_TextPage::GetTextObjectWritingMode( | 1864 int32_t CPDF_TextPage::GetTextObjectWritingMode( |
1748 const CPDF_TextObject* pTextObj) { | 1865 const CPDF_TextObject* pTextObj) { |
1749 int32_t nChars = pTextObj->CountChars(); | 1866 int32_t nChars = pTextObj->CountChars(); |
1750 if (nChars == 1) { | 1867 if (nChars == 1) { |
1751 return m_TextlineDir; | 1868 return m_TextlineDir; |
1752 } | 1869 } |
1753 CPDF_TextObjectItem first, last; | 1870 CPDF_TextObjectItem first, last; |
1754 pTextObj->GetCharInfo(0, &first); | 1871 pTextObj->GetCharInfo(0, &first); |
1755 pTextObj->GetCharInfo(nChars - 1, &last); | 1872 pTextObj->GetCharInfo(nChars - 1, &last); |
(...skipping 970 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2726 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2843 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2727 return; | 2844 return; |
2728 } | 2845 } |
2729 CPDF_LinkExt* link = NULL; | 2846 CPDF_LinkExt* link = NULL; |
2730 link = m_LinkList.GetAt(index); | 2847 link = m_LinkList.GetAt(index); |
2731 if (!link) { | 2848 if (!link) { |
2732 return; | 2849 return; |
2733 } | 2850 } |
2734 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2851 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2735 } | 2852 } |
OLD | NEW |