Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(233)

Side by Side Diff: core/src/fpdftext/fpdf_text_int.cpp

Issue 1398383002: core/ difference with XFA (for information only). (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: After bidi Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include <ctype.h> 7 #include <ctype.h>
8 #include <algorithm> 8 #include <algorithm>
9 9
10 #include "../../../third_party/base/nonstd_unique_ptr.h" 10 #include "../../../third_party/base/nonstd_unique_ptr.h"
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 } 74 }
75 return baseSpace; 75 return baseSpace;
76 } 76 }
77 77
78 } // namespace 78 } // namespace
79 79
80 CPDFText_ParseOptions::CPDFText_ParseOptions() 80 CPDFText_ParseOptions::CPDFText_ParseOptions()
81 : m_bGetCharCodeOnly(FALSE), 81 : m_bGetCharCodeOnly(FALSE),
82 m_bNormalizeObjs(TRUE), 82 m_bNormalizeObjs(TRUE),
83 m_bOutputHyphen(FALSE) {} 83 m_bOutputHyphen(FALSE) {}
84 #ifndef PDF_ENABLE_XFA
84 85
86 #else
87 IPDF_TextPage* IPDF_TextPage::CreateTextPage(
88 const CPDF_Page* pPage,
89 CPDFText_ParseOptions ParserOptions) {
90 return new CPDF_TextPage(pPage, ParserOptions);
91 }
92 #endif
85 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, 93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage,
86 int flags) { 94 int flags) {
87 return new CPDF_TextPage(pPage, flags); 95 return new CPDF_TextPage(pPage, flags);
88 } 96 }
97 #ifndef PDF_ENABLE_XFA
89 98
99 #else
100 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs,
101 int flags) {
102 return new CPDF_TextPage(pObjs, flags);
103 }
104 #endif
90 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( 105 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind(
91 const IPDF_TextPage* pTextPage) { 106 const IPDF_TextPage* pTextPage) {
92 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; 107 return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr;
93 } 108 }
109 #ifndef PDF_ENABLE_XFA
94 110
111 #endif
95 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { 112 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() {
96 return new CPDF_LinkExtract(); 113 return new CPDF_LinkExtract();
97 } 114 }
115 #ifndef PDF_ENABLE_XFA
98 116
117 #endif
99 #define TEXT_BLANK_CHAR L' ' 118 #define TEXT_BLANK_CHAR L' '
100 #define TEXT_LINEFEED_CHAR L'\n' 119 #define TEXT_LINEFEED_CHAR L'\n'
101 #define TEXT_RETURN_CHAR L'\r' 120 #define TEXT_RETURN_CHAR L'\r'
102 #define TEXT_EMPTY L"" 121 #define TEXT_EMPTY L""
103 #define TEXT_BLANK L" " 122 #define TEXT_BLANK L" "
104 #define TEXT_RETURN_LINEFEED L"\r\n" 123 #define TEXT_RETURN_LINEFEED L"\r\n"
105 #define TEXT_LINEFEED L"\n" 124 #define TEXT_LINEFEED L"\n"
106 #define TEXT_CHARRATIO_GAPDELTA 0.070 125 #define TEXT_CHARRATIO_GAPDELTA 0.070
107 126
108 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) 127 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags)
109 : m_pPage(pPage), 128 : m_pPage(pPage),
110 m_charList(512), 129 m_charList(512),
111 m_TempCharList(50), 130 m_TempCharList(50),
112 m_parserflag(flags), 131 m_parserflag(flags),
113 m_pPreTextObj(nullptr), 132 m_pPreTextObj(nullptr),
114 m_bIsParsed(false), 133 m_bIsParsed(false),
115 m_TextlineDir(-1), 134 m_TextlineDir(-1),
116 m_CurlineRect(0, 0, 0, 0) { 135 m_CurlineRect(0, 0, 0, 0) {
117 m_TextBuf.EstimateSize(0, 10240); 136 m_TextBuf.EstimateSize(0, 10240);
118 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), 137 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
119 (int)pPage->GetPageHeight(), 0); 138 (int)pPage->GetPageHeight(), 0);
120 } 139 }
121 140
141 #ifdef PDF_ENABLE_XFA
142 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage,
143 CPDFText_ParseOptions ParserOptions)
144 : m_ParseOptions(ParserOptions),
145 m_pPage(pPage),
146 m_charList(512),
147 m_TempCharList(50),
148 m_parserflag(0),
149 m_pPreTextObj(nullptr),
150 m_bIsParsed(false),
151 m_TextlineDir(-1),
152 m_CurlineRect(0, 0, 0, 0) {
153 m_TextBuf.EstimateSize(0, 10240);
154 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
155 (int)pPage->GetPageHeight(), 0);
156 }
157
158 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags)
159 : m_pPage(pPage),
160 m_charList(512),
161 m_TempCharList(50),
162 m_parserflag(flags),
163 m_pPreTextObj(nullptr),
164 m_bIsParsed(false),
165 m_TextlineDir(-1),
166 m_CurlineRect(0, 0, 0, 0) {
167 m_TextBuf.EstimateSize(0, 10240);
168 CFX_FloatRect pageRect = pPage->CalcBoundingBox();
169 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top);
170 }
171 #endif
122 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { 172 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) {
123 m_ParseOptions.m_bNormalizeObjs = bNormalize; 173 m_ParseOptions.m_bNormalizeObjs = bNormalize;
124 } 174 }
125 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { 175 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) {
126 switch (charInfo.m_Unicode) { 176 switch (charInfo.m_Unicode) {
127 case 0x2: 177 case 0x2:
128 case 0x3: 178 case 0x3:
129 case 0x93: 179 case 0x93:
130 case 0x94: 180 case 0x94:
131 case 0x96: 181 case 0x96:
(...skipping 1217 matching lines...) Expand 10 before | Expand all | Expand 10 after
1349 int nContentMark = pMarkData->CountItems(); 1399 int nContentMark = pMarkData->CountItems();
1350 if (nContentMark < 1) { 1400 if (nContentMark < 1) {
1351 return; 1401 return;
1352 } 1402 }
1353 CFX_WideString actText; 1403 CFX_WideString actText;
1354 CPDF_Dictionary* pDict = NULL; 1404 CPDF_Dictionary* pDict = NULL;
1355 int n = 0; 1405 int n = 0;
1356 for (n = 0; n < nContentMark; n++) { 1406 for (n = 0; n < nContentMark; n++) {
1357 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); 1407 CPDF_ContentMarkItem& item = pMarkData->GetItem(n);
1358 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); 1408 CFX_ByteString tagStr = (CFX_ByteString)item.GetName();
1409 #ifdef PDF_ENABLE_XFA
1410
1411 #endif
1359 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam())); 1412 pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam()));
1360 CPDF_String* temp = 1413 CPDF_String* temp =
1361 ToString(pDict ? pDict->GetElement(FX_BSTRC("ActualText")) : nullptr); 1414 ToString(pDict ? pDict->GetElement(FX_BSTRC("ActualText")) : nullptr);
1362 if (temp) { 1415 if (temp) {
1363 actText = temp->GetUnicodeText(); 1416 actText = temp->GetUnicodeText();
1364 } 1417 }
1365 } 1418 }
1366 FX_STRSIZE nItems = actText.GetLength(); 1419 FX_STRSIZE nItems = actText.GetLength();
1367 if (nItems < 1) { 1420 if (nItems < 1) {
1368 return; 1421 return;
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after
1517 ProcessMarkedContent(Obj); 1570 ProcessMarkedContent(Obj);
1518 m_pPreTextObj = pTextObj; 1571 m_pPreTextObj = pTextObj;
1519 m_perMatrix.Copy(formMatrix); 1572 m_perMatrix.Copy(formMatrix);
1520 return; 1573 return;
1521 } 1574 }
1522 m_pPreTextObj = pTextObj; 1575 m_pPreTextObj = pTextObj;
1523 m_perMatrix.Copy(formMatrix); 1576 m_perMatrix.Copy(formMatrix);
1524 int nItems = pTextObj->CountItems(); 1577 int nItems = pTextObj->CountItems();
1525 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); 1578 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix);
1526 1579
1580 #ifndef PDF_ENABLE_XFA
1527 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); 1581 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems);
1528 const FX_BOOL bIsBidiAndMirrorInverse = 1582 const FX_BOOL bIsBidiAndMirrorInverse =
1583 #else
1584 FX_BOOL bIsBidiAndMirrosInverse = FALSE;
Lei Zhang 2015/10/30 06:15:30 Weird that this file has this discrepancy.
1585 CFX_BidiChar* BidiChar = new CFX_BidiChar;
1586 int32_t nR2L = 0;
1587 int32_t nL2R = 0;
1588 int32_t start = 0, count = 0;
1589 CPDF_TextObjectItem item;
1590 for (int32_t i = 0; i < nItems; i++) {
1591 pTextObj->GetItemInfo(i, &item);
1592 if (item.m_CharCode == (FX_DWORD)-1) {
1593 continue;
1594 }
1595 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
1596 FX_WCHAR wChar = wstrItem.GetAt(0);
1597 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) {
1598 wChar = (FX_WCHAR)item.m_CharCode;
1599 }
1600 if (!wChar) {
1601 continue;
1602 }
1603 if (BidiChar && BidiChar->AppendChar(wChar)) {
1604 CFX_BidiChar::Direction ret = BidiChar->GetBidiInfo(&start, &count);
1605 if (ret == CFX_BidiChar::RIGHT) {
1606 nR2L++;
1607 } else if (ret == CFX_BidiChar::LEFT) {
1608 nL2R++;
1609 }
1610 }
1611 }
1612 if (BidiChar && BidiChar->EndChar()) {
1613 CFX_BidiChar::Direction ret = BidiChar->GetBidiInfo(&start, &count);
1614 if (ret == CFX_BidiChar::RIGHT) {
1615 nR2L++;
1616 } else if (ret == CFX_BidiChar::LEFT) {
1617 nL2R++;
1618 }
1619 }
1620 FX_BOOL bR2L = FALSE;
1621 if (nR2L > 0 && nR2L >= nL2R) {
1622 bR2L = TRUE;
1623 }
1624 bIsBidiAndMirrosInverse =
1625 #endif
1529 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; 1626 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0;
1530 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); 1627 int32_t iBufStartAppend = m_TempTextBuf.GetLength();
1531 int32_t iCharListStartAppend = m_TempCharList.GetSize(); 1628 int32_t iCharListStartAppend = m_TempCharList.GetSize();
1532 1629
1533 FX_FLOAT spacing = 0; 1630 FX_FLOAT spacing = 0;
1534 for (int i = 0; i < nItems; i++) { 1631 for (int i = 0; i < nItems; i++) {
1535 CPDF_TextObjectItem item; 1632 CPDF_TextObjectItem item;
1536 PAGECHAR_INFO charinfo; 1633 PAGECHAR_INFO charinfo;
1537 charinfo.m_OriginX = 0; 1634 charinfo.m_OriginX = 0;
1538 charinfo.m_OriginY = 0; 1635 charinfo.m_OriginY = 0;
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
1677 } else if (i == 0) { 1774 } else if (i == 0) {
1678 CFX_WideString str = m_TempTextBuf.GetWideString(); 1775 CFX_WideString str = m_TempTextBuf.GetWideString();
1679 if (!str.IsEmpty() && 1776 if (!str.IsEmpty() &&
1680 str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { 1777 str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) {
1681 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); 1778 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
1682 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); 1779 m_TempCharList.Delete(m_TempCharList.GetSize() - 1);
1683 } 1780 }
1684 } 1781 }
1685 } 1782 }
1686 } 1783 }
1784 #ifndef PDF_ENABLE_XFA
1687 if (bIsBidiAndMirrorInverse) { 1785 if (bIsBidiAndMirrorInverse) {
1688 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); 1786 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend);
1689 } 1787 }
1690 } 1788 }
1691 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, 1789 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend,
1692 int32_t iBufStartAppend) { 1790 int32_t iBufStartAppend) {
1693 int32_t i, j; 1791 int32_t i, j;
1694 i = iCharListStartAppend; 1792 i = iCharListStartAppend;
1695 j = m_TempCharList.GetSize() - 1; 1793 j = m_TempCharList.GetSize() - 1;
1696 for (; i < j; i++, j--) { 1794 for (; i < j; i++, j--) {
(...skipping 28 matching lines...) Expand all
1725 if (!wChar) { 1823 if (!wChar) {
1726 continue; 1824 continue;
1727 } 1825 }
1728 if (pBidiChar->AppendChar(wChar)) { 1826 if (pBidiChar->AppendChar(wChar)) {
1729 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); 1827 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count);
1730 if (ret == CFX_BidiChar::RIGHT) { 1828 if (ret == CFX_BidiChar::RIGHT) {
1731 nR2L++; 1829 nR2L++;
1732 } else if (ret == CFX_BidiChar::LEFT) { 1830 } else if (ret == CFX_BidiChar::LEFT) {
1733 nL2R++; 1831 nL2R++;
1734 } 1832 }
1833 #else
1834 if (bIsBidiAndMirrosInverse) {
1835 int32_t i, j;
1836 i = iCharListStartAppend;
1837 j = m_TempCharList.GetSize() - 1;
1838 for (; i < j; i++, j--) {
1839 std::swap(m_TempCharList[i], m_TempCharList[j]);
1840 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index);
1841 #endif
1735 } 1842 }
1843 #ifndef PDF_ENABLE_XFA
1736 } 1844 }
1737 if (pBidiChar->EndChar()) { 1845 if (pBidiChar->EndChar()) {
1738 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); 1846 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count);
1739 if (ret == CFX_BidiChar::RIGHT) { 1847 if (ret == CFX_BidiChar::RIGHT) {
1740 nR2L++; 1848 nR2L++;
1741 } else if (ret == CFX_BidiChar::LEFT) { 1849 } else if (ret == CFX_BidiChar::LEFT) {
1742 nL2R++; 1850 nL2R++;
1851 #else
1852 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer();
1853 i = iBufStartAppend;
1854 j = m_TempTextBuf.GetLength() - 1;
1855 for (; i < j; i++, j--) {
1856 std::swap(pTempBuffer[i], pTempBuffer[j]);
1857 #endif
1743 } 1858 }
1744 } 1859 }
1860 #ifndef PDF_ENABLE_XFA
1745 return (nR2L > 0 && nR2L >= nL2R); 1861 return (nR2L > 0 && nR2L >= nL2R);
1862 #endif
1746 } 1863 }
1747 int32_t CPDF_TextPage::GetTextObjectWritingMode( 1864 int32_t CPDF_TextPage::GetTextObjectWritingMode(
1748 const CPDF_TextObject* pTextObj) { 1865 const CPDF_TextObject* pTextObj) {
1749 int32_t nChars = pTextObj->CountChars(); 1866 int32_t nChars = pTextObj->CountChars();
1750 if (nChars == 1) { 1867 if (nChars == 1) {
1751 return m_TextlineDir; 1868 return m_TextlineDir;
1752 } 1869 }
1753 CPDF_TextObjectItem first, last; 1870 CPDF_TextObjectItem first, last;
1754 pTextObj->GetCharInfo(0, &first); 1871 pTextObj->GetCharInfo(0, &first);
1755 pTextObj->GetCharInfo(nChars - 1, &last); 1872 pTextObj->GetCharInfo(nChars - 1, &last);
(...skipping 970 matching lines...) Expand 10 before | Expand all | Expand 10 after
2726 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { 2843 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2727 return; 2844 return;
2728 } 2845 }
2729 CPDF_LinkExt* link = NULL; 2846 CPDF_LinkExt* link = NULL;
2730 link = m_LinkList.GetAt(index); 2847 link = m_LinkList.GetAt(index);
2731 if (!link) { 2848 if (!link) {
2732 return; 2849 return;
2733 } 2850 }
2734 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2851 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2735 } 2852 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698