Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(400)

Side by Side Diff: core/src/fpdftext/fpdf_text_int.cpp

Issue 810223003: Cleanup: Refactor some code into its own function in fpdf_text_int.cpp. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: address comments Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "../../include/fpdfapi/fpdf_resource.h" 7 #include "../../include/fpdfapi/fpdf_resource.h"
8 #include "../../include/fpdfapi/fpdf_pageobj.h" 8 #include "../../include/fpdfapi/fpdf_pageobj.h"
9 #include "../../include/fpdftext/fpdf_text.h" 9 #include "../../include/fpdftext/fpdf_text.h"
10 #include "../../include/fpdfapi/fpdf_page.h" 10 #include "../../include/fpdfapi/fpdf_page.h"
11 #include "../../include/fpdfapi/fpdf_module.h" 11 #include "../../include/fpdfapi/fpdf_module.h"
12 #include <ctype.h> 12 #include <ctype.h>
13 #include <algorithm>
13 #include "text_int.h" 14 #include "text_int.h"
15
16 namespace {
17
14 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) 18 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar)
15 { 19 {
16 if(curChar < 255 ) { 20 if(curChar < 255 ) {
17 return FALSE; 21 return FALSE;
18 } 22 }
19 if ( (curChar >= 0x0600 && curChar <= 0x06FF) 23 if ( (curChar >= 0x0600 && curChar <= 0x06FF)
20 || (curChar >= 0xFE70 && curChar <= 0xFEFF) 24 || (curChar >= 0xFE70 && curChar <= 0xFEFF)
21 || (curChar >= 0xFB50 && curChar <= 0xFDFF) 25 || (curChar >= 0xFB50 && curChar <= 0xFDFF)
22 || (curChar >= 0x0400 && curChar <= 0x04FF) 26 || (curChar >= 0x0400 && curChar <= 0x04FF)
23 || (curChar >= 0x0500 && curChar <= 0x052F) 27 || (curChar >= 0x0500 && curChar <= 0x052F)
24 || (curChar >= 0xA640 && curChar <= 0xA69F) 28 || (curChar >= 0xA640 && curChar <= 0xA69F)
25 || (curChar >= 0x2DE0 && curChar <= 0x2DFF) 29 || (curChar >= 0x2DE0 && curChar <= 0x2DFF)
26 || curChar == 8467 30 || curChar == 8467
27 || (curChar >= 0x2000 && curChar <= 0x206F)) { 31 || (curChar >= 0x2000 && curChar <= 0x206F)) {
28 return FALSE; 32 return FALSE;
29 } 33 }
30 return TRUE; 34 return TRUE;
31 } 35 }
36
37 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold)
38 {
39 if (threshold < 300) {
40 return threshold / 2.0;
41 } else if (threshold < 500) {
42 return threshold / 4.0;
43 } else if (threshold < 700) {
44 return threshold / 5.0;
45 }
46 return threshold / 6.0;
47 }
48
49 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj,
50 const CFX_AffineMatrix& matrix)
51 {
52 FX_FLOAT baseSpace = 0.0;
53 const int nItems = pTextObj->CountItems();
54 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) {
55 FX_BOOL bAllChar = TRUE;
56 FX_FLOAT spacing = matrix.TransformDistance(
57 pTextObj->m_TextState.GetObject()->m_CharSpace);
58 baseSpace = spacing;
59 for (int i = 0; i < nItems; i++) {
60 CPDF_TextObjectItem item;
61 pTextObj->GetItemInfo(i, &item);
62 if (item.m_CharCode == (FX_DWORD) - 1) {
63 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
64 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000;
65 baseSpace = std::min(baseSpace, kerning + spacing);
66 bAllChar = FALSE;
67 }
68 }
69 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) {
70 baseSpace = 0.0;
71 }
72 }
73 return baseSpace;
74 }
75
76 } // namespace
77
32 CPDFText_ParseOptions::CPDFText_ParseOptions() 78 CPDFText_ParseOptions::CPDFText_ParseOptions()
33 : m_bGetCharCodeOnly(FALSE), m_bNormalizeObjs(TRUE), m_bOutputHyphen(FALSE) 79 : m_bGetCharCodeOnly(FALSE), m_bNormalizeObjs(TRUE), m_bOutputHyphen(FALSE)
34 { 80 {
35 } 81 }
36 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions) 82 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions)
37 { 83 {
38 CPDF_TextPage* pTextPageEx = FX_NEW CPDF_TextPage(pPage, ParserOptions); 84 CPDF_TextPage* pTextPageEx = FX_NEW CPDF_TextPage(pPage, ParserOptions);
39 return pTextPageEx; 85 return pTextPageEx;
40 } 86 }
41 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, int flags) 87 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, int flags)
(...skipping 1593 matching lines...) Expand 10 before | Expand all | Expand 10 after
1635 } 1681 }
1636 if (FPDFTEXT_MC_DELAY == bPreMKC) { 1682 if (FPDFTEXT_MC_DELAY == bPreMKC) {
1637 ProcessMarkedContent(Obj); 1683 ProcessMarkedContent(Obj);
1638 m_pPreTextObj = pTextObj; 1684 m_pPreTextObj = pTextObj;
1639 m_perMatrix.Copy(formMatrix); 1685 m_perMatrix.Copy(formMatrix);
1640 return; 1686 return;
1641 } 1687 }
1642 m_pPreTextObj = pTextObj; 1688 m_pPreTextObj = pTextObj;
1643 m_perMatrix.Copy(formMatrix); 1689 m_perMatrix.Copy(formMatrix);
1644 int nItems = pTextObj->CountItems(); 1690 int nItems = pTextObj->CountItems();
1645 FX_FLOAT spacing = 0; 1691 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix);
1646 FX_FLOAT baseSpace = 0.0;
1647 FX_BOOL bAllChar = TRUE;
1648 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) {
1649 spacing = matrix.TransformDistance(pTextObj->m_TextState.GetObject()->m_CharSpace);
1650 baseSpace = spacing;
1651 for (int i = 0; i < nItems; i++) {
1652 CPDF_TextObjectItem item;
1653 pTextObj->GetItemInfo(i, &item);
1654 if (item.m_CharCode == (FX_DWORD) - 1) {
1655 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
1656 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000;
1657 if(kerning + spacing < baseSpace) {
1658 baseSpace = kerning + spacing;
1659 }
1660 bAllChar = FALSE;
1661 }
1662 }
1663 spacing = 0;
1664 if(baseSpace < 0.0 || (nItems == 3 && !bAllChar)) {
1665 baseSpace = 0.0;
1666 }
1667 }
1668 1692
1669 FX_BOOL bIsBidiAndMirrosInverse = FALSE; 1693 FX_BOOL bIsBidiAndMirrosInverse = FALSE;
1670 IFX_BidiChar* BidiChar = IFX_BidiChar::Create(); 1694 IFX_BidiChar* BidiChar = IFX_BidiChar::Create();
1671 FX_INT32 nR2L = 0; 1695 FX_INT32 nR2L = 0;
1672 FX_INT32 nL2R = 0; 1696 FX_INT32 nL2R = 0;
1673 FX_INT32 start = 0, count = 0; 1697 FX_INT32 start = 0, count = 0;
1674 CPDF_TextObjectItem item; 1698 CPDF_TextObjectItem item;
1675 for (FX_INT32 i = 0; i < nItems; i++) { 1699 for (FX_INT32 i = 0; i < nItems; i++) {
1676 pTextObj->GetItemInfo(i, &item); 1700 pTextObj->GetItemInfo(i, &item);
1677 if (item.m_CharCode == (FX_DWORD)-1) { 1701 if (item.m_CharCode == (FX_DWORD)-1) {
(...skipping 27 matching lines...) Expand all
1705 } 1729 }
1706 } 1730 }
1707 FX_BOOL bR2L = FALSE; 1731 FX_BOOL bR2L = FALSE;
1708 if (nR2L > 0 && nR2L >= nL2R) { 1732 if (nR2L > 0 && nR2L >= nL2R) {
1709 bR2L = TRUE; 1733 bR2L = TRUE;
1710 } 1734 }
1711 bIsBidiAndMirrosInverse = bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; 1735 bIsBidiAndMirrosInverse = bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0;
1712 FX_INT32 iBufStartAppend = m_TempTextBuf.GetLength(); 1736 FX_INT32 iBufStartAppend = m_TempTextBuf.GetLength();
1713 FX_INT32 iCharListStartAppend = m_TempCharList.GetSize(); 1737 FX_INT32 iCharListStartAppend = m_TempCharList.GetSize();
1714 1738
1739 FX_FLOAT spacing = 0;
1715 for (int i = 0; i < nItems; i++) { 1740 for (int i = 0; i < nItems; i++) {
1716 CPDF_TextObjectItem item; 1741 CPDF_TextObjectItem item;
1717 PAGECHAR_INFO charinfo; 1742 PAGECHAR_INFO charinfo;
1718 charinfo.m_OriginX = 0; 1743 charinfo.m_OriginX = 0;
1719 charinfo.m_OriginY = 0; 1744 charinfo.m_OriginY = 0;
1720 pTextObj->GetItemInfo(i, &item); 1745 pTextObj->GetItemInfo(i, &item);
1721 if (item.m_CharCode == (FX_DWORD) - 1) { 1746 if (item.m_CharCode == (FX_DWORD) - 1) {
1722 CFX_WideString str = m_TempTextBuf.GetWideString(); 1747 CFX_WideString str = m_TempTextBuf.GetWideString();
1723 if(str.IsEmpty()) { 1748 if(str.IsEmpty()) {
1724 str = m_TextBuf.GetWideString(); 1749 str = m_TextBuf.GetWideString();
(...skipping 22 matching lines...) Expand all
1747 } 1772 }
1748 if (threshold > fontsize_h / 3) { 1773 if (threshold > fontsize_h / 3) {
1749 threshold = 0; 1774 threshold = 0;
1750 } else { 1775 } else {
1751 threshold /= 2; 1776 threshold /= 2;
1752 } 1777 }
1753 if (threshold == 0) { 1778 if (threshold == 0) {
1754 threshold = fontsize_h; 1779 threshold = fontsize_h;
1755 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); 1780 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont));
1756 threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX_FLOAT)last_width; 1781 threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX_FLOAT)last_width;
1757 int nDivide = 6; 1782 threshold = _NormalizeThreshold(threshold);
1758 if (threshold < 300) {
1759 nDivide = 2;
1760 } else if (threshold < 500) {
1761 nDivide = 4;
1762 } else if (threshold < 700) {
1763 nDivide = 5;
1764 }
1765 threshold = threshold / nDivide;
1766 threshold = fontsize_h * threshold / 1000; 1783 threshold = fontsize_h * threshold / 1000;
1767 } 1784 }
1768 if (threshold && (spacing && spacing >= threshold) ) { 1785 if (threshold && (spacing && spacing >= threshold) ) {
1769 charinfo.m_Unicode = TEXT_BLANK_CHAR; 1786 charinfo.m_Unicode = TEXT_BLANK_CHAR;
1770 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; 1787 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED;
1771 charinfo.m_pTextObj = pTextObj; 1788 charinfo.m_pTextObj = pTextObj;
1772 charinfo.m_Index = m_TextBuf.GetLength(); 1789 charinfo.m_Index = m_TextBuf.GetLength();
1773 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); 1790 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
1774 charinfo.m_CharCode = -1; 1791 charinfo.m_CharCode = -1;
1775 charinfo.m_Matrix.Copy(formMatrix); 1792 charinfo.m_Matrix.Copy(formMatrix);
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
1818 } 1835 }
1819 matrix.TransformRect(charinfo.m_CharBox); 1836 matrix.TransformRect(charinfo.m_CharBox);
1820 charinfo.m_Matrix.Copy(matrix); 1837 charinfo.m_Matrix.Copy(matrix);
1821 if (wstrItem.IsEmpty()) { 1838 if (wstrItem.IsEmpty()) {
1822 charinfo.m_Unicode = 0; 1839 charinfo.m_Unicode = 0;
1823 m_TempCharList.Add(charinfo); 1840 m_TempCharList.Add(charinfo);
1824 m_TempTextBuf.AppendChar(0xfffe); 1841 m_TempTextBuf.AppendChar(0xfffe);
1825 continue; 1842 continue;
1826 } else { 1843 } else {
1827 int nTotal = wstrItem.GetLength(); 1844 int nTotal = wstrItem.GetLength();
1828 int n = 0;
1829 FX_BOOL bDel = FALSE; 1845 FX_BOOL bDel = FALSE;
1830 while (n < m_TempCharList.GetSize() && n < 7) { 1846 const int count = std::min(m_TempCharList.GetSize(), 7);
1831 n++; 1847 for (int n = m_TempCharList.GetSize() - 1;
1832 PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - n); 1848 n > m_TempCharList.GetSize() - count - 1;
Tom Sepez 2014/12/18 21:24:43 Hmmm. This is just as confusing. I think the way
Lei Zhang 2014/12/18 21:49:03 How about: for (int n = m_TempCharList.GetSize();
1849 n--) {
1850 PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt(n);
1833 if(charinfo1->m_CharCode == charinfo.m_CharCode && 1851 if(charinfo1->m_CharCode == charinfo.m_CharCode &&
1834 charinfo1->m_pTextObj->GetFont() == charinfo.m_pTextObj->GetFont() && 1852 charinfo1->m_pTextObj->GetFont() == charinfo.m_pTextObj->GetFont() &&
1835 FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) < TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize() && 1853 FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) < TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize() &&
1836 FXSYS_fabs(charinfo1->m_OriginY - charinfo.m_OriginY) < TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize() ) { 1854 FXSYS_fabs(charinfo1->m_OriginY - charinfo.m_OriginY) < TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize() ) {
1837 bDel = TRUE; 1855 bDel = TRUE;
1838 break; 1856 break;
1839 } 1857 }
1840 } 1858 }
1841 if(!bDel) { 1859 if(!bDel) {
1842 for (int nIndex = 0; nIndex < nTotal; nIndex++) { 1860 for (int nIndex = 0; nIndex < nTotal; nIndex++) {
(...skipping 12 matching lines...) Expand all
1855 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); 1873 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
1856 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); 1874 m_TempCharList.Delete(m_TempCharList.GetSize() - 1);
1857 } 1875 }
1858 } 1876 }
1859 } 1877 }
1860 } 1878 }
1861 if (bIsBidiAndMirrosInverse) { 1879 if (bIsBidiAndMirrosInverse) {
1862 FX_INT32 i, j; 1880 FX_INT32 i, j;
1863 i = iCharListStartAppend; 1881 i = iCharListStartAppend;
1864 j = m_TempCharList.GetSize() - 1; 1882 j = m_TempCharList.GetSize() - 1;
1865 PAGECHAR_INFO tempCharInfo;
1866 FX_INT32 tempIndex = 0;
1867 for (; i < j; i++, j--) { 1883 for (; i < j; i++, j--) {
1868 tempCharInfo = m_TempCharList[i]; 1884 std::swap(m_TempCharList[i], m_TempCharList[j]);
1869 m_TempCharList[i] = m_TempCharList[j]; 1885 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index);
1870 m_TempCharList[j] = tempCharInfo;
1871 tempIndex = m_TempCharList[i].m_Index;
1872 m_TempCharList[i].m_Index = m_TempCharList[j].m_Index;
1873 m_TempCharList[j].m_Index = tempIndex;
1874 } 1886 }
1875 FX_WCHAR * pTempBuffer = m_TempTextBuf.GetBuffer(); 1887 FX_WCHAR * pTempBuffer = m_TempTextBuf.GetBuffer();
1876 i = iBufStartAppend; 1888 i = iBufStartAppend;
1877 j = m_TempTextBuf.GetLength() - 1; 1889 j = m_TempTextBuf.GetLength() - 1;
1878 FX_WCHAR wTemp; 1890 FX_WCHAR wTemp;
1879 for (; i < j; i++, j--) { 1891 for (; i < j; i++, j--) {
1880 wTemp = pTempBuffer[i]; 1892 std::swap(pTempBuffer[i], pTempBuffer[j]);
1881 pTempBuffer[i] = pTempBuffer[j];
1882 pTempBuffer[j] = wTemp;
1883 } 1893 }
1884 } 1894 }
1885 } 1895 }
1886 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj) 1896 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj)
1887 { 1897 {
1888 FX_INT32 nChars = pTextObj->CountChars(); 1898 FX_INT32 nChars = pTextObj->CountChars();
1889 if (nChars == 1) { 1899 if (nChars == 1) {
1890 return m_TextlineDir; 1900 return m_TextlineDir;
1891 } 1901 }
1892 CPDF_TextObjectItem first, last; 1902 CPDF_TextObjectItem first, last;
(...skipping 937 matching lines...) Expand 10 before | Expand all | Expand 10 after
2830 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { 2840 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) {
2831 return; 2841 return;
2832 } 2842 }
2833 CPDF_LinkExt* link = NULL; 2843 CPDF_LinkExt* link = NULL;
2834 link = m_LinkList.GetAt(index); 2844 link = m_LinkList.GetAt(index);
2835 if (!link) { 2845 if (!link) {
2836 return ; 2846 return ;
2837 } 2847 }
2838 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2848 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2839 } 2849 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698