Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "../../include/fpdfapi/fpdf_resource.h" | 7 #include "../../include/fpdfapi/fpdf_resource.h" |
| 8 #include "../../include/fpdfapi/fpdf_pageobj.h" | 8 #include "../../include/fpdfapi/fpdf_pageobj.h" |
| 9 #include "../../include/fpdftext/fpdf_text.h" | 9 #include "../../include/fpdftext/fpdf_text.h" |
| 10 #include "../../include/fpdfapi/fpdf_page.h" | 10 #include "../../include/fpdfapi/fpdf_page.h" |
| 11 #include "../../include/fpdfapi/fpdf_module.h" | 11 #include "../../include/fpdfapi/fpdf_module.h" |
| 12 #include <ctype.h> | 12 #include <ctype.h> |
| 13 #include <algorithm> | |
| 13 #include "text_int.h" | 14 #include "text_int.h" |
| 15 | |
| 16 namespace { | |
| 17 | |
| 14 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) | 18 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) |
| 15 { | 19 { |
| 16 if(curChar < 255 ) { | 20 if(curChar < 255 ) { |
| 17 return FALSE; | 21 return FALSE; |
| 18 } | 22 } |
| 19 if ( (curChar >= 0x0600 && curChar <= 0x06FF) | 23 if ( (curChar >= 0x0600 && curChar <= 0x06FF) |
| 20 || (curChar >= 0xFE70 && curChar <= 0xFEFF) | 24 || (curChar >= 0xFE70 && curChar <= 0xFEFF) |
| 21 || (curChar >= 0xFB50 && curChar <= 0xFDFF) | 25 || (curChar >= 0xFB50 && curChar <= 0xFDFF) |
| 22 || (curChar >= 0x0400 && curChar <= 0x04FF) | 26 || (curChar >= 0x0400 && curChar <= 0x04FF) |
| 23 || (curChar >= 0x0500 && curChar <= 0x052F) | 27 || (curChar >= 0x0500 && curChar <= 0x052F) |
| 24 || (curChar >= 0xA640 && curChar <= 0xA69F) | 28 || (curChar >= 0xA640 && curChar <= 0xA69F) |
| 25 || (curChar >= 0x2DE0 && curChar <= 0x2DFF) | 29 || (curChar >= 0x2DE0 && curChar <= 0x2DFF) |
| 26 || curChar == 8467 | 30 || curChar == 8467 |
| 27 || (curChar >= 0x2000 && curChar <= 0x206F)) { | 31 || (curChar >= 0x2000 && curChar <= 0x206F)) { |
| 28 return FALSE; | 32 return FALSE; |
| 29 } | 33 } |
| 30 return TRUE; | 34 return TRUE; |
| 31 } | 35 } |
| 36 | |
| 37 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) | |
|
Tom Sepez
2014/12/18 19:33:36
OK, let's just follow their naming convention even
Lei Zhang
2014/12/18 21:07:10
I'm a bit unclear what the style guide is exactly,
| |
| 38 { | |
| 39 int nDivide = 6; | |
|
Tom Sepez
2014/12/18 19:33:36
Why do we need the nDivide local? Why is it an int? I reali
Lei Zhang
2014/12/18 21:07:09
Done. I was just shuffling code around.
| |
| 40 if (threshold < 300) { | |
| 41 nDivide = 2; | |
| 42 } else if (threshold < 500) { | |
| 43 nDivide = 4; | |
| 44 } else if (threshold < 700) { | |
| 45 nDivide = 5; | |
| 46 } | |
| 47 return threshold / nDivide; | |
| 48 } | |
| 49 | |
| 50 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, | |
| 51 const CFX_AffineMatrix& matrix) | |
| 52 { | |
| 53 FX_FLOAT baseSpace = 0.0; | |
| 54 const int nItems = pTextObj->CountItems(); | |
| 55 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { | |
| 56 FX_BOOL bAllChar = TRUE; | |
| 57 FX_FLOAT spacing = matrix.TransformDistance( | |
| 58 pTextObj->m_TextState.GetObject()->m_CharSpace); | |
| 59 baseSpace = spacing; | |
| 60 for (int i = 0; i < nItems; i++) { | |
| 61 CPDF_TextObjectItem item; | |
| 62 pTextObj->GetItemInfo(i, &item); | |
| 63 if (item.m_CharCode == (FX_DWORD) - 1) { | |
| 64 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); | |
| 65 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; | |
| 66 baseSpace = std::min(baseSpace, kerning + spacing); | |
| 67 bAllChar = FALSE; | |
| 68 } | |
| 69 } | |
| 70 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { | |
| 71 baseSpace = 0.0; | |
| 72 } | |
| 73 } | |
| 74 return baseSpace; | |
| 75 } | |
| 76 | |
| 77 } // namespace | |
| 78 | |
| 32 CPDFText_ParseOptions::CPDFText_ParseOptions() | 79 CPDFText_ParseOptions::CPDFText_ParseOptions() |
| 33 : m_bGetCharCodeOnly(FALSE), m_bNormalizeObjs(TRUE), m_bOutputHyphen(FALSE) | 80 : m_bGetCharCodeOnly(FALSE), m_bNormalizeObjs(TRUE), m_bOutputHyphen(FALSE) |
| 34 { | 81 { |
| 35 } | 82 } |
| 36 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions) | 83 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions) |
| 37 { | 84 { |
| 38 CPDF_TextPage* pTextPageEx = FX_NEW CPDF_TextPage(pPage, ParserOptions); | 85 CPDF_TextPage* pTextPageEx = FX_NEW CPDF_TextPage(pPage, ParserOptions); |
| 39 return pTextPageEx; | 86 return pTextPageEx; |
| 40 } | 87 } |
| 41 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, int flags) | 88 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, int flags) |
| (...skipping 1593 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1635 } | 1682 } |
| 1636 if (FPDFTEXT_MC_DELAY == bPreMKC) { | 1683 if (FPDFTEXT_MC_DELAY == bPreMKC) { |
| 1637 ProcessMarkedContent(Obj); | 1684 ProcessMarkedContent(Obj); |
| 1638 m_pPreTextObj = pTextObj; | 1685 m_pPreTextObj = pTextObj; |
| 1639 m_perMatrix.Copy(formMatrix); | 1686 m_perMatrix.Copy(formMatrix); |
| 1640 return; | 1687 return; |
| 1641 } | 1688 } |
| 1642 m_pPreTextObj = pTextObj; | 1689 m_pPreTextObj = pTextObj; |
| 1643 m_perMatrix.Copy(formMatrix); | 1690 m_perMatrix.Copy(formMatrix); |
| 1644 int nItems = pTextObj->CountItems(); | 1691 int nItems = pTextObj->CountItems(); |
| 1645 FX_FLOAT spacing = 0; | 1692 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); |
| 1646 FX_FLOAT baseSpace = 0.0; | |
| 1647 FX_BOOL bAllChar = TRUE; | |
| 1648 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { | |
| 1649 spacing = matrix.TransformDistance(pTextObj->m_TextState.GetObject()->m_CharSpace); | |
| 1650 baseSpace = spacing; | |
| 1651 for (int i = 0; i < nItems; i++) { | |
| 1652 CPDF_TextObjectItem item; | |
| 1653 pTextObj->GetItemInfo(i, &item); | |
| 1654 if (item.m_CharCode == (FX_DWORD) - 1) { | |
| 1655 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); | |
| 1656 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; | |
| 1657 if(kerning + spacing < baseSpace) { | |
| 1658 baseSpace = kerning + spacing; | |
| 1659 } | |
| 1660 bAllChar = FALSE; | |
| 1661 } | |
| 1662 } | |
| 1663 spacing = 0; | |
| 1664 if(baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { | |
| 1665 baseSpace = 0.0; | |
| 1666 } | |
| 1667 } | |
| 1668 | 1693 |
| 1669 FX_BOOL bIsBidiAndMirrosInverse = FALSE; | 1694 FX_BOOL bIsBidiAndMirrosInverse = FALSE; |
| 1670 IFX_BidiChar* BidiChar = IFX_BidiChar::Create(); | 1695 IFX_BidiChar* BidiChar = IFX_BidiChar::Create(); |
| 1671 FX_INT32 nR2L = 0; | 1696 FX_INT32 nR2L = 0; |
| 1672 FX_INT32 nL2R = 0; | 1697 FX_INT32 nL2R = 0; |
| 1673 FX_INT32 start = 0, count = 0; | 1698 FX_INT32 start = 0, count = 0; |
| 1674 CPDF_TextObjectItem item; | 1699 CPDF_TextObjectItem item; |
| 1675 for (FX_INT32 i = 0; i < nItems; i++) { | 1700 for (FX_INT32 i = 0; i < nItems; i++) { |
| 1676 pTextObj->GetItemInfo(i, &item); | 1701 pTextObj->GetItemInfo(i, &item); |
| 1677 if (item.m_CharCode == (FX_DWORD)-1) { | 1702 if (item.m_CharCode == (FX_DWORD)-1) { |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 1705 } | 1730 } |
| 1706 } | 1731 } |
| 1707 FX_BOOL bR2L = FALSE; | 1732 FX_BOOL bR2L = FALSE; |
| 1708 if (nR2L > 0 && nR2L >= nL2R) { | 1733 if (nR2L > 0 && nR2L >= nL2R) { |
| 1709 bR2L = TRUE; | 1734 bR2L = TRUE; |
| 1710 } | 1735 } |
| 1711 bIsBidiAndMirrosInverse = bR2L && (matrix.a * matrix.d - matrix.b * matrix.c ) < 0; | 1736 bIsBidiAndMirrosInverse = bR2L && (matrix.a * matrix.d - matrix.b * matrix.c ) < 0; |
| 1712 FX_INT32 iBufStartAppend = m_TempTextBuf.GetLength(); | 1737 FX_INT32 iBufStartAppend = m_TempTextBuf.GetLength(); |
| 1713 FX_INT32 iCharListStartAppend = m_TempCharList.GetSize(); | 1738 FX_INT32 iCharListStartAppend = m_TempCharList.GetSize(); |
| 1714 | 1739 |
| 1740 FX_FLOAT spacing = 0; | |
|
Tom Sepez
2014/12/18 19:33:36
nit: 0.0
Lei Zhang
2014/12/18 21:07:09
Does that actually make a difference? There's too
| |
| 1715 for (int i = 0; i < nItems; i++) { | 1741 for (int i = 0; i < nItems; i++) { |
| 1716 CPDF_TextObjectItem item; | 1742 CPDF_TextObjectItem item; |
| 1717 PAGECHAR_INFO charinfo; | 1743 PAGECHAR_INFO charinfo; |
| 1718 charinfo.m_OriginX = 0; | 1744 charinfo.m_OriginX = 0; |
| 1719 charinfo.m_OriginY = 0; | 1745 charinfo.m_OriginY = 0; |
| 1720 pTextObj->GetItemInfo(i, &item); | 1746 pTextObj->GetItemInfo(i, &item); |
| 1721 if (item.m_CharCode == (FX_DWORD) - 1) { | 1747 if (item.m_CharCode == (FX_DWORD) - 1) { |
| 1722 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1748 CFX_WideString str = m_TempTextBuf.GetWideString(); |
| 1723 if(str.IsEmpty()) { | 1749 if(str.IsEmpty()) { |
| 1724 str = m_TextBuf.GetWideString(); | 1750 str = m_TextBuf.GetWideString(); |
| (...skipping 22 matching lines...) Expand all Loading... | |
| 1747 } | 1773 } |
| 1748 if (threshold > fontsize_h / 3) { | 1774 if (threshold > fontsize_h / 3) { |
| 1749 threshold = 0; | 1775 threshold = 0; |
| 1750 } else { | 1776 } else { |
| 1751 threshold /= 2; | 1777 threshold /= 2; |
| 1752 } | 1778 } |
| 1753 if (threshold == 0) { | 1779 if (threshold == 0) { |
| 1754 threshold = fontsize_h; | 1780 threshold = fontsize_h; |
| 1755 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)) ; | 1781 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)) ; |
| 1756 threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX_FLOAT)last_width; | 1782 threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX_FLOAT)last_width; |
| 1757 int nDivide = 6; | 1783 threshold = _NormalizeThreshold(threshold); |
| 1758 if (threshold < 300) { | |
| 1759 nDivide = 2; | |
| 1760 } else if (threshold < 500) { | |
| 1761 nDivide = 4; | |
| 1762 } else if (threshold < 700) { | |
| 1763 nDivide = 5; | |
| 1764 } | |
| 1765 threshold = threshold / nDivide; | |
| 1766 threshold = fontsize_h * threshold / 1000; | 1784 threshold = fontsize_h * threshold / 1000; |
| 1767 } | 1785 } |
| 1768 if (threshold && (spacing && spacing >= threshold) ) { | 1786 if (threshold && (spacing && spacing >= threshold) ) { |
| 1769 charinfo.m_Unicode = TEXT_BLANK_CHAR; | 1787 charinfo.m_Unicode = TEXT_BLANK_CHAR; |
| 1770 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; | 1788 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; |
| 1771 charinfo.m_pTextObj = pTextObj; | 1789 charinfo.m_pTextObj = pTextObj; |
| 1772 charinfo.m_Index = m_TextBuf.GetLength(); | 1790 charinfo.m_Index = m_TextBuf.GetLength(); |
| 1773 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); | 1791 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); |
| 1774 charinfo.m_CharCode = -1; | 1792 charinfo.m_CharCode = -1; |
| 1775 charinfo.m_Matrix.Copy(formMatrix); | 1793 charinfo.m_Matrix.Copy(formMatrix); |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1820 charinfo.m_Matrix.Copy(matrix); | 1838 charinfo.m_Matrix.Copy(matrix); |
| 1821 if (wstrItem.IsEmpty()) { | 1839 if (wstrItem.IsEmpty()) { |
| 1822 charinfo.m_Unicode = 0; | 1840 charinfo.m_Unicode = 0; |
| 1823 m_TempCharList.Add(charinfo); | 1841 m_TempCharList.Add(charinfo); |
| 1824 m_TempTextBuf.AppendChar(0xfffe); | 1842 m_TempTextBuf.AppendChar(0xfffe); |
| 1825 continue; | 1843 continue; |
| 1826 } else { | 1844 } else { |
| 1827 int nTotal = wstrItem.GetLength(); | 1845 int nTotal = wstrItem.GetLength(); |
| 1828 int n = 0; | 1846 int n = 0; |
| 1829 FX_BOOL bDel = FALSE; | 1847 FX_BOOL bDel = FALSE; |
| 1830 while (n < m_TempCharList.GetSize() && n < 7) { | 1848 const int count = std::min(m_TempCharList.GetSize(), 7); |
| 1849 while (n < count) { | |
| 1831 n++; | 1850 n++; |
| 1832 PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt( m_TempCharList.GetSize() - n); | 1851 int index = m_TempCharList.GetSize() - n; |
|
Tom Sepez
2014/12/18 19:33:36
Why not just count down instead? Is n used somewhe
Lei Zhang
2014/12/18 21:07:09
Done.
| |
| 1852 PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt( index); | |
|
Tom Sepez
2014/12/18 19:33:36
Doesn't the first iteration do ... .GetAt(... .Get
Lei Zhang
2014/12/18 21:07:10
No, it's confusing because the first thing we do w
| |
| 1833 if(charinfo1->m_CharCode == charinfo.m_CharCode && | 1853 if(charinfo1->m_CharCode == charinfo.m_CharCode && |
| 1834 charinfo1->m_pTextObj->GetFont() == charinfo.m_pTextObj->GetFont() && | 1854 charinfo1->m_pTextObj->GetFont() == charinfo.m_pTextObj->GetFont() && |
| 1835 FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) < TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize() && | 1855 FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) < TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize() && |
| 1836 FXSYS_fabs(charinfo1->m_OriginY - charinfo.m_OriginY) < TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize() ) { | 1856 FXSYS_fabs(charinfo1->m_OriginY - charinfo.m_OriginY) < TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize() ) { |
| 1837 bDel = TRUE; | 1857 bDel = TRUE; |
| 1838 break; | 1858 break; |
| 1839 } | 1859 } |
| 1840 } | 1860 } |
| 1841 if(!bDel) { | 1861 if(!bDel) { |
| 1842 for (int nIndex = 0; nIndex < nTotal; nIndex++) { | 1862 for (int nIndex = 0; nIndex < nTotal; nIndex++) { |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 1855 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1875 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
| 1856 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); | 1876 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); |
| 1857 } | 1877 } |
| 1858 } | 1878 } |
| 1859 } | 1879 } |
| 1860 } | 1880 } |
| 1861 if (bIsBidiAndMirrosInverse) { | 1881 if (bIsBidiAndMirrosInverse) { |
| 1862 FX_INT32 i, j; | 1882 FX_INT32 i, j; |
| 1863 i = iCharListStartAppend; | 1883 i = iCharListStartAppend; |
| 1864 j = m_TempCharList.GetSize() - 1; | 1884 j = m_TempCharList.GetSize() - 1; |
| 1865 PAGECHAR_INFO tempCharInfo; | |
| 1866 FX_INT32 tempIndex = 0; | |
| 1867 for (; i < j; i++, j--) { | 1885 for (; i < j; i++, j--) { |
| 1868 tempCharInfo = m_TempCharList[i]; | 1886 std::swap(m_TempCharList[i], m_TempCharList[j]); |
| 1869 m_TempCharList[i] = m_TempCharList[j]; | 1887 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); |
| 1870 m_TempCharList[j] = tempCharInfo; | |
| 1871 tempIndex = m_TempCharList[i].m_Index; | |
| 1872 m_TempCharList[i].m_Index = m_TempCharList[j].m_Index; | |
| 1873 m_TempCharList[j].m_Index = tempIndex; | |
| 1874 } | 1888 } |
| 1875 FX_WCHAR * pTempBuffer = m_TempTextBuf.GetBuffer(); | 1889 FX_WCHAR * pTempBuffer = m_TempTextBuf.GetBuffer(); |
| 1876 i = iBufStartAppend; | 1890 i = iBufStartAppend; |
| 1877 j = m_TempTextBuf.GetLength() - 1; | 1891 j = m_TempTextBuf.GetLength() - 1; |
| 1878 FX_WCHAR wTemp; | 1892 FX_WCHAR wTemp; |
| 1879 for (; i < j; i++, j--) { | 1893 for (; i < j; i++, j--) { |
| 1880 wTemp = pTempBuffer[i]; | 1894 std::swap(pTempBuffer[i], pTempBuffer[j]); |
| 1881 pTempBuffer[i] = pTempBuffer[j]; | |
| 1882 pTempBuffer[j] = wTemp; | |
| 1883 } | 1895 } |
| 1884 } | 1896 } |
| 1885 } | 1897 } |
| 1886 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj ) | 1898 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj ) |
| 1887 { | 1899 { |
| 1888 FX_INT32 nChars = pTextObj->CountChars(); | 1900 FX_INT32 nChars = pTextObj->CountChars(); |
| 1889 if (nChars == 1) { | 1901 if (nChars == 1) { |
| 1890 return m_TextlineDir; | 1902 return m_TextlineDir; |
| 1891 } | 1903 } |
| 1892 CPDF_TextObjectItem first, last; | 1904 CPDF_TextObjectItem first, last; |
| (...skipping 937 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2830 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2842 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { |
| 2831 return; | 2843 return; |
| 2832 } | 2844 } |
| 2833 CPDF_LinkExt* link = NULL; | 2845 CPDF_LinkExt* link = NULL; |
| 2834 link = m_LinkList.GetAt(index); | 2846 link = m_LinkList.GetAt(index); |
| 2835 if (!link) { | 2847 if (!link) { |
| 2836 return ; | 2848 return ; |
| 2837 } | 2849 } |
| 2838 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2850 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
| 2839 } | 2851 } |
| OLD | NEW |