OLD | NEW |
---|---|
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "../../include/fpdfapi/fpdf_resource.h" | 7 #include "../../include/fpdfapi/fpdf_resource.h" |
8 #include "../../include/fpdfapi/fpdf_pageobj.h" | 8 #include "../../include/fpdfapi/fpdf_pageobj.h" |
9 #include "../../include/fpdftext/fpdf_text.h" | 9 #include "../../include/fpdftext/fpdf_text.h" |
10 #include "../../include/fpdfapi/fpdf_page.h" | 10 #include "../../include/fpdfapi/fpdf_page.h" |
(...skipping 1664 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1675 baseSpace = kerning + spacing; | 1675 baseSpace = kerning + spacing; |
1676 } | 1676 } |
1677 bAllChar = FALSE; | 1677 bAllChar = FALSE; |
1678 } | 1678 } |
1679 } | 1679 } |
1680 spacing = 0; | 1680 spacing = 0; |
1681 if(baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { | 1681 if(baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { |
1682 baseSpace = 0.0; | 1682 baseSpace = 0.0; |
1683 } | 1683 } |
1684 } | 1684 } |
1685 | |
1686 FX_BOOL bIsBidiAndMirrosInverse = FALSE; | |
jbreiden
2014/08/18 19:10:50
What does MirrosInverse mean?
| |
1687 IFX_BidiChar* BidiChar = IFX_BidiChar::Create(); | |
1688 FX_INT32 nR2L = 0; | |
1689 FX_INT32 nL2R = 0; | |
1690 FX_INT32 start = 0, count = 0; | |
1691 CPDF_TextObjectItem item; | |
1692 for (FX_INT32 i = 0; i < nItems; i++) { | |
1693 pTextObj->GetItemInfo(i, &item); | |
1694 if (item.m_CharCode == (FX_DWORD)-1) { | |
1695 continue; | |
1696 } | |
1697 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); | |
1698 FX_WCHAR wChar = wstrItem.GetAt(0); | |
1699 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { | |
jbreiden
2014/08/18 19:10:49
Same question. Is the character to Unicode mapping
Bo Xu
2014/08/19 03:31:49
Yes, in CFX_WideString CPDF_Font::UnicodeFromCharC
jbreiden
2014/08/22 16:58:44
Okay. If I understand correctly:
The PDF files I
| |
1700 wChar = (FX_WCHAR)item.m_CharCode; | |
1701 } | |
1702 if (!wChar) { | |
1703 continue; | |
1704 } | |
1705 if (BidiChar && BidiChar->AppendChar(wChar)) { | |
1706 FX_INT32 ret = BidiChar->GetBidiInfo(start, count); | |
1707 if (ret == 2) { | |
1708 nR2L++; | |
1709 } | |
1710 else if (ret == 1) { | |
1711 nL2R++; | |
1712 } | |
1713 } | |
1714 } | |
1715 if (BidiChar && BidiChar->EndChar()) { | |
1716 FX_INT32 ret = BidiChar->GetBidiInfo(start, count); | |
1717 if (ret == 2) { | |
1718 nR2L++; | |
1719 } | |
1720 else if (ret == 1) { | |
1721 nL2R++; | |
1722 } | |
1723 } | |
1724 FX_BOOL bR2L = FALSE; | |
1725 if (nR2L > 0 && nR2L >= nL2R) { | |
1726 bR2L = TRUE; | |
1727 } | |
1728 bIsBidiAndMirrosInverse = bR2L && (matrix.a * matrix.d - matrix.b * matrix.c ) < 0; | |
1729 FX_INT32 iBufStartAppend = m_TempTextBuf.GetLength(); | |
1730 FX_INT32 iCharListStartAppend = m_TempCharList.GetSize(); | |
1731 | |
1685 for (int i = 0; i < nItems; i++) { | 1732 for (int i = 0; i < nItems; i++) { |
1686 CPDF_TextObjectItem item; | 1733 CPDF_TextObjectItem item; |
1687 PAGECHAR_INFO charinfo; | 1734 PAGECHAR_INFO charinfo; |
1688 charinfo.m_OriginX = 0; | 1735 charinfo.m_OriginX = 0; |
1689 charinfo.m_OriginY = 0; | 1736 charinfo.m_OriginY = 0; |
1690 pTextObj->GetItemInfo(i, &item); | 1737 pTextObj->GetItemInfo(i, &item); |
1691 if (item.m_CharCode == (FX_DWORD) - 1) { | 1738 if (item.m_CharCode == (FX_DWORD) - 1) { |
1692 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1739 CFX_WideString str = m_TempTextBuf.GetWideString(); |
1693 if(str.IsEmpty()) { | 1740 if(str.IsEmpty()) { |
1694 str = m_TextBuf.GetWideString(); | 1741 str = m_TextBuf.GetWideString(); |
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1821 } | 1868 } |
1822 } else if(i == 0) { | 1869 } else if(i == 0) { |
1823 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1870 CFX_WideString str = m_TempTextBuf.GetWideString(); |
1824 if (!str.IsEmpty() && str.GetAt(str.GetLength() - 1) == TEXT_BLA NK_CHAR) { | 1871 if (!str.IsEmpty() && str.GetAt(str.GetLength() - 1) == TEXT_BLA NK_CHAR) { |
1825 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1872 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
1826 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); | 1873 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); |
1827 } | 1874 } |
1828 } | 1875 } |
1829 } | 1876 } |
1830 } | 1877 } |
1878 if (bIsBidiAndMirrosInverse) { | |
jbreiden
2014/08/18 19:10:50
Ok, I see what is going on here. The code is rever
Bo Xu
2014/08/19 03:31:49
I wonder: do you generate this particular PDF file?
jbreiden
2014/08/22 16:58:44
The PDF file is generated by Tesseract, which is a
| |
1879 FX_INT32 i, j; | |
1880 i = iCharListStartAppend; | |
1881 j = m_TempCharList.GetSize() - 1; | |
1882 PAGECHAR_INFO tempCharInfo; | |
1883 FX_INT32 tempIndex = 0; | |
1884 for (; i < j; i++, j--) { | |
1885 tempCharInfo = m_TempCharList[i]; | |
1886 m_TempCharList[i] = m_TempCharList[j]; | |
1887 m_TempCharList[j] = tempCharInfo; | |
1888 tempIndex = m_TempCharList[i].m_Index; | |
1889 m_TempCharList[i].m_Index = m_TempCharList[j].m_Index; | |
1890 m_TempCharList[j].m_Index = tempIndex; | |
1891 } | |
1892 FX_WCHAR * pTempBuffer = m_TempTextBuf.GetBuffer(); | |
1893 i = iBufStartAppend; | |
1894 j = m_TempTextBuf.GetLength() - 1; | |
1895 FX_WCHAR wTemp; | |
1896 for (; i < j; i++, j--) { | |
1897 wTemp = pTempBuffer[i]; | |
1898 pTempBuffer[i] = pTempBuffer[j]; | |
1899 pTempBuffer[j] = wTemp; | |
1900 } | |
1901 } | |
1831 } | 1902 } |
1832 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj ) | 1903 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj ) |
1833 { | 1904 { |
1834 FX_INT32 nChars = pTextObj->CountChars(); | 1905 FX_INT32 nChars = pTextObj->CountChars(); |
1835 if (nChars == 1) { | 1906 if (nChars == 1) { |
1836 return m_TextlineDir; | 1907 return m_TextlineDir; |
1837 } | 1908 } |
1838 CPDF_TextObjectItem first, last; | 1909 CPDF_TextObjectItem first, last; |
1839 pTextObj->GetCharInfo(0, &first); | 1910 pTextObj->GetCharInfo(0, &first); |
1840 pTextObj->GetCharInfo(nChars - 1, &last); | 1911 pTextObj->GetCharInfo(nChars - 1, &last); |
(...skipping 936 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2777 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2848 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { |
2778 return; | 2849 return; |
2779 } | 2850 } |
2780 CPDF_LinkExt* link = NULL; | 2851 CPDF_LinkExt* link = NULL; |
2781 link = m_LinkList.GetAt(index); | 2852 link = m_LinkList.GetAt(index); |
2782 if (!link) { | 2853 if (!link) { |
2783 return ; | 2854 return ; |
2784 } | 2855 } |
2785 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2856 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2786 } | 2857 } |
OLD | NEW |