 Chromium Code Reviews
 Chromium Code Reviews Issue 484503002:
  Fix Hebrew character highlight issue in a special document  (Closed) 
  Base URL: https://pdfium.googlesource.com/pdfium.git@master
    
  
    Issue 484503002:
  Fix Hebrew character highlight issue in a special document  (Closed) 
  Base URL: https://pdfium.googlesource.com/pdfium.git@master

| OLD | NEW | 
|---|---|
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 
| 6 | 6 | 
| 7 #include "../../include/fpdfapi/fpdf_resource.h" | 7 #include "../../include/fpdfapi/fpdf_resource.h" | 
| 8 #include "../../include/fpdfapi/fpdf_pageobj.h" | 8 #include "../../include/fpdfapi/fpdf_pageobj.h" | 
| 9 #include "../../include/fpdftext/fpdf_text.h" | 9 #include "../../include/fpdftext/fpdf_text.h" | 
| 10 #include "../../include/fpdfapi/fpdf_page.h" | 10 #include "../../include/fpdfapi/fpdf_page.h" | 
| (...skipping 1664 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1675 baseSpace = kerning + spacing; | 1675 baseSpace = kerning + spacing; | 
| 1676 } | 1676 } | 
| 1677 bAllChar = FALSE; | 1677 bAllChar = FALSE; | 
| 1678 } | 1678 } | 
| 1679 } | 1679 } | 
| 1680 spacing = 0; | 1680 spacing = 0; | 
| 1681 if(baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { | 1681 if(baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { | 
| 1682 baseSpace = 0.0; | 1682 baseSpace = 0.0; | 
| 1683 } | 1683 } | 
| 1684 } | 1684 } | 
| 1685 | |
| 1686 FX_BOOL bIsBidiAndMirrosInverse = FALSE; | |
| 
jbreiden
2014/08/18 19:10:50
What does MirrosInverse mean?
 | |
| 1687 IFX_BidiChar* BidiChar = IFX_BidiChar::Create(); | |
| 1688 FX_INT32 nR2L = 0; | |
| 1689 FX_INT32 nL2R = 0; | |
| 1690 FX_INT32 start = 0, count = 0; | |
| 1691 CPDF_TextObjectItem item; | |
| 1692 for (FX_INT32 i = 0; i < nItems; i++) { | |
| 1693 pTextObj->GetItemInfo(i, &item); | |
| 1694 if (item.m_CharCode == (FX_DWORD)-1) { | |
| 1695 continue; | |
| 1696 } | |
| 1697 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); | |
| 1698 FX_WCHAR wChar = wstrItem.GetAt(0); | |
| 1699 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { | |
| 
jbreiden
2014/08/18 19:10:49
Same question. Is the character to Unicode mapping
 
Bo Xu
2014/08/19 03:31:49
Yes, in CFX_WideString CPDF_Font::UnicodeFromCharC
 
jbreiden
2014/08/22 16:58:44
Okay.  If I understand correctly:
The PDF files I
 | |
| 1700 wChar = (FX_WCHAR)item.m_CharCode; | |
| 1701 } | |
| 1702 if (!wChar) { | |
| 1703 continue; | |
| 1704 } | |
| 1705 if (BidiChar && BidiChar->AppendChar(wChar)) { | |
| 1706 FX_INT32 ret = BidiChar->GetBidiInfo(start, count); | |
| 1707 if (ret == 2) { | |
| 1708 nR2L++; | |
| 1709 } | |
| 1710 else if (ret == 1) { | |
| 1711 nL2R++; | |
| 1712 } | |
| 1713 } | |
| 1714 } | |
| 1715 if (BidiChar && BidiChar->EndChar()) { | |
| 1716 FX_INT32 ret = BidiChar->GetBidiInfo(start, count); | |
| 1717 if (ret == 2) { | |
| 1718 nR2L++; | |
| 1719 } | |
| 1720 else if (ret == 1) { | |
| 1721 nL2R++; | |
| 1722 } | |
| 1723 } | |
| 1724 FX_BOOL bR2L = FALSE; | |
| 1725 if (nR2L > 0 && nR2L >= nL2R) { | |
| 1726 bR2L = TRUE; | |
| 1727 } | |
| 1728 bIsBidiAndMirrosInverse = bR2L && (matrix.a * matrix.d - matrix.b * matrix.c ) < 0; | |
| 1729 FX_INT32 iBufStartAppend = m_TempTextBuf.GetLength(); | |
| 1730 FX_INT32 iCharListStartAppend = m_TempCharList.GetSize(); | |
| 1731 | |
| 1685 for (int i = 0; i < nItems; i++) { | 1732 for (int i = 0; i < nItems; i++) { | 
| 1686 CPDF_TextObjectItem item; | 1733 CPDF_TextObjectItem item; | 
| 1687 PAGECHAR_INFO charinfo; | 1734 PAGECHAR_INFO charinfo; | 
| 1688 charinfo.m_OriginX = 0; | 1735 charinfo.m_OriginX = 0; | 
| 1689 charinfo.m_OriginY = 0; | 1736 charinfo.m_OriginY = 0; | 
| 1690 pTextObj->GetItemInfo(i, &item); | 1737 pTextObj->GetItemInfo(i, &item); | 
| 1691 if (item.m_CharCode == (FX_DWORD) - 1) { | 1738 if (item.m_CharCode == (FX_DWORD) - 1) { | 
| 1692 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1739 CFX_WideString str = m_TempTextBuf.GetWideString(); | 
| 1693 if(str.IsEmpty()) { | 1740 if(str.IsEmpty()) { | 
| 1694 str = m_TextBuf.GetWideString(); | 1741 str = m_TextBuf.GetWideString(); | 
| (...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1821 } | 1868 } | 
| 1822 } else if(i == 0) { | 1869 } else if(i == 0) { | 
| 1823 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1870 CFX_WideString str = m_TempTextBuf.GetWideString(); | 
| 1824 if (!str.IsEmpty() && str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { | 1871 if (!str.IsEmpty() && str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { | 
| 1825 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 1872 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | 
| 1826 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); | 1873 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); | 
| 1827 } | 1874 } | 
| 1828 } | 1875 } | 
| 1829 } | 1876 } | 
| 1830 } | 1877 } | 
| 1878 if (bIsBidiAndMirrosInverse) { | |
| 
jbreiden
2014/08/18 19:10:50
Ok, I see what is going on here. The code is rever
 
Bo Xu
2014/08/19 03:31:49
I wonder, did you generate this particular PDF file?
 
jbreiden
2014/08/22 16:58:44
The PDF file is generated by Tesseract, which is a
 | |
| 1879 FX_INT32 i, j; | |
| 1880 i = iCharListStartAppend; | |
| 1881 j = m_TempCharList.GetSize() - 1; | |
| 1882 PAGECHAR_INFO tempCharInfo; | |
| 1883 FX_INT32 tempIndex = 0; | |
| 1884 for (; i < j; i++, j--) { | |
| 1885 tempCharInfo = m_TempCharList[i]; | |
| 1886 m_TempCharList[i] = m_TempCharList[j]; | |
| 1887 m_TempCharList[j] = tempCharInfo; | |
| 1888 tempIndex = m_TempCharList[i].m_Index; | |
| 1889 m_TempCharList[i].m_Index = m_TempCharList[j].m_Index; | |
| 1890 m_TempCharList[j].m_Index = tempIndex; | |
| 1891 } | |
| 1892 FX_WCHAR * pTempBuffer = m_TempTextBuf.GetBuffer(); | |
| 1893 i = iBufStartAppend; | |
| 1894 j = m_TempTextBuf.GetLength() - 1; | |
| 1895 FX_WCHAR wTemp; | |
| 1896 for (; i < j; i++, j--) { | |
| 1897 wTemp = pTempBuffer[i]; | |
| 1898 pTempBuffer[i] = pTempBuffer[j]; | |
| 1899 pTempBuffer[j] = wTemp; | |
| 1900 } | |
| 1901 } | |
| 1831 } | 1902 } | 
| 1832 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj ) | 1903 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj ) | 
| 1833 { | 1904 { | 
| 1834 FX_INT32 nChars = pTextObj->CountChars(); | 1905 FX_INT32 nChars = pTextObj->CountChars(); | 
| 1835 if (nChars == 1) { | 1906 if (nChars == 1) { | 
| 1836 return m_TextlineDir; | 1907 return m_TextlineDir; | 
| 1837 } | 1908 } | 
| 1838 CPDF_TextObjectItem first, last; | 1909 CPDF_TextObjectItem first, last; | 
| 1839 pTextObj->GetCharInfo(0, &first); | 1910 pTextObj->GetCharInfo(0, &first); | 
| 1840 pTextObj->GetCharInfo(nChars - 1, &last); | 1911 pTextObj->GetCharInfo(nChars - 1, &last); | 
| (...skipping 936 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2777 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2848 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 
| 2778 return; | 2849 return; | 
| 2779 } | 2850 } | 
| 2780 CPDF_LinkExt* link = NULL; | 2851 CPDF_LinkExt* link = NULL; | 
| 2781 link = m_LinkList.GetAt(index); | 2852 link = m_LinkList.GetAt(index); | 
| 2782 if (!link) { | 2853 if (!link) { | 
| 2783 return ; | 2854 return ; | 
| 2784 } | 2855 } | 
| 2785 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2856 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 
| 2786 } | 2857 } | 
| OLD | NEW |