Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10)

Side by Side Diff: core/src/fpdftext/fpdf_text_int.cpp

Issue 484503002: Fix hebrew character highlight issue in a special document (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "../../include/fpdfapi/fpdf_resource.h" 7 #include "../../include/fpdfapi/fpdf_resource.h"
8 #include "../../include/fpdfapi/fpdf_pageobj.h" 8 #include "../../include/fpdfapi/fpdf_pageobj.h"
9 #include "../../include/fpdftext/fpdf_text.h" 9 #include "../../include/fpdftext/fpdf_text.h"
10 #include "../../include/fpdfapi/fpdf_page.h" 10 #include "../../include/fpdfapi/fpdf_page.h"
(...skipping 1664 matching lines...) Expand 10 before | Expand all | Expand 10 after
1675 baseSpace = kerning + spacing; 1675 baseSpace = kerning + spacing;
1676 } 1676 }
1677 bAllChar = FALSE; 1677 bAllChar = FALSE;
1678 } 1678 }
1679 } 1679 }
1680 spacing = 0; 1680 spacing = 0;
1681 if(baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { 1681 if(baseSpace < 0.0 || (nItems == 3 && !bAllChar)) {
1682 baseSpace = 0.0; 1682 baseSpace = 0.0;
1683 } 1683 }
1684 } 1684 }
1685
1686 FX_BOOL bIsBidiAndMirrosInverse = FALSE;
jbreiden 2014/08/18 19:10:50 What does MirrosInverse mean?
1687 IFX_BidiChar* BidiChar = IFX_BidiChar::Create();
1688 FX_INT32 nR2L = 0;
1689 FX_INT32 nL2R = 0;
1690 FX_INT32 start = 0, count = 0;
1691 CPDF_TextObjectItem item;
1692 for (FX_INT32 i = 0; i < nItems; i++) {
1693 pTextObj->GetItemInfo(i, &item);
1694 if (item.m_CharCode == (FX_DWORD)-1) {
1695 continue;
1696 }
1697 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
1698 FX_WCHAR wChar = wstrItem.GetAt(0);
1699 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) {
jbreiden 2014/08/18 19:10:49 Same question. Is the character to Unicode mapping
Bo Xu 2014/08/19 03:31:49 Yes, in CFX_WideString CPDF_Font::UnicodeFromCharC
jbreiden 2014/08/22 16:58:44 Okay. If I understand correctly: The PDF files I
1700 wChar = (FX_WCHAR)item.m_CharCode;
1701 }
1702 if (!wChar) {
1703 continue;
1704 }
1705 if (BidiChar && BidiChar->AppendChar(wChar)) {
1706 FX_INT32 ret = BidiChar->GetBidiInfo(start, count);
1707 if (ret == 2) {
1708 nR2L++;
1709 }
1710 else if (ret == 1) {
1711 nL2R++;
1712 }
1713 }
1714 }
1715 if (BidiChar && BidiChar->EndChar()) {
1716 FX_INT32 ret = BidiChar->GetBidiInfo(start, count);
1717 if (ret == 2) {
1718 nR2L++;
1719 }
1720 else if (ret == 1) {
1721 nL2R++;
1722 }
1723 }
1724 FX_BOOL bR2L = FALSE;
1725 if (nR2L > 0 && nR2L >= nL2R) {
1726 bR2L = TRUE;
1727 }
1728 bIsBidiAndMirrosInverse = bR2L && (matrix.a * matrix.d - matrix.b * matrix.c ) < 0;
1729 FX_INT32 iBufStartAppend = m_TempTextBuf.GetLength();
1730 FX_INT32 iCharListStartAppend = m_TempCharList.GetSize();
1731
1685 for (int i = 0; i < nItems; i++) { 1732 for (int i = 0; i < nItems; i++) {
1686 CPDF_TextObjectItem item; 1733 CPDF_TextObjectItem item;
1687 PAGECHAR_INFO charinfo; 1734 PAGECHAR_INFO charinfo;
1688 charinfo.m_OriginX = 0; 1735 charinfo.m_OriginX = 0;
1689 charinfo.m_OriginY = 0; 1736 charinfo.m_OriginY = 0;
1690 pTextObj->GetItemInfo(i, &item); 1737 pTextObj->GetItemInfo(i, &item);
1691 if (item.m_CharCode == (FX_DWORD) - 1) { 1738 if (item.m_CharCode == (FX_DWORD) - 1) {
1692 CFX_WideString str = m_TempTextBuf.GetWideString(); 1739 CFX_WideString str = m_TempTextBuf.GetWideString();
1693 if(str.IsEmpty()) { 1740 if(str.IsEmpty()) {
1694 str = m_TextBuf.GetWideString(); 1741 str = m_TextBuf.GetWideString();
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
1821 } 1868 }
1822 } else if(i == 0) { 1869 } else if(i == 0) {
1823 CFX_WideString str = m_TempTextBuf.GetWideString(); 1870 CFX_WideString str = m_TempTextBuf.GetWideString();
1824 if (!str.IsEmpty() && str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { 1871 if (!str.IsEmpty() && str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) {
1825 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); 1872 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
1826 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); 1873 m_TempCharList.Delete(m_TempCharList.GetSize() - 1);
1827 } 1874 }
1828 } 1875 }
1829 } 1876 }
1830 } 1877 }
1878 if (bIsBidiAndMirrosInverse) {
jbreiden 2014/08/18 19:10:50 Ok, I see what is going on here. The code is rever
Bo Xu 2014/08/19 03:31:49 I wonder do you generate this particular pdf file?
jbreiden 2014/08/22 16:58:44 The PDF file is generated by Tesseract, which is a
1879 FX_INT32 i, j;
1880 i = iCharListStartAppend;
1881 j = m_TempCharList.GetSize() - 1;
1882 PAGECHAR_INFO tempCharInfo;
1883 FX_INT32 tempIndex = 0;
1884 for (; i < j; i++, j--) {
1885 tempCharInfo = m_TempCharList[i];
1886 m_TempCharList[i] = m_TempCharList[j];
1887 m_TempCharList[j] = tempCharInfo;
1888 tempIndex = m_TempCharList[i].m_Index;
1889 m_TempCharList[i].m_Index = m_TempCharList[j].m_Index;
1890 m_TempCharList[j].m_Index = tempIndex;
1891 }
1892 FX_WCHAR * pTempBuffer = m_TempTextBuf.GetBuffer();
1893 i = iBufStartAppend;
1894 j = m_TempTextBuf.GetLength() - 1;
1895 FX_WCHAR wTemp;
1896 for (; i < j; i++, j--) {
1897 wTemp = pTempBuffer[i];
1898 pTempBuffer[i] = pTempBuffer[j];
1899 pTempBuffer[j] = wTemp;
1900 }
1901 }
1831 } 1902 }
1832 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj ) 1903 FX_INT32 CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj )
1833 { 1904 {
1834 FX_INT32 nChars = pTextObj->CountChars(); 1905 FX_INT32 nChars = pTextObj->CountChars();
1835 if (nChars == 1) { 1906 if (nChars == 1) {
1836 return m_TextlineDir; 1907 return m_TextlineDir;
1837 } 1908 }
1838 CPDF_TextObjectItem first, last; 1909 CPDF_TextObjectItem first, last;
1839 pTextObj->GetCharInfo(0, &first); 1910 pTextObj->GetCharInfo(0, &first);
1840 pTextObj->GetCharInfo(nChars - 1, &last); 1911 pTextObj->GetCharInfo(nChars - 1, &last);
(...skipping 936 matching lines...) Expand 10 before | Expand all | Expand 10 after
2777 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { 2848 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) {
2778 return; 2849 return;
2779 } 2850 }
2780 CPDF_LinkExt* link = NULL; 2851 CPDF_LinkExt* link = NULL;
2781 link = m_LinkList.GetAt(index); 2852 link = m_LinkList.GetAt(index);
2782 if (!link) { 2853 if (!link) {
2783 return ; 2854 return ;
2784 } 2855 }
2785 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2856 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2786 } 2857 }
OLD | NEW
« core/src/fpdftext/fpdf_text.cpp ('K') | « core/src/fpdftext/fpdf_text.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698