Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include <ctype.h> | 7 #include <ctype.h> |
| 8 #include <algorithm> | 8 #include <algorithm> |
| 9 | 9 |
| 10 #include "../../../third_party/base/nonstd_unique_ptr.h" | 10 #include "../../../third_party/base/nonstd_unique_ptr.h" |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 35 || (curChar >= 0x2000 && curChar <= 0x206F)) { | 35 || (curChar >= 0x2000 && curChar <= 0x206F)) { |
| 36 return FALSE; | 36 return FALSE; |
| 37 } | 37 } |
| 38 return TRUE; | 38 return TRUE; |
| 39 } | 39 } |
| 40 | 40 |
| 41 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) | 41 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) |
| 42 { | 42 { |
| 43 if (threshold < 300) { | 43 if (threshold < 300) { |
| 44 return threshold / 2.0f; | 44 return threshold / 2.0f; |
| 45 } else if (threshold < 500) { | 45 } |
| 46 if (threshold < 500) { | |
| 46 return threshold / 4.0f; | 47 return threshold / 4.0f; |
| 47 } else if (threshold < 700) { | 48 } |
| 49 if (threshold < 700) { | |
| 48 return threshold / 5.0f; | 50 return threshold / 5.0f; |
| 49 } | 51 } |
| 50 return threshold / 6.0f; | 52 return threshold / 6.0f; |
| 51 } | 53 } |
| 52 | 54 |
| 53 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, | 55 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, |
| 54 const CFX_AffineMatrix& matrix) | 56 const CFX_AffineMatrix& matrix) |
| 55 { | 57 { |
| 56 FX_FLOAT baseSpace = 0.0; | 58 FX_FLOAT baseSpace = 0.0; |
| 57 const int nItems = pTextObj->CountItems(); | 59 const int nItems = pTextObj->CountItems(); |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 154 m_TextBuf.EstimateSize(0, 10240); | 156 m_TextBuf.EstimateSize(0, 10240); |
| 155 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); | 157 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); |
| 156 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); | 158 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); |
| 157 } | 159 } |
| 158 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) | 160 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) |
| 159 { | 161 { |
| 160 m_ParseOptions.m_bNormalizeObjs = bNormalize; | 162 m_ParseOptions.m_bNormalizeObjs = bNormalize; |
| 161 } | 163 } |
| 162 FX_BOOL CPDF_TextPage::IsControlChar(PAGECHAR_INFO* pCharInfo) | 164 FX_BOOL CPDF_TextPage::IsControlChar(PAGECHAR_INFO* pCharInfo) |
| 163 { | 165 { |
| 164 if(!pCharInfo) { | |
|
Tom Sepez
2015/07/20 23:59:58
note: always passed as &something so never null.
Lei Zhang
2015/07/21 21:44:46
pointer -> const ref then?
Tom Sepez
2015/07/22 20:38:24
Done.
| |
| 165 return FALSE; | |
| 166 } | |
| 167 switch(pCharInfo->m_Unicode) { | 166 switch(pCharInfo->m_Unicode) { |
| 168 case 0x2: | 167 case 0x2: |
| 169 case 0x3: | 168 case 0x3: |
| 170 case 0x93: | 169 case 0x93: |
| 171 case 0x94: | 170 case 0x94: |
| 172 case 0x96: | 171 case 0x96: |
| 173 case 0x97: | 172 case 0x97: |
| 174 case 0x98: | 173 case 0x98: |
| 175 case 0xfffe: | 174 case 0xfffe: |
| 176 if(pCharInfo->m_Flag == FPDFTEXT_CHAR_HYPHEN) { | 175 return pCharInfo->m_Flag != FPDFTEXT_CHAR_HYPHEN; |
|
Lei Zhang
2015/07/21 21:44:47
BTW, wouldn't this return true or false, rather th
Tom Sepez
2015/07/22 20:38:24
Made into |bool| return value.
| |
| 177 return FALSE; | |
| 178 } else { | |
| 179 return TRUE; | |
| 180 } | |
| 181 default: | 176 default: |
| 182 return FALSE; | 177 return FALSE; |
| 183 } | 178 } |
| 184 } | 179 } |
| 185 FX_BOOL CPDF_TextPage::ParseTextPage() | 180 FX_BOOL CPDF_TextPage::ParseTextPage() |
| 186 { | 181 { |
| 187 if (!m_pPage) { | 182 if (!m_pPage) { |
| 188 m_IsParsered = FALSE; | 183 m_IsParsered = FALSE; |
| 189 return FALSE; | 184 return FALSE; |
| 190 } | 185 } |
| (...skipping 363 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 554 if (curXdif == 0) { | 549 if (curXdif == 0) { |
| 555 return index; | 550 return index; |
| 556 } | 551 } |
| 557 int signflag = 0; | 552 int signflag = 0; |
| 558 if (curXdif > 0) { | 553 if (curXdif > 0) { |
| 559 signflag = 1; | 554 signflag = 1; |
| 560 } else { | 555 } else { |
| 561 signflag = -1; | 556 signflag = -1; |
| 562 } | 557 } |
| 563 if (signflag * PreXdif < 0) { | 558 if (signflag * PreXdif < 0) { |
| 564 if (FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif)) { | 559 return FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif) ? index + 1 : index; |
| 565 return index + 1; | |
| 566 } else { | |
| 567 return index; | |
| 568 } | |
| 569 } | 560 } |
| 570 if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) { | 561 if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) { |
| 571 minIndex = index; | 562 minIndex = index; |
| 572 minXdif = curXdif; | 563 minXdif = curXdif; |
| 573 } | 564 } |
| 574 PreXdif = curXdif; | 565 PreXdif = curXdif; |
| 575 if (difPosY != charinfo.m_OriginY) { | 566 if (difPosY != charinfo.m_OriginY) { |
| 576 break; | 567 break; |
| 577 } | 568 } |
| 578 } | 569 } |
| 579 return minIndex; | 570 return minIndex; |
| 580 } else if(FPDFTEXT_DOWN) { | 571 } |
|
Tom Sepez
2015/07/20 23:59:58
note: this must be a bug. Not sure it matters.
Lei Zhang
2015/07/21 21:44:47
It does not matter because GetOrderByDirection() d
Tom Sepez
2015/07/22 20:38:24
Removed this dead code.
| |
| 572 if (direction == FPDFTEXT_DOWN) { | |
| 581 minIndex = -2; | 573 minIndex = -2; |
| 582 while (1) { | 574 while (1) { |
| 583 if (++index > m_charList.GetSize() - 1) { | 575 if (++index > m_charList.GetSize() - 1) { |
| 584 return minIndex; | 576 return minIndex; |
| 585 } | 577 } |
| 586 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 578 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
| 587 if (FXSYS_fabs(charinfo.m_OriginY - curPos.y) > FX_MAX(height, charinfo.m_CharBox.Height()) / 2) { | 579 if (FXSYS_fabs(charinfo.m_OriginY - curPos.y) > FX_MAX(height, charinfo.m_CharBox.Height()) / 2) { |
| 588 difPosY = charinfo.m_OriginY; | 580 difPosY = charinfo.m_OriginY; |
| 589 minIndex = index; | 581 minIndex = index; |
| 590 break; | 582 break; |
| 591 } | 583 } |
| 592 } | 584 } |
| 593 FX_FLOAT PreXdif = charinfo.m_OriginX - curPos.x; | 585 FX_FLOAT PreXdif = charinfo.m_OriginX - curPos.x; |
| 594 minXdif = PreXdif; | 586 minXdif = PreXdif; |
| 595 if (PreXdif == 0) { | 587 if (PreXdif == 0) { |
| 596 return index; | 588 return index; |
| 597 } | 589 } |
| 598 FX_FLOAT curXdif = 0; | 590 FX_FLOAT curXdif = 0; |
| 599 while (++index < m_charList.GetSize()) { | 591 while (++index < m_charList.GetSize()) { |
| 600 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 592 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
| 601 if (difPosY != charinfo.m_OriginY) { | 593 if (difPosY != charinfo.m_OriginY) { |
| 602 break; | 594 break; |
| 603 } | 595 } |
| 604 curXdif = charinfo.m_OriginX - curPos.x; | 596 curXdif = charinfo.m_OriginX - curPos.x; |
| 605 if (curXdif == 0) { | 597 if (curXdif == 0) { |
| 606 return index; | 598 return index; |
| 607 } | 599 } |
| 608 int signflag = 0; | 600 int signflag = curXdif > 0 ? 1 : -1; |
| 609 if (curXdif > 0) { | |
| 610 signflag = 1; | |
| 611 } else { | |
| 612 signflag = -1; | |
| 613 } | |
| 614 if (signflag * PreXdif < 0) { | 601 if (signflag * PreXdif < 0) { |
| 615 if (FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif)) { | 602 return FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif) ? index - 1 : index; |
| 616 return index - 1; | |
| 617 } else { | |
| 618 return index; | |
| 619 } | |
| 620 } | 603 } |
| 621 if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) { | 604 if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) { |
| 622 minXdif = curXdif; | 605 minXdif = curXdif; |
| 623 minIndex = index; | 606 minIndex = index; |
| 624 } | 607 } |
| 625 PreXdif = curXdif; | 608 PreXdif = curXdif; |
| 626 } | 609 } |
| 627 return minIndex; | 610 return minIndex; |
| 628 } | 611 } |
| 612 return minIndex; | |
| 629 } | 613 } |
| 630 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO & info) const | 614 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO & info) const |
| 631 { | 615 { |
| 632 if(m_ParseOptions.m_bGetCharCodeOnly) { | 616 if(m_ParseOptions.m_bGetCharCodeOnly) { |
| 633 return; | 617 return; |
| 634 } | 618 } |
| 635 if (!m_IsParsered) { | 619 if (!m_IsParsered) { |
| 636 return; | 620 return; |
| 637 } | 621 } |
| 638 if (index < 0 || index >= m_charList.GetSize()) { | 622 if (index < 0 || index >= m_charList.GetSize()) { |
| (...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 945 return index; | 929 return index; |
| 946 } | 930 } |
| 947 int breakPos = index; | 931 int breakPos = index; |
| 948 if (direction == FPDFTEXT_LEFT) { | 932 if (direction == FPDFTEXT_LEFT) { |
| 949 while (--breakPos > 0) { | 933 while (--breakPos > 0) { |
| 950 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); | 934 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); |
| 951 if (!IsLetter(charinfo.m_Unicode)) { | 935 if (!IsLetter(charinfo.m_Unicode)) { |
| 952 return breakPos; | 936 return breakPos; |
| 953 } | 937 } |
| 954 } | 938 } |
| 955 return breakPos; | |
| 956 } else if (direction == FPDFTEXT_RIGHT) { | 939 } else if (direction == FPDFTEXT_RIGHT) { |
| 957 while (++breakPos < m_charList.GetSize()) { | 940 while (++breakPos < m_charList.GetSize()) { |
| 958 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); | 941 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); |
| 959 if (!IsLetter(charinfo.m_Unicode)) { | 942 if (!IsLetter(charinfo.m_Unicode)) { |
| 960 return breakPos; | 943 return breakPos; |
| 961 } | 944 } |
| 962 } | 945 } |
| 963 return breakPos; | |
| 964 } | 946 } |
| 965 return breakPos; | 947 return breakPos; |
| 966 } | 948 } |
| 967 int32_t CPDF_TextPage::FindTextlineFlowDirection() | 949 int32_t CPDF_TextPage::FindTextlineFlowDirection() |
| 968 { | 950 { |
| 969 if (!m_pPage) { | 951 if (!m_pPage) { |
| 970 return -1; | 952 return -1; |
| 971 } | 953 } |
| 972 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); | 954 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); |
| 973 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); | 955 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); |
| (...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1205 PAGECHAR_INFO Info2 = Info; | 1187 PAGECHAR_INFO Info2 = Info; |
| 1206 Info2.m_Unicode = pDst[nIndex]; | 1188 Info2.m_Unicode = pDst[nIndex]; |
| 1207 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 1189 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
| 1208 m_TextBuf.AppendChar(Info2.m_Unicode); | 1190 m_TextBuf.AppendChar(Info2.m_Unicode); |
| 1209 if( !m_ParseOptions.m_bGetCharCodeOnly) { | 1191 if( !m_ParseOptions.m_bGetCharCodeOnly) { |
| 1210 m_charList.Add(Info2); | 1192 m_charList.Add(Info2); |
| 1211 } | 1193 } |
| 1212 } | 1194 } |
| 1213 FX_Free(pDst); | 1195 FX_Free(pDst); |
| 1214 return; | 1196 return; |
| 1215 } else { | |
| 1216 Info.m_Unicode = wChar; | |
| 1217 } | 1197 } |
| 1198 Info.m_Unicode = wChar; | |
| 1218 m_TextBuf.AppendChar(Info.m_Unicode); | 1199 m_TextBuf.AppendChar(Info.m_Unicode); |
| 1219 } else { | 1200 } else { |
| 1220 Info.m_Index = -1; | 1201 Info.m_Index = -1; |
| 1221 } | 1202 } |
| 1222 if( !m_ParseOptions.m_bGetCharCodeOnly) { | 1203 if( !m_ParseOptions.m_bGetCharCodeOnly) { |
| 1223 m_charList.Add(Info); | 1204 m_charList.Add(Info); |
| 1224 } | 1205 } |
| 1225 } | 1206 } |
| 1226 void CPDF_TextPage::CloseTempLine() | 1207 void CPDF_TextPage::CloseTempLine() |
| 1227 { | 1208 { |
| (...skipping 680 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1908 textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY); | 1889 textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY); |
| 1909 FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX); | 1890 FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX); |
| 1910 FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY); | 1891 FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY); |
| 1911 if (dX <= 0.0001f && dY <= 0.0001f) { | 1892 if (dX <= 0.0001f && dY <= 0.0001f) { |
| 1912 return -1; | 1893 return -1; |
| 1913 } | 1894 } |
| 1914 CFX_VectorF v; | 1895 CFX_VectorF v; |
| 1915 v.Set(dX, dY); | 1896 v.Set(dX, dY); |
| 1916 v.Normalize(); | 1897 v.Normalize(); |
| 1917 if (v.y <= 0.0872f) { | 1898 if (v.y <= 0.0872f) { |
| 1918 if (v.x <= 0.0872f) { | 1899 return v.x <= 0.0872f ? m_TextlineDir : 0; |
| 1919 return m_TextlineDir; | 1900 } |
| 1920 } | 1901 if (v.x <= 0.0872f) { |
| 1921 return 0; | |
| 1922 } else if (v.x <= 0.0872f) { | |
| 1923 return 1; | 1902 return 1; |
| 1924 } | 1903 } |
| 1925 return m_TextlineDir; | 1904 return m_TextlineDir; |
| 1926 } | 1905 } |
| 1927 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) | 1906 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) |
| 1928 { | 1907 { |
| 1929 CFX_WideString strCurText = m_TempTextBuf.GetWideString(); | 1908 CFX_WideString strCurText = m_TempTextBuf.GetWideString(); |
| 1930 if(strCurText.GetLength() == 0) { | 1909 if(strCurText.GetLength() == 0) { |
| 1931 strCurText = m_TextBuf.GetWideString(); | 1910 strCurText = m_TextBuf.GetWideString(); |
| 1932 } | 1911 } |
| (...skipping 758 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2691 } | 2670 } |
| 2692 } | 2671 } |
| 2693 } | 2672 } |
| 2694 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) | 2673 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) |
| 2695 { | 2674 { |
| 2696 CFX_WideString str = strBeCheck; | 2675 CFX_WideString str = strBeCheck; |
| 2697 str.MakeLower(); | 2676 str.MakeLower(); |
| 2698 if (str.Find(L"http://www.") != -1) { | 2677 if (str.Find(L"http://www.") != -1) { |
| 2699 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); | 2678 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); |
| 2700 return TRUE; | 2679 return TRUE; |
| 2701 } else if (str.Find(L"http://") != -1) { | 2680 } |
| 2681 if (str.Find(L"http://") != -1) { | |
| 2702 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); | 2682 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); |
| 2703 return TRUE; | 2683 return TRUE; |
| 2704 } else if (str.Find(L"https://www.") != -1) { | 2684 } |
| 2685 if (str.Find(L"https://www.") != -1) { | |
| 2705 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); | 2686 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); |
| 2706 return TRUE; | 2687 return TRUE; |
| 2707 } else if (str.Find(L"https://") != -1) { | 2688 } |
| 2689 if (str.Find(L"https://") != -1) { | |
| 2708 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); | 2690 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); |
| 2709 return TRUE; | 2691 return TRUE; |
| 2710 } else if (str.Find(L"www.") != -1) { | 2692 } |
| 2693 if (str.Find(L"www.") != -1) { | |
| 2711 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); | 2694 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); |
| 2712 strBeCheck = L"http://" + strBeCheck; | 2695 strBeCheck = L"http://" + strBeCheck; |
| 2713 return TRUE; | 2696 return TRUE; |
| 2714 } else { | |
| 2715 return FALSE; | |
| 2716 } | 2697 } |
| 2698 return FALSE; | |
| 2717 } | 2699 } |
| 2718 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) | 2700 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) |
| 2719 { | 2701 { |
| 2720 str.MakeLower(); | 2702 str.MakeLower(); |
| 2721 int aPos = str.Find(L'@'); | 2703 int aPos = str.Find(L'@'); |
| 2722 if (aPos < 1) { | 2704 if (aPos < 1) { |
| 2723 return FALSE; | 2705 return FALSE; |
| 2724 } | 2706 } |
| 2725 if (str.GetAt(aPos - 1) == L'.' || str.GetAt(aPos - 1) == L'_') { | 2707 if (str.GetAt(aPos - 1) == L'.' || str.GetAt(aPos - 1) == L'_') { |
| 2726 return FALSE; | 2708 return FALSE; |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2832 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2814 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { |
| 2833 return; | 2815 return; |
| 2834 } | 2816 } |
| 2835 CPDF_LinkExt* link = NULL; | 2817 CPDF_LinkExt* link = NULL; |
| 2836 link = m_LinkList.GetAt(index); | 2818 link = m_LinkList.GetAt(index); |
| 2837 if (!link) { | 2819 if (!link) { |
| 2838 return ; | 2820 return ; |
| 2839 } | 2821 } |
| 2840 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2822 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
| 2841 } | 2823 } |
| OLD | NEW |