OLD | NEW |
---|---|
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <ctype.h> | 7 #include <ctype.h> |
8 #include <algorithm> | 8 #include <algorithm> |
9 | 9 |
10 #include "../../../third_party/base/nonstd_unique_ptr.h" | 10 #include "../../../third_party/base/nonstd_unique_ptr.h" |
(...skipping 24 matching lines...) Expand all Loading... | |
35 || (curChar >= 0x2000 && curChar <= 0x206F)) { | 35 || (curChar >= 0x2000 && curChar <= 0x206F)) { |
36 return FALSE; | 36 return FALSE; |
37 } | 37 } |
38 return TRUE; | 38 return TRUE; |
39 } | 39 } |
40 | 40 |
41 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) | 41 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) |
42 { | 42 { |
43 if (threshold < 300) { | 43 if (threshold < 300) { |
44 return threshold / 2.0f; | 44 return threshold / 2.0f; |
45 } else if (threshold < 500) { | 45 } |
46 if (threshold < 500) { | |
46 return threshold / 4.0f; | 47 return threshold / 4.0f; |
47 } else if (threshold < 700) { | 48 } |
49 if (threshold < 700) { | |
48 return threshold / 5.0f; | 50 return threshold / 5.0f; |
49 } | 51 } |
50 return threshold / 6.0f; | 52 return threshold / 6.0f; |
51 } | 53 } |
52 | 54 |
53 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, | 55 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, |
54 const CFX_AffineMatrix& matrix) | 56 const CFX_AffineMatrix& matrix) |
55 { | 57 { |
56 FX_FLOAT baseSpace = 0.0; | 58 FX_FLOAT baseSpace = 0.0; |
57 const int nItems = pTextObj->CountItems(); | 59 const int nItems = pTextObj->CountItems(); |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
154 m_TextBuf.EstimateSize(0, 10240); | 156 m_TextBuf.EstimateSize(0, 10240); |
155 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); | 157 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); |
156 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); | 158 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); |
157 } | 159 } |
158 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) | 160 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) |
159 { | 161 { |
160 m_ParseOptions.m_bNormalizeObjs = bNormalize; | 162 m_ParseOptions.m_bNormalizeObjs = bNormalize; |
161 } | 163 } |
162 FX_BOOL CPDF_TextPage::IsControlChar(PAGECHAR_INFO* pCharInfo) | 164 FX_BOOL CPDF_TextPage::IsControlChar(PAGECHAR_INFO* pCharInfo) |
163 { | 165 { |
164 if(!pCharInfo) { | |
Tom Sepez
2015/07/20 23:59:58
note: always passed as &something so never null.
Lei Zhang
2015/07/21 21:44:46
pointer -> const ref then?
Tom Sepez
2015/07/22 20:38:24
Done.
| |
165 return FALSE; | |
166 } | |
167 switch(pCharInfo->m_Unicode) { | 166 switch(pCharInfo->m_Unicode) { |
168 case 0x2: | 167 case 0x2: |
169 case 0x3: | 168 case 0x3: |
170 case 0x93: | 169 case 0x93: |
171 case 0x94: | 170 case 0x94: |
172 case 0x96: | 171 case 0x96: |
173 case 0x97: | 172 case 0x97: |
174 case 0x98: | 173 case 0x98: |
175 case 0xfffe: | 174 case 0xfffe: |
176 if(pCharInfo->m_Flag == FPDFTEXT_CHAR_HYPHEN) { | 175 return pCharInfo->m_Flag != FPDFTEXT_CHAR_HYPHEN; |
Lei Zhang
2015/07/21 21:44:47
BTW, wouldn't this return true or false, rather th…
Tom Sepez
2015/07/22 20:38:24
Made into |bool| return value.
| |
177 return FALSE; | |
178 } else { | |
179 return TRUE; | |
180 } | |
181 default: | 176 default: |
182 return FALSE; | 177 return FALSE; |
183 } | 178 } |
184 } | 179 } |
185 FX_BOOL CPDF_TextPage::ParseTextPage() | 180 FX_BOOL CPDF_TextPage::ParseTextPage() |
186 { | 181 { |
187 if (!m_pPage) { | 182 if (!m_pPage) { |
188 m_IsParsered = FALSE; | 183 m_IsParsered = FALSE; |
189 return FALSE; | 184 return FALSE; |
190 } | 185 } |
(...skipping 363 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
554 if (curXdif == 0) { | 549 if (curXdif == 0) { |
555 return index; | 550 return index; |
556 } | 551 } |
557 int signflag = 0; | 552 int signflag = 0; |
558 if (curXdif > 0) { | 553 if (curXdif > 0) { |
559 signflag = 1; | 554 signflag = 1; |
560 } else { | 555 } else { |
561 signflag = -1; | 556 signflag = -1; |
562 } | 557 } |
563 if (signflag * PreXdif < 0) { | 558 if (signflag * PreXdif < 0) { |
564 if (FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif)) { | 559 return FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif) ? index + 1 : index; |
565 return index + 1; | |
566 } else { | |
567 return index; | |
568 } | |
569 } | 560 } |
570 if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) { | 561 if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) { |
571 minIndex = index; | 562 minIndex = index; |
572 minXdif = curXdif; | 563 minXdif = curXdif; |
573 } | 564 } |
574 PreXdif = curXdif; | 565 PreXdif = curXdif; |
575 if (difPosY != charinfo.m_OriginY) { | 566 if (difPosY != charinfo.m_OriginY) { |
576 break; | 567 break; |
577 } | 568 } |
578 } | 569 } |
579 return minIndex; | 570 return minIndex; |
580 } else if(FPDFTEXT_DOWN) { | 571 } |
Tom Sepez
2015/07/20 23:59:58
note: this must be a bug. Not sure it matters.
Lei Zhang
2015/07/21 21:44:47
It does not matter because GetOrderByDirection() d…
Tom Sepez
2015/07/22 20:38:24
Removed this dead code.
| |
572 if (direction == FPDFTEXT_DOWN) { | |
581 minIndex = -2; | 573 minIndex = -2; |
582 while (1) { | 574 while (1) { |
583 if (++index > m_charList.GetSize() - 1) { | 575 if (++index > m_charList.GetSize() - 1) { |
584 return minIndex; | 576 return minIndex; |
585 } | 577 } |
586 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 578 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
587 if (FXSYS_fabs(charinfo.m_OriginY - curPos.y) > FX_MAX(height, charinfo.m_CharBox.Height()) / 2) { | 579 if (FXSYS_fabs(charinfo.m_OriginY - curPos.y) > FX_MAX(height, charinfo.m_CharBox.Height()) / 2) { |
588 difPosY = charinfo.m_OriginY; | 580 difPosY = charinfo.m_OriginY; |
589 minIndex = index; | 581 minIndex = index; |
590 break; | 582 break; |
591 } | 583 } |
592 } | 584 } |
593 FX_FLOAT PreXdif = charinfo.m_OriginX - curPos.x; | 585 FX_FLOAT PreXdif = charinfo.m_OriginX - curPos.x; |
594 minXdif = PreXdif; | 586 minXdif = PreXdif; |
595 if (PreXdif == 0) { | 587 if (PreXdif == 0) { |
596 return index; | 588 return index; |
597 } | 589 } |
598 FX_FLOAT curXdif = 0; | 590 FX_FLOAT curXdif = 0; |
599 while (++index < m_charList.GetSize()) { | 591 while (++index < m_charList.GetSize()) { |
600 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 592 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
601 if (difPosY != charinfo.m_OriginY) { | 593 if (difPosY != charinfo.m_OriginY) { |
602 break; | 594 break; |
603 } | 595 } |
604 curXdif = charinfo.m_OriginX - curPos.x; | 596 curXdif = charinfo.m_OriginX - curPos.x; |
605 if (curXdif == 0) { | 597 if (curXdif == 0) { |
606 return index; | 598 return index; |
607 } | 599 } |
608 int signflag = 0; | 600 int signflag = curXdif > 0 ? 1 : -1; |
609 if (curXdif > 0) { | |
610 signflag = 1; | |
611 } else { | |
612 signflag = -1; | |
613 } | |
614 if (signflag * PreXdif < 0) { | 601 if (signflag * PreXdif < 0) { |
615 if (FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif)) { | 602 return FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif) ? index - 1 : index; |
616 return index - 1; | |
617 } else { | |
618 return index; | |
619 } | |
620 } | 603 } |
621 if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) { | 604 if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) { |
622 minXdif = curXdif; | 605 minXdif = curXdif; |
623 minIndex = index; | 606 minIndex = index; |
624 } | 607 } |
625 PreXdif = curXdif; | 608 PreXdif = curXdif; |
626 } | 609 } |
627 return minIndex; | 610 return minIndex; |
628 } | 611 } |
612 return minIndex; | |
629 } | 613 } |
630 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO & info) const | 614 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO & info) const |
631 { | 615 { |
632 if(m_ParseOptions.m_bGetCharCodeOnly) { | 616 if(m_ParseOptions.m_bGetCharCodeOnly) { |
633 return; | 617 return; |
634 } | 618 } |
635 if (!m_IsParsered) { | 619 if (!m_IsParsered) { |
636 return; | 620 return; |
637 } | 621 } |
638 if (index < 0 || index >= m_charList.GetSize()) { | 622 if (index < 0 || index >= m_charList.GetSize()) { |
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
945 return index; | 929 return index; |
946 } | 930 } |
947 int breakPos = index; | 931 int breakPos = index; |
948 if (direction == FPDFTEXT_LEFT) { | 932 if (direction == FPDFTEXT_LEFT) { |
949 while (--breakPos > 0) { | 933 while (--breakPos > 0) { |
950 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); | 934 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); |
951 if (!IsLetter(charinfo.m_Unicode)) { | 935 if (!IsLetter(charinfo.m_Unicode)) { |
952 return breakPos; | 936 return breakPos; |
953 } | 937 } |
954 } | 938 } |
955 return breakPos; | |
956 } else if (direction == FPDFTEXT_RIGHT) { | 939 } else if (direction == FPDFTEXT_RIGHT) { |
957 while (++breakPos < m_charList.GetSize()) { | 940 while (++breakPos < m_charList.GetSize()) { |
958 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); | 941 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); |
959 if (!IsLetter(charinfo.m_Unicode)) { | 942 if (!IsLetter(charinfo.m_Unicode)) { |
960 return breakPos; | 943 return breakPos; |
961 } | 944 } |
962 } | 945 } |
963 return breakPos; | |
964 } | 946 } |
965 return breakPos; | 947 return breakPos; |
966 } | 948 } |
967 int32_t CPDF_TextPage::FindTextlineFlowDirection() | 949 int32_t CPDF_TextPage::FindTextlineFlowDirection() |
968 { | 950 { |
969 if (!m_pPage) { | 951 if (!m_pPage) { |
970 return -1; | 952 return -1; |
971 } | 953 } |
972 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); | 954 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); |
973 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); | 955 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); |
(...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1205 PAGECHAR_INFO Info2 = Info; | 1187 PAGECHAR_INFO Info2 = Info; |
1206 Info2.m_Unicode = pDst[nIndex]; | 1188 Info2.m_Unicode = pDst[nIndex]; |
1207 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; | 1189 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
1208 m_TextBuf.AppendChar(Info2.m_Unicode); | 1190 m_TextBuf.AppendChar(Info2.m_Unicode); |
1209 if( !m_ParseOptions.m_bGetCharCodeOnly) { | 1191 if( !m_ParseOptions.m_bGetCharCodeOnly) { |
1210 m_charList.Add(Info2); | 1192 m_charList.Add(Info2); |
1211 } | 1193 } |
1212 } | 1194 } |
1213 FX_Free(pDst); | 1195 FX_Free(pDst); |
1214 return; | 1196 return; |
1215 } else { | |
1216 Info.m_Unicode = wChar; | |
1217 } | 1197 } |
1198 Info.m_Unicode = wChar; | |
1218 m_TextBuf.AppendChar(Info.m_Unicode); | 1199 m_TextBuf.AppendChar(Info.m_Unicode); |
1219 } else { | 1200 } else { |
1220 Info.m_Index = -1; | 1201 Info.m_Index = -1; |
1221 } | 1202 } |
1222 if( !m_ParseOptions.m_bGetCharCodeOnly) { | 1203 if( !m_ParseOptions.m_bGetCharCodeOnly) { |
1223 m_charList.Add(Info); | 1204 m_charList.Add(Info); |
1224 } | 1205 } |
1225 } | 1206 } |
1226 void CPDF_TextPage::CloseTempLine() | 1207 void CPDF_TextPage::CloseTempLine() |
1227 { | 1208 { |
(...skipping 680 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1908 textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY); | 1889 textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY); |
1909 FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX); | 1890 FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX); |
1910 FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY); | 1891 FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY); |
1911 if (dX <= 0.0001f && dY <= 0.0001f) { | 1892 if (dX <= 0.0001f && dY <= 0.0001f) { |
1912 return -1; | 1893 return -1; |
1913 } | 1894 } |
1914 CFX_VectorF v; | 1895 CFX_VectorF v; |
1915 v.Set(dX, dY); | 1896 v.Set(dX, dY); |
1916 v.Normalize(); | 1897 v.Normalize(); |
1917 if (v.y <= 0.0872f) { | 1898 if (v.y <= 0.0872f) { |
1918 if (v.x <= 0.0872f) { | 1899 return v.x <= 0.0872f ? m_TextlineDir : 0; |
1919 return m_TextlineDir; | 1900 } |
1920 } | 1901 if (v.x <= 0.0872f) { |
1921 return 0; | |
1922 } else if (v.x <= 0.0872f) { | |
1923 return 1; | 1902 return 1; |
1924 } | 1903 } |
1925 return m_TextlineDir; | 1904 return m_TextlineDir; |
1926 } | 1905 } |
1927 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) | 1906 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) |
1928 { | 1907 { |
1929 CFX_WideString strCurText = m_TempTextBuf.GetWideString(); | 1908 CFX_WideString strCurText = m_TempTextBuf.GetWideString(); |
1930 if(strCurText.GetLength() == 0) { | 1909 if(strCurText.GetLength() == 0) { |
1931 strCurText = m_TextBuf.GetWideString(); | 1910 strCurText = m_TextBuf.GetWideString(); |
1932 } | 1911 } |
(...skipping 758 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2691 } | 2670 } |
2692 } | 2671 } |
2693 } | 2672 } |
2694 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) | 2673 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) |
2695 { | 2674 { |
2696 CFX_WideString str = strBeCheck; | 2675 CFX_WideString str = strBeCheck; |
2697 str.MakeLower(); | 2676 str.MakeLower(); |
2698 if (str.Find(L"http://www.") != -1) { | 2677 if (str.Find(L"http://www.") != -1) { |
2699 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); | 2678 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); |
2700 return TRUE; | 2679 return TRUE; |
2701 } else if (str.Find(L"http://") != -1) { | 2680 } |
2681 if (str.Find(L"http://") != -1) { | |
2702 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); | 2682 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); |
2703 return TRUE; | 2683 return TRUE; |
2704 } else if (str.Find(L"https://www.") != -1) { | 2684 } |
2685 if (str.Find(L"https://www.") != -1) { | |
2705 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); | 2686 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); |
2706 return TRUE; | 2687 return TRUE; |
2707 } else if (str.Find(L"https://") != -1) { | 2688 } |
2689 if (str.Find(L"https://") != -1) { | |
2708 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); | 2690 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); |
2709 return TRUE; | 2691 return TRUE; |
2710 } else if (str.Find(L"www.") != -1) { | 2692 } |
2693 if (str.Find(L"www.") != -1) { | |
2711 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); | 2694 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); |
2712 strBeCheck = L"http://" + strBeCheck; | 2695 strBeCheck = L"http://" + strBeCheck; |
2713 return TRUE; | 2696 return TRUE; |
2714 } else { | |
2715 return FALSE; | |
2716 } | 2697 } |
2698 return FALSE; | |
2717 } | 2699 } |
2718 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) | 2700 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) |
2719 { | 2701 { |
2720 str.MakeLower(); | 2702 str.MakeLower(); |
2721 int aPos = str.Find(L'@'); | 2703 int aPos = str.Find(L'@'); |
2722 if (aPos < 1) { | 2704 if (aPos < 1) { |
2723 return FALSE; | 2705 return FALSE; |
2724 } | 2706 } |
2725 if (str.GetAt(aPos - 1) == L'.' || str.GetAt(aPos - 1) == L'_') { | 2707 if (str.GetAt(aPos - 1) == L'.' || str.GetAt(aPos - 1) == L'_') { |
2726 return FALSE; | 2708 return FALSE; |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2832 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | 2814 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { |
2833 return; | 2815 return; |
2834 } | 2816 } |
2835 CPDF_LinkExt* link = NULL; | 2817 CPDF_LinkExt* link = NULL; |
2836 link = m_LinkList.GetAt(index); | 2818 link = m_LinkList.GetAt(index); |
2837 if (!link) { | 2819 if (!link) { |
2838 return ; | 2820 return ; |
2839 } | 2821 } |
2840 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2822 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2841 } | 2823 } |
OLD | NEW |