OLD | NEW |
---|---|
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <cctype> | 8 #include <cctype> |
9 #include <cwctype> | 9 #include <cwctype> |
10 #include <memory> | 10 #include <memory> |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
70 bAllChar = FALSE; | 70 bAllChar = FALSE; |
71 } | 71 } |
72 } | 72 } |
73 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { | 73 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { |
74 baseSpace = 0.0; | 74 baseSpace = 0.0; |
75 } | 75 } |
76 } | 76 } |
77 return baseSpace; | 77 return baseSpace; |
78 } | 78 } |
79 | 79 |
80 const FX_FLOAT kDefaultFontSize = 1; | |
jun_fang (2016/01/09 08:38:56): nit: Replace 1 with 1.0f.
Lei Zhang (2016/01/11 19:44:43): Done.
| |
81 | |
80 } // namespace | 82 } // namespace |
81 | 83 |
82 CPDFText_ParseOptions::CPDFText_ParseOptions() | 84 CPDFText_ParseOptions::CPDFText_ParseOptions() |
83 : m_bGetCharCodeOnly(FALSE), | 85 : m_bGetCharCodeOnly(FALSE), |
84 m_bNormalizeObjs(TRUE), | 86 m_bNormalizeObjs(TRUE), |
85 m_bOutputHyphen(FALSE) {} | 87 m_bOutputHyphen(FALSE) {} |
86 | 88 |
87 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, | 89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, |
88 int flags) { | 90 int flags) { |
89 return new CPDF_TextPage(pPage, flags); | 91 return new CPDF_TextPage(pPage, flags); |
(...skipping 361 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
451 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, | 453 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, |
452 FX_FLOAT y, | 454 FX_FLOAT y, |
453 FX_FLOAT xTolerance, | 455 FX_FLOAT xTolerance, |
454 FX_FLOAT yTolerance) const { | 456 FX_FLOAT yTolerance) const { |
455 if (m_ParseOptions.m_bGetCharCodeOnly) { | 457 if (m_ParseOptions.m_bGetCharCodeOnly) { |
456 return -3; | 458 return -3; |
457 } | 459 } |
458 CPDF_Point point(x, y); | 460 CPDF_Point point(x, y); |
459 return GetIndexAtPos(point, xTolerance, yTolerance); | 461 return GetIndexAtPos(point, xTolerance, yTolerance); |
460 } | 462 } |
461 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { | 463 |
464 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { | |
462 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) | 465 if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) |
463 return; | 466 return; |
464 | 467 |
465 if (index < 0 || index >= m_charList.GetSize()) | 468 if (index < 0 || index >= m_charList.GetSize()) |
466 return; | 469 return; |
467 | 470 |
468 PAGECHAR_INFO charinfo; | 471 const PAGECHAR_INFO* charinfo = |
469 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 472 static_cast<PAGECHAR_INFO*>(m_charList.GetAt(index)); |
470 info.m_Charcode = charinfo.m_CharCode; | 473 info->m_Charcode = charinfo->m_CharCode; |
471 info.m_OriginX = charinfo.m_OriginX; | 474 info->m_OriginX = charinfo->m_OriginX; |
472 info.m_OriginY = charinfo.m_OriginY; | 475 info->m_OriginY = charinfo->m_OriginY; |
473 info.m_Unicode = charinfo.m_Unicode; | 476 info->m_Unicode = charinfo->m_Unicode; |
474 info.m_Flag = charinfo.m_Flag; | 477 info->m_Flag = charinfo->m_Flag; |
475 info.m_CharBox = charinfo.m_CharBox; | 478 info->m_CharBox = charinfo->m_CharBox; |
476 info.m_pTextObj = charinfo.m_pTextObj; | 479 info->m_pTextObj = charinfo->m_pTextObj; |
477 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { | 480 if (charinfo->m_pTextObj && charinfo->m_pTextObj->GetFont()) { |
478 info.m_FontSize = charinfo.m_pTextObj->GetFontSize(); | 481 info->m_FontSize = charinfo->m_pTextObj->GetFontSize(); |
482 } else { | |
483 info->m_FontSize = kDefaultFontSize; | |
479 } | 484 } |
480 info.m_Matrix.Copy(charinfo.m_Matrix); | 485 info->m_Matrix.Copy(charinfo->m_Matrix); |
481 return; | |
482 } | 486 } |
487 | |
483 void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, | 488 void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, |
484 int32_t& nCount) const { | 489 int32_t& nCount) const { |
485 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start); | 490 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start); |
486 PAGECHAR_INFO charinfo2 = | 491 PAGECHAR_INFO charinfo2 = |
487 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1); | 492 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1); |
488 if (FPDFTEXT_CHAR_PIECE != charinfo.m_Flag && | 493 if (FPDFTEXT_CHAR_PIECE != charinfo.m_Flag && |
489 FPDFTEXT_CHAR_PIECE != charinfo2.m_Flag) { | 494 FPDFTEXT_CHAR_PIECE != charinfo2.m_Flag) { |
490 return; | 495 return; |
491 } | 496 } |
492 if (FPDFTEXT_CHAR_PIECE == charinfo.m_Flag) { | 497 if (FPDFTEXT_CHAR_PIECE == charinfo.m_Flag) { |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
587 return; | 592 return; |
588 | 593 |
589 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) | 594 if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) |
590 return; | 595 return; |
591 | 596 |
592 left = m_SelRects.GetAt(rectIndex).left; | 597 left = m_SelRects.GetAt(rectIndex).left; |
593 top = m_SelRects.GetAt(rectIndex).top; | 598 top = m_SelRects.GetAt(rectIndex).top; |
594 right = m_SelRects.GetAt(rectIndex).right; | 599 right = m_SelRects.GetAt(rectIndex).right; |
595 bottom = m_SelRects.GetAt(rectIndex).bottom; | 600 bottom = m_SelRects.GetAt(rectIndex).bottom; |
596 } | 601 } |
602 | |
597 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { | 603 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { |
598 if (m_ParseOptions.m_bGetCharCodeOnly) { | 604 if (m_ParseOptions.m_bGetCharCodeOnly) { |
599 return FALSE; | 605 return FALSE; |
600 } | 606 } |
601 if (end == start) { | 607 if (end == start) { |
602 return FALSE; | 608 return FALSE; |
603 } | 609 } |
604 FX_FLOAT dx, dy; | 610 FPDF_CHAR_INFO info_start; |
605 FPDF_CHAR_INFO info1, info2; | 611 FPDF_CHAR_INFO info_end; |
606 GetCharInfo(start, info1); | 612 GetCharInfo(start, &info_start); |
607 GetCharInfo(end, info2); | 613 GetCharInfo(end, &info_end); |
608 while (info2.m_CharBox.Width() == 0 || info2.m_CharBox.Height() == 0) { | 614 while (info_end.m_CharBox.Width() == 0 || info_end.m_CharBox.Height() == 0) { |
609 end--; | 615 if (--end <= start) |
610 if (end <= start) { | |
611 return FALSE; | 616 return FALSE; |
612 } | 617 |
613 GetCharInfo(end, info2); | 618 GetCharInfo(end, &info_end); |
614 } | 619 } |
615 dx = (info2.m_OriginX - info1.m_OriginX); | 620 FX_FLOAT dx = (info_end.m_OriginX - info_start.m_OriginX); |
616 dy = (info2.m_OriginY - info1.m_OriginY); | 621 FX_FLOAT dy = (info_end.m_OriginY - info_start.m_OriginY); |
617 if (dx == 0) { | 622 if (dx == 0) { |
618 if (dy > 0) { | 623 if (dy > 0) { |
619 Rotate = 90; | 624 Rotate = 90; |
620 } else if (dy < 0) { | 625 } else if (dy < 0) { |
621 Rotate = 270; | 626 Rotate = 270; |
622 } else { | 627 } else { |
623 Rotate = 0; | 628 Rotate = 0; |
624 } | 629 } |
625 } else { | 630 } else { |
626 float a = FXSYS_atan2(dy, dx); | 631 float a = FXSYS_atan2(dy, dx); |
627 Rotate = (int)(a * 180 / FX_PI + 0.5); | 632 Rotate = (int)(a * 180 / FX_PI + 0.5); |
628 } | 633 } |
629 if (Rotate < 0) { | 634 if (Rotate < 0) { |
630 Rotate = -Rotate; | 635 Rotate = -Rotate; |
631 } else if (Rotate > 0) { | 636 } else if (Rotate > 0) { |
632 Rotate = 360 - Rotate; | 637 Rotate = 360 - Rotate; |
633 } | 638 } |
634 return TRUE; | 639 return TRUE; |
635 } | 640 } |
641 | |
636 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, | 642 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, |
637 int& Rotate) { | 643 int& Rotate) { |
638 if (m_ParseOptions.m_bGetCharCodeOnly) { | 644 if (m_ParseOptions.m_bGetCharCodeOnly) { |
639 return FALSE; | 645 return FALSE; |
640 } | 646 } |
641 int start, end, count, | 647 int start, end, count, |
642 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, | 648 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, |
643 TRUE); | 649 TRUE); |
644 if (n < 1) { | 650 if (n < 1) { |
645 return FALSE; | 651 return FALSE; |
(...skipping 1400 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2046 if (pObj->m_Type != PDFPAGE_TEXT) { | 2052 if (pObj->m_Type != PDFPAGE_TEXT) { |
2047 continue; | 2053 continue; |
2048 } | 2054 } |
2049 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { | 2055 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { |
2050 return TRUE; | 2056 return TRUE; |
2051 } | 2057 } |
2052 i++; | 2058 i++; |
2053 } | 2059 } |
2054 return FALSE; | 2060 return FALSE; |
2055 } | 2061 } |
2062 | |
2056 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) { | 2063 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) { |
2057 int size = m_TempCharList.GetSize(); | 2064 int size = m_TempCharList.GetSize(); |
2058 PAGECHAR_INFO preChar; | 2065 PAGECHAR_INFO preChar; |
2059 if (size) { | 2066 if (size) { |
2060 preChar = (PAGECHAR_INFO)m_TempCharList[size - 1]; | 2067 preChar = (PAGECHAR_INFO)m_TempCharList[size - 1]; |
2061 } else { | 2068 } else { |
2062 size = m_charList.GetSize(); | 2069 size = m_charList.GetSize(); |
2063 if (size == 0) { | 2070 if (size == 0) { |
2064 return FALSE; | 2071 return FALSE; |
2065 } | 2072 } |
2066 preChar = (PAGECHAR_INFO)m_charList[size - 1]; | 2073 preChar = (PAGECHAR_INFO)m_charList[size - 1]; |
2067 } | 2074 } |
2068 info.m_Index = m_TextBuf.GetLength(); | 2075 info.m_Index = m_TextBuf.GetLength(); |
2069 info.m_Unicode = unicode; | 2076 info.m_Unicode = unicode; |
2070 info.m_pTextObj = NULL; | 2077 info.m_pTextObj = NULL; |
2071 info.m_CharCode = -1; | 2078 info.m_CharCode = -1; |
2072 info.m_Flag = FPDFTEXT_CHAR_GENERATED; | 2079 info.m_Flag = FPDFTEXT_CHAR_GENERATED; |
2073 int preWidth = 0; | 2080 int preWidth = 0; |
2074 if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1) { | 2081 if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1) |
2075 preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont()); | 2082 preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont()); |
2076 } | 2083 |
2077 FX_FLOAT fs = 0; | 2084 FX_FLOAT fFontSize = preChar.m_pTextObj ? preChar.m_pTextObj->GetFontSize() |
2078 if (preChar.m_pTextObj) { | 2085 : preChar.m_CharBox.Height(); |
2079 fs = preChar.m_pTextObj->GetFontSize(); | 2086 if (!fFontSize) |
2080 } else { | 2087 fFontSize = kDefaultFontSize; |
2081 fs = preChar.m_CharBox.Height(); | 2088 |
2082 } | 2089 info.m_OriginX = preChar.m_OriginX + preWidth * (fFontSize) / 1000; |
2083 if (!fs) { | |
2084 fs = 1; | |
2085 } | |
2086 info.m_OriginX = preChar.m_OriginX + preWidth * (fs) / 1000; | |
2087 info.m_OriginY = preChar.m_OriginY; | 2090 info.m_OriginY = preChar.m_OriginY; |
2088 info.m_CharBox = CFX_FloatRect(info.m_OriginX, info.m_OriginY, info.m_OriginX, | 2091 info.m_CharBox = CFX_FloatRect(info.m_OriginX, info.m_OriginY, info.m_OriginX, |
2089 info.m_OriginY); | 2092 info.m_OriginY); |
2090 return TRUE; | 2093 return TRUE; |
2091 } | 2094 } |
2095 | |
2092 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, | 2096 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, |
2093 const CFX_FloatRect& rect2) { | 2097 const CFX_FloatRect& rect2) { |
2094 CFX_FloatRect rect = rect1; | 2098 CFX_FloatRect rect = rect1; |
2095 rect.Intersect(rect2); | 2099 rect.Intersect(rect2); |
2096 return !rect.IsEmpty(); | 2100 return !rect.IsEmpty(); |
2097 } | 2101 } |
2098 FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { | 2102 FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { |
2099 if (unicode < L'A') { | 2103 if (unicode < L'A') { |
2100 return FALSE; | 2104 return FALSE; |
2101 } | 2105 } |
(...skipping 15 matching lines...) Expand all Loading... | |
2117 m_resStart(0), | 2121 m_resStart(0), |
2118 m_resEnd(-1), | 2122 m_resEnd(-1), |
2119 m_IsFind(FALSE) { | 2123 m_IsFind(FALSE) { |
2120 m_strText = m_pTextPage->GetPageText(); | 2124 m_strText = m_pTextPage->GetPageText(); |
2121 int nCount = pTextPage->CountChars(); | 2125 int nCount = pTextPage->CountChars(); |
2122 if (nCount) { | 2126 if (nCount) { |
2123 m_CharIndex.Add(0); | 2127 m_CharIndex.Add(0); |
2124 } | 2128 } |
2125 for (int i = 0; i < nCount; i++) { | 2129 for (int i = 0; i < nCount; i++) { |
2126 FPDF_CHAR_INFO info; | 2130 FPDF_CHAR_INFO info; |
2127 pTextPage->GetCharInfo(i, info); | 2131 pTextPage->GetCharInfo(i, &info); |
2128 int indexSize = m_CharIndex.GetSize(); | 2132 int indexSize = m_CharIndex.GetSize(); |
2129 if (info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) { | 2133 if (info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) { |
2130 if (indexSize % 2) { | 2134 if (indexSize % 2) { |
2131 m_CharIndex.Add(1); | 2135 m_CharIndex.Add(1); |
2132 } else { | 2136 } else { |
2133 if (indexSize <= 0) { | 2137 if (indexSize <= 0) { |
2134 continue; | 2138 continue; |
2135 } | 2139 } |
2136 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1); | 2140 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1); |
2137 } | 2141 } |
(...skipping 405 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2543 if (!m_bIsParsed) { | 2547 if (!m_bIsParsed) { |
2544 return -1; | 2548 return -1; |
2545 } | 2549 } |
2546 return m_LinkList.GetSize(); | 2550 return m_LinkList.GetSize(); |
2547 } | 2551 } |
2548 void CPDF_LinkExtract::ParseLink() { | 2552 void CPDF_LinkExtract::ParseLink() { |
2549 int start = 0, pos = 0; | 2553 int start = 0, pos = 0; |
2550 int TotalChar = m_pTextPage->CountChars(); | 2554 int TotalChar = m_pTextPage->CountChars(); |
2551 while (pos < TotalChar) { | 2555 while (pos < TotalChar) { |
2552 FPDF_CHAR_INFO pageChar; | 2556 FPDF_CHAR_INFO pageChar; |
2553 m_pTextPage->GetCharInfo(pos, pageChar); | 2557 m_pTextPage->GetCharInfo(pos, &pageChar); |
2554 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || | 2558 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || |
2555 pos == TotalChar - 1) { | 2559 pos == TotalChar - 1) { |
2556 int nCount = pos - start; | 2560 int nCount = pos - start; |
2557 if (pos == TotalChar - 1) { | 2561 if (pos == TotalChar - 1) { |
2558 nCount++; | 2562 nCount++; |
2559 } | 2563 } |
2560 CFX_WideString strBeCheck; | 2564 CFX_WideString strBeCheck; |
2561 strBeCheck = m_pTextPage->GetPageText(start, nCount); | 2565 strBeCheck = m_pTextPage->GetPageText(start, nCount); |
2562 if (strBeCheck.GetLength() > 5) { | 2566 if (strBeCheck.GetLength() > 5) { |
2563 while (strBeCheck.GetLength() > 0) { | 2567 while (strBeCheck.GetLength() > 0) { |
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2714 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { | 2718 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { |
2715 return; | 2719 return; |
2716 } | 2720 } |
2717 CPDF_LinkExt* link = NULL; | 2721 CPDF_LinkExt* link = NULL; |
2718 link = m_LinkList.GetAt(index); | 2722 link = m_LinkList.GetAt(index); |
2719 if (!link) { | 2723 if (!link) { |
2720 return; | 2724 return; |
2721 } | 2725 } |
2722 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | 2726 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2723 } | 2727 } |
OLD | NEW |