| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "core/include/fpdfapi/fpdf_page.h" | 7 #include "core/include/fpdfapi/fpdf_page.h" |
| 8 #include "core/include/fpdfapi/fpdf_pageobj.h" | 8 #include "core/include/fpdfapi/fpdf_pageobj.h" |
| 9 #include "core/include/fpdfapi/fpdf_resource.h" | 9 #include "core/include/fpdfapi/fpdf_resource.h" |
| 10 #include "core/include/fpdftext/fpdf_text.h" | 10 #include "core/include/fpdftext/fpdf_text.h" |
| (...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 161 FX_FLOAT rightx, | 161 FX_FLOAT rightx, |
| 162 FX_FLOAT topy, | 162 FX_FLOAT topy, |
| 163 FX_FLOAT bottomy, | 163 FX_FLOAT bottomy, |
| 164 FX_FLOAT spacew, | 164 FX_FLOAT spacew, |
| 165 FX_FLOAT fontsize_v, | 165 FX_FLOAT fontsize_v, |
| 166 CFX_ByteString& str, | 166 CFX_ByteString& str, |
| 167 CPDF_Font* pFont) { | 167 CPDF_Font* pFont) { |
| 168 if (str.GetLength() == 0) { | 168 if (str.GetLength() == 0) { |
| 169 return NULL; | 169 return NULL; |
| 170 } | 170 } |
| 171 if (pBaseLine == NULL) { | 171 if (!pBaseLine) { |
| 172 int i; | 172 int i; |
| 173 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 173 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 174 CTextBaseLine* pExistLine = m_BaseLines.GetAt(i); | 174 CTextBaseLine* pExistLine = m_BaseLines.GetAt(i); |
| 175 if (pExistLine->m_BaseLine == basey) { | 175 if (pExistLine->m_BaseLine == basey) { |
| 176 pBaseLine = pExistLine; | 176 pBaseLine = pExistLine; |
| 177 break; | 177 break; |
| 178 } | 178 } |
| 179 if (pExistLine->m_BaseLine < basey) { | 179 if (pExistLine->m_BaseLine < basey) { |
| 180 break; | 180 break; |
| 181 } | 181 } |
| 182 } | 182 } |
| 183 if (pBaseLine == NULL) { | 183 if (!pBaseLine) { |
| 184 pBaseLine = new CTextBaseLine; | 184 pBaseLine = new CTextBaseLine; |
| 185 pBaseLine->m_BaseLine = basey; | 185 pBaseLine->m_BaseLine = basey; |
| 186 m_BaseLines.InsertAt(i, pBaseLine); | 186 m_BaseLines.InsertAt(i, pBaseLine); |
| 187 } | 187 } |
| 188 } | 188 } |
| 189 CFX_WideString text; | 189 CFX_WideString text; |
| 190 const FX_CHAR* pStr = str; | 190 const FX_CHAR* pStr = str; |
| 191 int len = str.GetLength(), offset = 0; | 191 int len = str.GetLength(), offset = 0; |
| 192 while (offset < len) { | 192 while (offset < len) { |
| 193 FX_DWORD ch = pFont->GetNextChar(pStr, len, offset); | 193 FX_DWORD ch = pFont->GetNextChar(pStr, len, offset); |
| (...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 441 } | 441 } |
| 442 return TRUE; | 442 return TRUE; |
| 443 } | 443 } |
| 444 void CTextPage::FindColumns() { | 444 void CTextPage::FindColumns() { |
| 445 int i; | 445 int i; |
| 446 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 446 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 447 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); | 447 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
| 448 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { | 448 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { |
| 449 CTextBox* pTextBox = pBaseLine->m_TextList.GetAt(j); | 449 CTextBox* pTextBox = pBaseLine->m_TextList.GetAt(j); |
| 450 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); | 450 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); |
| 451 if (pColumn == NULL) { | 451 if (pColumn) { |
| 452 pColumn->m_AvgPos = |
| 453 (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / |
| 454 (pColumn->m_Count + 1); |
| 455 pColumn->m_Count++; |
| 456 } else { |
| 452 pColumn = new CTextColumn; | 457 pColumn = new CTextColumn; |
| 453 pColumn->m_Count = 1; | 458 pColumn->m_Count = 1; |
| 454 pColumn->m_AvgPos = pTextBox->m_Right; | 459 pColumn->m_AvgPos = pTextBox->m_Right; |
| 455 pColumn->m_TextPos = -1; | 460 pColumn->m_TextPos = -1; |
| 456 m_TextColumns.Add(pColumn); | 461 m_TextColumns.Add(pColumn); |
| 457 } else { | |
| 458 pColumn->m_AvgPos = | |
| 459 (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / | |
| 460 (pColumn->m_Count + 1); | |
| 461 pColumn->m_Count++; | |
| 462 } | 462 } |
| 463 } | 463 } |
| 464 } | 464 } |
| 465 int mincount = m_BaseLines.GetSize() / 4; | 465 int mincount = m_BaseLines.GetSize() / 4; |
| 466 for (i = 0; i < m_TextColumns.GetSize(); i++) { | 466 for (i = 0; i < m_TextColumns.GetSize(); i++) { |
| 467 CTextColumn* pTextColumn = m_TextColumns.GetAt(i); | 467 CTextColumn* pTextColumn = m_TextColumns.GetAt(i); |
| 468 if (pTextColumn->m_Count >= mincount) { | 468 if (pTextColumn->m_Count >= mincount) { |
| 469 continue; | 469 continue; |
| 470 } | 470 } |
| 471 delete pTextColumn; | 471 delete pTextColumn; |
| (...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 719 } | 719 } |
| 720 page.Transform(matrix); | 720 page.Transform(matrix); |
| 721 page_bbox.Transform(&matrix); | 721 page_bbox.Transform(&matrix); |
| 722 } | 722 } |
| 723 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, | 723 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, |
| 724 CPDF_Document* pDoc, | 724 CPDF_Document* pDoc, |
| 725 CPDF_Dictionary* pPage, | 725 CPDF_Dictionary* pPage, |
| 726 int iMinWidth, | 726 int iMinWidth, |
| 727 FX_DWORD flags) { | 727 FX_DWORD flags) { |
| 728 lines.RemoveAll(); | 728 lines.RemoveAll(); |
| 729 if (pPage == NULL) { | 729 if (!pPage) { |
| 730 return; | 730 return; |
| 731 } | 731 } |
| 732 CPDF_Page page; | 732 CPDF_Page page; |
| 733 page.Load(pDoc, pPage); | 733 page.Load(pDoc, pPage); |
| 734 CPDF_ParseOptions options; | 734 CPDF_ParseOptions options; |
| 735 options.m_bTextOnly = TRUE; | 735 options.m_bTextOnly = TRUE; |
| 736 options.m_bSeparateForm = FALSE; | 736 options.m_bSeparateForm = FALSE; |
| 737 page.ParseContent(&options); | 737 page.ParseContent(&options); |
| 738 CFX_FloatRect page_bbox = page.GetPageBBox(); | 738 CFX_FloatRect page_bbox = page.GetPageBBox(); |
| 739 if (flags & PDF2TXT_AUTO_ROTATE) { | 739 if (flags & PDF2TXT_AUTO_ROTATE) { |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 780 FX_DWORD flags) { | 780 FX_DWORD flags) { |
| 781 buffer.EstimateSize(0, 10240); | 781 buffer.EstimateSize(0, 10240); |
| 782 CPDF_Page page; | 782 CPDF_Page page; |
| 783 page.Load(pDoc, pPage); | 783 page.Load(pDoc, pPage); |
| 784 CPDF_ParseOptions options; | 784 CPDF_ParseOptions options; |
| 785 options.m_bTextOnly = TRUE; | 785 options.m_bTextOnly = TRUE; |
| 786 options.m_bSeparateForm = FALSE; | 786 options.m_bSeparateForm = FALSE; |
| 787 page.ParseContent(&options); | 787 page.ParseContent(&options); |
| 788 GetTextStream_Unicode(buffer, &page, TRUE, NULL); | 788 GetTextStream_Unicode(buffer, &page, TRUE, NULL); |
| 789 } | 789 } |
| OLD | NEW |