OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "core/include/fpdfapi/fpdf_page.h" | 7 #include "core/include/fpdfapi/fpdf_page.h" |
8 #include "core/include/fpdfapi/fpdf_pageobj.h" | 8 #include "core/include/fpdfapi/fpdf_pageobj.h" |
9 #include "core/include/fpdfapi/fpdf_resource.h" | 9 #include "core/include/fpdfapi/fpdf_resource.h" |
10 #include "core/include/fpdftext/fpdf_text.h" | 10 #include "core/include/fpdftext/fpdf_text.h" |
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
161 FX_FLOAT rightx, | 161 FX_FLOAT rightx, |
162 FX_FLOAT topy, | 162 FX_FLOAT topy, |
163 FX_FLOAT bottomy, | 163 FX_FLOAT bottomy, |
164 FX_FLOAT spacew, | 164 FX_FLOAT spacew, |
165 FX_FLOAT fontsize_v, | 165 FX_FLOAT fontsize_v, |
166 CFX_ByteString& str, | 166 CFX_ByteString& str, |
167 CPDF_Font* pFont) { | 167 CPDF_Font* pFont) { |
168 if (str.GetLength() == 0) { | 168 if (str.GetLength() == 0) { |
169 return NULL; | 169 return NULL; |
170 } | 170 } |
171 if (pBaseLine == NULL) { | 171 if (!pBaseLine) { |
172 int i; | 172 int i; |
173 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 173 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
174 CTextBaseLine* pExistLine = m_BaseLines.GetAt(i); | 174 CTextBaseLine* pExistLine = m_BaseLines.GetAt(i); |
175 if (pExistLine->m_BaseLine == basey) { | 175 if (pExistLine->m_BaseLine == basey) { |
176 pBaseLine = pExistLine; | 176 pBaseLine = pExistLine; |
177 break; | 177 break; |
178 } | 178 } |
179 if (pExistLine->m_BaseLine < basey) { | 179 if (pExistLine->m_BaseLine < basey) { |
180 break; | 180 break; |
181 } | 181 } |
182 } | 182 } |
183 if (pBaseLine == NULL) { | 183 if (!pBaseLine) { |
184 pBaseLine = new CTextBaseLine; | 184 pBaseLine = new CTextBaseLine; |
185 pBaseLine->m_BaseLine = basey; | 185 pBaseLine->m_BaseLine = basey; |
186 m_BaseLines.InsertAt(i, pBaseLine); | 186 m_BaseLines.InsertAt(i, pBaseLine); |
187 } | 187 } |
188 } | 188 } |
189 CFX_WideString text; | 189 CFX_WideString text; |
190 const FX_CHAR* pStr = str; | 190 const FX_CHAR* pStr = str; |
191 int len = str.GetLength(), offset = 0; | 191 int len = str.GetLength(), offset = 0; |
192 while (offset < len) { | 192 while (offset < len) { |
193 FX_DWORD ch = pFont->GetNextChar(pStr, len, offset); | 193 FX_DWORD ch = pFont->GetNextChar(pStr, len, offset); |
(...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
441 } | 441 } |
442 return TRUE; | 442 return TRUE; |
443 } | 443 } |
444 void CTextPage::FindColumns() { | 444 void CTextPage::FindColumns() { |
445 int i; | 445 int i; |
446 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 446 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
447 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); | 447 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
448 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { | 448 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { |
449 CTextBox* pTextBox = pBaseLine->m_TextList.GetAt(j); | 449 CTextBox* pTextBox = pBaseLine->m_TextList.GetAt(j); |
450 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); | 450 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); |
451 if (pColumn == NULL) { | 451 if (pColumn) { |
| 452 pColumn->m_AvgPos = |
| 453 (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / |
| 454 (pColumn->m_Count + 1); |
| 455 pColumn->m_Count++; |
| 456 } else { |
452 pColumn = new CTextColumn; | 457 pColumn = new CTextColumn; |
453 pColumn->m_Count = 1; | 458 pColumn->m_Count = 1; |
454 pColumn->m_AvgPos = pTextBox->m_Right; | 459 pColumn->m_AvgPos = pTextBox->m_Right; |
455 pColumn->m_TextPos = -1; | 460 pColumn->m_TextPos = -1; |
456 m_TextColumns.Add(pColumn); | 461 m_TextColumns.Add(pColumn); |
457 } else { | |
458 pColumn->m_AvgPos = | |
459 (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / | |
460 (pColumn->m_Count + 1); | |
461 pColumn->m_Count++; | |
462 } | 462 } |
463 } | 463 } |
464 } | 464 } |
465 int mincount = m_BaseLines.GetSize() / 4; | 465 int mincount = m_BaseLines.GetSize() / 4; |
466 for (i = 0; i < m_TextColumns.GetSize(); i++) { | 466 for (i = 0; i < m_TextColumns.GetSize(); i++) { |
467 CTextColumn* pTextColumn = m_TextColumns.GetAt(i); | 467 CTextColumn* pTextColumn = m_TextColumns.GetAt(i); |
468 if (pTextColumn->m_Count >= mincount) { | 468 if (pTextColumn->m_Count >= mincount) { |
469 continue; | 469 continue; |
470 } | 470 } |
471 delete pTextColumn; | 471 delete pTextColumn; |
(...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
719 } | 719 } |
720 page.Transform(matrix); | 720 page.Transform(matrix); |
721 page_bbox.Transform(&matrix); | 721 page_bbox.Transform(&matrix); |
722 } | 722 } |
723 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, | 723 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, |
724 CPDF_Document* pDoc, | 724 CPDF_Document* pDoc, |
725 CPDF_Dictionary* pPage, | 725 CPDF_Dictionary* pPage, |
726 int iMinWidth, | 726 int iMinWidth, |
727 FX_DWORD flags) { | 727 FX_DWORD flags) { |
728 lines.RemoveAll(); | 728 lines.RemoveAll(); |
729 if (pPage == NULL) { | 729 if (!pPage) { |
730 return; | 730 return; |
731 } | 731 } |
732 CPDF_Page page; | 732 CPDF_Page page; |
733 page.Load(pDoc, pPage); | 733 page.Load(pDoc, pPage); |
734 CPDF_ParseOptions options; | 734 CPDF_ParseOptions options; |
735 options.m_bTextOnly = TRUE; | 735 options.m_bTextOnly = TRUE; |
736 options.m_bSeparateForm = FALSE; | 736 options.m_bSeparateForm = FALSE; |
737 page.ParseContent(&options); | 737 page.ParseContent(&options); |
738 CFX_FloatRect page_bbox = page.GetPageBBox(); | 738 CFX_FloatRect page_bbox = page.GetPageBBox(); |
739 if (flags & PDF2TXT_AUTO_ROTATE) { | 739 if (flags & PDF2TXT_AUTO_ROTATE) { |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
780 FX_DWORD flags) { | 780 FX_DWORD flags) { |
781 buffer.EstimateSize(0, 10240); | 781 buffer.EstimateSize(0, 10240); |
782 CPDF_Page page; | 782 CPDF_Page page; |
783 page.Load(pDoc, pPage); | 783 page.Load(pDoc, pPage); |
784 CPDF_ParseOptions options; | 784 CPDF_ParseOptions options; |
785 options.m_bTextOnly = TRUE; | 785 options.m_bTextOnly = TRUE; |
786 options.m_bSeparateForm = FALSE; | 786 options.m_bSeparateForm = FALSE; |
787 page.ParseContent(&options); | 787 page.ParseContent(&options); |
788 GetTextStream_Unicode(buffer, &page, TRUE, NULL); | 788 GetTextStream_Unicode(buffer, &page, TRUE, NULL); |
789 } | 789 } |
OLD | NEW |