OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "../../include/fpdfapi/fpdf_page.h" | 7 #include "../../include/fpdfapi/fpdf_page.h" |
8 #include "../../include/fpdfapi/fpdf_pageobj.h" | 8 #include "../../include/fpdfapi/fpdf_pageobj.h" |
9 #include "../../include/fpdftext/fpdf_text.h" | 9 #include "../../include/fpdftext/fpdf_text.h" |
10 #include "txtproc.h" | 10 #include "txtproc.h" |
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
162 CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 162 CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
163 if (pExistLine->m_BaseLine == basey) { | 163 if (pExistLine->m_BaseLine == basey) { |
164 pBaseLine = pExistLine; | 164 pBaseLine = pExistLine; |
165 break; | 165 break; |
166 } | 166 } |
167 if (pExistLine->m_BaseLine < basey) { | 167 if (pExistLine->m_BaseLine < basey) { |
168 break; | 168 break; |
169 } | 169 } |
170 } | 170 } |
171 if (pBaseLine == NULL) { | 171 if (pBaseLine == NULL) { |
172 pBaseLine = FX_NEW CTextBaseLine; | 172 pBaseLine = new CTextBaseLine; |
173 if (NULL == pBaseLine) { | |
174 return NULL; | |
175 } | |
176 pBaseLine->m_BaseLine = basey; | 173 pBaseLine->m_BaseLine = basey; |
177 m_BaseLines.InsertAt(i, pBaseLine); | 174 m_BaseLines.InsertAt(i, pBaseLine); |
178 } | 175 } |
179 } | 176 } |
180 CFX_WideString text; | 177 CFX_WideString text; |
181 FX_LPCSTR pStr = str; | 178 FX_LPCSTR pStr = str; |
182 int len = str.GetLength(), offset = 0; | 179 int len = str.GetLength(), offset = 0; |
183 while (offset < len) { | 180 while (offset < len) { |
184 FX_DWORD ch = pFont->GetNextChar(pStr, len, offset); | 181 FX_DWORD ch = pFont->GetNextChar(pStr, len, offset); |
185 CFX_WideString unicode_str = pFont->UnicodeFromCharCode(ch); | 182 CFX_WideString unicode_str = pFont->UnicodeFromCharCode(ch); |
(...skipping 260 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
446 } | 443 } |
447 void CTextPage::FindColumns() | 444 void CTextPage::FindColumns() |
448 { | 445 { |
449 int i; | 446 int i; |
450 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 447 for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
451 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 448 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
452 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) { | 449 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) { |
453 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); | 450 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); |
454 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); | 451 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); |
455 if (pColumn == NULL) { | 452 if (pColumn == NULL) { |
456 pColumn = FX_NEW CTextColumn; | 453 pColumn = new CTextColumn; |
457 if (pColumn) { | 454 pColumn->m_Count = 1; |
458 pColumn->m_Count = 1; | 455 pColumn->m_AvgPos = pTextBox->m_Right; |
459 pColumn->m_AvgPos = pTextBox->m_Right; | 456 pColumn->m_TextPos = -1; |
460 pColumn->m_TextPos = -1; | 457 m_TextColumns.Add(pColumn); |
461 m_TextColumns.Add(pColumn); | |
462 } | |
463 } else { | 458 } else { |
464 pColumn->m_AvgPos = (pColumn->m_Count * pColumn->m_AvgPos + pTex
tBox->m_Right) / | 459 pColumn->m_AvgPos = (pColumn->m_Count * pColumn->m_AvgPos + pTex
tBox->m_Right) / |
465 (pColumn->m_Count + 1); | 460 (pColumn->m_Count + 1); |
466 pColumn->m_Count ++; | 461 pColumn->m_Count ++; |
467 } | 462 } |
468 } | 463 } |
469 } | 464 } |
470 int mincount = m_BaseLines.GetSize() / 4; | 465 int mincount = m_BaseLines.GetSize() / 4; |
471 for (i = 0; i < m_TextColumns.GetSize(); i ++) { | 466 for (i = 0; i < m_TextColumns.GetSize(); i ++) { |
472 CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); | 467 CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
525 if (m_MaxFontSizeV < fontsize_v) { | 520 if (m_MaxFontSizeV < fontsize_v) { |
526 m_MaxFontSizeV = fontsize_v; | 521 m_MaxFontSizeV = fontsize_v; |
527 } | 522 } |
528 int i; | 523 int i; |
529 for (i = 0; i < m_TextList.GetSize(); i ++) { | 524 for (i = 0; i < m_TextList.GetSize(); i ++) { |
530 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 525 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
531 if (pText->m_Left > leftx) { | 526 if (pText->m_Left > leftx) { |
532 break; | 527 break; |
533 } | 528 } |
534 } | 529 } |
535 CTextBox* pText = FX_NEW CTextBox; | 530 CTextBox* pText = new CTextBox; |
536 if (NULL == pText) { | |
537 return; | |
538 } | |
539 pText->m_Text = text; | 531 pText->m_Text = text; |
540 pText->m_Left = leftx; | 532 pText->m_Left = leftx; |
541 pText->m_Right = rightx; | 533 pText->m_Right = rightx; |
542 pText->m_Top = topy; | 534 pText->m_Top = topy; |
543 pText->m_Bottom = bottomy; | 535 pText->m_Bottom = bottomy; |
544 pText->m_SpaceWidth = spacew; | 536 pText->m_SpaceWidth = spacew; |
545 pText->m_FontSizeV = fontsize_v; | 537 pText->m_FontSizeV = fontsize_v; |
546 pText->m_pColumn = NULL; | 538 pText->m_pColumn = NULL; |
547 m_TextList.InsertAt(i, pText); | 539 m_TextList.InsertAt(i, pText); |
548 } | 540 } |
(...skipping 232 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
781 { | 773 { |
782 buffer.EstimateSize(0, 10240); | 774 buffer.EstimateSize(0, 10240); |
783 CPDF_Page page; | 775 CPDF_Page page; |
784 page.Load(pDoc, pPage); | 776 page.Load(pDoc, pPage); |
785 CPDF_ParseOptions options; | 777 CPDF_ParseOptions options; |
786 options.m_bTextOnly = TRUE; | 778 options.m_bTextOnly = TRUE; |
787 options.m_bSeparateForm = FALSE; | 779 options.m_bSeparateForm = FALSE; |
788 page.ParseContent(&options); | 780 page.ParseContent(&options); |
789 _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL); | 781 _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL); |
790 } | 782 } |
OLD | NEW |