OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "../../include/fpdfapi/fpdf_page.h" | 7 #include "../../include/fpdfapi/fpdf_page.h" |
8 #include "../../include/fpdfapi/fpdf_pageobj.h" | 8 #include "../../include/fpdfapi/fpdf_pageobj.h" |
9 #include "../../include/fpdftext/fpdf_text.h" | 9 #include "../../include/fpdftext/fpdf_text.h" |
10 #include "txtproc.h" | 10 #include "txtproc.h" |
(...skipping 208 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
219 CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1); | 219 CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1); |
220 if (pBaseLine->CanMerge(pPrevLine)) { | 220 if (pBaseLine->CanMerge(pPrevLine)) { |
221 pPrevLine->Merge(pBaseLine); | 221 pPrevLine->Merge(pBaseLine); |
222 delete pBaseLine; | 222 delete pBaseLine; |
223 m_BaseLines.RemoveAt(i); | 223 m_BaseLines.RemoveAt(i); |
224 i --; | 224 i --; |
225 } | 225 } |
226 } | 226 } |
227 if (m_bAutoWidth) { | 227 if (m_bAutoWidth) { |
228 int* widths = FX_Alloc(int, m_BaseLines.GetSize()); | 228 int* widths = FX_Alloc(int, m_BaseLines.GetSize()); |
229 if (widths) { | 229 for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
230 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 230 widths[i] = 0; |
231 widths[i] = 0; | 231 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
232 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 232 int TotalChars = 0; |
233 int TotalChars = 0; | 233 FX_FLOAT TotalWidth = 0; |
234 FX_FLOAT TotalWidth = 0; | 234 int minchars; |
235 int minchars; | 235 pBaseLine->CountChars(TotalChars, TotalWidth, minchars); |
236 pBaseLine->CountChars(TotalChars, TotalWidth, minchars); | 236 if (TotalChars) { |
237 if (TotalChars) { | 237 FX_FLOAT charwidth = TotalWidth / TotalChars; |
238 FX_FLOAT charwidth = TotalWidth / TotalChars; | 238 widths[i] = (int)((MaxRightX - MinLeftX) / charwidth); |
239 widths[i] = (int)((MaxRightX - MinLeftX) / charwidth); | |
240 } | |
241 if (widths[i] > 1000) { | |
242 widths[i] = 1000; | |
243 } | |
244 if (widths[i] < minchars) { | |
245 widths[i] = minchars; | |
246 } | |
247 } | 239 } |
248 int AvgWidth = 0, widthcount = 0; | 240 if (widths[i] > 1000) { |
249 for (i = 0; i < m_BaseLines.GetSize(); i ++) | 241 widths[i] = 1000; |
250 if (widths[i]) { | |
251 AvgWidth += widths[i]; | |
252 widthcount ++; | |
253 } | |
254 AvgWidth = int((FX_FLOAT)AvgWidth / widthcount + 0.5); | |
255 int MaxWidth = 0; | |
256 for (i = 0; i < m_BaseLines.GetSize(); i ++) | |
257 if (MaxWidth < widths[i]) { | |
258 MaxWidth = widths[i]; | |
259 } | |
260 if (MaxWidth > AvgWidth * 6 / 5) { | |
261 MaxWidth = AvgWidth * 6 / 5; | |
262 } | 242 } |
263 FX_Free(widths); | 243 if (widths[i] < minchars) { |
264 if (iMinWidth < MaxWidth) { | 244 widths[i] = minchars; |
265 iMinWidth = MaxWidth; | |
266 } | 245 } |
267 } | 246 } |
| 247 int AvgWidth = 0, widthcount = 0; |
| 248 for (i = 0; i < m_BaseLines.GetSize(); i ++) |
| 249 if (widths[i]) { |
| 250 AvgWidth += widths[i]; |
| 251 widthcount ++; |
| 252 } |
| 253 AvgWidth = int((FX_FLOAT)AvgWidth / widthcount + 0.5); |
| 254 int MaxWidth = 0; |
| 255 for (i = 0; i < m_BaseLines.GetSize(); i ++) |
| 256 if (MaxWidth < widths[i]) { |
| 257 MaxWidth = widths[i]; |
| 258 } |
| 259 if (MaxWidth > AvgWidth * 6 / 5) { |
| 260 MaxWidth = AvgWidth * 6 / 5; |
| 261 } |
| 262 FX_Free(widths); |
| 263 if (iMinWidth < MaxWidth) { |
| 264 iMinWidth = MaxWidth; |
| 265 } |
268 } | 266 } |
269 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 267 for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
270 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 268 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
271 pBaseLine->MergeBoxes(); | 269 pBaseLine->MergeBoxes(); |
272 } | 270 } |
273 if (m_bKeepColumn) { | 271 if (m_bKeepColumn) { |
274 FindColumns(); | 272 FindColumns(); |
275 } | 273 } |
276 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 274 for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
277 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 275 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
(...skipping 501 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
779 { | 777 { |
780 buffer.EstimateSize(0, 10240); | 778 buffer.EstimateSize(0, 10240); |
781 CPDF_Page page; | 779 CPDF_Page page; |
782 page.Load(pDoc, pPage); | 780 page.Load(pDoc, pPage); |
783 CPDF_ParseOptions options; | 781 CPDF_ParseOptions options; |
784 options.m_bTextOnly = TRUE; | 782 options.m_bTextOnly = TRUE; |
785 options.m_bSeparateForm = FALSE; | 783 options.m_bSeparateForm = FALSE; |
786 page.ParseContent(&options); | 784 page.ParseContent(&options); |
787 _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL); | 785 _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL); |
788 } | 786 } |
OLD | NEW |