OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "core/include/fpdfapi/fpdf_page.h" | 7 #include "core/include/fpdfapi/fpdf_page.h" |
8 #include "core/include/fpdfapi/fpdf_pageobj.h" | 8 #include "core/include/fpdfapi/fpdf_pageobj.h" |
9 #include "core/include/fpdfapi/fpdf_resource.h" | 9 #include "core/include/fpdfapi/fpdf_resource.h" |
10 #include "core/include/fpdftext/fpdf_text.h" | 10 #include "core/include/fpdftext/fpdf_text.h" |
(...skipping 23 matching lines...) Expand all Loading... |
34 if (ret && !iDef) { | 34 if (ret && !iDef) { |
35 return CFX_ByteString(buf, ret); | 35 return CFX_ByteString(buf, ret); |
36 } | 36 } |
37 const FX_CHAR* altstr = FCS_GetAltStr(unicode); | 37 const FX_CHAR* altstr = FCS_GetAltStr(unicode); |
38 return CFX_ByteString(altstr ? altstr : defchar); | 38 return CFX_ByteString(altstr ? altstr : defchar); |
39 } | 39 } |
40 CTextPage::CTextPage() {} | 40 CTextPage::CTextPage() {} |
41 CTextPage::~CTextPage() { | 41 CTextPage::~CTextPage() { |
42 int i; | 42 int i; |
43 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 43 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
44 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 44 delete m_BaseLines.GetAt(i); |
45 delete pBaseLine; | |
46 } | 45 } |
47 for (i = 0; i < m_TextColumns.GetSize(); i++) { | 46 for (i = 0; i < m_TextColumns.GetSize(); i++) { |
48 CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); | 47 delete m_TextColumns.GetAt(i); |
49 delete pTextColumn; | |
50 } | 48 } |
51 } | 49 } |
52 void CTextPage::ProcessObject(CPDF_PageObject* pObject) { | 50 void CTextPage::ProcessObject(CPDF_PageObject* pObject) { |
53 if (pObject->m_Type != PDFPAGE_TEXT) { | 51 if (pObject->m_Type != PDFPAGE_TEXT) { |
54 return; | 52 return; |
55 } | 53 } |
56 CPDF_TextObject* pText = (CPDF_TextObject*)pObject; | 54 CPDF_TextObject* pText = (CPDF_TextObject*)pObject; |
57 CPDF_Font* pFont = pText->m_TextState.GetFont(); | 55 CPDF_Font* pFont = pText->m_TextState.GetFont(); |
58 int count = pText->CountItems(); | 56 int count = pText->CountItems(); |
59 FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2); | 57 FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2); |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
166 FX_FLOAT spacew, | 164 FX_FLOAT spacew, |
167 FX_FLOAT fontsize_v, | 165 FX_FLOAT fontsize_v, |
168 CFX_ByteString& str, | 166 CFX_ByteString& str, |
169 CPDF_Font* pFont) { | 167 CPDF_Font* pFont) { |
170 if (str.GetLength() == 0) { | 168 if (str.GetLength() == 0) { |
171 return NULL; | 169 return NULL; |
172 } | 170 } |
173 if (pBaseLine == NULL) { | 171 if (pBaseLine == NULL) { |
174 int i; | 172 int i; |
175 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 173 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
176 CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 174 CTextBaseLine* pExistLine = m_BaseLines.GetAt(i); |
177 if (pExistLine->m_BaseLine == basey) { | 175 if (pExistLine->m_BaseLine == basey) { |
178 pBaseLine = pExistLine; | 176 pBaseLine = pExistLine; |
179 break; | 177 break; |
180 } | 178 } |
181 if (pExistLine->m_BaseLine < basey) { | 179 if (pExistLine->m_BaseLine < basey) { |
182 break; | 180 break; |
183 } | 181 } |
184 } | 182 } |
185 if (pBaseLine == NULL) { | 183 if (pBaseLine == NULL) { |
186 pBaseLine = new CTextBaseLine; | 184 pBaseLine = new CTextBaseLine; |
(...skipping 17 matching lines...) Expand all Loading... |
204 text); | 202 text); |
205 return pBaseLine; | 203 return pBaseLine; |
206 } | 204 } |
207 void CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth) { | 205 void CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth) { |
208 FX_FLOAT lastheight = -1; | 206 FX_FLOAT lastheight = -1; |
209 FX_FLOAT lastbaseline = -1; | 207 FX_FLOAT lastbaseline = -1; |
210 FX_FLOAT MinLeftX = 1000000; | 208 FX_FLOAT MinLeftX = 1000000; |
211 FX_FLOAT MaxRightX = 0; | 209 FX_FLOAT MaxRightX = 0; |
212 int i; | 210 int i; |
213 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 211 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
214 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 212 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
215 FX_FLOAT leftx, rightx; | 213 FX_FLOAT leftx, rightx; |
216 if (pBaseLine->GetWidth(leftx, rightx)) { | 214 if (pBaseLine->GetWidth(leftx, rightx)) { |
217 if (leftx < MinLeftX) { | 215 if (leftx < MinLeftX) { |
218 MinLeftX = leftx; | 216 MinLeftX = leftx; |
219 } | 217 } |
220 if (rightx > MaxRightX) { | 218 if (rightx > MaxRightX) { |
221 MaxRightX = rightx; | 219 MaxRightX = rightx; |
222 } | 220 } |
223 } | 221 } |
224 } | 222 } |
225 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 223 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
226 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 224 m_BaseLines.GetAt(i)->MergeBoxes(); |
227 pBaseLine->MergeBoxes(); | |
228 } | 225 } |
229 for (i = 1; i < m_BaseLines.GetSize(); i++) { | 226 for (i = 1; i < m_BaseLines.GetSize(); i++) { |
230 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 227 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
231 CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1); | 228 CTextBaseLine* pPrevLine = m_BaseLines.GetAt(i - 1); |
232 if (pBaseLine->CanMerge(pPrevLine)) { | 229 if (pBaseLine->CanMerge(pPrevLine)) { |
233 pPrevLine->Merge(pBaseLine); | 230 pPrevLine->Merge(pBaseLine); |
234 delete pBaseLine; | 231 delete pBaseLine; |
235 m_BaseLines.RemoveAt(i); | 232 m_BaseLines.RemoveAt(i); |
236 i--; | 233 i--; |
237 } | 234 } |
238 } | 235 } |
239 if (m_bAutoWidth) { | 236 if (m_bAutoWidth) { |
240 int* widths = FX_Alloc(int, m_BaseLines.GetSize()); | 237 int* widths = FX_Alloc(int, m_BaseLines.GetSize()); |
241 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 238 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
242 widths[i] = 0; | 239 widths[i] = 0; |
243 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 240 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
244 int TotalChars = 0; | 241 int TotalChars = 0; |
245 FX_FLOAT TotalWidth = 0; | 242 FX_FLOAT TotalWidth = 0; |
246 int minchars; | 243 int minchars; |
247 pBaseLine->CountChars(TotalChars, TotalWidth, minchars); | 244 pBaseLine->CountChars(TotalChars, TotalWidth, minchars); |
248 if (TotalChars) { | 245 if (TotalChars) { |
249 FX_FLOAT charwidth = TotalWidth / TotalChars; | 246 FX_FLOAT charwidth = TotalWidth / TotalChars; |
250 widths[i] = (int)((MaxRightX - MinLeftX) / charwidth); | 247 widths[i] = (int)((MaxRightX - MinLeftX) / charwidth); |
251 } | 248 } |
252 if (widths[i] > 1000) { | 249 if (widths[i] > 1000) { |
253 widths[i] = 1000; | 250 widths[i] = 1000; |
(...skipping 16 matching lines...) Expand all Loading... |
270 } | 267 } |
271 if (MaxWidth > AvgWidth * 6 / 5) { | 268 if (MaxWidth > AvgWidth * 6 / 5) { |
272 MaxWidth = AvgWidth * 6 / 5; | 269 MaxWidth = AvgWidth * 6 / 5; |
273 } | 270 } |
274 FX_Free(widths); | 271 FX_Free(widths); |
275 if (iMinWidth < MaxWidth) { | 272 if (iMinWidth < MaxWidth) { |
276 iMinWidth = MaxWidth; | 273 iMinWidth = MaxWidth; |
277 } | 274 } |
278 } | 275 } |
279 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 276 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
280 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 277 m_BaseLines.GetAt(i)->MergeBoxes(); |
281 pBaseLine->MergeBoxes(); | |
282 } | 278 } |
283 if (m_bKeepColumn) { | 279 if (m_bKeepColumn) { |
284 FindColumns(); | 280 FindColumns(); |
285 } | 281 } |
286 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 282 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
287 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 283 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
288 if (lastheight >= 0) { | 284 if (lastheight >= 0) { |
289 FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine; | 285 FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine; |
290 if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.5) { | 286 if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.5) { |
291 lines.Add(L""); | 287 lines.Add(L""); |
292 } | 288 } |
293 } | 289 } |
294 lastheight = pBaseLine->m_MaxFontSizeV; | 290 lastheight = pBaseLine->m_MaxFontSizeV; |
295 lastbaseline = pBaseLine->m_BaseLine; | 291 lastbaseline = pBaseLine->m_BaseLine; |
296 CFX_WideString str; | 292 CFX_WideString str; |
297 pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth); | 293 pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth); |
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
441 FX_WCHAR ch = str[i]; | 437 FX_WCHAR ch = str[i]; |
442 // TODO(dsinclair): --.+ +.-- should probably not be a number. | 438 // TODO(dsinclair): --.+ +.-- should probably not be a number. |
443 if (!std::iswdigit(ch) && ch != '-' && ch != '+' && ch != '.' && ch != ' ') | 439 if (!std::iswdigit(ch) && ch != '-' && ch != '+' && ch != '.' && ch != ' ') |
444 return FALSE; | 440 return FALSE; |
445 } | 441 } |
446 return TRUE; | 442 return TRUE; |
447 } | 443 } |
448 void CTextPage::FindColumns() { | 444 void CTextPage::FindColumns() { |
449 int i; | 445 int i; |
450 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 446 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
451 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 447 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
452 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { | 448 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { |
453 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); | 449 CTextBox* pTextBox = pBaseLine->m_TextList.GetAt(j); |
454 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); | 450 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); |
455 if (pColumn == NULL) { | 451 if (pColumn == NULL) { |
456 pColumn = new CTextColumn; | 452 pColumn = new CTextColumn; |
457 pColumn->m_Count = 1; | 453 pColumn->m_Count = 1; |
458 pColumn->m_AvgPos = pTextBox->m_Right; | 454 pColumn->m_AvgPos = pTextBox->m_Right; |
459 pColumn->m_TextPos = -1; | 455 pColumn->m_TextPos = -1; |
460 m_TextColumns.Add(pColumn); | 456 m_TextColumns.Add(pColumn); |
461 } else { | 457 } else { |
462 pColumn->m_AvgPos = | 458 pColumn->m_AvgPos = |
463 (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / | 459 (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / |
464 (pColumn->m_Count + 1); | 460 (pColumn->m_Count + 1); |
465 pColumn->m_Count++; | 461 pColumn->m_Count++; |
466 } | 462 } |
467 } | 463 } |
468 } | 464 } |
469 int mincount = m_BaseLines.GetSize() / 4; | 465 int mincount = m_BaseLines.GetSize() / 4; |
470 for (i = 0; i < m_TextColumns.GetSize(); i++) { | 466 for (i = 0; i < m_TextColumns.GetSize(); i++) { |
471 CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); | 467 CTextColumn* pTextColumn = m_TextColumns.GetAt(i); |
472 if (pTextColumn->m_Count >= mincount) { | 468 if (pTextColumn->m_Count >= mincount) { |
473 continue; | 469 continue; |
474 } | 470 } |
475 delete pTextColumn; | 471 delete pTextColumn; |
476 m_TextColumns.RemoveAt(i); | 472 m_TextColumns.RemoveAt(i); |
477 i--; | 473 i--; |
478 } | 474 } |
479 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 475 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
480 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 476 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
481 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { | 477 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { |
482 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); | 478 CTextBox* pTextBox = pBaseLine->m_TextList.GetAt(j); |
483 if (IsNumber(pTextBox->m_Text)) { | 479 if (IsNumber(pTextBox->m_Text)) { |
484 pTextBox->m_pColumn = FindColumn(pTextBox->m_Right); | 480 pTextBox->m_pColumn = FindColumn(pTextBox->m_Right); |
485 } | 481 } |
486 } | 482 } |
487 } | 483 } |
488 } | 484 } |
489 CTextColumn* CTextPage::FindColumn(FX_FLOAT xpos) { | 485 CTextColumn* CTextPage::FindColumn(FX_FLOAT xpos) { |
490 for (int i = 0; i < m_TextColumns.GetSize(); i++) { | 486 for (int i = 0; i < m_TextColumns.GetSize(); i++) { |
491 CTextColumn* pColumn = (CTextColumn*)m_TextColumns.GetAt(i); | 487 CTextColumn* pColumn = m_TextColumns.GetAt(i); |
492 if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) { | 488 if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) { |
493 return pColumn; | 489 return pColumn; |
494 } | 490 } |
495 } | 491 } |
496 return NULL; | 492 return NULL; |
497 } | 493 } |
498 void CTextPage::BreakSpace(CPDF_TextObject* pTextObj) {} | 494 void CTextPage::BreakSpace(CPDF_TextObject* pTextObj) {} |
499 CTextBaseLine::CTextBaseLine() { | 495 CTextBaseLine::CTextBaseLine() { |
500 m_Top = -100000; | 496 m_Top = -100000; |
501 m_Bottom = 100000; | 497 m_Bottom = 100000; |
502 m_MaxFontSizeV = 0; | 498 m_MaxFontSizeV = 0; |
503 } | 499 } |
504 CTextBaseLine::~CTextBaseLine() { | 500 CTextBaseLine::~CTextBaseLine() { |
505 for (int i = 0; i < m_TextList.GetSize(); i++) { | 501 for (int i = 0; i < m_TextList.GetSize(); i++) { |
506 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 502 delete m_TextList.GetAt(i); |
507 delete pText; | |
508 } | 503 } |
509 } | 504 } |
510 void CTextBaseLine::InsertTextBox(FX_FLOAT leftx, | 505 void CTextBaseLine::InsertTextBox(FX_FLOAT leftx, |
511 FX_FLOAT rightx, | 506 FX_FLOAT rightx, |
512 FX_FLOAT topy, | 507 FX_FLOAT topy, |
513 FX_FLOAT bottomy, | 508 FX_FLOAT bottomy, |
514 FX_FLOAT spacew, | 509 FX_FLOAT spacew, |
515 FX_FLOAT fontsize_v, | 510 FX_FLOAT fontsize_v, |
516 const CFX_WideString& text) { | 511 const CFX_WideString& text) { |
517 if (m_Top < topy) { | 512 if (m_Top < topy) { |
518 m_Top = topy; | 513 m_Top = topy; |
519 } | 514 } |
520 if (m_Bottom > bottomy) { | 515 if (m_Bottom > bottomy) { |
521 m_Bottom = bottomy; | 516 m_Bottom = bottomy; |
522 } | 517 } |
523 if (m_MaxFontSizeV < fontsize_v) { | 518 if (m_MaxFontSizeV < fontsize_v) { |
524 m_MaxFontSizeV = fontsize_v; | 519 m_MaxFontSizeV = fontsize_v; |
525 } | 520 } |
526 int i; | 521 int i; |
527 for (i = 0; i < m_TextList.GetSize(); i++) { | 522 for (i = 0; i < m_TextList.GetSize(); i++) { |
528 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 523 CTextBox* pText = m_TextList.GetAt(i); |
529 if (pText->m_Left > leftx) { | 524 if (pText->m_Left > leftx) { |
530 break; | 525 break; |
531 } | 526 } |
532 } | 527 } |
533 CTextBox* pText = new CTextBox; | 528 CTextBox* pText = new CTextBox; |
534 pText->m_Text = text; | 529 pText->m_Text = text; |
535 pText->m_Left = leftx; | 530 pText->m_Left = leftx; |
536 pText->m_Right = rightx; | 531 pText->m_Right = rightx; |
537 pText->m_Top = topy; | 532 pText->m_Top = topy; |
538 pText->m_Bottom = bottomy; | 533 pText->m_Bottom = bottomy; |
(...skipping 14 matching lines...) Expand all Loading... |
553 inter_bottom, inter_top)) { | 548 inter_bottom, inter_top)) { |
554 return FALSE; | 549 return FALSE; |
555 } | 550 } |
556 FX_FLOAT inter_h = inter_top - inter_bottom; | 551 FX_FLOAT inter_h = inter_top - inter_bottom; |
557 if (inter_h < (m_Top - m_Bottom) / 2 && | 552 if (inter_h < (m_Top - m_Bottom) / 2 && |
558 inter_h < (pOther->m_Top - pOther->m_Bottom) / 2) { | 553 inter_h < (pOther->m_Top - pOther->m_Bottom) / 2) { |
559 return FALSE; | 554 return FALSE; |
560 } | 555 } |
561 FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine); | 556 FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine); |
562 for (int i = 0; i < m_TextList.GetSize(); i++) { | 557 for (int i = 0; i < m_TextList.GetSize(); i++) { |
563 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 558 CTextBox* pText = m_TextList.GetAt(i); |
564 for (int j = 0; j < pOther->m_TextList.GetSize(); j++) { | 559 for (int j = 0; j < pOther->m_TextList.GetSize(); j++) { |
565 CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j); | 560 CTextBox* pOtherText = pOther->m_TextList.GetAt(j); |
566 FX_FLOAT inter_left, inter_right; | 561 FX_FLOAT inter_left, inter_right; |
567 if (!GetIntersection(pText->m_Left, pText->m_Right, pOtherText->m_Left, | 562 if (!GetIntersection(pText->m_Left, pText->m_Right, pOtherText->m_Left, |
568 pOtherText->m_Right, inter_left, inter_right)) { | 563 pOtherText->m_Right, inter_left, inter_right)) { |
569 continue; | 564 continue; |
570 } | 565 } |
571 FX_FLOAT inter_w = inter_right - inter_left; | 566 FX_FLOAT inter_w = inter_right - inter_left; |
572 if (inter_w < pText->m_SpaceWidth / 2 && | 567 if (inter_w < pText->m_SpaceWidth / 2 && |
573 inter_w < pOtherText->m_SpaceWidth / 2) { | 568 inter_w < pOtherText->m_SpaceWidth / 2) { |
574 continue; | 569 continue; |
575 } | 570 } |
576 if (dy >= (pText->m_Bottom - pText->m_Top) / 2 || | 571 if (dy >= (pText->m_Bottom - pText->m_Top) / 2 || |
577 dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) { | 572 dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) { |
578 return FALSE; | 573 return FALSE; |
579 } | 574 } |
580 } | 575 } |
581 } | 576 } |
582 return TRUE; | 577 return TRUE; |
583 } | 578 } |
584 void CTextBaseLine::Merge(CTextBaseLine* pOther) { | 579 void CTextBaseLine::Merge(CTextBaseLine* pOther) { |
585 for (int i = 0; i < pOther->m_TextList.GetSize(); i++) { | 580 for (int i = 0; i < pOther->m_TextList.GetSize(); i++) { |
586 CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i); | 581 CTextBox* pText = pOther->m_TextList.GetAt(i); |
587 InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bottom, | 582 InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bottom, |
588 pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text); | 583 pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text); |
589 } | 584 } |
590 } | 585 } |
591 FX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) { | 586 FX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) { |
592 int i; | 587 int i; |
593 for (i = 0; i < m_TextList.GetSize(); i++) { | 588 for (i = 0; i < m_TextList.GetSize(); i++) { |
594 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 589 CTextBox* pText = m_TextList.GetAt(i); |
595 if (pText->m_Text != L" ") { | 590 if (pText->m_Text != L" ") { |
596 break; | 591 break; |
597 } | 592 } |
598 } | 593 } |
599 if (i == m_TextList.GetSize()) { | 594 if (i == m_TextList.GetSize()) { |
600 return FALSE; | 595 return FALSE; |
601 } | 596 } |
602 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 597 CTextBox* pText = m_TextList.GetAt(i); |
603 leftx = pText->m_Left; | 598 leftx = pText->m_Left; |
604 for (i = m_TextList.GetSize() - 1; i >= 0; i--) { | 599 for (i = m_TextList.GetSize() - 1; i >= 0; i--) { |
605 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 600 CTextBox* pText = m_TextList.GetAt(i); |
606 if (pText->m_Text != L" ") { | 601 if (pText->m_Text != L" ") { |
607 break; | 602 break; |
608 } | 603 } |
609 } | 604 } |
610 pText = (CTextBox*)m_TextList.GetAt(i); | 605 pText = m_TextList.GetAt(i); |
611 rightx = pText->m_Right; | 606 rightx = pText->m_Right; |
612 return TRUE; | 607 return TRUE; |
613 } | 608 } |
614 void CTextBaseLine::MergeBoxes() { | 609 void CTextBaseLine::MergeBoxes() { |
615 int i = 0; | 610 int i = 0; |
616 while (1) { | 611 while (1) { |
617 if (i >= m_TextList.GetSize() - 1) { | 612 if (i >= m_TextList.GetSize() - 1) { |
618 break; | 613 break; |
619 } | 614 } |
620 CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i); | 615 CTextBox* pThisText = m_TextList.GetAt(i); |
621 CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1); | 616 CTextBox* pNextText = m_TextList.GetAt(i + 1); |
622 FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right; | 617 FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right; |
623 FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0) | 618 FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0) |
624 ? pNextText->m_SpaceWidth | 619 ? pNextText->m_SpaceWidth |
625 : pThisText->m_SpaceWidth; | 620 : pThisText->m_SpaceWidth; |
626 if (spacew > 0.0 && dx < spacew * 2) { | 621 if (spacew > 0.0 && dx < spacew * 2) { |
627 pThisText->m_Right = pNextText->m_Right; | 622 pThisText->m_Right = pNextText->m_Right; |
628 if (dx > spacew * 1.5) { | 623 if (dx > spacew * 1.5) { |
629 pThisText->m_Text += L" "; | 624 pThisText->m_Text += L" "; |
630 } else if (dx > spacew / 3) { | 625 } else if (dx > spacew / 3) { |
631 pThisText->m_Text += L' '; | 626 pThisText->m_Text += L' '; |
632 } | 627 } |
633 pThisText->m_Text += pNextText->m_Text; | 628 pThisText->m_Text += pNextText->m_Text; |
634 pThisText->m_SpaceWidth = | 629 pThisText->m_SpaceWidth = |
635 pNextText->m_SpaceWidth == 0.0 ? spacew : pNextText->m_SpaceWidth; | 630 pNextText->m_SpaceWidth == 0.0 ? spacew : pNextText->m_SpaceWidth; |
636 m_TextList.RemoveAt(i + 1); | 631 m_TextList.RemoveAt(i + 1); |
637 delete pNextText; | 632 delete pNextText; |
638 } else { | 633 } else { |
639 i++; | 634 i++; |
640 } | 635 } |
641 } | 636 } |
642 } | 637 } |
643 void CTextBaseLine::WriteOutput(CFX_WideString& str, | 638 void CTextBaseLine::WriteOutput(CFX_WideString& str, |
644 FX_FLOAT leftx, | 639 FX_FLOAT leftx, |
645 FX_FLOAT pagewidth, | 640 FX_FLOAT pagewidth, |
646 int iTextWidth) { | 641 int iTextWidth) { |
647 int lastpos = -1; | 642 int lastpos = -1; |
648 for (int i = 0; i < m_TextList.GetSize(); i++) { | 643 for (int i = 0; i < m_TextList.GetSize(); i++) { |
649 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 644 CTextBox* pText = m_TextList.GetAt(i); |
650 int xpos; | 645 int xpos; |
651 if (pText->m_pColumn) { | 646 if (pText->m_pColumn) { |
652 xpos = | 647 xpos = |
653 (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pagewidth + | 648 (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pagewidth + |
654 0.5); | 649 0.5); |
655 xpos -= pText->m_Text.GetLength(); | 650 xpos -= pText->m_Text.GetLength(); |
656 } else { | 651 } else { |
657 xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5); | 652 xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5); |
658 } | 653 } |
659 if (xpos <= lastpos) { | 654 if (xpos <= lastpos) { |
660 xpos = lastpos + 1; | 655 xpos = lastpos + 1; |
661 } | 656 } |
662 for (int j = lastpos + 1; j < xpos; j++) { | 657 for (int j = lastpos + 1; j < xpos; j++) { |
663 str += ' '; | 658 str += ' '; |
664 } | 659 } |
665 CFX_WideString sSrc(pText->m_Text); | 660 CFX_WideString sSrc(pText->m_Text); |
666 NormalizeString(sSrc); | 661 NormalizeString(sSrc); |
667 str += sSrc; | 662 str += sSrc; |
668 str += ' '; | 663 str += ' '; |
669 lastpos = xpos + pText->m_Text.GetLength(); | 664 lastpos = xpos + pText->m_Text.GetLength(); |
670 } | 665 } |
671 } | 666 } |
672 void CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars) { | 667 void CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars) { |
673 minchars = 0; | 668 minchars = 0; |
674 for (int i = 0; i < m_TextList.GetSize(); i++) { | 669 for (int i = 0; i < m_TextList.GetSize(); i++) { |
675 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 670 CTextBox* pText = m_TextList.GetAt(i); |
676 if (pText->m_Right - pText->m_Left < 0.002) { | 671 if (pText->m_Right - pText->m_Left < 0.002) { |
677 continue; | 672 continue; |
678 } | 673 } |
679 count += pText->m_Text.GetLength(); | 674 count += pText->m_Text.GetLength(); |
680 width += pText->m_Right - pText->m_Left; | 675 width += pText->m_Right - pText->m_Left; |
681 minchars += pText->m_Text.GetLength() + 1; | 676 minchars += pText->m_Text.GetLength() + 1; |
682 } | 677 } |
683 } | 678 } |
684 #define PI 3.1415926535897932384626433832795 | 679 #define PI 3.1415926535897932384626433832795 |
685 static void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox) { | 680 static void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox) { |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
785 FX_DWORD flags) { | 780 FX_DWORD flags) { |
786 buffer.EstimateSize(0, 10240); | 781 buffer.EstimateSize(0, 10240); |
787 CPDF_Page page; | 782 CPDF_Page page; |
788 page.Load(pDoc, pPage); | 783 page.Load(pDoc, pPage); |
789 CPDF_ParseOptions options; | 784 CPDF_ParseOptions options; |
790 options.m_bTextOnly = TRUE; | 785 options.m_bTextOnly = TRUE; |
791 options.m_bSeparateForm = FALSE; | 786 options.m_bSeparateForm = FALSE; |
792 page.ParseContent(&options); | 787 page.ParseContent(&options); |
793 GetTextStream_Unicode(buffer, &page, TRUE, NULL); | 788 GetTextStream_Unicode(buffer, &page, TRUE, NULL); |
794 } | 789 } |
OLD | NEW |