| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "core/include/fpdfapi/fpdf_page.h" | 7 #include "core/include/fpdfapi/fpdf_page.h" |
| 8 #include "core/include/fpdfapi/fpdf_pageobj.h" | 8 #include "core/include/fpdfapi/fpdf_pageobj.h" |
| 9 #include "core/include/fpdfapi/fpdf_resource.h" | 9 #include "core/include/fpdfapi/fpdf_resource.h" |
| 10 #include "core/include/fpdftext/fpdf_text.h" | 10 #include "core/include/fpdftext/fpdf_text.h" |
| (...skipping 23 matching lines...) Expand all Loading... |
| 34 if (ret && !iDef) { | 34 if (ret && !iDef) { |
| 35 return CFX_ByteString(buf, ret); | 35 return CFX_ByteString(buf, ret); |
| 36 } | 36 } |
| 37 const FX_CHAR* altstr = FCS_GetAltStr(unicode); | 37 const FX_CHAR* altstr = FCS_GetAltStr(unicode); |
| 38 return CFX_ByteString(altstr ? altstr : defchar); | 38 return CFX_ByteString(altstr ? altstr : defchar); |
| 39 } | 39 } |
| 40 CTextPage::CTextPage() {} | 40 CTextPage::CTextPage() {} |
| 41 CTextPage::~CTextPage() { | 41 CTextPage::~CTextPage() { |
| 42 int i; | 42 int i; |
| 43 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 43 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 44 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 44 delete m_BaseLines.GetAt(i); |
| 45 delete pBaseLine; | |
| 46 } | 45 } |
| 47 for (i = 0; i < m_TextColumns.GetSize(); i++) { | 46 for (i = 0; i < m_TextColumns.GetSize(); i++) { |
| 48 CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); | 47 delete m_TextColumns.GetAt(i); |
| 49 delete pTextColumn; | |
| 50 } | 48 } |
| 51 } | 49 } |
| 52 void CTextPage::ProcessObject(CPDF_PageObject* pObject) { | 50 void CTextPage::ProcessObject(CPDF_PageObject* pObject) { |
| 53 if (pObject->m_Type != PDFPAGE_TEXT) { | 51 if (pObject->m_Type != PDFPAGE_TEXT) { |
| 54 return; | 52 return; |
| 55 } | 53 } |
| 56 CPDF_TextObject* pText = (CPDF_TextObject*)pObject; | 54 CPDF_TextObject* pText = (CPDF_TextObject*)pObject; |
| 57 CPDF_Font* pFont = pText->m_TextState.GetFont(); | 55 CPDF_Font* pFont = pText->m_TextState.GetFont(); |
| 58 int count = pText->CountItems(); | 56 int count = pText->CountItems(); |
| 59 FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2); | 57 FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2); |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 166 FX_FLOAT spacew, | 164 FX_FLOAT spacew, |
| 167 FX_FLOAT fontsize_v, | 165 FX_FLOAT fontsize_v, |
| 168 CFX_ByteString& str, | 166 CFX_ByteString& str, |
| 169 CPDF_Font* pFont) { | 167 CPDF_Font* pFont) { |
| 170 if (str.GetLength() == 0) { | 168 if (str.GetLength() == 0) { |
| 171 return NULL; | 169 return NULL; |
| 172 } | 170 } |
| 173 if (pBaseLine == NULL) { | 171 if (pBaseLine == NULL) { |
| 174 int i; | 172 int i; |
| 175 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 173 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 176 CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 174 CTextBaseLine* pExistLine = m_BaseLines.GetAt(i); |
| 177 if (pExistLine->m_BaseLine == basey) { | 175 if (pExistLine->m_BaseLine == basey) { |
| 178 pBaseLine = pExistLine; | 176 pBaseLine = pExistLine; |
| 179 break; | 177 break; |
| 180 } | 178 } |
| 181 if (pExistLine->m_BaseLine < basey) { | 179 if (pExistLine->m_BaseLine < basey) { |
| 182 break; | 180 break; |
| 183 } | 181 } |
| 184 } | 182 } |
| 185 if (pBaseLine == NULL) { | 183 if (pBaseLine == NULL) { |
| 186 pBaseLine = new CTextBaseLine; | 184 pBaseLine = new CTextBaseLine; |
| (...skipping 17 matching lines...) Expand all Loading... |
| 204 text); | 202 text); |
| 205 return pBaseLine; | 203 return pBaseLine; |
| 206 } | 204 } |
| 207 void CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth) { | 205 void CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth) { |
| 208 FX_FLOAT lastheight = -1; | 206 FX_FLOAT lastheight = -1; |
| 209 FX_FLOAT lastbaseline = -1; | 207 FX_FLOAT lastbaseline = -1; |
| 210 FX_FLOAT MinLeftX = 1000000; | 208 FX_FLOAT MinLeftX = 1000000; |
| 211 FX_FLOAT MaxRightX = 0; | 209 FX_FLOAT MaxRightX = 0; |
| 212 int i; | 210 int i; |
| 213 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 211 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 214 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 212 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
| 215 FX_FLOAT leftx, rightx; | 213 FX_FLOAT leftx, rightx; |
| 216 if (pBaseLine->GetWidth(leftx, rightx)) { | 214 if (pBaseLine->GetWidth(leftx, rightx)) { |
| 217 if (leftx < MinLeftX) { | 215 if (leftx < MinLeftX) { |
| 218 MinLeftX = leftx; | 216 MinLeftX = leftx; |
| 219 } | 217 } |
| 220 if (rightx > MaxRightX) { | 218 if (rightx > MaxRightX) { |
| 221 MaxRightX = rightx; | 219 MaxRightX = rightx; |
| 222 } | 220 } |
| 223 } | 221 } |
| 224 } | 222 } |
| 225 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 223 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 226 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 224 m_BaseLines.GetAt(i)->MergeBoxes(); |
| 227 pBaseLine->MergeBoxes(); | |
| 228 } | 225 } |
| 229 for (i = 1; i < m_BaseLines.GetSize(); i++) { | 226 for (i = 1; i < m_BaseLines.GetSize(); i++) { |
| 230 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 227 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
| 231 CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1); | 228 CTextBaseLine* pPrevLine = m_BaseLines.GetAt(i - 1); |
| 232 if (pBaseLine->CanMerge(pPrevLine)) { | 229 if (pBaseLine->CanMerge(pPrevLine)) { |
| 233 pPrevLine->Merge(pBaseLine); | 230 pPrevLine->Merge(pBaseLine); |
| 234 delete pBaseLine; | 231 delete pBaseLine; |
| 235 m_BaseLines.RemoveAt(i); | 232 m_BaseLines.RemoveAt(i); |
| 236 i--; | 233 i--; |
| 237 } | 234 } |
| 238 } | 235 } |
| 239 if (m_bAutoWidth) { | 236 if (m_bAutoWidth) { |
| 240 int* widths = FX_Alloc(int, m_BaseLines.GetSize()); | 237 int* widths = FX_Alloc(int, m_BaseLines.GetSize()); |
| 241 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 238 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 242 widths[i] = 0; | 239 widths[i] = 0; |
| 243 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 240 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
| 244 int TotalChars = 0; | 241 int TotalChars = 0; |
| 245 FX_FLOAT TotalWidth = 0; | 242 FX_FLOAT TotalWidth = 0; |
| 246 int minchars; | 243 int minchars; |
| 247 pBaseLine->CountChars(TotalChars, TotalWidth, minchars); | 244 pBaseLine->CountChars(TotalChars, TotalWidth, minchars); |
| 248 if (TotalChars) { | 245 if (TotalChars) { |
| 249 FX_FLOAT charwidth = TotalWidth / TotalChars; | 246 FX_FLOAT charwidth = TotalWidth / TotalChars; |
| 250 widths[i] = (int)((MaxRightX - MinLeftX) / charwidth); | 247 widths[i] = (int)((MaxRightX - MinLeftX) / charwidth); |
| 251 } | 248 } |
| 252 if (widths[i] > 1000) { | 249 if (widths[i] > 1000) { |
| 253 widths[i] = 1000; | 250 widths[i] = 1000; |
| (...skipping 16 matching lines...) Expand all Loading... |
| 270 } | 267 } |
| 271 if (MaxWidth > AvgWidth * 6 / 5) { | 268 if (MaxWidth > AvgWidth * 6 / 5) { |
| 272 MaxWidth = AvgWidth * 6 / 5; | 269 MaxWidth = AvgWidth * 6 / 5; |
| 273 } | 270 } |
| 274 FX_Free(widths); | 271 FX_Free(widths); |
| 275 if (iMinWidth < MaxWidth) { | 272 if (iMinWidth < MaxWidth) { |
| 276 iMinWidth = MaxWidth; | 273 iMinWidth = MaxWidth; |
| 277 } | 274 } |
| 278 } | 275 } |
| 279 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 276 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 280 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 277 m_BaseLines.GetAt(i)->MergeBoxes(); |
| 281 pBaseLine->MergeBoxes(); | |
| 282 } | 278 } |
| 283 if (m_bKeepColumn) { | 279 if (m_bKeepColumn) { |
| 284 FindColumns(); | 280 FindColumns(); |
| 285 } | 281 } |
| 286 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 282 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 287 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 283 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
| 288 if (lastheight >= 0) { | 284 if (lastheight >= 0) { |
| 289 FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine; | 285 FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine; |
| 290 if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.5) { | 286 if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.5) { |
| 291 lines.Add(L""); | 287 lines.Add(L""); |
| 292 } | 288 } |
| 293 } | 289 } |
| 294 lastheight = pBaseLine->m_MaxFontSizeV; | 290 lastheight = pBaseLine->m_MaxFontSizeV; |
| 295 lastbaseline = pBaseLine->m_BaseLine; | 291 lastbaseline = pBaseLine->m_BaseLine; |
| 296 CFX_WideString str; | 292 CFX_WideString str; |
| 297 pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth); | 293 pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth); |
| (...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 441 FX_WCHAR ch = str[i]; | 437 FX_WCHAR ch = str[i]; |
| 442 // TODO(dsinclair): --.+ +.-- should probably not be a number. | 438 // TODO(dsinclair): --.+ +.-- should probably not be a number. |
| 443 if (!std::iswdigit(ch) && ch != '-' && ch != '+' && ch != '.' && ch != ' ') | 439 if (!std::iswdigit(ch) && ch != '-' && ch != '+' && ch != '.' && ch != ' ') |
| 444 return FALSE; | 440 return FALSE; |
| 445 } | 441 } |
| 446 return TRUE; | 442 return TRUE; |
| 447 } | 443 } |
| 448 void CTextPage::FindColumns() { | 444 void CTextPage::FindColumns() { |
| 449 int i; | 445 int i; |
| 450 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 446 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 451 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 447 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
| 452 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { | 448 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { |
| 453 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); | 449 CTextBox* pTextBox = pBaseLine->m_TextList.GetAt(j); |
| 454 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); | 450 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); |
| 455 if (pColumn == NULL) { | 451 if (pColumn == NULL) { |
| 456 pColumn = new CTextColumn; | 452 pColumn = new CTextColumn; |
| 457 pColumn->m_Count = 1; | 453 pColumn->m_Count = 1; |
| 458 pColumn->m_AvgPos = pTextBox->m_Right; | 454 pColumn->m_AvgPos = pTextBox->m_Right; |
| 459 pColumn->m_TextPos = -1; | 455 pColumn->m_TextPos = -1; |
| 460 m_TextColumns.Add(pColumn); | 456 m_TextColumns.Add(pColumn); |
| 461 } else { | 457 } else { |
| 462 pColumn->m_AvgPos = | 458 pColumn->m_AvgPos = |
| 463 (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / | 459 (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / |
| 464 (pColumn->m_Count + 1); | 460 (pColumn->m_Count + 1); |
| 465 pColumn->m_Count++; | 461 pColumn->m_Count++; |
| 466 } | 462 } |
| 467 } | 463 } |
| 468 } | 464 } |
| 469 int mincount = m_BaseLines.GetSize() / 4; | 465 int mincount = m_BaseLines.GetSize() / 4; |
| 470 for (i = 0; i < m_TextColumns.GetSize(); i++) { | 466 for (i = 0; i < m_TextColumns.GetSize(); i++) { |
| 471 CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); | 467 CTextColumn* pTextColumn = m_TextColumns.GetAt(i); |
| 472 if (pTextColumn->m_Count >= mincount) { | 468 if (pTextColumn->m_Count >= mincount) { |
| 473 continue; | 469 continue; |
| 474 } | 470 } |
| 475 delete pTextColumn; | 471 delete pTextColumn; |
| 476 m_TextColumns.RemoveAt(i); | 472 m_TextColumns.RemoveAt(i); |
| 477 i--; | 473 i--; |
| 478 } | 474 } |
| 479 for (i = 0; i < m_BaseLines.GetSize(); i++) { | 475 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 480 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 476 CTextBaseLine* pBaseLine = m_BaseLines.GetAt(i); |
| 481 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { | 477 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { |
| 482 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); | 478 CTextBox* pTextBox = pBaseLine->m_TextList.GetAt(j); |
| 483 if (IsNumber(pTextBox->m_Text)) { | 479 if (IsNumber(pTextBox->m_Text)) { |
| 484 pTextBox->m_pColumn = FindColumn(pTextBox->m_Right); | 480 pTextBox->m_pColumn = FindColumn(pTextBox->m_Right); |
| 485 } | 481 } |
| 486 } | 482 } |
| 487 } | 483 } |
| 488 } | 484 } |
| 489 CTextColumn* CTextPage::FindColumn(FX_FLOAT xpos) { | 485 CTextColumn* CTextPage::FindColumn(FX_FLOAT xpos) { |
| 490 for (int i = 0; i < m_TextColumns.GetSize(); i++) { | 486 for (int i = 0; i < m_TextColumns.GetSize(); i++) { |
| 491 CTextColumn* pColumn = (CTextColumn*)m_TextColumns.GetAt(i); | 487 CTextColumn* pColumn = m_TextColumns.GetAt(i); |
| 492 if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) { | 488 if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) { |
| 493 return pColumn; | 489 return pColumn; |
| 494 } | 490 } |
| 495 } | 491 } |
| 496 return NULL; | 492 return NULL; |
| 497 } | 493 } |
| 498 void CTextPage::BreakSpace(CPDF_TextObject* pTextObj) {} | 494 void CTextPage::BreakSpace(CPDF_TextObject* pTextObj) {} |
| 499 CTextBaseLine::CTextBaseLine() { | 495 CTextBaseLine::CTextBaseLine() { |
| 500 m_Top = -100000; | 496 m_Top = -100000; |
| 501 m_Bottom = 100000; | 497 m_Bottom = 100000; |
| 502 m_MaxFontSizeV = 0; | 498 m_MaxFontSizeV = 0; |
| 503 } | 499 } |
| 504 CTextBaseLine::~CTextBaseLine() { | 500 CTextBaseLine::~CTextBaseLine() { |
| 505 for (int i = 0; i < m_TextList.GetSize(); i++) { | 501 for (int i = 0; i < m_TextList.GetSize(); i++) { |
| 506 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 502 delete m_TextList.GetAt(i); |
| 507 delete pText; | |
| 508 } | 503 } |
| 509 } | 504 } |
| 510 void CTextBaseLine::InsertTextBox(FX_FLOAT leftx, | 505 void CTextBaseLine::InsertTextBox(FX_FLOAT leftx, |
| 511 FX_FLOAT rightx, | 506 FX_FLOAT rightx, |
| 512 FX_FLOAT topy, | 507 FX_FLOAT topy, |
| 513 FX_FLOAT bottomy, | 508 FX_FLOAT bottomy, |
| 514 FX_FLOAT spacew, | 509 FX_FLOAT spacew, |
| 515 FX_FLOAT fontsize_v, | 510 FX_FLOAT fontsize_v, |
| 516 const CFX_WideString& text) { | 511 const CFX_WideString& text) { |
| 517 if (m_Top < topy) { | 512 if (m_Top < topy) { |
| 518 m_Top = topy; | 513 m_Top = topy; |
| 519 } | 514 } |
| 520 if (m_Bottom > bottomy) { | 515 if (m_Bottom > bottomy) { |
| 521 m_Bottom = bottomy; | 516 m_Bottom = bottomy; |
| 522 } | 517 } |
| 523 if (m_MaxFontSizeV < fontsize_v) { | 518 if (m_MaxFontSizeV < fontsize_v) { |
| 524 m_MaxFontSizeV = fontsize_v; | 519 m_MaxFontSizeV = fontsize_v; |
| 525 } | 520 } |
| 526 int i; | 521 int i; |
| 527 for (i = 0; i < m_TextList.GetSize(); i++) { | 522 for (i = 0; i < m_TextList.GetSize(); i++) { |
| 528 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 523 CTextBox* pText = m_TextList.GetAt(i); |
| 529 if (pText->m_Left > leftx) { | 524 if (pText->m_Left > leftx) { |
| 530 break; | 525 break; |
| 531 } | 526 } |
| 532 } | 527 } |
| 533 CTextBox* pText = new CTextBox; | 528 CTextBox* pText = new CTextBox; |
| 534 pText->m_Text = text; | 529 pText->m_Text = text; |
| 535 pText->m_Left = leftx; | 530 pText->m_Left = leftx; |
| 536 pText->m_Right = rightx; | 531 pText->m_Right = rightx; |
| 537 pText->m_Top = topy; | 532 pText->m_Top = topy; |
| 538 pText->m_Bottom = bottomy; | 533 pText->m_Bottom = bottomy; |
| (...skipping 14 matching lines...) Expand all Loading... |
| 553 inter_bottom, inter_top)) { | 548 inter_bottom, inter_top)) { |
| 554 return FALSE; | 549 return FALSE; |
| 555 } | 550 } |
| 556 FX_FLOAT inter_h = inter_top - inter_bottom; | 551 FX_FLOAT inter_h = inter_top - inter_bottom; |
| 557 if (inter_h < (m_Top - m_Bottom) / 2 && | 552 if (inter_h < (m_Top - m_Bottom) / 2 && |
| 558 inter_h < (pOther->m_Top - pOther->m_Bottom) / 2) { | 553 inter_h < (pOther->m_Top - pOther->m_Bottom) / 2) { |
| 559 return FALSE; | 554 return FALSE; |
| 560 } | 555 } |
| 561 FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine); | 556 FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine); |
| 562 for (int i = 0; i < m_TextList.GetSize(); i++) { | 557 for (int i = 0; i < m_TextList.GetSize(); i++) { |
| 563 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 558 CTextBox* pText = m_TextList.GetAt(i); |
| 564 for (int j = 0; j < pOther->m_TextList.GetSize(); j++) { | 559 for (int j = 0; j < pOther->m_TextList.GetSize(); j++) { |
| 565 CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j); | 560 CTextBox* pOtherText = pOther->m_TextList.GetAt(j); |
| 566 FX_FLOAT inter_left, inter_right; | 561 FX_FLOAT inter_left, inter_right; |
| 567 if (!GetIntersection(pText->m_Left, pText->m_Right, pOtherText->m_Left, | 562 if (!GetIntersection(pText->m_Left, pText->m_Right, pOtherText->m_Left, |
| 568 pOtherText->m_Right, inter_left, inter_right)) { | 563 pOtherText->m_Right, inter_left, inter_right)) { |
| 569 continue; | 564 continue; |
| 570 } | 565 } |
| 571 FX_FLOAT inter_w = inter_right - inter_left; | 566 FX_FLOAT inter_w = inter_right - inter_left; |
| 572 if (inter_w < pText->m_SpaceWidth / 2 && | 567 if (inter_w < pText->m_SpaceWidth / 2 && |
| 573 inter_w < pOtherText->m_SpaceWidth / 2) { | 568 inter_w < pOtherText->m_SpaceWidth / 2) { |
| 574 continue; | 569 continue; |
| 575 } | 570 } |
| 576 if (dy >= (pText->m_Bottom - pText->m_Top) / 2 || | 571 if (dy >= (pText->m_Bottom - pText->m_Top) / 2 || |
| 577 dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) { | 572 dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) { |
| 578 return FALSE; | 573 return FALSE; |
| 579 } | 574 } |
| 580 } | 575 } |
| 581 } | 576 } |
| 582 return TRUE; | 577 return TRUE; |
| 583 } | 578 } |
| 584 void CTextBaseLine::Merge(CTextBaseLine* pOther) { | 579 void CTextBaseLine::Merge(CTextBaseLine* pOther) { |
| 585 for (int i = 0; i < pOther->m_TextList.GetSize(); i++) { | 580 for (int i = 0; i < pOther->m_TextList.GetSize(); i++) { |
| 586 CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i); | 581 CTextBox* pText = pOther->m_TextList.GetAt(i); |
| 587 InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bottom, | 582 InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bottom, |
| 588 pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text); | 583 pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text); |
| 589 } | 584 } |
| 590 } | 585 } |
| 591 FX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) { | 586 FX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) { |
| 592 int i; | 587 int i; |
| 593 for (i = 0; i < m_TextList.GetSize(); i++) { | 588 for (i = 0; i < m_TextList.GetSize(); i++) { |
| 594 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 589 CTextBox* pText = m_TextList.GetAt(i); |
| 595 if (pText->m_Text != L" ") { | 590 if (pText->m_Text != L" ") { |
| 596 break; | 591 break; |
| 597 } | 592 } |
| 598 } | 593 } |
| 599 if (i == m_TextList.GetSize()) { | 594 if (i == m_TextList.GetSize()) { |
| 600 return FALSE; | 595 return FALSE; |
| 601 } | 596 } |
| 602 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 597 CTextBox* pText = m_TextList.GetAt(i); |
| 603 leftx = pText->m_Left; | 598 leftx = pText->m_Left; |
| 604 for (i = m_TextList.GetSize() - 1; i >= 0; i--) { | 599 for (i = m_TextList.GetSize() - 1; i >= 0; i--) { |
| 605 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 600 CTextBox* pText = m_TextList.GetAt(i); |
| 606 if (pText->m_Text != L" ") { | 601 if (pText->m_Text != L" ") { |
| 607 break; | 602 break; |
| 608 } | 603 } |
| 609 } | 604 } |
| 610 pText = (CTextBox*)m_TextList.GetAt(i); | 605 pText = m_TextList.GetAt(i); |
| 611 rightx = pText->m_Right; | 606 rightx = pText->m_Right; |
| 612 return TRUE; | 607 return TRUE; |
| 613 } | 608 } |
| 614 void CTextBaseLine::MergeBoxes() { | 609 void CTextBaseLine::MergeBoxes() { |
| 615 int i = 0; | 610 int i = 0; |
| 616 while (1) { | 611 while (1) { |
| 617 if (i >= m_TextList.GetSize() - 1) { | 612 if (i >= m_TextList.GetSize() - 1) { |
| 618 break; | 613 break; |
| 619 } | 614 } |
| 620 CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i); | 615 CTextBox* pThisText = m_TextList.GetAt(i); |
| 621 CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1); | 616 CTextBox* pNextText = m_TextList.GetAt(i + 1); |
| 622 FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right; | 617 FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right; |
| 623 FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0) | 618 FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0) |
| 624 ? pNextText->m_SpaceWidth | 619 ? pNextText->m_SpaceWidth |
| 625 : pThisText->m_SpaceWidth; | 620 : pThisText->m_SpaceWidth; |
| 626 if (spacew > 0.0 && dx < spacew * 2) { | 621 if (spacew > 0.0 && dx < spacew * 2) { |
| 627 pThisText->m_Right = pNextText->m_Right; | 622 pThisText->m_Right = pNextText->m_Right; |
| 628 if (dx > spacew * 1.5) { | 623 if (dx > spacew * 1.5) { |
| 629 pThisText->m_Text += L" "; | 624 pThisText->m_Text += L" "; |
| 630 } else if (dx > spacew / 3) { | 625 } else if (dx > spacew / 3) { |
| 631 pThisText->m_Text += L' '; | 626 pThisText->m_Text += L' '; |
| 632 } | 627 } |
| 633 pThisText->m_Text += pNextText->m_Text; | 628 pThisText->m_Text += pNextText->m_Text; |
| 634 pThisText->m_SpaceWidth = | 629 pThisText->m_SpaceWidth = |
| 635 pNextText->m_SpaceWidth == 0.0 ? spacew : pNextText->m_SpaceWidth; | 630 pNextText->m_SpaceWidth == 0.0 ? spacew : pNextText->m_SpaceWidth; |
| 636 m_TextList.RemoveAt(i + 1); | 631 m_TextList.RemoveAt(i + 1); |
| 637 delete pNextText; | 632 delete pNextText; |
| 638 } else { | 633 } else { |
| 639 i++; | 634 i++; |
| 640 } | 635 } |
| 641 } | 636 } |
| 642 } | 637 } |
| 643 void CTextBaseLine::WriteOutput(CFX_WideString& str, | 638 void CTextBaseLine::WriteOutput(CFX_WideString& str, |
| 644 FX_FLOAT leftx, | 639 FX_FLOAT leftx, |
| 645 FX_FLOAT pagewidth, | 640 FX_FLOAT pagewidth, |
| 646 int iTextWidth) { | 641 int iTextWidth) { |
| 647 int lastpos = -1; | 642 int lastpos = -1; |
| 648 for (int i = 0; i < m_TextList.GetSize(); i++) { | 643 for (int i = 0; i < m_TextList.GetSize(); i++) { |
| 649 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 644 CTextBox* pText = m_TextList.GetAt(i); |
| 650 int xpos; | 645 int xpos; |
| 651 if (pText->m_pColumn) { | 646 if (pText->m_pColumn) { |
| 652 xpos = | 647 xpos = |
| 653 (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pagewidth + | 648 (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pagewidth + |
| 654 0.5); | 649 0.5); |
| 655 xpos -= pText->m_Text.GetLength(); | 650 xpos -= pText->m_Text.GetLength(); |
| 656 } else { | 651 } else { |
| 657 xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5); | 652 xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5); |
| 658 } | 653 } |
| 659 if (xpos <= lastpos) { | 654 if (xpos <= lastpos) { |
| 660 xpos = lastpos + 1; | 655 xpos = lastpos + 1; |
| 661 } | 656 } |
| 662 for (int j = lastpos + 1; j < xpos; j++) { | 657 for (int j = lastpos + 1; j < xpos; j++) { |
| 663 str += ' '; | 658 str += ' '; |
| 664 } | 659 } |
| 665 CFX_WideString sSrc(pText->m_Text); | 660 CFX_WideString sSrc(pText->m_Text); |
| 666 NormalizeString(sSrc); | 661 NormalizeString(sSrc); |
| 667 str += sSrc; | 662 str += sSrc; |
| 668 str += ' '; | 663 str += ' '; |
| 669 lastpos = xpos + pText->m_Text.GetLength(); | 664 lastpos = xpos + pText->m_Text.GetLength(); |
| 670 } | 665 } |
| 671 } | 666 } |
| 672 void CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars) { | 667 void CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars) { |
| 673 minchars = 0; | 668 minchars = 0; |
| 674 for (int i = 0; i < m_TextList.GetSize(); i++) { | 669 for (int i = 0; i < m_TextList.GetSize(); i++) { |
| 675 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 670 CTextBox* pText = m_TextList.GetAt(i); |
| 676 if (pText->m_Right - pText->m_Left < 0.002) { | 671 if (pText->m_Right - pText->m_Left < 0.002) { |
| 677 continue; | 672 continue; |
| 678 } | 673 } |
| 679 count += pText->m_Text.GetLength(); | 674 count += pText->m_Text.GetLength(); |
| 680 width += pText->m_Right - pText->m_Left; | 675 width += pText->m_Right - pText->m_Left; |
| 681 minchars += pText->m_Text.GetLength() + 1; | 676 minchars += pText->m_Text.GetLength() + 1; |
| 682 } | 677 } |
| 683 } | 678 } |
| 684 #define PI 3.1415926535897932384626433832795 | 679 #define PI 3.1415926535897932384626433832795 |
| 685 static void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox) { | 680 static void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox) { |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 785 FX_DWORD flags) { | 780 FX_DWORD flags) { |
| 786 buffer.EstimateSize(0, 10240); | 781 buffer.EstimateSize(0, 10240); |
| 787 CPDF_Page page; | 782 CPDF_Page page; |
| 788 page.Load(pDoc, pPage); | 783 page.Load(pDoc, pPage); |
| 789 CPDF_ParseOptions options; | 784 CPDF_ParseOptions options; |
| 790 options.m_bTextOnly = TRUE; | 785 options.m_bTextOnly = TRUE; |
| 791 options.m_bSeparateForm = FALSE; | 786 options.m_bSeparateForm = FALSE; |
| 792 page.ParseContent(&options); | 787 page.ParseContent(&options); |
| 793 GetTextStream_Unicode(buffer, &page, TRUE, NULL); | 788 GetTextStream_Unicode(buffer, &page, TRUE, NULL); |
| 794 } | 789 } |
| OLD | NEW |