OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "pdf/pdfium/pdfium_page.h" | 5 #include "pdf/pdfium/pdfium_page.h" |
6 | 6 |
7 #include <math.h> | 7 #include <math.h> |
8 #include <stddef.h> | 8 #include <stddef.h> |
9 | 9 |
10 #include <algorithm> | 10 #include <algorithm> |
(...skipping 354 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
365 int text_run_font_size = FPDFText_GetFontSize(text_page, char_index); | 365 int text_run_font_size = FPDFText_GetFontSize(text_page, char_index); |
366 pp::FloatRect text_run_bounds = | 366 pp::FloatRect text_run_bounds = |
367 GetFloatCharRectInPixels(page, text_page, char_index); | 367 GetFloatCharRectInPixels(page, text_page, char_index); |
368 char_index++; | 368 char_index++; |
369 while (char_index < chars_count) { | 369 while (char_index < chars_count) { |
370 unsigned int character = FPDFText_GetUnicode(text_page, char_index); | 370 unsigned int character = FPDFText_GetUnicode(text_page, char_index); |
371 | 371 |
372 if (!base::IsUnicodeWhitespace(character)) { | 372 if (!base::IsUnicodeWhitespace(character)) { |
373 // TODO(dmazzoni): this assumes horizontal text. | 373 // TODO(dmazzoni): this assumes horizontal text. |
374 // https://crbug.com/580311 | 374 // https://crbug.com/580311 |
375 pp::FloatRect char_rect = | 375 pp::FloatRect char_rect = GetFloatCharRectInPixels( |
376 GetFloatCharRectInPixels(page, text_page, char_index); | 376 page, text_page, char_index); |
377 if (!char_rect.IsEmpty() && !OverlapsOnYAxis(text_run_bounds, char_rect)) | 377 if (!char_rect.IsEmpty() && !OverlapsOnYAxis(text_run_bounds, char_rect)) |
378 break; | 378 break; |
379 | 379 |
380 int font_size = FPDFText_GetFontSize(text_page, char_index); | 380 int font_size = FPDFText_GetFontSize(text_page, char_index); |
381 if (font_size != text_run_font_size) | 381 if (font_size != text_run_font_size) |
382 break; | 382 break; |
383 | 383 |
| 384 // Heuristic: split a text run after a space longer than 3 average |
| 385 // characters. |
| 386 double avg_char_width = |
| 387 text_run_bounds.width() / (char_index - start_char_index); |
| 388 if (char_rect.x() - text_run_bounds.right() > avg_char_width * 3) |
| 389 break; |
| 390 |
384 text_run_bounds = text_run_bounds.Union(char_rect); | 391 text_run_bounds = text_run_bounds.Union(char_rect); |
385 } | 392 } |
386 | 393 |
387 char_index++; | 394 char_index++; |
388 } | 395 } |
389 | 396 |
| 397 // Some PDFs have missing or obviously bogus font sizes; substitute the |
| 398 // height of the bounding box in those cases. |
| 399 if (text_run_font_size <= 1 || |
| 400 text_run_font_size < text_run_bounds.height() / 2 || |
| 401 text_run_font_size > text_run_bounds.height() * 2) { |
| 402 text_run_font_size = text_run_bounds.height(); |
| 403 } |
| 404 |
390 *out_len = char_index - start_char_index; | 405 *out_len = char_index - start_char_index; |
391 *out_font_size = text_run_font_size; | 406 *out_font_size = text_run_font_size; |
392 *out_bounds = text_run_bounds; | 407 *out_bounds = text_run_bounds; |
393 } | 408 } |
394 | 409 |
395 uint32_t PDFiumPage::GetCharUnicode(int char_index) { | 410 uint32_t PDFiumPage::GetCharUnicode(int char_index) { |
396 FPDF_TEXTPAGE text_page = GetTextPage(); | 411 FPDF_TEXTPAGE text_page = GetTextPage(); |
397 return FPDFText_GetUnicode(text_page, char_index); | 412 return FPDFText_GetUnicode(text_page, char_index); |
398 } | 413 } |
399 | 414 |
400 double PDFiumPage::GetCharWidth(int char_index) { | 415 pp::FloatRect PDFiumPage::GetCharBounds(int char_index) { |
401 FPDF_PAGE page = GetPage(); | 416 FPDF_PAGE page = GetPage(); |
402 FPDF_TEXTPAGE text_page = GetTextPage(); | 417 FPDF_TEXTPAGE text_page = GetTextPage(); |
403 return GetFloatCharRectInPixels(page, text_page, char_index).width(); | 418 return GetFloatCharRectInPixels(page, text_page, char_index); |
404 } | 419 } |
405 | 420 |
406 PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point, | 421 PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point, |
407 int rotation, | 422 int rotation, |
408 int* char_index, | 423 int* char_index, |
409 int* form_type, | 424 int* form_type, |
410 LinkTarget* target) { | 425 LinkTarget* target) { |
411 if (!available_) | 426 if (!available_) |
412 return NONSELECTABLE_AREA; | 427 return NONSELECTABLE_AREA; |
413 pp::Point point2 = point - rect_.point(); | 428 pp::Point point2 = point - rect_.point(); |
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
671 page_->loading_count_--; | 686 page_->loading_count_--; |
672 } | 687 } |
673 | 688 |
674 PDFiumPage::Link::Link() { | 689 PDFiumPage::Link::Link() { |
675 } | 690 } |
676 | 691 |
677 PDFiumPage::Link::~Link() { | 692 PDFiumPage::Link::~Link() { |
678 } | 693 } |
679 | 694 |
680 } // namespace chrome_pdf | 695 } // namespace chrome_pdf |
OLD | NEW |