Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "pdf/pdfium/pdfium_page.h" | 5 #include "pdf/pdfium/pdfium_page.h" |
| 6 | 6 |
| 7 #include <math.h> | 7 #include <math.h> |
| 8 #include <stddef.h> | 8 #include <stddef.h> |
| 9 | 9 |
| 10 #include <algorithm> | 10 #include <algorithm> |
| 11 #include <memory> | 11 #include <memory> |
| 12 | 12 |
| 13 #include "base/logging.h" | 13 #include "base/logging.h" |
| 14 #include "base/strings/string_number_conversions.h" | 14 #include "base/strings/string_number_conversions.h" |
| 15 #include "base/strings/string_util.h" | 15 #include "base/strings/string_util.h" |
| 16 #include "base/strings/utf_string_conversions.h" | 16 #include "base/strings/utf_string_conversions.h" |
| 17 #include "base/values.h" | 17 #include "base/values.h" |
| 18 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h" | 18 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h" |
| 19 #include "pdf/pdfium/pdfium_engine.h" | 19 #include "pdf/pdfium/pdfium_engine.h" |
| 20 #include "printing/units.h" | |
| 20 | 21 |
| 21 // Used when doing hit detection. | 22 // Used when doing hit detection. |
| 22 #define kTolerance 20.0 | 23 #define kTolerance 20.0 |
| 23 | 24 |
| 25 using printing::ConvertUnitDouble; | |
| 26 using printing::kPointsPerInch; | |
| 27 using printing::kPixelsPerInch; | |
| 28 | |
| 24 namespace { | 29 namespace { |
| 25 | 30 |
| 26 // Dictionary Value key names for returning the accessible page content as JSON. | 31 // Dictionary Value key names for returning the accessible page content as JSON. |
| 27 const char kPageWidth[] = "width"; | 32 const char kPageWidth[] = "width"; |
| 28 const char kPageHeight[] = "height"; | 33 const char kPageHeight[] = "height"; |
| 29 const char kPageTextBox[] = "textBox"; | 34 const char kPageTextBox[] = "textBox"; |
| 30 const char kTextBoxLeft[] = "left"; | 35 const char kTextBoxLeft[] = "left"; |
| 31 const char kTextBoxTop[] = "top"; | 36 const char kTextBoxTop[] = "top"; |
| 32 const char kTextBoxWidth[] = "width"; | 37 const char kTextBoxWidth[] = "width"; |
| 33 const char kTextBoxHeight[] = "height"; | 38 const char kTextBoxHeight[] = "height"; |
| (...skipping 19 matching lines...) | |
| 53 if (max_x < min_x) | 58 if (max_x < min_x) |
| 54 std::swap(min_x, max_x); | 59 std::swap(min_x, max_x); |
| 55 if (max_y < min_y) | 60 if (max_y < min_y) |
| 56 std::swap(min_y, max_y); | 61 std::swap(min_y, max_y); |
| 57 | 62 |
| 58 pp::Rect output_rect(min_x, min_y, max_x - min_x, max_y - min_y); | 63 pp::Rect output_rect(min_x, min_y, max_x - min_x, max_y - min_y); |
| 59 output_rect.Intersect(pp::Rect(0, 0, output_width, output_height)); | 64 output_rect.Intersect(pp::Rect(0, 0, output_width, output_height)); |
| 60 return output_rect; | 65 return output_rect; |
| 61 } | 66 } |
| 62 | 67 |
| 68 pp::FloatRect FloatPageRectToPixelRect( | |
| 69 FPDF_PAGE page, const pp::FloatRect& input) { | |
| 70 int output_width = FPDF_GetPageWidth(page); | |
| 71 int output_height = FPDF_GetPageHeight(page); | |
| 72 | |
| 73 int min_x; | |
| 74 int min_y; | |
| 75 int max_x; | |
| 76 int max_y; | |
| 77 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, | |
| 78 input.x(), input.y(), &min_x, &min_y); | |
| 79 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, | |
| 80 input.right(), input.bottom(), &max_x, &max_y); | |
| 81 | |
| 82 if (max_x < min_x) | |
| 83 std::swap(min_x, max_x); | |
| 84 if (max_y < min_y) | |
| 85 std::swap(min_y, max_y); | |
| 86 | |
| 87 pp::FloatRect output_rect( | |
| 88 ConvertUnitDouble(min_x, kPointsPerInch, kPixelsPerInch), | |
| 89 ConvertUnitDouble(min_y, kPointsPerInch, kPixelsPerInch), | |
| 90 ConvertUnitDouble(max_x - min_x, kPointsPerInch, kPixelsPerInch), | |
| 91 ConvertUnitDouble(max_y - min_y, kPointsPerInch, kPixelsPerInch)); | |
| 92 /* | |
| 93 output_rect.Intersect(pp::FloatRect( | |
| 94 0, | |
| 95 0, | |
| 96 ConvertUnitDouble(output_width, kPointsPerInch, kPixelsPerInch), | |
| 97 ConvertUnitDouble(output_height, kPointsPerInch, kPixelsPerInch))); | |
| 98 */ | |
| 99 return output_rect; | |
| 100 } | |
| 101 | |
| 63 pp::Rect GetCharRectInGViewCoords(FPDF_PAGE page, FPDF_TEXTPAGE text_page, | 102 pp::Rect GetCharRectInGViewCoords(FPDF_PAGE page, FPDF_TEXTPAGE text_page, |
| 64 int index) { | 103 int index) { |
| 65 double left, right, bottom, top; | 104 double left, right, bottom, top; |
| 66 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); | 105 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); |
| 67 if (right < left) | 106 if (right < left) |
| 68 std::swap(left, right); | 107 std::swap(left, right); |
| 69 if (bottom < top) | 108 if (bottom < top) |
| 70 std::swap(top, bottom); | 109 std::swap(top, bottom); |
| 71 pp::Rect page_coords(left, top, right - left, bottom - top); | 110 pp::Rect page_coords(left, top, right - left, bottom - top); |
| 72 return PageRectToGViewRect(page, page_coords); | 111 return PageRectToGViewRect(page, page_coords); |
| 73 } | 112 } |
| 74 | 113 |
| 114 pp::FloatRect GetFloatCharRectInPixels( | |
| 115 FPDF_PAGE page, FPDF_TEXTPAGE text_page, int index) { | |
| 116 double left, right, bottom, top; | |
| 117 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); | |
| 118 if (right < left) | |
| 119 std::swap(left, right); | |
| 120 if (bottom < top) | |
| 121 std::swap(top, bottom); | |
| 122 pp::FloatRect page_coords(left, top, right - left, bottom - top); | |
| 123 return FloatPageRectToPixelRect(page, page_coords); | |
| 124 } | |
| 125 | |
| 75 // This is the character PDFium inserts where a word is broken across lines. | 126 // This is the character PDFium inserts where a word is broken across lines. |
| 76 const unsigned int kSoftHyphen = 0x02; | 127 const unsigned int kSoftHyphen = 0x02; |
| 77 | 128 |
| 78 // The following characters should all be recognized as Unicode newlines: | 129 // The following characters should all be recognized as Unicode newlines: |
| 79 // LF: Line Feed, U+000A | 130 // LF: Line Feed, U+000A |
| 80 // VT: Vertical Tab, U+000B | 131 // VT: Vertical Tab, U+000B |
| 81 // FF: Form Feed, U+000C | 132 // FF: Form Feed, U+000C |
| 82 // CR: Carriage Return, U+000D | 133 // CR: Carriage Return, U+000D |
| 83 // CR+LF: CR (U+000D) followed by LF (U+000A) | 134 // CR+LF: CR (U+000D) followed by LF (U+000A) |
| 84 // NEL: Next Line, U+0085 | 135 // NEL: Next Line, U+0085 |
| 85 // LS: Line Separator, U+2028 | 136 // LS: Line Separator, U+2028 |
| 86 // PS: Paragraph Separator, U+2029. | 137 // PS: Paragraph Separator, U+2029. |
| 87 // Source: http://en.wikipedia.org/wiki/Newline#Unicode . | 138 // Source: http://en.wikipedia.org/wiki/Newline#Unicode . |
| 88 const unsigned int kUnicodeNewlines[] = { | 139 const unsigned int kUnicodeNewlines[] = { |
| 89 0xA, 0xB, 0xC, 0xD, 0X85, 0x2028, 0x2029 | 140 0xA, 0xB, 0xC, 0xD, 0X85, 0x2028, 0x2029 |
| 90 }; | 141 }; |
| 91 | 142 |
| 92 bool IsSoftHyphen(unsigned int character) { | 143 bool IsSoftHyphen(unsigned int character) { |
| 93 return kSoftHyphen == character; | 144 return kSoftHyphen == character; |
| 94 } | 145 } |
| 95 | 146 |
| 96 bool OverlapsOnYAxis(const pp::Rect &a, const pp::Rect& b) { | 147 bool OverlapsOnYAxis(const pp::Rect &a, const pp::Rect& b) { |
| 97 return !(a.IsEmpty() || b.IsEmpty() || | 148 return !(a.IsEmpty() || b.IsEmpty() || |
| 98 a.bottom() < b.y() || b.bottom() < a.y()); | 149 a.bottom() < b.y() || b.bottom() < a.y()); |
| 99 } | 150 } |
| 100 | 151 |
| 152 bool OverlapsOnYAxis(const pp::FloatRect &a, const pp::FloatRect& b) { | |
| 153 return !(a.IsEmpty() || b.IsEmpty() || | |
| 154 a.bottom() < b.y() || b.bottom() < a.y()); | |
| 155 } | |
| 156 | |
| 101 bool IsEol(unsigned int character) { | 157 bool IsEol(unsigned int character) { |
| 102 const unsigned int* first = kUnicodeNewlines; | 158 const unsigned int* first = kUnicodeNewlines; |
| 103 const unsigned int* last = kUnicodeNewlines + arraysize(kUnicodeNewlines); | 159 const unsigned int* last = kUnicodeNewlines + arraysize(kUnicodeNewlines); |
| 104 return std::find(first, last, character) != last; | 160 return std::find(first, last, character) != last; |
| 105 } | 161 } |
| 106 | 162 |
| 107 } // namespace | 163 } // namespace |
| 108 | 164 |
| 109 namespace chrome_pdf { | 165 namespace chrome_pdf { |
| 110 | 166 |
| (...skipping 181 matching lines...) | |
| 292 if (!base::IsUnicodeWhitespace(character)) | 348 if (!base::IsUnicodeWhitespace(character)) |
| 293 word_rect = word_rect.Union(char_rect); | 349 word_rect = word_rect.Union(char_rect); |
| 294 } | 350 } |
| 295 } | 351 } |
| 296 | 352 |
| 297 node->Set(kPageTextBox, text.release()); // Takes ownership of |text| | 353 node->Set(kPageTextBox, text.release()); // Takes ownership of |text| |
| 298 | 354 |
| 299 return node; | 355 return node; |
| 300 } | 356 } |
| 301 | 357 |
| 358 bool PDFiumPage::GetTextRunInfo( | |
| Review comment — Lei Zhang (2016/05/09 23:58:49): Can this be void since it always returns true? The… | |
| Review comment — dmazzoni (2016/05/10 22:09:18): Done. | |
| 359 int start_char_index, | |
| 360 uint32_t* out_len, | |
| 361 double* out_font_size, | |
| 362 pp::FloatRect* out_bounds) { | |
| 363 FPDF_PAGE page = GetPage(); | |
| 364 FPDF_TEXTPAGE text_page = GetTextPage(); | |
| 365 int chars_count = FPDFText_CountChars(text_page); | |
| 366 int char_index = start_char_index; | |
| 367 while (char_index < chars_count && | |
| 368 base::IsUnicodeWhitespace( | |
| 369 FPDFText_GetUnicode(text_page, char_index))) { | |
| 370 char_index++; | |
| 371 } | |
| 372 int text_run_font_size = FPDFText_GetFontSize(text_page, char_index); | |
| 373 pp::FloatRect text_run_bounds = GetFloatCharRectInPixels( | |
| 374 page, text_page, char_index); | |
| 375 char_index++; | |
| 376 while (char_index < chars_count) { | |
| 377 unsigned int character = FPDFText_GetUnicode(text_page, char_index); | |
| 378 | |
| 379 if (!base::IsUnicodeWhitespace(character)) { | |
| 380 // TODO(dmazzoni): this assumes horizontal text. | |
| 381 // https://crbug.com/580311 | |
| 382 pp::FloatRect char_rect = GetFloatCharRectInPixels( | |
| 383 page, text_page, char_index); | |
| 384 if (!char_rect.IsEmpty() && !OverlapsOnYAxis(text_run_bounds, char_rect)) | |
| 385 break; | |
| 386 | |
| 387 int font_size = FPDFText_GetFontSize(text_page, char_index); | |
| 388 if (font_size != text_run_font_size) | |
| 389 break; | |
| 390 | |
| 391 text_run_bounds = text_run_bounds.Union(char_rect); | |
| 392 } | |
| 393 | |
| 394 char_index++; | |
| 395 } | |
| 396 | |
| 397 *out_len = char_index - start_char_index; | |
| 398 *out_font_size = text_run_font_size; | |
| 399 *out_bounds = text_run_bounds; | |
| 400 return true; | |
| 401 } | |
| 402 | |
| 403 double PDFiumPage::GetCharWidth(int char_index) { | |
| 404 FPDF_PAGE page = GetPage(); | |
| 405 FPDF_TEXTPAGE text_page = GetTextPage(); | |
| 406 return GetFloatCharRectInPixels(page, text_page, char_index).width(); | |
| 407 } | |
| 408 | |
| 302 PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point, | 409 PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point, |
| 303 int rotation, | 410 int rotation, |
| 304 int* char_index, | 411 int* char_index, |
| 305 int* form_type, | 412 int* form_type, |
| 306 LinkTarget* target) { | 413 LinkTarget* target) { |
| 307 if (!available_) | 414 if (!available_) |
| 308 return NONSELECTABLE_AREA; | 415 return NONSELECTABLE_AREA; |
| 309 pp::Point point2 = point - rect_.point(); | 416 pp::Point point2 = point - rect_.point(); |
| 310 double new_x; | 417 double new_x; |
| 311 double new_y; | 418 double new_y; |
| (...skipping 255 matching lines...) | |
| 567 page_->loading_count_--; | 674 page_->loading_count_--; |
| 568 } | 675 } |
| 569 | 676 |
| 570 PDFiumPage::Link::Link() { | 677 PDFiumPage::Link::Link() { |
| 571 } | 678 } |
| 572 | 679 |
| 573 PDFiumPage::Link::~Link() { | 680 PDFiumPage::Link::~Link() { |
| 574 } | 681 } |
| 575 | 682 |
| 576 } // namespace chrome_pdf | 683 } // namespace chrome_pdf |
| OLD | NEW |