OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "pdf/pdfium/pdfium_page.h" | 5 #include "pdf/pdfium/pdfium_page.h" |
6 | 6 |
7 #include <math.h> | 7 #include <math.h> |
8 #include <stddef.h> | 8 #include <stddef.h> |
9 | 9 |
10 #include <algorithm> | 10 #include <algorithm> |
11 #include <memory> | 11 #include <memory> |
12 | 12 |
13 #include "base/logging.h" | 13 #include "base/logging.h" |
14 #include "base/strings/string_number_conversions.h" | 14 #include "base/strings/string_number_conversions.h" |
15 #include "base/strings/string_util.h" | 15 #include "base/strings/string_util.h" |
16 #include "base/strings/utf_string_conversions.h" | 16 #include "base/strings/utf_string_conversions.h" |
17 #include "base/values.h" | 17 #include "base/values.h" |
18 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h" | 18 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h" |
19 #include "pdf/pdfium/pdfium_engine.h" | 19 #include "pdf/pdfium/pdfium_engine.h" |
| 20 #include "printing/units.h" |
20 | 21 |
// Tolerance used when doing hit detection. Declared as a typed constant
// instead of a preprocessor macro so the name obeys normal C++ scoping and
// type checking.
const double kTolerance = 20.0;
23 | 24 |
| 25 using printing::ConvertUnitDouble; |
| 26 using printing::kPointsPerInch; |
| 27 using printing::kPixelsPerInch; |
| 28 |
24 namespace { | 29 namespace { |
25 | 30 |
26 // Dictionary Value key names for returning the accessible page content as JSON. | 31 // Dictionary Value key names for returning the accessible page content as JSON. |
27 const char kPageWidth[] = "width"; | 32 const char kPageWidth[] = "width"; |
28 const char kPageHeight[] = "height"; | 33 const char kPageHeight[] = "height"; |
29 const char kPageTextBox[] = "textBox"; | 34 const char kPageTextBox[] = "textBox"; |
30 const char kTextBoxLeft[] = "left"; | 35 const char kTextBoxLeft[] = "left"; |
31 const char kTextBoxTop[] = "top"; | 36 const char kTextBoxTop[] = "top"; |
32 const char kTextBoxWidth[] = "width"; | 37 const char kTextBoxWidth[] = "width"; |
33 const char kTextBoxHeight[] = "height"; | 38 const char kTextBoxHeight[] = "height"; |
(...skipping 19 matching lines...) Expand all Loading... |
53 if (max_x < min_x) | 58 if (max_x < min_x) |
54 std::swap(min_x, max_x); | 59 std::swap(min_x, max_x); |
55 if (max_y < min_y) | 60 if (max_y < min_y) |
56 std::swap(min_y, max_y); | 61 std::swap(min_y, max_y); |
57 | 62 |
58 pp::Rect output_rect(min_x, min_y, max_x - min_x, max_y - min_y); | 63 pp::Rect output_rect(min_x, min_y, max_x - min_x, max_y - min_y); |
59 output_rect.Intersect(pp::Rect(0, 0, output_width, output_height)); | 64 output_rect.Intersect(pp::Rect(0, 0, output_width, output_height)); |
60 return output_rect; | 65 return output_rect; |
61 } | 66 } |
62 | 67 |
| 68 pp::FloatRect FloatPageRectToPixelRect(FPDF_PAGE page, |
| 69 const pp::FloatRect& input) { |
| 70 int output_width = FPDF_GetPageWidth(page); |
| 71 int output_height = FPDF_GetPageHeight(page); |
| 72 |
| 73 int min_x; |
| 74 int min_y; |
| 75 int max_x; |
| 76 int max_y; |
| 77 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.x(), |
| 78 input.y(), &min_x, &min_y); |
| 79 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.right(), |
| 80 input.bottom(), &max_x, &max_y); |
| 81 |
| 82 if (max_x < min_x) |
| 83 std::swap(min_x, max_x); |
| 84 if (max_y < min_y) |
| 85 std::swap(min_y, max_y); |
| 86 |
| 87 pp::FloatRect output_rect( |
| 88 ConvertUnitDouble(min_x, kPointsPerInch, kPixelsPerInch), |
| 89 ConvertUnitDouble(min_y, kPointsPerInch, kPixelsPerInch), |
| 90 ConvertUnitDouble(max_x - min_x, kPointsPerInch, kPixelsPerInch), |
| 91 ConvertUnitDouble(max_y - min_y, kPointsPerInch, kPixelsPerInch)); |
| 92 return output_rect; |
| 93 } |
| 94 |
63 pp::Rect GetCharRectInGViewCoords(FPDF_PAGE page, FPDF_TEXTPAGE text_page, | 95 pp::Rect GetCharRectInGViewCoords(FPDF_PAGE page, FPDF_TEXTPAGE text_page, |
64 int index) { | 96 int index) { |
65 double left, right, bottom, top; | 97 double left, right, bottom, top; |
66 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); | 98 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); |
67 if (right < left) | 99 if (right < left) |
68 std::swap(left, right); | 100 std::swap(left, right); |
69 if (bottom < top) | 101 if (bottom < top) |
70 std::swap(top, bottom); | 102 std::swap(top, bottom); |
71 pp::Rect page_coords(left, top, right - left, bottom - top); | 103 pp::Rect page_coords(left, top, right - left, bottom - top); |
72 return PageRectToGViewRect(page, page_coords); | 104 return PageRectToGViewRect(page, page_coords); |
73 } | 105 } |
74 | 106 |
| 107 pp::FloatRect GetFloatCharRectInPixels(FPDF_PAGE page, |
| 108 FPDF_TEXTPAGE text_page, |
| 109 int index) { |
| 110 double left, right, bottom, top; |
| 111 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); |
| 112 if (right < left) |
| 113 std::swap(left, right); |
| 114 if (bottom < top) |
| 115 std::swap(top, bottom); |
| 116 pp::FloatRect page_coords(left, top, right - left, bottom - top); |
| 117 return FloatPageRectToPixelRect(page, page_coords); |
| 118 } |
| 119 |
// Code point PDFium inserts at the spot where a word is broken across lines.
const unsigned int kSoftHyphen = 0x02;

// Code points that should all be recognized as Unicode newlines. The
// two-character sequence CR+LF (U+000D followed by U+000A) is made up of the
// CR and LF entries below.
// Source: http://en.wikipedia.org/wiki/Newline#Unicode .
const unsigned int kUnicodeNewlines[] = {
    0xA,     // LF: Line Feed, U+000A
    0xB,     // VT: Vertical Tab, U+000B
    0xC,     // FF: Form Feed, U+000C
    0xD,     // CR: Carriage Return, U+000D
    0x85,    // NEL: Next Line, U+0085
    0x2028,  // LS: Line Separator, U+2028
    0x2029,  // PS: Paragraph Separator, U+2029
};
91 | 136 |
92 bool IsSoftHyphen(unsigned int character) { | 137 bool IsSoftHyphen(unsigned int character) { |
93 return kSoftHyphen == character; | 138 return kSoftHyphen == character; |
94 } | 139 } |
95 | 140 |
96 bool OverlapsOnYAxis(const pp::Rect &a, const pp::Rect& b) { | 141 bool OverlapsOnYAxis(const pp::Rect &a, const pp::Rect& b) { |
97 return !(a.IsEmpty() || b.IsEmpty() || | 142 return !(a.IsEmpty() || b.IsEmpty() || |
98 a.bottom() < b.y() || b.bottom() < a.y()); | 143 a.bottom() < b.y() || b.bottom() < a.y()); |
99 } | 144 } |
100 | 145 |
| 146 bool OverlapsOnYAxis(const pp::FloatRect &a, const pp::FloatRect& b) { |
| 147 return !(a.IsEmpty() || b.IsEmpty() || |
| 148 a.bottom() < b.y() || b.bottom() < a.y()); |
| 149 } |
| 150 |
101 bool IsEol(unsigned int character) { | 151 bool IsEol(unsigned int character) { |
102 const unsigned int* first = kUnicodeNewlines; | 152 const unsigned int* first = kUnicodeNewlines; |
103 const unsigned int* last = kUnicodeNewlines + arraysize(kUnicodeNewlines); | 153 const unsigned int* last = kUnicodeNewlines + arraysize(kUnicodeNewlines); |
104 return std::find(first, last, character) != last; | 154 return std::find(first, last, character) != last; |
105 } | 155 } |
106 | 156 |
107 } // namespace | 157 } // namespace |
108 | 158 |
109 namespace chrome_pdf { | 159 namespace chrome_pdf { |
110 | 160 |
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
292 if (!base::IsUnicodeWhitespace(character)) | 342 if (!base::IsUnicodeWhitespace(character)) |
293 word_rect = word_rect.Union(char_rect); | 343 word_rect = word_rect.Union(char_rect); |
294 } | 344 } |
295 } | 345 } |
296 | 346 |
297 node->Set(kPageTextBox, text.release()); // Takes ownership of |text| | 347 node->Set(kPageTextBox, text.release()); // Takes ownership of |text| |
298 | 348 |
299 return node; | 349 return node; |
300 } | 350 } |
301 | 351 |
| 352 void PDFiumPage::GetTextRunInfo(int start_char_index, |
| 353 uint32_t* out_len, |
| 354 double* out_font_size, |
| 355 pp::FloatRect* out_bounds) { |
| 356 FPDF_PAGE page = GetPage(); |
| 357 FPDF_TEXTPAGE text_page = GetTextPage(); |
| 358 int chars_count = FPDFText_CountChars(text_page); |
| 359 int char_index = start_char_index; |
| 360 while ( |
| 361 char_index < chars_count && |
| 362 base::IsUnicodeWhitespace(FPDFText_GetUnicode(text_page, char_index))) { |
| 363 char_index++; |
| 364 } |
| 365 int text_run_font_size = FPDFText_GetFontSize(text_page, char_index); |
| 366 pp::FloatRect text_run_bounds = |
| 367 GetFloatCharRectInPixels(page, text_page, char_index); |
| 368 char_index++; |
| 369 while (char_index < chars_count) { |
| 370 unsigned int character = FPDFText_GetUnicode(text_page, char_index); |
| 371 |
| 372 if (!base::IsUnicodeWhitespace(character)) { |
| 373 // TODO(dmazzoni): this assumes horizontal text. |
| 374 // https://crbug.com/580311 |
| 375 pp::FloatRect char_rect = |
| 376 GetFloatCharRectInPixels(page, text_page, char_index); |
| 377 if (!char_rect.IsEmpty() && !OverlapsOnYAxis(text_run_bounds, char_rect)) |
| 378 break; |
| 379 |
| 380 int font_size = FPDFText_GetFontSize(text_page, char_index); |
| 381 if (font_size != text_run_font_size) |
| 382 break; |
| 383 |
| 384 text_run_bounds = text_run_bounds.Union(char_rect); |
| 385 } |
| 386 |
| 387 char_index++; |
| 388 } |
| 389 |
| 390 *out_len = char_index - start_char_index; |
| 391 *out_font_size = text_run_font_size; |
| 392 *out_bounds = text_run_bounds; |
| 393 } |
| 394 |
| 395 uint32_t PDFiumPage::GetCharUnicode(int char_index) { |
| 396 FPDF_TEXTPAGE text_page = GetTextPage(); |
| 397 return FPDFText_GetUnicode(text_page, char_index); |
| 398 } |
| 399 |
| 400 double PDFiumPage::GetCharWidth(int char_index) { |
| 401 FPDF_PAGE page = GetPage(); |
| 402 FPDF_TEXTPAGE text_page = GetTextPage(); |
| 403 return GetFloatCharRectInPixels(page, text_page, char_index).width(); |
| 404 } |
| 405 |
302 PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point, | 406 PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point, |
303 int rotation, | 407 int rotation, |
304 int* char_index, | 408 int* char_index, |
305 int* form_type, | 409 int* form_type, |
306 LinkTarget* target) { | 410 LinkTarget* target) { |
307 if (!available_) | 411 if (!available_) |
308 return NONSELECTABLE_AREA; | 412 return NONSELECTABLE_AREA; |
309 pp::Point point2 = point - rect_.point(); | 413 pp::Point point2 = point - rect_.point(); |
310 double new_x; | 414 double new_x; |
311 double new_y; | 415 double new_y; |
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
567 page_->loading_count_--; | 671 page_->loading_count_--; |
568 } | 672 } |
569 | 673 |
570 PDFiumPage::Link::Link() { | 674 PDFiumPage::Link::Link() { |
571 } | 675 } |
572 | 676 |
573 PDFiumPage::Link::~Link() { | 677 PDFiumPage::Link::~Link() { |
574 } | 678 } |
575 | 679 |
576 } // namespace chrome_pdf | 680 } // namespace chrome_pdf |
OLD | NEW |