OLD | NEW |
---|---|
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "pdf/pdfium/pdfium_page.h" | 5 #include "pdf/pdfium/pdfium_page.h" |
6 | 6 |
7 #include <math.h> | 7 #include <math.h> |
8 #include <stddef.h> | 8 #include <stddef.h> |
9 | 9 |
10 #include <algorithm> | 10 #include <algorithm> |
11 #include <memory> | 11 #include <memory> |
12 | 12 |
13 #include "base/logging.h" | 13 #include "base/logging.h" |
14 #include "base/strings/string_number_conversions.h" | 14 #include "base/strings/string_number_conversions.h" |
15 #include "base/strings/string_util.h" | 15 #include "base/strings/string_util.h" |
16 #include "base/strings/utf_string_conversions.h" | 16 #include "base/strings/utf_string_conversions.h" |
17 #include "base/values.h" | 17 #include "base/values.h" |
18 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h" | 18 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h" |
19 #include "pdf/pdfium/pdfium_engine.h" | 19 #include "pdf/pdfium/pdfium_engine.h" |
20 #include "printing/units.h" | |
20 | 21 |
// Tolerance used when doing hit detection. A typed constant rather than a
// macro so the name obeys normal scoping rules and cannot be textually
// substituted into unrelated code.
constexpr double kTolerance = 20.0;
23 | 24 |
25 using printing::ConvertUnitDouble; | |
26 using printing::kPointsPerInch; | |
27 using printing::kPixelsPerInch; | |
28 | |
24 namespace { | 29 namespace { |
25 | 30 |
// Keys of the Dictionary Value that carries the accessible page content as
// JSON.

// Page-level keys.
constexpr char kPageWidth[] = "width";
constexpr char kPageHeight[] = "height";
constexpr char kPageTextBox[] = "textBox";

// Text-box keys. The width/height keys intentionally use the same strings as
// the page-level ones above.
constexpr char kTextBoxLeft[] = "left";
constexpr char kTextBoxTop[] = "top";
constexpr char kTextBoxWidth[] = "width";
constexpr char kTextBoxHeight[] = "height";
(...skipping 19 matching lines...)
53 if (max_x < min_x) | 58 if (max_x < min_x) |
54 std::swap(min_x, max_x); | 59 std::swap(min_x, max_x); |
55 if (max_y < min_y) | 60 if (max_y < min_y) |
56 std::swap(min_y, max_y); | 61 std::swap(min_y, max_y); |
57 | 62 |
58 pp::Rect output_rect(min_x, min_y, max_x - min_x, max_y - min_y); | 63 pp::Rect output_rect(min_x, min_y, max_x - min_x, max_y - min_y); |
59 output_rect.Intersect(pp::Rect(0, 0, output_width, output_height)); | 64 output_rect.Intersect(pp::Rect(0, 0, output_width, output_height)); |
60 return output_rect; | 65 return output_rect; |
61 } | 66 } |
62 | 67 |
68 pp::FloatRect FloatPageRectToPixelRect( | |
69 FPDF_PAGE page, const pp::FloatRect& input) { | |
70 int output_width = FPDF_GetPageWidth(page); | |
71 int output_height = FPDF_GetPageHeight(page); | |
72 | |
73 int min_x; | |
74 int min_y; | |
75 int max_x; | |
76 int max_y; | |
77 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, | |
78 input.x(), input.y(), &min_x, &min_y); | |
79 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, | |
80 input.right(), input.bottom(), &max_x, &max_y); | |
81 | |
82 if (max_x < min_x) | |
83 std::swap(min_x, max_x); | |
84 if (max_y < min_y) | |
85 std::swap(min_y, max_y); | |
86 | |
87 pp::FloatRect output_rect( | |
88 ConvertUnitDouble(min_x, kPointsPerInch, kPixelsPerInch), | |
89 ConvertUnitDouble(min_y, kPointsPerInch, kPixelsPerInch), | |
90 ConvertUnitDouble(max_x - min_x, kPointsPerInch, kPixelsPerInch), | |
91 ConvertUnitDouble(max_y - min_y, kPointsPerInch, kPixelsPerInch)); | |
92 /* | |
93 output_rect.Intersect(pp::FloatRect( | |
94 0, | |
95 0, | |
96 ConvertUnitDouble(output_width, kPointsPerInch, kPixelsPerInch), | |
97 ConvertUnitDouble(output_height, kPointsPerInch, kPixelsPerInch))); | |
98 */ | |
99 return output_rect; | |
100 } | |
101 | |
63 pp::Rect GetCharRectInGViewCoords(FPDF_PAGE page, FPDF_TEXTPAGE text_page, | 102 pp::Rect GetCharRectInGViewCoords(FPDF_PAGE page, FPDF_TEXTPAGE text_page, |
64 int index) { | 103 int index) { |
65 double left, right, bottom, top; | 104 double left, right, bottom, top; |
66 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); | 105 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); |
67 if (right < left) | 106 if (right < left) |
68 std::swap(left, right); | 107 std::swap(left, right); |
69 if (bottom < top) | 108 if (bottom < top) |
70 std::swap(top, bottom); | 109 std::swap(top, bottom); |
71 pp::Rect page_coords(left, top, right - left, bottom - top); | 110 pp::Rect page_coords(left, top, right - left, bottom - top); |
72 return PageRectToGViewRect(page, page_coords); | 111 return PageRectToGViewRect(page, page_coords); |
73 } | 112 } |
74 | 113 |
114 pp::FloatRect GetFloatCharRectInPixels( | |
115 FPDF_PAGE page, FPDF_TEXTPAGE text_page, int index) { | |
116 double left, right, bottom, top; | |
117 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); | |
118 if (right < left) | |
119 std::swap(left, right); | |
120 if (bottom < top) | |
121 std::swap(top, bottom); | |
122 pp::FloatRect page_coords(left, top, right - left, bottom - top); | |
123 return FloatPageRectToPixelRect(page, page_coords); | |
124 } | |
125 | |
// Code point PDFium inserts at the place where a word is broken across lines.
constexpr unsigned int kSoftHyphen = 0x02;

// Code points treated as Unicode newlines
// (source: http://en.wikipedia.org/wiki/Newline#Unicode):
//   U+000A  LF   Line Feed
//   U+000B  VT   Vertical Tab
//   U+000C  FF   Form Feed
//   U+000D  CR   Carriage Return
//   U+0085  NEL  Next Line
//   U+2028  LS   Line Separator
//   U+2029  PS   Paragraph Separator
// CR+LF (U+000D followed by U+000A) is covered by its two constituents.
constexpr unsigned int kUnicodeNewlines[] = {
    0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029,
};
91 | 142 |
92 bool IsSoftHyphen(unsigned int character) { | 143 bool IsSoftHyphen(unsigned int character) { |
93 return kSoftHyphen == character; | 144 return kSoftHyphen == character; |
94 } | 145 } |
95 | 146 |
96 bool OverlapsOnYAxis(const pp::Rect &a, const pp::Rect& b) { | 147 bool OverlapsOnYAxis(const pp::Rect &a, const pp::Rect& b) { |
97 return !(a.IsEmpty() || b.IsEmpty() || | 148 return !(a.IsEmpty() || b.IsEmpty() || |
98 a.bottom() < b.y() || b.bottom() < a.y()); | 149 a.bottom() < b.y() || b.bottom() < a.y()); |
99 } | 150 } |
100 | 151 |
152 bool OverlapsOnYAxis(const pp::FloatRect &a, const pp::FloatRect& b) { | |
153 return !(a.IsEmpty() || b.IsEmpty() || | |
154 a.bottom() < b.y() || b.bottom() < a.y()); | |
155 } | |
156 | |
101 bool IsEol(unsigned int character) { | 157 bool IsEol(unsigned int character) { |
102 const unsigned int* first = kUnicodeNewlines; | 158 const unsigned int* first = kUnicodeNewlines; |
103 const unsigned int* last = kUnicodeNewlines + arraysize(kUnicodeNewlines); | 159 const unsigned int* last = kUnicodeNewlines + arraysize(kUnicodeNewlines); |
104 return std::find(first, last, character) != last; | 160 return std::find(first, last, character) != last; |
105 } | 161 } |
106 | 162 |
107 } // namespace | 163 } // namespace |
108 | 164 |
109 namespace chrome_pdf { | 165 namespace chrome_pdf { |
110 | 166 |
(...skipping 181 matching lines...)
292 if (!base::IsUnicodeWhitespace(character)) | 348 if (!base::IsUnicodeWhitespace(character)) |
293 word_rect = word_rect.Union(char_rect); | 349 word_rect = word_rect.Union(char_rect); |
294 } | 350 } |
295 } | 351 } |
296 | 352 |
297 node->Set(kPageTextBox, text.release()); // Takes ownership of |text| | 353 node->Set(kPageTextBox, text.release()); // Takes ownership of |text| |
298 | 354 |
299 return node; | 355 return node; |
300 } | 356 } |
301 | 357 |
358 bool PDFiumPage::GetTextRunInfo( | |
Lei Zhang
2016/05/09 23:58:49
Can this be void since it always returns true? The
dmazzoni
2016/05/10 22:09:18
Done.
| |
359 int start_char_index, | |
360 uint32_t* out_len, | |
361 double* out_font_size, | |
362 pp::FloatRect* out_bounds) { | |
363 FPDF_PAGE page = GetPage(); | |
364 FPDF_TEXTPAGE text_page = GetTextPage(); | |
365 int chars_count = FPDFText_CountChars(text_page); | |
366 int char_index = start_char_index; | |
367 while (char_index < chars_count && | |
368 base::IsUnicodeWhitespace( | |
369 FPDFText_GetUnicode(text_page, char_index))) { | |
370 char_index++; | |
371 } | |
372 int text_run_font_size = FPDFText_GetFontSize(text_page, char_index); | |
373 pp::FloatRect text_run_bounds = GetFloatCharRectInPixels( | |
374 page, text_page, char_index); | |
375 char_index++; | |
376 while (char_index < chars_count) { | |
377 unsigned int character = FPDFText_GetUnicode(text_page, char_index); | |
378 | |
379 if (!base::IsUnicodeWhitespace(character)) { | |
380 // TODO(dmazzoni): this assumes horizontal text. | |
381 // https://crbug.com/580311 | |
382 pp::FloatRect char_rect = GetFloatCharRectInPixels( | |
383 page, text_page, char_index); | |
384 if (!char_rect.IsEmpty() && !OverlapsOnYAxis(text_run_bounds, char_rect)) | |
385 break; | |
386 | |
387 int font_size = FPDFText_GetFontSize(text_page, char_index); | |
388 if (font_size != text_run_font_size) | |
389 break; | |
390 | |
391 text_run_bounds = text_run_bounds.Union(char_rect); | |
392 } | |
393 | |
394 char_index++; | |
395 } | |
396 | |
397 *out_len = char_index - start_char_index; | |
398 *out_font_size = text_run_font_size; | |
399 *out_bounds = text_run_bounds; | |
400 return true; | |
401 } | |
402 | |
403 double PDFiumPage::GetCharWidth(int char_index) { | |
404 FPDF_PAGE page = GetPage(); | |
405 FPDF_TEXTPAGE text_page = GetTextPage(); | |
406 return GetFloatCharRectInPixels(page, text_page, char_index).width(); | |
407 } | |
408 | |
302 PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point, | 409 PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point, |
303 int rotation, | 410 int rotation, |
304 int* char_index, | 411 int* char_index, |
305 int* form_type, | 412 int* form_type, |
306 LinkTarget* target) { | 413 LinkTarget* target) { |
307 if (!available_) | 414 if (!available_) |
308 return NONSELECTABLE_AREA; | 415 return NONSELECTABLE_AREA; |
309 pp::Point point2 = point - rect_.point(); | 416 pp::Point point2 = point - rect_.point(); |
310 double new_x; | 417 double new_x; |
311 double new_y; | 418 double new_y; |
(...skipping 255 matching lines...)
567 page_->loading_count_--; | 674 page_->loading_count_--; |
568 } | 675 } |
569 | 676 |
570 PDFiumPage::Link::Link() { | 677 PDFiumPage::Link::Link() { |
571 } | 678 } |
572 | 679 |
573 PDFiumPage::Link::~Link() { | 680 PDFiumPage::Link::~Link() { |
574 } | 681 } |
575 | 682 |
576 } // namespace chrome_pdf | 683 } // namespace chrome_pdf |
OLD | NEW |