OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "pdf/pdfium/pdfium_page.h" | 5 #include "pdf/pdfium/pdfium_page.h" |
6 | 6 |
7 #include <math.h> | 7 #include <math.h> |
8 #include <stddef.h> | 8 #include <stddef.h> |
9 | 9 |
10 #include <algorithm> | 10 #include <algorithm> |
11 #include <memory> | 11 #include <memory> |
12 #include <utility> | 12 #include <utility> |
13 | 13 |
14 #include "base/logging.h" | 14 #include "base/logging.h" |
15 #include "base/strings/string_number_conversions.h" | 15 #include "base/strings/string_number_conversions.h" |
16 #include "base/strings/string_util.h" | 16 #include "base/strings/string_util.h" |
17 #include "base/strings/utf_string_conversions.h" | 17 #include "base/strings/utf_string_conversions.h" |
18 #include "base/values.h" | |
19 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h" | 18 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h" |
20 #include "pdf/pdfium/pdfium_engine.h" | 19 #include "pdf/pdfium/pdfium_engine.h" |
21 #include "printing/units.h" | 20 #include "printing/units.h" |
22 | 21 |
// Tolerance used when doing hit detection. A typed constant is used instead
// of a preprocessor macro so the name obeys normal scoping rules.
constexpr double kTolerance = 20.0;
25 | 24 |
26 using printing::ConvertUnitDouble; | 25 using printing::ConvertUnitDouble; |
27 using printing::kPointsPerInch; | 26 using printing::kPointsPerInch; |
28 using printing::kPixelsPerInch; | 27 using printing::kPixelsPerInch; |
29 | 28 |
30 namespace { | 29 namespace { |
31 | 30 |
// Key names used in the dictionary that describes a page's accessible
// content (serialized as JSON).

// Page-level keys.
constexpr char kPageWidth[] = "width";
constexpr char kPageHeight[] = "height";
constexpr char kPageTextBox[] = "textBox";

// Keys of each text box (one per line of text).
constexpr char kTextBoxLeft[] = "left";
constexpr char kTextBoxTop[] = "top";
constexpr char kTextBoxWidth[] = "width";
constexpr char kTextBoxHeight[] = "height";
constexpr char kTextBoxFontSize[] = "fontSize";
constexpr char kTextBoxNodes[] = "textNodes";

// Keys and values of each text node inside a text box.
constexpr char kTextNodeType[] = "type";
constexpr char kTextNodeText[] = "text";
constexpr char kTextNodeTypeText[] = "text";
45 | |
46 pp::Rect PageRectToGViewRect(FPDF_PAGE page, const pp::Rect& input) { | |
47 int output_width = FPDF_GetPageWidth(page); | |
48 int output_height = FPDF_GetPageHeight(page); | |
49 | |
50 int min_x; | |
51 int min_y; | |
52 int max_x; | |
53 int max_y; | |
54 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, | |
55 input.x(), input.y(), &min_x, &min_y); | |
56 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, | |
57 input.right(), input.bottom(), &max_x, &max_y); | |
58 | |
59 if (max_x < min_x) | |
60 std::swap(min_x, max_x); | |
61 if (max_y < min_y) | |
62 std::swap(min_y, max_y); | |
63 | |
64 pp::Rect output_rect(min_x, min_y, max_x - min_x, max_y - min_y); | |
65 output_rect.Intersect(pp::Rect(0, 0, output_width, output_height)); | |
66 return output_rect; | |
67 } | |
68 | |
69 pp::FloatRect FloatPageRectToPixelRect(FPDF_PAGE page, | 31 pp::FloatRect FloatPageRectToPixelRect(FPDF_PAGE page, |
70 const pp::FloatRect& input) { | 32 const pp::FloatRect& input) { |
71 int output_width = FPDF_GetPageWidth(page); | 33 int output_width = FPDF_GetPageWidth(page); |
72 int output_height = FPDF_GetPageHeight(page); | 34 int output_height = FPDF_GetPageHeight(page); |
73 | 35 |
74 int min_x; | 36 int min_x; |
75 int min_y; | 37 int min_y; |
76 int max_x; | 38 int max_x; |
77 int max_y; | 39 int max_y; |
78 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.x(), | 40 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.x(), |
79 input.y(), &min_x, &min_y); | 41 input.y(), &min_x, &min_y); |
80 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.right(), | 42 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.right(), |
81 input.bottom(), &max_x, &max_y); | 43 input.bottom(), &max_x, &max_y); |
82 | 44 |
83 if (max_x < min_x) | 45 if (max_x < min_x) |
84 std::swap(min_x, max_x); | 46 std::swap(min_x, max_x); |
85 if (max_y < min_y) | 47 if (max_y < min_y) |
86 std::swap(min_y, max_y); | 48 std::swap(min_y, max_y); |
87 | 49 |
88 pp::FloatRect output_rect( | 50 pp::FloatRect output_rect( |
89 ConvertUnitDouble(min_x, kPointsPerInch, kPixelsPerInch), | 51 ConvertUnitDouble(min_x, kPointsPerInch, kPixelsPerInch), |
90 ConvertUnitDouble(min_y, kPointsPerInch, kPixelsPerInch), | 52 ConvertUnitDouble(min_y, kPointsPerInch, kPixelsPerInch), |
91 ConvertUnitDouble(max_x - min_x, kPointsPerInch, kPixelsPerInch), | 53 ConvertUnitDouble(max_x - min_x, kPointsPerInch, kPixelsPerInch), |
92 ConvertUnitDouble(max_y - min_y, kPointsPerInch, kPixelsPerInch)); | 54 ConvertUnitDouble(max_y - min_y, kPointsPerInch, kPixelsPerInch)); |
93 return output_rect; | 55 return output_rect; |
94 } | 56 } |
95 | 57 |
96 pp::Rect GetCharRectInGViewCoords(FPDF_PAGE page, FPDF_TEXTPAGE text_page, | |
97 int index) { | |
98 double left, right, bottom, top; | |
99 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); | |
100 if (right < left) | |
101 std::swap(left, right); | |
102 if (bottom < top) | |
103 std::swap(top, bottom); | |
104 pp::Rect page_coords(left, top, right - left, bottom - top); | |
105 return PageRectToGViewRect(page, page_coords); | |
106 } | |
107 | |
108 pp::FloatRect GetFloatCharRectInPixels(FPDF_PAGE page, | 58 pp::FloatRect GetFloatCharRectInPixels(FPDF_PAGE page, |
109 FPDF_TEXTPAGE text_page, | 59 FPDF_TEXTPAGE text_page, |
110 int index) { | 60 int index) { |
111 double left, right, bottom, top; | 61 double left, right, bottom, top; |
112 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); | 62 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); |
113 if (right < left) | 63 if (right < left) |
114 std::swap(left, right); | 64 std::swap(left, right); |
115 if (bottom < top) | 65 if (bottom < top) |
116 std::swap(top, bottom); | 66 std::swap(top, bottom); |
117 pp::FloatRect page_coords(left, top, right - left, bottom - top); | 67 pp::FloatRect page_coords(left, top, right - left, bottom - top); |
118 return FloatPageRectToPixelRect(page, page_coords); | 68 return FloatPageRectToPixelRect(page, page_coords); |
119 } | 69 } |
120 | 70 |
// Code point that PDFium emits at the spot where a word has been split
// across two lines.
constexpr unsigned int kSoftHyphen = 0x02;
123 | |
// Code points treated as Unicode newlines. CR+LF needs no entry of its own
// because both CR and LF are listed.
// Source: http://en.wikipedia.org/wiki/Newline#Unicode .
constexpr unsigned int kUnicodeNewlines[] = {
    0x000A,  // LF:  Line Feed
    0x000B,  // VT:  Vertical Tab
    0x000C,  // FF:  Form Feed
    0x000D,  // CR:  Carriage Return
    0x0085,  // NEL: Next Line
    0x2028,  // LS:  Line Separator
    0x2029,  // PS:  Paragraph Separator
};
137 | |
138 bool IsSoftHyphen(unsigned int character) { | |
139 return kSoftHyphen == character; | |
140 } | |
141 | |
142 bool OverlapsOnYAxis(const pp::Rect &a, const pp::Rect& b) { | |
143 return !(a.IsEmpty() || b.IsEmpty() || | |
144 a.bottom() < b.y() || b.bottom() < a.y()); | |
145 } | |
146 | |
147 bool OverlapsOnYAxis(const pp::FloatRect &a, const pp::FloatRect& b) { | 71 bool OverlapsOnYAxis(const pp::FloatRect &a, const pp::FloatRect& b) { |
148 return !(a.IsEmpty() || b.IsEmpty() || | 72 return !(a.IsEmpty() || b.IsEmpty() || |
149 a.bottom() < b.y() || b.bottom() < a.y()); | 73 a.bottom() < b.y() || b.bottom() < a.y()); |
150 } | 74 } |
151 | 75 |
152 bool IsEol(unsigned int character) { | |
153 const unsigned int* first = kUnicodeNewlines; | |
154 const unsigned int* last = kUnicodeNewlines + arraysize(kUnicodeNewlines); | |
155 return std::find(first, last, character) != last; | |
156 } | |
157 | |
158 } // namespace | 76 } // namespace |
159 | 77 |
160 namespace chrome_pdf { | 78 namespace chrome_pdf { |
161 | 79 |
162 PDFiumPage::PDFiumPage(PDFiumEngine* engine, | 80 PDFiumPage::PDFiumPage(PDFiumEngine* engine, |
163 int i, | 81 int i, |
164 const pp::Rect& r, | 82 const pp::Rect& r, |
165 bool available) | 83 bool available) |
166 : engine_(engine), | 84 : engine_(engine), |
167 page_(NULL), | 85 page_(NULL), |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
235 FPDF_TEXTPAGE PDFiumPage::GetTextPage() { | 153 FPDF_TEXTPAGE PDFiumPage::GetTextPage() { |
236 if (!available_) | 154 if (!available_) |
237 return NULL; | 155 return NULL; |
238 if (!text_page_) { | 156 if (!text_page_) { |
239 ScopedLoadCounter scoped_load(this); | 157 ScopedLoadCounter scoped_load(this); |
240 text_page_ = FPDFText_LoadPage(GetPage()); | 158 text_page_ = FPDFText_LoadPage(GetPage()); |
241 } | 159 } |
242 return text_page_; | 160 return text_page_; |
243 } | 161 } |
244 | 162 |
245 base::Value* PDFiumPage::GetAccessibleContentAsValue(int rotation) { | |
246 base::DictionaryValue* node = new base::DictionaryValue(); | |
247 | |
248 if (!available_) | |
249 return node; | |
250 | |
251 FPDF_PAGE page = GetPage(); | |
252 FPDF_TEXTPAGE text_page = GetTextPage(); | |
253 | |
254 double width = FPDF_GetPageWidth(page); | |
255 double height = FPDF_GetPageHeight(page); | |
256 | |
257 node->SetDouble(kPageWidth, width); | |
258 node->SetDouble(kPageHeight, height); | |
259 std::unique_ptr<base::ListValue> text(new base::ListValue()); | |
260 | |
261 int chars_count = FPDFText_CountChars(text_page); | |
262 pp::Rect line_rect; | |
263 pp::Rect word_rect; | |
264 bool seen_literal_text_in_word = false; | |
265 | |
266 // Iterate over all of the chars on the page. Explicitly run the loop | |
267 // with |i == chars_count|, which is one past the last character, and | |
268 // pretend it's a newline character in order to ensure we always flush | |
269 // the last line. | |
270 base::string16 line; | |
271 for (int i = 0; i <= chars_count; i++) { | |
272 unsigned int character; | |
273 pp::Rect char_rect; | |
274 | |
275 if (i < chars_count) { | |
276 character = FPDFText_GetUnicode(text_page, i); | |
277 char_rect = GetCharRectInGViewCoords(page, text_page, i); | |
278 } else { | |
279 // Make the last character a newline so the last line isn't lost. | |
280 character = '\n'; | |
281 } | |
282 | |
283 // There are spurious STX chars appearing in place | |
284 // of ligatures. Apply a heuristic to check that some vertical displacement | |
285 // is involved before assuming they are line-breaks. | |
286 bool is_intraword_linebreak = false; | |
287 if (i < chars_count - 1 && IsSoftHyphen(character)) { | |
288 // check if the next char and this char are in different lines. | |
289 pp::Rect next_char_rect = GetCharRectInGViewCoords( | |
290 page, text_page, i + 1); | |
291 | |
292 // TODO(dmazzoni): this assumes horizontal text. | |
293 // https://crbug.com/580311 | |
294 is_intraword_linebreak = !OverlapsOnYAxis(char_rect, next_char_rect); | |
295 } | |
296 if (is_intraword_linebreak || | |
297 base::IsUnicodeWhitespace(character) || | |
298 IsEol(character)) { | |
299 if (!word_rect.IsEmpty() && seen_literal_text_in_word) { | |
300 word_rect = pp::Rect(); | |
301 seen_literal_text_in_word = false; | |
302 } | |
303 } | |
304 | |
305 if (is_intraword_linebreak || IsEol(character)) { | |
306 if (!line_rect.IsEmpty()) { | |
307 if (is_intraword_linebreak) { | |
308 // Add a 0-width hyphen. | |
309 line.push_back('-'); | |
310 } | |
311 | |
312 std::unique_ptr<base::DictionaryValue> text_node( | |
313 new base::DictionaryValue()); | |
314 text_node->SetString(kTextNodeType, kTextNodeTypeText); | |
315 text_node->SetString(kTextNodeText, line); | |
316 | |
317 base::ListValue* text_nodes = new base::ListValue(); | |
318 text_nodes->Append(std::move(text_node)); | |
319 | |
320 std::unique_ptr<base::DictionaryValue> line_node( | |
321 new base::DictionaryValue()); | |
322 line_node->SetDouble(kTextBoxLeft, line_rect.x()); | |
323 line_node->SetDouble(kTextBoxTop, line_rect.y()); | |
324 line_node->SetDouble(kTextBoxWidth, line_rect.width()); | |
325 line_node->SetDouble(kTextBoxHeight, line_rect.height()); | |
326 line_node->SetDouble(kTextBoxFontSize, | |
327 FPDFText_GetFontSize(text_page, i)); | |
328 line_node->Set(kTextBoxNodes, text_nodes); | |
329 text->Append(std::move(line_node)); | |
330 | |
331 line.clear(); | |
332 line_rect = pp::Rect(); | |
333 word_rect = pp::Rect(); | |
334 seen_literal_text_in_word = false; | |
335 } | |
336 continue; | |
337 } | |
338 seen_literal_text_in_word = seen_literal_text_in_word || | |
339 !base::IsUnicodeWhitespace(character); | |
340 line.push_back(character); | |
341 | |
342 if (!char_rect.IsEmpty()) { | |
343 line_rect = line_rect.Union(char_rect); | |
344 | |
345 if (!base::IsUnicodeWhitespace(character)) | |
346 word_rect = word_rect.Union(char_rect); | |
347 } | |
348 } | |
349 | |
350 node->Set(kPageTextBox, text.release()); // Takes ownership of |text| | |
351 | |
352 return node; | |
353 } | |
354 | |
355 void PDFiumPage::GetTextRunInfo(int start_char_index, | 163 void PDFiumPage::GetTextRunInfo(int start_char_index, |
356 uint32_t* out_len, | 164 uint32_t* out_len, |
357 double* out_font_size, | 165 double* out_font_size, |
358 pp::FloatRect* out_bounds) { | 166 pp::FloatRect* out_bounds) { |
359 FPDF_PAGE page = GetPage(); | 167 FPDF_PAGE page = GetPage(); |
360 FPDF_TEXTPAGE text_page = GetTextPage(); | 168 FPDF_TEXTPAGE text_page = GetTextPage(); |
361 int chars_count = FPDFText_CountChars(text_page); | 169 int chars_count = FPDFText_CountChars(text_page); |
362 int char_index = start_char_index; | 170 int char_index = start_char_index; |
363 while ( | 171 while ( |
364 char_index < chars_count && | 172 char_index < chars_count && |
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
689 page_->loading_count_--; | 497 page_->loading_count_--; |
690 } | 498 } |
691 | 499 |
692 PDFiumPage::Link::Link() { | 500 PDFiumPage::Link::Link() { |
693 } | 501 } |
694 | 502 |
695 PDFiumPage::Link::~Link() { | 503 PDFiumPage::Link::~Link() { |
696 } | 504 } |
697 | 505 |
698 } // namespace chrome_pdf | 506 } // namespace chrome_pdf |
OLD | NEW |