Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(234)

Side by Side Diff: pdf/pdfium/pdfium_page.cc

Issue 2103043003: Remove legacy PDF JSON interface. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix typo after merge Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « pdf/pdfium/pdfium_page.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "pdf/pdfium/pdfium_page.h" 5 #include "pdf/pdfium/pdfium_page.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 #include <stddef.h> 8 #include <stddef.h>
9 9
10 #include <algorithm> 10 #include <algorithm>
11 #include <memory> 11 #include <memory>
12 #include <utility> 12 #include <utility>
13 13
14 #include "base/logging.h" 14 #include "base/logging.h"
15 #include "base/strings/string_number_conversions.h" 15 #include "base/strings/string_number_conversions.h"
16 #include "base/strings/string_util.h" 16 #include "base/strings/string_util.h"
17 #include "base/strings/utf_string_conversions.h" 17 #include "base/strings/utf_string_conversions.h"
18 #include "base/values.h"
19 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h" 18 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h"
20 #include "pdf/pdfium/pdfium_engine.h" 19 #include "pdf/pdfium/pdfium_engine.h"
21 #include "printing/units.h" 20 #include "printing/units.h"
22 21
23 // Used when doing hit detection. 22 // Used when doing hit detection.
24 #define kTolerance 20.0 23 #define kTolerance 20.0
25 24
26 using printing::ConvertUnitDouble; 25 using printing::ConvertUnitDouble;
27 using printing::kPointsPerInch; 26 using printing::kPointsPerInch;
28 using printing::kPixelsPerInch; 27 using printing::kPixelsPerInch;
29 28
30 namespace { 29 namespace {
31 30
32 // Dictionary Value key names for returning the accessible page content as JSON.
33 const char kPageWidth[] = "width";
34 const char kPageHeight[] = "height";
35 const char kPageTextBox[] = "textBox";
36 const char kTextBoxLeft[] = "left";
37 const char kTextBoxTop[] = "top";
38 const char kTextBoxWidth[] = "width";
39 const char kTextBoxHeight[] = "height";
40 const char kTextBoxFontSize[] = "fontSize";
41 const char kTextBoxNodes[] = "textNodes";
42 const char kTextNodeType[] = "type";
43 const char kTextNodeText[] = "text";
44 const char kTextNodeTypeText[] = "text";
45
46 pp::Rect PageRectToGViewRect(FPDF_PAGE page, const pp::Rect& input) {
47 int output_width = FPDF_GetPageWidth(page);
48 int output_height = FPDF_GetPageHeight(page);
49
50 int min_x;
51 int min_y;
52 int max_x;
53 int max_y;
54 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0,
55 input.x(), input.y(), &min_x, &min_y);
56 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0,
57 input.right(), input.bottom(), &max_x, &max_y);
58
59 if (max_x < min_x)
60 std::swap(min_x, max_x);
61 if (max_y < min_y)
62 std::swap(min_y, max_y);
63
64 pp::Rect output_rect(min_x, min_y, max_x - min_x, max_y - min_y);
65 output_rect.Intersect(pp::Rect(0, 0, output_width, output_height));
66 return output_rect;
67 }
68
69 pp::FloatRect FloatPageRectToPixelRect(FPDF_PAGE page, 31 pp::FloatRect FloatPageRectToPixelRect(FPDF_PAGE page,
70 const pp::FloatRect& input) { 32 const pp::FloatRect& input) {
71 int output_width = FPDF_GetPageWidth(page); 33 int output_width = FPDF_GetPageWidth(page);
72 int output_height = FPDF_GetPageHeight(page); 34 int output_height = FPDF_GetPageHeight(page);
73 35
74 int min_x; 36 int min_x;
75 int min_y; 37 int min_y;
76 int max_x; 38 int max_x;
77 int max_y; 39 int max_y;
78 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.x(), 40 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.x(),
79 input.y(), &min_x, &min_y); 41 input.y(), &min_x, &min_y);
80 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.right(), 42 FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.right(),
81 input.bottom(), &max_x, &max_y); 43 input.bottom(), &max_x, &max_y);
82 44
83 if (max_x < min_x) 45 if (max_x < min_x)
84 std::swap(min_x, max_x); 46 std::swap(min_x, max_x);
85 if (max_y < min_y) 47 if (max_y < min_y)
86 std::swap(min_y, max_y); 48 std::swap(min_y, max_y);
87 49
88 pp::FloatRect output_rect( 50 pp::FloatRect output_rect(
89 ConvertUnitDouble(min_x, kPointsPerInch, kPixelsPerInch), 51 ConvertUnitDouble(min_x, kPointsPerInch, kPixelsPerInch),
90 ConvertUnitDouble(min_y, kPointsPerInch, kPixelsPerInch), 52 ConvertUnitDouble(min_y, kPointsPerInch, kPixelsPerInch),
91 ConvertUnitDouble(max_x - min_x, kPointsPerInch, kPixelsPerInch), 53 ConvertUnitDouble(max_x - min_x, kPointsPerInch, kPixelsPerInch),
92 ConvertUnitDouble(max_y - min_y, kPointsPerInch, kPixelsPerInch)); 54 ConvertUnitDouble(max_y - min_y, kPointsPerInch, kPixelsPerInch));
93 return output_rect; 55 return output_rect;
94 } 56 }
95 57
96 pp::Rect GetCharRectInGViewCoords(FPDF_PAGE page, FPDF_TEXTPAGE text_page,
97 int index) {
98 double left, right, bottom, top;
99 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top);
100 if (right < left)
101 std::swap(left, right);
102 if (bottom < top)
103 std::swap(top, bottom);
104 pp::Rect page_coords(left, top, right - left, bottom - top);
105 return PageRectToGViewRect(page, page_coords);
106 }
107
108 pp::FloatRect GetFloatCharRectInPixels(FPDF_PAGE page, 58 pp::FloatRect GetFloatCharRectInPixels(FPDF_PAGE page,
109 FPDF_TEXTPAGE text_page, 59 FPDF_TEXTPAGE text_page,
110 int index) { 60 int index) {
111 double left, right, bottom, top; 61 double left, right, bottom, top;
112 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); 62 FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top);
113 if (right < left) 63 if (right < left)
114 std::swap(left, right); 64 std::swap(left, right);
115 if (bottom < top) 65 if (bottom < top)
116 std::swap(top, bottom); 66 std::swap(top, bottom);
117 pp::FloatRect page_coords(left, top, right - left, bottom - top); 67 pp::FloatRect page_coords(left, top, right - left, bottom - top);
118 return FloatPageRectToPixelRect(page, page_coords); 68 return FloatPageRectToPixelRect(page, page_coords);
119 } 69 }
120 70
121 // This is the character PDFium inserts where a word is broken across lines.
122 const unsigned int kSoftHyphen = 0x02;
123
124 // The following characters should all be recognized as Unicode newlines:
125 // LF: Line Feed, U+000A
126 // VT: Vertical Tab, U+000B
127 // FF: Form Feed, U+000C
128 // CR: Carriage Return, U+000D
129 // CR+LF: CR (U+000D) followed by LF (U+000A)
130 // NEL: Next Line, U+0085
131 // LS: Line Separator, U+2028
132 // PS: Paragraph Separator, U+2029.
133 // Source: http://en.wikipedia.org/wiki/Newline#Unicode .
134 const unsigned int kUnicodeNewlines[] = {
135 0xA, 0xB, 0xC, 0xD, 0X85, 0x2028, 0x2029
136 };
137
138 bool IsSoftHyphen(unsigned int character) {
139 return kSoftHyphen == character;
140 }
141
142 bool OverlapsOnYAxis(const pp::Rect &a, const pp::Rect& b) {
143 return !(a.IsEmpty() || b.IsEmpty() ||
144 a.bottom() < b.y() || b.bottom() < a.y());
145 }
146
147 bool OverlapsOnYAxis(const pp::FloatRect &a, const pp::FloatRect& b) { 71 bool OverlapsOnYAxis(const pp::FloatRect &a, const pp::FloatRect& b) {
148 return !(a.IsEmpty() || b.IsEmpty() || 72 return !(a.IsEmpty() || b.IsEmpty() ||
149 a.bottom() < b.y() || b.bottom() < a.y()); 73 a.bottom() < b.y() || b.bottom() < a.y());
150 } 74 }
151 75
152 bool IsEol(unsigned int character) {
153 const unsigned int* first = kUnicodeNewlines;
154 const unsigned int* last = kUnicodeNewlines + arraysize(kUnicodeNewlines);
155 return std::find(first, last, character) != last;
156 }
157
158 } // namespace 76 } // namespace
159 77
160 namespace chrome_pdf { 78 namespace chrome_pdf {
161 79
162 PDFiumPage::PDFiumPage(PDFiumEngine* engine, 80 PDFiumPage::PDFiumPage(PDFiumEngine* engine,
163 int i, 81 int i,
164 const pp::Rect& r, 82 const pp::Rect& r,
165 bool available) 83 bool available)
166 : engine_(engine), 84 : engine_(engine),
167 page_(NULL), 85 page_(NULL),
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
235 FPDF_TEXTPAGE PDFiumPage::GetTextPage() { 153 FPDF_TEXTPAGE PDFiumPage::GetTextPage() {
236 if (!available_) 154 if (!available_)
237 return NULL; 155 return NULL;
238 if (!text_page_) { 156 if (!text_page_) {
239 ScopedLoadCounter scoped_load(this); 157 ScopedLoadCounter scoped_load(this);
240 text_page_ = FPDFText_LoadPage(GetPage()); 158 text_page_ = FPDFText_LoadPage(GetPage());
241 } 159 }
242 return text_page_; 160 return text_page_;
243 } 161 }
244 162
245 base::Value* PDFiumPage::GetAccessibleContentAsValue(int rotation) {
246 base::DictionaryValue* node = new base::DictionaryValue();
247
248 if (!available_)
249 return node;
250
251 FPDF_PAGE page = GetPage();
252 FPDF_TEXTPAGE text_page = GetTextPage();
253
254 double width = FPDF_GetPageWidth(page);
255 double height = FPDF_GetPageHeight(page);
256
257 node->SetDouble(kPageWidth, width);
258 node->SetDouble(kPageHeight, height);
259 std::unique_ptr<base::ListValue> text(new base::ListValue());
260
261 int chars_count = FPDFText_CountChars(text_page);
262 pp::Rect line_rect;
263 pp::Rect word_rect;
264 bool seen_literal_text_in_word = false;
265
266 // Iterate over all of the chars on the page. Explicitly run the loop
267 // with |i == chars_count|, which is one past the last character, and
268 // pretend it's a newline character in order to ensure we always flush
269 // the last line.
270 base::string16 line;
271 for (int i = 0; i <= chars_count; i++) {
272 unsigned int character;
273 pp::Rect char_rect;
274
275 if (i < chars_count) {
276 character = FPDFText_GetUnicode(text_page, i);
277 char_rect = GetCharRectInGViewCoords(page, text_page, i);
278 } else {
279 // Make the last character a newline so the last line isn't lost.
280 character = '\n';
281 }
282
283 // There are spurious STX chars appearing in place
284 // of ligatures. Apply a heuristic to check that some vertical displacement
285 // is involved before assuming they are line-breaks.
286 bool is_intraword_linebreak = false;
287 if (i < chars_count - 1 && IsSoftHyphen(character)) {
288 // check if the next char and this char are in different lines.
289 pp::Rect next_char_rect = GetCharRectInGViewCoords(
290 page, text_page, i + 1);
291
292 // TODO(dmazzoni): this assumes horizontal text.
293 // https://crbug.com/580311
294 is_intraword_linebreak = !OverlapsOnYAxis(char_rect, next_char_rect);
295 }
296 if (is_intraword_linebreak ||
297 base::IsUnicodeWhitespace(character) ||
298 IsEol(character)) {
299 if (!word_rect.IsEmpty() && seen_literal_text_in_word) {
300 word_rect = pp::Rect();
301 seen_literal_text_in_word = false;
302 }
303 }
304
305 if (is_intraword_linebreak || IsEol(character)) {
306 if (!line_rect.IsEmpty()) {
307 if (is_intraword_linebreak) {
308 // Add a 0-width hyphen.
309 line.push_back('-');
310 }
311
312 std::unique_ptr<base::DictionaryValue> text_node(
313 new base::DictionaryValue());
314 text_node->SetString(kTextNodeType, kTextNodeTypeText);
315 text_node->SetString(kTextNodeText, line);
316
317 base::ListValue* text_nodes = new base::ListValue();
318 text_nodes->Append(std::move(text_node));
319
320 std::unique_ptr<base::DictionaryValue> line_node(
321 new base::DictionaryValue());
322 line_node->SetDouble(kTextBoxLeft, line_rect.x());
323 line_node->SetDouble(kTextBoxTop, line_rect.y());
324 line_node->SetDouble(kTextBoxWidth, line_rect.width());
325 line_node->SetDouble(kTextBoxHeight, line_rect.height());
326 line_node->SetDouble(kTextBoxFontSize,
327 FPDFText_GetFontSize(text_page, i));
328 line_node->Set(kTextBoxNodes, text_nodes);
329 text->Append(std::move(line_node));
330
331 line.clear();
332 line_rect = pp::Rect();
333 word_rect = pp::Rect();
334 seen_literal_text_in_word = false;
335 }
336 continue;
337 }
338 seen_literal_text_in_word = seen_literal_text_in_word ||
339 !base::IsUnicodeWhitespace(character);
340 line.push_back(character);
341
342 if (!char_rect.IsEmpty()) {
343 line_rect = line_rect.Union(char_rect);
344
345 if (!base::IsUnicodeWhitespace(character))
346 word_rect = word_rect.Union(char_rect);
347 }
348 }
349
350 node->Set(kPageTextBox, text.release()); // Takes ownership of |text|
351
352 return node;
353 }
354
355 void PDFiumPage::GetTextRunInfo(int start_char_index, 163 void PDFiumPage::GetTextRunInfo(int start_char_index,
356 uint32_t* out_len, 164 uint32_t* out_len,
357 double* out_font_size, 165 double* out_font_size,
358 pp::FloatRect* out_bounds) { 166 pp::FloatRect* out_bounds) {
359 FPDF_PAGE page = GetPage(); 167 FPDF_PAGE page = GetPage();
360 FPDF_TEXTPAGE text_page = GetTextPage(); 168 FPDF_TEXTPAGE text_page = GetTextPage();
361 int chars_count = FPDFText_CountChars(text_page); 169 int chars_count = FPDFText_CountChars(text_page);
362 int char_index = start_char_index; 170 int char_index = start_char_index;
363 while ( 171 while (
364 char_index < chars_count && 172 char_index < chars_count &&
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after
689 page_->loading_count_--; 497 page_->loading_count_--;
690 } 498 }
691 499
692 PDFiumPage::Link::Link() { 500 PDFiumPage::Link::Link() {
693 } 501 }
694 502
695 PDFiumPage::Link::~Link() { 503 PDFiumPage::Link::~Link() {
696 } 504 }
697 505
698 } // namespace chrome_pdf 506 } // namespace chrome_pdf
OLDNEW
« no previous file with comments | « pdf/pdfium/pdfium_page.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698