OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "pdf/pdfium/pdfium_page.h" |
| 6 |
| 7 #include <math.h> |
| 8 |
| 9 #include "base/logging.h" |
| 10 #include "base/strings/string_number_conversions.h" |
| 11 #include "base/strings/string_util.h" |
| 12 #include "base/strings/utf_string_conversions.h" |
| 13 #include "base/values.h" |
| 14 #include "pdf/pdfium/pdfium_engine.h" |
| 15 |
namespace {

// Used when doing hit detection: passed as both the x and the y tolerance to
// FPDFText_GetCharIndexAtPos(). Declared as a typed constant rather than a
// macro so that it obeys normal scoping and type checking.
const double kTolerance = 20.0;

// Dictionary Value key names for returning the accessible page content as JSON.
const char kPageWidth[] = "width";
const char kPageHeight[] = "height";
const char kPageTextBox[] = "textBox";
const char kTextBoxLeft[] = "left";
const char kTextBoxTop[] = "top";
const char kTextBoxWidth[] = "width";
const char kTextBoxHeight[] = "height";
const char kTextBoxFontSize[] = "fontSize";
const char kTextBoxNodes[] = "textNodes";
const char kTextNodeType[] = "type";
const char kTextNodeText[] = "text";
const char kTextNodeURL[] = "url";
// Values stored under |kTextNodeType|.
const char kTextNodeTypeText[] = "text";
const char kTextNodeTypeURL[] = "url";
// Prepended to a page index to build the URL of an in-document link.
const char kDocLinkURLPrefix[] = "#page";

}  // namespace
| 35 |
| 36 namespace chrome_pdf { |
| 37 |
| 38 PDFiumPage::PDFiumPage(PDFiumEngine* engine, |
| 39 int i, |
| 40 const pp::Rect& r, |
| 41 bool available) |
| 42 : engine_(engine), |
| 43 page_(NULL), |
| 44 text_page_(NULL), |
| 45 index_(i), |
| 46 rect_(r), |
| 47 calculated_links_(false), |
| 48 available_(available) { |
| 49 } |
| 50 |
| 51 PDFiumPage::~PDFiumPage() { |
| 52 Unload(); |
| 53 } |
| 54 |
| 55 void PDFiumPage::Unload() { |
| 56 if (text_page_) { |
| 57 FPDFText_ClosePage(text_page_); |
| 58 text_page_ = NULL; |
| 59 } |
| 60 |
| 61 if (page_) { |
| 62 if (engine_->form()) { |
| 63 FORM_OnBeforeClosePage(page_, engine_->form()); |
| 64 } |
| 65 FPDF_ClosePage(page_); |
| 66 page_ = NULL; |
| 67 } |
| 68 } |
| 69 |
| 70 FPDF_PAGE PDFiumPage::GetPage() { |
| 71 ScopedUnsupportedFeature scoped_unsupported_feature(engine_); |
| 72 if (!available_) |
| 73 return NULL; |
| 74 if (!page_) { |
| 75 page_ = FPDF_LoadPage(engine_->doc(), index_); |
| 76 if (page_ && engine_->form()) { |
| 77 FORM_OnAfterLoadPage(page_, engine_->form()); |
| 78 } |
| 79 } |
| 80 return page_; |
| 81 } |
| 82 |
| 83 FPDF_PAGE PDFiumPage::GetPrintPage() { |
| 84 ScopedUnsupportedFeature scoped_unsupported_feature(engine_); |
| 85 if (!available_) |
| 86 return NULL; |
| 87 if (!page_) |
| 88 page_ = FPDF_LoadPage(engine_->doc(), index_); |
| 89 return page_; |
| 90 } |
| 91 |
| 92 void PDFiumPage::ClosePrintPage() { |
| 93 if (page_) { |
| 94 FPDF_ClosePage(page_); |
| 95 page_ = NULL; |
| 96 } |
| 97 } |
| 98 |
| 99 FPDF_TEXTPAGE PDFiumPage::GetTextPage() { |
| 100 if (!available_) |
| 101 return NULL; |
| 102 if (!text_page_) |
| 103 text_page_ = FPDFText_LoadPage(GetPage()); |
| 104 return text_page_; |
| 105 } |
| 106 |
| 107 base::Value* PDFiumPage::GetAccessibleContentAsValue(int rotation) { |
| 108 base::DictionaryValue* node = new base::DictionaryValue(); |
| 109 |
| 110 if (!available_) |
| 111 return node; |
| 112 |
| 113 double width = FPDF_GetPageWidth(GetPage()); |
| 114 double height = FPDF_GetPageHeight(GetPage()); |
| 115 |
| 116 base::ListValue* text = new base::ListValue(); |
| 117 int box_count = FPDFText_CountRects(GetTextPage(), 0, GetCharCount()); |
| 118 for (int i = 0; i < box_count; i++) { |
| 119 double left, top, right, bottom; |
| 120 FPDFText_GetRect(GetTextPage(), i, &left, &top, &right, &bottom); |
| 121 text->Append( |
| 122 GetTextBoxAsValue(height, left, top, right, bottom, rotation)); |
| 123 } |
| 124 |
| 125 node->SetDouble(kPageWidth, width); |
| 126 node->SetDouble(kPageHeight, height); |
| 127 node->Set(kPageTextBox, text); // Takes ownership of |text| |
| 128 |
| 129 return node; |
| 130 } |
| 131 |
| 132 base::Value* PDFiumPage::GetTextBoxAsValue(double page_height, |
| 133 double left, double top, |
| 134 double right, double bottom, |
| 135 int rotation) { |
| 136 base::string16 text_utf16; |
| 137 int char_count = |
| 138 FPDFText_GetBoundedText(GetTextPage(), left, top, right, bottom, NULL, 0); |
| 139 if (char_count > 0) { |
| 140 unsigned short* data = reinterpret_cast<unsigned short*>( |
| 141 WriteInto(&text_utf16, char_count + 1)); |
| 142 FPDFText_GetBoundedText(GetTextPage(), |
| 143 left, top, right, bottom, |
| 144 data, char_count); |
| 145 } |
| 146 std::string text_utf8 = base::UTF16ToUTF8(text_utf16); |
| 147 |
| 148 FPDF_LINK link = FPDFLink_GetLinkAtPoint(GetPage(), left, top); |
| 149 Area area; |
| 150 std::vector<LinkTarget> targets; |
| 151 if (link) { |
| 152 targets.push_back(LinkTarget()); |
| 153 area = GetLinkTarget(link, &targets[0]); |
| 154 } else { |
| 155 pp::Rect rect( |
| 156 PageToScreen(pp::Point(), 1.0, left, top, right, bottom, rotation)); |
| 157 GetLinks(rect, &targets); |
| 158 area = targets.size() == 0 ? TEXT_AREA : WEBLINK_AREA; |
| 159 } |
| 160 |
| 161 int char_index = FPDFText_GetCharIndexAtPos(GetTextPage(), left, top, |
| 162 kTolerance, kTolerance); |
| 163 double font_size = FPDFText_GetFontSize(GetTextPage(), char_index); |
| 164 |
| 165 base::DictionaryValue* node = new base::DictionaryValue(); |
| 166 node->SetDouble(kTextBoxLeft, left); |
| 167 node->SetDouble(kTextBoxTop, page_height - top); |
| 168 node->SetDouble(kTextBoxWidth, right - left); |
| 169 node->SetDouble(kTextBoxHeight, top - bottom); |
| 170 node->SetDouble(kTextBoxFontSize, font_size); |
| 171 |
| 172 base::ListValue* text_nodes = new base::ListValue(); |
| 173 |
| 174 if (area == DOCLINK_AREA) { |
| 175 std::string url = kDocLinkURLPrefix + base::IntToString(targets[0].page); |
| 176 text_nodes->Append(CreateURLNode(text_utf8, url)); |
| 177 } else if (area == WEBLINK_AREA && link) { |
| 178 text_nodes->Append(CreateURLNode(text_utf8, targets[0].url)); |
| 179 } else if (area == WEBLINK_AREA && !link) { |
| 180 size_t start = 0; |
| 181 for (size_t i = 0; i < targets.size(); ++i) { |
| 182 // Remove the extra NULL character at end. |
| 183 // Otherwise, find() will not return any matches. |
| 184 if (targets[i].url.size() > 0 && |
| 185 targets[i].url[targets[i].url.size() - 1] == '\0') { |
| 186 targets[i].url.resize(targets[i].url.size() - 1); |
| 187 } |
| 188 // There should only ever be one NULL character |
| 189 DCHECK(targets[i].url[targets[i].url.size() - 1] != '\0'); |
| 190 |
| 191 // PDFium may change the case of generated links. |
| 192 std::string lowerCaseURL = StringToLowerASCII(targets[i].url); |
| 193 std::string lowerCaseText = StringToLowerASCII(text_utf8); |
| 194 size_t pos = lowerCaseText.find(lowerCaseURL, start); |
| 195 size_t length = targets[i].url.size(); |
| 196 if (pos == std::string::npos) { |
| 197 // Check if the link is a "mailto:" URL |
| 198 if (lowerCaseURL.compare(0, 7, "mailto:") == 0) { |
| 199 pos = lowerCaseText.find(lowerCaseURL.substr(7), start); |
| 200 length -= 7; |
| 201 } |
| 202 |
| 203 if (pos == std::string::npos) { |
| 204 // No match has been found. This should never happen. |
| 205 continue; |
| 206 } |
| 207 } |
| 208 |
| 209 std::string before_text = text_utf8.substr(start, pos - start); |
| 210 if (before_text.size() > 0) |
| 211 text_nodes->Append(CreateTextNode(before_text)); |
| 212 std::string link_text = text_utf8.substr(pos, length); |
| 213 text_nodes->Append(CreateURLNode(link_text, targets[i].url)); |
| 214 |
| 215 start = pos + length; |
| 216 } |
| 217 std::string before_text = text_utf8.substr(start); |
| 218 if (before_text.size() > 0) |
| 219 text_nodes->Append(CreateTextNode(before_text)); |
| 220 } else { |
| 221 text_nodes->Append(CreateTextNode(text_utf8)); |
| 222 } |
| 223 |
| 224 node->Set(kTextBoxNodes, text_nodes); // Takes ownership of |text_nodes|. |
| 225 return node; |
| 226 } |
| 227 |
| 228 base::Value* PDFiumPage::CreateTextNode(std::string text) { |
| 229 base::DictionaryValue* node = new base::DictionaryValue(); |
| 230 node->SetString(kTextNodeType, kTextNodeTypeText); |
| 231 node->SetString(kTextNodeText, text); |
| 232 return node; |
| 233 } |
| 234 |
| 235 base::Value* PDFiumPage::CreateURLNode(std::string text, std::string url) { |
| 236 base::DictionaryValue* node = new base::DictionaryValue(); |
| 237 node->SetString(kTextNodeType, kTextNodeTypeURL); |
| 238 node->SetString(kTextNodeText, text); |
| 239 node->SetString(kTextNodeURL, url); |
| 240 return node; |
| 241 } |
| 242 |
| 243 PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point, |
| 244 int rotation, |
| 245 int* char_index, |
| 246 LinkTarget* target) { |
| 247 if (!available_) |
| 248 return NONSELECTABLE_AREA; |
| 249 pp::Point point2 = point - rect_.point(); |
| 250 double new_x, new_y; |
| 251 FPDF_DeviceToPage(GetPage(), 0, 0, rect_.width(), rect_.height(), |
| 252 rotation, point2.x(), point2.y(), &new_x, &new_y); |
| 253 |
| 254 int rv = FPDFText_GetCharIndexAtPos( |
| 255 GetTextPage(), new_x, new_y, kTolerance, kTolerance); |
| 256 *char_index = rv; |
| 257 |
| 258 FPDF_LINK link = FPDFLink_GetLinkAtPoint(GetPage(), new_x, new_y); |
| 259 if (link) { |
| 260 // We don't handle all possible link types of the PDF. For example, |
| 261 // launch actions, cross-document links, etc. |
| 262 // In that case, GetLinkTarget() will return NONSELECTABLE_AREA |
| 263 // and we should proceed with area detection. |
| 264 PDFiumPage::Area area = GetLinkTarget(link, target); |
| 265 if (area != PDFiumPage::NONSELECTABLE_AREA) |
| 266 return area; |
| 267 } |
| 268 |
| 269 if (rv < 0) |
| 270 return NONSELECTABLE_AREA; |
| 271 |
| 272 return GetLink(*char_index, target) != -1 ? WEBLINK_AREA : TEXT_AREA; |
| 273 } |
| 274 |
| 275 base::char16 PDFiumPage::GetCharAtIndex(int index) { |
| 276 if (!available_) |
| 277 return L'\0'; |
| 278 return static_cast<base::char16>(FPDFText_GetUnicode(GetTextPage(), index)); |
| 279 } |
| 280 |
| 281 int PDFiumPage::GetCharCount() { |
| 282 if (!available_) |
| 283 return 0; |
| 284 return FPDFText_CountChars(GetTextPage()); |
| 285 } |
| 286 |
| 287 PDFiumPage::Area PDFiumPage::GetLinkTarget( |
| 288 FPDF_LINK link, PDFiumPage::LinkTarget* target) { |
| 289 FPDF_DEST dest = FPDFLink_GetDest(engine_->doc(), link); |
| 290 if (dest != NULL) |
| 291 return GetDestinationTarget(dest, target); |
| 292 |
| 293 FPDF_ACTION action = FPDFLink_GetAction(link); |
| 294 if (action) { |
| 295 switch (FPDFAction_GetType(action)) { |
| 296 case PDFACTION_GOTO: { |
| 297 FPDF_DEST dest = FPDFAction_GetDest(engine_->doc(), action); |
| 298 if (dest) |
| 299 return GetDestinationTarget(dest, target); |
| 300 // TODO(gene): We don't fully support all types of the in-document |
| 301 // links. Need to implement that. There is a bug to track that: |
| 302 // http://code.google.com/p/chromium/issues/detail?id=55776 |
| 303 } break; |
| 304 case PDFACTION_URI: { |
| 305 if (target) { |
| 306 size_t buffer_size = |
| 307 FPDFAction_GetURIPath(engine_->doc(), action, NULL, 0); |
| 308 if (buffer_size > 1) { |
| 309 void* data = WriteInto(&target->url, buffer_size); |
| 310 FPDFAction_GetURIPath(engine_->doc(), action, data, buffer_size); |
| 311 } |
| 312 } |
| 313 return WEBLINK_AREA; |
| 314 } break; |
| 315 // TODO(gene): We don't support PDFACTION_REMOTEGOTO and PDFACTION_LAUNCH |
| 316 // at the moment. |
| 317 } |
| 318 } |
| 319 |
| 320 return NONSELECTABLE_AREA; |
| 321 } |
| 322 |
| 323 PDFiumPage::Area PDFiumPage::GetDestinationTarget( |
| 324 FPDF_DEST destination, PDFiumPage::LinkTarget* target) { |
| 325 int page_index = FPDFDest_GetPageIndex(engine_->doc(), destination); |
| 326 if (target) { |
| 327 target->page = page_index; |
| 328 } |
| 329 return DOCLINK_AREA; |
| 330 } |
| 331 |
| 332 int PDFiumPage::GetLink(int char_index, PDFiumPage::LinkTarget* target) { |
| 333 if (!available_) |
| 334 return -1; |
| 335 |
| 336 CalculateLinks(); |
| 337 |
| 338 // Get the bounding box of the rect again, since it might have moved because |
| 339 // of the tolerance above. |
| 340 double left, right, bottom, top; |
| 341 FPDFText_GetCharBox(GetTextPage(), char_index, &left, &right, &bottom, &top); |
| 342 |
| 343 pp::Point origin( |
| 344 PageToScreen(pp::Point(), 1.0, left, top, right, bottom, 0).point()); |
| 345 for (size_t i = 0; i < links_.size(); ++i) { |
| 346 for (size_t j = 0; j < links_[i].rects.size(); ++j) { |
| 347 if (links_[i].rects[j].Contains(origin)) { |
| 348 if (target) |
| 349 target->url = links_[i].url; |
| 350 return i; |
| 351 } |
| 352 } |
| 353 } |
| 354 return -1; |
| 355 } |
| 356 |
| 357 std::vector<int> PDFiumPage::GetLinks(pp::Rect text_area, |
| 358 std::vector<LinkTarget>* targets) { |
| 359 if (!available_) |
| 360 return std::vector<int>(); |
| 361 |
| 362 CalculateLinks(); |
| 363 |
| 364 std::vector<int> links; |
| 365 |
| 366 for (size_t i = 0; i < links_.size(); ++i) { |
| 367 for (size_t j = 0; j < links_[i].rects.size(); ++j) { |
| 368 if (links_[i].rects[j].Intersects(text_area)) { |
| 369 if (targets) { |
| 370 LinkTarget target; |
| 371 target.url = links_[i].url; |
| 372 targets->push_back(target); |
| 373 } |
| 374 links.push_back(i); |
| 375 } |
| 376 } |
| 377 } |
| 378 return links; |
| 379 } |
| 380 |
| 381 void PDFiumPage::CalculateLinks() { |
| 382 if (calculated_links_) |
| 383 return; |
| 384 |
| 385 calculated_links_ = true; |
| 386 FPDF_PAGELINK links = FPDFLink_LoadWebLinks(GetTextPage()); |
| 387 int count = FPDFLink_CountWebLinks(links); |
| 388 for (int i = 0; i < count; ++i) { |
| 389 base::string16 url; |
| 390 int url_length = FPDFLink_GetURL(links, i, NULL, 0); |
| 391 if (url_length > 0) { |
| 392 unsigned short* data = |
| 393 reinterpret_cast<unsigned short*>(WriteInto(&url, url_length + 1)); |
| 394 FPDFLink_GetURL(links, i, data, url_length); |
| 395 } |
| 396 Link link; |
| 397 link.url = base::UTF16ToUTF8(url); |
| 398 |
| 399 // If the link cannot be converted to a pp::Var, then it is not possible to |
| 400 // pass it to JS. In this case, ignore the link like other PDF viewers. |
| 401 // See http://crbug.com/312882 for an example. |
| 402 pp::Var link_var(link.url); |
| 403 if (!link_var.is_string()) |
| 404 continue; |
| 405 |
| 406 // Make sure all the characters in the URL are valid per RFC 1738. |
| 407 // http://crbug.com/340326 has a sample bad PDF. |
| 408 // GURL does not work correctly, e.g. it just strips \t \r \n. |
| 409 bool is_invalid_url = false; |
| 410 for (size_t j = 0; j < link.url.length(); ++j) { |
| 411 // Control characters are not allowed. |
| 412 // 0x7F is also a control character. |
| 413 // 0x80 and above are not in US-ASCII. |
| 414 if (link.url[j] < ' ' || link.url[j] >= '\x7F') { |
| 415 is_invalid_url = true; |
| 416 break; |
| 417 } |
| 418 } |
| 419 if (is_invalid_url) |
| 420 continue; |
| 421 |
| 422 int rect_count = FPDFLink_CountRects(links, i); |
| 423 for (int j = 0; j < rect_count; ++j) { |
| 424 double left, top, right, bottom; |
| 425 FPDFLink_GetRect(links, i, j, &left, &top, &right, &bottom); |
| 426 link.rects.push_back( |
| 427 PageToScreen(pp::Point(), 1.0, left, top, right, bottom, 0)); |
| 428 } |
| 429 links_.push_back(link); |
| 430 } |
| 431 FPDFLink_CloseWebLinks(links); |
| 432 } |
| 433 |
| 434 pp::Rect PDFiumPage::PageToScreen(const pp::Point& offset, |
| 435 double zoom, |
| 436 double left, |
| 437 double top, |
| 438 double right, |
| 439 double bottom, |
| 440 int rotation) { |
| 441 if (!available_) |
| 442 return pp::Rect(); |
| 443 |
| 444 int new_left, new_top, new_right, new_bottom; |
| 445 FPDF_PageToDevice( |
| 446 page_, |
| 447 static_cast<int>((rect_.x() - offset.x()) * zoom), |
| 448 static_cast<int>((rect_.y() - offset.y()) * zoom), |
| 449 static_cast<int>(ceil(rect_.width() * zoom)), |
| 450 static_cast<int>(ceil(rect_.height() * zoom)), |
| 451 rotation, left, top, &new_left, &new_top); |
| 452 FPDF_PageToDevice( |
| 453 page_, |
| 454 static_cast<int>((rect_.x() - offset.x()) * zoom), |
| 455 static_cast<int>((rect_.y() - offset.y()) * zoom), |
| 456 static_cast<int>(ceil(rect_.width() * zoom)), |
| 457 static_cast<int>(ceil(rect_.height() * zoom)), |
| 458 rotation, right, bottom, &new_right, &new_bottom); |
| 459 |
| 460 // If the PDF is rotated, the horizontal/vertical coordinates could be |
| 461 // flipped. See |
| 462 // http://www.netl.doe.gov/publications/proceedings/03/ubc/presentations/Goeck
ner-pres.pdf |
| 463 if (new_right < new_left) |
| 464 std::swap(new_right, new_left); |
| 465 if (new_bottom < new_top) |
| 466 std::swap(new_bottom, new_top); |
| 467 |
| 468 return pp::Rect( |
| 469 new_left, new_top, new_right - new_left + 1, new_bottom - new_top + 1); |
| 470 } |
| 471 |
| 472 PDFiumPage::Link::Link() { |
| 473 } |
| 474 |
| 475 PDFiumPage::Link::~Link() { |
| 476 } |
| 477 |
| 478 } // namespace chrome_pdf |
OLD | NEW |