| Index: pdf/pdfium/pdfium_page.cc
|
| ===================================================================
|
| --- pdf/pdfium/pdfium_page.cc (revision 0)
|
| +++ pdf/pdfium/pdfium_page.cc (revision 0)
|
| @@ -0,0 +1,478 @@
|
| +// Copyright (c) 2010 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "pdf/pdfium/pdfium_page.h"
|
| +
|
| +#include <math.h>
|
| +
|
| +#include "base/logging.h"
|
| +#include "base/strings/string_number_conversions.h"
|
| +#include "base/strings/string_util.h"
|
| +#include "base/strings/utf_string_conversions.h"
|
| +#include "base/values.h"
|
| +#include "pdf/pdfium/pdfium_engine.h"
|
| +
|
| +// Used when doing hit detection.
|
| +#define kTolerance 20.0
|
| +
|
| +// Dictionary Value key names for returning the accessible page content as JSON.
|
| +const char kPageWidth[] = "width";
|
| +const char kPageHeight[] = "height";
|
| +const char kPageTextBox[] = "textBox";
|
| +const char kTextBoxLeft[] = "left";
|
| +const char kTextBoxTop[] = "top";
|
| +const char kTextBoxWidth[] = "width";
|
| +const char kTextBoxHeight[] = "height";
|
| +const char kTextBoxFontSize[] = "fontSize";
|
| +const char kTextBoxNodes[] = "textNodes";
|
| +const char kTextNodeType[] = "type";
|
| +const char kTextNodeText[] = "text";
|
| +const char kTextNodeURL[] = "url";
|
| +const char kTextNodeTypeText[] = "text";
|
| +const char kTextNodeTypeURL[] = "url";
|
| +const char kDocLinkURLPrefix[] = "#page";
|
| +
|
| +namespace chrome_pdf {
|
| +
|
| +PDFiumPage::PDFiumPage(PDFiumEngine* engine,
|
| + int i,
|
| + const pp::Rect& r,
|
| + bool available)
|
| + : engine_(engine),
|
| + page_(NULL),
|
| + text_page_(NULL),
|
| + index_(i),
|
| + rect_(r),
|
| + calculated_links_(false),
|
| + available_(available) {
|
| +}
|
| +
|
| +PDFiumPage::~PDFiumPage() {
|
| + Unload();
|
| +}
|
| +
|
| +void PDFiumPage::Unload() {
|
| + if (text_page_) {
|
| + FPDFText_ClosePage(text_page_);
|
| + text_page_ = NULL;
|
| + }
|
| +
|
| + if (page_) {
|
| + if (engine_->form()) {
|
| + FORM_OnBeforeClosePage(page_, engine_->form());
|
| + }
|
| + FPDF_ClosePage(page_);
|
| + page_ = NULL;
|
| + }
|
| +}
|
| +
|
| +FPDF_PAGE PDFiumPage::GetPage() {
|
| + ScopedUnsupportedFeature scoped_unsupported_feature(engine_);
|
| + if (!available_)
|
| + return NULL;
|
| + if (!page_) {
|
| + page_ = FPDF_LoadPage(engine_->doc(), index_);
|
| + if (page_ && engine_->form()) {
|
| + FORM_OnAfterLoadPage(page_, engine_->form());
|
| + }
|
| + }
|
| + return page_;
|
| +}
|
| +
|
| +FPDF_PAGE PDFiumPage::GetPrintPage() {
|
| + ScopedUnsupportedFeature scoped_unsupported_feature(engine_);
|
| + if (!available_)
|
| + return NULL;
|
| + if (!page_)
|
| + page_ = FPDF_LoadPage(engine_->doc(), index_);
|
| + return page_;
|
| +}
|
| +
|
| +void PDFiumPage::ClosePrintPage() {
|
| + if (page_) {
|
| + FPDF_ClosePage(page_);
|
| + page_ = NULL;
|
| + }
|
| +}
|
| +
|
| +FPDF_TEXTPAGE PDFiumPage::GetTextPage() {
|
| + if (!available_)
|
| + return NULL;
|
| + if (!text_page_)
|
| + text_page_ = FPDFText_LoadPage(GetPage());
|
| + return text_page_;
|
| +}
|
| +
|
| +base::Value* PDFiumPage::GetAccessibleContentAsValue(int rotation) {
|
| + base::DictionaryValue* node = new base::DictionaryValue();
|
| +
|
| + if (!available_)
|
| + return node;
|
| +
|
| + double width = FPDF_GetPageWidth(GetPage());
|
| + double height = FPDF_GetPageHeight(GetPage());
|
| +
|
| + base::ListValue* text = new base::ListValue();
|
| + int box_count = FPDFText_CountRects(GetTextPage(), 0, GetCharCount());
|
| + for (int i = 0; i < box_count; i++) {
|
| + double left, top, right, bottom;
|
| + FPDFText_GetRect(GetTextPage(), i, &left, &top, &right, &bottom);
|
| + text->Append(
|
| + GetTextBoxAsValue(height, left, top, right, bottom, rotation));
|
| + }
|
| +
|
| + node->SetDouble(kPageWidth, width);
|
| + node->SetDouble(kPageHeight, height);
|
| + node->Set(kPageTextBox, text); // Takes ownership of |text|
|
| +
|
| + return node;
|
| +}
|
| +
|
| +base::Value* PDFiumPage::GetTextBoxAsValue(double page_height,
|
| + double left, double top,
|
| + double right, double bottom,
|
| + int rotation) {
|
| + base::string16 text_utf16;
|
| + int char_count =
|
| + FPDFText_GetBoundedText(GetTextPage(), left, top, right, bottom, NULL, 0);
|
| + if (char_count > 0) {
|
| + unsigned short* data = reinterpret_cast<unsigned short*>(
|
| + WriteInto(&text_utf16, char_count + 1));
|
| + FPDFText_GetBoundedText(GetTextPage(),
|
| + left, top, right, bottom,
|
| + data, char_count);
|
| + }
|
| + std::string text_utf8 = base::UTF16ToUTF8(text_utf16);
|
| +
|
| + FPDF_LINK link = FPDFLink_GetLinkAtPoint(GetPage(), left, top);
|
| + Area area;
|
| + std::vector<LinkTarget> targets;
|
| + if (link) {
|
| + targets.push_back(LinkTarget());
|
| + area = GetLinkTarget(link, &targets[0]);
|
| + } else {
|
| + pp::Rect rect(
|
| + PageToScreen(pp::Point(), 1.0, left, top, right, bottom, rotation));
|
| + GetLinks(rect, &targets);
|
| + area = targets.size() == 0 ? TEXT_AREA : WEBLINK_AREA;
|
| + }
|
| +
|
| + int char_index = FPDFText_GetCharIndexAtPos(GetTextPage(), left, top,
|
| + kTolerance, kTolerance);
|
| + double font_size = FPDFText_GetFontSize(GetTextPage(), char_index);
|
| +
|
| + base::DictionaryValue* node = new base::DictionaryValue();
|
| + node->SetDouble(kTextBoxLeft, left);
|
| + node->SetDouble(kTextBoxTop, page_height - top);
|
| + node->SetDouble(kTextBoxWidth, right - left);
|
| + node->SetDouble(kTextBoxHeight, top - bottom);
|
| + node->SetDouble(kTextBoxFontSize, font_size);
|
| +
|
| + base::ListValue* text_nodes = new base::ListValue();
|
| +
|
| + if (area == DOCLINK_AREA) {
|
| + std::string url = kDocLinkURLPrefix + base::IntToString(targets[0].page);
|
| + text_nodes->Append(CreateURLNode(text_utf8, url));
|
| + } else if (area == WEBLINK_AREA && link) {
|
| + text_nodes->Append(CreateURLNode(text_utf8, targets[0].url));
|
| + } else if (area == WEBLINK_AREA && !link) {
|
| + size_t start = 0;
|
| + for (size_t i = 0; i < targets.size(); ++i) {
|
| + // Remove the extra NULL character at end.
|
| + // Otherwise, find() will not return any matches.
|
| + if (targets[i].url.size() > 0 &&
|
| + targets[i].url[targets[i].url.size() - 1] == '\0') {
|
| + targets[i].url.resize(targets[i].url.size() - 1);
|
| + }
|
| + // There should only ever be one NULL character
|
| + DCHECK(targets[i].url[targets[i].url.size() - 1] != '\0');
|
| +
|
| + // PDFium may change the case of generated links.
|
| + std::string lowerCaseURL = StringToLowerASCII(targets[i].url);
|
| + std::string lowerCaseText = StringToLowerASCII(text_utf8);
|
| + size_t pos = lowerCaseText.find(lowerCaseURL, start);
|
| + size_t length = targets[i].url.size();
|
| + if (pos == std::string::npos) {
|
| + // Check if the link is a "mailto:" URL
|
| + if (lowerCaseURL.compare(0, 7, "mailto:") == 0) {
|
| + pos = lowerCaseText.find(lowerCaseURL.substr(7), start);
|
| + length -= 7;
|
| + }
|
| +
|
| + if (pos == std::string::npos) {
|
| + // No match has been found. This should never happen.
|
| + continue;
|
| + }
|
| + }
|
| +
|
| + std::string before_text = text_utf8.substr(start, pos - start);
|
| + if (before_text.size() > 0)
|
| + text_nodes->Append(CreateTextNode(before_text));
|
| + std::string link_text = text_utf8.substr(pos, length);
|
| + text_nodes->Append(CreateURLNode(link_text, targets[i].url));
|
| +
|
| + start = pos + length;
|
| + }
|
| + std::string before_text = text_utf8.substr(start);
|
| + if (before_text.size() > 0)
|
| + text_nodes->Append(CreateTextNode(before_text));
|
| + } else {
|
| + text_nodes->Append(CreateTextNode(text_utf8));
|
| + }
|
| +
|
| + node->Set(kTextBoxNodes, text_nodes); // Takes ownership of |text_nodes|.
|
| + return node;
|
| +}
|
| +
|
| +base::Value* PDFiumPage::CreateTextNode(std::string text) {
|
| + base::DictionaryValue* node = new base::DictionaryValue();
|
| + node->SetString(kTextNodeType, kTextNodeTypeText);
|
| + node->SetString(kTextNodeText, text);
|
| + return node;
|
| +}
|
| +
|
| +base::Value* PDFiumPage::CreateURLNode(std::string text, std::string url) {
|
| + base::DictionaryValue* node = new base::DictionaryValue();
|
| + node->SetString(kTextNodeType, kTextNodeTypeURL);
|
| + node->SetString(kTextNodeText, text);
|
| + node->SetString(kTextNodeURL, url);
|
| + return node;
|
| +}
|
| +
|
| +PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point,
|
| + int rotation,
|
| + int* char_index,
|
| + LinkTarget* target) {
|
| + if (!available_)
|
| + return NONSELECTABLE_AREA;
|
| + pp::Point point2 = point - rect_.point();
|
| + double new_x, new_y;
|
| + FPDF_DeviceToPage(GetPage(), 0, 0, rect_.width(), rect_.height(),
|
| + rotation, point2.x(), point2.y(), &new_x, &new_y);
|
| +
|
| + int rv = FPDFText_GetCharIndexAtPos(
|
| + GetTextPage(), new_x, new_y, kTolerance, kTolerance);
|
| + *char_index = rv;
|
| +
|
| + FPDF_LINK link = FPDFLink_GetLinkAtPoint(GetPage(), new_x, new_y);
|
| + if (link) {
|
| + // We don't handle all possible link types of the PDF. For example,
|
| + // launch actions, cross-document links, etc.
|
| + // In that case, GetLinkTarget() will return NONSELECTABLE_AREA
|
| + // and we should proceed with area detection.
|
| + PDFiumPage::Area area = GetLinkTarget(link, target);
|
| + if (area != PDFiumPage::NONSELECTABLE_AREA)
|
| + return area;
|
| + }
|
| +
|
| + if (rv < 0)
|
| + return NONSELECTABLE_AREA;
|
| +
|
| + return GetLink(*char_index, target) != -1 ? WEBLINK_AREA : TEXT_AREA;
|
| +}
|
| +
|
| +base::char16 PDFiumPage::GetCharAtIndex(int index) {
|
| + if (!available_)
|
| + return L'\0';
|
| + return static_cast<base::char16>(FPDFText_GetUnicode(GetTextPage(), index));
|
| +}
|
| +
|
| +int PDFiumPage::GetCharCount() {
|
| + if (!available_)
|
| + return 0;
|
| + return FPDFText_CountChars(GetTextPage());
|
| +}
|
| +
|
| +PDFiumPage::Area PDFiumPage::GetLinkTarget(
|
| + FPDF_LINK link, PDFiumPage::LinkTarget* target) {
|
| + FPDF_DEST dest = FPDFLink_GetDest(engine_->doc(), link);
|
| + if (dest != NULL)
|
| + return GetDestinationTarget(dest, target);
|
| +
|
| + FPDF_ACTION action = FPDFLink_GetAction(link);
|
| + if (action) {
|
| + switch (FPDFAction_GetType(action)) {
|
| + case PDFACTION_GOTO: {
|
| + FPDF_DEST dest = FPDFAction_GetDest(engine_->doc(), action);
|
| + if (dest)
|
| + return GetDestinationTarget(dest, target);
|
| + // TODO(gene): We don't fully support all types of the in-document
|
| + // links. Need to implement that. There is a bug to track that:
|
| + // http://code.google.com/p/chromium/issues/detail?id=55776
|
| + } break;
|
| + case PDFACTION_URI: {
|
| + if (target) {
|
| + size_t buffer_size =
|
| + FPDFAction_GetURIPath(engine_->doc(), action, NULL, 0);
|
| + if (buffer_size > 1) {
|
| + void* data = WriteInto(&target->url, buffer_size);
|
| + FPDFAction_GetURIPath(engine_->doc(), action, data, buffer_size);
|
| + }
|
| + }
|
| + return WEBLINK_AREA;
|
| + } break;
|
| + // TODO(gene): We don't support PDFACTION_REMOTEGOTO and PDFACTION_LAUNCH
|
| + // at the moment.
|
| + }
|
| + }
|
| +
|
| + return NONSELECTABLE_AREA;
|
| +}
|
| +
|
| +PDFiumPage::Area PDFiumPage::GetDestinationTarget(
|
| + FPDF_DEST destination, PDFiumPage::LinkTarget* target) {
|
| + int page_index = FPDFDest_GetPageIndex(engine_->doc(), destination);
|
| + if (target) {
|
| + target->page = page_index;
|
| + }
|
| + return DOCLINK_AREA;
|
| +}
|
| +
|
| +int PDFiumPage::GetLink(int char_index, PDFiumPage::LinkTarget* target) {
|
| + if (!available_)
|
| + return -1;
|
| +
|
| + CalculateLinks();
|
| +
|
| + // Get the bounding box of the rect again, since it might have moved because
|
| + // of the tolerance above.
|
| + double left, right, bottom, top;
|
| + FPDFText_GetCharBox(GetTextPage(), char_index, &left, &right, &bottom, &top);
|
| +
|
| + pp::Point origin(
|
| + PageToScreen(pp::Point(), 1.0, left, top, right, bottom, 0).point());
|
| + for (size_t i = 0; i < links_.size(); ++i) {
|
| + for (size_t j = 0; j < links_[i].rects.size(); ++j) {
|
| + if (links_[i].rects[j].Contains(origin)) {
|
| + if (target)
|
| + target->url = links_[i].url;
|
| + return i;
|
| + }
|
| + }
|
| + }
|
| + return -1;
|
| +}
|
| +
|
| +std::vector<int> PDFiumPage::GetLinks(pp::Rect text_area,
|
| + std::vector<LinkTarget>* targets) {
|
| + if (!available_)
|
| + return std::vector<int>();
|
| +
|
| + CalculateLinks();
|
| +
|
| + std::vector<int> links;
|
| +
|
| + for (size_t i = 0; i < links_.size(); ++i) {
|
| + for (size_t j = 0; j < links_[i].rects.size(); ++j) {
|
| + if (links_[i].rects[j].Intersects(text_area)) {
|
| + if (targets) {
|
| + LinkTarget target;
|
| + target.url = links_[i].url;
|
| + targets->push_back(target);
|
| + }
|
| + links.push_back(i);
|
| + }
|
| + }
|
| + }
|
| + return links;
|
| +}
|
| +
|
| +void PDFiumPage::CalculateLinks() {
|
| + if (calculated_links_)
|
| + return;
|
| +
|
| + calculated_links_ = true;
|
| + FPDF_PAGELINK links = FPDFLink_LoadWebLinks(GetTextPage());
|
| + int count = FPDFLink_CountWebLinks(links);
|
| + for (int i = 0; i < count; ++i) {
|
| + base::string16 url;
|
| + int url_length = FPDFLink_GetURL(links, i, NULL, 0);
|
| + if (url_length > 0) {
|
| + unsigned short* data =
|
| + reinterpret_cast<unsigned short*>(WriteInto(&url, url_length + 1));
|
| + FPDFLink_GetURL(links, i, data, url_length);
|
| + }
|
| + Link link;
|
| + link.url = base::UTF16ToUTF8(url);
|
| +
|
| + // If the link cannot be converted to a pp::Var, then it is not possible to
|
| + // pass it to JS. In this case, ignore the link like other PDF viewers.
|
| + // See http://crbug.com/312882 for an example.
|
| + pp::Var link_var(link.url);
|
| + if (!link_var.is_string())
|
| + continue;
|
| +
|
| + // Make sure all the characters in the URL are valid per RFC 1738.
|
| + // http://crbug.com/340326 has a sample bad PDF.
|
| + // GURL does not work correctly, e.g. it just strips \t \r \n.
|
| + bool is_invalid_url = false;
|
| + for (size_t j = 0; j < link.url.length(); ++j) {
|
| + // Control characters are not allowed.
|
| + // 0x7F is also a control character.
|
| + // 0x80 and above are not in US-ASCII.
|
| + if (link.url[j] < ' ' || link.url[j] >= '\x7F') {
|
| + is_invalid_url = true;
|
| + break;
|
| + }
|
| + }
|
| + if (is_invalid_url)
|
| + continue;
|
| +
|
| + int rect_count = FPDFLink_CountRects(links, i);
|
| + for (int j = 0; j < rect_count; ++j) {
|
| + double left, top, right, bottom;
|
| + FPDFLink_GetRect(links, i, j, &left, &top, &right, &bottom);
|
| + link.rects.push_back(
|
| + PageToScreen(pp::Point(), 1.0, left, top, right, bottom, 0));
|
| + }
|
| + links_.push_back(link);
|
| + }
|
| + FPDFLink_CloseWebLinks(links);
|
| +}
|
| +
|
| +pp::Rect PDFiumPage::PageToScreen(const pp::Point& offset,
|
| + double zoom,
|
| + double left,
|
| + double top,
|
| + double right,
|
| + double bottom,
|
| + int rotation) {
|
| + if (!available_)
|
| + return pp::Rect();
|
| +
|
| + int new_left, new_top, new_right, new_bottom;
|
| + FPDF_PageToDevice(
|
| + page_,
|
| + static_cast<int>((rect_.x() - offset.x()) * zoom),
|
| + static_cast<int>((rect_.y() - offset.y()) * zoom),
|
| + static_cast<int>(ceil(rect_.width() * zoom)),
|
| + static_cast<int>(ceil(rect_.height() * zoom)),
|
| + rotation, left, top, &new_left, &new_top);
|
| + FPDF_PageToDevice(
|
| + page_,
|
| + static_cast<int>((rect_.x() - offset.x()) * zoom),
|
| + static_cast<int>((rect_.y() - offset.y()) * zoom),
|
| + static_cast<int>(ceil(rect_.width() * zoom)),
|
| + static_cast<int>(ceil(rect_.height() * zoom)),
|
| + rotation, right, bottom, &new_right, &new_bottom);
|
| +
|
| + // If the PDF is rotated, the horizontal/vertical coordinates could be
|
| + // flipped. See
|
| + // http://www.netl.doe.gov/publications/proceedings/03/ubc/presentations/Goeckner-pres.pdf
|
| + if (new_right < new_left)
|
| + std::swap(new_right, new_left);
|
| + if (new_bottom < new_top)
|
| + std::swap(new_bottom, new_top);
|
| +
|
| + return pp::Rect(
|
| + new_left, new_top, new_right - new_left + 1, new_bottom - new_top + 1);
|
| +}
|
| +
|
| +PDFiumPage::Link::Link() {
|
| +}
|
| +
|
| +PDFiumPage::Link::~Link() {
|
| +}
|
| +
|
| +} // namespace chrome_pdf
|
|
|
| Property changes on: pdf\pdfium\pdfium_page.cc
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|