| Index: pdf/pdfium/pdfium_page.cc
|
| diff --git a/pdf/pdfium/pdfium_page.cc b/pdf/pdfium/pdfium_page.cc
|
| index 3b7787f08f226622945255dcb5b8b093ff5adf9f..e95296986d823d84491fdd0fd8cdeea7ea80daf2 100644
|
| --- a/pdf/pdfium/pdfium_page.cc
|
| +++ b/pdf/pdfium/pdfium_page.cc
|
| @@ -15,7 +15,6 @@
|
| #include "base/strings/string_number_conversions.h"
|
| #include "base/strings/string_util.h"
|
| #include "base/strings/utf_string_conversions.h"
|
| -#include "base/values.h"
|
| #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h"
|
| #include "pdf/pdfium/pdfium_engine.h"
|
| #include "printing/units.h"
|
| @@ -29,43 +28,6 @@ using printing::kPixelsPerInch;
|
|
|
| namespace {
|
|
|
| -// Dictionary Value key names for returning the accessible page content as JSON.
|
| -const char kPageWidth[] = "width";
|
| -const char kPageHeight[] = "height";
|
| -const char kPageTextBox[] = "textBox";
|
| -const char kTextBoxLeft[] = "left";
|
| -const char kTextBoxTop[] = "top";
|
| -const char kTextBoxWidth[] = "width";
|
| -const char kTextBoxHeight[] = "height";
|
| -const char kTextBoxFontSize[] = "fontSize";
|
| -const char kTextBoxNodes[] = "textNodes";
|
| -const char kTextNodeType[] = "type";
|
| -const char kTextNodeText[] = "text";
|
| -const char kTextNodeTypeText[] = "text";
|
| -
|
| -pp::Rect PageRectToGViewRect(FPDF_PAGE page, const pp::Rect& input) {
|
| - int output_width = FPDF_GetPageWidth(page);
|
| - int output_height = FPDF_GetPageHeight(page);
|
| -
|
| - int min_x;
|
| - int min_y;
|
| - int max_x;
|
| - int max_y;
|
| - FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0,
|
| - input.x(), input.y(), &min_x, &min_y);
|
| - FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0,
|
| - input.right(), input.bottom(), &max_x, &max_y);
|
| -
|
| - if (max_x < min_x)
|
| - std::swap(min_x, max_x);
|
| - if (max_y < min_y)
|
| - std::swap(min_y, max_y);
|
| -
|
| - pp::Rect output_rect(min_x, min_y, max_x - min_x, max_y - min_y);
|
| - output_rect.Intersect(pp::Rect(0, 0, output_width, output_height));
|
| - return output_rect;
|
| -}
|
| -
|
| pp::FloatRect FloatPageRectToPixelRect(FPDF_PAGE page,
|
| const pp::FloatRect& input) {
|
| int output_width = FPDF_GetPageWidth(page);
|
| @@ -93,18 +55,6 @@ pp::FloatRect FloatPageRectToPixelRect(FPDF_PAGE page,
|
| return output_rect;
|
| }
|
|
|
| -pp::Rect GetCharRectInGViewCoords(FPDF_PAGE page, FPDF_TEXTPAGE text_page,
|
| - int index) {
|
| - double left, right, bottom, top;
|
| - FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top);
|
| - if (right < left)
|
| - std::swap(left, right);
|
| - if (bottom < top)
|
| - std::swap(top, bottom);
|
| - pp::Rect page_coords(left, top, right - left, bottom - top);
|
| - return PageRectToGViewRect(page, page_coords);
|
| -}
|
| -
|
| pp::FloatRect GetFloatCharRectInPixels(FPDF_PAGE page,
|
| FPDF_TEXTPAGE text_page,
|
| int index) {
|
| @@ -118,43 +68,11 @@ pp::FloatRect GetFloatCharRectInPixels(FPDF_PAGE page,
|
| return FloatPageRectToPixelRect(page, page_coords);
|
| }
|
|
|
| -// This is the character PDFium inserts where a word is broken across lines.
|
| -const unsigned int kSoftHyphen = 0x02;
|
| -
|
| -// The following characters should all be recognized as Unicode newlines:
|
| -// LF: Line Feed, U+000A
|
| -// VT: Vertical Tab, U+000B
|
| -// FF: Form Feed, U+000C
|
| -// CR: Carriage Return, U+000D
|
| -// CR+LF: CR (U+000D) followed by LF (U+000A)
|
| -// NEL: Next Line, U+0085
|
| -// LS: Line Separator, U+2028
|
| -// PS: Paragraph Separator, U+2029.
|
| -// Source: http://en.wikipedia.org/wiki/Newline#Unicode .
|
| -const unsigned int kUnicodeNewlines[] = {
|
| - 0xA, 0xB, 0xC, 0xD, 0X85, 0x2028, 0x2029
|
| -};
|
| -
|
| -bool IsSoftHyphen(unsigned int character) {
|
| - return kSoftHyphen == character;
|
| -}
|
| -
|
| -bool OverlapsOnYAxis(const pp::Rect &a, const pp::Rect& b) {
|
| - return !(a.IsEmpty() || b.IsEmpty() ||
|
| - a.bottom() < b.y() || b.bottom() < a.y());
|
| -}
|
| -
|
| bool OverlapsOnYAxis(const pp::FloatRect &a, const pp::FloatRect& b) {
|
| return !(a.IsEmpty() || b.IsEmpty() ||
|
| a.bottom() < b.y() || b.bottom() < a.y());
|
| }
|
|
|
| -bool IsEol(unsigned int character) {
|
| - const unsigned int* first = kUnicodeNewlines;
|
| - const unsigned int* last = kUnicodeNewlines + arraysize(kUnicodeNewlines);
|
| - return std::find(first, last, character) != last;
|
| -}
|
| -
|
| } // namespace
|
|
|
| namespace chrome_pdf {
|
| @@ -242,116 +160,6 @@ FPDF_TEXTPAGE PDFiumPage::GetTextPage() {
|
| return text_page_;
|
| }
|
|
|
| -base::Value* PDFiumPage::GetAccessibleContentAsValue(int rotation) {
|
| - base::DictionaryValue* node = new base::DictionaryValue();
|
| -
|
| - if (!available_)
|
| - return node;
|
| -
|
| - FPDF_PAGE page = GetPage();
|
| - FPDF_TEXTPAGE text_page = GetTextPage();
|
| -
|
| - double width = FPDF_GetPageWidth(page);
|
| - double height = FPDF_GetPageHeight(page);
|
| -
|
| - node->SetDouble(kPageWidth, width);
|
| - node->SetDouble(kPageHeight, height);
|
| - std::unique_ptr<base::ListValue> text(new base::ListValue());
|
| -
|
| - int chars_count = FPDFText_CountChars(text_page);
|
| - pp::Rect line_rect;
|
| - pp::Rect word_rect;
|
| - bool seen_literal_text_in_word = false;
|
| -
|
| - // Iterate over all of the chars on the page. Explicitly run the loop
|
| - // with |i == chars_count|, which is one past the last character, and
|
| - // pretend it's a newline character in order to ensure we always flush
|
| - // the last line.
|
| - base::string16 line;
|
| - for (int i = 0; i <= chars_count; i++) {
|
| - unsigned int character;
|
| - pp::Rect char_rect;
|
| -
|
| - if (i < chars_count) {
|
| - character = FPDFText_GetUnicode(text_page, i);
|
| - char_rect = GetCharRectInGViewCoords(page, text_page, i);
|
| - } else {
|
| - // Make the last character a newline so the last line isn't lost.
|
| - character = '\n';
|
| - }
|
| -
|
| - // There are spurious STX chars appearing in place
|
| - // of ligatures. Apply a heuristic to check that some vertical displacement
|
| - // is involved before assuming they are line-breaks.
|
| - bool is_intraword_linebreak = false;
|
| - if (i < chars_count - 1 && IsSoftHyphen(character)) {
|
| - // check if the next char and this char are in different lines.
|
| - pp::Rect next_char_rect = GetCharRectInGViewCoords(
|
| - page, text_page, i + 1);
|
| -
|
| - // TODO(dmazzoni): this assumes horizontal text.
|
| - // https://crbug.com/580311
|
| - is_intraword_linebreak = !OverlapsOnYAxis(char_rect, next_char_rect);
|
| - }
|
| - if (is_intraword_linebreak ||
|
| - base::IsUnicodeWhitespace(character) ||
|
| - IsEol(character)) {
|
| - if (!word_rect.IsEmpty() && seen_literal_text_in_word) {
|
| - word_rect = pp::Rect();
|
| - seen_literal_text_in_word = false;
|
| - }
|
| - }
|
| -
|
| - if (is_intraword_linebreak || IsEol(character)) {
|
| - if (!line_rect.IsEmpty()) {
|
| - if (is_intraword_linebreak) {
|
| - // Add a 0-width hyphen.
|
| - line.push_back('-');
|
| - }
|
| -
|
| - std::unique_ptr<base::DictionaryValue> text_node(
|
| - new base::DictionaryValue());
|
| - text_node->SetString(kTextNodeType, kTextNodeTypeText);
|
| - text_node->SetString(kTextNodeText, line);
|
| -
|
| - base::ListValue* text_nodes = new base::ListValue();
|
| - text_nodes->Append(std::move(text_node));
|
| -
|
| - std::unique_ptr<base::DictionaryValue> line_node(
|
| - new base::DictionaryValue());
|
| - line_node->SetDouble(kTextBoxLeft, line_rect.x());
|
| - line_node->SetDouble(kTextBoxTop, line_rect.y());
|
| - line_node->SetDouble(kTextBoxWidth, line_rect.width());
|
| - line_node->SetDouble(kTextBoxHeight, line_rect.height());
|
| - line_node->SetDouble(kTextBoxFontSize,
|
| - FPDFText_GetFontSize(text_page, i));
|
| - line_node->Set(kTextBoxNodes, text_nodes);
|
| - text->Append(std::move(line_node));
|
| -
|
| - line.clear();
|
| - line_rect = pp::Rect();
|
| - word_rect = pp::Rect();
|
| - seen_literal_text_in_word = false;
|
| - }
|
| - continue;
|
| - }
|
| - seen_literal_text_in_word = seen_literal_text_in_word ||
|
| - !base::IsUnicodeWhitespace(character);
|
| - line.push_back(character);
|
| -
|
| - if (!char_rect.IsEmpty()) {
|
| - line_rect = line_rect.Union(char_rect);
|
| -
|
| - if (!base::IsUnicodeWhitespace(character))
|
| - word_rect = word_rect.Union(char_rect);
|
| - }
|
| - }
|
| -
|
| - node->Set(kPageTextBox, text.release()); // Takes ownership of |text|
|
| -
|
| - return node;
|
| -}
|
| -
|
| void PDFiumPage::GetTextRunInfo(int start_char_index,
|
| uint32_t* out_len,
|
| double* out_font_size,
|
|
|