Index: pdf/pdfium/pdfium_engine.cc |
diff --git a/pdf/pdfium/pdfium_engine.cc b/pdf/pdfium/pdfium_engine.cc |
index f0fe1b26a5b03b6257e60d2ab7a3f04b8c27f49f..3ab98681ceec195606fa02c6c2cb4c7e0e396303 100644 |
--- a/pdf/pdfium/pdfium_engine.cc |
+++ b/pdf/pdfium/pdfium_engine.cc |
@@ -45,6 +45,8 @@ |
namespace chrome_pdf { |
+namespace { |
+ |
#define kPageShadowTop 3 |
#define kPageShadowBottom 7 |
#define kPageShadowLeft 5 |
@@ -458,46 +460,75 @@ void CalculateNonScaledClipBoxOffset(const pp::Rect& content_rect, int rotation, |
} |
} |
-// Do an in-place transformation of objects on |page|. Translate all objects on |
-// |page| in |source_clip_box| by (|offset_x|, |offset_y|) and scale them by |
-// |scale_factor|. |
-// |
-// |page| Handle to the page. Returned by FPDF_LoadPage function. |
-// |source_clip_box| specifies the source clip box positions, relative to |
-// origin at left-bottom. |
-// |scale_factor| specifies the scale factor that should be applied to page |
-// objects. |
-// |offset_x| and |offset_y| specifies the translation offsets for the page |
-// objects, relative to origin at left-bottom. |
- |
-void TransformPageObjects(FPDF_PAGE page, const ClipBox& source_clip_box, |
Lei Zhang
2014/09/11 23:13:10
This has been unused for some time. Putting this b
|
- const double scale_factor, double offset_x, |
- double offset_y) { |
- const int obj_count = FPDFPage_CountObject(page); |
- |
- // Create a new clip path. |
- FPDF_CLIPPATH clip_path = FPDF_CreateClipPath( |
- source_clip_box.left + offset_x, source_clip_box.bottom + offset_y, |
- source_clip_box.right + offset_x, source_clip_box.top + offset_y); |
- |
- for (int obj_idx = 0; obj_idx < obj_count; ++obj_idx) { |
- FPDF_PAGEOBJECT page_obj = FPDFPage_GetObject(page, obj_idx); |
- FPDFPageObj_Transform(page_obj, scale_factor, 0, 0, scale_factor, |
- offset_x, offset_y); |
- FPDFPageObj_TransformClipPath(page_obj, scale_factor, 0, 0, scale_factor, |
- offset_x, offset_y); |
+// This formats a string with special 0xfffe end-of-line hyphens the same way |
+// as Adobe Reader. When a hyphen is encountered, the next non-CR/LF whitespace |
+// becomes CR+LF and the hyphen is erased. If there is no whitespace between |
+// two hyphens, the latter hyphen is erased and ignored. |
+void FormatStringWithHyphens(base::string16* text) { |
+ // First pass marks all the hyphen positions. |
+ struct HyphenPosition { |
+ HyphenPosition() : position(0), next_whitespace_position(0) {} |
+ size_t position; |
+ size_t next_whitespace_position; // 0 for none |
+ }; |
+ std::vector<HyphenPosition> hyphen_positions; |
+ HyphenPosition current_hyphen_position; |
+ bool current_hyphen_position_is_valid = false; |
+ const base::char16 kPdfiumHyphenEOL = 0xfffe; |
+ |
+ for (size_t i = 0; i < text->size(); ++i) { |
+ const base::char16& current_char = (*text)[i]; |
+ if (current_char == kPdfiumHyphenEOL) { |
+ if (current_hyphen_position_is_valid) |
+ hyphen_positions.push_back(current_hyphen_position); |
+ current_hyphen_position = HyphenPosition(); |
+ current_hyphen_position.position = i; |
+ current_hyphen_position_is_valid = true; |
+ } else if (IsWhitespace(current_char)) { |
+ if (current_hyphen_position_is_valid) { |
+ if (current_char != L'\r' && current_char != L'\n') |
+ current_hyphen_position.next_whitespace_position = i; |
+ hyphen_positions.push_back(current_hyphen_position); |
+ current_hyphen_position_is_valid = false; |
+ } |
+ } |
} |
- FPDFPage_TransformAnnots(page, scale_factor, 0, 0, scale_factor, |
- offset_x, offset_y); |
- FPDFPage_GenerateContent(page); |
+ if (current_hyphen_position_is_valid) |
+ hyphen_positions.push_back(current_hyphen_position); |
- // Add a extra clip path to the new pdf page here. |
- FPDFPage_InsertClipPath(page, clip_path); |
- |
- // Destroy the clip path. |
- FPDF_DestroyClipPath(clip_path); |
+ // With all the hyphen positions, do the search and replace. |
+ while (!hyphen_positions.empty()) { |
+ static const base::char16 kCr[] = {L'\r', L'\0'}; |
+ const HyphenPosition& position = hyphen_positions.back(); |
+ if (position.next_whitespace_position != 0) { |
+ (*text)[position.next_whitespace_position] = L'\n'; |
+ text->insert(position.next_whitespace_position, kCr); |
+ } |
+ text->erase(position.position, 1); |
+ hyphen_positions.pop_back(); |
+ } |
+ |
+ // Adobe Reader also get rid of trailing spaces right before a CRLF. |
+ static const base::char16 kSpaceCrCn[] = {L' ', L'\r', L'\n', L'\0'}; |
+ static const base::char16 kCrCn[] = {L'\r', L'\n', L'\0'}; |
+ ReplaceSubstringsAfterOffset(text, 0, kSpaceCrCn, kCrCn); |
+} |
+ |
+// Replace CR/LF with just LF on POSIX. |
+void FormatStringForOS(base::string16* text) { |
+#if defined(OS_POSIX) |
+ static const base::char16 kCr[] = {L'\r', L'\0'}; |
+ static const base::char16 kBlank[] = {L'\0'}; |
+ base::ReplaceChars(*text, kCr, kBlank, text); |
+#elif defined(OS_WIN) |
+ // Do nothing |
+#else |
+ NOTIMPLEMENTED(); |
+#endif |
} |
+} // namespace |
+ |
bool InitializeSDK(void* data) { |
FPDF_InitLibrary(data); |
@@ -1861,6 +1892,8 @@ std::string PDFiumEngine::GetSelectedText() { |
} |
} |
+ FormatStringWithHyphens(&result); |
+ FormatStringForOS(&result); |
return base::UTF16ToUTF8(result); |
} |