| Index: core/include/fpdftext/fpdf_text.h
|
| diff --git a/core/include/fpdftext/fpdf_text.h b/core/include/fpdftext/fpdf_text.h
|
| index 2a5e4ac4bd5e863e5d146989f5fbfe9648b70fd8..58df4b6751380e136f55a9f4bd261983bd4eef7d 100644
|
| --- a/core/include/fpdftext/fpdf_text.h
|
| +++ b/core/include/fpdftext/fpdf_text.h
|
| @@ -1,7 +1,7 @@
|
| // Copyright 2014 PDFium Authors. All rights reserved.
|
| // Use of this source code is governed by a BSD-style license that can be
|
| // found in the LICENSE file.
|
| -
|
| +
|
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
|
|
|
| #ifndef _FPDF_TEXT_H_
|
| @@ -16,158 +16,173 @@
|
| #include "../fpdfapi/fpdf_page.h"
|
| #endif
|
| class CPDF_PageObjects;
|
| -#define PDF2TXT_AUTO_ROTATE 1
|
| -#define PDF2TXT_AUTO_WIDTH 2
|
| -#define PDF2TXT_KEEP_COLUMN 4
|
| -#define PDF2TXT_USE_OCR 8
|
| -#define PDF2TXT_INCLUDE_INVISIBLE 16
|
| -void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
|
| - int iMinWidth, FX_DWORD flags);
|
| -void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
|
| - int iMinWidth, FX_DWORD flags);
|
| -void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
|
| +#define PDF2TXT_AUTO_ROTATE 1
|
| +#define PDF2TXT_AUTO_WIDTH 2
|
| +#define PDF2TXT_KEEP_COLUMN 4
|
| +#define PDF2TXT_USE_OCR 8
|
| +#define PDF2TXT_INCLUDE_INVISIBLE 16
|
| +void PDF_GetPageText(CFX_ByteStringArray& lines,
|
| + CPDF_Document* pDoc,
|
| + CPDF_Dictionary* pPage,
|
| + int iMinWidth,
|
| + FX_DWORD flags);
|
| +void PDF_GetPageText_Unicode(CFX_WideStringArray& lines,
|
| + CPDF_Document* pDoc,
|
| + CPDF_Dictionary* pPage,
|
| + int iMinWidth,
|
| + FX_DWORD flags);
|
| +void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer,
|
| + CPDF_Document* pDoc,
|
| + CPDF_Dictionary* pPage,
|
| FX_DWORD flags);
|
| -CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary* pPage);
|
| +CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc,
|
| + CPDF_Dictionary* pPage);
|
| class IPDF_TextPage;
|
| class IPDF_LinkExtract;
|
| class IPDF_TextPageFind;
|
| -#define CHAR_ERROR -1
|
| -#define CHAR_NORMAL 0
|
| -#define CHAR_GENERATED 1
|
| -#define CHAR_UNUNICODE 2
|
| +#define CHAR_ERROR -1
|
| +#define CHAR_NORMAL 0
|
| +#define CHAR_GENERATED 1
|
| +#define CHAR_UNUNICODE 2
|
| typedef struct {
|
| - FX_WCHAR m_Unicode;
|
| - FX_WCHAR m_Charcode;
|
| - FX_INT32 m_Flag;
|
| - FX_FLOAT m_FontSize;
|
| - FX_FLOAT m_OriginX;
|
| - FX_FLOAT m_OriginY;
|
| - CFX_FloatRect m_CharBox;
|
| - CPDF_TextObject* m_pTextObj;
|
| - CFX_AffineMatrix m_Matrix;
|
| + FX_WCHAR m_Unicode;
|
| + FX_WCHAR m_Charcode;
|
| + FX_INT32 m_Flag;
|
| + FX_FLOAT m_FontSize;
|
| + FX_FLOAT m_OriginX;
|
| + FX_FLOAT m_OriginY;
|
| + CFX_FloatRect m_CharBox;
|
| + CPDF_TextObject* m_pTextObj;
|
| + CFX_AffineMatrix m_Matrix;
|
| } FPDF_CHAR_INFO;
|
| -typedef CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray;
|
| -#define FPDFTEXT_LRTB 0
|
| -#define FPDFTEXT_RLTB 1
|
| -#define FPDFTEXT_TBRL 2
|
| -#define FPDFTEXT_LEFT -1
|
| -#define FPDFTEXT_RIGHT 1
|
| -#define FPDFTEXT_UP -2
|
| -#define FPDFTEXT_DOWN 2
|
| +typedef CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray;
|
| +#define FPDFTEXT_LRTB 0
|
| +#define FPDFTEXT_RLTB 1
|
| +#define FPDFTEXT_TBRL 2
|
| +#define FPDFTEXT_LEFT -1
|
| +#define FPDFTEXT_RIGHT 1
|
| +#define FPDFTEXT_UP -2
|
| +#define FPDFTEXT_DOWN 2
|
| class IPDF_ReflowedPage;
|
| -#define FPDFTEXT_WRITINGMODE_UNKNOW 0
|
| -#define FPDFTEXT_WRITINGMODE_LRTB 1
|
| -#define FPDFTEXT_WRITINGMODE_RLTB 2
|
| -#define FPDFTEXT_WRITINGMODE_TBRL 3
|
| -class CPDFText_ParseOptions : public CFX_Object
|
| -{
|
| -public:
|
| -
|
| - CPDFText_ParseOptions();
|
| - FX_BOOL m_bGetCharCodeOnly;
|
| - FX_BOOL m_bNormalizeObjs;
|
| - FX_BOOL m_bOutputHyphen;
|
| +#define FPDFTEXT_WRITINGMODE_UNKNOW 0
|
| +#define FPDFTEXT_WRITINGMODE_LRTB 1
|
| +#define FPDFTEXT_WRITINGMODE_RLTB 2
|
| +#define FPDFTEXT_WRITINGMODE_TBRL 3
|
| +class CPDFText_ParseOptions : public CFX_Object {
|
| + public:
|
| + CPDFText_ParseOptions();
|
| + FX_BOOL m_bGetCharCodeOnly;
|
| + FX_BOOL m_bNormalizeObjs;
|
| + FX_BOOL m_bOutputHyphen;
|
| };
|
| -class IPDF_TextPage : public CFX_Object
|
| -{
|
| -public:
|
| -
|
| - virtual ~IPDF_TextPage() {}
|
| - static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions);
|
| - static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, int flags = 0);
|
| - static IPDF_TextPage* CreateTextPage(const CPDF_PageObjects* pObjs, int flags = 0);
|
| - static IPDF_TextPage* CreateReflowTextPage(IPDF_ReflowedPage* pRefPage);
|
| -
|
| - virtual void NormalizeObjects(FX_BOOL bNormalize) = 0;
|
| -
|
| - virtual FX_BOOL ParseTextPage() = 0;
|
| -
|
| +class IPDF_TextPage : public CFX_Object {
|
| + public:
|
| + virtual ~IPDF_TextPage() {}
|
| + static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage,
|
| + CPDFText_ParseOptions ParserOptions);
|
| + static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, int flags = 0);
|
| + static IPDF_TextPage* CreateTextPage(const CPDF_PageObjects* pObjs,
|
| + int flags = 0);
|
| + static IPDF_TextPage* CreateReflowTextPage(IPDF_ReflowedPage* pRefPage);
|
|
|
| - virtual FX_BOOL IsParsered() const = 0;
|
| -public:
|
| + virtual void NormalizeObjects(FX_BOOL bNormalize) = 0;
|
|
|
| - virtual int CharIndexFromTextIndex(int TextIndex) const = 0;
|
| + virtual FX_BOOL ParseTextPage() = 0;
|
|
|
| - virtual int TextIndexFromCharIndex(int CharIndex) const = 0;
|
| + virtual FX_BOOL IsParsered() const = 0;
|
|
|
| + public:
|
| + virtual int CharIndexFromTextIndex(int TextIndex) const = 0;
|
|
|
| - virtual int CountChars() const = 0;
|
| + virtual int TextIndexFromCharIndex(int CharIndex) const = 0;
|
|
|
| - virtual void GetCharInfo(int index, FPDF_CHAR_INFO & info) const = 0;
|
| + virtual int CountChars() const = 0;
|
|
|
| - virtual void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const = 0;
|
| + virtual void GetCharInfo(int index, FPDF_CHAR_INFO& info) const = 0;
|
|
|
| + virtual void GetRectArray(int start,
|
| + int nCount,
|
| + CFX_RectArray& rectArray) const = 0;
|
|
|
| + virtual int GetIndexAtPos(CPDF_Point point,
|
| + FX_FLOAT xTorelance,
|
| + FX_FLOAT yTorelance) const = 0;
|
|
|
| - virtual int GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0;
|
| + virtual int GetIndexAtPos(FX_FLOAT x,
|
| + FX_FLOAT y,
|
| + FX_FLOAT xTorelance,
|
| + FX_FLOAT yTorelance) const = 0;
|
|
|
| - virtual int GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0;
|
| + virtual int GetOrderByDirection(int index, int direction) const = 0;
|
|
|
| - virtual int GetOrderByDirection(int index, int direction) const = 0;
|
| + virtual CFX_WideString GetTextByRect(CFX_FloatRect rect) const = 0;
|
|
|
| - virtual CFX_WideString GetTextByRect(CFX_FloatRect rect) const = 0;
|
| + virtual void GetRectsArrayByRect(CFX_FloatRect rect,
|
| + CFX_RectArray& resRectArray) const = 0;
|
|
|
| - virtual void GetRectsArrayByRect(CFX_FloatRect rect, CFX_RectArray& resRectArray) const = 0;
|
| + virtual int CountRects(int start, int nCount) = 0;
|
|
|
| + virtual void GetRect(int rectIndex,
|
| + FX_FLOAT& left,
|
| + FX_FLOAT& top,
|
| + FX_FLOAT& right,
|
| + FX_FLOAT& bottom) const = 0;
|
|
|
| - virtual int CountRects(int start, int nCount) = 0;
|
| + virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) = 0;
|
|
|
| - virtual void GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT &bottom) const = 0;
|
| + virtual FX_BOOL GetBaselineRotate(CFX_FloatRect rect, int& Rotate) = 0;
|
|
|
| - virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) = 0;
|
| + virtual int CountBoundedSegments(FX_FLOAT left,
|
| + FX_FLOAT top,
|
| + FX_FLOAT right,
|
| + FX_FLOAT bottom,
|
| + FX_BOOL bContains = FALSE) = 0;
|
|
|
| - virtual FX_BOOL GetBaselineRotate(CFX_FloatRect rect, int& Rotate) = 0;
|
| + virtual void GetBoundedSegment(int index, int& start, int& count) const = 0;
|
|
|
| - virtual int CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE) = 0;
|
| + virtual int GetWordBreak(int index, int direction) const = 0;
|
|
|
| - virtual void GetBoundedSegment(int index, int& start, int& count) const = 0;
|
| -
|
| -
|
| - virtual int GetWordBreak(int index, int direction) const = 0;
|
| -
|
| - virtual CFX_WideString GetPageText(int start = 0, int nCount = -1 ) const = 0;
|
| + virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const = 0;
|
| };
|
| -#define FPDFTEXT_MATCHCASE 0x00000001
|
| +#define FPDFTEXT_MATCHCASE 0x00000001
|
| #define FPDFTEXT_MATCHWHOLEWORD 0x00000002
|
| -#define FPDFTEXT_CONSECUTIVE 0x00000004
|
| -class IPDF_TextPageFind : public CFX_Object
|
| -{
|
| -public:
|
| +#define FPDFTEXT_CONSECUTIVE 0x00000004
|
| +class IPDF_TextPageFind : public CFX_Object {
|
| + public:
|
| + virtual ~IPDF_TextPageFind() {}
|
|
|
| - virtual ~IPDF_TextPageFind() {}
|
| + static IPDF_TextPageFind* CreatePageFind(const IPDF_TextPage* pTextPage);
|
|
|
| - static IPDF_TextPageFind* CreatePageFind(const IPDF_TextPage* pTextPage);
|
| -public:
|
| + public:
|
| + virtual FX_BOOL FindFirst(CFX_WideString findwhat,
|
| + int flags,
|
| + int startPos = 0) = 0;
|
|
|
| - virtual FX_BOOL FindFirst(CFX_WideString findwhat, int flags, int startPos = 0) = 0;
|
| + virtual FX_BOOL FindNext() = 0;
|
|
|
| - virtual FX_BOOL FindNext() = 0;
|
| + virtual FX_BOOL FindPrev() = 0;
|
|
|
| - virtual FX_BOOL FindPrev() = 0;
|
| + virtual void GetRectArray(CFX_RectArray& rects) const = 0;
|
|
|
| - virtual void GetRectArray(CFX_RectArray& rects) const = 0;
|
| + virtual int GetCurOrder() const = 0;
|
|
|
| - virtual int GetCurOrder() const = 0;
|
| -
|
| - virtual int GetMatchedCount() const = 0;
|
| + virtual int GetMatchedCount() const = 0;
|
| };
|
| -class IPDF_LinkExtract : public CFX_Object
|
| -{
|
| -public:
|
| -
|
| - virtual ~IPDF_LinkExtract() {}
|
| +class IPDF_LinkExtract : public CFX_Object {
|
| + public:
|
| + virtual ~IPDF_LinkExtract() {}
|
|
|
| - static IPDF_LinkExtract* CreateLinkExtract();
|
| + static IPDF_LinkExtract* CreateLinkExtract();
|
|
|
| - virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) = 0;
|
| -public:
|
| + virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) = 0;
|
|
|
| - virtual int CountLinks() const = 0;
|
| + public:
|
| + virtual int CountLinks() const = 0;
|
|
|
| - virtual CFX_WideString GetURL(int index) const = 0;
|
| + virtual CFX_WideString GetURL(int index) const = 0;
|
|
|
| - virtual void GetBoundedSegment(int index, int& start, int& count) const = 0;
|
| + virtual void GetBoundedSegment(int index, int& start, int& count) const = 0;
|
|
|
| - virtual void GetRects(int index, CFX_RectArray& rects) const = 0;
|
| + virtual void GetRects(int index, CFX_RectArray& rects) const = 0;
|
| };
|
| #endif
|
|
|