| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #ifndef _FPDF_TEXT_H_ | 7 #ifndef _FPDF_TEXT_H_ |
| 8 #define _FPDF_TEXT_H_ | 8 #define _FPDF_TEXT_H_ |
| 9 #ifndef _FPDF_PARSER_ | 9 #ifndef _FPDF_PARSER_ |
| 10 #include "../fpdfapi/fpdf_parser.h" | 10 #include "../fpdfapi/fpdf_parser.h" |
| 11 #endif | 11 #endif |
| 12 #ifndef _FPDF_PAGEOBJ_H_ | 12 #ifndef _FPDF_PAGEOBJ_H_ |
| 13 #include "../fpdfapi/fpdf_pageobj.h" | 13 #include "../fpdfapi/fpdf_pageobj.h" |
| 14 #endif | 14 #endif |
| 15 #ifndef _FPDF_PAGE_ | 15 #ifndef _FPDF_PAGE_ |
| 16 #include "../fpdfapi/fpdf_page.h" | 16 #include "../fpdfapi/fpdf_page.h" |
| 17 #endif | 17 #endif |
| 18 class CPDF_PageObjects; | 18 class CPDF_PageObjects; |
| 19 #define PDF2TXT_AUTO_ROTATE» » 1 | 19 #define PDF2TXT_AUTO_ROTATE 1 |
| 20 #define PDF2TXT_AUTO_WIDTH» » 2 | 20 #define PDF2TXT_AUTO_WIDTH 2 |
| 21 #define PDF2TXT_KEEP_COLUMN» » 4 | 21 #define PDF2TXT_KEEP_COLUMN 4 |
| 22 #define PDF2TXT_USE_OCR»» » 8 | 22 #define PDF2TXT_USE_OCR 8 |
| 23 #define PDF2TXT_INCLUDE_INVISIBLE» 16 | 23 #define PDF2TXT_INCLUDE_INVISIBLE 16 |
| 24 void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dicti
onary* pPage, | 24 void PDF_GetPageText(CFX_ByteStringArray& lines, |
| 25 int iMinWidth, FX_DWORD flags); | 25 CPDF_Document* pDoc, |
| 26 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CP
DF_Dictionary* pPage, | 26 CPDF_Dictionary* pPage, |
| 27 int iMinWidth, FX_DWORD flags); | 27 int iMinWidth, |
| 28 void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPD
F_Dictionary* pPage, | 28 FX_DWORD flags); |
| 29 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, |
| 30 CPDF_Document* pDoc, |
| 31 CPDF_Dictionary* pPage, |
| 32 int iMinWidth, |
| 33 FX_DWORD flags); |
| 34 void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, |
| 35 CPDF_Document* pDoc, |
| 36 CPDF_Dictionary* pPage, |
| 29 FX_DWORD flags); | 37 FX_DWORD flags); |
| 30 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary
* pPage); | 38 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, |
| 39 CPDF_Dictionary* pPage); |
| 31 class IPDF_TextPage; | 40 class IPDF_TextPage; |
| 32 class IPDF_LinkExtract; | 41 class IPDF_LinkExtract; |
| 33 class IPDF_TextPageFind; | 42 class IPDF_TextPageFind; |
| 34 #define CHAR_ERROR» » » -1 | 43 #define CHAR_ERROR -1 |
| 35 #define CHAR_NORMAL» » » 0 | 44 #define CHAR_NORMAL 0 |
| 36 #define CHAR_GENERATED» » 1 | 45 #define CHAR_GENERATED 1 |
| 37 #define CHAR_UNUNICODE» » 2 | 46 #define CHAR_UNUNICODE 2 |
| 38 typedef struct { | 47 typedef struct { |
| 39 FX_WCHAR» » » m_Unicode; | 48 FX_WCHAR m_Unicode; |
| 40 FX_WCHAR» » » m_Charcode; | 49 FX_WCHAR m_Charcode; |
| 41 FX_INT32» » » m_Flag; | 50 FX_INT32 m_Flag; |
| 42 FX_FLOAT» » » m_FontSize; | 51 FX_FLOAT m_FontSize; |
| 43 FX_FLOAT» » » m_OriginX; | 52 FX_FLOAT m_OriginX; |
| 44 FX_FLOAT» » » m_OriginY; | 53 FX_FLOAT m_OriginY; |
| 45 CFX_FloatRect» » m_CharBox; | 54 CFX_FloatRect m_CharBox; |
| 46 CPDF_TextObject*» m_pTextObj; | 55 CPDF_TextObject* m_pTextObj; |
| 47 CFX_AffineMatrix» m_Matrix; | 56 CFX_AffineMatrix m_Matrix; |
| 48 } FPDF_CHAR_INFO; | 57 } FPDF_CHAR_INFO; |
| 49 typedef»CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray; | 58 typedef CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray; |
| 50 #define FPDFTEXT_LRTB» 0 | 59 #define FPDFTEXT_LRTB 0 |
| 51 #define FPDFTEXT_RLTB» 1 | 60 #define FPDFTEXT_RLTB 1 |
| 52 #define FPDFTEXT_TBRL» 2 | 61 #define FPDFTEXT_TBRL 2 |
| 53 #define FPDFTEXT_LEFT» » » -1 | 62 #define FPDFTEXT_LEFT -1 |
| 54 #define FPDFTEXT_RIGHT» » » 1 | 63 #define FPDFTEXT_RIGHT 1 |
| 55 #define FPDFTEXT_UP» » » » -2 | 64 #define FPDFTEXT_UP -2 |
| 56 #define FPDFTEXT_DOWN» » » 2 | 65 #define FPDFTEXT_DOWN 2 |
| 57 class IPDF_ReflowedPage; | 66 class IPDF_ReflowedPage; |
| 58 #define FPDFTEXT_WRITINGMODE_UNKNOW» 0 | 67 #define FPDFTEXT_WRITINGMODE_UNKNOW 0 |
| 59 #define FPDFTEXT_WRITINGMODE_LRTB» 1 | 68 #define FPDFTEXT_WRITINGMODE_LRTB 1 |
| 60 #define FPDFTEXT_WRITINGMODE_RLTB» 2 | 69 #define FPDFTEXT_WRITINGMODE_RLTB 2 |
| 61 #define FPDFTEXT_WRITINGMODE_TBRL» 3 | 70 #define FPDFTEXT_WRITINGMODE_TBRL 3 |
| 62 class CPDFText_ParseOptions : public CFX_Object | 71 class CPDFText_ParseOptions : public CFX_Object { |
| 63 { | 72 public: |
| 64 public: | 73 CPDFText_ParseOptions(); |
| 74 FX_BOOL m_bGetCharCodeOnly; |
| 75 FX_BOOL m_bNormalizeObjs; |
| 76 FX_BOOL m_bOutputHyphen; |
| 77 }; |
| 78 class IPDF_TextPage : public CFX_Object { |
| 79 public: |
| 80 virtual ~IPDF_TextPage() {} |
| 81 static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, |
| 82 CPDFText_ParseOptions ParserOptions); |
| 83 static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, int flags = 0); |
| 84 static IPDF_TextPage* CreateTextPage(const CPDF_PageObjects* pObjs, |
| 85 int flags = 0); |
| 86 static IPDF_TextPage* CreateReflowTextPage(IPDF_ReflowedPage* pRefPage); |
| 65 | 87 |
| 66 CPDFText_ParseOptions(); | 88 virtual void NormalizeObjects(FX_BOOL bNormalize) = 0; |
| 67 FX_BOOL» » » m_bGetCharCodeOnly; | 89 |
| 68 FX_BOOL» » » m_bNormalizeObjs; | 90 virtual FX_BOOL ParseTextPage() = 0; |
| 69 FX_BOOL» » » m_bOutputHyphen; | 91 |
| 92 virtual FX_BOOL IsParsered() const = 0; |
| 93 |
| 94 public: |
| 95 virtual int CharIndexFromTextIndex(int TextIndex) const = 0; |
| 96 |
| 97 virtual int TextIndexFromCharIndex(int CharIndex) const = 0; |
| 98 |
| 99 virtual int CountChars() const = 0; |
| 100 |
| 101 virtual void GetCharInfo(int index, FPDF_CHAR_INFO& info) const = 0; |
| 102 |
| 103 virtual void GetRectArray(int start, |
| 104 int nCount, |
| 105 CFX_RectArray& rectArray) const = 0; |
| 106 |
| 107 virtual int GetIndexAtPos(CPDF_Point point, |
| 108 FX_FLOAT xTorelance, |
| 109 FX_FLOAT yTorelance) const = 0; |
| 110 |
| 111 virtual int GetIndexAtPos(FX_FLOAT x, |
| 112 FX_FLOAT y, |
| 113 FX_FLOAT xTorelance, |
| 114 FX_FLOAT yTorelance) const = 0; |
| 115 |
| 116 virtual int GetOrderByDirection(int index, int direction) const = 0; |
| 117 |
| 118 virtual CFX_WideString GetTextByRect(CFX_FloatRect rect) const = 0; |
| 119 |
| 120 virtual void GetRectsArrayByRect(CFX_FloatRect rect, |
| 121 CFX_RectArray& resRectArray) const = 0; |
| 122 |
| 123 virtual int CountRects(int start, int nCount) = 0; |
| 124 |
| 125 virtual void GetRect(int rectIndex, |
| 126 FX_FLOAT& left, |
| 127 FX_FLOAT& top, |
| 128 FX_FLOAT& right, |
| 129 FX_FLOAT& bottom) const = 0; |
| 130 |
| 131 virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) = 0; |
| 132 |
| 133 virtual FX_BOOL GetBaselineRotate(CFX_FloatRect rect, int& Rotate) = 0; |
| 134 |
| 135 virtual int CountBoundedSegments(FX_FLOAT left, |
| 136 FX_FLOAT top, |
| 137 FX_FLOAT right, |
| 138 FX_FLOAT bottom, |
| 139 FX_BOOL bContains = FALSE) = 0; |
| 140 |
| 141 virtual void GetBoundedSegment(int index, int& start, int& count) const = 0; |
| 142 |
| 143 virtual int GetWordBreak(int index, int direction) const = 0; |
| 144 |
| 145 virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const = 0; |
| 70 }; | 146 }; |
| 71 class IPDF_TextPage : public CFX_Object | 147 #define FPDFTEXT_MATCHCASE 0x00000001 |
| 72 { | 148 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 |
| 73 public: | 149 #define FPDFTEXT_CONSECUTIVE 0x00000004 |
| 150 class IPDF_TextPageFind : public CFX_Object { |
| 151 public: |
| 152 virtual ~IPDF_TextPageFind() {} |
| 74 | 153 |
| 75 virtual ~IPDF_TextPage() {} | 154 static IPDF_TextPageFind* CreatePageFind(const IPDF_TextPage* pTextPage); |
| 76 static IPDF_TextPage*» CreateTextPage(const CPDF_Page* pPage, CPDFText_
ParseOptions ParserOptions); | |
| 77 static IPDF_TextPage*» CreateTextPage(const CPDF_Page* pPage, int flags
= 0); | |
| 78 static IPDF_TextPage*» CreateTextPage(const CPDF_PageObjects* pObjs, in
t flags = 0); | |
| 79 static IPDF_TextPage*» CreateReflowTextPage(IPDF_ReflowedPage* pRefPage
); | |
| 80 | 155 |
| 81 virtual void» » » NormalizeObjects(FX_BOOL bNormalize) = 0
; | 156 public: |
| 157 virtual FX_BOOL FindFirst(CFX_WideString findwhat, |
| 158 int flags, |
| 159 int startPos = 0) = 0; |
| 82 | 160 |
| 83 virtual FX_BOOL» » » ParseTextPage() = 0; | 161 virtual FX_BOOL FindNext() = 0; |
| 84 | 162 |
| 163 virtual FX_BOOL FindPrev() = 0; |
| 85 | 164 |
| 86 virtual FX_BOOL» » » IsParsered() const = 0; | 165 virtual void GetRectArray(CFX_RectArray& rects) const = 0; |
| 87 public: | |
| 88 | 166 |
| 89 virtual int CharIndexFromTextIndex(int TextIndex) const = 0; | 167 virtual int GetCurOrder() const = 0; |
| 90 | 168 |
| 91 virtual int TextIndexFromCharIndex(int CharIndex) const = 0; | 169 virtual int GetMatchedCount() const = 0; |
| 170 }; |
| 171 class IPDF_LinkExtract : public CFX_Object { |
| 172 public: |
| 173 virtual ~IPDF_LinkExtract() {} |
| 92 | 174 |
| 175 static IPDF_LinkExtract* CreateLinkExtract(); |
| 93 | 176 |
| 94 virtual int»» » » CountChars() const = 0; | 177 virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) = 0; |
| 95 | 178 |
| 96 virtual» void» » » GetCharInfo(int index, FPDF_CHAR_INFO &
info) const = 0; | 179 public: |
| 180 virtual int CountLinks() const = 0; |
| 97 | 181 |
| 98 virtual void» » » GetRectArray(int start, int nCount, CFX_
RectArray& rectArray) const = 0; | 182 virtual CFX_WideString GetURL(int index) const = 0; |
| 99 | 183 |
| 184 virtual void GetBoundedSegment(int index, int& start, int& count) const = 0; |
| 100 | 185 |
| 101 | 186 virtual void GetRects(int index, CFX_RectArray& rects) const = 0; |
| 102 virtual int»» » » GetIndexAtPos(CPDF_Point point, FX_FLOAT
xTorelance, FX_FLOAT yTorelance) const = 0; | |
| 103 | |
| 104 virtual int»» » » GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX
_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0; | |
| 105 | |
| 106 virtual» int» » » » GetOrderByDirection(int index, i
nt direction) const = 0; | |
| 107 | |
| 108 virtual CFX_WideString» GetTextByRect(CFX_FloatRect rect) const = 0; | |
| 109 | |
| 110 virtual void» » » GetRectsArrayByRect(CFX_FloatRect rect,
CFX_RectArray& resRectArray) const = 0; | |
| 111 | |
| 112 | |
| 113 virtual int»» » » CountRects(int start, int nCount) = 0; | |
| 114 | |
| 115 virtual» void» » » GetRect(int rectIndex, FX_FLOAT& left, F
X_FLOAT& top, FX_FLOAT& right, FX_FLOAT &bottom) const = 0; | |
| 116 | |
| 117 virtual FX_BOOL» » » GetBaselineRotate(int rectIndex, int& Ro
tate) = 0; | |
| 118 | |
| 119 virtual FX_BOOL» » » GetBaselineRotate(CFX_FloatRect rect, in
t& Rotate) = 0; | |
| 120 | |
| 121 virtual» int» » » » CountBoundedSegments(FX_FLOAT le
ft, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE) =
0; | |
| 122 | |
| 123 virtual» void» » » GetBoundedSegment(int index, int& start,
int& count) const = 0; | |
| 124 | |
| 125 | |
| 126 virtual int»» » » GetWordBreak(int index, int direction) c
onst = 0; | |
| 127 | |
| 128 virtual CFX_WideString» GetPageText(int start = 0, int nCount = -1 ) con
st = 0; | |
| 129 }; | |
| 130 #define FPDFTEXT_MATCHCASE 0x00000001 | |
| 131 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 | |
| 132 #define FPDFTEXT_CONSECUTIVE» 0x00000004 | |
| 133 class IPDF_TextPageFind : public CFX_Object | |
| 134 { | |
| 135 public: | |
| 136 | |
| 137 virtual» ~IPDF_TextPageFind() {} | |
| 138 | |
| 139 static» IPDF_TextPageFind*» CreatePageFind(const IPDF_TextPage* pTex
tPage); | |
| 140 public: | |
| 141 | |
| 142 virtual» FX_BOOL»» » » FindFirst(CFX_WideString findwha
t, int flags, int startPos = 0) = 0; | |
| 143 | |
| 144 virtual» FX_BOOL»» » » FindNext() = 0; | |
| 145 | |
| 146 virtual» FX_BOOL»» » » FindPrev() = 0; | |
| 147 | |
| 148 virtual void» » » » GetRectArray(CFX_RectArray& rect
s) const = 0; | |
| 149 | |
| 150 virtual int»» » » » GetCurOrder() const = 0; | |
| 151 | |
| 152 virtual int»» » » » GetMatchedCount() const = 0; | |
| 153 }; | |
| 154 class IPDF_LinkExtract : public CFX_Object | |
| 155 { | |
| 156 public: | |
| 157 | |
| 158 virtual» ~IPDF_LinkExtract() {} | |
| 159 | |
| 160 static» IPDF_LinkExtract*» CreateLinkExtract(); | |
| 161 | |
| 162 virtual FX_BOOL» » » » ExtractLinks(const IPDF_TextPage
* pTextPage) = 0; | |
| 163 public: | |
| 164 | |
| 165 virtual int»» » » » CountLinks() const = 0; | |
| 166 | |
| 167 virtual CFX_WideString» » GetURL(int index) const = 0; | |
| 168 | |
| 169 virtual» void» » » » GetBoundedSegment(int index, int
& start, int& count) const = 0; | |
| 170 | |
| 171 virtual void» » » » GetRects(int index, CFX_RectArra
y& rects) const = 0; | |
| 172 }; | 187 }; |
| 173 #endif | 188 #endif |
| OLD | NEW |