OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <ctype.h> | 7 #include <ctype.h> |
8 #include <algorithm> | 8 #include <algorithm> |
9 | 9 |
10 #include "../../../third_party/base/nonstd_unique_ptr.h" | 10 #include "../../../third_party/base/nonstd_unique_ptr.h" |
11 #include "../../include/fpdfapi/fpdf_module.h" | 11 #include "../../include/fpdfapi/fpdf_module.h" |
12 #include "../../include/fpdfapi/fpdf_page.h" | 12 #include "../../include/fpdfapi/fpdf_page.h" |
13 #include "../../include/fpdfapi/fpdf_pageobj.h" | 13 #include "../../include/fpdfapi/fpdf_pageobj.h" |
14 #include "../../include/fpdfapi/fpdf_resource.h" | 14 #include "../../include/fpdfapi/fpdf_resource.h" |
15 #include "../../include/fpdftext/fpdf_text.h" | 15 #include "../../include/fpdftext/fpdf_text.h" |
16 #include "../../include/fxcrt/fx_arb.h" | 16 #include "../../include/fxcrt/fx_arb.h" |
17 #include "../../include/fxcrt/fx_ucd.h" | 17 #include "../../include/fxcrt/fx_ucd.h" |
18 #include "text_int.h" | 18 #include "text_int.h" |
19 | 19 |
20 namespace { | 20 namespace { |
21 | 21 |
22 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) | 22 FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) { |
23 { | 23 if (curChar < 255) { |
24 if(curChar < 255 ) { | 24 return FALSE; |
25 return FALSE; | 25 } |
26 } | 26 if ((curChar >= 0x0600 && curChar <= 0x06FF) || |
27 if ( (curChar >= 0x0600 && curChar <= 0x06FF) | 27 (curChar >= 0xFE70 && curChar <= 0xFEFF) || |
28 || (curChar >= 0xFE70 && curChar <= 0xFEFF) | 28 (curChar >= 0xFB50 && curChar <= 0xFDFF) || |
29 || (curChar >= 0xFB50 && curChar <= 0xFDFF) | 29 (curChar >= 0x0400 && curChar <= 0x04FF) || |
30 || (curChar >= 0x0400 && curChar <= 0x04FF) | 30 (curChar >= 0x0500 && curChar <= 0x052F) || |
31 || (curChar >= 0x0500 && curChar <= 0x052F) | 31 (curChar >= 0xA640 && curChar <= 0xA69F) || |
32 || (curChar >= 0xA640 && curChar <= 0xA69F) | 32 (curChar >= 0x2DE0 && curChar <= 0x2DFF) || curChar == 8467 || |
33 || (curChar >= 0x2DE0 && curChar <= 0x2DFF) | 33 (curChar >= 0x2000 && curChar <= 0x206F)) { |
34 || curChar == 8467 | 34 return FALSE; |
35 || (curChar >= 0x2000 && curChar <= 0x206F)) { | 35 } |
36 return FALSE; | 36 return TRUE; |
37 } | |
38 return TRUE; | |
39 } | 37 } |
40 | 38 |
41 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) | 39 FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) { |
42 { | 40 if (threshold < 300) { |
43 if (threshold < 300) { | 41 return threshold / 2.0f; |
44 return threshold / 2.0f; | 42 } |
45 } | 43 if (threshold < 500) { |
46 if (threshold < 500) { | 44 return threshold / 4.0f; |
47 return threshold / 4.0f; | 45 } |
48 } | 46 if (threshold < 700) { |
49 if (threshold < 700) { | 47 return threshold / 5.0f; |
50 return threshold / 5.0f; | 48 } |
51 } | 49 return threshold / 6.0f; |
52 return threshold / 6.0f; | |
53 } | 50 } |
54 | 51 |
55 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, | 52 FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, |
56 const CFX_AffineMatrix& matrix) | 53 const CFX_AffineMatrix& matrix) { |
57 { | 54 FX_FLOAT baseSpace = 0.0; |
58 FX_FLOAT baseSpace = 0.0; | 55 const int nItems = pTextObj->CountItems(); |
59 const int nItems = pTextObj->CountItems(); | 56 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { |
60 if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { | 57 FX_BOOL bAllChar = TRUE; |
61 FX_BOOL bAllChar = TRUE; | 58 FX_FLOAT spacing = matrix.TransformDistance( |
62 FX_FLOAT spacing = matrix.TransformDistance( | 59 pTextObj->m_TextState.GetObject()->m_CharSpace); |
63 pTextObj->m_TextState.GetObject()->m_CharSpace); | 60 baseSpace = spacing; |
64 baseSpace = spacing; | 61 for (int i = 0; i < nItems; i++) { |
65 for (int i = 0; i < nItems; i++) { | 62 CPDF_TextObjectItem item; |
66 CPDF_TextObjectItem item; | 63 pTextObj->GetItemInfo(i, &item); |
67 pTextObj->GetItemInfo(i, &item); | 64 if (item.m_CharCode == (FX_DWORD)-1) { |
68 if (item.m_CharCode == (FX_DWORD) - 1) { | 65 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); |
69 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); | 66 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; |
70 FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; | 67 baseSpace = std::min(baseSpace, kerning + spacing); |
71 baseSpace = std::min(baseSpace, kerning + spacing); | 68 bAllChar = FALSE; |
72 bAllChar = FALSE; | 69 } |
73 } | |
74 } | |
75 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { | |
76 baseSpace = 0.0; | |
77 } | |
78 } | 70 } |
79 return baseSpace; | 71 if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { |
| 72 baseSpace = 0.0; |
| 73 } |
| 74 } |
| 75 return baseSpace; |
80 } | 76 } |
81 | 77 |
82 } // namespace | 78 } // namespace |
83 | 79 |
84 CPDFText_ParseOptions::CPDFText_ParseOptions() | 80 CPDFText_ParseOptions::CPDFText_ParseOptions() |
85 : m_bGetCharCodeOnly(FALSE), m_bNormalizeObjs(TRUE), m_bOutputHyphen(FALSE) | 81 : m_bGetCharCodeOnly(FALSE), |
86 { | 82 m_bNormalizeObjs(TRUE), |
| 83 m_bOutputHyphen(FALSE) {} |
| 84 IPDF_TextPage* IPDF_TextPage::CreateTextPage( |
| 85 const CPDF_Page* pPage, |
| 86 CPDFText_ParseOptions ParserOptions) { |
| 87 return new CPDF_TextPage(pPage, ParserOptions); |
87 } | 88 } |
88 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, CPDFText_Pa
rseOptions ParserOptions) | 89 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, |
89 { | 90 int flags) { |
90 return new CPDF_TextPage(pPage, ParserOptions); | 91 return new CPDF_TextPage(pPage, flags); |
91 } | 92 } |
92 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, int flags) | 93 IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, |
93 { | 94 int flags) { |
94 return new CPDF_TextPage(pPage, flags); | 95 return new CPDF_TextPage(pObjs, flags); |
95 } | 96 } |
96 IPDF_TextPage*» IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, int
flags) | 97 IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( |
97 { | 98 const IPDF_TextPage* pTextPage) { |
98 return new CPDF_TextPage(pObjs, flags); | 99 if (!pTextPage) { |
| 100 return NULL; |
| 101 } |
| 102 return new CPDF_TextPageFind(pTextPage); |
99 } | 103 } |
100 IPDF_TextPageFind*» IPDF_TextPageFind::CreatePageFind(const IPDF_TextPage* p
TextPage) | 104 IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { |
101 { | 105 return new CPDF_LinkExtract(); |
102 if (!pTextPage) { | |
103 return NULL; | |
104 } | |
105 return new CPDF_TextPageFind(pTextPage); | |
106 } | 106 } |
// Single wide-character constants.
#define TEXT_BLANK_CHAR L' '
#define TEXT_LINEFEED_CHAR L'\n'
#define TEXT_RETURN_CHAR L'\r'

// Wide-string constants.
#define TEXT_EMPTY L""
#define TEXT_BLANK L" "
#define TEXT_RETURN_LINEFEED L"\r\n"
#define TEXT_LINEFEED L"\n"

// Numeric constant; its use lies outside this part of the file.
#define TEXT_CHARRATIO_GAPDELTA 0.070
119 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) | 115 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) |
120 : m_charList(512), | 116 : m_charList(512), |
121 m_TempCharList(50), | 117 m_TempCharList(50), |
122 m_pPreTextObj(NULL), | 118 m_pPreTextObj(NULL), |
123 m_IsParsered(FALSE), | 119 m_IsParsered(FALSE), |
124 m_TextlineDir(-1), | 120 m_TextlineDir(-1), |
125 m_CurlineRect(0, 0, 0, 0) | 121 m_CurlineRect(0, 0, 0, 0) { |
126 { | 122 m_pPage = pPage; |
127 m_pPage = pPage; | 123 m_parserflag = flags; |
128 m_parserflag = flags; | 124 m_TextBuf.EstimateSize(0, 10240); |
129 m_TextBuf.EstimateSize(0, 10240); | 125 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
130 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int) pPage->GetPageWidth(),
(int)pPage->GetPageHeight(), 0); | 126 (int)pPage->GetPageHeight(), 0); |
131 } | 127 } |
132 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions Parse
rOptions) | 128 CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, |
133 : m_ParseOptions(ParserOptions) | 129 CPDFText_ParseOptions ParserOptions) |
134 , m_charList(512) | 130 : m_ParseOptions(ParserOptions), |
135 , m_TempCharList(50) | 131 m_charList(512), |
136 , m_pPreTextObj(NULL) | 132 m_TempCharList(50), |
137 , m_IsParsered(FALSE) | 133 m_pPreTextObj(NULL), |
138 , m_TextlineDir(-1) | 134 m_IsParsered(FALSE), |
139 , m_CurlineRect(0, 0, 0, 0) | 135 m_TextlineDir(-1), |
140 { | 136 m_CurlineRect(0, 0, 0, 0) { |
141 m_pPage = pPage; | 137 m_pPage = pPage; |
142 m_parserflag = 0; | 138 m_parserflag = 0; |
143 m_TextBuf.EstimateSize(0, 10240); | 139 m_TextBuf.EstimateSize(0, 10240); |
144 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int) pPage->GetPageWidth(),
(int)pPage->GetPageHeight(), 0); | 140 pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), |
| 141 (int)pPage->GetPageHeight(), 0); |
145 } | 142 } |
146 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags) | 143 CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags) |
147 : m_charList(512), | 144 : m_charList(512), |
148 m_TempCharList(50), | 145 m_TempCharList(50), |
149 m_pPreTextObj(NULL), | 146 m_pPreTextObj(NULL), |
150 m_IsParsered(FALSE), | 147 m_IsParsered(FALSE), |
151 m_TextlineDir(-1), | 148 m_TextlineDir(-1), |
152 m_CurlineRect(0, 0, 0, 0) | 149 m_CurlineRect(0, 0, 0, 0) { |
153 { | 150 m_pPage = pPage; |
154 m_pPage = pPage; | 151 m_parserflag = flags; |
155 m_parserflag = flags; | 152 m_TextBuf.EstimateSize(0, 10240); |
156 m_TextBuf.EstimateSize(0, 10240); | 153 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); |
157 CFX_FloatRect pageRect = pPage->CalcBoundingBox(); | 154 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); |
158 m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top
); | 155 } |
159 } | 156 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { |
160 void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) | 157 m_ParseOptions.m_bNormalizeObjs = bNormalize; |
161 { | 158 } |
162 m_ParseOptions.m_bNormalizeObjs = bNormalize; | 159 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { |
163 } | 160 switch (charInfo.m_Unicode) { |
164 bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) | 161 case 0x2: |
165 { | 162 case 0x3: |
166 switch (charInfo.m_Unicode) { | 163 case 0x93: |
167 case 0x2: | 164 case 0x94: |
168 case 0x3: | 165 case 0x96: |
169 case 0x93: | 166 case 0x97: |
170 case 0x94: | 167 case 0x98: |
171 case 0x96: | 168 case 0xfffe: |
172 case 0x97: | 169 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; |
173 case 0x98: | 170 default: |
174 case 0xfffe: | 171 return false; |
175 return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN; | 172 } |
176 default: | 173 } |
177 return false; | 174 FX_BOOL CPDF_TextPage::ParseTextPage() { |
178 } | 175 if (!m_pPage) { |
179 } | |
180 FX_BOOL CPDF_TextPage::ParseTextPage() | |
181 { | |
182 if (!m_pPage) { | |
183 m_IsParsered = FALSE; | |
184 return FALSE; | |
185 } | |
186 m_IsParsered = FALSE; | 176 m_IsParsered = FALSE; |
187 m_TextBuf.Clear(); | 177 return FALSE; |
188 m_charList.RemoveAll(); | 178 } |
189 m_pPreTextObj = NULL; | 179 m_IsParsered = FALSE; |
190 ProcessObject(); | 180 m_TextBuf.Clear(); |
191 m_IsParsered = TRUE; | 181 m_charList.RemoveAll(); |
192 if(!m_ParseOptions.m_bGetCharCodeOnly) { | 182 m_pPreTextObj = NULL; |
193 m_CharIndex.RemoveAll(); | 183 ProcessObject(); |
194 int nCount = m_charList.GetSize(); | 184 m_IsParsered = TRUE; |
195 if(nCount) { | 185 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
196 m_CharIndex.Add(0); | 186 m_CharIndex.RemoveAll(); |
197 } | 187 int nCount = m_charList.GetSize(); |
198 for(int i = 0; i < nCount; i++) { | 188 if (nCount) { |
199 int indexSize = m_CharIndex.GetSize(); | 189 m_CharIndex.Add(0); |
200 FX_BOOL bNormal = FALSE; | 190 } |
201 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); | 191 for (int i = 0; i < nCount; i++) { |
202 if(charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 192 int indexSize = m_CharIndex.GetSize(); |
203 bNormal = TRUE; | 193 FX_BOOL bNormal = FALSE; |
204 } | 194 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); |
205 else if(charinfo.m_Unicode == 0 || IsControlChar(charinfo)) | 195 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
206 bNormal = FALSE; | 196 bNormal = TRUE; |
207 else { | 197 } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) |
208 bNormal = TRUE; | 198 bNormal = FALSE; |
209 } | 199 else { |
210 if(bNormal) { | 200 bNormal = TRUE; |
211 if(indexSize % 2) { | 201 } |
212 m_CharIndex.Add(1); | 202 if (bNormal) { |
213 } else { | 203 if (indexSize % 2) { |
214 if(indexSize <= 0) { | 204 m_CharIndex.Add(1); |
215 continue; | 205 } else { |
216 } | 206 if (indexSize <= 0) { |
217 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize
- 1) + 1); | 207 continue; |
218 } | 208 } |
219 } else { | 209 m_CharIndex.SetAt(indexSize - 1, |
220 if(indexSize % 2) { | 210 m_CharIndex.GetAt(indexSize - 1) + 1); |
221 if(indexSize <= 0) { | 211 } |
222 continue; | 212 } else { |
223 } | 213 if (indexSize % 2) { |
224 m_CharIndex.SetAt(indexSize - 1, i + 1); | 214 if (indexSize <= 0) { |
225 } else { | 215 continue; |
226 m_CharIndex.Add(i + 1); | 216 } |
227 } | 217 m_CharIndex.SetAt(indexSize - 1, i + 1); |
228 } | 218 } else { |
229 } | 219 m_CharIndex.Add(i + 1); |
230 int indexSize = m_CharIndex.GetSize(); | 220 } |
231 if(indexSize % 2) { | 221 } |
232 m_CharIndex.RemoveAt(indexSize - 1); | 222 } |
233 } | |
234 } | |
235 return TRUE; | |
236 } | |
237 int» CPDF_TextPage::CountChars() const | |
238 { | |
239 if(m_ParseOptions.m_bGetCharCodeOnly) { | |
240 return m_TextBuf.GetSize(); | |
241 } | |
242 return m_charList.GetSize(); | |
243 } | |
244 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const | |
245 { | |
246 int indexSize = m_CharIndex.GetSize(); | 223 int indexSize = m_CharIndex.GetSize(); |
247 int count = 0; | 224 if (indexSize % 2) { |
248 for(int i = 0; i < indexSize; i += 2) { | 225 m_CharIndex.RemoveAt(indexSize - 1); |
249 count += m_CharIndex.GetAt(i + 1); | 226 } |
250 if(count > TextIndex) { | 227 } |
251 return TextIndex - count + m_CharIndex.GetAt(i + 1) + m_CharInd
ex.GetAt(i); | 228 return TRUE; |
252 } | 229 } |
253 } | 230 int CPDF_TextPage::CountChars() const { |
| 231 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 232 return m_TextBuf.GetSize(); |
| 233 } |
| 234 return m_charList.GetSize(); |
| 235 } |
| 236 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { |
| 237 int indexSize = m_CharIndex.GetSize(); |
| 238 int count = 0; |
| 239 for (int i = 0; i < indexSize; i += 2) { |
| 240 count += m_CharIndex.GetAt(i + 1); |
| 241 if (count > TextIndex) { |
| 242 return TextIndex - count + m_CharIndex.GetAt(i + 1) + |
| 243 m_CharIndex.GetAt(i); |
| 244 } |
| 245 } |
| 246 return -1; |
| 247 } |
| 248 int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const { |
| 249 int indexSize = m_CharIndex.GetSize(); |
| 250 int count = 0; |
| 251 for (int i = 0; i < indexSize; i += 2) { |
| 252 count += m_CharIndex.GetAt(i + 1); |
| 253 if (m_CharIndex.GetAt(i + 1) + m_CharIndex.GetAt(i) > CharIndex) { |
| 254 if (CharIndex - m_CharIndex.GetAt(i) < 0) { |
| 255 return -1; |
| 256 } |
| 257 return CharIndex - m_CharIndex.GetAt(i) + count - |
| 258 m_CharIndex.GetAt(i + 1); |
| 259 } |
| 260 } |
| 261 return -1; |
| 262 } |
| 263 void CPDF_TextPage::GetRectArray(int start, |
| 264 int nCount, |
| 265 CFX_RectArray& rectArray) const { |
| 266 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 267 return; |
| 268 } |
| 269 if (start < 0 || nCount == 0) { |
| 270 return; |
| 271 } |
| 272 if (!m_IsParsered) { |
| 273 return; |
| 274 } |
| 275 PAGECHAR_INFO info_curchar; |
| 276 CPDF_TextObject* pCurObj = NULL; |
| 277 CFX_FloatRect rect; |
| 278 int curPos = start; |
| 279 FX_BOOL flagNewRect = TRUE; |
| 280 if (nCount + start > m_charList.GetSize() || nCount == -1) { |
| 281 nCount = m_charList.GetSize() - start; |
| 282 } |
| 283 while (nCount--) { |
| 284 info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(curPos++); |
| 285 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
| 286 continue; |
| 287 } |
| 288 if (info_curchar.m_CharBox.Width() < 0.01 || |
| 289 info_curchar.m_CharBox.Height() < 0.01) { |
| 290 continue; |
| 291 } |
| 292 if (!pCurObj) { |
| 293 pCurObj = info_curchar.m_pTextObj; |
| 294 } |
| 295 if (pCurObj != info_curchar.m_pTextObj) { |
| 296 rectArray.Add(rect); |
| 297 pCurObj = info_curchar.m_pTextObj; |
| 298 flagNewRect = TRUE; |
| 299 } |
| 300 if (flagNewRect) { |
| 301 FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY; |
| 302 CFX_AffineMatrix matrix, matrix_reverse; |
| 303 info_curchar.m_pTextObj->GetTextMatrix(&matrix); |
| 304 matrix.Concat(info_curchar.m_Matrix); |
| 305 matrix_reverse.SetReverse(matrix); |
| 306 matrix_reverse.Transform(orgX, orgY); |
| 307 rect.left = info_curchar.m_CharBox.left; |
| 308 rect.right = info_curchar.m_CharBox.right; |
| 309 if (pCurObj->GetFont()->GetTypeDescent()) { |
| 310 rect.bottom = orgY + |
| 311 pCurObj->GetFont()->GetTypeDescent() * |
| 312 pCurObj->GetFontSize() / 1000; |
| 313 FX_FLOAT xPosTemp = orgX; |
| 314 matrix.Transform(xPosTemp, rect.bottom); |
| 315 } else { |
| 316 rect.bottom = info_curchar.m_CharBox.bottom; |
| 317 } |
| 318 if (pCurObj->GetFont()->GetTypeAscent()) { |
| 319 rect.top = |
| 320 orgY + |
| 321 pCurObj->GetFont()->GetTypeAscent() * pCurObj->GetFontSize() / 1000; |
| 322 FX_FLOAT xPosTemp = |
| 323 orgX + |
| 324 GetCharWidth(info_curchar.m_CharCode, pCurObj->GetFont()) * |
| 325 pCurObj->GetFontSize() / 1000; |
| 326 matrix.Transform(xPosTemp, rect.top); |
| 327 } else { |
| 328 rect.top = info_curchar.m_CharBox.top; |
| 329 } |
| 330 flagNewRect = FALSE; |
| 331 rect = info_curchar.m_CharBox; |
| 332 rect.Normalize(); |
| 333 } else { |
| 334 info_curchar.m_CharBox.Normalize(); |
| 335 if (rect.left > info_curchar.m_CharBox.left) { |
| 336 rect.left = info_curchar.m_CharBox.left; |
| 337 } |
| 338 if (rect.right < info_curchar.m_CharBox.right) { |
| 339 rect.right = info_curchar.m_CharBox.right; |
| 340 } |
| 341 if (rect.top < info_curchar.m_CharBox.top) { |
| 342 rect.top = info_curchar.m_CharBox.top; |
| 343 } |
| 344 if (rect.bottom > info_curchar.m_CharBox.bottom) { |
| 345 rect.bottom = info_curchar.m_CharBox.bottom; |
| 346 } |
| 347 } |
| 348 } |
| 349 rectArray.Add(rect); |
| 350 return; |
| 351 } |
| 352 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, |
| 353 FX_FLOAT xTorelance, |
| 354 FX_FLOAT yTorelance) const { |
| 355 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 356 return -3; |
| 357 } |
| 358 if (!m_IsParsered) { |
| 359 return -3; |
| 360 } |
| 361 int pos = 0; |
| 362 int NearPos = -1; |
| 363 double xdif = 5000, ydif = 5000; |
| 364 while (pos < m_charList.GetSize()) { |
| 365 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); |
| 366 CFX_FloatRect charrect = charinfo.m_CharBox; |
| 367 if (charrect.Contains(point.x, point.y)) { |
| 368 break; |
| 369 } |
| 370 if (xTorelance > 0 || yTorelance > 0) { |
| 371 CFX_FloatRect charRectExt; |
| 372 charrect.Normalize(); |
| 373 charRectExt.left = charrect.left - xTorelance / 2; |
| 374 charRectExt.right = charrect.right + xTorelance / 2; |
| 375 charRectExt.top = charrect.top + yTorelance / 2; |
| 376 charRectExt.bottom = charrect.bottom - yTorelance / 2; |
| 377 if (charRectExt.Contains(point.x, point.y)) { |
| 378 double curXdif, curYdif; |
| 379 curXdif = FXSYS_fabs(point.x - charrect.left) < |
| 380 FXSYS_fabs(point.x - charrect.right) |
| 381 ? FXSYS_fabs(point.x - charrect.left) |
| 382 : FXSYS_fabs(point.x - charrect.right); |
| 383 curYdif = FXSYS_fabs(point.y - charrect.bottom) < |
| 384 FXSYS_fabs(point.y - charrect.top) |
| 385 ? FXSYS_fabs(point.y - charrect.bottom) |
| 386 : FXSYS_fabs(point.y - charrect.top); |
| 387 if (curYdif + curXdif < xdif + ydif) { |
| 388 ydif = curYdif; |
| 389 xdif = curXdif; |
| 390 NearPos = pos; |
| 391 } |
| 392 } |
| 393 } |
| 394 ++pos; |
| 395 } |
| 396 if (pos >= m_charList.GetSize()) { |
| 397 pos = NearPos; |
| 398 } |
| 399 return pos; |
| 400 } |
| 401 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { |
| 402 CFX_WideString strText; |
| 403 if (m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) { |
| 404 return strText; |
| 405 } |
| 406 int nCount = m_charList.GetSize(); |
| 407 int pos = 0; |
| 408 FX_FLOAT posy = 0; |
| 409 FX_BOOL IsContainPreChar = FALSE; |
| 410 FX_BOOL ISAddLineFeed = FALSE; |
| 411 while (pos < nCount) { |
| 412 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); |
| 413 if (IsRectIntersect(rect, charinfo.m_CharBox)) { |
| 414 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar && |
| 415 ISAddLineFeed) { |
| 416 posy = charinfo.m_OriginY; |
| 417 if (strText.GetLength() > 0) { |
| 418 strText += L"\r\n"; |
| 419 } |
| 420 } |
| 421 IsContainPreChar = TRUE; |
| 422 ISAddLineFeed = FALSE; |
| 423 if (charinfo.m_Unicode) { |
| 424 strText += charinfo.m_Unicode; |
| 425 } |
| 426 } else if (charinfo.m_Unicode == 32) { |
| 427 if (IsContainPreChar && charinfo.m_Unicode) { |
| 428 strText += charinfo.m_Unicode; |
| 429 IsContainPreChar = FALSE; |
| 430 ISAddLineFeed = FALSE; |
| 431 } |
| 432 } else { |
| 433 IsContainPreChar = FALSE; |
| 434 ISAddLineFeed = TRUE; |
| 435 } |
| 436 } |
| 437 return strText; |
| 438 } |
| 439 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, |
| 440 CFX_RectArray& resRectArray) const { |
| 441 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 442 return; |
| 443 } |
| 444 if (!m_IsParsered) { |
| 445 return; |
| 446 } |
| 447 CFX_FloatRect curRect; |
| 448 FX_BOOL flagNewRect = TRUE; |
| 449 CPDF_TextObject* pCurObj = NULL; |
| 450 int nCount = m_charList.GetSize(); |
| 451 int pos = 0; |
| 452 while (pos < nCount) { |
| 453 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); |
| 454 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
| 455 continue; |
| 456 } |
| 457 if (IsRectIntersect(rect, info_curchar.m_CharBox)) { |
| 458 if (!pCurObj) { |
| 459 pCurObj = info_curchar.m_pTextObj; |
| 460 } |
| 461 if (pCurObj != info_curchar.m_pTextObj) { |
| 462 resRectArray.Add(curRect); |
| 463 pCurObj = info_curchar.m_pTextObj; |
| 464 flagNewRect = TRUE; |
| 465 } |
| 466 if (flagNewRect) { |
| 467 curRect = info_curchar.m_CharBox; |
| 468 flagNewRect = FALSE; |
| 469 curRect.Normalize(); |
| 470 } else { |
| 471 info_curchar.m_CharBox.Normalize(); |
| 472 if (curRect.left > info_curchar.m_CharBox.left) { |
| 473 curRect.left = info_curchar.m_CharBox.left; |
| 474 } |
| 475 if (curRect.right < info_curchar.m_CharBox.right) { |
| 476 curRect.right = info_curchar.m_CharBox.right; |
| 477 } |
| 478 if (curRect.top < info_curchar.m_CharBox.top) { |
| 479 curRect.top = info_curchar.m_CharBox.top; |
| 480 } |
| 481 if (curRect.bottom > info_curchar.m_CharBox.bottom) { |
| 482 curRect.bottom = info_curchar.m_CharBox.bottom; |
| 483 } |
| 484 } |
| 485 } |
| 486 } |
| 487 resRectArray.Add(curRect); |
| 488 return; |
| 489 } |
| 490 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, |
| 491 FX_FLOAT y, |
| 492 FX_FLOAT xTorelance, |
| 493 FX_FLOAT yTorelance) const { |
| 494 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 495 return -3; |
| 496 } |
| 497 CPDF_Point point(x, y); |
| 498 return GetIndexAtPos(point, xTorelance, yTorelance); |
| 499 } |
| 500 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { |
| 501 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 502 return; |
| 503 } |
| 504 if (!m_IsParsered) { |
| 505 return; |
| 506 } |
| 507 if (index < 0 || index >= m_charList.GetSize()) { |
| 508 return; |
| 509 } |
| 510 PAGECHAR_INFO charinfo; |
| 511 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
| 512 info.m_Charcode = charinfo.m_CharCode; |
| 513 info.m_OriginX = charinfo.m_OriginX; |
| 514 info.m_OriginY = charinfo.m_OriginY; |
| 515 info.m_Unicode = charinfo.m_Unicode; |
| 516 info.m_Flag = charinfo.m_Flag; |
| 517 info.m_CharBox = charinfo.m_CharBox; |
| 518 info.m_pTextObj = charinfo.m_pTextObj; |
| 519 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { |
| 520 info.m_FontSize = charinfo.m_pTextObj->GetFontSize(); |
| 521 } |
| 522 info.m_Matrix.Copy(charinfo.m_Matrix); |
| 523 return; |
| 524 } |
| 525 void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, |
| 526 int32_t& nCount) const { |
| 527 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start); |
| 528 PAGECHAR_INFO charinfo2 = |
| 529 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1); |
| 530 if (FPDFTEXT_CHAR_PIECE != charinfo.m_Flag && |
| 531 FPDFTEXT_CHAR_PIECE != charinfo2.m_Flag) { |
| 532 return; |
| 533 } |
| 534 if (FPDFTEXT_CHAR_PIECE == charinfo.m_Flag) { |
| 535 PAGECHAR_INFO charinfo1 = charinfo; |
| 536 int startIndex = start; |
| 537 while (FPDFTEXT_CHAR_PIECE == charinfo1.m_Flag && |
| 538 charinfo1.m_Index == charinfo.m_Index) { |
| 539 startIndex--; |
| 540 if (startIndex < 0) { |
| 541 break; |
| 542 } |
| 543 charinfo1 = *(PAGECHAR_INFO*)m_charList.GetAt(startIndex); |
| 544 } |
| 545 startIndex++; |
| 546 start = startIndex; |
| 547 } |
| 548 if (FPDFTEXT_CHAR_PIECE == charinfo2.m_Flag) { |
| 549 PAGECHAR_INFO charinfo3 = charinfo2; |
| 550 int endIndex = start + nCount - 1; |
| 551 while (FPDFTEXT_CHAR_PIECE == charinfo3.m_Flag && |
| 552 charinfo3.m_Index == charinfo2.m_Index) { |
| 553 endIndex++; |
| 554 if (endIndex >= m_charList.GetSize()) { |
| 555 break; |
| 556 } |
| 557 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex); |
| 558 } |
| 559 endIndex--; |
| 560 nCount = endIndex - start + 1; |
| 561 } |
| 562 } |
| 563 CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { |
| 564 if (!m_IsParsered || nCount == 0) { |
| 565 return L""; |
| 566 } |
| 567 if (start < 0) { |
| 568 start = 0; |
| 569 } |
| 570 if (nCount == -1) { |
| 571 nCount = m_charList.GetSize() - start; |
| 572 return m_TextBuf.GetWideString().Mid(start, |
| 573 m_TextBuf.GetWideString().GetLength()); |
| 574 } |
| 575 if (nCount <= 0 || m_charList.GetSize() <= 0) { |
| 576 return L""; |
| 577 } |
| 578 if (nCount + start > m_charList.GetSize() - 1) { |
| 579 nCount = m_charList.GetSize() - start; |
| 580 } |
| 581 if (nCount <= 0) { |
| 582 return L""; |
| 583 } |
| 584 CheckMarkedContentObject(start, nCount); |
| 585 int startindex = 0; |
| 586 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start); |
| 587 int startOffset = 0; |
| 588 while (charinfo.m_Index == -1) { |
| 589 startOffset++; |
| 590 if (startOffset > nCount || start + startOffset >= m_charList.GetSize()) { |
| 591 return L""; |
| 592 } |
| 593 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + startOffset); |
| 594 } |
| 595 startindex = charinfo.m_Index; |
| 596 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1); |
| 597 int nCountOffset = 0; |
| 598 while (charinfo.m_Index == -1) { |
| 599 nCountOffset++; |
| 600 if (nCountOffset >= nCount) { |
| 601 return L""; |
| 602 } |
| 603 charinfo = |
| 604 *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1); |
| 605 } |
| 606 nCount = start + nCount - nCountOffset - startindex; |
| 607 if (nCount <= 0) { |
| 608 return L""; |
| 609 } |
| 610 return m_TextBuf.GetWideString().Mid(startindex, nCount); |
| 611 } |
| 612 int CPDF_TextPage::CountRects(int start, int nCount) { |
| 613 if (m_ParseOptions.m_bGetCharCodeOnly) { |
254 return -1; | 614 return -1; |
255 } | 615 } |
256 int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const | 616 if (!m_IsParsered) { |
257 { | |
258 int indexSize = m_CharIndex.GetSize(); | |
259 int count = 0; | |
260 for(int i = 0; i < indexSize; i += 2) { | |
261 count += m_CharIndex.GetAt(i + 1); | |
262 if(m_CharIndex.GetAt(i + 1) + m_CharIndex.GetAt(i) > CharIndex) { | |
263 if(CharIndex - m_CharIndex.GetAt(i) < 0) { | |
264 return -1; | |
265 } | |
266 return » CharIndex - m_CharIndex.GetAt(i) + count - m_CharIndex.G
etAt(i + 1); | |
267 } | |
268 } | |
269 return -1; | 617 return -1; |
270 } | 618 } |
271 void CPDF_TextPage::GetRectArray(int start, int nCount, CFX_RectArray& rectArray
) const | 619 if (start < 0) { |
272 { | 620 return -1; |
273 if(m_ParseOptions.m_bGetCharCodeOnly) { | 621 } |
274 return; | 622 if (nCount == -1 || nCount + start > m_charList.GetSize()) { |
275 } | 623 nCount = m_charList.GetSize() - start; |
276 if(start < 0 || nCount == 0) { | 624 } |
277 return; | 625 m_SelRects.RemoveAll(); |
278 } | 626 GetRectArray(start, nCount, m_SelRects); |
279 if (!m_IsParsered) { | 627 return m_SelRects.GetSize(); |
280 return; | 628 } |
281 } | 629 void CPDF_TextPage::GetRect(int rectIndex, |
282 PAGECHAR_INFO info_curchar; | 630 FX_FLOAT& left, |
283 CPDF_TextObject* pCurObj = NULL; | 631 FX_FLOAT& top, |
284 CFX_FloatRect rect; | 632 FX_FLOAT& right, |
285 int curPos = start; | 633 FX_FLOAT& bottom) const { |
286 FX_BOOL flagNewRect = TRUE; | 634 if (m_ParseOptions.m_bGetCharCodeOnly) { |
287 if (nCount + start > m_charList.GetSize() || nCount == -1) { | 635 return; |
288 nCount = m_charList.GetSize() - start; | 636 } |
289 } | 637 if (!m_IsParsered || rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) { |
290 while (nCount--) { | 638 return; |
291 info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(curPos++); | 639 } |
292 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { | 640 left = m_SelRects.GetAt(rectIndex).left; |
293 continue; | 641 top = m_SelRects.GetAt(rectIndex).top; |
294 } | 642 right = m_SelRects.GetAt(rectIndex).right; |
295 if(info_curchar.m_CharBox.Width() < 0.01 || info_curchar.m_CharBox.Heigh
t() < 0.01) { | 643 bottom = m_SelRects.GetAt(rectIndex).bottom; |
296 continue; | 644 } |
297 } | 645 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { |
298 if(!pCurObj) { | 646 if (m_ParseOptions.m_bGetCharCodeOnly) { |
299 pCurObj = info_curchar.m_pTextObj; | 647 return FALSE; |
300 } | 648 } |
301 if (pCurObj != info_curchar.m_pTextObj) { | 649 if (end == start) { |
302 rectArray.Add(rect); | 650 return FALSE; |
303 pCurObj = info_curchar.m_pTextObj; | 651 } |
304 flagNewRect = TRUE; | 652 FX_FLOAT dx, dy; |
305 } | 653 FPDF_CHAR_INFO info1, info2; |
306 if (flagNewRect) { | 654 GetCharInfo(start, info1); |
307 FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_Origin
Y; | 655 GetCharInfo(end, info2); |
308 CFX_AffineMatrix matrix, matrix_reverse; | 656 while (info2.m_CharBox.Width() == 0 || info2.m_CharBox.Height() == 0) { |
309 info_curchar.m_pTextObj->GetTextMatrix(&matrix); | 657 end--; |
310 matrix.Concat(info_curchar.m_Matrix); | 658 if (end <= start) { |
311 matrix_reverse.SetReverse(matrix); | 659 return FALSE; |
312 matrix_reverse.Transform(orgX, orgY); | 660 } |
313 rect.left = info_curchar.m_CharBox.left; | |
314 rect.right = info_curchar.m_CharBox.right; | |
315 if (pCurObj->GetFont()->GetTypeDescent()) { | |
316 rect.bottom = orgY + pCurObj->GetFont()->GetTypeDescent() * pCur
Obj->GetFontSize() / 1000; | |
317 FX_FLOAT xPosTemp = orgX; | |
318 matrix.Transform(xPosTemp, rect.bottom); | |
319 } else { | |
320 rect.bottom = info_curchar.m_CharBox.bottom; | |
321 } | |
322 if (pCurObj->GetFont()->GetTypeAscent()) { | |
323 rect.top = orgY + pCurObj->GetFont()->GetTypeAscent() * pCurObj-
>GetFontSize() / 1000; | |
324 FX_FLOAT xPosTemp = orgX + GetCharWidth(info_curchar.m_CharCode,
pCurObj->GetFont()) * pCurObj->GetFontSize() / 1000; | |
325 matrix.Transform(xPosTemp, rect.top); | |
326 } else { | |
327 rect.top = info_curchar.m_CharBox.top; | |
328 } | |
329 flagNewRect = FALSE; | |
330 rect = info_curchar.m_CharBox; | |
331 rect.Normalize(); | |
332 } else { | |
333 info_curchar.m_CharBox.Normalize(); | |
334 if (rect.left > info_curchar.m_CharBox.left) { | |
335 rect.left = info_curchar.m_CharBox.left; | |
336 } | |
337 if (rect.right < info_curchar.m_CharBox.right) { | |
338 rect.right = info_curchar.m_CharBox.right; | |
339 } | |
340 if ( rect.top < info_curchar.m_CharBox.top) { | |
341 rect.top = info_curchar.m_CharBox.top; | |
342 } | |
343 if (rect.bottom > info_curchar.m_CharBox.bottom) { | |
344 rect.bottom = info_curchar.m_CharBox.bottom; | |
345 } | |
346 } | |
347 } | |
348 rectArray.Add(rect); | |
349 return; | |
350 } | |
351 int CPDF_TextPage::GetIndexAtPos(CPDF_Point point , FX_FLOAT xTorelance, FX_FLOA
T yTorelance) const | |
352 { | |
353 if(m_ParseOptions.m_bGetCharCodeOnly) { | |
354 return -3; | |
355 } | |
356 if (!m_IsParsered) { | |
357 return -3; | |
358 } | |
359 int pos = 0; | |
360 int NearPos = -1; | |
361 double xdif = 5000, ydif = 5000; | |
362 while(pos < m_charList.GetSize()) { | |
363 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos)); | |
364 CFX_FloatRect charrect = charinfo.m_CharBox; | |
365 if (charrect.Contains(point.x, point.y)) { | |
366 break; | |
367 } | |
368 if (xTorelance > 0 || yTorelance > 0) { | |
369 CFX_FloatRect charRectExt; | |
370 charrect.Normalize(); | |
371 charRectExt.left = charrect.left - xTorelance / 2; | |
372 charRectExt.right = charrect.right + xTorelance / 2; | |
373 charRectExt.top = charrect.top + yTorelance / 2; | |
374 charRectExt.bottom = charrect.bottom - yTorelance / 2; | |
375 if (charRectExt.Contains(point.x, point.y)) { | |
376 double curXdif, curYdif; | |
377 curXdif = FXSYS_fabs(point.x - charrect.left) < FXSYS_fabs(point
.x - charrect.right) ? FXSYS_fabs(point.x - charrect.left) : FXSYS_fabs(point.x
- charrect.right); | |
378 curYdif = FXSYS_fabs(point.y - charrect.bottom) < FXSYS_fabs(poi
nt.y - charrect.top ) ? FXSYS_fabs(point.y - charrect.bottom) : FXSYS_fabs(p
oint.y - charrect.top); | |
379 if (curYdif + curXdif < xdif + ydif) { | |
380 ydif = curYdif; | |
381 xdif = curXdif; | |
382 NearPos = pos; | |
383 } | |
384 } | |
385 } | |
386 ++pos; | |
387 } | |
388 if (pos >= m_charList.GetSize()) { | |
389 pos = NearPos; | |
390 } | |
391 return pos; | |
392 } | |
393 CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const | |
394 { | |
395 CFX_WideString strText; | |
396 if(m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) { | |
397 return strText; | |
398 } | |
399 int nCount = m_charList.GetSize(); | |
400 int pos = 0; | |
401 FX_FLOAT posy = 0; | |
402 FX_BOOL IsContainPreChar = FALSE; | |
403 FX_BOOL ISAddLineFeed = FALSE; | |
404 while (pos < nCount) { | |
405 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | |
406 if (IsRectIntersect(rect, charinfo.m_CharBox)) { | |
407 if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar &
& ISAddLineFeed) { | |
408 posy = charinfo.m_OriginY; | |
409 if (strText.GetLength() > 0) { | |
410 strText += L"\r\n"; | |
411 } | |
412 } | |
413 IsContainPreChar = TRUE; | |
414 ISAddLineFeed = FALSE; | |
415 if (charinfo.m_Unicode) { | |
416 strText += charinfo.m_Unicode; | |
417 } | |
418 } else if (charinfo.m_Unicode == 32) { | |
419 if (IsContainPreChar && charinfo.m_Unicode) { | |
420 strText += charinfo.m_Unicode; | |
421 IsContainPreChar = FALSE; | |
422 ISAddLineFeed = FALSE; | |
423 } | |
424 } else { | |
425 IsContainPreChar = FALSE; | |
426 ISAddLineFeed = TRUE; | |
427 } | |
428 } | |
429 return strText; | |
430 } | |
431 void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray
& resRectArray) const | |
432 { | |
433 if(m_ParseOptions.m_bGetCharCodeOnly) { | |
434 return; | |
435 } | |
436 if (!m_IsParsered) { | |
437 return; | |
438 } | |
439 CFX_FloatRect curRect; | |
440 FX_BOOL flagNewRect = TRUE; | |
441 CPDF_TextObject* pCurObj = NULL; | |
442 int nCount = m_charList.GetSize(); | |
443 int pos = 0; | |
444 while (pos < nCount) { | |
445 PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++); | |
446 if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) { | |
447 continue; | |
448 } | |
449 if (IsRectIntersect(rect, info_curchar.m_CharBox)) { | |
450 if(!pCurObj) { | |
451 pCurObj = info_curchar.m_pTextObj; | |
452 } | |
453 if (pCurObj != info_curchar.m_pTextObj) { | |
454 resRectArray.Add(curRect); | |
455 pCurObj = info_curchar.m_pTextObj; | |
456 flagNewRect = TRUE; | |
457 } | |
458 if (flagNewRect) { | |
459 curRect = info_curchar.m_CharBox; | |
460 flagNewRect = FALSE; | |
461 curRect.Normalize(); | |
462 } else { | |
463 info_curchar.m_CharBox.Normalize(); | |
464 if (curRect.left > info_curchar.m_CharBox.left) { | |
465 curRect.left = info_curchar.m_CharBox.left; | |
466 } | |
467 if (curRect.right < info_curchar.m_CharBox.right) { | |
468 curRect.right = info_curchar.m_CharBox.right; | |
469 } | |
470 if ( curRect.top < info_curchar.m_CharBox.top) { | |
471 curRect.top = info_curchar.m_CharBox.top; | |
472 } | |
473 if (curRect.bottom > info_curchar.m_CharBox.bottom) { | |
474 curRect.bottom = info_curchar.m_CharBox.bottom; | |
475 } | |
476 } | |
477 } | |
478 } | |
479 resRectArray.Add(curRect); | |
480 return; | |
481 } | |
482 int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance
, FX_FLOAT yTorelance) const | |
483 { | |
484 if(m_ParseOptions.m_bGetCharCodeOnly) { | |
485 return -3; | |
486 } | |
487 CPDF_Point point(x, y); | |
488 return GetIndexAtPos(point, xTorelance, yTorelance); | |
489 } | |
490 void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO & info) const | |
491 { | |
492 if(m_ParseOptions.m_bGetCharCodeOnly) { | |
493 return; | |
494 } | |
495 if (!m_IsParsered) { | |
496 return; | |
497 } | |
498 if (index < 0 || index >= m_charList.GetSize()) { | |
499 return; | |
500 } | |
501 PAGECHAR_INFO charinfo; | |
502 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | |
503 info.m_Charcode = charinfo.m_CharCode; | |
504 info.m_OriginX = charinfo.m_OriginX; | |
505 info.m_OriginY = charinfo.m_OriginY; | |
506 info.m_Unicode = charinfo.m_Unicode; | |
507 info.m_Flag = charinfo.m_Flag; | |
508 info.m_CharBox = charinfo.m_CharBox; | |
509 info.m_pTextObj = charinfo.m_pTextObj; | |
510 if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { | |
511 info.m_FontSize = charinfo.m_pTextObj->GetFontSize(); | |
512 } | |
513 info.m_Matrix.Copy(charinfo.m_Matrix); | |
514 return; | |
515 } | |
516 void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, int32_t& nCount) co
nst | |
517 { | |
518 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start); | |
519 PAGECHAR_INFO charinfo2 = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount -
1); | |
520 if (FPDFTEXT_CHAR_PIECE != charinfo.m_Flag && FPDFTEXT_CHAR_PIECE != charinf
o2.m_Flag) { | |
521 return; | |
522 } | |
523 if (FPDFTEXT_CHAR_PIECE == charinfo.m_Flag) { | |
524 PAGECHAR_INFO charinfo1 = charinfo; | |
525 int startIndex = start; | |
526 while(FPDFTEXT_CHAR_PIECE == charinfo1.m_Flag && charinfo1.m_Index == ch
arinfo.m_Index) { | |
527 startIndex--; | |
528 if (startIndex < 0) { | |
529 break; | |
530 } | |
531 charinfo1 = *(PAGECHAR_INFO*)m_charList.GetAt(startIndex); | |
532 } | |
533 startIndex++; | |
534 start = startIndex; | |
535 } | |
536 if (FPDFTEXT_CHAR_PIECE == charinfo2.m_Flag) { | |
537 PAGECHAR_INFO charinfo3 = charinfo2; | |
538 int endIndex = start + nCount - 1; | |
539 while(FPDFTEXT_CHAR_PIECE == charinfo3.m_Flag && charinfo3.m_Index == ch
arinfo2.m_Index) { | |
540 endIndex++; | |
541 if (endIndex >= m_charList.GetSize()) { | |
542 break; | |
543 } | |
544 charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex); | |
545 } | |
546 endIndex--; | |
547 nCount = endIndex - start + 1; | |
548 } | |
549 } | |
550 CFX_WideString CPDF_TextPage::GetPageText(int start , int nCount) const | |
551 { | |
552 if (!m_IsParsered || nCount == 0) { | |
553 return L""; | |
554 } | |
555 if (start < 0) { | |
556 start = 0; | |
557 } | |
558 if (nCount == -1) { | |
559 nCount = m_charList.GetSize() - start; | |
560 return m_TextBuf.GetWideString().Mid(start, m_TextBuf.GetWideString().Ge
tLength()); | |
561 } | |
562 if(nCount <= 0 || m_charList.GetSize() <= 0) { | |
563 return L""; | |
564 } | |
565 if(nCount + start > m_charList.GetSize() - 1) { | |
566 nCount = m_charList.GetSize() - start; | |
567 } | |
568 if (nCount <= 0) { | |
569 return L""; | |
570 } | |
571 CheckMarkedContentObject(start, nCount); | |
572 int startindex = 0; | |
573 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start); | |
574 int startOffset = 0; | |
575 while(charinfo.m_Index == -1) { | |
576 startOffset++; | |
577 if (startOffset > nCount || start + startOffset >= m_charList.GetSize())
{ | |
578 return L""; | |
579 } | |
580 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + startOffset); | |
581 } | |
582 startindex = charinfo.m_Index; | |
583 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1); | |
584 int nCountOffset = 0; | |
585 while (charinfo.m_Index == -1) { | |
586 nCountOffset++; | |
587 if (nCountOffset >= nCount) { | |
588 return L""; | |
589 } | |
590 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffs
et - 1); | |
591 } | |
592 nCount = start + nCount - nCountOffset - startindex; | |
593 if(nCount <= 0) { | |
594 return L""; | |
595 } | |
596 return m_TextBuf.GetWideString().Mid(startindex, nCount); | |
597 } | |
598 int CPDF_TextPage::CountRects(int start, int nCount) | |
599 { | |
600 if(m_ParseOptions.m_bGetCharCodeOnly) { | |
601 return -1; | |
602 } | |
603 if (!m_IsParsered) { | |
604 return -1; | |
605 } | |
606 if (start < 0) { | |
607 return -1; | |
608 } | |
609 if (nCount == -1 || nCount + start > m_charList.GetSize() ) { | |
610 nCount = m_charList.GetSize() - start; | |
611 } | |
612 m_SelRects.RemoveAll(); | |
613 GetRectArray(start, nCount, m_SelRects); | |
614 return m_SelRects.GetSize(); | |
615 } | |
616 void CPDF_TextPage::GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top, FX_FLO
AT& right, FX_FLOAT &bottom) const | |
617 { | |
618 if(m_ParseOptions.m_bGetCharCodeOnly) { | |
619 return ; | |
620 } | |
621 if (!m_IsParsered || rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) { | |
622 return; | |
623 } | |
624 left = m_SelRects.GetAt(rectIndex).left; | |
625 top = m_SelRects.GetAt(rectIndex).top; | |
626 right = m_SelRects.GetAt(rectIndex).right; | |
627 bottom = m_SelRects.GetAt(rectIndex).bottom; | |
628 } | |
629 FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) | |
630 { | |
631 if(m_ParseOptions.m_bGetCharCodeOnly) { | |
632 return FALSE; | |
633 } | |
634 if(end == start) { | |
635 return FALSE; | |
636 } | |
637 FX_FLOAT dx, dy; | |
638 FPDF_CHAR_INFO info1, info2; | |
639 GetCharInfo(start, info1); | |
640 GetCharInfo(end, info2); | 661 GetCharInfo(end, info2); |
641 while(info2.m_CharBox.Width() == 0 || info2.m_CharBox.Height() == 0) { | 662 } |
642 end--; | 663 dx = (info2.m_OriginX - info1.m_OriginX); |
643 if(end <= start) { | 664 dy = (info2.m_OriginY - info1.m_OriginY); |
644 return FALSE; | 665 if (dx == 0) { |
645 } | 666 if (dy > 0) { |
646 GetCharInfo(end, info2); | 667 Rotate = 90; |
647 } | 668 } else if (dy < 0) { |
648 dx = (info2.m_OriginX - info1.m_OriginX); | 669 Rotate = 270; |
649 dy = (info2.m_OriginY - info1.m_OriginY); | |
650 if(dx == 0) { | |
651 if(dy > 0) { | |
652 Rotate = 90; | |
653 } else if (dy < 0) { | |
654 Rotate = 270; | |
655 } else { | |
656 Rotate = 0; | |
657 } | |
658 } else { | 670 } else { |
659 float a = FXSYS_atan2(dy, dx); | 671 Rotate = 0; |
660 Rotate = (int)(a * 180 / FX_PI + 0.5); | 672 } |
661 } | 673 } else { |
662 if(Rotate < 0) { | 674 float a = FXSYS_atan2(dy, dx); |
663 Rotate = -Rotate; | 675 Rotate = (int)(a * 180 / FX_PI + 0.5); |
664 } else if(Rotate > 0) { | 676 } |
665 Rotate = 360 - Rotate; | 677 if (Rotate < 0) { |
666 } | 678 Rotate = -Rotate; |
667 return TRUE; | 679 } else if (Rotate > 0) { |
668 } | 680 Rotate = 360 - Rotate; |
669 FX_BOOL»CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect , int& Rotate
) | 681 } |
670 { | 682 return TRUE; |
671 if(m_ParseOptions.m_bGetCharCodeOnly) { | 683 } |
672 return FALSE; | 684 FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, |
673 } | 685 int& Rotate) { |
674 int start, end, count, n = CountBoundedSegments(rect.left, rect.top, rect.ri
ght, rect.bottom, TRUE); | 686 if (m_ParseOptions.m_bGetCharCodeOnly) { |
675 if(n < 1) { | 687 return FALSE; |
676 return FALSE; | 688 } |
677 } | 689 int start, end, count, |
678 if(n > 1) { | 690 n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, |
679 GetBoundedSegment(n - 1, start, count); | 691 TRUE); |
680 end = start + count - 1; | 692 if (n < 1) { |
681 GetBoundedSegment(0, start, count); | 693 return FALSE; |
| 694 } |
| 695 if (n > 1) { |
| 696 GetBoundedSegment(n - 1, start, count); |
| 697 end = start + count - 1; |
| 698 GetBoundedSegment(0, start, count); |
| 699 } else { |
| 700 GetBoundedSegment(0, start, count); |
| 701 end = start + count - 1; |
| 702 } |
| 703 return GetBaselineRotate(start, end, Rotate); |
| 704 } |
| 705 FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { |
| 706 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 707 return FALSE; |
| 708 } |
| 709 if (!m_IsParsered || rectIndex < 0 || rectIndex > m_SelRects.GetSize()) { |
| 710 return FALSE; |
| 711 } |
| 712 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); |
| 713 return GetBaselineRotate(rect, Rotate); |
| 714 } |
| 715 int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, |
| 716 FX_FLOAT top, |
| 717 FX_FLOAT right, |
| 718 FX_FLOAT bottom, |
| 719 FX_BOOL bContains) { |
| 720 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 721 return -1; |
| 722 } |
| 723 m_Segment.RemoveAll(); |
| 724 if (!m_IsParsered) { |
| 725 return -1; |
| 726 } |
| 727 CFX_FloatRect rect(left, bottom, right, top); |
| 728 rect.Normalize(); |
| 729 int nCount = m_charList.GetSize(); |
| 730 int pos = 0; |
| 731 FPDF_SEGMENT segment; |
| 732 segment.m_Start = 0; |
| 733 segment.m_nCount = 0; |
| 734 int segmentStatus = 0; |
| 735 FX_BOOL IsContainPreChar = FALSE; |
| 736 while (pos < nCount) { |
| 737 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos); |
| 738 if (bContains && rect.Contains(charinfo.m_CharBox)) { |
| 739 if (segmentStatus == 0 || segmentStatus == 2) { |
| 740 segment.m_Start = pos; |
| 741 segment.m_nCount = 1; |
| 742 segmentStatus = 1; |
| 743 } else if (segmentStatus == 1) { |
| 744 segment.m_nCount++; |
| 745 } |
| 746 IsContainPreChar = TRUE; |
| 747 } else if (!bContains && |
| 748 (IsRectIntersect(rect, charinfo.m_CharBox) || |
| 749 rect.Contains(charinfo.m_OriginX, charinfo.m_OriginY))) { |
| 750 if (segmentStatus == 0 || segmentStatus == 2) { |
| 751 segment.m_Start = pos; |
| 752 segment.m_nCount = 1; |
| 753 segmentStatus = 1; |
| 754 } else if (segmentStatus == 1) { |
| 755 segment.m_nCount++; |
| 756 } |
| 757 IsContainPreChar = TRUE; |
| 758 } else if (charinfo.m_Unicode == 32) { |
| 759 if (IsContainPreChar == TRUE) { |
| 760 if (segmentStatus == 0 || segmentStatus == 2) { |
| 761 segment.m_Start = pos; |
| 762 segment.m_nCount = 1; |
| 763 segmentStatus = 1; |
| 764 } else if (segmentStatus == 1) { |
| 765 segment.m_nCount++; |
| 766 } |
| 767 IsContainPreChar = FALSE; |
| 768 } else { |
| 769 if (segmentStatus == 1) { |
| 770 segmentStatus = 2; |
| 771 m_Segment.Add(segment); |
| 772 segment.m_Start = 0; |
| 773 segment.m_nCount = 0; |
| 774 } |
| 775 } |
682 } else { | 776 } else { |
683 GetBoundedSegment(0, start, count); | 777 if (segmentStatus == 1) { |
684 end = start + count - 1; | |
685 } | |
686 return GetBaselineRotate(start, end, Rotate); | |
687 } | |
688 FX_BOOL»CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) | |
689 { | |
690 if(m_ParseOptions.m_bGetCharCodeOnly) { | |
691 return FALSE; | |
692 } | |
693 if (!m_IsParsered || rectIndex < 0 || rectIndex > m_SelRects.GetSize()) { | |
694 return FALSE; | |
695 } | |
696 CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); | |
697 return GetBaselineRotate(rect , Rotate); | |
698 } | |
699 int» CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOA
T right, FX_FLOAT bottom, FX_BOOL bContains ) | |
700 { | |
701 if(m_ParseOptions.m_bGetCharCodeOnly) { | |
702 return -1; | |
703 } | |
704 m_Segment.RemoveAll(); | |
705 if (!m_IsParsered)» { | |
706 return -1; | |
707 } | |
708 CFX_FloatRect rect(left, bottom, right, top); | |
709 rect.Normalize(); | |
710 int nCount = m_charList.GetSize(); | |
711 int pos = 0; | |
712 FPDF_SEGMENT» segment; | |
713 segment.m_Start = 0; | |
714 segment.m_nCount = 0; | |
715 int » » segmentStatus = 0; | |
716 FX_BOOL» » IsContainPreChar = FALSE; | |
717 while (pos < nCount) { | |
718 PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos); | |
719 if(bContains && rect.Contains(charinfo.m_CharBox)) { | |
720 if (segmentStatus == 0 || segmentStatus == 2) { | |
721 segment.m_Start = pos; | |
722 segment.m_nCount = 1; | |
723 segmentStatus = 1; | |
724 } else if (segmentStatus == 1) { | |
725 segment.m_nCount++; | |
726 } | |
727 IsContainPreChar = TRUE; | |
728 } else if (!bContains && (IsRectIntersect(rect, charinfo.m_CharBox) || r
ect.Contains(charinfo.m_OriginX, charinfo.m_OriginY))) { | |
729 if (segmentStatus == 0 || segmentStatus == 2) { | |
730 segment.m_Start = pos; | |
731 segment.m_nCount = 1; | |
732 segmentStatus = 1; | |
733 } else if (segmentStatus == 1) { | |
734 segment.m_nCount++; | |
735 } | |
736 IsContainPreChar = TRUE; | |
737 } else if (charinfo.m_Unicode == 32) { | |
738 if (IsContainPreChar == TRUE) { | |
739 if (segmentStatus == 0 || segmentStatus == 2) { | |
740 segment.m_Start = pos; | |
741 segment.m_nCount = 1; | |
742 segmentStatus = 1; | |
743 } else if (segmentStatus == 1) { | |
744 segment.m_nCount++; | |
745 } | |
746 IsContainPreChar = FALSE; | |
747 } else { | |
748 if (segmentStatus == 1) { | |
749 segmentStatus = 2; | |
750 m_Segment.Add(segment); | |
751 segment.m_Start = 0; | |
752 segment.m_nCount = 0; | |
753 } | |
754 } | |
755 } else { | |
756 if (segmentStatus == 1) { | |
757 segmentStatus = 2; | |
758 m_Segment.Add(segment); | |
759 segment.m_Start = 0; | |
760 segment.m_nCount = 0; | |
761 } | |
762 IsContainPreChar = FALSE; | |
763 } | |
764 pos++; | |
765 } | |
766 if (segmentStatus == 1) { | |
767 segmentStatus = 2; | 778 segmentStatus = 2; |
768 m_Segment.Add(segment); | 779 m_Segment.Add(segment); |
769 segment.m_Start = 0; | 780 segment.m_Start = 0; |
770 segment.m_nCount = 0; | 781 segment.m_nCount = 0; |
771 } | 782 } |
772 return m_Segment.GetSize(); | 783 IsContainPreChar = FALSE; |
773 } | 784 } |
774 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const | 785 pos++; |
775 { | 786 } |
776 if(m_ParseOptions.m_bGetCharCodeOnly) { | 787 if (segmentStatus == 1) { |
777 return ; | 788 segmentStatus = 2; |
778 } | 789 m_Segment.Add(segment); |
779 if (index < 0 || index >= m_Segment.GetSize()) { | 790 segment.m_Start = 0; |
| 791 segment.m_nCount = 0; |
| 792 } |
| 793 return m_Segment.GetSize(); |
| 794 } |
| 795 void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { |
| 796 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 797 return; |
| 798 } |
| 799 if (index < 0 || index >= m_Segment.GetSize()) { |
| 800 return; |
| 801 } |
| 802 start = m_Segment.GetAt(index).m_Start; |
| 803 count = m_Segment.GetAt(index).m_nCount; |
| 804 } |
| 805 int CPDF_TextPage::GetWordBreak(int index, int direction) const { |
| 806 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 807 return -1; |
| 808 } |
| 809 if (!m_IsParsered) { |
| 810 return -1; |
| 811 } |
| 812 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) { |
| 813 return -1; |
| 814 } |
| 815 if (index < 0 || index >= m_charList.GetSize()) { |
| 816 return -1; |
| 817 } |
| 818 PAGECHAR_INFO charinfo; |
| 819 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); |
| 820 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { |
| 821 return index; |
| 822 } |
| 823 if (!IsLetter(charinfo.m_Unicode)) { |
| 824 return index; |
| 825 } |
| 826 int breakPos = index; |
| 827 if (direction == FPDFTEXT_LEFT) { |
| 828 while (--breakPos > 0) { |
| 829 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); |
| 830 if (!IsLetter(charinfo.m_Unicode)) { |
| 831 return breakPos; |
| 832 } |
| 833 } |
| 834 } else if (direction == FPDFTEXT_RIGHT) { |
| 835 while (++breakPos < m_charList.GetSize()) { |
| 836 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); |
| 837 if (!IsLetter(charinfo.m_Unicode)) { |
| 838 return breakPos; |
| 839 } |
| 840 } |
| 841 } |
| 842 return breakPos; |
| 843 } |
| 844 int32_t CPDF_TextPage::FindTextlineFlowDirection() { |
| 845 if (!m_pPage) { |
| 846 return -1; |
| 847 } |
| 848 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); |
| 849 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); |
| 850 CFX_ByteArray nHorizontalMask; |
| 851 if (!nHorizontalMask.SetSize(nPageWidth)) { |
| 852 return -1; |
| 853 } |
| 854 uint8_t* pDataH = nHorizontalMask.GetData(); |
| 855 CFX_ByteArray nVerticalMask; |
| 856 if (!nVerticalMask.SetSize(nPageHeight)) { |
| 857 return -1; |
| 858 } |
| 859 uint8_t* pDataV = nVerticalMask.GetData(); |
| 860 int32_t index = 0; |
| 861 FX_FLOAT fLineHeight = 0.0f; |
| 862 CPDF_PageObject* pPageObj = NULL; |
| 863 FX_POSITION pos = NULL; |
| 864 pos = m_pPage->GetFirstObjectPosition(); |
| 865 if (!pos) { |
| 866 return -1; |
| 867 } |
| 868 while (pos) { |
| 869 pPageObj = m_pPage->GetNextObject(pos); |
| 870 if (NULL == pPageObj) { |
| 871 continue; |
| 872 } |
| 873 if (PDFPAGE_TEXT != pPageObj->m_Type) { |
| 874 continue; |
| 875 } |
| 876 int32_t minH = |
| 877 (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_Left; |
| 878 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth |
| 879 ? nPageWidth |
| 880 : (int32_t)pPageObj->m_Right; |
| 881 int32_t minV = |
| 882 (int32_t)pPageObj->m_Bottom < 0 ? 0 : (int32_t)pPageObj->m_Bottom; |
| 883 int32_t maxV = (int32_t)pPageObj->m_Top > nPageHeight |
| 884 ? nPageHeight |
| 885 : (int32_t)pPageObj->m_Top; |
| 886 if (minH >= maxH || minV >= maxV) { |
| 887 continue; |
| 888 } |
| 889 FXSYS_memset(pDataH + minH, 1, maxH - minH); |
| 890 FXSYS_memset(pDataV + minV, 1, maxV - minV); |
| 891 if (fLineHeight <= 0.0f) { |
| 892 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom; |
| 893 } |
| 894 pPageObj = NULL; |
| 895 } |
| 896 int32_t nStartH = 0; |
| 897 int32_t nEndH = 0; |
| 898 FX_FLOAT nSumH = 0.0f; |
| 899 for (index = 0; index < nPageWidth; index++) |
| 900 if (1 == nHorizontalMask[index]) { |
| 901 break; |
| 902 } |
| 903 nStartH = index; |
| 904 for (index = nPageWidth; index > 0; index--) |
| 905 if (1 == nHorizontalMask[index - 1]) { |
| 906 break; |
| 907 } |
| 908 nEndH = index; |
| 909 for (index = nStartH; index < nEndH; index++) { |
| 910 nSumH += nHorizontalMask[index]; |
| 911 } |
| 912 nSumH /= nEndH - nStartH; |
| 913 int32_t nStartV = 0; |
| 914 int32_t nEndV = 0; |
| 915 FX_FLOAT nSumV = 0.0f; |
| 916 for (index = 0; index < nPageHeight; index++) |
| 917 if (1 == nVerticalMask[index]) { |
| 918 break; |
| 919 } |
| 920 nStartV = index; |
| 921 for (index = nPageHeight; index > 0; index--) |
| 922 if (1 == nVerticalMask[index - 1]) { |
| 923 break; |
| 924 } |
| 925 nEndV = index; |
| 926 for (index = nStartV; index < nEndV; index++) { |
| 927 nSumV += nVerticalMask[index]; |
| 928 } |
| 929 nSumV /= nEndV - nStartV; |
| 930 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) { |
| 931 return 0; |
| 932 } |
| 933 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) { |
| 934 return 1; |
| 935 } |
| 936 if (nSumH > 0.8f) { |
| 937 return 0; |
| 938 } |
| 939 if (nSumH - nSumV > 0.0f) { |
| 940 return 0; |
| 941 } |
| 942 if (nSumV - nSumH > 0.0f) { |
| 943 return 1; |
| 944 } |
| 945 return -1; |
| 946 } |
| 947 void CPDF_TextPage::ProcessObject() { |
| 948 CPDF_PageObject* pPageObj = NULL; |
| 949 if (!m_pPage) { |
| 950 return; |
| 951 } |
| 952 FX_POSITION pos; |
| 953 pos = m_pPage->GetFirstObjectPosition(); |
| 954 if (!pos) { |
| 955 return; |
| 956 } |
| 957 m_TextlineDir = FindTextlineFlowDirection(); |
| 958 int nCount = 0; |
| 959 while (pos) { |
| 960 pPageObj = m_pPage->GetNextObject(pos); |
| 961 if (pPageObj) { |
| 962 if (pPageObj->m_Type == PDFPAGE_TEXT) { |
| 963 CFX_AffineMatrix matrix; |
| 964 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos); |
| 965 nCount++; |
| 966 } else if (pPageObj->m_Type == PDFPAGE_FORM) { |
| 967 CFX_AffineMatrix formMatrix(1, 0, 0, 1, 0, 0); |
| 968 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix); |
| 969 } |
| 970 } |
| 971 pPageObj = NULL; |
| 972 } |
| 973 int count = m_LineObj.GetSize(); |
| 974 for (int i = 0; i < count; i++) { |
| 975 ProcessTextObject(m_LineObj.GetAt(i)); |
| 976 } |
| 977 m_LineObj.RemoveAll(); |
| 978 CloseTempLine(); |
| 979 } |
| 980 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, |
| 981 const CFX_AffineMatrix& formMatrix) { |
| 982 CPDF_PageObject* pPageObj = NULL; |
| 983 FX_POSITION pos; |
| 984 if (!pFormObj) { |
| 985 return; |
| 986 } |
| 987 pos = pFormObj->m_pForm->GetFirstObjectPosition(); |
| 988 if (!pos) { |
| 989 return; |
| 990 } |
| 991 CFX_AffineMatrix curFormMatrix; |
| 992 curFormMatrix.Copy(pFormObj->m_FormMatrix); |
| 993 curFormMatrix.Concat(formMatrix); |
| 994 while (pos) { |
| 995 pPageObj = pFormObj->m_pForm->GetNextObject(pos); |
| 996 if (pPageObj) { |
| 997 if (pPageObj->m_Type == PDFPAGE_TEXT) { |
| 998 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos); |
| 999 } else if (pPageObj->m_Type == PDFPAGE_FORM) { |
| 1000 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); |
| 1001 } |
| 1002 } |
| 1003 pPageObj = NULL; |
| 1004 } |
| 1005 } |
| 1006 int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const { |
| 1007 if (charCode == -1) { |
| 1008 return 0; |
| 1009 } |
| 1010 int w = pFont->GetCharWidthF(charCode); |
| 1011 if (w == 0) { |
| 1012 CFX_ByteString str; |
| 1013 pFont->AppendChar(str, charCode); |
| 1014 w = pFont->GetStringWidth(str, 1); |
| 1015 if (w == 0) { |
| 1016 FX_RECT BBox; |
| 1017 pFont->GetCharBBox(charCode, BBox); |
| 1018 w = BBox.right - BBox.left; |
| 1019 } |
| 1020 } |
| 1021 return w; |
| 1022 } |
| 1023 void CPDF_TextPage::OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str) { |
| 1024 int32_t start, count; |
| 1025 int32_t ret = pBidi->GetBidiInfo(start, count); |
| 1026 if (ret == 2) { |
| 1027 for (int i = start + count - 1; i >= start; i--) { |
| 1028 m_TextBuf.AppendChar(str.GetAt(i)); |
| 1029 m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i)); |
| 1030 } |
| 1031 } else { |
| 1032 int end = start + count; |
| 1033 for (int i = start; i < end; i++) { |
| 1034 m_TextBuf.AppendChar(str.GetAt(i)); |
| 1035 m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i)); |
| 1036 } |
| 1037 } |
| 1038 } |
| 1039 void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) { |
| 1040 PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i); |
| 1041 FX_WCHAR wChar = str.GetAt(i); |
| 1042 if (!IsControlChar(Info)) { |
| 1043 Info.m_Index = m_TextBuf.GetLength(); |
| 1044 if (wChar >= 0xFB00 && wChar <= 0xFB06) { |
| 1045 FX_WCHAR* pDst = NULL; |
| 1046 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
| 1047 if (nCount >= 1) { |
| 1048 pDst = FX_Alloc(FX_WCHAR, nCount); |
| 1049 FX_Unicode_GetNormalization(wChar, pDst); |
| 1050 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| 1051 PAGECHAR_INFO Info2 = Info; |
| 1052 Info2.m_Unicode = pDst[nIndex]; |
| 1053 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
| 1054 m_TextBuf.AppendChar(Info2.m_Unicode); |
| 1055 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
| 1056 m_charList.Add(Info2); |
| 1057 } |
| 1058 } |
| 1059 FX_Free(pDst); |
780 return; | 1060 return; |
781 } | 1061 } |
782 start = m_Segment.GetAt(index).m_Start; | 1062 } |
783 count = m_Segment.GetAt(index).m_nCount; | 1063 m_TextBuf.AppendChar(wChar); |
784 } | 1064 } else { |
785 int CPDF_TextPage::GetWordBreak(int index, int direction) const | 1065 Info.m_Index = -1; |
786 { | 1066 } |
787 if(m_ParseOptions.m_bGetCharCodeOnly) { | 1067 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
788 return -1; | 1068 m_charList.Add(Info); |
789 } | 1069 } |
790 if (!m_IsParsered) { | 1070 } |
791 return -1; | 1071 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) { |
792 } | 1072 PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i); |
793 if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) { | 1073 if (!IsControlChar(Info)) { |
794 return -1; | 1074 Info.m_Index = m_TextBuf.GetLength(); |
795 } | 1075 FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE); |
796 if (index < 0 || index >= m_charList.GetSize()) { | 1076 FX_WCHAR* pDst = NULL; |
797 return -1; | 1077 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
| 1078 if (nCount >= 1) { |
| 1079 pDst = FX_Alloc(FX_WCHAR, nCount); |
| 1080 FX_Unicode_GetNormalization(wChar, pDst); |
| 1081 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| 1082 PAGECHAR_INFO Info2 = Info; |
| 1083 Info2.m_Unicode = pDst[nIndex]; |
| 1084 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; |
| 1085 m_TextBuf.AppendChar(Info2.m_Unicode); |
| 1086 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
| 1087 m_charList.Add(Info2); |
| 1088 } |
| 1089 } |
| 1090 FX_Free(pDst); |
| 1091 return; |
| 1092 } |
| 1093 Info.m_Unicode = wChar; |
| 1094 m_TextBuf.AppendChar(Info.m_Unicode); |
| 1095 } else { |
| 1096 Info.m_Index = -1; |
| 1097 } |
| 1098 if (!m_ParseOptions.m_bGetCharCodeOnly) { |
| 1099 m_charList.Add(Info); |
| 1100 } |
| 1101 } |
| 1102 void CPDF_TextPage::CloseTempLine() { |
| 1103 int count1 = m_TempCharList.GetSize(); |
| 1104 if (count1 <= 0) { |
| 1105 return; |
| 1106 } |
| 1107 nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); |
| 1108 CFX_WideString str = m_TempTextBuf.GetWideString(); |
| 1109 CFX_WordArray order; |
| 1110 FX_BOOL bR2L = FALSE; |
| 1111 int32_t start = 0, count = 0; |
| 1112 int nR2L = 0, nL2R = 0; |
| 1113 FX_BOOL bPrevSpace = FALSE; |
| 1114 for (int i = 0; i < str.GetLength(); i++) { |
| 1115 if (str.GetAt(i) == 32) { |
| 1116 if (bPrevSpace) { |
| 1117 m_TempTextBuf.Delete(i, 1); |
| 1118 m_TempCharList.Delete(i); |
| 1119 str.Delete(i); |
| 1120 count1--; |
| 1121 i--; |
| 1122 continue; |
| 1123 } |
| 1124 bPrevSpace = TRUE; |
| 1125 } else { |
| 1126 bPrevSpace = FALSE; |
| 1127 } |
| 1128 if (pBidiChar->AppendChar(str.GetAt(i))) { |
| 1129 int32_t ret = pBidiChar->GetBidiInfo(start, count); |
| 1130 order.Add(start); |
| 1131 order.Add(count); |
| 1132 order.Add(ret); |
| 1133 if (!bR2L) { |
| 1134 if (ret == 2) { |
| 1135 nR2L++; |
| 1136 } else if (ret == 1) { |
| 1137 nL2R++; |
| 1138 } |
| 1139 } |
| 1140 } |
| 1141 } |
| 1142 if (pBidiChar->EndChar()) { |
| 1143 int32_t ret = pBidiChar->GetBidiInfo(start, count); |
| 1144 order.Add(start); |
| 1145 order.Add(count); |
| 1146 order.Add(ret); |
| 1147 if (!bR2L) { |
| 1148 if (ret == 2) { |
| 1149 nR2L++; |
| 1150 } else if (ret == 1) { |
| 1151 nL2R++; |
| 1152 } |
| 1153 } |
| 1154 } |
| 1155 if (nR2L > 0 && nR2L >= nL2R) { |
| 1156 bR2L = TRUE; |
| 1157 } |
| 1158 if (m_parserflag == FPDFTEXT_RLTB || bR2L) { |
| 1159 int count = order.GetSize(); |
| 1160 for (int i = count - 1; i > 0; i -= 3) { |
| 1161 int ret = order.GetAt(i); |
| 1162 int start = order.GetAt(i - 2); |
| 1163 int count1 = order.GetAt(i - 1); |
| 1164 if (ret == 2 || ret == 0) { |
| 1165 for (int j = start + count1 - 1; j >= start; j--) { |
| 1166 AddCharInfoByRLDirection(str, j); |
| 1167 } |
| 1168 } else { |
| 1169 int j = i; |
| 1170 FX_BOOL bSymbol = FALSE; |
| 1171 while (j > 0 && order.GetAt(j) != 2) { |
| 1172 bSymbol = !order.GetAt(j); |
| 1173 j -= 3; |
| 1174 } |
| 1175 int end = start + count1; |
| 1176 int n = 0; |
| 1177 if (bSymbol) { |
| 1178 n = j + 6; |
| 1179 } else { |
| 1180 n = j + 3; |
| 1181 } |
| 1182 if (n >= i) { |
| 1183 for (int m = start; m < end; m++) { |
| 1184 AddCharInfoByLRDirection(str, m); |
| 1185 } |
| 1186 } else { |
| 1187 j = i; |
| 1188 i = n; |
| 1189 for (; n <= j; n += 3) { |
| 1190 int start = order.GetAt(n - 2); |
| 1191 int count1 = order.GetAt(n - 1); |
| 1192 int end = start + count1; |
| 1193 for (int m = start; m < end; m++) { |
| 1194 AddCharInfoByLRDirection(str, m); |
| 1195 } |
| 1196 } |
| 1197 } |
| 1198 } |
| 1199 } |
| 1200 } else { |
| 1201 int count = order.GetSize(); |
| 1202 FX_BOOL bL2R = FALSE; |
| 1203 for (int i = 0; i < count; i += 3) { |
| 1204 int ret = order.GetAt(i + 2); |
| 1205 int start = order.GetAt(i); |
| 1206 int count1 = order.GetAt(i + 1); |
| 1207 if (ret == 2 || (i == 0 && ret == 0 && !bL2R)) { |
| 1208 int j = i + 3; |
| 1209 while (bR2L && j < count) { |
| 1210 if (order.GetAt(j + 2) == 1) { |
| 1211 break; |
| 1212 } else { |
| 1213 j += 3; |
| 1214 } |
| 1215 } |
| 1216 if (j == 3) { |
| 1217 i = -3; |
| 1218 bL2R = TRUE; |
| 1219 continue; |
| 1220 } |
| 1221 int end = m_TempCharList.GetSize() - 1; |
| 1222 if (j < count) { |
| 1223 end = order.GetAt(j) - 1; |
| 1224 } |
| 1225 i = j - 3; |
| 1226 for (int n = end; n >= start; n--) { |
| 1227 AddCharInfoByRLDirection(str, n); |
| 1228 } |
| 1229 } else { |
| 1230 int end = start + count1; |
| 1231 for (int n = start; n < end; n++) { |
| 1232 AddCharInfoByLRDirection(str, n); |
| 1233 } |
| 1234 } |
| 1235 } |
| 1236 } |
| 1237 order.RemoveAll(); |
| 1238 m_TempCharList.RemoveAll(); |
| 1239 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength()); |
| 1240 } |
| 1241 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj, |
| 1242 const CFX_AffineMatrix& formMatrix, |
| 1243 FX_POSITION ObjPos) { |
| 1244 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, |
| 1245 pTextObj->m_Top); |
| 1246 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { |
| 1247 return; |
| 1248 } |
| 1249 int count = m_LineObj.GetSize(); |
| 1250 PDFTEXT_Obj Obj; |
| 1251 Obj.m_pTextObj = pTextObj; |
| 1252 Obj.m_formMatrix = formMatrix; |
| 1253 if (count == 0) { |
| 1254 m_LineObj.Add(Obj); |
| 1255 return; |
| 1256 } |
| 1257 if (IsSameAsPreTextObject(pTextObj, ObjPos)) { |
| 1258 return; |
| 1259 } |
| 1260 PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(count - 1); |
| 1261 CPDF_TextObjectItem item; |
| 1262 int nItem = prev_Obj.m_pTextObj->CountItems(); |
| 1263 prev_Obj.m_pTextObj->GetItemInfo(nItem - 1, &item); |
| 1264 FX_FLOAT prev_width = |
| 1265 GetCharWidth(item.m_CharCode, prev_Obj.m_pTextObj->GetFont()) * |
| 1266 prev_Obj.m_pTextObj->GetFontSize() / 1000; |
| 1267 CFX_AffineMatrix prev_matrix; |
| 1268 prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix); |
| 1269 prev_width = FXSYS_fabs(prev_width); |
| 1270 prev_matrix.Concat(prev_Obj.m_formMatrix); |
| 1271 prev_width = prev_matrix.TransformDistance(prev_width); |
| 1272 pTextObj->GetItemInfo(0, &item); |
| 1273 FX_FLOAT this_width = GetCharWidth(item.m_CharCode, pTextObj->GetFont()) * |
| 1274 pTextObj->GetFontSize() / 1000; |
| 1275 this_width = FXSYS_fabs(this_width); |
| 1276 CFX_AffineMatrix this_matrix; |
| 1277 pTextObj->GetTextMatrix(&this_matrix); |
| 1278 this_width = FXSYS_fabs(this_width); |
| 1279 this_matrix.Concat(formMatrix); |
| 1280 this_width = this_matrix.TransformDistance(this_width); |
| 1281 FX_FLOAT threshold = |
| 1282 prev_width > this_width ? prev_width / 4 : this_width / 4; |
| 1283 FX_FLOAT prev_x = prev_Obj.m_pTextObj->GetPosX(), |
| 1284 prev_y = prev_Obj.m_pTextObj->GetPosY(); |
| 1285 prev_Obj.m_formMatrix.Transform(prev_x, prev_y); |
| 1286 m_DisplayMatrix.Transform(prev_x, prev_y); |
| 1287 FX_FLOAT this_x = pTextObj->GetPosX(), this_y = pTextObj->GetPosY(); |
| 1288 formMatrix.Transform(this_x, this_y); |
| 1289 m_DisplayMatrix.Transform(this_x, this_y); |
| 1290 if (FXSYS_fabs(this_y - prev_y) > threshold * 2) { |
| 1291 for (int i = 0; i < count; i++) { |
| 1292 ProcessTextObject(m_LineObj.GetAt(i)); |
| 1293 } |
| 1294 m_LineObj.RemoveAll(); |
| 1295 m_LineObj.Add(Obj); |
| 1296 return; |
| 1297 } |
| 1298 int i = 0; |
| 1299 if (m_ParseOptions.m_bNormalizeObjs) { |
| 1300 for (i = count - 1; i >= 0; i--) { |
| 1301 PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i); |
| 1302 CFX_AffineMatrix prev_matrix; |
| 1303 prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix); |
| 1304 FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(), |
| 1305 Prev_y = prev_Obj.m_pTextObj->GetPosY(); |
| 1306 prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y); |
| 1307 m_DisplayMatrix.Transform(Prev_x, Prev_y); |
| 1308 if (this_x >= Prev_x) { |
| 1309 if (i == count - 1) { |
| 1310 m_LineObj.Add(Obj); |
| 1311 } else { |
| 1312 m_LineObj.InsertAt(i + 1, Obj); |
| 1313 } |
| 1314 break; |
| 1315 } |
| 1316 } |
| 1317 if (i < 0) { |
| 1318 m_LineObj.InsertAt(0, Obj); |
| 1319 } |
| 1320 } else { |
| 1321 m_LineObj.Add(Obj); |
| 1322 } |
| 1323 } |
| 1324 int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { |
| 1325 CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
| 1326 CPDF_ContentMarkData* pMarkData = |
| 1327 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); |
| 1328 if (!pMarkData) { |
| 1329 return FPDFTEXT_MC_PASS; |
| 1330 } |
| 1331 int nContentMark = pMarkData->CountItems(); |
| 1332 if (nContentMark < 1) { |
| 1333 return FPDFTEXT_MC_PASS; |
| 1334 } |
| 1335 CFX_WideString actText; |
| 1336 FX_BOOL bExist = FALSE; |
| 1337 CPDF_Dictionary* pDict = NULL; |
| 1338 int n = 0; |
| 1339 for (n = 0; n < nContentMark; n++) { |
| 1340 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); |
| 1341 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); |
| 1342 pDict = (CPDF_Dictionary*)item.GetParam(); |
| 1343 CPDF_String* temp = |
| 1344 (CPDF_String*)(pDict ? pDict->GetElement(FX_BSTRC("ActualText")) |
| 1345 : NULL); |
| 1346 if (temp) { |
| 1347 bExist = TRUE; |
| 1348 actText = temp->GetUnicodeText(); |
| 1349 } |
| 1350 } |
| 1351 if (!bExist) { |
| 1352 return FPDFTEXT_MC_PASS; |
| 1353 } |
| 1354 if (m_pPreTextObj) { |
| 1355 if (CPDF_ContentMarkData* pPreMarkData = |
| 1356 (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject()) { |
| 1357 if (pPreMarkData->CountItems() == n) { |
| 1358 CPDF_ContentMarkItem& item = pPreMarkData->GetItem(n - 1); |
| 1359 if (pDict == item.GetParam()) { |
| 1360 return FPDFTEXT_MC_DONE; |
| 1361 } |
| 1362 } |
| 1363 } |
| 1364 } |
| 1365 CPDF_Font* pFont = pTextObj->GetFont(); |
| 1366 FX_STRSIZE nItems = actText.GetLength(); |
| 1367 if (nItems < 1) { |
| 1368 return FPDFTEXT_MC_PASS; |
| 1369 } |
| 1370 bExist = FALSE; |
| 1371 for (FX_STRSIZE i = 0; i < nItems; i++) { |
| 1372 FX_WCHAR wChar = actText.GetAt(i); |
| 1373 if (-1 == pFont->CharCodeFromUnicode(wChar)) { |
| 1374 continue; |
| 1375 } else { |
| 1376 bExist = TRUE; |
| 1377 break; |
| 1378 } |
| 1379 } |
| 1380 if (!bExist) { |
| 1381 return FPDFTEXT_MC_PASS; |
| 1382 } |
| 1383 bExist = FALSE; |
| 1384 for (FX_STRSIZE i = 0; i < nItems; i++) { |
| 1385 FX_WCHAR wChar = actText.GetAt(i); |
| 1386 if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar))) { |
| 1387 bExist = TRUE; |
| 1388 break; |
| 1389 } |
| 1390 } |
| 1391 if (!bExist) { |
| 1392 return FPDFTEXT_MC_DONE; |
| 1393 } |
| 1394 return FPDFTEXT_MC_DELAY; |
| 1395 } |
| 1396 void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { |
| 1397 CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
| 1398 CPDF_ContentMarkData* pMarkData = |
| 1399 (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); |
| 1400 if (!pMarkData) { |
| 1401 return; |
| 1402 } |
| 1403 int nContentMark = pMarkData->CountItems(); |
| 1404 if (nContentMark < 1) { |
| 1405 return; |
| 1406 } |
| 1407 CFX_WideString actText; |
| 1408 CPDF_Dictionary* pDict = NULL; |
| 1409 int n = 0; |
| 1410 for (n = 0; n < nContentMark; n++) { |
| 1411 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); |
| 1412 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); |
| 1413 pDict = (CPDF_Dictionary*)item.GetParam(); |
| 1414 CPDF_String* temp = |
| 1415 (CPDF_String*)(pDict ? pDict->GetElement(FX_BSTRC("ActualText")) |
| 1416 : NULL); |
| 1417 if (temp) { |
| 1418 actText = temp->GetUnicodeText(); |
| 1419 } |
| 1420 } |
| 1421 FX_STRSIZE nItems = actText.GetLength(); |
| 1422 if (nItems < 1) { |
| 1423 return; |
| 1424 } |
| 1425 CPDF_Font* pFont = pTextObj->GetFont(); |
| 1426 CFX_AffineMatrix formMatrix = Obj.m_formMatrix; |
| 1427 CFX_AffineMatrix matrix; |
| 1428 pTextObj->GetTextMatrix(&matrix); |
| 1429 matrix.Concat(formMatrix); |
| 1430 FX_FLOAT fPosX = pTextObj->GetPosX(); |
| 1431 FX_FLOAT fPosY = pTextObj->GetPosY(); |
| 1432 int nCharInfoIndex = m_TextBuf.GetLength(); |
| 1433 CFX_FloatRect charBox; |
| 1434 charBox.top = pTextObj->m_Top; |
| 1435 charBox.left = pTextObj->m_Left; |
| 1436 charBox.right = pTextObj->m_Right; |
| 1437 charBox.bottom = pTextObj->m_Bottom; |
| 1438 for (FX_STRSIZE k = 0; k < nItems; k++) { |
| 1439 FX_WCHAR wChar = actText.GetAt(k); |
| 1440 if (wChar <= 0x80 && !isprint(wChar)) { |
| 1441 wChar = 0x20; |
| 1442 } |
| 1443 if (wChar >= 0xFFFD) { |
| 1444 continue; |
798 } | 1445 } |
799 PAGECHAR_INFO charinfo; | 1446 PAGECHAR_INFO charinfo; |
800 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); | 1447 charinfo.m_OriginX = fPosX; |
801 if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED)
{ | 1448 charinfo.m_OriginY = fPosY; |
802 return index; | 1449 charinfo.m_Index = nCharInfoIndex; |
803 } | 1450 charinfo.m_Unicode = wChar; |
804 if (!IsLetter(charinfo.m_Unicode)) { | 1451 charinfo.m_CharCode = pFont->CharCodeFromUnicode(wChar); |
805 return index; | 1452 charinfo.m_Flag = FPDFTEXT_CHAR_PIECE; |
806 } | 1453 charinfo.m_pTextObj = pTextObj; |
807 int breakPos = index; | 1454 charinfo.m_CharBox.top = charBox.top; |
808 if (direction == FPDFTEXT_LEFT) { | 1455 charinfo.m_CharBox.left = charBox.left; |
809 while (--breakPos > 0) { | 1456 charinfo.m_CharBox.right = charBox.right; |
810 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); | 1457 charinfo.m_CharBox.bottom = charBox.bottom; |
811 if (!IsLetter(charinfo.m_Unicode)) { | 1458 charinfo.m_Matrix.Copy(matrix); |
812 return breakPos; | 1459 m_TempTextBuf.AppendChar(wChar); |
813 } | 1460 m_TempCharList.Add(charinfo); |
814 } | 1461 } |
815 } else if (direction == FPDFTEXT_RIGHT) { | 1462 } |
816 while (++breakPos < m_charList.GetSize()) { | 1463 void CPDF_TextPage::FindPreviousTextObject(void) { |
817 charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos); | 1464 if (m_TempCharList.GetSize() < 1 && m_charList.GetSize() < 1) { |
818 if (!IsLetter(charinfo.m_Unicode)) { | 1465 return; |
819 return breakPos; | 1466 } |
820 } | 1467 PAGECHAR_INFO preChar; |
821 } | 1468 if (m_TempCharList.GetSize() >= 1) { |
822 } | 1469 preChar = |
823 return breakPos; | 1470 *(PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - 1); |
824 } | 1471 } else { |
825 int32_t CPDF_TextPage::FindTextlineFlowDirection() | 1472 preChar = *(PAGECHAR_INFO*)m_charList.GetAt(m_charList.GetSize() - 1); |
826 { | 1473 } |
827 if (!m_pPage) { | 1474 if (preChar.m_pTextObj) { |
828 return -1; | 1475 m_pPreTextObj = preChar.m_pTextObj; |
829 } | 1476 } |
830 const int32_t nPageWidth = (int32_t)((CPDF_Page*)m_pPage)->GetPageWidth(); | 1477 } |
831 const int32_t nPageHeight = (int32_t)((CPDF_Page*)m_pPage)->GetPageHeight(); | 1478 void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { |
832 CFX_ByteArray nHorizontalMask; | 1479 CPDF_TextObject* pTextObj = Obj.m_pTextObj; |
833 if (!nHorizontalMask.SetSize(nPageWidth)) { | 1480 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { |
834 return -1; | 1481 return; |
835 } | 1482 } |
836 uint8_t* pDataH = nHorizontalMask.GetData(); | 1483 CFX_AffineMatrix formMatrix = Obj.m_formMatrix; |
837 CFX_ByteArray nVerticalMask; | 1484 CPDF_Font* pFont = pTextObj->GetFont(); |
838 if (!nVerticalMask.SetSize(nPageHeight)) { | 1485 CFX_AffineMatrix matrix; |
839 return -1; | 1486 pTextObj->GetTextMatrix(&matrix); |
840 } | 1487 matrix.Concat(formMatrix); |
841 uint8_t* pDataV = nVerticalMask.GetData(); | 1488 int32_t bPreMKC = PreMarkedContent(Obj); |
842 int32_t index = 0; | 1489 if (FPDFTEXT_MC_DONE == bPreMKC) { |
843 FX_FLOAT fLineHeight = 0.0f; | |
844 CPDF_PageObject* pPageObj = NULL; | |
845 FX_POSITION pos = NULL; | |
846 pos = m_pPage->GetFirstObjectPosition(); | |
847 if(!pos) { | |
848 return -1; | |
849 } | |
850 while(pos) { | |
851 pPageObj = m_pPage->GetNextObject(pos); | |
852 if(NULL == pPageObj) { | |
853 continue; | |
854 } | |
855 if(PDFPAGE_TEXT != pPageObj->m_Type) { | |
856 continue; | |
857 } | |
858 int32_t minH = (int32_t)pPageObj->m_Left < 0 ? 0 : (int32_t)pPageObj->m_
Left; | |
859 int32_t maxH = (int32_t)pPageObj->m_Right > nPageWidth ? nPageWidth : (i
nt32_t)pPageObj->m_Right; | |
860 int32_t minV = (int32_t)pPageObj->m_Bottom < 0 ? 0 : (int32_t)pPageObj->
m_Bottom; | |
861 int32_t maxV = (int32_t)pPageObj->m_Top > nPageHeight ? nPageHeight : (i
nt32_t)pPageObj->m_Top; | |
862 if (minH >= maxH || minV >= maxV) { | |
863 continue; | |
864 } | |
865 FXSYS_memset(pDataH + minH, 1, maxH - minH); | |
866 FXSYS_memset(pDataV + minV, 1, maxV - minV); | |
867 if (fLineHeight <= 0.0f) { | |
868 fLineHeight = pPageObj->m_Top - pPageObj->m_Bottom; | |
869 } | |
870 pPageObj = NULL; | |
871 } | |
872 int32_t nStartH = 0; | |
873 int32_t nEndH = 0; | |
874 FX_FLOAT nSumH = 0.0f; | |
875 for (index = 0; index < nPageWidth; index++) | |
876 if(1 == nHorizontalMask[index]) { | |
877 break; | |
878 } | |
879 nStartH = index; | |
880 for (index = nPageWidth; index > 0; index--) | |
881 if(1 == nHorizontalMask[index - 1]) { | |
882 break; | |
883 } | |
884 nEndH = index; | |
885 for (index = nStartH; index < nEndH; index++) { | |
886 nSumH += nHorizontalMask[index]; | |
887 } | |
888 nSumH /= nEndH - nStartH; | |
889 int32_t nStartV = 0; | |
890 int32_t nEndV = 0; | |
891 FX_FLOAT nSumV = 0.0f; | |
892 for (index = 0; index < nPageHeight; index++) | |
893 if(1 == nVerticalMask[index]) { | |
894 break; | |
895 } | |
896 nStartV = index; | |
897 for (index = nPageHeight; index > 0; index--) | |
898 if(1 == nVerticalMask[index - 1]) { | |
899 break; | |
900 } | |
901 nEndV = index; | |
902 for (index = nStartV; index < nEndV; index++) { | |
903 nSumV += nVerticalMask[index]; | |
904 } | |
905 nSumV /= nEndV - nStartV; | |
906 if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) { | |
907 return 0; | |
908 } | |
909 if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) { | |
910 return 1; | |
911 } | |
912 if (nSumH > 0.8f) { | |
913 return 0; | |
914 } | |
915 if (nSumH - nSumV > 0.0f) { | |
916 return 0; | |
917 } | |
918 if (nSumV - nSumH > 0.0f) { | |
919 return 1; | |
920 } | |
921 return -1; | |
922 } | |
923 void CPDF_TextPage::ProcessObject() | |
924 { | |
925 CPDF_PageObject* pPageObj = NULL; | |
926 if (!m_pPage) { | |
927 return; | |
928 } | |
929 FX_POSITION pos; | |
930 pos = m_pPage->GetFirstObjectPosition(); | |
931 if (!pos) { | |
932 return; | |
933 } | |
934 m_TextlineDir = FindTextlineFlowDirection(); | |
935 int nCount = 0; | |
936 while (pos) { | |
937 pPageObj = m_pPage->GetNextObject(pos); | |
938 if(pPageObj) { | |
939 if(pPageObj->m_Type == PDFPAGE_TEXT) { | |
940 CFX_AffineMatrix matrix; | |
941 ProcessTextObject((CPDF_TextObject*)pPageObj, matrix, pos); | |
942 nCount++; | |
943 } else if (pPageObj->m_Type == PDFPAGE_FORM) { | |
944 CFX_AffineMatrix formMatrix(1, 0, 0, 1, 0, 0); | |
945 ProcessFormObject((CPDF_FormObject*)pPageObj, formMatrix); | |
946 } | |
947 } | |
948 pPageObj = NULL; | |
949 } | |
950 int count = m_LineObj.GetSize(); | |
951 for(int i = 0; i < count; i++) { | |
952 ProcessTextObject(m_LineObj.GetAt(i)); | |
953 } | |
954 m_LineObj.RemoveAll(); | |
955 CloseTempLine(); | |
956 } | |
957 void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, const CFX_Affin
eMatrix& formMatrix) | |
958 { | |
959 CPDF_PageObject* pPageObj = NULL; | |
960 FX_POSITION pos; | |
961 if (!pFormObj) { | |
962 return; | |
963 } | |
964 pos = pFormObj->m_pForm->GetFirstObjectPosition(); | |
965 if (!pos) { | |
966 return; | |
967 } | |
968 CFX_AffineMatrix curFormMatrix; | |
969 curFormMatrix.Copy(pFormObj->m_FormMatrix); | |
970 curFormMatrix.Concat(formMatrix); | |
971 while (pos) { | |
972 pPageObj = pFormObj->m_pForm->GetNextObject(pos); | |
973 if(pPageObj) { | |
974 if(pPageObj->m_Type == PDFPAGE_TEXT) { | |
975 ProcessTextObject((CPDF_TextObject*)pPageObj, curFormMatrix, pos
); | |
976 } else if (pPageObj->m_Type == PDFPAGE_FORM) { | |
977 ProcessFormObject((CPDF_FormObject*)pPageObj, curFormMatrix); | |
978 } | |
979 } | |
980 pPageObj = NULL; | |
981 } | |
982 } | |
983 int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const | |
984 { | |
985 if(charCode == -1) { | |
986 return 0; | |
987 } | |
988 int w = pFont->GetCharWidthF(charCode); | |
989 if(w == 0) { | |
990 CFX_ByteString str; | |
991 pFont->AppendChar(str, charCode); | |
992 w = pFont->GetStringWidth(str, 1); | |
993 if(w == 0) { | |
994 FX_RECT BBox; | |
995 pFont->GetCharBBox(charCode, BBox); | |
996 w = BBox.right - BBox.left; | |
997 } | |
998 } | |
999 return w; | |
1000 } | |
1001 void CPDF_TextPage::OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str) | |
1002 { | |
1003 int32_t start, count; | |
1004 int32_t ret = pBidi->GetBidiInfo(start, count); | |
1005 if(ret == 2) { | |
1006 for(int i = start + count - 1; i >= start; i--) { | |
1007 m_TextBuf.AppendChar(str.GetAt(i)); | |
1008 m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i)); | |
1009 } | |
1010 } else { | |
1011 int end = start + count ; | |
1012 for(int i = start; i < end; i++) { | |
1013 m_TextBuf.AppendChar(str.GetAt(i)); | |
1014 m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i)); | |
1015 } | |
1016 } | |
1017 } | |
1018 void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) | |
1019 { | |
1020 PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i); | |
1021 FX_WCHAR wChar = str.GetAt(i); | |
1022 if(!IsControlChar(Info)) { | |
1023 Info.m_Index = m_TextBuf.GetLength(); | |
1024 if (wChar >= 0xFB00 && wChar <= 0xFB06) { | |
1025 FX_WCHAR* pDst = NULL; | |
1026 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | |
1027 if (nCount >= 1) { | |
1028 pDst = FX_Alloc(FX_WCHAR, nCount); | |
1029 FX_Unicode_GetNormalization(wChar, pDst); | |
1030 for (int nIndex = 0; nIndex < nCount; nIndex++) { | |
1031 PAGECHAR_INFO Info2 = Info; | |
1032 Info2.m_Unicode = pDst[nIndex]; | |
1033 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; | |
1034 m_TextBuf.AppendChar(Info2.m_Unicode); | |
1035 if( !m_ParseOptions.m_bGetCharCodeOnly) { | |
1036 m_charList.Add(Info2); | |
1037 } | |
1038 } | |
1039 FX_Free(pDst); | |
1040 return; | |
1041 } | |
1042 } | |
1043 m_TextBuf.AppendChar(wChar); | |
1044 } else { | |
1045 Info.m_Index = -1; | |
1046 } | |
1047 if( !m_ParseOptions.m_bGetCharCodeOnly) { | |
1048 m_charList.Add(Info); | |
1049 } | |
1050 } | |
1051 void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) | |
1052 { | |
1053 PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i); | |
1054 if(!IsControlChar(Info)) { | |
1055 Info.m_Index = m_TextBuf.GetLength(); | |
1056 FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE); | |
1057 FX_WCHAR* pDst = NULL; | |
1058 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | |
1059 if (nCount >= 1) { | |
1060 pDst = FX_Alloc(FX_WCHAR, nCount); | |
1061 FX_Unicode_GetNormalization(wChar, pDst); | |
1062 for (int nIndex = 0; nIndex < nCount; nIndex++) { | |
1063 PAGECHAR_INFO Info2 = Info; | |
1064 Info2.m_Unicode = pDst[nIndex]; | |
1065 Info2.m_Flag = FPDFTEXT_CHAR_PIECE; | |
1066 m_TextBuf.AppendChar(Info2.m_Unicode); | |
1067 if( !m_ParseOptions.m_bGetCharCodeOnly) { | |
1068 m_charList.Add(Info2); | |
1069 } | |
1070 } | |
1071 FX_Free(pDst); | |
1072 return; | |
1073 } | |
1074 Info.m_Unicode = wChar; | |
1075 m_TextBuf.AppendChar(Info.m_Unicode); | |
1076 } else { | |
1077 Info.m_Index = -1; | |
1078 } | |
1079 if( !m_ParseOptions.m_bGetCharCodeOnly) { | |
1080 m_charList.Add(Info); | |
1081 } | |
1082 } | |
1083 void CPDF_TextPage::CloseTempLine() | |
1084 { | |
1085 int count1 = m_TempCharList.GetSize(); | |
1086 if (count1 <= 0) { | |
1087 return; | |
1088 } | |
1089 nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); | |
1090 CFX_WideString str = m_TempTextBuf.GetWideString(); | |
1091 CFX_WordArray order; | |
1092 FX_BOOL bR2L = FALSE; | |
1093 int32_t start = 0, count = 0; | |
1094 int nR2L = 0, nL2R = 0; | |
1095 FX_BOOL bPrevSpace = FALSE; | |
1096 for (int i = 0; i < str.GetLength(); i++) { | |
1097 if(str.GetAt(i) == 32) { | |
1098 if(bPrevSpace) { | |
1099 m_TempTextBuf.Delete(i, 1); | |
1100 m_TempCharList.Delete(i); | |
1101 str.Delete(i); | |
1102 count1--; | |
1103 i--; | |
1104 continue; | |
1105 } | |
1106 bPrevSpace = TRUE; | |
1107 } else { | |
1108 bPrevSpace = FALSE; | |
1109 } | |
1110 if(pBidiChar->AppendChar(str.GetAt(i))) { | |
1111 int32_t ret = pBidiChar->GetBidiInfo(start, count); | |
1112 order.Add(start); | |
1113 order.Add(count); | |
1114 order.Add(ret); | |
1115 if(!bR2L) { | |
1116 if(ret == 2) { | |
1117 nR2L++; | |
1118 } else if (ret == 1) { | |
1119 nL2R++; | |
1120 } | |
1121 } | |
1122 } | |
1123 } | |
1124 if(pBidiChar->EndChar()) { | |
1125 int32_t ret = pBidiChar->GetBidiInfo(start, count); | |
1126 order.Add(start); | |
1127 order.Add(count); | |
1128 order.Add(ret); | |
1129 if(!bR2L) { | |
1130 if(ret == 2) { | |
1131 nR2L++; | |
1132 } else if(ret == 1) { | |
1133 nL2R++; | |
1134 } | |
1135 } | |
1136 } | |
1137 if(nR2L > 0 && nR2L >= nL2R) { | |
1138 bR2L = TRUE; | |
1139 } | |
1140 if (m_parserflag == FPDFTEXT_RLTB || bR2L) { | |
1141 int count = order.GetSize(); | |
1142 for(int i = count - 1; i > 0; i -= 3) { | |
1143 int ret = order.GetAt(i); | |
1144 int start = order.GetAt(i - 2); | |
1145 int count1 = order.GetAt(i - 1); | |
1146 if(ret == 2 || ret == 0) { | |
1147 for(int j = start + count1 - 1; j >= start; j--) { | |
1148 AddCharInfoByRLDirection(str, j); | |
1149 } | |
1150 } else { | |
1151 int j = i; | |
1152 FX_BOOL bSymbol = FALSE; | |
1153 while(j > 0 && order.GetAt(j) != 2) { | |
1154 bSymbol = !order.GetAt(j); | |
1155 j -= 3; | |
1156 } | |
1157 int end = start + count1 ; | |
1158 int n = 0; | |
1159 if(bSymbol) { | |
1160 n = j + 6; | |
1161 } else { | |
1162 n = j + 3; | |
1163 } | |
1164 if(n >= i) { | |
1165 for(int m = start; m < end; m++) { | |
1166 AddCharInfoByLRDirection(str, m); | |
1167 } | |
1168 } else { | |
1169 j = i; | |
1170 i = n; | |
1171 for(; n <= j; n += 3) { | |
1172 int start = order.GetAt(n - 2); | |
1173 int count1 = order.GetAt(n - 1); | |
1174 int end = start + count1 ; | |
1175 for(int m = start; m < end; m++) { | |
1176 AddCharInfoByLRDirection(str, m); | |
1177 } | |
1178 } | |
1179 } | |
1180 } | |
1181 } | |
1182 } else { | |
1183 int count = order.GetSize(); | |
1184 FX_BOOL bL2R = FALSE; | |
1185 for(int i = 0; i < count; i += 3) { | |
1186 int ret = order.GetAt(i + 2); | |
1187 int start = order.GetAt(i); | |
1188 int count1 = order.GetAt(i + 1); | |
1189 if(ret == 2 || (i == 0 && ret == 0 && !bL2R)) { | |
1190 int j = i + 3; | |
1191 while(bR2L && j < count) { | |
1192 if(order.GetAt(j + 2) == 1) { | |
1193 break; | |
1194 } else { | |
1195 j += 3; | |
1196 } | |
1197 } | |
1198 if(j == 3) { | |
1199 i = -3; | |
1200 bL2R = TRUE; | |
1201 continue; | |
1202 } | |
1203 int end = m_TempCharList.GetSize() - 1; | |
1204 if(j < count) { | |
1205 end = order.GetAt(j) - 1; | |
1206 } | |
1207 i = j - 3; | |
1208 for(int n = end; n >= start; n--) { | |
1209 AddCharInfoByRLDirection(str, n); | |
1210 } | |
1211 } else { | |
1212 int end = start + count1 ; | |
1213 for(int n = start; n < end; n++) { | |
1214 AddCharInfoByLRDirection(str, n); | |
1215 } | |
1216 } | |
1217 } | |
1218 } | |
1219 order.RemoveAll(); | |
1220 m_TempCharList.RemoveAll(); | |
1221 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength()); | |
1222 } | |
1223 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj, const CFX_Affi
neMatrix& formMatrix, FX_POSITION ObjPos) | |
1224 { | |
1225 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, pT
extObj->m_Top); | |
1226 if(FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f ) { | |
1227 return; | |
1228 } | |
1229 int count = m_LineObj.GetSize(); | |
1230 PDFTEXT_Obj Obj; | |
1231 Obj.m_pTextObj = pTextObj; | |
1232 Obj.m_formMatrix = formMatrix; | |
1233 if(count == 0) { | |
1234 m_LineObj.Add(Obj); | |
1235 return; | |
1236 } | |
1237 if (IsSameAsPreTextObject(pTextObj, ObjPos)) { | |
1238 return; | |
1239 } | |
1240 PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(count - 1); | |
1241 CPDF_TextObjectItem item; | |
1242 int nItem = prev_Obj.m_pTextObj->CountItems(); | |
1243 prev_Obj.m_pTextObj->GetItemInfo(nItem - 1, &item); | |
1244 FX_FLOAT prev_width = GetCharWidth(item.m_CharCode, prev_Obj.m_pTextObj->Get
Font()) * prev_Obj.m_pTextObj->GetFontSize() / 1000; | |
1245 CFX_AffineMatrix prev_matrix; | |
1246 prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix); | |
1247 prev_width = FXSYS_fabs(prev_width); | |
1248 prev_matrix.Concat(prev_Obj.m_formMatrix); | |
1249 prev_width = prev_matrix.TransformDistance(prev_width); | |
1250 pTextObj->GetItemInfo(0, &item); | |
1251 FX_FLOAT this_width = GetCharWidth(item.m_CharCode, pTextObj->GetFont()) * p
TextObj->GetFontSize() / 1000; | |
1252 this_width = FXSYS_fabs(this_width); | |
1253 CFX_AffineMatrix this_matrix; | |
1254 pTextObj->GetTextMatrix(&this_matrix); | |
1255 this_width = FXSYS_fabs(this_width); | |
1256 this_matrix.Concat(formMatrix); | |
1257 this_width = this_matrix.TransformDistance(this_width); | |
1258 FX_FLOAT threshold = prev_width > this_width ? prev_width / 4 : this_width /
4; | |
1259 FX_FLOAT prev_x = prev_Obj.m_pTextObj->GetPosX(), prev_y = prev_Obj.m_pTextO
bj->GetPosY(); | |
1260 prev_Obj.m_formMatrix.Transform(prev_x, prev_y); | |
1261 m_DisplayMatrix.Transform(prev_x, prev_y); | |
1262 FX_FLOAT this_x = pTextObj->GetPosX(), this_y = pTextObj->GetPosY(); | |
1263 formMatrix.Transform(this_x, this_y); | |
1264 m_DisplayMatrix.Transform(this_x, this_y); | |
1265 if (FXSYS_fabs(this_y - prev_y) > threshold * 2) { | |
1266 for(int i = 0; i < count; i++) { | |
1267 ProcessTextObject(m_LineObj.GetAt(i)); | |
1268 } | |
1269 m_LineObj.RemoveAll(); | |
1270 m_LineObj.Add(Obj); | |
1271 return; | |
1272 } | |
1273 int i = 0; | |
1274 if(m_ParseOptions.m_bNormalizeObjs) { | |
1275 for(i = count - 1; i >= 0; i--) { | |
1276 PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i); | |
1277 CFX_AffineMatrix prev_matrix; | |
1278 prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix); | |
1279 FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(), Prev_y = prev_Obj.
m_pTextObj->GetPosY(); | |
1280 prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y); | |
1281 m_DisplayMatrix.Transform(Prev_x, Prev_y); | |
1282 if(this_x >= Prev_x) { | |
1283 if(i == count - 1) { | |
1284 m_LineObj.Add(Obj); | |
1285 } else { | |
1286 m_LineObj.InsertAt(i + 1, Obj); | |
1287 } | |
1288 break; | |
1289 } | |
1290 } | |
1291 if(i < 0) { | |
1292 m_LineObj.InsertAt(0, Obj); | |
1293 } | |
1294 } else { | |
1295 m_LineObj.Add(Obj); | |
1296 } | |
1297 } | |
1298 int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) | |
1299 { | |
1300 CPDF_TextObject* pTextObj = Obj.m_pTextObj; | |
1301 CPDF_ContentMarkData* pMarkData = (CPDF_ContentMarkData*)pTextObj->m_Content
Mark.GetObject(); | |
1302 if(!pMarkData) { | |
1303 return FPDFTEXT_MC_PASS; | |
1304 } | |
1305 int nContentMark = pMarkData->CountItems(); | |
1306 if (nContentMark < 1) { | |
1307 return FPDFTEXT_MC_PASS; | |
1308 } | |
1309 CFX_WideString actText; | |
1310 FX_BOOL bExist = FALSE; | |
1311 CPDF_Dictionary* pDict = NULL; | |
1312 int n = 0; | |
1313 for (n = 0; n < nContentMark; n++) { | |
1314 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); | |
1315 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); | |
1316 pDict = (CPDF_Dictionary*)item.GetParam(); | |
1317 CPDF_String* temp = (CPDF_String*)(pDict ? pDict->GetElement(FX_BSTRC("A
ctualText")) : NULL); | |
1318 if (temp) { | |
1319 bExist = TRUE; | |
1320 actText = temp->GetUnicodeText(); | |
1321 } | |
1322 } | |
1323 if (!bExist) { | |
1324 return FPDFTEXT_MC_PASS; | |
1325 } | |
1326 if (m_pPreTextObj) { | |
1327 if (CPDF_ContentMarkData* pPreMarkData = (CPDF_ContentMarkData*)m_pPreTe
xtObj->m_ContentMark.GetObject()) { | |
1328 if (pPreMarkData->CountItems() == n) { | |
1329 CPDF_ContentMarkItem& item = pPreMarkData->GetItem(n - 1); | |
1330 if (pDict == item.GetParam()) { | |
1331 return FPDFTEXT_MC_DONE; | |
1332 } | |
1333 } | |
1334 } | |
1335 } | |
1336 CPDF_Font* pFont = pTextObj->GetFont(); | |
1337 FX_STRSIZE nItems = actText.GetLength(); | |
1338 if (nItems < 1) { | |
1339 return FPDFTEXT_MC_PASS; | |
1340 } | |
1341 bExist = FALSE; | |
1342 for (FX_STRSIZE i = 0; i < nItems; i++) { | |
1343 FX_WCHAR wChar = actText.GetAt(i); | |
1344 if (-1 == pFont->CharCodeFromUnicode(wChar)) { | |
1345 continue; | |
1346 } else { | |
1347 bExist = TRUE; | |
1348 break; | |
1349 } | |
1350 } | |
1351 if (!bExist) { | |
1352 return FPDFTEXT_MC_PASS; | |
1353 } | |
1354 bExist = FALSE; | |
1355 for (FX_STRSIZE i = 0; i < nItems; i++) { | |
1356 FX_WCHAR wChar = actText.GetAt(i); | |
1357 if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar)
)) { | |
1358 bExist = TRUE; | |
1359 break; | |
1360 } | |
1361 } | |
1362 if (!bExist) { | |
1363 return FPDFTEXT_MC_DONE; | |
1364 } | |
1365 return FPDFTEXT_MC_DELAY; | |
1366 } | |
1367 void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) | |
1368 { | |
1369 CPDF_TextObject* pTextObj = Obj.m_pTextObj; | |
1370 CPDF_ContentMarkData* pMarkData = (CPDF_ContentMarkData*)pTextObj->m_Content
Mark.GetObject(); | |
1371 if(!pMarkData) { | |
1372 return; | |
1373 } | |
1374 int nContentMark = pMarkData->CountItems(); | |
1375 if (nContentMark < 1) { | |
1376 return; | |
1377 } | |
1378 CFX_WideString actText; | |
1379 CPDF_Dictionary* pDict = NULL; | |
1380 int n = 0; | |
1381 for (n = 0; n < nContentMark; n++) { | |
1382 CPDF_ContentMarkItem& item = pMarkData->GetItem(n); | |
1383 CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); | |
1384 pDict = (CPDF_Dictionary*)item.GetParam(); | |
1385 CPDF_String* temp = (CPDF_String*)(pDict ? pDict->GetElement(FX_BSTRC("A
ctualText")) : NULL); | |
1386 if (temp) { | |
1387 actText = temp->GetUnicodeText(); | |
1388 } | |
1389 } | |
1390 FX_STRSIZE nItems = actText.GetLength(); | |
1391 if (nItems < 1) { | |
1392 return; | |
1393 } | |
1394 CPDF_Font* pFont = pTextObj->GetFont(); | |
1395 CFX_AffineMatrix formMatrix = Obj.m_formMatrix; | |
1396 CFX_AffineMatrix matrix; | |
1397 pTextObj->GetTextMatrix(&matrix); | |
1398 matrix.Concat(formMatrix); | |
1399 FX_FLOAT fPosX = pTextObj->GetPosX(); | |
1400 FX_FLOAT fPosY = pTextObj->GetPosY(); | |
1401 int nCharInfoIndex = m_TextBuf.GetLength(); | |
1402 CFX_FloatRect charBox; | |
1403 charBox.top = pTextObj->m_Top; | |
1404 charBox.left = pTextObj->m_Left; | |
1405 charBox.right = pTextObj->m_Right; | |
1406 charBox.bottom = pTextObj->m_Bottom; | |
1407 for (FX_STRSIZE k = 0; k < nItems; k++) { | |
1408 FX_WCHAR wChar = actText.GetAt(k); | |
1409 if (wChar <= 0x80 && !isprint(wChar)) { | |
1410 wChar = 0x20; | |
1411 } | |
1412 if (wChar >= 0xFFFD) { | |
1413 continue; | |
1414 } | |
1415 PAGECHAR_INFO charinfo; | |
1416 charinfo.m_OriginX = fPosX; | |
1417 charinfo.m_OriginY = fPosY; | |
1418 charinfo.m_Index = nCharInfoIndex; | |
1419 charinfo.m_Unicode = wChar; | |
1420 charinfo.m_CharCode = pFont->CharCodeFromUnicode(wChar); | |
1421 charinfo.m_Flag = FPDFTEXT_CHAR_PIECE; | |
1422 charinfo.m_pTextObj = pTextObj; | |
1423 charinfo.m_CharBox.top = charBox.top; | |
1424 charinfo.m_CharBox.left = charBox.left; | |
1425 charinfo.m_CharBox.right = charBox.right; | |
1426 charinfo.m_CharBox.bottom = charBox.bottom; | |
1427 charinfo.m_Matrix.Copy(matrix); | |
1428 m_TempTextBuf.AppendChar(wChar); | |
1429 m_TempCharList.Add(charinfo); | |
1430 } | |
1431 } | |
1432 void CPDF_TextPage::FindPreviousTextObject(void) | |
1433 { | |
1434 if (m_TempCharList.GetSize() < 1 && m_charList.GetSize() < 1) { | |
1435 return; | |
1436 } | |
1437 PAGECHAR_INFO preChar; | |
1438 if (m_TempCharList.GetSize() >= 1) { | |
1439 preChar = *(PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize()
- 1); | |
1440 } else { | |
1441 preChar = *(PAGECHAR_INFO*)m_charList.GetAt(m_charList.GetSize() - 1); | |
1442 } | |
1443 if (preChar.m_pTextObj) { | |
1444 m_pPreTextObj = preChar.m_pTextObj; | |
1445 } | |
1446 } | |
1447 void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) | |
1448 { | |
1449 CPDF_TextObject* pTextObj = Obj.m_pTextObj; | |
1450 if(FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f ) { | |
1451 return; | |
1452 } | |
1453 CFX_AffineMatrix formMatrix = Obj.m_formMatrix; | |
1454 CPDF_Font* pFont = pTextObj->GetFont(); | |
1455 CFX_AffineMatrix matrix; | |
1456 pTextObj->GetTextMatrix(&matrix); | |
1457 matrix.Concat(formMatrix); | |
1458 int32_t bPreMKC = PreMarkedContent(Obj); | |
1459 if (FPDFTEXT_MC_DONE == bPreMKC) { | |
1460 m_pPreTextObj = pTextObj; | |
1461 m_perMatrix.Copy(formMatrix); | |
1462 return; | |
1463 } | |
1464 int result = 0; | |
1465 if (m_pPreTextObj) { | |
1466 result = ProcessInsertObject(pTextObj, formMatrix); | |
1467 if (2 == result) { | |
1468 m_CurlineRect = CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj
->m_Bottom, Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); | |
1469 } else { | |
1470 m_CurlineRect.Union(CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTex
tObj->m_Bottom, Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top)); | |
1471 } | |
1472 PAGECHAR_INFO generateChar; | |
1473 if (result == 1) { | |
1474 if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) { | |
1475 if (!formMatrix.IsIdentity()) { | |
1476 generateChar.m_Matrix.Copy(formMatrix); | |
1477 } | |
1478 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); | |
1479 m_TempCharList.Add(generateChar); | |
1480 } | |
1481 } else if(result == 2) { | |
1482 CloseTempLine(); | |
1483 if(m_TextBuf.GetSize()) { | |
1484 if(m_ParseOptions.m_bGetCharCodeOnly) { | |
1485 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); | |
1486 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); | |
1487 } else { | |
1488 if(GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { | |
1489 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); | |
1490 if (!formMatrix.IsIdentity()) { | |
1491 generateChar.m_Matrix.Copy(formMatrix); | |
1492 } | |
1493 m_charList.Add(generateChar); | |
1494 } | |
1495 if(GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { | |
1496 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); | |
1497 if (!formMatrix.IsIdentity()) { | |
1498 generateChar.m_Matrix.Copy(formMatrix); | |
1499 } | |
1500 m_charList.Add(generateChar); | |
1501 } | |
1502 } | |
1503 } | |
1504 } else if (result == 3 && !m_ParseOptions.m_bOutputHyphen) { | |
1505 int32_t nChars = pTextObj->CountChars(); | |
1506 if (nChars == 1) { | |
1507 CPDF_TextObjectItem item; | |
1508 pTextObj->GetCharInfo(0, &item); | |
1509 CFX_WideString wstrItem = pTextObj->GetFont()->UnicodeFromCharCo
de(item.m_CharCode); | |
1510 if(wstrItem.IsEmpty()) { | |
1511 wstrItem += (FX_WCHAR)item.m_CharCode; | |
1512 } | |
1513 FX_WCHAR curChar = wstrItem.GetAt(0); | |
1514 if (0x2D == curChar || 0xAD == curChar) { | |
1515 return; | |
1516 } | |
1517 } | |
1518 while (m_TempTextBuf.GetSize() > 0 && m_TempTextBuf.GetWideString().
GetAt(m_TempTextBuf.GetLength() - 1) == 0x20) { | |
1519 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | |
1520 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); | |
1521 } | |
1522 PAGECHAR_INFO* cha = (PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempChar
List.GetSize() - 1); | |
1523 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | |
1524 cha->m_Unicode = 0x2; | |
1525 cha->m_Flag = FPDFTEXT_CHAR_HYPHEN; | |
1526 m_TempTextBuf.AppendChar(0xfffe); | |
1527 } | |
1528 } else { | |
1529 m_CurlineRect = CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_
Bottom, Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); | |
1530 } | |
1531 if (FPDFTEXT_MC_DELAY == bPreMKC) { | |
1532 ProcessMarkedContent(Obj); | |
1533 m_pPreTextObj = pTextObj; | |
1534 m_perMatrix.Copy(formMatrix); | |
1535 return; | |
1536 } | |
1537 m_pPreTextObj = pTextObj; | 1490 m_pPreTextObj = pTextObj; |
1538 m_perMatrix.Copy(formMatrix); | 1491 m_perMatrix.Copy(formMatrix); |
1539 int nItems = pTextObj->CountItems(); | 1492 return; |
1540 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); | 1493 } |
| 1494 int result = 0; |
| 1495 if (m_pPreTextObj) { |
| 1496 result = ProcessInsertObject(pTextObj, formMatrix); |
| 1497 if (2 == result) { |
| 1498 m_CurlineRect = |
| 1499 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, |
| 1500 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); |
| 1501 } else { |
| 1502 m_CurlineRect.Union( |
| 1503 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, |
| 1504 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top)); |
| 1505 } |
| 1506 PAGECHAR_INFO generateChar; |
| 1507 if (result == 1) { |
| 1508 if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) { |
| 1509 if (!formMatrix.IsIdentity()) { |
| 1510 generateChar.m_Matrix.Copy(formMatrix); |
| 1511 } |
| 1512 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); |
| 1513 m_TempCharList.Add(generateChar); |
| 1514 } |
| 1515 } else if (result == 2) { |
| 1516 CloseTempLine(); |
| 1517 if (m_TextBuf.GetSize()) { |
| 1518 if (m_ParseOptions.m_bGetCharCodeOnly) { |
| 1519 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); |
| 1520 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); |
| 1521 } else { |
| 1522 if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { |
| 1523 m_TextBuf.AppendChar(TEXT_RETURN_CHAR); |
| 1524 if (!formMatrix.IsIdentity()) { |
| 1525 generateChar.m_Matrix.Copy(formMatrix); |
| 1526 } |
| 1527 m_charList.Add(generateChar); |
| 1528 } |
| 1529 if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { |
| 1530 m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); |
| 1531 if (!formMatrix.IsIdentity()) { |
| 1532 generateChar.m_Matrix.Copy(formMatrix); |
| 1533 } |
| 1534 m_charList.Add(generateChar); |
| 1535 } |
| 1536 } |
| 1537 } |
| 1538 } else if (result == 3 && !m_ParseOptions.m_bOutputHyphen) { |
| 1539 int32_t nChars = pTextObj->CountChars(); |
| 1540 if (nChars == 1) { |
| 1541 CPDF_TextObjectItem item; |
| 1542 pTextObj->GetCharInfo(0, &item); |
| 1543 CFX_WideString wstrItem = |
| 1544 pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); |
| 1545 if (wstrItem.IsEmpty()) { |
| 1546 wstrItem += (FX_WCHAR)item.m_CharCode; |
| 1547 } |
| 1548 FX_WCHAR curChar = wstrItem.GetAt(0); |
| 1549 if (0x2D == curChar || 0xAD == curChar) { |
| 1550 return; |
| 1551 } |
| 1552 } |
| 1553 while (m_TempTextBuf.GetSize() > 0 && |
| 1554 m_TempTextBuf.GetWideString().GetAt(m_TempTextBuf.GetLength() - |
| 1555 1) == 0x20) { |
| 1556 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
| 1557 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); |
| 1558 } |
| 1559 PAGECHAR_INFO* cha = |
| 1560 (PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - 1); |
| 1561 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
| 1562 cha->m_Unicode = 0x2; |
| 1563 cha->m_Flag = FPDFTEXT_CHAR_HYPHEN; |
| 1564 m_TempTextBuf.AppendChar(0xfffe); |
| 1565 } |
| 1566 } else { |
| 1567 m_CurlineRect = |
| 1568 CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, |
| 1569 Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); |
| 1570 } |
| 1571 if (FPDFTEXT_MC_DELAY == bPreMKC) { |
| 1572 ProcessMarkedContent(Obj); |
| 1573 m_pPreTextObj = pTextObj; |
| 1574 m_perMatrix.Copy(formMatrix); |
| 1575 return; |
| 1576 } |
| 1577 m_pPreTextObj = pTextObj; |
| 1578 m_perMatrix.Copy(formMatrix); |
| 1579 int nItems = pTextObj->CountItems(); |
| 1580 FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); |
1541 | 1581 |
1542 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); | 1582 const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems); |
1543 const FX_BOOL bIsBidiAndMirrorInverse = | 1583 const FX_BOOL bIsBidiAndMirrorInverse = |
1544 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; | 1584 bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0; |
1545 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); | 1585 int32_t iBufStartAppend = m_TempTextBuf.GetLength(); |
1546 int32_t iCharListStartAppend = m_TempCharList.GetSize(); | 1586 int32_t iCharListStartAppend = m_TempCharList.GetSize(); |
1547 | 1587 |
1548 FX_FLOAT spacing = 0; | 1588 FX_FLOAT spacing = 0; |
1549 for (int i = 0; i < nItems; i++) { | 1589 for (int i = 0; i < nItems; i++) { |
1550 CPDF_TextObjectItem item; | 1590 CPDF_TextObjectItem item; |
1551 PAGECHAR_INFO charinfo; | 1591 PAGECHAR_INFO charinfo; |
1552 charinfo.m_OriginX = 0; | 1592 charinfo.m_OriginX = 0; |
1553 charinfo.m_OriginY = 0; | 1593 charinfo.m_OriginY = 0; |
1554 pTextObj->GetItemInfo(i, &item); | 1594 pTextObj->GetItemInfo(i, &item); |
1555 if (item.m_CharCode == (FX_DWORD) - 1) { | 1595 if (item.m_CharCode == (FX_DWORD)-1) { |
1556 CFX_WideString str = m_TempTextBuf.GetWideString(); | 1596 CFX_WideString str = m_TempTextBuf.GetWideString(); |
1557 if(str.IsEmpty()) { | 1597 if (str.IsEmpty()) { |
1558 str = m_TextBuf.GetWideString(); | 1598 str = m_TextBuf.GetWideString(); |
1559 } | 1599 } |
1560 if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CH
AR) { | 1600 if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { |
1561 continue; | 1601 continue; |
1562 } | 1602 } |
1563 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); | 1603 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); |
1564 spacing = -fontsize_h * item.m_OriginX / 1000; | 1604 spacing = -fontsize_h * item.m_OriginX / 1000; |
1565 continue; | 1605 continue; |
1566 } | 1606 } |
1567 FX_FLOAT charSpace = pTextObj->m_TextState.GetObject()->m_CharSpace; | 1607 FX_FLOAT charSpace = pTextObj->m_TextState.GetObject()->m_CharSpace; |
1568 if (charSpace > 0.001) { | 1608 if (charSpace > 0.001) { |
1569 spacing += matrix.TransformDistance(charSpace); | 1609 spacing += matrix.TransformDistance(charSpace); |
1570 } else if(charSpace < -0.001) { | 1610 } else if (charSpace < -0.001) { |
1571 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace)); | 1611 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace)); |
1572 } | 1612 } |
1573 spacing -= baseSpace; | 1613 spacing -= baseSpace; |
1574 if (spacing && i > 0) { | 1614 if (spacing && i > 0) { |
1575 int last_width = 0; | 1615 int last_width = 0; |
1576 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); | 1616 FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); |
1577 FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); | 1617 FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); |
1578 FX_FLOAT threshold = 0; | 1618 FX_FLOAT threshold = 0; |
1579 if (space_charcode != -1) { | 1619 if (space_charcode != -1) { |
1580 threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) /
1000 ; | 1620 threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000; |
1581 } | 1621 } |
1582 if (threshold > fontsize_h / 3) { | 1622 if (threshold > fontsize_h / 3) { |
1583 threshold = 0; | 1623 threshold = 0; |
1584 } else { | 1624 } else { |
1585 threshold /= 2; | 1625 threshold /= 2; |
1586 } | 1626 } |
1587 if (threshold == 0) { | 1627 if (threshold == 0) { |
1588 threshold = fontsize_h; | 1628 threshold = fontsize_h; |
1589 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont))
; | 1629 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); |
1590 threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX
_FLOAT)last_width; | 1630 threshold = this_width > last_width ? (FX_FLOAT)this_width |
1591 threshold = _NormalizeThreshold(threshold); | 1631 : (FX_FLOAT)last_width; |
1592 threshold = fontsize_h * threshold / 1000; | 1632 threshold = _NormalizeThreshold(threshold); |
1593 } | 1633 threshold = fontsize_h * threshold / 1000; |
1594 if (threshold && (spacing && spacing >= threshold) ) { | 1634 } |
1595 charinfo.m_Unicode = TEXT_BLANK_CHAR; | 1635 if (threshold && (spacing && spacing >= threshold)) { |
1596 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; | 1636 charinfo.m_Unicode = TEXT_BLANK_CHAR; |
1597 charinfo.m_pTextObj = pTextObj; | 1637 charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; |
1598 charinfo.m_Index = m_TextBuf.GetLength(); | |
1599 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); | |
1600 charinfo.m_CharCode = -1; | |
1601 charinfo.m_Matrix.Copy(formMatrix); | |
1602 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_Orig
inX, charinfo.m_OriginY); | |
1603 charinfo.m_CharBox = CFX_FloatRect(charinfo.m_OriginX, charinfo.
m_OriginY, charinfo.m_OriginX, charinfo.m_OriginY); | |
1604 m_TempCharList.Add(charinfo); | |
1605 } | |
1606 if (item.m_CharCode == (FX_DWORD) - 1) { | |
1607 continue; | |
1608 } | |
1609 } | |
1610 spacing = 0; | |
1611 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); | |
1612 FX_BOOL bNoUnicode = FALSE; | |
1613 FX_WCHAR wChar = wstrItem.GetAt(0); | |
1614 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { | |
1615 if(wstrItem.IsEmpty()) { | |
1616 wstrItem += (FX_WCHAR)item.m_CharCode; | |
1617 } else { | |
1618 wstrItem.SetAt(0, (FX_WCHAR)item.m_CharCode); | |
1619 } | |
1620 bNoUnicode = TRUE; | |
1621 } | |
1622 charinfo.m_Index = -1; | |
1623 charinfo.m_CharCode = item.m_CharCode; | |
1624 if(bNoUnicode) { | |
1625 charinfo.m_Flag = FPDFTEXT_CHAR_UNUNICODE; | |
1626 } else { | |
1627 charinfo.m_Flag = FPDFTEXT_CHAR_NORMAL; | |
1628 } | |
1629 charinfo.m_pTextObj = pTextObj; | 1638 charinfo.m_pTextObj = pTextObj; |
1630 charinfo.m_OriginX = 0, charinfo.m_OriginY = 0; | 1639 charinfo.m_Index = m_TextBuf.GetLength(); |
1631 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, cha
rinfo.m_OriginY); | 1640 m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); |
1632 FX_RECT rect(0, 0, 0, 0); | 1641 charinfo.m_CharCode = -1; |
1633 rect.Intersect(0, 0, 0, 0); | 1642 charinfo.m_Matrix.Copy(formMatrix); |
1634 charinfo.m_pTextObj->GetFont()->GetCharBBox(charinfo.m_CharCode, rect); | 1643 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, |
1635 charinfo.m_CharBox.top = rect.top * pTextObj->GetFontSize() / 1000 + ite
m.m_OriginY; | 1644 charinfo.m_OriginY); |
1636 charinfo.m_CharBox.left = rect.left * pTextObj->GetFontSize() / 1000 + i
tem.m_OriginX; | 1645 charinfo.m_CharBox = |
1637 charinfo.m_CharBox.right = rect.right * pTextObj->GetFontSize() / 1000 +
item.m_OriginX; | 1646 CFX_FloatRect(charinfo.m_OriginX, charinfo.m_OriginY, |
1638 charinfo.m_CharBox.bottom = rect.bottom * pTextObj->GetFontSize() / 1000
+ item.m_OriginY; | 1647 charinfo.m_OriginX, charinfo.m_OriginY); |
1639 if (fabsf(charinfo.m_CharBox.top - charinfo.m_CharBox.bottom) < 0.01f) { | 1648 m_TempCharList.Add(charinfo); |
1640 charinfo.m_CharBox.top = charinfo.m_CharBox.bottom + pTextObj->GetFo
ntSize(); | 1649 } |
1641 } | 1650 if (item.m_CharCode == (FX_DWORD)-1) { |
1642 if (fabsf(charinfo.m_CharBox.right - charinfo.m_CharBox.left) < 0.01f) { | 1651 continue; |
1643 charinfo.m_CharBox.right = charinfo.m_CharBox.left + pTextObj->GetCh
arWidth(charinfo.m_CharCode); | 1652 } |
1644 } | 1653 } |
1645 matrix.TransformRect(charinfo.m_CharBox); | 1654 spacing = 0; |
1646 charinfo.m_Matrix.Copy(matrix); | 1655 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); |
1647 if (wstrItem.IsEmpty()) { | 1656 FX_BOOL bNoUnicode = FALSE; |
1648 charinfo.m_Unicode = 0; | 1657 FX_WCHAR wChar = wstrItem.GetAt(0); |
1649 m_TempCharList.Add(charinfo); | 1658 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { |
| 1659 if (wstrItem.IsEmpty()) { |
| 1660 wstrItem += (FX_WCHAR)item.m_CharCode; |
| 1661 } else { |
| 1662 wstrItem.SetAt(0, (FX_WCHAR)item.m_CharCode); |
| 1663 } |
| 1664 bNoUnicode = TRUE; |
| 1665 } |
| 1666 charinfo.m_Index = -1; |
| 1667 charinfo.m_CharCode = item.m_CharCode; |
| 1668 if (bNoUnicode) { |
| 1669 charinfo.m_Flag = FPDFTEXT_CHAR_UNUNICODE; |
| 1670 } else { |
| 1671 charinfo.m_Flag = FPDFTEXT_CHAR_NORMAL; |
| 1672 } |
| 1673 charinfo.m_pTextObj = pTextObj; |
| 1674 charinfo.m_OriginX = 0, charinfo.m_OriginY = 0; |
| 1675 matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, |
| 1676 charinfo.m_OriginY); |
| 1677 FX_RECT rect(0, 0, 0, 0); |
| 1678 rect.Intersect(0, 0, 0, 0); |
| 1679 charinfo.m_pTextObj->GetFont()->GetCharBBox(charinfo.m_CharCode, rect); |
| 1680 charinfo.m_CharBox.top = |
| 1681 rect.top * pTextObj->GetFontSize() / 1000 + item.m_OriginY; |
| 1682 charinfo.m_CharBox.left = |
| 1683 rect.left * pTextObj->GetFontSize() / 1000 + item.m_OriginX; |
| 1684 charinfo.m_CharBox.right = |
| 1685 rect.right * pTextObj->GetFontSize() / 1000 + item.m_OriginX; |
| 1686 charinfo.m_CharBox.bottom = |
| 1687 rect.bottom * pTextObj->GetFontSize() / 1000 + item.m_OriginY; |
| 1688 if (fabsf(charinfo.m_CharBox.top - charinfo.m_CharBox.bottom) < 0.01f) { |
| 1689 charinfo.m_CharBox.top = |
| 1690 charinfo.m_CharBox.bottom + pTextObj->GetFontSize(); |
| 1691 } |
| 1692 if (fabsf(charinfo.m_CharBox.right - charinfo.m_CharBox.left) < 0.01f) { |
| 1693 charinfo.m_CharBox.right = |
| 1694 charinfo.m_CharBox.left + pTextObj->GetCharWidth(charinfo.m_CharCode); |
| 1695 } |
| 1696 matrix.TransformRect(charinfo.m_CharBox); |
| 1697 charinfo.m_Matrix.Copy(matrix); |
| 1698 if (wstrItem.IsEmpty()) { |
| 1699 charinfo.m_Unicode = 0; |
| 1700 m_TempCharList.Add(charinfo); |
| 1701 m_TempTextBuf.AppendChar(0xfffe); |
| 1702 continue; |
| 1703 } else { |
| 1704 int nTotal = wstrItem.GetLength(); |
| 1705 FX_BOOL bDel = FALSE; |
| 1706 const int count = std::min(m_TempCharList.GetSize(), 7); |
| 1707 FX_FLOAT threshold = charinfo.m_Matrix.TransformXDistance( |
| 1708 (FX_FLOAT)TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize()); |
| 1709 for (int n = m_TempCharList.GetSize(); |
| 1710 n > m_TempCharList.GetSize() - count; n--) { |
| 1711 PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt(n - 1); |
| 1712 if (charinfo1->m_CharCode == charinfo.m_CharCode && |
| 1713 charinfo1->m_pTextObj->GetFont() == |
| 1714 charinfo.m_pTextObj->GetFont() && |
| 1715 FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) < threshold && |
| 1716 FXSYS_fabs(charinfo1->m_OriginY - charinfo.m_OriginY) < threshold) { |
| 1717 bDel = TRUE; |
| 1718 break; |
| 1719 } |
| 1720 } |
| 1721 if (!bDel) { |
| 1722 for (int nIndex = 0; nIndex < nTotal; nIndex++) { |
| 1723 charinfo.m_Unicode = wstrItem.GetAt(nIndex); |
| 1724 if (charinfo.m_Unicode) { |
| 1725 charinfo.m_Index = m_TextBuf.GetLength(); |
| 1726 m_TempTextBuf.AppendChar(charinfo.m_Unicode); |
| 1727 } else { |
1650 m_TempTextBuf.AppendChar(0xfffe); | 1728 m_TempTextBuf.AppendChar(0xfffe); |
1651 continue; | 1729 } |
1652 } else { | 1730 m_TempCharList.Add(charinfo); |
1653 int nTotal = wstrItem.GetLength(); | 1731 } |
1654 FX_BOOL bDel = FALSE; | 1732 } else if (i == 0) { |
1655 const int count = std::min(m_TempCharList.GetSize(), 7); | 1733 CFX_WideString str = m_TempTextBuf.GetWideString(); |
1656 FX_FLOAT threshold = charinfo.m_Matrix.TransformXDistance((FX_FLOAT)
TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize()); | 1734 if (!str.IsEmpty() && |
1657 for (int n = m_TempCharList.GetSize(); | 1735 str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { |
1658 n > m_TempCharList.GetSize() - count; | 1736 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); |
1659 n--) { | 1737 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); |
1660 PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt(
n - 1); | 1738 } |
1661 if(charinfo1->m_CharCode == charinfo.m_CharCode && | 1739 } |
1662 charinfo1->m_pTextObj->GetFont() == charinfo.m_pTextObj-
>GetFont() && | 1740 } |
1663 FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) <
threshold && | 1741 } |
1664 FXSYS_fabs(charinfo1->m_OriginY - charinfo.m_OriginY) <
threshold) { | 1742 if (bIsBidiAndMirrorInverse) { |
1665 bDel = TRUE; | 1743 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); |
1666 break; | 1744 } |
1667 } | |
1668 } | |
1669 if(!bDel) { | |
1670 for (int nIndex = 0; nIndex < nTotal; nIndex++) { | |
1671 charinfo.m_Unicode = wstrItem.GetAt(nIndex); | |
1672 if (charinfo.m_Unicode) { | |
1673 charinfo.m_Index = m_TextBuf.GetLength(); | |
1674 m_TempTextBuf.AppendChar(charinfo.m_Unicode); | |
1675 } else { | |
1676 m_TempTextBuf.AppendChar(0xfffe); | |
1677 } | |
1678 m_TempCharList.Add(charinfo); | |
1679 } | |
1680 } else if(i == 0) { | |
1681 CFX_WideString str = m_TempTextBuf.GetWideString(); | |
1682 if (!str.IsEmpty() && str.GetAt(str.GetLength() - 1) == TEXT_BLA
NK_CHAR) { | |
1683 m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); | |
1684 m_TempCharList.Delete(m_TempCharList.GetSize() - 1); | |
1685 } | |
1686 } | |
1687 } | |
1688 } | |
1689 if (bIsBidiAndMirrorInverse) { | |
1690 SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); | |
1691 } | |
1692 } | 1745 } |
1693 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, | 1746 void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, |
1694 int32_t iBufStartAppend) | 1747 int32_t iBufStartAppend) { |
1695 { | 1748 int32_t i, j; |
1696 int32_t i, j; | 1749 i = iCharListStartAppend; |
1697 i = iCharListStartAppend; | 1750 j = m_TempCharList.GetSize() - 1; |
1698 j = m_TempCharList.GetSize() - 1; | 1751 for (; i < j; i++, j--) { |
1699 for (; i < j; i++, j--) { | 1752 std::swap(m_TempCharList[i], m_TempCharList[j]); |
1700 std::swap(m_TempCharList[i], m_TempCharList[j]); | 1753 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); |
1701 std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); | 1754 } |
1702 } | 1755 FX_WCHAR* pTempBuffer = m_TempTextBuf.GetBuffer(); |
1703 FX_WCHAR * pTempBuffer = m_TempTextBuf.GetBuffer(); | 1756 i = iBufStartAppend; |
1704 i = iBufStartAppend; | 1757 j = m_TempTextBuf.GetLength() - 1; |
1705 j = m_TempTextBuf.GetLength() - 1; | 1758 for (; i < j; i++, j--) { |
1706 for (; i < j; i++, j--) { | 1759 std::swap(pTempBuffer[i], pTempBuffer[j]); |
1707 std::swap(pTempBuffer[i], pTempBuffer[j]); | 1760 } |
1708 } | |
1709 } | 1761 } |
1710 FX_BOOL CPDF_TextPage::IsRightToLeft(const CPDF_TextObject* pTextObj, | 1762 FX_BOOL CPDF_TextPage::IsRightToLeft(const CPDF_TextObject* pTextObj, |
1711 const CPDF_Font* pFont, | 1763 const CPDF_Font* pFont, |
1712 int nItems) const | 1764 int nItems) const { |
1713 { | 1765 nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); |
1714 nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); | 1766 int32_t nR2L = 0; |
1715 int32_t nR2L = 0; | 1767 int32_t nL2R = 0; |
1716 int32_t nL2R = 0; | 1768 int32_t start = 0, count = 0; |
1717 int32_t start = 0, count = 0; | 1769 CPDF_TextObjectItem item; |
1718 CPDF_TextObjectItem item; | 1770 for (int32_t i = 0; i < nItems; i++) { |
1719 for (int32_t i = 0; i < nItems; i++) { | 1771 pTextObj->GetItemInfo(i, &item); |
1720 pTextObj->GetItemInfo(i, &item); | 1772 if (item.m_CharCode == (FX_DWORD)-1) { |
1721 if (item.m_CharCode == (FX_DWORD)-1) { | 1773 continue; |
1722 continue; | 1774 } |
1723 } | 1775 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); |
1724 CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); | 1776 FX_WCHAR wChar = wstrItem.GetAt(0); |
1725 FX_WCHAR wChar = wstrItem.GetAt(0); | 1777 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { |
1726 if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { | 1778 wChar = (FX_WCHAR)item.m_CharCode; |
1727 wChar = (FX_WCHAR)item.m_CharCode; | 1779 } |
1728 } | 1780 if (!wChar) { |
1729 if (!wChar) { | 1781 continue; |
1730 continue; | 1782 } |
1731 } | 1783 if (pBidiChar->AppendChar(wChar)) { |
1732 if (pBidiChar->AppendChar(wChar)) { | 1784 int32_t ret = pBidiChar->GetBidiInfo(start, count); |
1733 int32_t ret = pBidiChar->GetBidiInfo(start, count); | 1785 if (ret == 2) { |
1734 if (ret == 2) { | 1786 nR2L++; |
1735 nR2L++; | 1787 } else if (ret == 1) { |
1736 } | 1788 nL2R++; |
1737 else if (ret == 1) { | 1789 } |
1738 nL2R++; | 1790 } |
1739 } | 1791 } |
1740 } | 1792 if (pBidiChar->EndChar()) { |
1741 } | 1793 int32_t ret = pBidiChar->GetBidiInfo(start, count); |
1742 if (pBidiChar->EndChar()) { | 1794 if (ret == 2) { |
1743 int32_t ret = pBidiChar->GetBidiInfo(start, count); | 1795 nR2L++; |
1744 if (ret == 2) { | 1796 } else if (ret == 1) { |
1745 nR2L++; | 1797 nL2R++; |
1746 } | 1798 } |
1747 else if (ret == 1) { | 1799 } |
1748 nL2R++; | 1800 return (nR2L > 0 && nR2L >= nL2R); |
1749 } | 1801 } |
1750 } | 1802 int32_t CPDF_TextPage::GetTextObjectWritingMode( |
1751 return (nR2L > 0 && nR2L >= nL2R); | 1803 const CPDF_TextObject* pTextObj) { |
1752 } | 1804 int32_t nChars = pTextObj->CountChars(); |
1753 int32_t CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj) | 1805 if (nChars == 1) { |
1754 { | |
1755 int32_t nChars = pTextObj->CountChars(); | |
1756 if (nChars == 1) { | |
1757 return m_TextlineDir; | |
1758 } | |
1759 CPDF_TextObjectItem first, last; | |
1760 pTextObj->GetCharInfo(0, &first); | |
1761 pTextObj->GetCharInfo(nChars - 1, &last); | |
1762 CFX_Matrix textMatrix; | |
1763 pTextObj->GetTextMatrix(&textMatrix); | |
1764 textMatrix.TransformPoint(first.m_OriginX, first.m_OriginY); | |
1765 textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY); | |
1766 FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX); | |
1767 FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY); | |
1768 if (dX <= 0.0001f && dY <= 0.0001f) { | |
1769 return -1; | |
1770 } | |
1771 CFX_VectorF v; | |
1772 v.Set(dX, dY); | |
1773 v.Normalize(); | |
1774 if (v.y <= 0.0872f) { | |
1775 return v.x <= 0.0872f ? m_TextlineDir : 0; | |
1776 } | |
1777 if (v.x <= 0.0872f) { | |
1778 return 1; | |
1779 } | |
1780 return m_TextlineDir; | 1806 return m_TextlineDir; |
1781 } | 1807 } |
1782 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) | 1808 CPDF_TextObjectItem first, last; |
1783 { | 1809 pTextObj->GetCharInfo(0, &first); |
1784 CFX_WideString strCurText = m_TempTextBuf.GetWideString(); | 1810 pTextObj->GetCharInfo(nChars - 1, &last); |
1785 if(strCurText.GetLength() == 0) { | 1811 CFX_Matrix textMatrix; |
1786 strCurText = m_TextBuf.GetWideString(); | 1812 pTextObj->GetTextMatrix(&textMatrix); |
1787 } | 1813 textMatrix.TransformPoint(first.m_OriginX, first.m_OriginY); |
1788 FX_STRSIZE nCount = strCurText.GetLength(); | 1814 textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY); |
1789 int nIndex = nCount - 1; | 1815 FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX); |
1790 FX_WCHAR wcTmp = strCurText.GetAt(nIndex); | 1816 FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY); |
1791 while(wcTmp == 0x20 && nIndex <= nCount - 1 && nIndex >= 0) { | 1817 if (dX <= 0.0001f && dY <= 0.0001f) { |
1792 wcTmp = strCurText.GetAt(--nIndex); | 1818 return -1; |
1793 } | 1819 } |
1794 if (0x2D == wcTmp || 0xAD == wcTmp) { | 1820 CFX_VectorF v; |
1795 if (--nIndex > 0) { | 1821 v.Set(dX, dY); |
1796 FX_WCHAR preChar = strCurText.GetAt((nIndex)); | 1822 v.Normalize(); |
1797 if (((preChar >= L'A' && preChar <= L'Z') || (preChar >= L'a' && pre
Char <= L'z')) | 1823 if (v.y <= 0.0872f) { |
1798 && ((curChar >= L'A' && curChar <= L'Z') || (curChar >= L'a'
&& curChar <= L'z'))) { | 1824 return v.x <= 0.0872f ? m_TextlineDir : 0; |
1799 return TRUE; | 1825 } |
1800 } | 1826 if (v.x <= 0.0872f) { |
1801 } | 1827 return 1; |
1802 int size = m_TempCharList.GetSize(); | 1828 } |
1803 PAGECHAR_INFO preChar; | 1829 return m_TextlineDir; |
1804 if (size) { | 1830 } |
1805 preChar = (PAGECHAR_INFO)m_TempCharList[size - 1]; | 1831 FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) { |
1806 } else { | 1832 CFX_WideString strCurText = m_TempTextBuf.GetWideString(); |
1807 size = m_charList.GetSize(); | 1833 if (strCurText.GetLength() == 0) { |
1808 if(size == 0) { | 1834 strCurText = m_TextBuf.GetWideString(); |
1809 return FALSE; | 1835 } |
1810 } | 1836 FX_STRSIZE nCount = strCurText.GetLength(); |
1811 preChar = (PAGECHAR_INFO)m_charList[size - 1]; | 1837 int nIndex = nCount - 1; |
1812 } | 1838 FX_WCHAR wcTmp = strCurText.GetAt(nIndex); |
1813 if (FPDFTEXT_CHAR_PIECE == preChar.m_Flag) | 1839 while (wcTmp == 0x20 && nIndex <= nCount - 1 && nIndex >= 0) { |
1814 if (0xAD == preChar.m_Unicode || 0x2D == preChar.m_Unicode) { | 1840 wcTmp = strCurText.GetAt(--nIndex); |
1815 return TRUE; | 1841 } |
1816 } | 1842 if (0x2D == wcTmp || 0xAD == wcTmp) { |
1817 } | 1843 if (--nIndex > 0) { |
1818 return FALSE; | 1844 FX_WCHAR preChar = strCurText.GetAt((nIndex)); |
1819 } | 1845 if (((preChar >= L'A' && preChar <= L'Z') || |
1820 int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, const CFX_Af
fineMatrix& formMatrix) | 1846 (preChar >= L'a' && preChar <= L'z')) && |
1821 { | 1847 ((curChar >= L'A' && curChar <= L'Z') || |
1822 FindPreviousTextObject(); | 1848 (curChar >= L'a' && curChar <= L'z'))) { |
1823 FX_BOOL bNewline = FALSE; | 1849 return TRUE; |
1824 int WritingMode = GetTextObjectWritingMode(pObj); | 1850 } |
1825 if(WritingMode == -1) { | 1851 } |
1826 WritingMode = GetTextObjectWritingMode(m_pPreTextObj); | |
1827 } | |
1828 CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, pObj->m
_Top); | |
1829 CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, m_pP
reTextObj->m_Right, m_pPreTextObj->m_Top); | |
1830 CPDF_TextObjectItem PrevItem, item; | |
1831 int nItem = m_pPreTextObj->CountItems(); | |
1832 m_pPreTextObj->GetItemInfo(nItem - 1, &PrevItem); | |
1833 pObj->GetItemInfo(0, &item); | |
1834 CFX_WideString wstrItem = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCo
de); | |
1835 if(wstrItem.IsEmpty()) { | |
1836 wstrItem += (FX_WCHAR)item.m_CharCode; | |
1837 } | |
1838 FX_WCHAR curChar = wstrItem.GetAt(0); | |
1839 if(WritingMode == 0) { | |
1840 if(this_rect.Height() > 4.5 && prev_rect.Height() > 4.5) { | |
1841 FX_FLOAT top = this_rect.top < prev_rect.top ? this_rect.top : prev_
rect.top; | |
1842 FX_FLOAT bottom = this_rect.bottom > prev_rect.bottom ? this_rect.bo
ttom : prev_rect.bottom; | |
1843 if(bottom >= top) { | |
1844 if(IsHyphen(curChar)) { | |
1845 return 3; | |
1846 } | |
1847 return 2; | |
1848 } | |
1849 } | |
1850 } else if (WritingMode == 1) { | |
1851 if(this_rect.Width() > pObj->GetFontSize() * 0.1f && prev_rect.Width() >
m_pPreTextObj->GetFontSize() * 0.1f) { | |
1852 FX_FLOAT left = this_rect.left > m_CurlineRect.left ? this_rect.left
: m_CurlineRect.left; | |
1853 FX_FLOAT right = this_rect.right < m_CurlineRect.right ? this_rect.r
ight : m_CurlineRect.right; | |
1854 if(right <= left) { | |
1855 if(IsHyphen(curChar)) { | |
1856 return 3; | |
1857 } | |
1858 return 2; | |
1859 } | |
1860 } | |
1861 } | |
1862 FX_FLOAT last_pos = PrevItem.m_OriginX; | |
1863 int nLastWidth = GetCharWidth(PrevItem.m_CharCode, m_pPreTextObj->GetFont())
; | |
1864 FX_FLOAT last_width = nLastWidth * m_pPreTextObj->GetFontSize() / 1000; | |
1865 last_width = FXSYS_fabs(last_width); | |
1866 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont()); | |
1867 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000; | |
1868 this_width = FXSYS_fabs(this_width); | |
1869 FX_FLOAT threshold = last_width > this_width ? last_width / 4 : this_width /
4; | |
1870 CFX_AffineMatrix prev_matrix, prev_reverse; | |
1871 m_pPreTextObj->GetTextMatrix(&prev_matrix); | |
1872 prev_matrix.Concat(m_perMatrix); | |
1873 prev_reverse.SetReverse(prev_matrix); | |
1874 FX_FLOAT x = pObj->GetPosX(); | |
1875 FX_FLOAT y = pObj->GetPosY(); | |
1876 formMatrix.Transform(x, y); | |
1877 prev_reverse.Transform(x, y); | |
1878 if(last_width < this_width) { | |
1879 threshold = prev_reverse.TransformDistance(threshold); | |
1880 } | |
1881 CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom, m_pPreTextObj->m_
Right, pObj->m_Top); | |
1882 CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, m_pPreTe
xtObj->m_Right, m_pPreTextObj->m_Top); | |
1883 CFX_FloatRect rect3 = rect1; | |
1884 rect1.Intersect(rect2); | |
1885 if (WritingMode == 0) { | |
1886 if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5) | |
1887 || ((y > threshold * 2 || y < threshold * -3) && (FXSYS_fabs(y)
< 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : TRUE))) { | |
1888 bNewline = TRUE; | |
1889 if(nItem > 1 ) { | |
1890 CPDF_TextObjectItem tempItem; | |
1891 m_pPreTextObj->GetItemInfo(0, &tempItem); | |
1892 CFX_AffineMatrix m; | |
1893 m_pPreTextObj->GetTextMatrix(&m); | |
1894 if(PrevItem.m_OriginX > tempItem.m_OriginX && | |
1895 m_DisplayMatrix.a > 0.9 && m_DisplayMatrix.b < 0.1 && | |
1896 m_DisplayMatrix.c < 0.1 && m_DisplayMatrix.d < -0.9 | |
1897 && m.b < 0.1 && m.c < 0.1 ) { | |
1898 CFX_FloatRect re(0, m_pPreTextObj->m_Bottom, 1000, m_pPreTex
tObj->m_Top); | |
1899 if(re.Contains(pObj->GetPosX(), pObj->GetPosY())) { | |
1900 bNewline = FALSE; | |
1901 } else { | |
1902 CFX_FloatRect re(0, pObj->m_Bottom, 1000, pObj->m_Top); | |
1903 if(re.Contains(m_pPreTextObj->GetPosX(), m_pPreTextObj->
GetPosY())) { | |
1904 bNewline = FALSE; | |
1905 } | |
1906 } | |
1907 } | |
1908 } | |
1909 } | |
1910 } | |
1911 if(bNewline) { | |
1912 if(IsHyphen(curChar)) { | |
1913 return 3; | |
1914 } | |
1915 return 2; | |
1916 } | |
1917 int32_t nChars = pObj->CountChars(); | |
1918 if (nChars == 1 && ( 0x2D == curChar || 0xAD == curChar)) | |
1919 if (IsHyphen(curChar)) { | |
1920 return 3; | |
1921 } | |
1922 CFX_WideString PrevStr = m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevI
tem.m_CharCode); | |
1923 FX_WCHAR preChar = PrevStr.GetAt(PrevStr.GetLength() - 1); | |
1924 CFX_AffineMatrix matrix; | |
1925 pObj->GetTextMatrix(&matrix); | |
1926 matrix.Concat(formMatrix); | |
1927 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth); | |
1928 threshold = threshold > 400 ? (threshold < 700 ? threshold / 4 : (threshold
> 800 ? threshold / 6 : threshold / 5)) : (threshold / 2); | |
1929 if(nLastWidth >= nThisWidth) { | |
1930 threshold *= FXSYS_fabs(m_pPreTextObj->GetFontSize()); | |
1931 } else { | |
1932 threshold *= FXSYS_fabs(pObj->GetFontSize()); | |
1933 threshold = matrix.TransformDistance(threshold); | |
1934 threshold = prev_reverse.TransformDistance(threshold); | |
1935 } | |
1936 threshold /= 1000; | |
1937 if((threshold < 1.4881 && threshold > 1.4879) | |
1938 || (threshold < 1.39001 && threshold > 1.38999)) { | |
1939 threshold *= 1.5; | |
1940 } | |
1941 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' &&
preChar != L' ') | |
1942 if (curChar != L' ' && preChar != L' ') { | |
1943 if((x - last_pos - last_width) > threshold || (last_pos - x - last_w
idth) > threshold) { | |
1944 return 1; | |
1945 } | |
1946 if(x < 0 && (last_pos - x - last_width) > threshold) { | |
1947 return 1; | |
1948 } | |
1949 if((x - last_pos - last_width) > this_width || (x - last_pos - this_
width) > last_width ) { | |
1950 return 1; | |
1951 } | |
1952 } | |
1953 return 0; | |
1954 } | |
1955 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObj
ect* pTextObj2) | |
1956 { | |
1957 if (!pTextObj1 || !pTextObj2) { | |
1958 return FALSE; | |
1959 } | |
1960 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_
Right, pTextObj2->m_Top); | |
1961 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_
Right, pTextObj1->m_Top); | |
1962 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty() && !m_ParseOptions.m_bGetCharCo
deOnly) { | |
1963 FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left); | |
1964 int nCount = m_charList.GetSize(); | |
1965 if (nCount >= 2) { | |
1966 PAGECHAR_INFO perCharTemp = (PAGECHAR_INFO)m_charList[nCount - 2]; | |
1967 FX_FLOAT dbSpace = perCharTemp.m_CharBox.Width(); | |
1968 if (dbXdif > dbSpace) { | |
1969 return FALSE; | |
1970 } | |
1971 } | |
1972 } | |
1973 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { | |
1974 rcPreObj.Intersect(rcCurObj); | |
1975 if (rcPreObj.IsEmpty()) { | |
1976 return FALSE; | |
1977 } | |
1978 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() /
2) { | |
1979 return FALSE; | |
1980 } | |
1981 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) { | |
1982 return FALSE; | |
1983 } | |
1984 } | |
1985 int nPreCount = pTextObj2->CountItems(); | |
1986 int nCurCount = pTextObj1->CountItems(); | |
1987 if (nPreCount != nCurCount) { | |
1988 return FALSE; | |
1989 } | |
1990 CPDF_TextObjectItem itemPer, itemCur; | |
1991 for (int i = 0; i < nPreCount; i++) { | |
1992 pTextObj2->GetItemInfo(i, &itemPer); | |
1993 pTextObj1->GetItemInfo(i, &itemCur); | |
1994 if (itemCur.m_CharCode != itemPer.m_CharCode) { | |
1995 return FALSE; | |
1996 } | |
1997 } | |
1998 if(FXSYS_fabs(pTextObj1->GetPosX() - pTextObj2->GetPosX()) > GetCharWidth(it
emPer.m_CharCode, pTextObj2->GetFont())*pTextObj2->GetFontSize() / 1000 * 0.9 || | |
1999 FXSYS_fabs(pTextObj1->GetPosY() - pTextObj2->GetPosY()) > | |
2000 FX_MAX(FX_MAX(rcPreObj.Height() , rcPreObj.Width()), pTextObj2->GetF
ontSize()) / 8) { | |
2001 return FALSE; | |
2002 } | |
2003 return TRUE; | |
2004 } | |
2005 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSIT
ION ObjPos) | |
2006 { | |
2007 if (!pTextObj) { | |
2008 return FALSE; | |
2009 } | |
2010 int i = 0; | |
2011 if (!ObjPos) { | |
2012 ObjPos = m_pPage->GetLastObjectPosition(); | |
2013 } | |
2014 CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos); | |
2015 while (i < 5 && ObjPos) { | |
2016 pObj = m_pPage->GetPrevObject(ObjPos); | |
2017 if(pObj == pTextObj) { | |
2018 continue; | |
2019 } | |
2020 if(pObj->m_Type != PDFPAGE_TEXT) { | |
2021 continue; | |
2022 } | |
2023 if(IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { | |
2024 return TRUE; | |
2025 } | |
2026 i++; | |
2027 } | |
2028 return FALSE; | |
2029 } | |
2030 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) | |
2031 { | |
2032 int size = m_TempCharList.GetSize(); | 1852 int size = m_TempCharList.GetSize(); |
2033 PAGECHAR_INFO preChar; | 1853 PAGECHAR_INFO preChar; |
2034 if (size) { | 1854 if (size) { |
2035 preChar = (PAGECHAR_INFO)m_TempCharList[size - 1]; | 1855 preChar = (PAGECHAR_INFO)m_TempCharList[size - 1]; |
2036 } else { | 1856 } else { |
2037 size = m_charList.GetSize(); | 1857 size = m_charList.GetSize(); |
2038 if(size == 0) { | 1858 if (size == 0) { |
2039 return FALSE; | |
2040 } | |
2041 preChar = (PAGECHAR_INFO)m_charList[size - 1]; | |
2042 } | |
2043 info.m_Index = m_TextBuf.GetLength(); | |
2044 info.m_Unicode = unicode; | |
2045 info.m_pTextObj = NULL; | |
2046 info.m_CharCode = -1; | |
2047 info.m_Flag = FPDFTEXT_CHAR_GENERATED; | |
2048 int preWidth = 0; | |
2049 if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD) - 1) { | |
2050 preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont(
)); | |
2051 } | |
2052 FX_FLOAT fs = 0; | |
2053 if(preChar.m_pTextObj) { | |
2054 fs = preChar.m_pTextObj->GetFontSize(); | |
2055 } else { | |
2056 fs = preChar.m_CharBox.Height(); | |
2057 } | |
2058 if(!fs) { | |
2059 fs = 1; | |
2060 } | |
2061 info.m_OriginX = preChar.m_OriginX + preWidth * (fs) / 1000; | |
2062 info.m_OriginY = preChar.m_OriginY; | |
2063 info.m_CharBox = CFX_FloatRect(info.m_OriginX, info.m_OriginY, info.m_Origin
X, info.m_OriginY); | |
2064 return TRUE; | |
2065 } | |
2066 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, const CFX_Flo
atRect& rect2) | |
2067 { | |
2068 CFX_FloatRect rect = rect1; | |
2069 rect.Intersect(rect2); | |
2070 return !rect.IsEmpty(); | |
2071 } | |
2072 FX_BOOL»CPDF_TextPage::IsLetter(FX_WCHAR unicode) | |
2073 { | |
2074 if (unicode < L'A') { | |
2075 return FALSE; | 1859 return FALSE; |
2076 } | 1860 } |
2077 if (unicode > L'Z' && unicode < L'a') { | 1861 preChar = (PAGECHAR_INFO)m_charList[size - 1]; |
| 1862 } |
| 1863 if (FPDFTEXT_CHAR_PIECE == preChar.m_Flag) |
| 1864 if (0xAD == preChar.m_Unicode || 0x2D == preChar.m_Unicode) { |
| 1865 return TRUE; |
| 1866 } |
| 1867 } |
| 1868 return FALSE; |
| 1869 } |
| 1870 int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, |
| 1871 const CFX_AffineMatrix& formMatrix) { |
| 1872 FindPreviousTextObject(); |
| 1873 FX_BOOL bNewline = FALSE; |
| 1874 int WritingMode = GetTextObjectWritingMode(pObj); |
| 1875 if (WritingMode == -1) { |
| 1876 WritingMode = GetTextObjectWritingMode(m_pPreTextObj); |
| 1877 } |
| 1878 CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, |
| 1879 pObj->m_Top); |
| 1880 CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, |
| 1881 m_pPreTextObj->m_Right, m_pPreTextObj->m_Top); |
| 1882 CPDF_TextObjectItem PrevItem, item; |
| 1883 int nItem = m_pPreTextObj->CountItems(); |
| 1884 m_pPreTextObj->GetItemInfo(nItem - 1, &PrevItem); |
| 1885 pObj->GetItemInfo(0, &item); |
| 1886 CFX_WideString wstrItem = |
| 1887 pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); |
| 1888 if (wstrItem.IsEmpty()) { |
| 1889 wstrItem += (FX_WCHAR)item.m_CharCode; |
| 1890 } |
| 1891 FX_WCHAR curChar = wstrItem.GetAt(0); |
| 1892 if (WritingMode == 0) { |
| 1893 if (this_rect.Height() > 4.5 && prev_rect.Height() > 4.5) { |
| 1894 FX_FLOAT top = |
| 1895 this_rect.top < prev_rect.top ? this_rect.top : prev_rect.top; |
| 1896 FX_FLOAT bottom = this_rect.bottom > prev_rect.bottom ? this_rect.bottom |
| 1897 : prev_rect.bottom; |
| 1898 if (bottom >= top) { |
| 1899 if (IsHyphen(curChar)) { |
| 1900 return 3; |
| 1901 } |
| 1902 return 2; |
| 1903 } |
| 1904 } |
| 1905 } else if (WritingMode == 1) { |
| 1906 if (this_rect.Width() > pObj->GetFontSize() * 0.1f && |
| 1907 prev_rect.Width() > m_pPreTextObj->GetFontSize() * 0.1f) { |
| 1908 FX_FLOAT left = this_rect.left > m_CurlineRect.left ? this_rect.left |
| 1909 : m_CurlineRect.left; |
| 1910 FX_FLOAT right = this_rect.right < m_CurlineRect.right |
| 1911 ? this_rect.right |
| 1912 : m_CurlineRect.right; |
| 1913 if (right <= left) { |
| 1914 if (IsHyphen(curChar)) { |
| 1915 return 3; |
| 1916 } |
| 1917 return 2; |
| 1918 } |
| 1919 } |
| 1920 } |
| 1921 FX_FLOAT last_pos = PrevItem.m_OriginX; |
| 1922 int nLastWidth = GetCharWidth(PrevItem.m_CharCode, m_pPreTextObj->GetFont()); |
| 1923 FX_FLOAT last_width = nLastWidth * m_pPreTextObj->GetFontSize() / 1000; |
| 1924 last_width = FXSYS_fabs(last_width); |
| 1925 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont()); |
| 1926 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000; |
| 1927 this_width = FXSYS_fabs(this_width); |
| 1928 FX_FLOAT threshold = |
| 1929 last_width > this_width ? last_width / 4 : this_width / 4; |
| 1930 CFX_AffineMatrix prev_matrix, prev_reverse; |
| 1931 m_pPreTextObj->GetTextMatrix(&prev_matrix); |
| 1932 prev_matrix.Concat(m_perMatrix); |
| 1933 prev_reverse.SetReverse(prev_matrix); |
| 1934 FX_FLOAT x = pObj->GetPosX(); |
| 1935 FX_FLOAT y = pObj->GetPosY(); |
| 1936 formMatrix.Transform(x, y); |
| 1937 prev_reverse.Transform(x, y); |
| 1938 if (last_width < this_width) { |
| 1939 threshold = prev_reverse.TransformDistance(threshold); |
| 1940 } |
| 1941 CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom, |
| 1942 m_pPreTextObj->m_Right, pObj->m_Top); |
| 1943 CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, |
| 1944 m_pPreTextObj->m_Right, m_pPreTextObj->m_Top); |
| 1945 CFX_FloatRect rect3 = rect1; |
| 1946 rect1.Intersect(rect2); |
| 1947 if (WritingMode == 0) { |
| 1948 if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5) || |
| 1949 ((y > threshold * 2 || y < threshold * -3) && |
| 1950 (FXSYS_fabs(y) < 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : TRUE))) { |
| 1951 bNewline = TRUE; |
| 1952 if (nItem > 1) { |
| 1953 CPDF_TextObjectItem tempItem; |
| 1954 m_pPreTextObj->GetItemInfo(0, &tempItem); |
| 1955 CFX_AffineMatrix m; |
| 1956 m_pPreTextObj->GetTextMatrix(&m); |
| 1957 if (PrevItem.m_OriginX > tempItem.m_OriginX && |
| 1958 m_DisplayMatrix.a > 0.9 && m_DisplayMatrix.b < 0.1 && |
| 1959 m_DisplayMatrix.c < 0.1 && m_DisplayMatrix.d < -0.9 && m.b < 0.1 && |
| 1960 m.c < 0.1) { |
| 1961 CFX_FloatRect re(0, m_pPreTextObj->m_Bottom, 1000, |
| 1962 m_pPreTextObj->m_Top); |
| 1963 if (re.Contains(pObj->GetPosX(), pObj->GetPosY())) { |
| 1964 bNewline = FALSE; |
| 1965 } else { |
| 1966 CFX_FloatRect re(0, pObj->m_Bottom, 1000, pObj->m_Top); |
| 1967 if (re.Contains(m_pPreTextObj->GetPosX(), |
| 1968 m_pPreTextObj->GetPosY())) { |
| 1969 bNewline = FALSE; |
| 1970 } |
| 1971 } |
| 1972 } |
| 1973 } |
| 1974 } |
| 1975 } |
| 1976 if (bNewline) { |
| 1977 if (IsHyphen(curChar)) { |
| 1978 return 3; |
| 1979 } |
| 1980 return 2; |
| 1981 } |
| 1982 int32_t nChars = pObj->CountChars(); |
| 1983 if (nChars == 1 && (0x2D == curChar || 0xAD == curChar)) |
| 1984 if (IsHyphen(curChar)) { |
| 1985 return 3; |
| 1986 } |
| 1987 CFX_WideString PrevStr = |
| 1988 m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode); |
| 1989 FX_WCHAR preChar = PrevStr.GetAt(PrevStr.GetLength() - 1); |
| 1990 CFX_AffineMatrix matrix; |
| 1991 pObj->GetTextMatrix(&matrix); |
| 1992 matrix.Concat(formMatrix); |
| 1993 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth); |
| 1994 threshold = threshold > 400 |
| 1995 ? (threshold < 700 |
| 1996 ? threshold / 4 |
| 1997 : (threshold > 800 ? threshold / 6 : threshold / 5)) |
| 1998 : (threshold / 2); |
| 1999 if (nLastWidth >= nThisWidth) { |
| 2000 threshold *= FXSYS_fabs(m_pPreTextObj->GetFontSize()); |
| 2001 } else { |
| 2002 threshold *= FXSYS_fabs(pObj->GetFontSize()); |
| 2003 threshold = matrix.TransformDistance(threshold); |
| 2004 threshold = prev_reverse.TransformDistance(threshold); |
| 2005 } |
| 2006 threshold /= 1000; |
| 2007 if ((threshold < 1.4881 && threshold > 1.4879) || |
| 2008 (threshold < 1.39001 && threshold > 1.38999)) { |
| 2009 threshold *= 1.5; |
| 2010 } |
| 2011 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && |
| 2012 preChar != L' ') |
| 2013 if (curChar != L' ' && preChar != L' ') { |
| 2014 if ((x - last_pos - last_width) > threshold || |
| 2015 (last_pos - x - last_width) > threshold) { |
| 2016 return 1; |
| 2017 } |
| 2018 if (x < 0 && (last_pos - x - last_width) > threshold) { |
| 2019 return 1; |
| 2020 } |
| 2021 if ((x - last_pos - last_width) > this_width || |
| 2022 (x - last_pos - this_width) > last_width) { |
| 2023 return 1; |
| 2024 } |
| 2025 } |
| 2026 return 0; |
| 2027 } |
| 2028 FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, |
| 2029 CPDF_TextObject* pTextObj2) { |
| 2030 if (!pTextObj1 || !pTextObj2) { |
| 2031 return FALSE; |
| 2032 } |
| 2033 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, |
| 2034 pTextObj2->m_Right, pTextObj2->m_Top); |
| 2035 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, |
| 2036 pTextObj1->m_Right, pTextObj1->m_Top); |
| 2037 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty() && |
| 2038 !m_ParseOptions.m_bGetCharCodeOnly) { |
| 2039 FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left); |
| 2040 int nCount = m_charList.GetSize(); |
| 2041 if (nCount >= 2) { |
| 2042 PAGECHAR_INFO perCharTemp = (PAGECHAR_INFO)m_charList[nCount - 2]; |
| 2043 FX_FLOAT dbSpace = perCharTemp.m_CharBox.Width(); |
| 2044 if (dbXdif > dbSpace) { |
2078 return FALSE; | 2045 return FALSE; |
2079 } | 2046 } |
2080 if (unicode > L'z') { | 2047 } |
2081 return FALSE; | 2048 } |
2082 } | 2049 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { |
2083 return TRUE; | 2050 rcPreObj.Intersect(rcCurObj); |
| 2051 if (rcPreObj.IsEmpty()) { |
| 2052 return FALSE; |
| 2053 } |
| 2054 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > |
| 2055 rcCurObj.Width() / 2) { |
| 2056 return FALSE; |
| 2057 } |
| 2058 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) { |
| 2059 return FALSE; |
| 2060 } |
| 2061 } |
| 2062 int nPreCount = pTextObj2->CountItems(); |
| 2063 int nCurCount = pTextObj1->CountItems(); |
| 2064 if (nPreCount != nCurCount) { |
| 2065 return FALSE; |
| 2066 } |
| 2067 CPDF_TextObjectItem itemPer, itemCur; |
| 2068 for (int i = 0; i < nPreCount; i++) { |
| 2069 pTextObj2->GetItemInfo(i, &itemPer); |
| 2070 pTextObj1->GetItemInfo(i, &itemCur); |
| 2071 if (itemCur.m_CharCode != itemPer.m_CharCode) { |
| 2072 return FALSE; |
| 2073 } |
| 2074 } |
| 2075 if (FXSYS_fabs(pTextObj1->GetPosX() - pTextObj2->GetPosX()) > |
| 2076 GetCharWidth(itemPer.m_CharCode, pTextObj2->GetFont()) * |
| 2077 pTextObj2->GetFontSize() / 1000 * 0.9 || |
| 2078 FXSYS_fabs(pTextObj1->GetPosY() - pTextObj2->GetPosY()) > |
| 2079 FX_MAX(FX_MAX(rcPreObj.Height(), rcPreObj.Width()), |
| 2080 pTextObj2->GetFontSize()) / |
| 2081 8) { |
| 2082 return FALSE; |
| 2083 } |
| 2084 return TRUE; |
| 2085 } |
| 2086 FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, |
| 2087 FX_POSITION ObjPos) { |
| 2088 if (!pTextObj) { |
| 2089 return FALSE; |
| 2090 } |
| 2091 int i = 0; |
| 2092 if (!ObjPos) { |
| 2093 ObjPos = m_pPage->GetLastObjectPosition(); |
| 2094 } |
| 2095 CPDF_PageObject* pObj = m_pPage->GetPrevObject(ObjPos); |
| 2096 while (i < 5 && ObjPos) { |
| 2097 pObj = m_pPage->GetPrevObject(ObjPos); |
| 2098 if (pObj == pTextObj) { |
| 2099 continue; |
| 2100 } |
| 2101 if (pObj->m_Type != PDFPAGE_TEXT) { |
| 2102 continue; |
| 2103 } |
| 2104 if (IsSameTextObject((CPDF_TextObject*)pObj, pTextObj)) { |
| 2105 return TRUE; |
| 2106 } |
| 2107 i++; |
| 2108 } |
| 2109 return FALSE; |
| 2110 } |
| 2111 FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) { |
| 2112 int size = m_TempCharList.GetSize(); |
| 2113 PAGECHAR_INFO preChar; |
| 2114 if (size) { |
| 2115 preChar = (PAGECHAR_INFO)m_TempCharList[size - 1]; |
| 2116 } else { |
| 2117 size = m_charList.GetSize(); |
| 2118 if (size == 0) { |
| 2119 return FALSE; |
| 2120 } |
| 2121 preChar = (PAGECHAR_INFO)m_charList[size - 1]; |
| 2122 } |
| 2123 info.m_Index = m_TextBuf.GetLength(); |
| 2124 info.m_Unicode = unicode; |
| 2125 info.m_pTextObj = NULL; |
| 2126 info.m_CharCode = -1; |
| 2127 info.m_Flag = FPDFTEXT_CHAR_GENERATED; |
| 2128 int preWidth = 0; |
| 2129 if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1) { |
| 2130 preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont()); |
| 2131 } |
| 2132 FX_FLOAT fs = 0; |
| 2133 if (preChar.m_pTextObj) { |
| 2134 fs = preChar.m_pTextObj->GetFontSize(); |
| 2135 } else { |
| 2136 fs = preChar.m_CharBox.Height(); |
| 2137 } |
| 2138 if (!fs) { |
| 2139 fs = 1; |
| 2140 } |
| 2141 info.m_OriginX = preChar.m_OriginX + preWidth * (fs) / 1000; |
| 2142 info.m_OriginY = preChar.m_OriginY; |
| 2143 info.m_CharBox = CFX_FloatRect(info.m_OriginX, info.m_OriginY, info.m_OriginX, |
| 2144 info.m_OriginY); |
| 2145 return TRUE; |
| 2146 } |
| 2147 FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, |
| 2148 const CFX_FloatRect& rect2) { |
| 2149 CFX_FloatRect rect = rect1; |
| 2150 rect.Intersect(rect2); |
| 2151 return !rect.IsEmpty(); |
| 2152 } |
| 2153 FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { |
| 2154 if (unicode < L'A') { |
| 2155 return FALSE; |
| 2156 } |
| 2157 if (unicode > L'Z' && unicode < L'a') { |
| 2158 return FALSE; |
| 2159 } |
| 2160 if (unicode > L'z') { |
| 2161 return FALSE; |
| 2162 } |
| 2163 return TRUE; |
2084 } | 2164 } |
2085 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) | 2165 CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) |
2086 : m_pTextPage(pTextPage), | 2166 : m_pTextPage(pTextPage), |
2087 m_flags(0), | 2167 m_flags(0), |
2088 m_findNextStart(-1), | 2168 m_findNextStart(-1), |
2089 m_findPreStart(-1), | 2169 m_findPreStart(-1), |
2090 m_bMatchCase(FALSE), | 2170 m_bMatchCase(FALSE), |
2091 m_bMatchWholeWord(FALSE), | 2171 m_bMatchWholeWord(FALSE), |
2092 m_resStart(0), | 2172 m_resStart(0), |
2093 m_resEnd(-1), | 2173 m_resEnd(-1), |
2094 m_IsFind(FALSE) | 2174 m_IsFind(FALSE) { |
2095 { | 2175 m_strText = m_pTextPage->GetPageText(); |
| 2176 int nCount = pTextPage->CountChars(); |
| 2177 if (nCount) { |
| 2178 m_CharIndex.Add(0); |
| 2179 } |
| 2180 for (int i = 0; i < nCount; i++) { |
| 2181 FPDF_CHAR_INFO info; |
| 2182 pTextPage->GetCharInfo(i, info); |
| 2183 int indexSize = m_CharIndex.GetSize(); |
| 2184 if (info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) { |
| 2185 if (indexSize % 2) { |
| 2186 m_CharIndex.Add(1); |
| 2187 } else { |
| 2188 if (indexSize <= 0) { |
| 2189 continue; |
| 2190 } |
| 2191 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1); |
| 2192 } |
| 2193 } else { |
| 2194 if (indexSize % 2) { |
| 2195 if (indexSize <= 0) { |
| 2196 continue; |
| 2197 } |
| 2198 m_CharIndex.SetAt(indexSize - 1, i + 1); |
| 2199 } else { |
| 2200 m_CharIndex.Add(i + 1); |
| 2201 } |
| 2202 } |
| 2203 } |
| 2204 int indexSize = m_CharIndex.GetSize(); |
| 2205 if (indexSize % 2) { |
| 2206 m_CharIndex.RemoveAt(indexSize - 1); |
| 2207 } |
| 2208 } |
| 2209 int CPDF_TextPageFind::GetCharIndex(int index) const { |
| 2210 return m_pTextPage->CharIndexFromTextIndex(index); |
| 2211 int indexSize = m_CharIndex.GetSize(); |
| 2212 int count = 0; |
| 2213 for (int i = 0; i < indexSize; i += 2) { |
| 2214 count += m_CharIndex.GetAt(i + 1); |
| 2215 if (count > index) { |
| 2216 return index - count + m_CharIndex.GetAt(i + 1) + m_CharIndex.GetAt(i); |
| 2217 } |
| 2218 } |
| 2219 return -1; |
| 2220 } |
| 2221 FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, |
| 2222 int flags, |
| 2223 int startPos) { |
| 2224 if (!m_pTextPage) { |
| 2225 return FALSE; |
| 2226 } |
| 2227 if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) { |
2096 m_strText = m_pTextPage->GetPageText(); | 2228 m_strText = m_pTextPage->GetPageText(); |
2097 int nCount = pTextPage->CountChars(); | 2229 } |
2098 if(nCount) { | 2230 CFX_WideString findwhatStr = findwhat; |
2099 m_CharIndex.Add(0); | 2231 m_findWhat = findwhatStr; |
2100 } | 2232 m_flags = flags; |
2101 for(int i = 0; i < nCount; i++) { | 2233 m_bMatchCase = flags & FPDFTEXT_MATCHCASE; |
2102 FPDF_CHAR_INFO info; | 2234 if (m_strText.IsEmpty()) { |
2103 pTextPage->GetCharInfo(i, info); | 2235 m_IsFind = FALSE; |
2104 int indexSize = m_CharIndex.GetSize(); | 2236 return TRUE; |
2105 if(info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) { | 2237 } |
2106 if(indexSize % 2) { | 2238 FX_STRSIZE len = findwhatStr.GetLength(); |
2107 m_CharIndex.Add(1); | 2239 if (!m_bMatchCase) { |
2108 } else { | 2240 findwhatStr.MakeLower(); |
2109 if(indexSize <= 0) { | 2241 m_strText.MakeLower(); |
2110 continue; | 2242 } |
2111 } | 2243 m_bMatchWholeWord = flags & FPDFTEXT_MATCHWHOLEWORD; |
2112 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1
) + 1); | 2244 m_findNextStart = startPos; |
2113 } | 2245 if (startPos == -1) { |
| 2246 m_findPreStart = m_strText.GetLength() - 1; |
| 2247 } else { |
| 2248 m_findPreStart = startPos; |
| 2249 } |
| 2250 m_csFindWhatArray.RemoveAll(); |
| 2251 int i = 0; |
| 2252 while (i < len) { |
| 2253 if (findwhatStr.GetAt(i) != ' ') { |
| 2254 break; |
| 2255 } |
| 2256 i++; |
| 2257 } |
| 2258 if (i < len) { |
| 2259 ExtractFindWhat(findwhatStr); |
| 2260 } else { |
| 2261 m_csFindWhatArray.Add(findwhatStr); |
| 2262 } |
| 2263 if (m_csFindWhatArray.GetSize() <= 0) { |
| 2264 return FALSE; |
| 2265 } |
| 2266 m_IsFind = TRUE; |
| 2267 m_resStart = 0; |
| 2268 m_resEnd = -1; |
| 2269 return TRUE; |
| 2270 } |
| 2271 FX_BOOL CPDF_TextPageFind::FindNext() { |
| 2272 if (!m_pTextPage) { |
| 2273 return FALSE; |
| 2274 } |
| 2275 m_resArray.RemoveAll(); |
| 2276 if (m_findNextStart == -1) { |
| 2277 return FALSE; |
| 2278 } |
| 2279 if (m_strText.IsEmpty()) { |
| 2280 m_IsFind = FALSE; |
| 2281 return m_IsFind; |
| 2282 } |
| 2283 int strLen = m_strText.GetLength(); |
| 2284 if (m_findNextStart > strLen - 1) { |
| 2285 m_IsFind = FALSE; |
| 2286 return m_IsFind; |
| 2287 } |
| 2288 int nCount = m_csFindWhatArray.GetSize(); |
| 2289 int nResultPos = 0; |
| 2290 int nStartPos = 0; |
| 2291 nStartPos = m_findNextStart; |
| 2292 FX_BOOL bSpaceStart = FALSE; |
| 2293 for (int iWord = 0; iWord < nCount; iWord++) { |
| 2294 CFX_WideString csWord = m_csFindWhatArray[iWord]; |
| 2295 if (csWord.IsEmpty()) { |
| 2296 if (iWord == nCount - 1) { |
| 2297 FX_WCHAR strInsert = m_strText.GetAt(nStartPos); |
| 2298 if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_BLANK_CHAR || |
| 2299 strInsert == TEXT_RETURN_CHAR || strInsert == 160) { |
| 2300 nResultPos = nStartPos + 1; |
| 2301 break; |
| 2302 } |
| 2303 iWord = -1; |
| 2304 } else if (iWord == 0) { |
| 2305 bSpaceStart = TRUE; |
| 2306 } |
| 2307 continue; |
| 2308 } |
| 2309 int endIndex; |
| 2310 nResultPos = m_strText.Find(csWord.c_str(), nStartPos); |
| 2311 if (nResultPos == -1) { |
| 2312 m_IsFind = FALSE; |
| 2313 return m_IsFind; |
| 2314 } |
| 2315 endIndex = nResultPos + csWord.GetLength() - 1; |
| 2316 if (iWord == 0) { |
| 2317 m_resStart = nResultPos; |
| 2318 } |
| 2319 FX_BOOL bMatch = TRUE; |
| 2320 if (iWord != 0 && !bSpaceStart) { |
| 2321 int PreResEndPos = nStartPos; |
| 2322 int curChar = csWord.GetAt(0); |
| 2323 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; |
| 2324 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); |
| 2325 if (nStartPos == nResultPos && |
| 2326 !(_IsIgnoreSpaceCharacter(lastChar) || |
| 2327 _IsIgnoreSpaceCharacter(curChar))) { |
| 2328 bMatch = FALSE; |
| 2329 } |
| 2330 for (int d = PreResEndPos; d < nResultPos; d++) { |
| 2331 FX_WCHAR strInsert = m_strText.GetAt(d); |
| 2332 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && |
| 2333 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { |
| 2334 bMatch = FALSE; |
| 2335 break; |
| 2336 } |
| 2337 } |
| 2338 } else if (bSpaceStart) { |
| 2339 if (nResultPos > 0) { |
| 2340 FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1); |
| 2341 if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && |
| 2342 strInsert != TEXT_RETURN_CHAR && strInsert != 160) { |
| 2343 bMatch = FALSE; |
| 2344 m_resStart = nResultPos; |
2114 } else { | 2345 } else { |
2115 if(indexSize % 2) { | 2346 m_resStart = nResultPos - 1; |
2116 if(indexSize <= 0) { | 2347 } |
2117 continue; | 2348 } |
2118 } | 2349 } |
2119 m_CharIndex.SetAt(indexSize - 1, i + 1); | 2350 if (m_bMatchWholeWord && bMatch) { |
2120 } else { | 2351 bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex); |
2121 m_CharIndex.Add(i + 1); | 2352 } |
2122 } | 2353 nStartPos = endIndex + 1; |
2123 } | 2354 if (!bMatch) { |
2124 } | 2355 iWord = -1; |
2125 int indexSize = m_CharIndex.GetSize(); | 2356 if (bSpaceStart) { |
2126 if(indexSize % 2) { | 2357 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength(); |
2127 m_CharIndex.RemoveAt(indexSize - 1); | 2358 } else { |
2128 } | 2359 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength(); |
2129 } | 2360 } |
2130 int CPDF_TextPageFind::GetCharIndex(int index) const | 2361 } |
2131 { | 2362 } |
2132 return m_pTextPage->CharIndexFromTextIndex(index); | 2363 m_resEnd = nResultPos + |
2133 int indexSize = m_CharIndex.GetSize(); | 2364 m_csFindWhatArray[m_csFindWhatArray.GetSize() - 1].GetLength() - 1; |
2134 int count = 0; | 2365 m_IsFind = TRUE; |
2135 for(int i = 0; i < indexSize; i += 2) { | 2366 int resStart = GetCharIndex(m_resStart); |
2136 count += m_CharIndex.GetAt(i + 1); | 2367 int resEnd = GetCharIndex(m_resEnd); |
2137 if(count > index) { | 2368 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray); |
2138 return » index - count + m_CharIndex.GetAt(i + 1) + m_CharIndex.G
etAt(i); | 2369 if (m_flags & FPDFTEXT_CONSECUTIVE) { |
2139 } | 2370 m_findNextStart = m_resStart + 1; |
2140 } | 2371 m_findPreStart = m_resEnd - 1; |
| 2372 } else { |
| 2373 m_findNextStart = m_resEnd + 1; |
| 2374 m_findPreStart = m_resStart - 1; |
| 2375 } |
| 2376 return m_IsFind; |
| 2377 } |
| 2378 FX_BOOL CPDF_TextPageFind::FindPrev() { |
| 2379 if (!m_pTextPage) { |
| 2380 return FALSE; |
| 2381 } |
| 2382 m_resArray.RemoveAll(); |
| 2383 if (m_strText.IsEmpty() || m_findPreStart < 0) { |
| 2384 m_IsFind = FALSE; |
| 2385 return m_IsFind; |
| 2386 } |
| 2387 CPDF_TextPageFind findEngine(m_pTextPage); |
| 2388 FX_BOOL ret = findEngine.FindFirst(m_findWhat, m_flags); |
| 2389 if (!ret) { |
| 2390 m_IsFind = FALSE; |
| 2391 return m_IsFind; |
| 2392 } |
| 2393 int order = -1, MatchedCount = 0; |
| 2394 while (ret) { |
| 2395 ret = findEngine.FindNext(); |
| 2396 if (ret) { |
| 2397 int order1 = findEngine.GetCurOrder(); |
| 2398 int MatchedCount1 = findEngine.GetMatchedCount(); |
| 2399 if (((order1 + MatchedCount1) - 1) > m_findPreStart) { |
| 2400 break; |
| 2401 } |
| 2402 order = order1; |
| 2403 MatchedCount = MatchedCount1; |
| 2404 } |
| 2405 } |
| 2406 if (order == -1) { |
| 2407 m_IsFind = FALSE; |
| 2408 return m_IsFind; |
| 2409 } |
| 2410 m_resStart = m_pTextPage->TextIndexFromCharIndex(order); |
| 2411 m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1); |
| 2412 m_IsFind = TRUE; |
| 2413 m_pTextPage->GetRectArray(order, MatchedCount, m_resArray); |
| 2414 if (m_flags & FPDFTEXT_CONSECUTIVE) { |
| 2415 m_findNextStart = m_resStart + 1; |
| 2416 m_findPreStart = m_resEnd - 1; |
| 2417 } else { |
| 2418 m_findNextStart = m_resEnd + 1; |
| 2419 m_findPreStart = m_resStart - 1; |
| 2420 } |
| 2421 return m_IsFind; |
| 2422 } |
| 2423 void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { |
| 2424 if (findwhat.IsEmpty()) { |
| 2425 return; |
| 2426 } |
| 2427 int index = 0; |
| 2428 while (1) { |
| 2429 CFX_WideString csWord = TEXT_EMPTY; |
| 2430 int ret = |
| 2431 ExtractSubString(csWord, findwhat.c_str(), index, TEXT_BLANK_CHAR); |
| 2432 if (csWord.IsEmpty()) { |
| 2433 if (ret) { |
| 2434 m_csFindWhatArray.Add(CFX_WideString(L"")); |
| 2435 index++; |
| 2436 continue; |
| 2437 } else { |
| 2438 break; |
| 2439 } |
| 2440 } |
| 2441 int pos = 0; |
| 2442 while (pos < csWord.GetLength()) { |
| 2443 CFX_WideString curStr = csWord.Mid(pos, 1); |
| 2444 FX_WCHAR curChar = csWord.GetAt(pos); |
| 2445 if (_IsIgnoreSpaceCharacter(curChar)) { |
| 2446 if (pos > 0 && curChar == 0x2019) { |
| 2447 pos++; |
| 2448 continue; |
| 2449 } |
| 2450 if (pos > 0) { |
| 2451 CFX_WideString preStr = csWord.Mid(0, pos); |
| 2452 m_csFindWhatArray.Add(preStr); |
| 2453 } |
| 2454 m_csFindWhatArray.Add(curStr); |
| 2455 if (pos == csWord.GetLength() - 1) { |
| 2456 csWord.Empty(); |
| 2457 break; |
| 2458 } |
| 2459 csWord = csWord.Right(csWord.GetLength() - pos - 1); |
| 2460 pos = 0; |
| 2461 continue; |
| 2462 } |
| 2463 pos++; |
| 2464 } |
| 2465 if (!csWord.IsEmpty()) { |
| 2466 m_csFindWhatArray.Add(csWord); |
| 2467 } |
| 2468 index++; |
| 2469 } |
| 2470 } |
| 2471 FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, |
| 2472 int startPos, |
| 2473 int endPos) { |
| 2474 int char_left = 0; |
| 2475 int char_right = 0; |
| 2476 int char_count = endPos - startPos + 1; |
| 2477 if (char_count < 1) { |
| 2478 return FALSE; |
| 2479 } |
| 2480 if (char_count == 1 && csPageText.GetAt(startPos) > 255) { |
| 2481 return TRUE; |
| 2482 } |
| 2483 if (startPos - 1 >= 0) { |
| 2484 char_left = csPageText.GetAt(startPos - 1); |
| 2485 } |
| 2486 if (startPos + char_count < csPageText.GetLength()) { |
| 2487 char_right = csPageText.GetAt(startPos + char_count); |
| 2488 } |
| 2489 if ((char_left > 'A' && char_left < 'a') || |
| 2490 (char_left > 'a' && char_left < 'z') || |
| 2491 (char_left > 0xfb00 && char_left < 0xfb06) || |
| 2492 (char_left >= '0' && char_left <= '9') || |
| 2493 (char_right > 'A' && char_right < 'a') || |
| 2494 (char_right > 'a' && char_right < 'z') || |
| 2495 (char_right > 0xfb00 && char_right < 0xfb06) || |
| 2496 (char_right >= '0' && char_right <= '9')) { |
| 2497 return FALSE; |
| 2498 } |
| 2499 if (!(('A' > char_left || char_left > 'Z') && |
| 2500 ('a' > char_left || char_left > 'z') && |
| 2501 ('A' > char_right || char_right > 'Z') && |
| 2502 ('a' > char_right || char_right > 'z'))) { |
| 2503 return FALSE; |
| 2504 } |
| 2505 if (char_count > 0) { |
| 2506 if (csPageText.GetAt(startPos) >= L'0' && |
| 2507 csPageText.GetAt(startPos) <= L'9' && char_left >= L'0' && |
| 2508 char_left <= L'9') { |
| 2509 return FALSE; |
| 2510 } |
| 2511 if (csPageText.GetAt(endPos) >= L'0' && csPageText.GetAt(endPos) <= L'9' && |
| 2512 char_right >= L'0' && char_right <= L'9') { |
| 2513 return FALSE; |
| 2514 } |
| 2515 } |
| 2516 return TRUE; |
| 2517 } |
| 2518 FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, |
| 2519 const FX_WCHAR* lpszFullString, |
| 2520 int iSubString, |
| 2521 FX_WCHAR chSep) { |
| 2522 if (lpszFullString == NULL) { |
| 2523 return FALSE; |
| 2524 } |
| 2525 while (iSubString--) { |
| 2526 lpszFullString = FXSYS_wcschr(lpszFullString, chSep); |
| 2527 if (lpszFullString == NULL) { |
| 2528 rString.Empty(); |
| 2529 return FALSE; |
| 2530 } |
| 2531 lpszFullString++; |
| 2532 while (*lpszFullString == chSep) { |
| 2533 lpszFullString++; |
| 2534 } |
| 2535 } |
| 2536 const FX_WCHAR* lpchEnd = FXSYS_wcschr(lpszFullString, chSep); |
| 2537 int nLen = (lpchEnd == NULL) ? (int)FXSYS_wcslen(lpszFullString) |
| 2538 : (int)(lpchEnd - lpszFullString); |
| 2539 ASSERT(nLen >= 0); |
| 2540 FXSYS_memcpy(rString.GetBuffer(nLen), lpszFullString, |
| 2541 nLen * sizeof(FX_WCHAR)); |
| 2542 rString.ReleaseBuffer(); |
| 2543 return TRUE; |
| 2544 } |
| 2545 CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) { |
| 2546 CFX_WideString str2; |
| 2547 str2.Empty(); |
| 2548 int nlen = str.GetLength(); |
| 2549 for (int i = nlen - 1; i >= 0; i--) { |
| 2550 str2 += str.GetAt(i); |
| 2551 } |
| 2552 return str2; |
| 2553 } |
| 2554 void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const { |
| 2555 rects.Copy(m_resArray); |
| 2556 } |
| 2557 int CPDF_TextPageFind::GetCurOrder() const { |
| 2558 return GetCharIndex(m_resStart); |
| 2559 } |
| 2560 int CPDF_TextPageFind::GetMatchedCount() const { |
| 2561 int resStart = GetCharIndex(m_resStart); |
| 2562 int resEnd = GetCharIndex(m_resEnd); |
| 2563 return resEnd - resStart + 1; |
| 2564 } |
| 2565 CPDF_LinkExtract::CPDF_LinkExtract() : m_pTextPage(NULL), m_IsParserd(FALSE) {} |
| 2566 CPDF_LinkExtract::~CPDF_LinkExtract() { |
| 2567 DeleteLinkList(); |
| 2568 } |
| 2569 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { |
| 2570 if (!pTextPage || !pTextPage->IsParsered()) { |
| 2571 return FALSE; |
| 2572 } |
| 2573 m_pTextPage = (const CPDF_TextPage*)pTextPage; |
| 2574 m_strPageText = m_pTextPage->GetPageText(0, -1); |
| 2575 DeleteLinkList(); |
| 2576 if (m_strPageText.IsEmpty()) { |
| 2577 return FALSE; |
| 2578 } |
| 2579 parserLink(); |
| 2580 m_IsParserd = TRUE; |
| 2581 return TRUE; |
| 2582 } |
| 2583 void CPDF_LinkExtract::DeleteLinkList() { |
| 2584 while (m_LinkList.GetSize()) { |
| 2585 CPDF_LinkExt* linkinfo = NULL; |
| 2586 linkinfo = m_LinkList.GetAt(0); |
| 2587 m_LinkList.RemoveAt(0); |
| 2588 delete linkinfo; |
| 2589 } |
| 2590 m_LinkList.RemoveAll(); |
| 2591 } |
| 2592 int CPDF_LinkExtract::CountLinks() const { |
| 2593 if (!m_IsParserd) { |
2141 return -1; | 2594 return -1; |
2142 } | 2595 } |
2143 FX_BOOL»CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, int flags,
int startPos) | 2596 return m_LinkList.GetSize(); |
2144 { | 2597 } |
2145 if (!m_pTextPage) { | 2598 void CPDF_LinkExtract::parserLink() { |
| 2599 int start = 0, pos = 0; |
| 2600 int TotalChar = m_pTextPage->CountChars(); |
| 2601 while (pos < TotalChar) { |
| 2602 FPDF_CHAR_INFO pageChar; |
| 2603 m_pTextPage->GetCharInfo(pos, pageChar); |
| 2604 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || |
| 2605 pos == TotalChar - 1) { |
| 2606 int nCount = pos - start; |
| 2607 if (pos == TotalChar - 1) { |
| 2608 nCount++; |
| 2609 } |
| 2610 CFX_WideString strBeCheck; |
| 2611 strBeCheck = m_pTextPage->GetPageText(start, nCount); |
| 2612 if (strBeCheck.GetLength() > 5) { |
| 2613 while (strBeCheck.GetLength() > 0) { |
| 2614 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); |
| 2615 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { |
| 2616 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1); |
| 2617 nCount--; |
| 2618 } else { |
| 2619 break; |
| 2620 } |
| 2621 } |
| 2622 if (nCount > 5 && |
| 2623 (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { |
| 2624 if (!AppendToLinkList(start, nCount, strBeCheck)) { |
| 2625 break; |
| 2626 } |
| 2627 } |
| 2628 } |
| 2629 start = ++pos; |
| 2630 } else { |
| 2631 pos++; |
| 2632 } |
| 2633 } |
| 2634 } |
| 2635 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { |
| 2636 CFX_WideString str = strBeCheck; |
| 2637 str.MakeLower(); |
| 2638 if (str.Find(L"http://www.") != -1) { |
| 2639 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); |
| 2640 return TRUE; |
| 2641 } |
| 2642 if (str.Find(L"http://") != -1) { |
| 2643 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); |
| 2644 return TRUE; |
| 2645 } |
| 2646 if (str.Find(L"https://www.") != -1) { |
| 2647 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); |
| 2648 return TRUE; |
| 2649 } |
| 2650 if (str.Find(L"https://") != -1) { |
| 2651 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); |
| 2652 return TRUE; |
| 2653 } |
| 2654 if (str.Find(L"www.") != -1) { |
| 2655 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); |
| 2656 strBeCheck = L"http://" + strBeCheck; |
| 2657 return TRUE; |
| 2658 } |
| 2659 return FALSE; |
| 2660 } |
| 2661 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { |
| 2662 str.MakeLower(); |
| 2663 int aPos = str.Find(L'@'); |
| 2664 if (aPos < 1) { |
| 2665 return FALSE; |
| 2666 } |
| 2667 if (str.GetAt(aPos - 1) == L'.' || str.GetAt(aPos - 1) == L'_') { |
| 2668 return FALSE; |
| 2669 } |
| 2670 int i; |
| 2671 for (i = aPos - 1; i >= 0; i--) { |
| 2672 FX_WCHAR ch = str.GetAt(i); |
| 2673 if (ch == L'_' || ch == L'.' || (ch >= L'a' && ch <= L'z') || |
| 2674 (ch >= L'0' && ch <= L'9')) { |
| 2675 continue; |
| 2676 } else { |
| 2677 if (i == aPos - 1) { |
2146 return FALSE; | 2678 return FALSE; |
2147 } | 2679 } |
2148 if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) { | 2680 str = str.Right(str.GetLength() - i - 1); |
2149 m_strText = m_pTextPage->GetPageText(); | 2681 break; |
2150 } | 2682 } |
2151 CFX_WideString findwhatStr = findwhat; | 2683 } |
2152 m_findWhat = findwhatStr; | 2684 aPos = str.Find(L'@'); |
2153 m_flags = flags; | 2685 if (aPos < 1) { |
2154 m_bMatchCase = flags & FPDFTEXT_MATCHCASE; | 2686 return FALSE; |
2155 if (m_strText.IsEmpty()) { | 2687 } |
2156 m_IsFind = FALSE; | 2688 CFX_WideString strtemp = L""; |
2157 return TRUE; | 2689 for (i = 0; i < aPos; i++) { |
2158 } | 2690 FX_WCHAR wch = str.GetAt(i); |
2159 FX_STRSIZE len = findwhatStr.GetLength(); | 2691 if (wch >= L'a' && wch <= L'z') { |
2160 if (!m_bMatchCase) { | 2692 break; |
2161 findwhatStr.MakeLower(); | |
2162 m_strText.MakeLower(); | |
2163 } | |
2164 m_bMatchWholeWord = flags & FPDFTEXT_MATCHWHOLEWORD; | |
2165 m_findNextStart = startPos; | |
2166 if (startPos == -1) { | |
2167 m_findPreStart = m_strText.GetLength() - 1; | |
2168 } else { | 2693 } else { |
2169 m_findPreStart = startPos; | 2694 strtemp = str.Right(str.GetLength() - i + 1); |
2170 } | 2695 } |
2171 m_csFindWhatArray.RemoveAll(); | 2696 } |
2172 int i = 0; | 2697 if (strtemp != L"") { |
2173 while(i < len) { | 2698 str = strtemp; |
2174 if(findwhatStr.GetAt(i) != ' ') { | 2699 } |
2175 break; | 2700 aPos = str.Find(L'@'); |
2176 } | 2701 if (aPos < 1) { |
2177 i++; | 2702 return FALSE; |
2178 } | 2703 } |
2179 if(i < len) { | 2704 str.TrimRight(L'.'); |
2180 ExtractFindWhat(findwhatStr); | 2705 strtemp = str; |
| 2706 int ePos = str.Find(L'.'); |
| 2707 if (ePos == -1) { |
| 2708 return FALSE; |
| 2709 } |
| 2710 while (ePos != -1) { |
| 2711 strtemp = strtemp.Right(strtemp.GetLength() - ePos - 1); |
| 2712 ePos = strtemp.Find('.'); |
| 2713 } |
| 2714 ePos = strtemp.GetLength(); |
| 2715 for (i = 0; i < ePos; i++) { |
| 2716 FX_WCHAR wch = str.GetAt(i); |
| 2717 if ((wch >= L'a' && wch <= L'z') || (wch >= L'0' && wch <= L'9')) { |
| 2718 continue; |
2181 } else { | 2719 } else { |
2182 m_csFindWhatArray.Add(findwhatStr); | 2720 str = str.Left(str.GetLength() - ePos + i + 1); |
2183 } | 2721 ePos = ePos - i - 1; |
2184 if(m_csFindWhatArray.GetSize() <= 0) { | 2722 break; |
2185 return FALSE; | 2723 } |
2186 } | 2724 } |
2187 m_IsFind = TRUE; | 2725 int nLen = str.GetLength(); |
2188 m_resStart = 0; | 2726 for (i = aPos + 1; i < nLen - ePos; i++) { |
2189 m_resEnd = -1; | 2727 FX_WCHAR wch = str.GetAt(i); |
2190 return TRUE; | 2728 if (wch == L'-' || wch == L'.' || (wch >= L'a' && wch <= L'z') || |
2191 } | 2729 (wch >= L'0' && wch <= L'9')) { |
2192 FX_BOOL CPDF_TextPageFind::FindNext() | 2730 continue; |
2193 { | |
2194 if (!m_pTextPage) { | |
2195 return FALSE; | |
2196 } | |
2197 m_resArray.RemoveAll(); | |
2198 if(m_findNextStart == -1) { | |
2199 return FALSE; | |
2200 } | |
2201 if(m_strText.IsEmpty()) { | |
2202 m_IsFind = FALSE; | |
2203 return m_IsFind; | |
2204 } | |
2205 int strLen = m_strText.GetLength(); | |
2206 if (m_findNextStart > strLen - 1) { | |
2207 m_IsFind = FALSE; | |
2208 return m_IsFind; | |
2209 } | |
2210 int nCount = m_csFindWhatArray.GetSize(); | |
2211 int nResultPos = 0; | |
2212 int»nStartPos = 0; | |
2213 nStartPos = m_findNextStart; | |
2214 FX_BOOL bSpaceStart = FALSE; | |
2215 for(int iWord = 0; iWord < nCount; iWord++) { | |
2216 CFX_WideString csWord = m_csFindWhatArray[iWord]; | |
2217 if(csWord.IsEmpty()) { | |
2218 if(iWord == nCount - 1) { | |
2219 FX_WCHAR strInsert = m_strText.GetAt(nStartPos); | |
2220 if(strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_BLANK_CH
AR || strInsert == TEXT_RETURN_CHAR || strInsert == 160) { | |
2221 nResultPos = nStartPos + 1; | |
2222 break; | |
2223 } | |
2224 iWord = -1; | |
2225 } else if(iWord == 0) { | |
2226 bSpaceStart = TRUE; | |
2227 } | |
2228 continue; | |
2229 } | |
2230 int endIndex; | |
2231 nResultPos = m_strText.Find(csWord.c_str(), nStartPos); | |
2232 if (nResultPos == -1) { | |
2233 m_IsFind = FALSE; | |
2234 return m_IsFind; | |
2235 } | |
2236 endIndex = nResultPos + csWord.GetLength() - 1; | |
2237 if(iWord == 0) { | |
2238 m_resStart = nResultPos; | |
2239 } | |
2240 FX_BOOL bMatch = TRUE; | |
2241 if(iWord != 0 && !bSpaceStart) { | |
2242 int PreResEndPos = nStartPos; | |
2243 int curChar = csWord.GetAt(0); | |
2244 CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; | |
2245 int lastChar = lastWord.GetAt(lastWord.GetLength() - 1); | |
2246 if(nStartPos == nResultPos && !(_IsIgnoreSpaceCharacter(lastChar) ||
_IsIgnoreSpaceCharacter(curChar))) { | |
2247 bMatch = FALSE; | |
2248 } | |
2249 for(int d = PreResEndPos; d < nResultPos; d++) { | |
2250 FX_WCHAR strInsert = m_strText.GetAt(d); | |
2251 if(strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CH
AR && strInsert != TEXT_RETURN_CHAR && strInsert != 160) { | |
2252 bMatch = FALSE; | |
2253 break; | |
2254 } | |
2255 } | |
2256 } else if(bSpaceStart) { | |
2257 if(nResultPos > 0) { | |
2258 FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1); | |
2259 if(strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CH
AR && strInsert != TEXT_RETURN_CHAR && strInsert != 160) { | |
2260 bMatch = FALSE; | |
2261 m_resStart = nResultPos; | |
2262 } else { | |
2263 m_resStart = nResultPos - 1; | |
2264 } | |
2265 } | |
2266 } | |
2267 if(m_bMatchWholeWord && bMatch) { | |
2268 bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex); | |
2269 } | |
2270 nStartPos = endIndex + 1; | |
2271 if(!bMatch) { | |
2272 iWord = -1; | |
2273 if(bSpaceStart) { | |
2274 nStartPos = m_resStart + m_csFindWhatArray[1].GetLength(); | |
2275 } else { | |
2276 nStartPos = m_resStart + m_csFindWhatArray[0].GetLength(); | |
2277 } | |
2278 } | |
2279 } | |
2280 m_resEnd = nResultPos + m_csFindWhatArray[m_csFindWhatArray.GetSize() - 1].G
etLength() - 1; | |
2281 m_IsFind = TRUE; | |
2282 int resStart = GetCharIndex(m_resStart); | |
2283 int resEnd = GetCharIndex(m_resEnd); | |
2284 m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray); | |
2285 if(m_flags & FPDFTEXT_CONSECUTIVE) { | |
2286 m_findNextStart = m_resStart + 1; | |
2287 m_findPreStart = m_resEnd - 1; | |
2288 } else { | 2731 } else { |
2289 m_findNextStart = m_resEnd + 1; | 2732 return FALSE; |
2290 m_findPreStart = m_resStart - 1; | 2733 } |
2291 } | 2734 } |
2292 return m_IsFind; | 2735 if (str.Find(L"mailto:") == -1) { |
2293 } | 2736 str = L"mailto:" + str; |
2294 FX_BOOL CPDF_TextPageFind::FindPrev() | 2737 } |
2295 { | 2738 return TRUE; |
2296 if (!m_pTextPage) { | 2739 } |
2297 return FALSE; | 2740 FX_BOOL CPDF_LinkExtract::AppendToLinkList(int start, |
2298 } | 2741 int count, |
2299 m_resArray.RemoveAll(); | 2742 const CFX_WideString& strUrl) { |
2300 if(m_strText.IsEmpty() || m_findPreStart < 0) { | 2743 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; |
2301 m_IsFind = FALSE; | 2744 linkInfo->m_strUrl = strUrl; |
2302 return m_IsFind; | 2745 linkInfo->m_Start = start; |
2303 } | 2746 linkInfo->m_Count = count; |
2304 CPDF_TextPageFind findEngine(m_pTextPage); | 2747 m_LinkList.Add(linkInfo); |
2305 FX_BOOL ret = findEngine.FindFirst(m_findWhat, m_flags); | 2748 return TRUE; |
2306 if(!ret) { | 2749 } |
2307 m_IsFind = FALSE; | 2750 CFX_WideString CPDF_LinkExtract::GetURL(int index) const { |
2308 return m_IsFind; | 2751 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { |
2309 } | 2752 return L""; |
2310 int order = -1, MatchedCount = 0; | 2753 } |
2311 while(ret) { | 2754 CPDF_LinkExt* link = NULL; |
2312 ret = findEngine.FindNext(); | 2755 link = m_LinkList.GetAt(index); |
2313 if(ret) { | 2756 if (!link) { |
2314 int order1 = findEngine.GetCurOrder() ; | 2757 return L""; |
2315 int MatchedCount1 = findEngine.GetMatchedCount(); | 2758 } |
2316 if(((order1 + MatchedCount1) - 1) > m_findPreStart) { | 2759 return link->m_strUrl; |
2317 break; | 2760 } |
2318 } | 2761 void CPDF_LinkExtract::GetBoundedSegment(int index, |
2319 order = order1; | 2762 int& start, |
2320 MatchedCount = MatchedCount1; | 2763 int& count) const { |
2321 } | 2764 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { |
2322 } | 2765 return; |
2323 if(order == -1) { | 2766 } |
2324 m_IsFind = FALSE; | 2767 CPDF_LinkExt* link = NULL; |
2325 return m_IsFind; | 2768 link = m_LinkList.GetAt(index); |
2326 } | 2769 if (!link) { |
2327 m_resStart = m_pTextPage->TextIndexFromCharIndex(order); | 2770 return; |
2328 m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1); | 2771 } |
2329 m_IsFind = TRUE; | 2772 start = link->m_Start; |
2330 m_pTextPage->GetRectArray(order, MatchedCount, m_resArray); | 2773 count = link->m_Count; |
2331 if(m_flags & FPDFTEXT_CONSECUTIVE) { | 2774 } |
2332 m_findNextStart = m_resStart + 1; | 2775 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { |
2333 m_findPreStart = m_resEnd - 1; | 2776 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { |
2334 } else { | 2777 return; |
2335 m_findNextStart = m_resEnd + 1; | 2778 } |
2336 m_findPreStart = m_resStart - 1; | 2779 CPDF_LinkExt* link = NULL; |
2337 } | 2780 link = m_LinkList.GetAt(index); |
2338 return m_IsFind; | 2781 if (!link) { |
2339 } | 2782 return; |
2340 void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) | 2783 } |
2341 { | 2784 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); |
2342 if(findwhat.IsEmpty()) { | 2785 } |
2343 return ; | |
2344 } | |
2345 int index = 0; | |
2346 while(1) { | |
2347 CFX_WideString csWord = TEXT_EMPTY; | |
2348 int ret = ExtractSubString(csWord, findwhat.c_str(), index, TEXT_BLANK_C
HAR); | |
2349 if(csWord.IsEmpty()) { | |
2350 if(ret) { | |
2351 m_csFindWhatArray.Add(CFX_WideString(L"")); | |
2352 index++; | |
2353 continue; | |
2354 } else { | |
2355 break; | |
2356 } | |
2357 } | |
2358 int pos = 0; | |
2359 while(pos < csWord.GetLength()) { | |
2360 CFX_WideString curStr = csWord.Mid(pos, 1); | |
2361 FX_WCHAR curChar = csWord.GetAt(pos); | |
2362 if (_IsIgnoreSpaceCharacter(curChar)) { | |
2363 if (pos > 0 && curChar == 0x2019) { | |
2364 pos++; | |
2365 continue; | |
2366 } | |
2367 if (pos > 0 ) { | |
2368 CFX_WideString preStr = csWord.Mid(0, pos); | |
2369 m_csFindWhatArray.Add(preStr); | |
2370 } | |
2371 m_csFindWhatArray.Add(curStr); | |
2372 if (pos == csWord.GetLength() - 1) { | |
2373 csWord.Empty(); | |
2374 break; | |
2375 } | |
2376 csWord = csWord.Right(csWord.GetLength() - pos - 1); | |
2377 pos = 0; | |
2378 continue; | |
2379 } | |
2380 pos++; | |
2381 } | |
2382 if (!csWord.IsEmpty()) { | |
2383 m_csFindWhatArray.Add(csWord); | |
2384 } | |
2385 index++; | |
2386 } | |
2387 } | |
2388 FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, in
t startPos, int endPos) | |
2389 { | |
2390 int char_left = 0; | |
2391 int char_right = 0; | |
2392 int char_count = endPos - startPos + 1; | |
2393 if(char_count < 1) { | |
2394 return FALSE; | |
2395 } | |
2396 if (char_count == 1 && csPageText.GetAt(startPos) > 255) { | |
2397 return TRUE; | |
2398 } | |
2399 if(startPos - 1 >= 0 ) { | |
2400 char_left = csPageText.GetAt(startPos - 1); | |
2401 } | |
2402 if(startPos + char_count < csPageText.GetLength()) { | |
2403 char_right = csPageText.GetAt(startPos + char_count); | |
2404 } | |
2405 if ((char_left > 'A' && char_left < 'a') || (char_left > 'a' && char_left <
'z') || (char_left > 0xfb00 && char_left < 0xfb06) || (char_left >= '0' && char_
left <= '9') || | |
2406 (char_right > 'A' && char_right < 'a') || (char_right > 'a' && char_
right < 'z') || (char_right > 0xfb00 && char_right < 0xfb06) || (char_right >= '
0' && char_right <= '9')) { | |
2407 return FALSE; | |
2408 } | |
2409 if(!(('A' > char_left || char_left > 'Z') && ('a' > char_left || char_left
> 'z') | |
2410 && ('A' > char_right || char_right > 'Z') && ('a' > char_right || c
har_right > 'z'))) { | |
2411 return FALSE; | |
2412 } | |
2413 if (char_count > 0) { | |
2414 if (csPageText.GetAt(startPos) >= L'0' && csPageText.GetAt(startPos) <=
L'9' && char_left >= L'0' && char_left <= L'9') { | |
2415 return FALSE; | |
2416 } | |
2417 if (csPageText.GetAt(endPos) >= L'0' && csPageText.GetAt(endPos) <= L'9'
&& char_right >= L'0' && char_right <= L'9') { | |
2418 return FALSE; | |
2419 } | |
2420 } | |
2421 return TRUE; | |
2422 } | |
2423 FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, const FX_WC
HAR* lpszFullString, | |
2424 int iSubString, FX_WCHAR chSep) | |
2425 { | |
2426 if (lpszFullString == NULL) { | |
2427 return FALSE; | |
2428 } | |
2429 while (iSubString--) { | |
2430 lpszFullString = FXSYS_wcschr(lpszFullString, chSep); | |
2431 if (lpszFullString == NULL) { | |
2432 rString.Empty(); | |
2433 return FALSE; | |
2434 } | |
2435 lpszFullString++; | |
2436 while(*lpszFullString == chSep) { | |
2437 lpszFullString++; | |
2438 } | |
2439 } | |
2440 const FX_WCHAR* lpchEnd = FXSYS_wcschr(lpszFullString, chSep); | |
2441 int nLen = (lpchEnd == NULL) ? | |
2442 (int)FXSYS_wcslen(lpszFullString) : (int)(lpchEnd - lpszFullStrin
g); | |
2443 ASSERT(nLen >= 0); | |
2444 FXSYS_memcpy(rString.GetBuffer(nLen), lpszFullString, nLen * sizeof(FX_WCHAR
)); | |
2445 rString.ReleaseBuffer(); | |
2446 return TRUE; | |
2447 } | |
2448 CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) | |
2449 { | |
2450 CFX_WideString str2; | |
2451 str2.Empty(); | |
2452 int nlen = str.GetLength(); | |
2453 for(int i = nlen - 1; i >= 0; i--) { | |
2454 str2 += str.GetAt(i); | |
2455 } | |
2456 return str2; | |
2457 } | |
2458 void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const | |
2459 { | |
2460 rects.Copy(m_resArray); | |
2461 } | |
2462 int CPDF_TextPageFind::GetCurOrder() const | |
2463 { | |
2464 return GetCharIndex(m_resStart); | |
2465 } | |
2466 int CPDF_TextPageFind::GetMatchedCount()const | |
2467 { | |
2468 int resStart = GetCharIndex(m_resStart); | |
2469 int resEnd = GetCharIndex(m_resEnd); | |
2470 return resEnd - resStart + 1; | |
2471 } | |
2472 CPDF_LinkExtract::CPDF_LinkExtract() | |
2473 : m_pTextPage(NULL), | |
2474 m_IsParserd(FALSE) | |
2475 { | |
2476 } | |
2477 CPDF_LinkExtract::~CPDF_LinkExtract() | |
2478 { | |
2479 DeleteLinkList(); | |
2480 } | |
2481 FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) | |
2482 { | |
2483 if (!pTextPage || !pTextPage->IsParsered()) { | |
2484 return FALSE; | |
2485 } | |
2486 m_pTextPage = (const CPDF_TextPage*)pTextPage; | |
2487 m_strPageText = m_pTextPage->GetPageText(0, -1); | |
2488 DeleteLinkList(); | |
2489 if (m_strPageText.IsEmpty()) { | |
2490 return FALSE; | |
2491 } | |
2492 parserLink(); | |
2493 m_IsParserd = TRUE; | |
2494 return TRUE; | |
2495 } | |
2496 void CPDF_LinkExtract::DeleteLinkList() | |
2497 { | |
2498 while (m_LinkList.GetSize()) { | |
2499 CPDF_LinkExt* linkinfo = NULL; | |
2500 linkinfo = m_LinkList.GetAt(0); | |
2501 m_LinkList.RemoveAt(0); | |
2502 delete linkinfo; | |
2503 } | |
2504 m_LinkList.RemoveAll(); | |
2505 } | |
2506 int CPDF_LinkExtract::CountLinks() const | |
2507 { | |
2508 if (!m_IsParserd) { | |
2509 return -1; | |
2510 } | |
2511 return m_LinkList.GetSize(); | |
2512 } | |
2513 void CPDF_LinkExtract::parserLink() | |
2514 { | |
2515 int start = 0, pos = 0; | |
2516 int TotalChar = m_pTextPage->CountChars(); | |
2517 while (pos < TotalChar) { | |
2518 FPDF_CHAR_INFO pageChar; | |
2519 m_pTextPage->GetCharInfo(pos, pageChar); | |
2520 if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || p
os == TotalChar - 1) { | |
2521 int nCount = pos - start; | |
2522 if(pos == TotalChar - 1) { | |
2523 nCount++; | |
2524 } | |
2525 CFX_WideString strBeCheck; | |
2526 strBeCheck = m_pTextPage->GetPageText(start, nCount); | |
2527 if (strBeCheck.GetLength() > 5) { | |
2528 while(strBeCheck.GetLength() > 0) { | |
2529 FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); | |
2530 if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') { | |
2531 strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() -
1); | |
2532 nCount--; | |
2533 } else { | |
2534 break; | |
2535 } | |
2536 } | |
2537 if (nCount > 5 && (CheckWebLink(strBeCheck) || CheckMailLink(str
BeCheck))) { | |
2538 if (!AppendToLinkList(start, nCount, strBeCheck)) { | |
2539 break; | |
2540 } | |
2541 } | |
2542 } | |
2543 start = ++pos; | |
2544 } else { | |
2545 pos++; | |
2546 } | |
2547 } | |
2548 } | |
2549 FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) | |
2550 { | |
2551 CFX_WideString str = strBeCheck; | |
2552 str.MakeLower(); | |
2553 if (str.Find(L"http://www.") != -1) { | |
2554 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")
); | |
2555 return TRUE; | |
2556 } | |
2557 if (str.Find(L"http://") != -1) { | |
2558 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://")); | |
2559 return TRUE; | |
2560 } | |
2561 if (str.Find(L"https://www.") != -1) { | |
2562 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."
)); | |
2563 return TRUE; | |
2564 } | |
2565 if (str.Find(L"https://") != -1) { | |
2566 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); | |
2567 return TRUE; | |
2568 } | |
2569 if (str.Find(L"www.") != -1) { | |
2570 strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); | |
2571 strBeCheck = L"http://" + strBeCheck; | |
2572 return TRUE; | |
2573 } | |
2574 return FALSE; | |
2575 } | |
2576 FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) | |
2577 { | |
2578 str.MakeLower(); | |
2579 int aPos = str.Find(L'@'); | |
2580 if (aPos < 1) { | |
2581 return FALSE; | |
2582 } | |
2583 if (str.GetAt(aPos - 1) == L'.' || str.GetAt(aPos - 1) == L'_') { | |
2584 return FALSE; | |
2585 } | |
2586 int i; | |
2587 for (i = aPos - 1; i >= 0; i--) { | |
2588 FX_WCHAR ch = str.GetAt(i); | |
2589 if (ch == L'_' || ch == L'.' || (ch >= L'a' && ch <= L'z') || (ch >= L'0
' && ch <= L'9')) { | |
2590 continue; | |
2591 } else { | |
2592 if (i == aPos - 1) { | |
2593 return FALSE; | |
2594 } | |
2595 str = str.Right(str.GetLength() - i - 1); | |
2596 break; | |
2597 } | |
2598 } | |
2599 aPos = str.Find(L'@'); | |
2600 if (aPos < 1) { | |
2601 return FALSE; | |
2602 } | |
2603 CFX_WideString strtemp = L""; | |
2604 for (i = 0; i < aPos; i++) { | |
2605 FX_WCHAR wch = str.GetAt(i); | |
2606 if (wch >= L'a' && wch <= L'z') { | |
2607 break; | |
2608 } else { | |
2609 strtemp = str.Right(str.GetLength() - i + 1); | |
2610 } | |
2611 } | |
2612 if (strtemp != L"") { | |
2613 str = strtemp; | |
2614 } | |
2615 aPos = str.Find(L'@'); | |
2616 if (aPos < 1) { | |
2617 return FALSE; | |
2618 } | |
2619 str.TrimRight(L'.'); | |
2620 strtemp = str; | |
2621 int ePos = str.Find(L'.'); | |
2622 if (ePos == -1) { | |
2623 return FALSE; | |
2624 } | |
2625 while (ePos != -1) { | |
2626 strtemp = strtemp.Right(strtemp.GetLength() - ePos - 1); | |
2627 ePos = strtemp.Find('.'); | |
2628 } | |
2629 ePos = strtemp.GetLength(); | |
2630 for (i = 0; i < ePos; i++) { | |
2631 FX_WCHAR wch = str.GetAt(i); | |
2632 if ((wch >= L'a' && wch <= L'z') || (wch >= L'0' && wch <= L'9')) { | |
2633 continue; | |
2634 } else { | |
2635 str = str.Left(str.GetLength() - ePos + i + 1); | |
2636 ePos = ePos - i - 1; | |
2637 break; | |
2638 } | |
2639 } | |
2640 int nLen = str.GetLength(); | |
2641 for (i = aPos + 1; i < nLen - ePos; i++) { | |
2642 FX_WCHAR wch = str.GetAt(i); | |
2643 if (wch == L'-' || wch == L'.' || (wch >= L'a' && wch <= L'z') || (wch >
= L'0' && wch <= L'9')) { | |
2644 continue; | |
2645 } else { | |
2646 return FALSE; | |
2647 } | |
2648 } | |
2649 if (str.Find(L"mailto:") == -1) { | |
2650 str = L"mailto:" + str; | |
2651 } | |
2652 return TRUE; | |
2653 } | |
2654 FX_BOOL CPDF_LinkExtract::AppendToLinkList(int start, int count, const CFX_WideS
tring& strUrl) | |
2655 { | |
2656 CPDF_LinkExt* linkInfo = new CPDF_LinkExt; | |
2657 linkInfo->m_strUrl = strUrl; | |
2658 linkInfo->m_Start = start; | |
2659 linkInfo->m_Count = count; | |
2660 m_LinkList.Add(linkInfo); | |
2661 return TRUE; | |
2662 } | |
2663 CFX_WideString CPDF_LinkExtract::GetURL(int index) const | |
2664 { | |
2665 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | |
2666 return L""; | |
2667 } | |
2668 CPDF_LinkExt* link = NULL; | |
2669 link = m_LinkList.GetAt(index); | |
2670 if (!link) { | |
2671 return L""; | |
2672 } | |
2673 return link->m_strUrl; | |
2674 } | |
2675 void CPDF_LinkExtract::GetBoundedSegment(int index, int& start, int& count) cons
t | |
2676 { | |
2677 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | |
2678 return ; | |
2679 } | |
2680 CPDF_LinkExt* link = NULL; | |
2681 link = m_LinkList.GetAt(index); | |
2682 if (!link) { | |
2683 return ; | |
2684 } | |
2685 start = link->m_Start; | |
2686 count = link->m_Count; | |
2687 } | |
2688 void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const | |
2689 { | |
2690 if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { | |
2691 return; | |
2692 } | |
2693 CPDF_LinkExt* link = NULL; | |
2694 link = m_LinkList.GetAt(index); | |
2695 if (!link) { | |
2696 return ; | |
2697 } | |
2698 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); | |
2699 } | |
OLD | NEW |