| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "../../../third_party/base/nonstd_unique_ptr.h" | 7 #include "../../../third_party/base/nonstd_unique_ptr.h" |
| 8 #include "../../include/fpdfapi/fpdf_page.h" | 8 #include "../../include/fpdfapi/fpdf_page.h" |
| 9 #include "../../include/fpdfapi/fpdf_pageobj.h" | 9 #include "../../include/fpdfapi/fpdf_pageobj.h" |
| 10 #include "../../include/fpdfapi/fpdf_resource.h" | 10 #include "../../include/fpdfapi/fpdf_resource.h" |
| 11 #include "../../include/fpdftext/fpdf_text.h" | 11 #include "../../include/fpdftext/fpdf_text.h" |
| 12 #include "../../include/fxcrt/fx_arb.h" | 12 #include "../../include/fxcrt/fx_arb.h" |
| 13 #include "../../include/fxcrt/fx_ucd.h" | 13 #include "../../include/fxcrt/fx_ucd.h" |
| 14 #include "text_int.h" | 14 #include "text_int.h" |
| 15 #include "txtproc.h" | 15 #include "txtproc.h" |
| 16 | 16 |
| 17 CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, int destcp, const FX_CHAR* d
efchar) | 17 CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, |
| 18 { | 18 int destcp, |
| 19 if (destcp == 0) { | 19 const FX_CHAR* defchar) { |
| 20 if (unicode < 0x80) { | 20 if (destcp == 0) { |
| 21 return CFX_ByteString((char)unicode); | 21 if (unicode < 0x80) { |
| 22 } | 22 return CFX_ByteString((char)unicode); |
| 23 const FX_CHAR* altstr = FCS_GetAltStr(unicode); | |
| 24 if (altstr) { | |
| 25 return CFX_ByteString(altstr, -1); | |
| 26 } | |
| 27 return CFX_ByteString(defchar, -1); | |
| 28 } | |
| 29 char buf[10]; | |
| 30 int iDef = 0; | |
| 31 int ret = FXSYS_WideCharToMultiByte(destcp, 0, (wchar_t*)&unicode, 1, buf, 1
0, NULL, &iDef); | |
| 32 if (ret && !iDef) { | |
| 33 return CFX_ByteString(buf, ret); | |
| 34 } | 23 } |
| 35 const FX_CHAR* altstr = FCS_GetAltStr(unicode); | 24 const FX_CHAR* altstr = FCS_GetAltStr(unicode); |
| 36 if (altstr) { | 25 if (altstr) { |
| 37 return CFX_ByteString(altstr, -1); | 26 return CFX_ByteString(altstr, -1); |
| 38 } | 27 } |
| 39 return CFX_ByteString(defchar, -1); | 28 return CFX_ByteString(defchar, -1); |
| 40 } | 29 } |
| 41 CTextPage::CTextPage() | 30 char buf[10]; |
| 42 { | 31 int iDef = 0; |
| 43 } | 32 int ret = FXSYS_WideCharToMultiByte(destcp, 0, (wchar_t*)&unicode, 1, buf, 10, |
| 44 CTextPage::~CTextPage() | 33 NULL, &iDef); |
| 45 { | 34 if (ret && !iDef) { |
| 35 return CFX_ByteString(buf, ret); |
| 36 } |
| 37 const FX_CHAR* altstr = FCS_GetAltStr(unicode); |
| 38 if (altstr) { |
| 39 return CFX_ByteString(altstr, -1); |
| 40 } |
| 41 return CFX_ByteString(defchar, -1); |
| 42 } |
| 43 CTextPage::CTextPage() {} |
| 44 CTextPage::~CTextPage() { |
| 45 int i; |
| 46 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 47 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
| 48 delete pBaseLine; |
| 49 } |
| 50 for (i = 0; i < m_TextColumns.GetSize(); i++) { |
| 51 CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); |
| 52 delete pTextColumn; |
| 53 } |
| 54 } |
| 55 void CTextPage::ProcessObject(CPDF_PageObject* pObject) { |
| 56 if (pObject->m_Type != PDFPAGE_TEXT) { |
| 57 return; |
| 58 } |
| 59 CPDF_TextObject* pText = (CPDF_TextObject*)pObject; |
| 60 CPDF_Font* pFont = pText->m_TextState.GetFont(); |
| 61 int count = pText->CountItems(); |
| 62 FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2); |
| 63 pText->CalcCharPos(pPosArray); |
| 64 |
| 65 FX_FLOAT fontsize_h = pText->m_TextState.GetFontSizeH(); |
| 66 FX_FLOAT fontsize_v = pText->m_TextState.GetFontSizeV(); |
| 67 FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); |
| 68 FX_FLOAT spacew = 0; |
| 69 if (space_charcode != -1) { |
| 70 spacew = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000; |
| 71 } |
| 72 if (spacew == 0) { |
| 73 spacew = fontsize_h / 4; |
| 74 } |
| 75 if (pText->m_TextState.GetBaselineAngle() != 0) { |
| 76 int cc = 0; |
| 77 CFX_AffineMatrix matrix; |
| 78 pText->GetTextMatrix(&matrix); |
| 79 for (int i = 0; i < pText->m_nChars; i++) { |
| 80 FX_DWORD charcode = pText->m_nChars == 1 |
| 81 ? (FX_DWORD)(uintptr_t)pText->m_pCharCodes |
| 82 : pText->m_pCharCodes[i]; |
| 83 if (charcode == (FX_DWORD)-1) { |
| 84 continue; |
| 85 } |
| 86 FX_RECT char_box; |
| 87 pFont->GetCharBBox(charcode, char_box); |
| 88 FX_FLOAT char_left = |
| 89 pPosArray ? pPosArray[cc * 2] |
| 90 : char_box.left * pText->m_TextState.GetFontSize() / 1000; |
| 91 FX_FLOAT char_right = |
| 92 pPosArray ? pPosArray[cc * 2 + 1] |
| 93 : char_box.right * pText->m_TextState.GetFontSize() / 1000; |
| 94 FX_FLOAT char_top = |
| 95 char_box.top * pText->m_TextState.GetFontSize() / 1000; |
| 96 FX_FLOAT char_bottom = |
| 97 char_box.bottom * pText->m_TextState.GetFontSize() / 1000; |
| 98 cc++; |
| 99 FX_FLOAT char_origx, char_origy; |
| 100 matrix.Transform(char_left, 0, char_origx, char_origy); |
| 101 matrix.TransformRect(char_left, char_right, char_top, char_bottom); |
| 102 CFX_ByteString str; |
| 103 pFont->AppendChar(str, charcode); |
| 104 InsertTextBox(NULL, char_origy, char_left, char_right, char_top, |
| 105 char_bottom, spacew, fontsize_v, str, pFont); |
| 106 } |
| 107 if (pPosArray) { |
| 108 FX_Free(pPosArray); |
| 109 } |
| 110 return; |
| 111 } |
| 112 FX_FLOAT ratio_h = fontsize_h / pText->m_TextState.GetFontSize(); |
| 113 for (int ii = 0; ii < count * 2; ii++) { |
| 114 pPosArray[ii] *= ratio_h; |
| 115 } |
| 116 FX_FLOAT baseline = pText->m_PosY; |
| 117 CTextBaseLine* pBaseLine = NULL; |
| 118 FX_FLOAT topy = pText->m_Top; |
| 119 FX_FLOAT bottomy = pText->m_Bottom; |
| 120 FX_FLOAT leftx = pText->m_Left; |
| 121 int cc = 0; |
| 122 CFX_ByteString segment; |
| 123 int space_count = 0; |
| 124 FX_FLOAT last_left = 0, last_right = 0, segment_left = 0, segment_right = 0; |
| 125 for (int i = 0; i < pText->m_nChars; i++) { |
| 126 FX_DWORD charcode = pText->m_nChars == 1 |
| 127 ? (FX_DWORD)(uintptr_t)pText->m_pCharCodes |
| 128 : pText->m_pCharCodes[i]; |
| 129 if (charcode == (FX_DWORD)-1) { |
| 130 continue; |
| 131 } |
| 132 FX_FLOAT char_left = pPosArray[cc * 2]; |
| 133 FX_FLOAT char_right = pPosArray[cc * 2 + 1]; |
| 134 cc++; |
| 135 if (char_left < last_left || (char_left - last_right) > spacew / 2) { |
| 136 pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, |
| 137 leftx + segment_right, topy, bottomy, spacew, |
| 138 fontsize_v, segment, pFont); |
| 139 segment_left = char_left; |
| 140 segment = ""; |
| 141 } |
| 142 if (space_count > 1) { |
| 143 pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, |
| 144 leftx + segment_right, topy, bottomy, spacew, |
| 145 fontsize_v, segment, pFont); |
| 146 segment = ""; |
| 147 } else if (space_count == 1) { |
| 148 pFont->AppendChar(segment, ' '); |
| 149 } |
| 150 if (segment.GetLength() == 0) { |
| 151 segment_left = char_left; |
| 152 } |
| 153 segment_right = char_right; |
| 154 pFont->AppendChar(segment, charcode); |
| 155 space_count = 0; |
| 156 last_left = char_left; |
| 157 last_right = char_right; |
| 158 } |
| 159 if (segment.GetLength()) |
| 160 pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, |
| 161 leftx + segment_right, topy, bottomy, spacew, |
| 162 fontsize_v, segment, pFont); |
| 163 FX_Free(pPosArray); |
| 164 } |
| 165 CTextBaseLine* CTextPage::InsertTextBox(CTextBaseLine* pBaseLine, |
| 166 FX_FLOAT basey, |
| 167 FX_FLOAT leftx, |
| 168 FX_FLOAT rightx, |
| 169 FX_FLOAT topy, |
| 170 FX_FLOAT bottomy, |
| 171 FX_FLOAT spacew, |
| 172 FX_FLOAT fontsize_v, |
| 173 CFX_ByteString& str, |
| 174 CPDF_Font* pFont) { |
| 175 if (str.GetLength() == 0) { |
| 176 return NULL; |
| 177 } |
| 178 if (pBaseLine == NULL) { |
| 46 int i; | 179 int i; |
| 47 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 180 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 48 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 181 CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
| 49 delete pBaseLine; | 182 if (pExistLine->m_BaseLine == basey) { |
| 50 } | 183 pBaseLine = pExistLine; |
| 51 for (i = 0; i < m_TextColumns.GetSize(); i ++) { | 184 break; |
| 52 CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); | 185 } |
| 53 delete pTextColumn; | 186 if (pExistLine->m_BaseLine < basey) { |
| 54 } | 187 break; |
| 55 } | 188 } |
| 56 void CTextPage::ProcessObject(CPDF_PageObject* pObject) | 189 } |
| 57 { | 190 if (pBaseLine == NULL) { |
| 58 if (pObject->m_Type != PDFPAGE_TEXT) { | 191 pBaseLine = new CTextBaseLine; |
| 59 return; | 192 pBaseLine->m_BaseLine = basey; |
| 60 } | 193 m_BaseLines.InsertAt(i, pBaseLine); |
| 61 CPDF_TextObject* pText = (CPDF_TextObject*)pObject; | 194 } |
| 62 CPDF_Font* pFont = pText->m_TextState.GetFont(); | 195 } |
| 63 int count = pText->CountItems(); | 196 CFX_WideString text; |
| 64 FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2); | 197 const FX_CHAR* pStr = str; |
| 65 pText->CalcCharPos(pPosArray); | 198 int len = str.GetLength(), offset = 0; |
| 66 | 199 while (offset < len) { |
| 67 FX_FLOAT fontsize_h = pText->m_TextState.GetFontSizeH(); | 200 FX_DWORD ch = pFont->GetNextChar(pStr, len, offset); |
| 68 FX_FLOAT fontsize_v = pText->m_TextState.GetFontSizeV(); | 201 CFX_WideString unicode_str = pFont->UnicodeFromCharCode(ch); |
| 69 FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); | 202 if (unicode_str.IsEmpty()) { |
| 70 FX_FLOAT spacew = 0; | 203 text += (FX_WCHAR)ch; |
| 71 if (space_charcode != -1) { | 204 } else { |
| 72 spacew = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000; | 205 text += unicode_str; |
| 73 } | 206 } |
| 74 if (spacew == 0) { | 207 } |
| 75 spacew = fontsize_h / 4; | 208 pBaseLine->InsertTextBox(leftx, rightx, topy, bottomy, spacew, fontsize_v, |
| 76 } | 209 text); |
| 77 if (pText->m_TextState.GetBaselineAngle() != 0) { | 210 return pBaseLine; |
| 78 int cc = 0; | 211 } |
| 79 CFX_AffineMatrix matrix; | 212 void CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth) { |
| 80 pText->GetTextMatrix(&matrix); | 213 FX_FLOAT lastheight = -1; |
| 81 for (int i = 0; i < pText->m_nChars; i ++) { | 214 FX_FLOAT lastbaseline = -1; |
| 82 FX_DWORD charcode = pText->m_nChars == 1 ? (FX_DWORD)(uintptr_t)pTex
t->m_pCharCodes : pText->m_pCharCodes[i]; | 215 FX_FLOAT MinLeftX = 1000000; |
| 83 if (charcode == (FX_DWORD) - 1) { | 216 FX_FLOAT MaxRightX = 0; |
| 84 continue; | 217 int i; |
| 218 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 219 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
| 220 FX_FLOAT leftx, rightx; |
| 221 if (pBaseLine->GetWidth(leftx, rightx)) { |
| 222 if (leftx < MinLeftX) { |
| 223 MinLeftX = leftx; |
| 224 } |
| 225 if (rightx > MaxRightX) { |
| 226 MaxRightX = rightx; |
| 227 } |
| 228 } |
| 229 } |
| 230 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 231 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
| 232 pBaseLine->MergeBoxes(); |
| 233 } |
| 234 for (i = 1; i < m_BaseLines.GetSize(); i++) { |
| 235 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
| 236 CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1); |
| 237 if (pBaseLine->CanMerge(pPrevLine)) { |
| 238 pPrevLine->Merge(pBaseLine); |
| 239 delete pBaseLine; |
| 240 m_BaseLines.RemoveAt(i); |
| 241 i--; |
| 242 } |
| 243 } |
| 244 if (m_bAutoWidth) { |
| 245 int* widths = FX_Alloc(int, m_BaseLines.GetSize()); |
| 246 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 247 widths[i] = 0; |
| 248 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
| 249 int TotalChars = 0; |
| 250 FX_FLOAT TotalWidth = 0; |
| 251 int minchars; |
| 252 pBaseLine->CountChars(TotalChars, TotalWidth, minchars); |
| 253 if (TotalChars) { |
| 254 FX_FLOAT charwidth = TotalWidth / TotalChars; |
| 255 widths[i] = (int)((MaxRightX - MinLeftX) / charwidth); |
| 256 } |
| 257 if (widths[i] > 1000) { |
| 258 widths[i] = 1000; |
| 259 } |
| 260 if (widths[i] < minchars) { |
| 261 widths[i] = minchars; |
| 262 } |
| 263 } |
| 264 int AvgWidth = 0, widthcount = 0; |
| 265 for (i = 0; i < m_BaseLines.GetSize(); i++) |
| 266 if (widths[i]) { |
| 267 AvgWidth += widths[i]; |
| 268 widthcount++; |
| 269 } |
| 270 AvgWidth = int((FX_FLOAT)AvgWidth / widthcount + 0.5); |
| 271 int MaxWidth = 0; |
| 272 for (i = 0; i < m_BaseLines.GetSize(); i++) |
| 273 if (MaxWidth < widths[i]) { |
| 274 MaxWidth = widths[i]; |
| 275 } |
| 276 if (MaxWidth > AvgWidth * 6 / 5) { |
| 277 MaxWidth = AvgWidth * 6 / 5; |
| 278 } |
| 279 FX_Free(widths); |
| 280 if (iMinWidth < MaxWidth) { |
| 281 iMinWidth = MaxWidth; |
| 282 } |
| 283 } |
| 284 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 285 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
| 286 pBaseLine->MergeBoxes(); |
| 287 } |
| 288 if (m_bKeepColumn) { |
| 289 FindColumns(); |
| 290 } |
| 291 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 292 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
| 293 if (lastheight >= 0) { |
| 294 FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine; |
| 295 if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.5) { |
| 296 lines.Add(L""); |
| 297 } |
| 298 } |
| 299 lastheight = pBaseLine->m_MaxFontSizeV; |
| 300 lastbaseline = pBaseLine->m_BaseLine; |
| 301 CFX_WideString str; |
| 302 pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth); |
| 303 lines.Add(str); |
| 304 } |
| 305 } |
| 306 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest) { |
| 307 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); |
| 308 FX_WCHAR* pDst = NULL; |
| 309 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
| 310 if (nCount < 1) { |
| 311 sDest += wChar; |
| 312 return; |
| 313 } |
| 314 pDst = new FX_WCHAR[nCount]; |
| 315 FX_Unicode_GetNormalization(wChar, pDst); |
| 316 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| 317 sDest += pDst[nIndex]; |
| 318 } |
| 319 delete[] pDst; |
| 320 } |
| 321 void NormalizeString(CFX_WideString& str) { |
| 322 if (str.GetLength() <= 0) { |
| 323 return; |
| 324 } |
| 325 CFX_WideString sBuffer; |
| 326 nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); |
| 327 CFX_WordArray order; |
| 328 FX_BOOL bR2L = FALSE; |
| 329 int32_t start = 0, count = 0, i = 0; |
| 330 int nR2L = 0, nL2R = 0; |
| 331 for (i = 0; i < str.GetLength(); i++) { |
| 332 if (pBidiChar->AppendChar(str.GetAt(i))) { |
| 333 int32_t ret = pBidiChar->GetBidiInfo(start, count); |
| 334 order.Add(start); |
| 335 order.Add(count); |
| 336 order.Add(ret); |
| 337 if (!bR2L) { |
| 338 if (ret == 2) { |
| 339 nR2L++; |
| 340 } else if (ret == 1) { |
| 341 nL2R++; |
| 342 } |
| 343 } |
| 344 } |
| 345 } |
| 346 if (pBidiChar->EndChar()) { |
| 347 int32_t ret = pBidiChar->GetBidiInfo(start, count); |
| 348 order.Add(start); |
| 349 order.Add(count); |
| 350 order.Add(ret); |
| 351 if (!bR2L) { |
| 352 if (ret == 2) { |
| 353 nR2L++; |
| 354 } else if (ret == 1) { |
| 355 nL2R++; |
| 356 } |
| 357 } |
| 358 } |
| 359 if (nR2L > 0 && nR2L >= nL2R) { |
| 360 bR2L = TRUE; |
| 361 } |
| 362 if (bR2L) { |
| 363 int count = order.GetSize(); |
| 364 for (int j = count - 1; j > 0; j -= 3) { |
| 365 int ret = order.GetAt(j); |
| 366 int start = order.GetAt(j - 2); |
| 367 int count1 = order.GetAt(j - 1); |
| 368 if (ret == 2 || ret == 0) { |
| 369 for (int i = start + count1 - 1; i >= start; i--) { |
| 370 NormalizeCompositeChar(str[i], sBuffer); |
| 371 } |
| 372 } else { |
| 373 i = j; |
| 374 FX_BOOL bSymbol = FALSE; |
| 375 while (i > 0 && order.GetAt(i) != 2) { |
| 376 bSymbol = !order.GetAt(i); |
| 377 i -= 3; |
| 378 } |
| 379 int end = start + count1; |
| 380 int n = 0; |
| 381 if (bSymbol) { |
| 382 n = i + 6; |
| 383 } else { |
| 384 n = i + 3; |
| 385 } |
| 386 if (n >= j) { |
| 387 for (int m = start; m < end; m++) { |
| 388 sBuffer += str[m]; |
| 389 } |
| 390 } else { |
| 391 i = j; |
| 392 j = n; |
| 393 for (; n <= i; n += 3) { |
| 394 int start = order.GetAt(n - 2); |
| 395 int count1 = order.GetAt(n - 1); |
| 396 int end = start + count1; |
| 397 for (int m = start; m < end; m++) { |
| 398 sBuffer += str[m]; |
| 85 } | 399 } |
| 86 FX_RECT char_box; | 400 } |
| 87 pFont->GetCharBBox(charcode, char_box); | 401 } |
| 88 FX_FLOAT char_left = pPosArray ? pPosArray[cc * 2] : char_box.left *
pText->m_TextState.GetFontSize() / 1000; | 402 } |
| 89 FX_FLOAT char_right = pPosArray ? pPosArray[cc * 2 + 1] : char_box.r
ight * pText->m_TextState.GetFontSize() / 1000; | 403 } |
| 90 FX_FLOAT char_top = char_box.top * pText->m_TextState.GetFontSize()
/ 1000; | 404 } else { |
| 91 FX_FLOAT char_bottom = char_box.bottom * pText->m_TextState.GetFontS
ize() / 1000; | 405 int count = order.GetSize(); |
| 92 cc ++; | 406 FX_BOOL bL2R = FALSE; |
| 93 FX_FLOAT char_origx, char_origy; | 407 for (int j = 0; j < count; j += 3) { |
| 94 matrix.Transform(char_left, 0, char_origx, char_origy); | 408 int ret = order.GetAt(j + 2); |
| 95 matrix.TransformRect(char_left, char_right, char_top, char_bottom); | 409 int start = order.GetAt(j); |
| 96 CFX_ByteString str; | 410 int count1 = order.GetAt(j + 1); |
| 97 pFont->AppendChar(str, charcode); | 411 if (ret == 2 || (j == 0 && ret == 0 && !bL2R)) { |
| 98 InsertTextBox(NULL, char_origy, char_left, char_right, char_top, | 412 int i = j + 3; |
| 99 char_bottom, spacew, fontsize_v, str, pFont); | 413 while (bR2L && i < count) { |
| 100 } | 414 if (order.GetAt(i + 2) == 1) { |
| 101 if (pPosArray) { | 415 break; |
| 102 FX_Free(pPosArray); | 416 } else { |
| 103 } | 417 i += 3; |
| 104 return; | 418 } |
| 105 } | 419 } |
| 106 FX_FLOAT ratio_h = fontsize_h / pText->m_TextState.GetFontSize(); | 420 if (i == 3) { |
| 107 for (int ii = 0; ii < count * 2; ii ++) { | 421 j = -3; |
| 108 pPosArray[ii] *= ratio_h; | 422 bL2R = TRUE; |
| 109 } | 423 continue; |
| 110 FX_FLOAT baseline = pText->m_PosY; | 424 } |
| 111 CTextBaseLine* pBaseLine = NULL; | 425 int end = str.GetLength() - 1; |
| 112 FX_FLOAT topy = pText->m_Top; | 426 if (i < count) { |
| 113 FX_FLOAT bottomy = pText->m_Bottom; | 427 end = order.GetAt(i) - 1; |
| 114 FX_FLOAT leftx = pText->m_Left; | 428 } |
| 115 int cc = 0; | 429 j = i - 3; |
| 116 CFX_ByteString segment; | 430 for (int n = end; n >= start; n--) { |
| 117 int space_count = 0; | 431 NormalizeCompositeChar(str[i], sBuffer); |
| 118 FX_FLOAT last_left = 0, last_right = 0, segment_left = 0, segment_right = 0; | 432 } |
| 119 for (int i = 0; i < pText->m_nChars; i ++) { | 433 } else { |
| 120 FX_DWORD charcode = pText->m_nChars == 1 ? (FX_DWORD)(uintptr_t)pText->m
_pCharCodes : pText->m_pCharCodes[i]; | 434 int end = start + count1; |
| 121 if (charcode == (FX_DWORD) - 1) { | 435 for (int i = start; i < end; i++) { |
| 122 continue; | 436 sBuffer += str[i]; |
| 123 } | 437 } |
| 124 FX_FLOAT char_left = pPosArray[cc * 2]; | 438 } |
| 125 FX_FLOAT char_right = pPosArray[cc * 2 + 1]; | 439 } |
| 126 cc ++; | 440 } |
| 127 if (char_left < last_left || (char_left - last_right) > spacew / 2) { | 441 str.Empty(); |
| 128 pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left,
leftx + segment_right, | 442 str += sBuffer; |
| 129 topy, bottomy, spacew, fontsize_v, segment
, pFont); | 443 } |
| 130 segment_left = char_left; | 444 static FX_BOOL IsNumber(CFX_WideString& str) { |
| 131 segment = ""; | 445 for (int i = 0; i < str.GetLength(); i++) { |
| 132 } | 446 FX_WCHAR ch = str[i]; |
| 133 if (space_count > 1) { | 447 if ((ch < '0' || ch > '9') && ch != '-' && ch != '+' && ch != '.' && |
| 134 pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left,
leftx + segment_right, | 448 ch != ' ') { |
| 135 topy, bottomy, spacew, fontsize_v, segment
, pFont); | 449 return FALSE; |
| 136 segment = ""; | 450 } |
| 137 } else if (space_count == 1) { | 451 } |
| 138 pFont->AppendChar(segment, ' '); | 452 return TRUE; |
| 139 } | 453 } |
| 140 if (segment.GetLength() == 0) { | 454 void CTextPage::FindColumns() { |
| 141 segment_left = char_left; | 455 int i; |
| 142 } | 456 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 143 segment_right = char_right; | 457 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
| 144 pFont->AppendChar(segment, charcode); | 458 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { |
| 145 space_count = 0; | 459 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); |
| 146 last_left = char_left; | 460 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); |
| 147 last_right = char_right; | 461 if (pColumn == NULL) { |
| 148 } | 462 pColumn = new CTextColumn; |
| 149 if (segment.GetLength()) | 463 pColumn->m_Count = 1; |
| 150 pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, lef
tx + segment_right, | 464 pColumn->m_AvgPos = pTextBox->m_Right; |
| 151 topy, bottomy, spacew, fontsize_v, segment, pF
ont); | 465 pColumn->m_TextPos = -1; |
| 152 FX_Free(pPosArray); | 466 m_TextColumns.Add(pColumn); |
| 153 } | 467 } else { |
| 154 CTextBaseLine* CTextPage::InsertTextBox(CTextBaseLine* pBaseLine, FX_FLOAT basey
, FX_FLOAT leftx, | 468 pColumn->m_AvgPos = |
| 155 FX_FLOAT rightx, FX_FLOAT topy, FX_FLOAT
bottomy, FX_FLOAT spacew, FX_FLOAT fontsize_v, | 469 (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / |
| 156 CFX_ByteString& str, CPDF_Font* pFont) | 470 (pColumn->m_Count + 1); |
| 157 { | 471 pColumn->m_Count++; |
| 158 if (str.GetLength() == 0) { | 472 } |
| 159 return NULL; | 473 } |
| 160 } | 474 } |
| 161 if (pBaseLine == NULL) { | 475 int mincount = m_BaseLines.GetSize() / 4; |
| 162 int i; | 476 for (i = 0; i < m_TextColumns.GetSize(); i++) { |
| 163 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 477 CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); |
| 164 CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 478 if (pTextColumn->m_Count >= mincount) { |
| 165 if (pExistLine->m_BaseLine == basey) { | 479 continue; |
| 166 pBaseLine = pExistLine; | 480 } |
| 167 break; | 481 delete pTextColumn; |
| 168 } | 482 m_TextColumns.RemoveAt(i); |
| 169 if (pExistLine->m_BaseLine < basey) { | 483 i--; |
| 170 break; | 484 } |
| 171 } | 485 for (i = 0; i < m_BaseLines.GetSize(); i++) { |
| 172 } | 486 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
| 173 if (pBaseLine == NULL) { | 487 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { |
| 174 pBaseLine = new CTextBaseLine; | 488 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); |
| 175 pBaseLine->m_BaseLine = basey; | 489 if (IsNumber(pTextBox->m_Text)) { |
| 176 m_BaseLines.InsertAt(i, pBaseLine); | 490 pTextBox->m_pColumn = FindColumn(pTextBox->m_Right); |
| 177 } | 491 } |
| 178 } | 492 } |
| 179 CFX_WideString text; | 493 } |
| 180 const FX_CHAR* pStr = str; | 494 } |
| 181 int len = str.GetLength(), offset = 0; | 495 CTextColumn* CTextPage::FindColumn(FX_FLOAT xpos) { |
| 182 while (offset < len) { | 496 for (int i = 0; i < m_TextColumns.GetSize(); i++) { |
| 183 FX_DWORD ch = pFont->GetNextChar(pStr, len, offset); | 497 CTextColumn* pColumn = (CTextColumn*)m_TextColumns.GetAt(i); |
| 184 CFX_WideString unicode_str = pFont->UnicodeFromCharCode(ch); | 498 if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) { |
| 185 if (unicode_str.IsEmpty()) { | 499 return pColumn; |
| 186 text += (FX_WCHAR)ch; | 500 } |
| 187 } | 501 } |
| 188 else { | 502 return NULL; |
| 189 text += unicode_str; | 503 } |
| 190 } | 504 void CTextPage::BreakSpace(CPDF_TextObject* pTextObj) {} |
| 191 } | 505 CTextBaseLine::CTextBaseLine() { |
| 192 pBaseLine->InsertTextBox(leftx, rightx, topy, bottomy, spacew, fontsize_v, t
ext); | 506 m_Top = -100000; |
| 193 return pBaseLine; | 507 m_Bottom = 100000; |
| 194 } | 508 m_MaxFontSizeV = 0; |
| 195 void CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth) | 509 } |
| 196 { | 510 CTextBaseLine::~CTextBaseLine() { |
| 197 FX_FLOAT lastheight = -1; | 511 for (int i = 0; i < m_TextList.GetSize(); i++) { |
| 198 FX_FLOAT lastbaseline = -1; | 512 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
| 199 FX_FLOAT MinLeftX = 1000000; | 513 delete pText; |
| 200 FX_FLOAT MaxRightX = 0; | 514 } |
| 201 int i; | 515 } |
| 202 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 516 void CTextBaseLine::InsertTextBox(FX_FLOAT leftx, |
| 203 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 517 FX_FLOAT rightx, |
| 204 FX_FLOAT leftx, rightx; | 518 FX_FLOAT topy, |
| 205 if (pBaseLine->GetWidth(leftx, rightx)) { | 519 FX_FLOAT bottomy, |
| 206 if (leftx < MinLeftX) { | 520 FX_FLOAT spacew, |
| 207 MinLeftX = leftx; | 521 FX_FLOAT fontsize_v, |
| 208 } | 522 const CFX_WideString& text) { |
| 209 if (rightx > MaxRightX) { | 523 if (m_Top < topy) { |
| 210 MaxRightX = rightx; | 524 m_Top = topy; |
| 211 } | 525 } |
| 212 } | 526 if (m_Bottom > bottomy) { |
| 213 } | 527 m_Bottom = bottomy; |
| 214 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 528 } |
| 215 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 529 if (m_MaxFontSizeV < fontsize_v) { |
| 216 pBaseLine->MergeBoxes(); | 530 m_MaxFontSizeV = fontsize_v; |
| 217 } | 531 } |
| 218 for (i = 1; i < m_BaseLines.GetSize(); i ++) { | 532 int i; |
| 219 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 533 for (i = 0; i < m_TextList.GetSize(); i++) { |
| 220 CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1); | 534 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
| 221 if (pBaseLine->CanMerge(pPrevLine)) { | 535 if (pText->m_Left > leftx) { |
| 222 pPrevLine->Merge(pBaseLine); | 536 break; |
| 223 delete pBaseLine; | 537 } |
| 224 m_BaseLines.RemoveAt(i); | 538 } |
| 225 i --; | 539 CTextBox* pText = new CTextBox; |
| 226 } | 540 pText->m_Text = text; |
| 227 } | 541 pText->m_Left = leftx; |
| 228 if (m_bAutoWidth) { | 542 pText->m_Right = rightx; |
| 229 int* widths = FX_Alloc(int, m_BaseLines.GetSize()); | 543 pText->m_Top = topy; |
| 230 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 544 pText->m_Bottom = bottomy; |
| 231 widths[i] = 0; | 545 pText->m_SpaceWidth = spacew; |
| 232 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 546 pText->m_FontSizeV = fontsize_v; |
| 233 int TotalChars = 0; | 547 pText->m_pColumn = NULL; |
| 234 FX_FLOAT TotalWidth = 0; | 548 m_TextList.InsertAt(i, pText); |
| 235 int minchars; | 549 } |
| 236 pBaseLine->CountChars(TotalChars, TotalWidth, minchars); | 550 FX_BOOL GetIntersection(FX_FLOAT low1, |
| 237 if (TotalChars) { | 551 FX_FLOAT high1, |
| 238 FX_FLOAT charwidth = TotalWidth / TotalChars; | 552 FX_FLOAT low2, |
| 239 widths[i] = (int)((MaxRightX - MinLeftX) / charwidth); | 553 FX_FLOAT high2, |
| 240 } | 554 FX_FLOAT& interlow, |
| 241 if (widths[i] > 1000) { | 555 FX_FLOAT& interhigh); |
| 242 widths[i] = 1000; | 556 FX_BOOL CTextBaseLine::CanMerge(CTextBaseLine* pOther) { |
| 243 } | 557 FX_FLOAT inter_top, inter_bottom; |
| 244 if (widths[i] < minchars) { | 558 if (!GetIntersection(m_Bottom, m_Top, pOther->m_Bottom, pOther->m_Top, |
| 245 widths[i] = minchars; | 559 inter_bottom, inter_top)) { |
| 246 } | 560 return FALSE; |
| 247 } | 561 } |
| 248 int AvgWidth = 0, widthcount = 0; | 562 FX_FLOAT inter_h = inter_top - inter_bottom; |
| 249 for (i = 0; i < m_BaseLines.GetSize(); i ++) | 563 if (inter_h < (m_Top - m_Bottom) / 2 && |
| 250 if (widths[i]) { | 564 inter_h < (pOther->m_Top - pOther->m_Bottom) / 2) { |
| 251 AvgWidth += widths[i]; | 565 return FALSE; |
| 252 widthcount ++; | 566 } |
| 253 } | 567 FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine); |
| 254 AvgWidth = int((FX_FLOAT)AvgWidth / widthcount + 0.5); | 568 for (int i = 0; i < m_TextList.GetSize(); i++) { |
| 255 int MaxWidth = 0; | 569 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
| 256 for (i = 0; i < m_BaseLines.GetSize(); i ++) | 570 for (int j = 0; j < pOther->m_TextList.GetSize(); j++) { |
| 257 if (MaxWidth < widths[i]) { | 571 CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j); |
| 258 MaxWidth = widths[i]; | 572 FX_FLOAT inter_left, inter_right; |
| 259 } | 573 if (!GetIntersection(pText->m_Left, pText->m_Right, pOtherText->m_Left, |
| 260 if (MaxWidth > AvgWidth * 6 / 5) { | 574 pOtherText->m_Right, inter_left, inter_right)) { |
| 261 MaxWidth = AvgWidth * 6 / 5; | 575 continue; |
| 262 } | 576 } |
| 263 FX_Free(widths); | 577 FX_FLOAT inter_w = inter_right - inter_left; |
| 264 if (iMinWidth < MaxWidth) { | 578 if (inter_w < pText->m_SpaceWidth / 2 && |
| 265 iMinWidth = MaxWidth; | 579 inter_w < pOtherText->m_SpaceWidth / 2) { |
| 266 } | 580 continue; |
| 267 } | 581 } |
| 268 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 582 if (dy >= (pText->m_Bottom - pText->m_Top) / 2 || |
| 269 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 583 dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) { |
| 270 pBaseLine->MergeBoxes(); | 584 return FALSE; |
| 271 } | 585 } |
| 272 if (m_bKeepColumn) { | 586 } |
| 273 FindColumns(); | 587 } |
| 274 } | 588 return TRUE; |
| 275 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | 589 } |
| 276 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | 590 void CTextBaseLine::Merge(CTextBaseLine* pOther) { |
| 277 if (lastheight >= 0) { | 591 for (int i = 0; i < pOther->m_TextList.GetSize(); i++) { |
| 278 FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine; | 592 CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i); |
| 279 if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.
5) { | 593 InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bottom, |
| 280 lines.Add(L""); | 594 pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text); |
| 281 } | 595 } |
| 282 } | 596 } |
| 283 lastheight = pBaseLine->m_MaxFontSizeV; | 597 FX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) { |
| 284 lastbaseline = pBaseLine->m_BaseLine; | 598 int i; |
| 285 CFX_WideString str; | 599 for (i = 0; i < m_TextList.GetSize(); i++) { |
| 286 pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth); | 600 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
| 287 lines.Add(str); | 601 if (pText->m_Text != L" ") { |
| 288 } | 602 break; |
| 289 } | 603 } |
| 290 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest) | 604 } |
| 291 { | 605 if (i == m_TextList.GetSize()) { |
| 292 wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); | 606 return FALSE; |
| 293 FX_WCHAR* pDst = NULL; | 607 } |
| 294 FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); | 608 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
| 295 if (nCount < 1 ) { | 609 leftx = pText->m_Left; |
| 296 sDest += wChar; | 610 for (i = m_TextList.GetSize() - 1; i >= 0; i--) { |
| 297 return; | 611 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
| 298 } | 612 if (pText->m_Text != L" ") { |
| 299 pDst = new FX_WCHAR[nCount]; | 613 break; |
| 300 FX_Unicode_GetNormalization(wChar, pDst); | 614 } |
| 301 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 615 } |
| 302 sDest += pDst[nIndex]; | 616 pText = (CTextBox*)m_TextList.GetAt(i); |
| 303 } | 617 rightx = pText->m_Right; |
| 304 delete[] pDst; | 618 return TRUE; |
| 305 } | 619 } |
| 306 void NormalizeString(CFX_WideString& str) | 620 void CTextBaseLine::MergeBoxes() { |
| 307 { | 621 int i = 0; |
| 308 if (str.GetLength() <= 0) { | 622 while (1) { |
| 309 return; | 623 if (i >= m_TextList.GetSize() - 1) { |
| 310 } | 624 break; |
| 311 CFX_WideString sBuffer; | 625 } |
| 312 nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); | 626 CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i); |
| 313 CFX_WordArray order; | 627 CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1); |
| 314 FX_BOOL bR2L = FALSE; | 628 FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right; |
| 315 int32_t start = 0, count = 0, i = 0; | 629 FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0) |
| 316 int nR2L = 0, nL2R = 0; | 630 ? pNextText->m_SpaceWidth |
| 317 for (i = 0; i < str.GetLength(); i++) { | 631 : pThisText->m_SpaceWidth; |
| 318 if(pBidiChar->AppendChar(str.GetAt(i))) { | 632 if (spacew > 0.0 && dx < spacew * 2) { |
| 319 int32_t ret = pBidiChar->GetBidiInfo(start, count); | 633 pThisText->m_Right = pNextText->m_Right; |
| 320 order.Add(start); | 634 if (dx > spacew * 1.5) { |
| 321 order.Add(count); | 635 pThisText->m_Text += L" "; |
| 322 order.Add(ret); | 636 } else if (dx > spacew / 3) { |
| 323 if(!bR2L) { | 637 pThisText->m_Text += L' '; |
| 324 if(ret == 2) { | 638 } |
| 325 nR2L++; | 639 pThisText->m_Text += pNextText->m_Text; |
| 326 } else if (ret == 1) { | 640 pThisText->m_SpaceWidth = |
| 327 nL2R++; | 641 pNextText->m_SpaceWidth == 0.0 ? spacew : pNextText->m_SpaceWidth; |
| 328 } | 642 m_TextList.RemoveAt(i + 1); |
| 329 } | 643 delete pNextText; |
| 330 } | |
| 331 } | |
| 332 if(pBidiChar->EndChar()) { | |
| 333 int32_t ret = pBidiChar->GetBidiInfo(start, count); | |
| 334 order.Add(start); | |
| 335 order.Add(count); | |
| 336 order.Add(ret); | |
| 337 if(!bR2L) { | |
| 338 if(ret == 2) { | |
| 339 nR2L++; | |
| 340 } else if(ret == 1) { | |
| 341 nL2R++; | |
| 342 } | |
| 343 } | |
| 344 } | |
| 345 if(nR2L > 0 && nR2L >= nL2R) { | |
| 346 bR2L = TRUE; | |
| 347 } | |
| 348 if(bR2L) { | |
| 349 int count = order.GetSize(); | |
| 350 for(int j = count - 1; j > 0; j -= 3) { | |
| 351 int ret = order.GetAt(j); | |
| 352 int start = order.GetAt(j - 2); | |
| 353 int count1 = order.GetAt(j - 1); | |
| 354 if(ret == 2 || ret == 0) { | |
| 355 for(int i = start + count1 - 1; i >= start; i--) { | |
| 356 NormalizeCompositeChar(str[i], sBuffer); | |
| 357 } | |
| 358 } else { | |
| 359 i = j; | |
| 360 FX_BOOL bSymbol = FALSE; | |
| 361 while(i > 0 && order.GetAt(i) != 2) { | |
| 362 bSymbol = !order.GetAt(i); | |
| 363 i -= 3; | |
| 364 } | |
| 365 int end = start + count1 ; | |
| 366 int n = 0; | |
| 367 if(bSymbol) { | |
| 368 n = i + 6; | |
| 369 } else { | |
| 370 n = i + 3; | |
| 371 } | |
| 372 if(n >= j) { | |
| 373 for(int m = start; m < end; m++) { | |
| 374 sBuffer += str[m]; | |
| 375 } | |
| 376 } else { | |
| 377 i = j; | |
| 378 j = n; | |
| 379 for(; n <= i; n += 3) { | |
| 380 int start = order.GetAt(n - 2); | |
| 381 int count1 = order.GetAt(n - 1); | |
| 382 int end = start + count1 ; | |
| 383 for(int m = start; m < end; m++) { | |
| 384 sBuffer += str[m]; | |
| 385 } | |
| 386 } | |
| 387 } | |
| 388 } | |
| 389 } | |
| 390 } else { | 644 } else { |
| 391 int count = order.GetSize(); | 645 i++; |
| 392 FX_BOOL bL2R = FALSE; | 646 } |
| 393 for(int j = 0; j < count; j += 3) { | 647 } |
| 394 int ret = order.GetAt(j + 2); | 648 } |
| 395 int start = order.GetAt(j); | 649 void CTextBaseLine::WriteOutput(CFX_WideString& str, |
| 396 int count1 = order.GetAt(j + 1); | 650 FX_FLOAT leftx, |
| 397 if(ret == 2 || (j == 0 && ret == 0 && !bL2R)) { | 651 FX_FLOAT pagewidth, |
| 398 int i = j + 3; | 652 int iTextWidth) { |
| 399 while(bR2L && i < count) { | 653 int lastpos = -1; |
| 400 if(order.GetAt(i + 2) == 1) { | 654 for (int i = 0; i < m_TextList.GetSize(); i++) { |
| 401 break; | |
| 402 } else { | |
| 403 i += 3; | |
| 404 } | |
| 405 } | |
| 406 if(i == 3) { | |
| 407 j = -3; | |
| 408 bL2R = TRUE; | |
| 409 continue; | |
| 410 } | |
| 411 int end = str.GetLength() - 1; | |
| 412 if(i < count) { | |
| 413 end = order.GetAt(i) - 1; | |
| 414 } | |
| 415 j = i - 3; | |
| 416 for(int n = end; n >= start; n--) { | |
| 417 NormalizeCompositeChar(str[i], sBuffer); | |
| 418 } | |
| 419 } else { | |
| 420 int end = start + count1 ; | |
| 421 for(int i = start; i < end; i++) { | |
| 422 sBuffer += str[i]; | |
| 423 } | |
| 424 } | |
| 425 } | |
| 426 } | |
| 427 str.Empty(); | |
| 428 str += sBuffer; | |
| 429 } | |
| 430 static FX_BOOL IsNumber(CFX_WideString& str) | |
| 431 { | |
| 432 for (int i = 0; i < str.GetLength(); i ++) { | |
| 433 FX_WCHAR ch = str[i]; | |
| 434 if ((ch < '0' || ch > '9') && ch != '-' && ch != '+' && ch != '.' && ch
!= ' ') { | |
| 435 return FALSE; | |
| 436 } | |
| 437 } | |
| 438 return TRUE; | |
| 439 } | |
| 440 void CTextPage::FindColumns() | |
| 441 { | |
| 442 int i; | |
| 443 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | |
| 444 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | |
| 445 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) { | |
| 446 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); | |
| 447 CTextColumn* pColumn = FindColumn(pTextBox->m_Right); | |
| 448 if (pColumn == NULL) { | |
| 449 pColumn = new CTextColumn; | |
| 450 pColumn->m_Count = 1; | |
| 451 pColumn->m_AvgPos = pTextBox->m_Right; | |
| 452 pColumn->m_TextPos = -1; | |
| 453 m_TextColumns.Add(pColumn); | |
| 454 } else { | |
| 455 pColumn->m_AvgPos = (pColumn->m_Count * pColumn->m_AvgPos + pTex
tBox->m_Right) / | |
| 456 (pColumn->m_Count + 1); | |
| 457 pColumn->m_Count ++; | |
| 458 } | |
| 459 } | |
| 460 } | |
| 461 int mincount = m_BaseLines.GetSize() / 4; | |
| 462 for (i = 0; i < m_TextColumns.GetSize(); i ++) { | |
| 463 CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); | |
| 464 if (pTextColumn->m_Count >= mincount) { | |
| 465 continue; | |
| 466 } | |
| 467 delete pTextColumn; | |
| 468 m_TextColumns.RemoveAt(i); | |
| 469 i --; | |
| 470 } | |
| 471 for (i = 0; i < m_BaseLines.GetSize(); i ++) { | |
| 472 CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); | |
| 473 for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) { | |
| 474 CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); | |
| 475 if (IsNumber(pTextBox->m_Text)) { | |
| 476 pTextBox->m_pColumn = FindColumn(pTextBox->m_Right); | |
| 477 } | |
| 478 } | |
| 479 } | |
| 480 } | |
| 481 CTextColumn* CTextPage::FindColumn(FX_FLOAT xpos) | |
| 482 { | |
| 483 for (int i = 0; i < m_TextColumns.GetSize(); i ++) { | |
| 484 CTextColumn* pColumn = (CTextColumn*)m_TextColumns.GetAt(i); | |
| 485 if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) { | |
| 486 return pColumn; | |
| 487 } | |
| 488 } | |
| 489 return NULL; | |
| 490 } | |
| 491 void CTextPage::BreakSpace(CPDF_TextObject* pTextObj) | |
| 492 { | |
| 493 } | |
| 494 CTextBaseLine::CTextBaseLine() | |
| 495 { | |
| 496 m_Top = -100000; | |
| 497 m_Bottom = 100000; | |
| 498 m_MaxFontSizeV = 0; | |
| 499 } | |
| 500 CTextBaseLine::~CTextBaseLine() | |
| 501 { | |
| 502 for (int i = 0; i < m_TextList.GetSize(); i ++) { | |
| 503 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | |
| 504 delete pText; | |
| 505 } | |
| 506 } | |
| 507 void CTextBaseLine::InsertTextBox(FX_FLOAT leftx, FX_FLOAT rightx, FX_FLOAT topy
, FX_FLOAT bottomy, | |
| 508 FX_FLOAT spacew, FX_FLOAT fontsize_v, const CF
X_WideString& text) | |
| 509 { | |
| 510 if (m_Top < topy) { | |
| 511 m_Top = topy; | |
| 512 } | |
| 513 if (m_Bottom > bottomy) { | |
| 514 m_Bottom = bottomy; | |
| 515 } | |
| 516 if (m_MaxFontSizeV < fontsize_v) { | |
| 517 m_MaxFontSizeV = fontsize_v; | |
| 518 } | |
| 519 int i; | |
| 520 for (i = 0; i < m_TextList.GetSize(); i ++) { | |
| 521 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | |
| 522 if (pText->m_Left > leftx) { | |
| 523 break; | |
| 524 } | |
| 525 } | |
| 526 CTextBox* pText = new CTextBox; | |
| 527 pText->m_Text = text; | |
| 528 pText->m_Left = leftx; | |
| 529 pText->m_Right = rightx; | |
| 530 pText->m_Top = topy; | |
| 531 pText->m_Bottom = bottomy; | |
| 532 pText->m_SpaceWidth = spacew; | |
| 533 pText->m_FontSizeV = fontsize_v; | |
| 534 pText->m_pColumn = NULL; | |
| 535 m_TextList.InsertAt(i, pText); | |
| 536 } | |
| 537 FX_BOOL GetIntersection(FX_FLOAT low1, FX_FLOAT high1, FX_FLOAT low2, FX_FLOAT h
igh2, | |
| 538 FX_FLOAT& interlow, FX_FLOAT& interhigh); | |
| 539 FX_BOOL CTextBaseLine::CanMerge(CTextBaseLine* pOther) | |
| 540 { | |
| 541 FX_FLOAT inter_top, inter_bottom; | |
| 542 if (!GetIntersection(m_Bottom, m_Top, pOther->m_Bottom, pOther->m_Top, | |
| 543 inter_bottom, inter_top)) { | |
| 544 return FALSE; | |
| 545 } | |
| 546 FX_FLOAT inter_h = inter_top - inter_bottom; | |
| 547 if (inter_h < (m_Top - m_Bottom) / 2 && inter_h < (pOther->m_Top - pOther->m
_Bottom) / 2) { | |
| 548 return FALSE; | |
| 549 } | |
| 550 FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine); | |
| 551 for (int i = 0; i < m_TextList.GetSize(); i ++) { | |
| 552 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | |
| 553 for (int j = 0; j < pOther->m_TextList.GetSize(); j ++) { | |
| 554 CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j); | |
| 555 FX_FLOAT inter_left, inter_right; | |
| 556 if (!GetIntersection(pText->m_Left, pText->m_Right, | |
| 557 pOtherText->m_Left, pOtherText->m_Right, inter_
left, inter_right)) { | |
| 558 continue; | |
| 559 } | |
| 560 FX_FLOAT inter_w = inter_right - inter_left; | |
| 561 if (inter_w < pText->m_SpaceWidth / 2 && inter_w < pOtherText->m_Spa
ceWidth / 2) { | |
| 562 continue; | |
| 563 } | |
| 564 if (dy >= (pText->m_Bottom - pText->m_Top) / 2 || | |
| 565 dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) { | |
| 566 return FALSE; | |
| 567 } | |
| 568 } | |
| 569 } | |
| 570 return TRUE; | |
| 571 } | |
| 572 void CTextBaseLine::Merge(CTextBaseLine* pOther) | |
| 573 { | |
| 574 for (int i = 0; i < pOther->m_TextList.GetSize(); i ++) { | |
| 575 CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i); | |
| 576 InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bott
om, | |
| 577 pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text); | |
| 578 } | |
| 579 } | |
| 580 FX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) | |
| 581 { | |
| 582 int i; | |
| 583 for (i = 0; i < m_TextList.GetSize(); i ++) { | |
| 584 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | |
| 585 if (pText->m_Text != L" ") { | |
| 586 break; | |
| 587 } | |
| 588 } | |
| 589 if (i == m_TextList.GetSize()) { | |
| 590 return FALSE; | |
| 591 } | |
| 592 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 655 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
| 593 leftx = pText->m_Left; | 656 int xpos; |
| 594 for (i = m_TextList.GetSize() - 1; i >= 0; i --) { | 657 if (pText->m_pColumn) { |
| 595 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | 658 xpos = |
| 596 if (pText->m_Text != L" ") { | 659 (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pagewidth + |
| 597 break; | 660 0.5); |
| 598 } | 661 xpos -= pText->m_Text.GetLength(); |
| 599 } | 662 } else { |
| 600 pText = (CTextBox*)m_TextList.GetAt(i); | 663 xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5); |
| 601 rightx = pText->m_Right; | 664 } |
| 602 return TRUE; | 665 if (xpos <= lastpos) { |
| 603 } | 666 xpos = lastpos + 1; |
| 604 void CTextBaseLine::MergeBoxes() | 667 } |
| 605 { | 668 for (int j = lastpos + 1; j < xpos; j++) { |
| 606 int i = 0; | 669 str += ' '; |
| 607 while (1) { | 670 } |
| 608 if (i >= m_TextList.GetSize() - 1) { | 671 CFX_WideString sSrc(pText->m_Text); |
| 609 break; | 672 NormalizeString(sSrc); |
| 610 } | 673 str += sSrc; |
| 611 CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i); | 674 str += ' '; |
| 612 CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1); | 675 lastpos = xpos + pText->m_Text.GetLength(); |
| 613 FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right; | 676 } |
| 614 FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0) ? | 677 } |
| 615 pNextText->m_SpaceWidth : pThisText->m_SpaceWidth; | 678 void CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars) { |
| 616 if (spacew > 0.0 && dx < spacew * 2) { | 679 minchars = 0; |
| 617 pThisText->m_Right = pNextText->m_Right; | 680 for (int i = 0; i < m_TextList.GetSize(); i++) { |
| 618 if (dx > spacew * 1.5) { | 681 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
| 619 pThisText->m_Text += L" "; | 682 if (pText->m_Right - pText->m_Left < 0.002) { |
| 620 } else if (dx > spacew / 3) { | 683 continue; |
| 621 pThisText->m_Text += L' '; | 684 } |
| 622 } | 685 count += pText->m_Text.GetLength(); |
| 623 pThisText->m_Text += pNextText->m_Text; | 686 width += pText->m_Right - pText->m_Left; |
| 624 pThisText->m_SpaceWidth = pNextText->m_SpaceWidth == 0.0 ? | 687 minchars += pText->m_Text.GetLength() + 1; |
| 625 spacew : pNextText->m_SpaceWidth; | 688 } |
| 626 m_TextList.RemoveAt(i + 1); | |
| 627 delete pNextText; | |
| 628 } else { | |
| 629 i ++; | |
| 630 } | |
| 631 } | |
| 632 } | |
| 633 void CTextBaseLine::WriteOutput(CFX_WideString& str, FX_FLOAT leftx, FX_FLOAT pa
gewidth, | |
| 634 int iTextWidth) | |
| 635 { | |
| 636 int lastpos = -1; | |
| 637 for (int i = 0; i < m_TextList.GetSize(); i ++) { | |
| 638 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | |
| 639 int xpos; | |
| 640 if (pText->m_pColumn) { | |
| 641 xpos = (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pag
ewidth + 0.5); | |
| 642 xpos -= pText->m_Text.GetLength(); | |
| 643 } else { | |
| 644 xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5)
; | |
| 645 } | |
| 646 if (xpos <= lastpos) { | |
| 647 xpos = lastpos + 1; | |
| 648 } | |
| 649 for (int j = lastpos + 1; j < xpos; j ++) { | |
| 650 str += ' '; | |
| 651 } | |
| 652 CFX_WideString sSrc(pText->m_Text); | |
| 653 NormalizeString(sSrc); | |
| 654 str += sSrc; | |
| 655 str += ' '; | |
| 656 lastpos = xpos + pText->m_Text.GetLength(); | |
| 657 } | |
| 658 } | |
| 659 void CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars) | |
| 660 { | |
| 661 minchars = 0; | |
| 662 for (int i = 0; i < m_TextList.GetSize(); i ++) { | |
| 663 CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); | |
| 664 if (pText->m_Right - pText->m_Left < 0.002) { | |
| 665 continue; | |
| 666 } | |
| 667 count += pText->m_Text.GetLength(); | |
| 668 width += pText->m_Right - pText->m_Left; | |
| 669 minchars += pText->m_Text.GetLength() + 1; | |
| 670 } | |
| 671 } | 689 } |
| 672 #define PI 3.1415926535897932384626433832795 | 690 #define PI 3.1415926535897932384626433832795 |
| 673 static void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox) | 691 static void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox) { |
| 674 { | 692 int total_count = 0, rotated_count[3] = {0, 0, 0}; |
| 675 int total_count = 0, rotated_count[3] = {0, 0, 0}; | 693 FX_POSITION pos = page.GetFirstObjectPosition(); |
| 676 FX_POSITION pos = page.GetFirstObjectPosition(); | 694 while (pos) { |
| 677 while (pos) { | 695 CPDF_PageObject* pObj = page.GetNextObject(pos); |
| 678 CPDF_PageObject* pObj = page.GetNextObject(pos); | 696 if (pObj->m_Type != PDFPAGE_TEXT) { |
| 679 if (pObj->m_Type != PDFPAGE_TEXT) { | 697 continue; |
| 680 continue; | 698 } |
| 681 } | 699 total_count++; |
| 682 total_count ++; | 700 CPDF_TextObject* pText = (CPDF_TextObject*)pObj; |
| 683 CPDF_TextObject* pText = (CPDF_TextObject*)pObj; | 701 FX_FLOAT angle = pText->m_TextState.GetBaselineAngle(); |
| 684 FX_FLOAT angle = pText->m_TextState.GetBaselineAngle(); | 702 if (angle == 0.0) { |
| 685 if (angle == 0.0) { | 703 continue; |
| 686 continue; | 704 } |
| 687 } | 705 int degree = (int)(angle * 180 / PI + 0.5); |
| 688 int degree = (int)(angle * 180 / PI + 0.5); | 706 if (degree % 90) { |
| 689 if (degree % 90) { | 707 continue; |
| 690 continue; | 708 } |
| 691 } | 709 if (degree < 0) { |
| 692 if (degree < 0) { | 710 degree += 360; |
| 693 degree += 360; | 711 } |
| 694 } | 712 int index = degree / 90 % 3 - 1; |
| 695 int index = degree / 90 % 3 - 1; | 713 if (index < 0) { |
| 696 if (index < 0) { | 714 continue; |
| 697 continue; | 715 } |
| 698 } | 716 rotated_count[index]++; |
| 699 rotated_count[index] ++; | 717 } |
| 700 } | 718 if (total_count == 0) { |
| 701 if (total_count == 0) { | 719 return; |
| 702 return; | 720 } |
| 703 } | 721 CFX_AffineMatrix matrix; |
| 704 CFX_AffineMatrix matrix; | 722 if (rotated_count[0] > total_count * 2 / 3) { |
| 705 if (rotated_count[0] > total_count * 2 / 3) { | 723 matrix.Set(0, -1, 1, 0, 0, page.GetPageHeight()); |
| 706 matrix.Set(0, -1, 1, 0, 0, page.GetPageHeight()); | 724 } else if (rotated_count[1] > total_count * 2 / 3) { |
| 707 } else if (rotated_count[1] > total_count * 2 / 3) { | 725 matrix.Set(-1, 0, 0, -1, page.GetPageWidth(), page.GetPageHeight()); |
| 708 matrix.Set(-1, 0, 0, -1, page.GetPageWidth(), page.GetPageHeight()); | 726 } else if (rotated_count[2] > total_count * 2 / 3) { |
| 709 } else if (rotated_count[2] > total_count * 2 / 3) { | 727 matrix.Set(0, 1, -1, 0, page.GetPageWidth(), 0); |
| 710 matrix.Set(0, 1, -1, 0, page.GetPageWidth(), 0); | 728 } else { |
| 711 } else { | 729 return; |
| 712 return; | 730 } |
| 713 } | 731 page.Transform(matrix); |
| 714 page.Transform(matrix); | 732 page_bbox.Transform(&matrix); |
| 715 page_bbox.Transform(&matrix); | 733 } |
| 716 } | 734 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, |
| 717 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CP
DF_Dictionary* pPage, | 735 CPDF_Document* pDoc, |
| 718 int iMinWidth, FX_DWORD flags) | 736 CPDF_Dictionary* pPage, |
| 719 { | 737 int iMinWidth, |
| 720 lines.RemoveAll(); | 738 FX_DWORD flags) { |
| 721 if (pPage == NULL) { | 739 lines.RemoveAll(); |
| 722 return; | 740 if (pPage == NULL) { |
| 723 } | 741 return; |
| 724 CPDF_Page page; | 742 } |
| 725 page.Load(pDoc, pPage); | 743 CPDF_Page page; |
| 726 CPDF_ParseOptions options; | 744 page.Load(pDoc, pPage); |
| 727 options.m_bTextOnly = TRUE; | 745 CPDF_ParseOptions options; |
| 728 options.m_bSeparateForm = FALSE; | 746 options.m_bTextOnly = TRUE; |
| 729 page.ParseContent(&options); | 747 options.m_bSeparateForm = FALSE; |
| 730 CFX_FloatRect page_bbox = page.GetPageBBox(); | 748 page.ParseContent(&options); |
| 731 if (flags & PDF2TXT_AUTO_ROTATE) { | 749 CFX_FloatRect page_bbox = page.GetPageBBox(); |
| 732 CheckRotate(page, page_bbox); | 750 if (flags & PDF2TXT_AUTO_ROTATE) { |
| 733 } | 751 CheckRotate(page, page_bbox); |
| 734 CTextPage texts; | 752 } |
| 735 texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH; | 753 CTextPage texts; |
| 736 texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN; | 754 texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH; |
| 737 texts.m_bBreakSpace = TRUE; | 755 texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN; |
| 738 FX_POSITION pos = page.GetFirstObjectPosition(); | 756 texts.m_bBreakSpace = TRUE; |
| 739 while (pos) { | 757 FX_POSITION pos = page.GetFirstObjectPosition(); |
| 740 CPDF_PageObject* pObject = page.GetNextObject(pos); | 758 while (pos) { |
| 741 if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) { | 759 CPDF_PageObject* pObject = page.GetNextObject(pos); |
| 742 CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Ri
ght, pObject->m_Top); | 760 if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) { |
| 743 if (!page_bbox.Contains(rect)) { | 761 CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Right, |
| 744 continue; | 762 pObject->m_Top); |
| 745 } | 763 if (!page_bbox.Contains(rect)) { |
| 746 } | 764 continue; |
| 747 texts.ProcessObject(pObject); | 765 } |
| 748 } | 766 } |
| 749 texts.WriteOutput(lines, iMinWidth); | 767 texts.ProcessObject(pObject); |
| 750 } | 768 } |
| 751 void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dicti
onary* pPage, | 769 texts.WriteOutput(lines, iMinWidth); |
| 752 int iMinWidth, FX_DWORD flags) | 770 } |
| 753 { | 771 void PDF_GetPageText(CFX_ByteStringArray& lines, |
| 754 lines.RemoveAll(); | 772 CPDF_Document* pDoc, |
| 755 CFX_WideStringArray wlines; | 773 CPDF_Dictionary* pPage, |
| 756 PDF_GetPageText_Unicode(wlines, pDoc, pPage, iMinWidth, flags); | 774 int iMinWidth, |
| 757 for (int i = 0; i < wlines.GetSize(); i ++) { | 775 FX_DWORD flags) { |
| 758 CFX_WideString wstr = wlines[i]; | 776 lines.RemoveAll(); |
| 759 CFX_ByteString str; | 777 CFX_WideStringArray wlines; |
| 760 for (int c = 0; c < wstr.GetLength(); c ++) { | 778 PDF_GetPageText_Unicode(wlines, pDoc, pPage, iMinWidth, flags); |
| 761 str += CharFromUnicodeAlt(wstr[c], FXSYS_GetACP(), "?"); | 779 for (int i = 0; i < wlines.GetSize(); i++) { |
| 762 } | 780 CFX_WideString wstr = wlines[i]; |
| 763 lines.Add(str); | 781 CFX_ByteString str; |
| 764 } | 782 for (int c = 0; c < wstr.GetLength(); c++) { |
| 765 } | 783 str += CharFromUnicodeAlt(wstr[c], FXSYS_GetACP(), "?"); |
| 766 extern void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects
* pPage, FX_BOOL bUseLF, | 784 } |
| 785 lines.Add(str); |
| 786 } |
| 787 } |
| 788 extern void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, |
| 789 CPDF_PageObjects* pPage, |
| 790 FX_BOOL bUseLF, |
| 767 CFX_PtrArray* pObjArray); | 791 CFX_PtrArray* pObjArray); |
| 768 void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPD
F_Dictionary* pPage, FX_DWORD flags) | 792 void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, |
| 769 { | 793 CPDF_Document* pDoc, |
| 770 buffer.EstimateSize(0, 10240); | 794 CPDF_Dictionary* pPage, |
| 771 CPDF_Page page; | 795 FX_DWORD flags) { |
| 772 page.Load(pDoc, pPage); | 796 buffer.EstimateSize(0, 10240); |
| 773 CPDF_ParseOptions options; | 797 CPDF_Page page; |
| 774 options.m_bTextOnly = TRUE; | 798 page.Load(pDoc, pPage); |
| 775 options.m_bSeparateForm = FALSE; | 799 CPDF_ParseOptions options; |
| 776 page.ParseContent(&options); | 800 options.m_bTextOnly = TRUE; |
| 777 _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL); | 801 options.m_bSeparateForm = FALSE; |
| 778 } | 802 page.ParseContent(&options); |
| 803 _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL); |
| 804 } |
| OLD | NEW |