| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include <cctype> | 7 #include <cctype> |
| 8 #include <cwctype> | 8 #include <cwctype> |
| 9 #include <memory> | 9 #include <memory> |
| 10 #include <vector> |
| 10 | 11 |
| 11 #include "core/include/fpdfapi/fpdf_page.h" | 12 #include "core/include/fpdfapi/fpdf_page.h" |
| 12 #include "core/include/fpdfapi/fpdf_pageobj.h" | 13 #include "core/include/fpdfapi/fpdf_pageobj.h" |
| 13 #include "core/include/fpdfapi/fpdf_resource.h" | 14 #include "core/include/fpdfapi/fpdf_resource.h" |
| 14 #include "core/include/fpdftext/fpdf_text.h" | 15 #include "core/include/fpdftext/fpdf_text.h" |
| 15 #include "core/include/fxcrt/fx_bidi.h" | 16 #include "core/include/fxcrt/fx_bidi.h" |
| 16 #include "core/include/fxcrt/fx_ucd.h" | 17 #include "core/include/fxcrt/fx_ucd.h" |
| 17 #include "core/src/fpdftext/text_int.h" | 18 #include "core/src/fpdftext/text_int.h" |
| 18 #include "core/src/fpdftext/txtproc.h" | 19 #include "core/src/fpdftext/txtproc.h" |
| 20 #include "third_party/base/stl_util.h" |
| 19 | 21 |
| 20 CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, | 22 CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, |
| 21 int destcp, | 23 int destcp, |
| 22 const FX_CHAR* defchar) { | 24 const FX_CHAR* defchar) { |
| 23 if (destcp == 0) { | 25 if (destcp == 0) { |
| 24 if (unicode < 0x80) { | 26 if (unicode < 0x80) { |
| 25 return CFX_ByteString((char)unicode); | 27 return CFX_ByteString((char)unicode); |
| 26 } | 28 } |
| 27 const FX_CHAR* altstr = FCS_GetAltStr(unicode); | 29 const FX_CHAR* altstr = FCS_GetAltStr(unicode); |
| 28 return CFX_ByteString(altstr ? altstr : defchar); | 30 return CFX_ByteString(altstr ? altstr : defchar); |
| (...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 302 sDest += wChar; | 304 sDest += wChar; |
| 303 return; | 305 return; |
| 304 } | 306 } |
| 305 pDst = new FX_WCHAR[nCount]; | 307 pDst = new FX_WCHAR[nCount]; |
| 306 FX_Unicode_GetNormalization(wChar, pDst); | 308 FX_Unicode_GetNormalization(wChar, pDst); |
| 307 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 309 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
| 308 sDest += pDst[nIndex]; | 310 sDest += pDst[nIndex]; |
| 309 } | 311 } |
| 310 delete[] pDst; | 312 delete[] pDst; |
| 311 } | 313 } |
| 314 |
| 312 void NormalizeString(CFX_WideString& str) { | 315 void NormalizeString(CFX_WideString& str) { |
| 313 if (str.GetLength() <= 0) { | 316 if (str.GetLength() <= 0) { |
| 314 return; | 317 return; |
| 315 } | 318 } |
| 316 CFX_WideString sBuffer; | 319 CFX_WideString sBuffer; |
| 317 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); | 320 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); |
| 318 CFX_WordArray order; | 321 std::vector<FX_WORD> order; |
| 319 FX_BOOL bR2L = FALSE; | 322 FX_BOOL bR2L = FALSE; |
| 320 int32_t start = 0, count = 0, i = 0; | 323 int32_t start = 0, count = 0, i = 0; |
| 321 int nR2L = 0, nL2R = 0; | 324 int nR2L = 0, nL2R = 0; |
| 322 for (i = 0; i < str.GetLength(); i++) { | 325 for (i = 0; i < str.GetLength(); i++) { |
| 323 if (pBidiChar->AppendChar(str.GetAt(i))) { | 326 if (pBidiChar->AppendChar(str.GetAt(i))) { |
| 324 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | 327 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); |
| 325 order.Add(start); | 328 order.push_back(start); |
| 326 order.Add(count); | 329 order.push_back(count); |
| 327 order.Add(ret); | 330 order.push_back(ret); |
| 328 if (!bR2L) { | 331 if (!bR2L) { |
| 329 if (ret == CFX_BidiChar::RIGHT) { | 332 if (ret == CFX_BidiChar::RIGHT) { |
| 330 nR2L++; | 333 nR2L++; |
| 331 } else if (ret == CFX_BidiChar::LEFT) { | 334 } else if (ret == CFX_BidiChar::LEFT) { |
| 332 nL2R++; | 335 nL2R++; |
| 333 } | 336 } |
| 334 } | 337 } |
| 335 } | 338 } |
| 336 } | 339 } |
| 337 if (pBidiChar->EndChar()) { | 340 if (pBidiChar->EndChar()) { |
| 338 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | 341 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); |
| 339 order.Add(start); | 342 order.push_back(start); |
| 340 order.Add(count); | 343 order.push_back(count); |
| 341 order.Add(ret); | 344 order.push_back(ret); |
| 342 if (!bR2L) { | 345 if (!bR2L) { |
| 343 if (ret == CFX_BidiChar::RIGHT) { | 346 if (ret == CFX_BidiChar::RIGHT) { |
| 344 nR2L++; | 347 nR2L++; |
| 345 } else if (ret == CFX_BidiChar::LEFT) { | 348 } else if (ret == CFX_BidiChar::LEFT) { |
| 346 nL2R++; | 349 nL2R++; |
| 347 } | 350 } |
| 348 } | 351 } |
| 349 } | 352 } |
| 350 if (nR2L > 0 && nR2L >= nL2R) { | 353 if (nR2L > 0 && nR2L >= nL2R) { |
| 351 bR2L = TRUE; | 354 bR2L = TRUE; |
| 352 } | 355 } |
| 353 if (bR2L) { | 356 if (bR2L) { |
| 354 int count = order.GetSize(); | 357 int count = pdfium::CollectionSize<int>(order); |
| 355 for (int j = count - 1; j > 0; j -= 3) { | 358 for (int j = count - 1; j > 0; j -= 3) { |
| 356 int ret = order.GetAt(j); | 359 int ret = order[j]; |
| 357 int start = order.GetAt(j - 2); | 360 int count1 = order[j - 1]; |
| 358 int count1 = order.GetAt(j - 1); | 361 int start = order[j - 2]; |
| 359 if (ret == 2 || ret == 0) { | 362 if (ret == 2 || ret == 0) { |
| 360 for (int i = start + count1 - 1; i >= start; i--) { | 363 for (int i = start + count1 - 1; i >= start; i--) { |
| 361 NormalizeCompositeChar(str[i], sBuffer); | 364 NormalizeCompositeChar(str[i], sBuffer); |
| 362 } | 365 } |
| 363 } else { | 366 } else { |
| 364 i = j; | 367 i = j; |
| 365 FX_BOOL bSymbol = FALSE; | 368 FX_BOOL bSymbol = FALSE; |
| 366 while (i > 0 && order.GetAt(i) != 2) { | 369 while (i > 0 && order[i] != 2) { |
| 367 bSymbol = !order.GetAt(i); | 370 bSymbol = !order[i]; |
| 368 i -= 3; | 371 i -= 3; |
| 369 } | 372 } |
| 370 int end = start + count1; | 373 int end = start + count1; |
| 371 int n = 0; | 374 int n = 0; |
| 372 if (bSymbol) { | 375 if (bSymbol) { |
| 373 n = i + 6; | 376 n = i + 6; |
| 374 } else { | 377 } else { |
| 375 n = i + 3; | 378 n = i + 3; |
| 376 } | 379 } |
| 377 if (n >= j) { | 380 if (n >= j) { |
| 378 for (int m = start; m < end; m++) { | 381 for (int m = start; m < end; m++) { |
| 379 sBuffer += str[m]; | 382 sBuffer += str[m]; |
| 380 } | 383 } |
| 381 } else { | 384 } else { |
| 382 i = j; | 385 i = j; |
| 383 j = n; | 386 j = n; |
| 384 for (; n <= i; n += 3) { | 387 for (; n <= i; n += 3) { |
| 385 int start = order.GetAt(n - 2); | 388 int start = order[n - 2]; |
| 386 int count1 = order.GetAt(n - 1); | 389 int count1 = order[n - 1]; |
| 387 int end = start + count1; | 390 int end = start + count1; |
| 388 for (int m = start; m < end; m++) { | 391 for (int m = start; m < end; m++) { |
| 389 sBuffer += str[m]; | 392 sBuffer += str[m]; |
| 390 } | 393 } |
| 391 } | 394 } |
| 392 } | 395 } |
| 393 } | 396 } |
| 394 } | 397 } |
| 395 } else { | 398 } else { |
| 396 int count = order.GetSize(); | 399 int count = pdfium::CollectionSize<int>(order); |
| 397 FX_BOOL bL2R = FALSE; | 400 FX_BOOL bL2R = FALSE; |
| 398 for (int j = 0; j < count; j += 3) { | 401 for (int j = 0; j < count; j += 3) { |
| 399 int ret = order.GetAt(j + 2); | 402 int start = order[j]; |
| 400 int start = order.GetAt(j); | 403 int count1 = order[j + 1]; |
| 401 int count1 = order.GetAt(j + 1); | 404 int ret = order[j + 2]; |
| 402 if (ret == 2 || (j == 0 && ret == 0 && !bL2R)) { | 405 if (ret == 2 || (j == 0 && ret == 0 && !bL2R)) { |
| 403 int i = j + 3; | 406 int i = j + 3; |
| 404 while (bR2L && i < count) { | 407 while (bR2L && i < count) { |
| 405 if (order.GetAt(i + 2) == 1) { | 408 if (order[i + 2] == 1) { |
| 406 break; | 409 break; |
| 407 } else { | 410 } else { |
| 408 i += 3; | 411 i += 3; |
| 409 } | 412 } |
| 410 } | 413 } |
| 411 if (i == 3) { | 414 if (i == 3) { |
| 412 j = -3; | 415 j = -3; |
| 413 bL2R = TRUE; | 416 bL2R = TRUE; |
| 414 continue; | 417 continue; |
| 415 } | 418 } |
| 416 int end = str.GetLength() - 1; | 419 int end = str.GetLength() - 1; |
| 417 if (i < count) { | 420 if (i < count) { |
| 418 end = order.GetAt(i) - 1; | 421 end = order[i] - 1; |
| 419 } | 422 } |
| 420 j = i - 3; | 423 j = i - 3; |
| 421 for (int n = end; n >= start; n--) { | 424 for (int n = end; n >= start; n--) { |
| 422 NormalizeCompositeChar(str[i], sBuffer); | 425 NormalizeCompositeChar(str[i], sBuffer); |
| 423 } | 426 } |
| 424 } else { | 427 } else { |
| 425 int end = start + count1; | 428 int end = start + count1; |
| 426 for (int i = start; i < end; i++) { | 429 for (int i = start; i < end; i++) { |
| 427 sBuffer += str[i]; | 430 sBuffer += str[i]; |
| 428 } | 431 } |
| (...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 780 FX_DWORD flags) { | 783 FX_DWORD flags) { |
| 781 buffer.EstimateSize(0, 10240); | 784 buffer.EstimateSize(0, 10240); |
| 782 CPDF_Page page; | 785 CPDF_Page page; |
| 783 page.Load(pDoc, pPage); | 786 page.Load(pDoc, pPage); |
| 784 CPDF_ParseOptions options; | 787 CPDF_ParseOptions options; |
| 785 options.m_bTextOnly = TRUE; | 788 options.m_bTextOnly = TRUE; |
| 786 options.m_bSeparateForm = FALSE; | 789 options.m_bSeparateForm = FALSE; |
| 787 page.ParseContent(&options); | 790 page.ParseContent(&options); |
| 788 GetTextStream_Unicode(buffer, &page, TRUE); | 791 GetTextStream_Unicode(buffer, &page, TRUE); |
| 789 } | 792 } |
| OLD | NEW |