OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include <cctype> | 7 #include <cctype> |
8 #include <cwctype> | 8 #include <cwctype> |
9 #include <memory> | 9 #include <memory> |
| 10 #include <vector> |
10 | 11 |
11 #include "core/include/fpdfapi/fpdf_page.h" | 12 #include "core/include/fpdfapi/fpdf_page.h" |
12 #include "core/include/fpdfapi/fpdf_pageobj.h" | 13 #include "core/include/fpdfapi/fpdf_pageobj.h" |
13 #include "core/include/fpdfapi/fpdf_resource.h" | 14 #include "core/include/fpdfapi/fpdf_resource.h" |
14 #include "core/include/fpdftext/fpdf_text.h" | 15 #include "core/include/fpdftext/fpdf_text.h" |
15 #include "core/include/fxcrt/fx_bidi.h" | 16 #include "core/include/fxcrt/fx_bidi.h" |
16 #include "core/include/fxcrt/fx_ucd.h" | 17 #include "core/include/fxcrt/fx_ucd.h" |
17 #include "core/src/fpdftext/text_int.h" | 18 #include "core/src/fpdftext/text_int.h" |
18 #include "core/src/fpdftext/txtproc.h" | 19 #include "core/src/fpdftext/txtproc.h" |
| 20 #include "third_party/base/stl_util.h" |
19 | 21 |
20 CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, | 22 CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, |
21 int destcp, | 23 int destcp, |
22 const FX_CHAR* defchar) { | 24 const FX_CHAR* defchar) { |
23 if (destcp == 0) { | 25 if (destcp == 0) { |
24 if (unicode < 0x80) { | 26 if (unicode < 0x80) { |
25 return CFX_ByteString((char)unicode); | 27 return CFX_ByteString((char)unicode); |
26 } | 28 } |
27 const FX_CHAR* altstr = FCS_GetAltStr(unicode); | 29 const FX_CHAR* altstr = FCS_GetAltStr(unicode); |
28 return CFX_ByteString(altstr ? altstr : defchar); | 30 return CFX_ByteString(altstr ? altstr : defchar); |
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
302 sDest += wChar; | 304 sDest += wChar; |
303 return; | 305 return; |
304 } | 306 } |
305 pDst = new FX_WCHAR[nCount]; | 307 pDst = new FX_WCHAR[nCount]; |
306 FX_Unicode_GetNormalization(wChar, pDst); | 308 FX_Unicode_GetNormalization(wChar, pDst); |
307 for (int nIndex = 0; nIndex < nCount; nIndex++) { | 309 for (int nIndex = 0; nIndex < nCount; nIndex++) { |
308 sDest += pDst[nIndex]; | 310 sDest += pDst[nIndex]; |
309 } | 311 } |
310 delete[] pDst; | 312 delete[] pDst; |
311 } | 313 } |
| 314 |
312 void NormalizeString(CFX_WideString& str) { | 315 void NormalizeString(CFX_WideString& str) { |
313 if (str.GetLength() <= 0) { | 316 if (str.GetLength() <= 0) { |
314 return; | 317 return; |
315 } | 318 } |
316 CFX_WideString sBuffer; | 319 CFX_WideString sBuffer; |
317 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); | 320 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); |
318 CFX_WordArray order; | 321 std::vector<FX_WORD> order; |
319 FX_BOOL bR2L = FALSE; | 322 FX_BOOL bR2L = FALSE; |
320 int32_t start = 0, count = 0, i = 0; | 323 int32_t start = 0, count = 0, i = 0; |
321 int nR2L = 0, nL2R = 0; | 324 int nR2L = 0, nL2R = 0; |
322 for (i = 0; i < str.GetLength(); i++) { | 325 for (i = 0; i < str.GetLength(); i++) { |
323 if (pBidiChar->AppendChar(str.GetAt(i))) { | 326 if (pBidiChar->AppendChar(str.GetAt(i))) { |
324 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | 327 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); |
325 order.Add(start); | 328 order.push_back(start); |
326 order.Add(count); | 329 order.push_back(count); |
327 order.Add(ret); | 330 order.push_back(ret); |
328 if (!bR2L) { | 331 if (!bR2L) { |
329 if (ret == CFX_BidiChar::RIGHT) { | 332 if (ret == CFX_BidiChar::RIGHT) { |
330 nR2L++; | 333 nR2L++; |
331 } else if (ret == CFX_BidiChar::LEFT) { | 334 } else if (ret == CFX_BidiChar::LEFT) { |
332 nL2R++; | 335 nL2R++; |
333 } | 336 } |
334 } | 337 } |
335 } | 338 } |
336 } | 339 } |
337 if (pBidiChar->EndChar()) { | 340 if (pBidiChar->EndChar()) { |
338 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); | 341 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); |
339 order.Add(start); | 342 order.push_back(start); |
340 order.Add(count); | 343 order.push_back(count); |
341 order.Add(ret); | 344 order.push_back(ret); |
342 if (!bR2L) { | 345 if (!bR2L) { |
343 if (ret == CFX_BidiChar::RIGHT) { | 346 if (ret == CFX_BidiChar::RIGHT) { |
344 nR2L++; | 347 nR2L++; |
345 } else if (ret == CFX_BidiChar::LEFT) { | 348 } else if (ret == CFX_BidiChar::LEFT) { |
346 nL2R++; | 349 nL2R++; |
347 } | 350 } |
348 } | 351 } |
349 } | 352 } |
350 if (nR2L > 0 && nR2L >= nL2R) { | 353 if (nR2L > 0 && nR2L >= nL2R) { |
351 bR2L = TRUE; | 354 bR2L = TRUE; |
352 } | 355 } |
353 if (bR2L) { | 356 if (bR2L) { |
354 int count = order.GetSize(); | 357 int count = pdfium::CollectionSize<int>(order); |
355 for (int j = count - 1; j > 0; j -= 3) { | 358 for (int j = count - 1; j > 0; j -= 3) { |
356 int ret = order.GetAt(j); | 359 int ret = order[j]; |
357 int start = order.GetAt(j - 2); | 360 int count1 = order[j - 1]; |
358 int count1 = order.GetAt(j - 1); | 361 int start = order[j - 2]; |
359 if (ret == 2 || ret == 0) { | 362 if (ret == 2 || ret == 0) { |
360 for (int i = start + count1 - 1; i >= start; i--) { | 363 for (int i = start + count1 - 1; i >= start; i--) { |
361 NormalizeCompositeChar(str[i], sBuffer); | 364 NormalizeCompositeChar(str[i], sBuffer); |
362 } | 365 } |
363 } else { | 366 } else { |
364 i = j; | 367 i = j; |
365 FX_BOOL bSymbol = FALSE; | 368 FX_BOOL bSymbol = FALSE; |
366 while (i > 0 && order.GetAt(i) != 2) { | 369 while (i > 0 && order[i] != 2) { |
367 bSymbol = !order.GetAt(i); | 370 bSymbol = !order[i]; |
368 i -= 3; | 371 i -= 3; |
369 } | 372 } |
370 int end = start + count1; | 373 int end = start + count1; |
371 int n = 0; | 374 int n = 0; |
372 if (bSymbol) { | 375 if (bSymbol) { |
373 n = i + 6; | 376 n = i + 6; |
374 } else { | 377 } else { |
375 n = i + 3; | 378 n = i + 3; |
376 } | 379 } |
377 if (n >= j) { | 380 if (n >= j) { |
378 for (int m = start; m < end; m++) { | 381 for (int m = start; m < end; m++) { |
379 sBuffer += str[m]; | 382 sBuffer += str[m]; |
380 } | 383 } |
381 } else { | 384 } else { |
382 i = j; | 385 i = j; |
383 j = n; | 386 j = n; |
384 for (; n <= i; n += 3) { | 387 for (; n <= i; n += 3) { |
385 int start = order.GetAt(n - 2); | 388 int start = order[n - 2]; |
386 int count1 = order.GetAt(n - 1); | 389 int count1 = order[n - 1]; |
387 int end = start + count1; | 390 int end = start + count1; |
388 for (int m = start; m < end; m++) { | 391 for (int m = start; m < end; m++) { |
389 sBuffer += str[m]; | 392 sBuffer += str[m]; |
390 } | 393 } |
391 } | 394 } |
392 } | 395 } |
393 } | 396 } |
394 } | 397 } |
395 } else { | 398 } else { |
396 int count = order.GetSize(); | 399 int count = pdfium::CollectionSize<int>(order); |
397 FX_BOOL bL2R = FALSE; | 400 FX_BOOL bL2R = FALSE; |
398 for (int j = 0; j < count; j += 3) { | 401 for (int j = 0; j < count; j += 3) { |
399 int ret = order.GetAt(j + 2); | 402 int start = order[j]; |
400 int start = order.GetAt(j); | 403 int count1 = order[j + 1]; |
401 int count1 = order.GetAt(j + 1); | 404 int ret = order[j + 2]; |
402 if (ret == 2 || (j == 0 && ret == 0 && !bL2R)) { | 405 if (ret == 2 || (j == 0 && ret == 0 && !bL2R)) { |
403 int i = j + 3; | 406 int i = j + 3; |
404 while (bR2L && i < count) { | 407 while (bR2L && i < count) { |
405 if (order.GetAt(i + 2) == 1) { | 408 if (order[i + 2] == 1) { |
406 break; | 409 break; |
407 } else { | 410 } else { |
408 i += 3; | 411 i += 3; |
409 } | 412 } |
410 } | 413 } |
411 if (i == 3) { | 414 if (i == 3) { |
412 j = -3; | 415 j = -3; |
413 bL2R = TRUE; | 416 bL2R = TRUE; |
414 continue; | 417 continue; |
415 } | 418 } |
416 int end = str.GetLength() - 1; | 419 int end = str.GetLength() - 1; |
417 if (i < count) { | 420 if (i < count) { |
418 end = order.GetAt(i) - 1; | 421 end = order[i] - 1; |
419 } | 422 } |
420 j = i - 3; | 423 j = i - 3; |
421 for (int n = end; n >= start; n--) { | 424 for (int n = end; n >= start; n--) { |
422 NormalizeCompositeChar(str[i], sBuffer); | 425 NormalizeCompositeChar(str[i], sBuffer); |
423 } | 426 } |
424 } else { | 427 } else { |
425 int end = start + count1; | 428 int end = start + count1; |
426 for (int i = start; i < end; i++) { | 429 for (int i = start; i < end; i++) { |
427 sBuffer += str[i]; | 430 sBuffer += str[i]; |
428 } | 431 } |
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
780 FX_DWORD flags) { | 783 FX_DWORD flags) { |
781 buffer.EstimateSize(0, 10240); | 784 buffer.EstimateSize(0, 10240); |
782 CPDF_Page page; | 785 CPDF_Page page; |
783 page.Load(pDoc, pPage); | 786 page.Load(pDoc, pPage); |
784 CPDF_ParseOptions options; | 787 CPDF_ParseOptions options; |
785 options.m_bTextOnly = TRUE; | 788 options.m_bTextOnly = TRUE; |
786 options.m_bSeparateForm = FALSE; | 789 options.m_bSeparateForm = FALSE; |
787 page.ParseContent(&options); | 790 page.ParseContent(&options); |
788 GetTextStream_Unicode(buffer, &page, TRUE); | 791 GetTextStream_Unicode(buffer, &page, TRUE); |
789 } | 792 } |
OLD | NEW |