| Index: core/src/fpdftext/fpdf_text.cpp
|
| diff --git a/core/src/fpdftext/fpdf_text.cpp b/core/src/fpdftext/fpdf_text.cpp
|
| index 7203a694abc5f1dc26f1a26cf4602c188e07d24b..52fcb5abd7508f57354d6b8dd5e36b4c0840f31c 100644
|
| --- a/core/src/fpdftext/fpdf_text.cpp
|
| +++ b/core/src/fpdftext/fpdf_text.cpp
|
| @@ -14,765 +14,791 @@
|
| #include "text_int.h"
|
| #include "txtproc.h"
|
|
|
| -CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, int destcp, const FX_CHAR* defchar)
|
| -{
|
| - if (destcp == 0) {
|
| - if (unicode < 0x80) {
|
| - return CFX_ByteString((char)unicode);
|
| - }
|
| - const FX_CHAR* altstr = FCS_GetAltStr(unicode);
|
| - if (altstr) {
|
| - return CFX_ByteString(altstr, -1);
|
| - }
|
| - return CFX_ByteString(defchar, -1);
|
| - }
|
| - char buf[10];
|
| - int iDef = 0;
|
| - int ret = FXSYS_WideCharToMultiByte(destcp, 0, (wchar_t*)&unicode, 1, buf, 10, NULL, &iDef);
|
| - if (ret && !iDef) {
|
| - return CFX_ByteString(buf, ret);
|
| +CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode,
|
| + int destcp,
|
| + const FX_CHAR* defchar) {
|
| + if (destcp == 0) {
|
| + if (unicode < 0x80) {
|
| + return CFX_ByteString((char)unicode);
|
| }
|
| const FX_CHAR* altstr = FCS_GetAltStr(unicode);
|
| if (altstr) {
|
| - return CFX_ByteString(altstr, -1);
|
| + return CFX_ByteString(altstr, -1);
|
| }
|
| return CFX_ByteString(defchar, -1);
|
| + }
|
| + char buf[10];
|
| + int iDef = 0;
|
| + int ret = FXSYS_WideCharToMultiByte(destcp, 0, (wchar_t*)&unicode, 1, buf, 10,
|
| + NULL, &iDef);
|
| + if (ret && !iDef) {
|
| + return CFX_ByteString(buf, ret);
|
| + }
|
| + const FX_CHAR* altstr = FCS_GetAltStr(unicode);
|
| + if (altstr) {
|
| + return CFX_ByteString(altstr, -1);
|
| + }
|
| + return CFX_ByteString(defchar, -1);
|
| }
|
| -CTextPage::CTextPage()
|
| -{
|
| +CTextPage::CTextPage() {}
|
| +CTextPage::~CTextPage() {
|
| + int i;
|
| + for (i = 0; i < m_BaseLines.GetSize(); i++) {
|
| + CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| + delete pBaseLine;
|
| + }
|
| + for (i = 0; i < m_TextColumns.GetSize(); i++) {
|
| + CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i);
|
| + delete pTextColumn;
|
| + }
|
| }
|
| -CTextPage::~CTextPage()
|
| -{
|
| - int i;
|
| - for (i = 0; i < m_BaseLines.GetSize(); i ++) {
|
| - CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| - delete pBaseLine;
|
| - }
|
| - for (i = 0; i < m_TextColumns.GetSize(); i ++) {
|
| - CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i);
|
| - delete pTextColumn;
|
| - }
|
| -}
|
| -void CTextPage::ProcessObject(CPDF_PageObject* pObject)
|
| -{
|
| - if (pObject->m_Type != PDFPAGE_TEXT) {
|
| - return;
|
| - }
|
| - CPDF_TextObject* pText = (CPDF_TextObject*)pObject;
|
| - CPDF_Font* pFont = pText->m_TextState.GetFont();
|
| - int count = pText->CountItems();
|
| - FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2);
|
| - pText->CalcCharPos(pPosArray);
|
| +void CTextPage::ProcessObject(CPDF_PageObject* pObject) {
|
| + if (pObject->m_Type != PDFPAGE_TEXT) {
|
| + return;
|
| + }
|
| + CPDF_TextObject* pText = (CPDF_TextObject*)pObject;
|
| + CPDF_Font* pFont = pText->m_TextState.GetFont();
|
| + int count = pText->CountItems();
|
| + FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2);
|
| + pText->CalcCharPos(pPosArray);
|
|
|
| - FX_FLOAT fontsize_h = pText->m_TextState.GetFontSizeH();
|
| - FX_FLOAT fontsize_v = pText->m_TextState.GetFontSizeV();
|
| - FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' ');
|
| - FX_FLOAT spacew = 0;
|
| - if (space_charcode != -1) {
|
| - spacew = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000;
|
| - }
|
| - if (spacew == 0) {
|
| - spacew = fontsize_h / 4;
|
| - }
|
| - if (pText->m_TextState.GetBaselineAngle() != 0) {
|
| - int cc = 0;
|
| - CFX_AffineMatrix matrix;
|
| - pText->GetTextMatrix(&matrix);
|
| - for (int i = 0; i < pText->m_nChars; i ++) {
|
| - FX_DWORD charcode = pText->m_nChars == 1 ? (FX_DWORD)(uintptr_t)pText->m_pCharCodes : pText->m_pCharCodes[i];
|
| - if (charcode == (FX_DWORD) - 1) {
|
| - continue;
|
| - }
|
| - FX_RECT char_box;
|
| - pFont->GetCharBBox(charcode, char_box);
|
| - FX_FLOAT char_left = pPosArray ? pPosArray[cc * 2] : char_box.left * pText->m_TextState.GetFontSize() / 1000;
|
| - FX_FLOAT char_right = pPosArray ? pPosArray[cc * 2 + 1] : char_box.right * pText->m_TextState.GetFontSize() / 1000;
|
| - FX_FLOAT char_top = char_box.top * pText->m_TextState.GetFontSize() / 1000;
|
| - FX_FLOAT char_bottom = char_box.bottom * pText->m_TextState.GetFontSize() / 1000;
|
| - cc ++;
|
| - FX_FLOAT char_origx, char_origy;
|
| - matrix.Transform(char_left, 0, char_origx, char_origy);
|
| - matrix.TransformRect(char_left, char_right, char_top, char_bottom);
|
| - CFX_ByteString str;
|
| - pFont->AppendChar(str, charcode);
|
| - InsertTextBox(NULL, char_origy, char_left, char_right, char_top,
|
| - char_bottom, spacew, fontsize_v, str, pFont);
|
| - }
|
| - if (pPosArray) {
|
| - FX_Free(pPosArray);
|
| - }
|
| - return;
|
| - }
|
| - FX_FLOAT ratio_h = fontsize_h / pText->m_TextState.GetFontSize();
|
| - for (int ii = 0; ii < count * 2; ii ++) {
|
| - pPosArray[ii] *= ratio_h;
|
| - }
|
| - FX_FLOAT baseline = pText->m_PosY;
|
| - CTextBaseLine* pBaseLine = NULL;
|
| - FX_FLOAT topy = pText->m_Top;
|
| - FX_FLOAT bottomy = pText->m_Bottom;
|
| - FX_FLOAT leftx = pText->m_Left;
|
| + FX_FLOAT fontsize_h = pText->m_TextState.GetFontSizeH();
|
| + FX_FLOAT fontsize_v = pText->m_TextState.GetFontSizeV();
|
| + FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' ');
|
| + FX_FLOAT spacew = 0;
|
| + if (space_charcode != -1) {
|
| + spacew = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000;
|
| + }
|
| + if (spacew == 0) {
|
| + spacew = fontsize_h / 4;
|
| + }
|
| + if (pText->m_TextState.GetBaselineAngle() != 0) {
|
| int cc = 0;
|
| - CFX_ByteString segment;
|
| - int space_count = 0;
|
| - FX_FLOAT last_left = 0, last_right = 0, segment_left = 0, segment_right = 0;
|
| - for (int i = 0; i < pText->m_nChars; i ++) {
|
| - FX_DWORD charcode = pText->m_nChars == 1 ? (FX_DWORD)(uintptr_t)pText->m_pCharCodes : pText->m_pCharCodes[i];
|
| - if (charcode == (FX_DWORD) - 1) {
|
| - continue;
|
| - }
|
| - FX_FLOAT char_left = pPosArray[cc * 2];
|
| - FX_FLOAT char_right = pPosArray[cc * 2 + 1];
|
| - cc ++;
|
| - if (char_left < last_left || (char_left - last_right) > spacew / 2) {
|
| - pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right,
|
| - topy, bottomy, spacew, fontsize_v, segment, pFont);
|
| - segment_left = char_left;
|
| - segment = "";
|
| - }
|
| - if (space_count > 1) {
|
| - pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right,
|
| - topy, bottomy, spacew, fontsize_v, segment, pFont);
|
| - segment = "";
|
| - } else if (space_count == 1) {
|
| - pFont->AppendChar(segment, ' ');
|
| - }
|
| - if (segment.GetLength() == 0) {
|
| - segment_left = char_left;
|
| - }
|
| - segment_right = char_right;
|
| - pFont->AppendChar(segment, charcode);
|
| - space_count = 0;
|
| - last_left = char_left;
|
| - last_right = char_right;
|
| - }
|
| - if (segment.GetLength())
|
| - pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right,
|
| - topy, bottomy, spacew, fontsize_v, segment, pFont);
|
| - FX_Free(pPosArray);
|
| + CFX_AffineMatrix matrix;
|
| + pText->GetTextMatrix(&matrix);
|
| + for (int i = 0; i < pText->m_nChars; i++) {
|
| + FX_DWORD charcode = pText->m_nChars == 1
|
| + ? (FX_DWORD)(uintptr_t)pText->m_pCharCodes
|
| + : pText->m_pCharCodes[i];
|
| + if (charcode == (FX_DWORD)-1) {
|
| + continue;
|
| + }
|
| + FX_RECT char_box;
|
| + pFont->GetCharBBox(charcode, char_box);
|
| + FX_FLOAT char_left =
|
| + pPosArray ? pPosArray[cc * 2]
|
| + : char_box.left * pText->m_TextState.GetFontSize() / 1000;
|
| + FX_FLOAT char_right =
|
| + pPosArray ? pPosArray[cc * 2 + 1]
|
| + : char_box.right * pText->m_TextState.GetFontSize() / 1000;
|
| + FX_FLOAT char_top =
|
| + char_box.top * pText->m_TextState.GetFontSize() / 1000;
|
| + FX_FLOAT char_bottom =
|
| + char_box.bottom * pText->m_TextState.GetFontSize() / 1000;
|
| + cc++;
|
| + FX_FLOAT char_origx, char_origy;
|
| + matrix.Transform(char_left, 0, char_origx, char_origy);
|
| + matrix.TransformRect(char_left, char_right, char_top, char_bottom);
|
| + CFX_ByteString str;
|
| + pFont->AppendChar(str, charcode);
|
| + InsertTextBox(NULL, char_origy, char_left, char_right, char_top,
|
| + char_bottom, spacew, fontsize_v, str, pFont);
|
| + }
|
| + if (pPosArray) {
|
| + FX_Free(pPosArray);
|
| + }
|
| + return;
|
| + }
|
| + FX_FLOAT ratio_h = fontsize_h / pText->m_TextState.GetFontSize();
|
| + for (int ii = 0; ii < count * 2; ii++) {
|
| + pPosArray[ii] *= ratio_h;
|
| + }
|
| + FX_FLOAT baseline = pText->m_PosY;
|
| + CTextBaseLine* pBaseLine = NULL;
|
| + FX_FLOAT topy = pText->m_Top;
|
| + FX_FLOAT bottomy = pText->m_Bottom;
|
| + FX_FLOAT leftx = pText->m_Left;
|
| + int cc = 0;
|
| + CFX_ByteString segment;
|
| + int space_count = 0;
|
| + FX_FLOAT last_left = 0, last_right = 0, segment_left = 0, segment_right = 0;
|
| + for (int i = 0; i < pText->m_nChars; i++) {
|
| + FX_DWORD charcode = pText->m_nChars == 1
|
| + ? (FX_DWORD)(uintptr_t)pText->m_pCharCodes
|
| + : pText->m_pCharCodes[i];
|
| + if (charcode == (FX_DWORD)-1) {
|
| + continue;
|
| + }
|
| + FX_FLOAT char_left = pPosArray[cc * 2];
|
| + FX_FLOAT char_right = pPosArray[cc * 2 + 1];
|
| + cc++;
|
| + if (char_left < last_left || (char_left - last_right) > spacew / 2) {
|
| + pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left,
|
| + leftx + segment_right, topy, bottomy, spacew,
|
| + fontsize_v, segment, pFont);
|
| + segment_left = char_left;
|
| + segment = "";
|
| + }
|
| + if (space_count > 1) {
|
| + pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left,
|
| + leftx + segment_right, topy, bottomy, spacew,
|
| + fontsize_v, segment, pFont);
|
| + segment = "";
|
| + } else if (space_count == 1) {
|
| + pFont->AppendChar(segment, ' ');
|
| + }
|
| + if (segment.GetLength() == 0) {
|
| + segment_left = char_left;
|
| + }
|
| + segment_right = char_right;
|
| + pFont->AppendChar(segment, charcode);
|
| + space_count = 0;
|
| + last_left = char_left;
|
| + last_right = char_right;
|
| + }
|
| + if (segment.GetLength())
|
| + pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left,
|
| + leftx + segment_right, topy, bottomy, spacew,
|
| + fontsize_v, segment, pFont);
|
| + FX_Free(pPosArray);
|
| }
|
| -CTextBaseLine* CTextPage::InsertTextBox(CTextBaseLine* pBaseLine, FX_FLOAT basey, FX_FLOAT leftx,
|
| - FX_FLOAT rightx, FX_FLOAT topy, FX_FLOAT bottomy, FX_FLOAT spacew, FX_FLOAT fontsize_v,
|
| - CFX_ByteString& str, CPDF_Font* pFont)
|
| -{
|
| - if (str.GetLength() == 0) {
|
| - return NULL;
|
| +CTextBaseLine* CTextPage::InsertTextBox(CTextBaseLine* pBaseLine,
|
| + FX_FLOAT basey,
|
| + FX_FLOAT leftx,
|
| + FX_FLOAT rightx,
|
| + FX_FLOAT topy,
|
| + FX_FLOAT bottomy,
|
| + FX_FLOAT spacew,
|
| + FX_FLOAT fontsize_v,
|
| + CFX_ByteString& str,
|
| + CPDF_Font* pFont) {
|
| + if (str.GetLength() == 0) {
|
| + return NULL;
|
| + }
|
| + if (pBaseLine == NULL) {
|
| + int i;
|
| + for (i = 0; i < m_BaseLines.GetSize(); i++) {
|
| + CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| + if (pExistLine->m_BaseLine == basey) {
|
| + pBaseLine = pExistLine;
|
| + break;
|
| + }
|
| + if (pExistLine->m_BaseLine < basey) {
|
| + break;
|
| + }
|
| }
|
| if (pBaseLine == NULL) {
|
| - int i;
|
| - for (i = 0; i < m_BaseLines.GetSize(); i ++) {
|
| - CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| - if (pExistLine->m_BaseLine == basey) {
|
| - pBaseLine = pExistLine;
|
| - break;
|
| - }
|
| - if (pExistLine->m_BaseLine < basey) {
|
| - break;
|
| - }
|
| - }
|
| - if (pBaseLine == NULL) {
|
| - pBaseLine = new CTextBaseLine;
|
| - pBaseLine->m_BaseLine = basey;
|
| - m_BaseLines.InsertAt(i, pBaseLine);
|
| - }
|
| - }
|
| - CFX_WideString text;
|
| - const FX_CHAR* pStr = str;
|
| - int len = str.GetLength(), offset = 0;
|
| - while (offset < len) {
|
| - FX_DWORD ch = pFont->GetNextChar(pStr, len, offset);
|
| - CFX_WideString unicode_str = pFont->UnicodeFromCharCode(ch);
|
| - if (unicode_str.IsEmpty()) {
|
| - text += (FX_WCHAR)ch;
|
| - }
|
| - else {
|
| - text += unicode_str;
|
| - }
|
| + pBaseLine = new CTextBaseLine;
|
| + pBaseLine->m_BaseLine = basey;
|
| + m_BaseLines.InsertAt(i, pBaseLine);
|
| + }
|
| + }
|
| + CFX_WideString text;
|
| + const FX_CHAR* pStr = str;
|
| + int len = str.GetLength(), offset = 0;
|
| + while (offset < len) {
|
| + FX_DWORD ch = pFont->GetNextChar(pStr, len, offset);
|
| + CFX_WideString unicode_str = pFont->UnicodeFromCharCode(ch);
|
| + if (unicode_str.IsEmpty()) {
|
| + text += (FX_WCHAR)ch;
|
| + } else {
|
| + text += unicode_str;
|
| }
|
| - pBaseLine->InsertTextBox(leftx, rightx, topy, bottomy, spacew, fontsize_v, text);
|
| - return pBaseLine;
|
| + }
|
| + pBaseLine->InsertTextBox(leftx, rightx, topy, bottomy, spacew, fontsize_v,
|
| + text);
|
| + return pBaseLine;
|
| }
|
| -void CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth)
|
| -{
|
| - FX_FLOAT lastheight = -1;
|
| - FX_FLOAT lastbaseline = -1;
|
| - FX_FLOAT MinLeftX = 1000000;
|
| - FX_FLOAT MaxRightX = 0;
|
| - int i;
|
| - for (i = 0; i < m_BaseLines.GetSize(); i ++) {
|
| - CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| - FX_FLOAT leftx, rightx;
|
| - if (pBaseLine->GetWidth(leftx, rightx)) {
|
| - if (leftx < MinLeftX) {
|
| - MinLeftX = leftx;
|
| - }
|
| - if (rightx > MaxRightX) {
|
| - MaxRightX = rightx;
|
| - }
|
| - }
|
| - }
|
| - for (i = 0; i < m_BaseLines.GetSize(); i ++) {
|
| - CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| - pBaseLine->MergeBoxes();
|
| - }
|
| - for (i = 1; i < m_BaseLines.GetSize(); i ++) {
|
| - CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| - CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1);
|
| - if (pBaseLine->CanMerge(pPrevLine)) {
|
| - pPrevLine->Merge(pBaseLine);
|
| - delete pBaseLine;
|
| - m_BaseLines.RemoveAt(i);
|
| - i --;
|
| - }
|
| - }
|
| - if (m_bAutoWidth) {
|
| - int* widths = FX_Alloc(int, m_BaseLines.GetSize());
|
| - for (i = 0; i < m_BaseLines.GetSize(); i ++) {
|
| - widths[i] = 0;
|
| - CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| - int TotalChars = 0;
|
| - FX_FLOAT TotalWidth = 0;
|
| - int minchars;
|
| - pBaseLine->CountChars(TotalChars, TotalWidth, minchars);
|
| - if (TotalChars) {
|
| - FX_FLOAT charwidth = TotalWidth / TotalChars;
|
| - widths[i] = (int)((MaxRightX - MinLeftX) / charwidth);
|
| - }
|
| - if (widths[i] > 1000) {
|
| - widths[i] = 1000;
|
| - }
|
| - if (widths[i] < minchars) {
|
| - widths[i] = minchars;
|
| - }
|
| - }
|
| - int AvgWidth = 0, widthcount = 0;
|
| - for (i = 0; i < m_BaseLines.GetSize(); i ++)
|
| - if (widths[i]) {
|
| - AvgWidth += widths[i];
|
| - widthcount ++;
|
| - }
|
| - AvgWidth = int((FX_FLOAT)AvgWidth / widthcount + 0.5);
|
| - int MaxWidth = 0;
|
| - for (i = 0; i < m_BaseLines.GetSize(); i ++)
|
| - if (MaxWidth < widths[i]) {
|
| - MaxWidth = widths[i];
|
| - }
|
| - if (MaxWidth > AvgWidth * 6 / 5) {
|
| - MaxWidth = AvgWidth * 6 / 5;
|
| - }
|
| - FX_Free(widths);
|
| - if (iMinWidth < MaxWidth) {
|
| - iMinWidth = MaxWidth;
|
| - }
|
| - }
|
| - for (i = 0; i < m_BaseLines.GetSize(); i ++) {
|
| - CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| - pBaseLine->MergeBoxes();
|
| - }
|
| - if (m_bKeepColumn) {
|
| - FindColumns();
|
| - }
|
| - for (i = 0; i < m_BaseLines.GetSize(); i ++) {
|
| - CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| - if (lastheight >= 0) {
|
| - FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine;
|
| - if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.5) {
|
| - lines.Add(L"");
|
| - }
|
| - }
|
| - lastheight = pBaseLine->m_MaxFontSizeV;
|
| - lastbaseline = pBaseLine->m_BaseLine;
|
| - CFX_WideString str;
|
| - pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth);
|
| - lines.Add(str);
|
| - }
|
| +void CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth) {
|
| + FX_FLOAT lastheight = -1;
|
| + FX_FLOAT lastbaseline = -1;
|
| + FX_FLOAT MinLeftX = 1000000;
|
| + FX_FLOAT MaxRightX = 0;
|
| + int i;
|
| + for (i = 0; i < m_BaseLines.GetSize(); i++) {
|
| + CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| + FX_FLOAT leftx, rightx;
|
| + if (pBaseLine->GetWidth(leftx, rightx)) {
|
| + if (leftx < MinLeftX) {
|
| + MinLeftX = leftx;
|
| + }
|
| + if (rightx > MaxRightX) {
|
| + MaxRightX = rightx;
|
| + }
|
| + }
|
| + }
|
| + for (i = 0; i < m_BaseLines.GetSize(); i++) {
|
| + CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| + pBaseLine->MergeBoxes();
|
| + }
|
| + for (i = 1; i < m_BaseLines.GetSize(); i++) {
|
| + CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| + CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1);
|
| + if (pBaseLine->CanMerge(pPrevLine)) {
|
| + pPrevLine->Merge(pBaseLine);
|
| + delete pBaseLine;
|
| + m_BaseLines.RemoveAt(i);
|
| + i--;
|
| + }
|
| + }
|
| + if (m_bAutoWidth) {
|
| + int* widths = FX_Alloc(int, m_BaseLines.GetSize());
|
| + for (i = 0; i < m_BaseLines.GetSize(); i++) {
|
| + widths[i] = 0;
|
| + CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| + int TotalChars = 0;
|
| + FX_FLOAT TotalWidth = 0;
|
| + int minchars;
|
| + pBaseLine->CountChars(TotalChars, TotalWidth, minchars);
|
| + if (TotalChars) {
|
| + FX_FLOAT charwidth = TotalWidth / TotalChars;
|
| + widths[i] = (int)((MaxRightX - MinLeftX) / charwidth);
|
| + }
|
| + if (widths[i] > 1000) {
|
| + widths[i] = 1000;
|
| + }
|
| + if (widths[i] < minchars) {
|
| + widths[i] = minchars;
|
| + }
|
| + }
|
| + int AvgWidth = 0, widthcount = 0;
|
| + for (i = 0; i < m_BaseLines.GetSize(); i++)
|
| + if (widths[i]) {
|
| + AvgWidth += widths[i];
|
| + widthcount++;
|
| + }
|
| + AvgWidth = int((FX_FLOAT)AvgWidth / widthcount + 0.5);
|
| + int MaxWidth = 0;
|
| + for (i = 0; i < m_BaseLines.GetSize(); i++)
|
| + if (MaxWidth < widths[i]) {
|
| + MaxWidth = widths[i];
|
| + }
|
| + if (MaxWidth > AvgWidth * 6 / 5) {
|
| + MaxWidth = AvgWidth * 6 / 5;
|
| + }
|
| + FX_Free(widths);
|
| + if (iMinWidth < MaxWidth) {
|
| + iMinWidth = MaxWidth;
|
| + }
|
| + }
|
| + for (i = 0; i < m_BaseLines.GetSize(); i++) {
|
| + CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| + pBaseLine->MergeBoxes();
|
| + }
|
| + if (m_bKeepColumn) {
|
| + FindColumns();
|
| + }
|
| + for (i = 0; i < m_BaseLines.GetSize(); i++) {
|
| + CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| + if (lastheight >= 0) {
|
| + FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine;
|
| + if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.5) {
|
| + lines.Add(L"");
|
| + }
|
| + }
|
| + lastheight = pBaseLine->m_MaxFontSizeV;
|
| + lastbaseline = pBaseLine->m_BaseLine;
|
| + CFX_WideString str;
|
| + pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth);
|
| + lines.Add(str);
|
| + }
|
| }
|
| -void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest)
|
| -{
|
| - wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
|
| - FX_WCHAR* pDst = NULL;
|
| - FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
|
| - if (nCount < 1 ) {
|
| - sDest += wChar;
|
| - return;
|
| - }
|
| - pDst = new FX_WCHAR[nCount];
|
| - FX_Unicode_GetNormalization(wChar, pDst);
|
| - for (int nIndex = 0; nIndex < nCount; nIndex++) {
|
| - sDest += pDst[nIndex];
|
| - }
|
| - delete[] pDst;
|
| +void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest) {
|
| + wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
|
| + FX_WCHAR* pDst = NULL;
|
| + FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
|
| + if (nCount < 1) {
|
| + sDest += wChar;
|
| + return;
|
| + }
|
| + pDst = new FX_WCHAR[nCount];
|
| + FX_Unicode_GetNormalization(wChar, pDst);
|
| + for (int nIndex = 0; nIndex < nCount; nIndex++) {
|
| + sDest += pDst[nIndex];
|
| + }
|
| + delete[] pDst;
|
| }
|
| -void NormalizeString(CFX_WideString& str)
|
| -{
|
| - if (str.GetLength() <= 0) {
|
| - return;
|
| - }
|
| - CFX_WideString sBuffer;
|
| - nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create());
|
| - CFX_WordArray order;
|
| - FX_BOOL bR2L = FALSE;
|
| - int32_t start = 0, count = 0, i = 0;
|
| - int nR2L = 0, nL2R = 0;
|
| - for (i = 0; i < str.GetLength(); i++) {
|
| - if(pBidiChar->AppendChar(str.GetAt(i))) {
|
| - int32_t ret = pBidiChar->GetBidiInfo(start, count);
|
| - order.Add(start);
|
| - order.Add(count);
|
| - order.Add(ret);
|
| - if(!bR2L) {
|
| - if(ret == 2) {
|
| - nR2L++;
|
| - } else if (ret == 1) {
|
| - nL2R++;
|
| - }
|
| - }
|
| - }
|
| - }
|
| - if(pBidiChar->EndChar()) {
|
| - int32_t ret = pBidiChar->GetBidiInfo(start, count);
|
| - order.Add(start);
|
| - order.Add(count);
|
| - order.Add(ret);
|
| - if(!bR2L) {
|
| - if(ret == 2) {
|
| - nR2L++;
|
| - } else if(ret == 1) {
|
| - nL2R++;
|
| - }
|
| - }
|
| - }
|
| - if(nR2L > 0 && nR2L >= nL2R) {
|
| - bR2L = TRUE;
|
| - }
|
| - if(bR2L) {
|
| - int count = order.GetSize();
|
| - for(int j = count - 1; j > 0; j -= 3) {
|
| - int ret = order.GetAt(j);
|
| - int start = order.GetAt(j - 2);
|
| - int count1 = order.GetAt(j - 1);
|
| - if(ret == 2 || ret == 0) {
|
| - for(int i = start + count1 - 1; i >= start; i--) {
|
| - NormalizeCompositeChar(str[i], sBuffer);
|
| - }
|
| - } else {
|
| - i = j;
|
| - FX_BOOL bSymbol = FALSE;
|
| - while(i > 0 && order.GetAt(i) != 2) {
|
| - bSymbol = !order.GetAt(i);
|
| - i -= 3;
|
| - }
|
| - int end = start + count1 ;
|
| - int n = 0;
|
| - if(bSymbol) {
|
| - n = i + 6;
|
| - } else {
|
| - n = i + 3;
|
| - }
|
| - if(n >= j) {
|
| - for(int m = start; m < end; m++) {
|
| - sBuffer += str[m];
|
| - }
|
| - } else {
|
| - i = j;
|
| - j = n;
|
| - for(; n <= i; n += 3) {
|
| - int start = order.GetAt(n - 2);
|
| - int count1 = order.GetAt(n - 1);
|
| - int end = start + count1 ;
|
| - for(int m = start; m < end; m++) {
|
| - sBuffer += str[m];
|
| - }
|
| - }
|
| - }
|
| - }
|
| +void NormalizeString(CFX_WideString& str) {
|
| + if (str.GetLength() <= 0) {
|
| + return;
|
| + }
|
| + CFX_WideString sBuffer;
|
| + nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create());
|
| + CFX_WordArray order;
|
| + FX_BOOL bR2L = FALSE;
|
| + int32_t start = 0, count = 0, i = 0;
|
| + int nR2L = 0, nL2R = 0;
|
| + for (i = 0; i < str.GetLength(); i++) {
|
| + if (pBidiChar->AppendChar(str.GetAt(i))) {
|
| + int32_t ret = pBidiChar->GetBidiInfo(start, count);
|
| + order.Add(start);
|
| + order.Add(count);
|
| + order.Add(ret);
|
| + if (!bR2L) {
|
| + if (ret == 2) {
|
| + nR2L++;
|
| + } else if (ret == 1) {
|
| + nL2R++;
|
| + }
|
| + }
|
| + }
|
| + }
|
| + if (pBidiChar->EndChar()) {
|
| + int32_t ret = pBidiChar->GetBidiInfo(start, count);
|
| + order.Add(start);
|
| + order.Add(count);
|
| + order.Add(ret);
|
| + if (!bR2L) {
|
| + if (ret == 2) {
|
| + nR2L++;
|
| + } else if (ret == 1) {
|
| + nL2R++;
|
| + }
|
| + }
|
| + }
|
| + if (nR2L > 0 && nR2L >= nL2R) {
|
| + bR2L = TRUE;
|
| + }
|
| + if (bR2L) {
|
| + int count = order.GetSize();
|
| + for (int j = count - 1; j > 0; j -= 3) {
|
| + int ret = order.GetAt(j);
|
| + int start = order.GetAt(j - 2);
|
| + int count1 = order.GetAt(j - 1);
|
| + if (ret == 2 || ret == 0) {
|
| + for (int i = start + count1 - 1; i >= start; i--) {
|
| + NormalizeCompositeChar(str[i], sBuffer);
|
| + }
|
| + } else {
|
| + i = j;
|
| + FX_BOOL bSymbol = FALSE;
|
| + while (i > 0 && order.GetAt(i) != 2) {
|
| + bSymbol = !order.GetAt(i);
|
| + i -= 3;
|
| + }
|
| + int end = start + count1;
|
| + int n = 0;
|
| + if (bSymbol) {
|
| + n = i + 6;
|
| + } else {
|
| + n = i + 3;
|
| }
|
| - } else {
|
| - int count = order.GetSize();
|
| - FX_BOOL bL2R = FALSE;
|
| - for(int j = 0; j < count; j += 3) {
|
| - int ret = order.GetAt(j + 2);
|
| - int start = order.GetAt(j);
|
| - int count1 = order.GetAt(j + 1);
|
| - if(ret == 2 || (j == 0 && ret == 0 && !bL2R)) {
|
| - int i = j + 3;
|
| - while(bR2L && i < count) {
|
| - if(order.GetAt(i + 2) == 1) {
|
| - break;
|
| - } else {
|
| - i += 3;
|
| - }
|
| - }
|
| - if(i == 3) {
|
| - j = -3;
|
| - bL2R = TRUE;
|
| - continue;
|
| - }
|
| - int end = str.GetLength() - 1;
|
| - if(i < count) {
|
| - end = order.GetAt(i) - 1;
|
| - }
|
| - j = i - 3;
|
| - for(int n = end; n >= start; n--) {
|
| - NormalizeCompositeChar(str[i], sBuffer);
|
| - }
|
| - } else {
|
| - int end = start + count1 ;
|
| - for(int i = start; i < end; i++) {
|
| - sBuffer += str[i];
|
| - }
|
| + if (n >= j) {
|
| + for (int m = start; m < end; m++) {
|
| + sBuffer += str[m];
|
| + }
|
| + } else {
|
| + i = j;
|
| + j = n;
|
| + for (; n <= i; n += 3) {
|
| + int start = order.GetAt(n - 2);
|
| + int count1 = order.GetAt(n - 1);
|
| + int end = start + count1;
|
| + for (int m = start; m < end; m++) {
|
| + sBuffer += str[m];
|
| }
|
| + }
|
| + }
|
| + }
|
| + }
|
| + } else {
|
| + int count = order.GetSize();
|
| + FX_BOOL bL2R = FALSE;
|
| + for (int j = 0; j < count; j += 3) {
|
| + int ret = order.GetAt(j + 2);
|
| + int start = order.GetAt(j);
|
| + int count1 = order.GetAt(j + 1);
|
| + if (ret == 2 || (j == 0 && ret == 0 && !bL2R)) {
|
| + int i = j + 3;
|
| + while (bR2L && i < count) {
|
| + if (order.GetAt(i + 2) == 1) {
|
| + break;
|
| + } else {
|
| + i += 3;
|
| + }
|
| }
|
| - }
|
| - str.Empty();
|
| - str += sBuffer;
|
| -}
|
| -static FX_BOOL IsNumber(CFX_WideString& str)
|
| -{
|
| - for (int i = 0; i < str.GetLength(); i ++) {
|
| - FX_WCHAR ch = str[i];
|
| - if ((ch < '0' || ch > '9') && ch != '-' && ch != '+' && ch != '.' && ch != ' ') {
|
| - return FALSE;
|
| + if (i == 3) {
|
| + j = -3;
|
| + bL2R = TRUE;
|
| + continue;
|
| }
|
| - }
|
| - return TRUE;
|
| -}
|
| -void CTextPage::FindColumns()
|
| -{
|
| - int i;
|
| - for (i = 0; i < m_BaseLines.GetSize(); i ++) {
|
| - CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| - for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) {
|
| - CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j);
|
| - CTextColumn* pColumn = FindColumn(pTextBox->m_Right);
|
| - if (pColumn == NULL) {
|
| - pColumn = new CTextColumn;
|
| - pColumn->m_Count = 1;
|
| - pColumn->m_AvgPos = pTextBox->m_Right;
|
| - pColumn->m_TextPos = -1;
|
| - m_TextColumns.Add(pColumn);
|
| - } else {
|
| - pColumn->m_AvgPos = (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) /
|
| - (pColumn->m_Count + 1);
|
| - pColumn->m_Count ++;
|
| - }
|
| + int end = str.GetLength() - 1;
|
| + if (i < count) {
|
| + end = order.GetAt(i) - 1;
|
| }
|
| - }
|
| - int mincount = m_BaseLines.GetSize() / 4;
|
| - for (i = 0; i < m_TextColumns.GetSize(); i ++) {
|
| - CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i);
|
| - if (pTextColumn->m_Count >= mincount) {
|
| - continue;
|
| + j = i - 3;
|
| + for (int n = end; n >= start; n--) {
|
| + NormalizeCompositeChar(str[i], sBuffer);
|
| }
|
| - delete pTextColumn;
|
| - m_TextColumns.RemoveAt(i);
|
| - i --;
|
| - }
|
| - for (i = 0; i < m_BaseLines.GetSize(); i ++) {
|
| - CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| - for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) {
|
| - CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j);
|
| - if (IsNumber(pTextBox->m_Text)) {
|
| - pTextBox->m_pColumn = FindColumn(pTextBox->m_Right);
|
| - }
|
| + } else {
|
| + int end = start + count1;
|
| + for (int i = start; i < end; i++) {
|
| + sBuffer += str[i];
|
| }
|
| + }
|
| }
|
| + }
|
| + str.Empty();
|
| + str += sBuffer;
|
| }
|
| -CTextColumn* CTextPage::FindColumn(FX_FLOAT xpos)
|
| -{
|
| - for (int i = 0; i < m_TextColumns.GetSize(); i ++) {
|
| - CTextColumn* pColumn = (CTextColumn*)m_TextColumns.GetAt(i);
|
| - if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) {
|
| - return pColumn;
|
| - }
|
| - }
|
| - return NULL;
|
| +static FX_BOOL IsNumber(CFX_WideString& str) {
|
| + for (int i = 0; i < str.GetLength(); i++) {
|
| + FX_WCHAR ch = str[i];
|
| + if ((ch < '0' || ch > '9') && ch != '-' && ch != '+' && ch != '.' &&
|
| + ch != ' ') {
|
| + return FALSE;
|
| + }
|
| + }
|
| + return TRUE;
|
| }
|
| -void CTextPage::BreakSpace(CPDF_TextObject* pTextObj)
|
| -{
|
| +void CTextPage::FindColumns() {
|
| + int i;
|
| + for (i = 0; i < m_BaseLines.GetSize(); i++) {
|
| + CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| + for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) {
|
| + CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j);
|
| + CTextColumn* pColumn = FindColumn(pTextBox->m_Right);
|
| + if (pColumn == NULL) {
|
| + pColumn = new CTextColumn;
|
| + pColumn->m_Count = 1;
|
| + pColumn->m_AvgPos = pTextBox->m_Right;
|
| + pColumn->m_TextPos = -1;
|
| + m_TextColumns.Add(pColumn);
|
| + } else {
|
| + pColumn->m_AvgPos =
|
| + (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) /
|
| + (pColumn->m_Count + 1);
|
| + pColumn->m_Count++;
|
| + }
|
| + }
|
| + }
|
| + int mincount = m_BaseLines.GetSize() / 4;
|
| + for (i = 0; i < m_TextColumns.GetSize(); i++) {
|
| + CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i);
|
| + if (pTextColumn->m_Count >= mincount) {
|
| + continue;
|
| + }
|
| + delete pTextColumn;
|
| + m_TextColumns.RemoveAt(i);
|
| + i--;
|
| + }
|
| + for (i = 0; i < m_BaseLines.GetSize(); i++) {
|
| + CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i);
|
| + for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) {
|
| + CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j);
|
| + if (IsNumber(pTextBox->m_Text)) {
|
| + pTextBox->m_pColumn = FindColumn(pTextBox->m_Right);
|
| + }
|
| + }
|
| + }
|
| }
|
| -CTextBaseLine::CTextBaseLine()
|
| -{
|
| - m_Top = -100000;
|
| - m_Bottom = 100000;
|
| - m_MaxFontSizeV = 0;
|
| +CTextColumn* CTextPage::FindColumn(FX_FLOAT xpos) {
|
| + for (int i = 0; i < m_TextColumns.GetSize(); i++) {
|
| + CTextColumn* pColumn = (CTextColumn*)m_TextColumns.GetAt(i);
|
| + if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) {
|
| + return pColumn;
|
| + }
|
| + }
|
| + return NULL;
|
| }
|
| -CTextBaseLine::~CTextBaseLine()
|
| -{
|
| - for (int i = 0; i < m_TextList.GetSize(); i ++) {
|
| - CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| - delete pText;
|
| - }
|
| +void CTextPage::BreakSpace(CPDF_TextObject* pTextObj) {}
|
| +CTextBaseLine::CTextBaseLine() {
|
| + m_Top = -100000;
|
| + m_Bottom = 100000;
|
| + m_MaxFontSizeV = 0;
|
| }
|
| -void CTextBaseLine::InsertTextBox(FX_FLOAT leftx, FX_FLOAT rightx, FX_FLOAT topy, FX_FLOAT bottomy,
|
| - FX_FLOAT spacew, FX_FLOAT fontsize_v, const CFX_WideString& text)
|
| -{
|
| - if (m_Top < topy) {
|
| - m_Top = topy;
|
| - }
|
| - if (m_Bottom > bottomy) {
|
| - m_Bottom = bottomy;
|
| - }
|
| - if (m_MaxFontSizeV < fontsize_v) {
|
| - m_MaxFontSizeV = fontsize_v;
|
| - }
|
| - int i;
|
| - for (i = 0; i < m_TextList.GetSize(); i ++) {
|
| - CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| - if (pText->m_Left > leftx) {
|
| - break;
|
| - }
|
| - }
|
| - CTextBox* pText = new CTextBox;
|
| - pText->m_Text = text;
|
| - pText->m_Left = leftx;
|
| - pText->m_Right = rightx;
|
| - pText->m_Top = topy;
|
| - pText->m_Bottom = bottomy;
|
| - pText->m_SpaceWidth = spacew;
|
| - pText->m_FontSizeV = fontsize_v;
|
| - pText->m_pColumn = NULL;
|
| - m_TextList.InsertAt(i, pText);
|
| +CTextBaseLine::~CTextBaseLine() {
|
| + for (int i = 0; i < m_TextList.GetSize(); i++) {
|
| + CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| + delete pText;
|
| + }
|
| }
|
| -FX_BOOL GetIntersection(FX_FLOAT low1, FX_FLOAT high1, FX_FLOAT low2, FX_FLOAT high2,
|
| - FX_FLOAT& interlow, FX_FLOAT& interhigh);
|
| -FX_BOOL CTextBaseLine::CanMerge(CTextBaseLine* pOther)
|
| -{
|
| - FX_FLOAT inter_top, inter_bottom;
|
| - if (!GetIntersection(m_Bottom, m_Top, pOther->m_Bottom, pOther->m_Top,
|
| - inter_bottom, inter_top)) {
|
| - return FALSE;
|
| - }
|
| - FX_FLOAT inter_h = inter_top - inter_bottom;
|
| - if (inter_h < (m_Top - m_Bottom) / 2 && inter_h < (pOther->m_Top - pOther->m_Bottom) / 2) {
|
| +void CTextBaseLine::InsertTextBox(FX_FLOAT leftx,
|
| + FX_FLOAT rightx,
|
| + FX_FLOAT topy,
|
| + FX_FLOAT bottomy,
|
| + FX_FLOAT spacew,
|
| + FX_FLOAT fontsize_v,
|
| + const CFX_WideString& text) {
|
| + if (m_Top < topy) {
|
| + m_Top = topy;
|
| + }
|
| + if (m_Bottom > bottomy) {
|
| + m_Bottom = bottomy;
|
| + }
|
| + if (m_MaxFontSizeV < fontsize_v) {
|
| + m_MaxFontSizeV = fontsize_v;
|
| + }
|
| + int i;
|
| + for (i = 0; i < m_TextList.GetSize(); i++) {
|
| + CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| + if (pText->m_Left > leftx) {
|
| + break;
|
| + }
|
| + }
|
| + CTextBox* pText = new CTextBox;
|
| + pText->m_Text = text;
|
| + pText->m_Left = leftx;
|
| + pText->m_Right = rightx;
|
| + pText->m_Top = topy;
|
| + pText->m_Bottom = bottomy;
|
| + pText->m_SpaceWidth = spacew;
|
| + pText->m_FontSizeV = fontsize_v;
|
| + pText->m_pColumn = NULL;
|
| + m_TextList.InsertAt(i, pText);
|
| +}
|
| +FX_BOOL GetIntersection(FX_FLOAT low1,
|
| + FX_FLOAT high1,
|
| + FX_FLOAT low2,
|
| + FX_FLOAT high2,
|
| + FX_FLOAT& interlow,
|
| + FX_FLOAT& interhigh);
|
| +FX_BOOL CTextBaseLine::CanMerge(CTextBaseLine* pOther) {
|
| + FX_FLOAT inter_top, inter_bottom;
|
| + if (!GetIntersection(m_Bottom, m_Top, pOther->m_Bottom, pOther->m_Top,
|
| + inter_bottom, inter_top)) {
|
| + return FALSE;
|
| + }
|
| + FX_FLOAT inter_h = inter_top - inter_bottom;
|
| + if (inter_h < (m_Top - m_Bottom) / 2 &&
|
| + inter_h < (pOther->m_Top - pOther->m_Bottom) / 2) {
|
| + return FALSE;
|
| + }
|
| + FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine);
|
| + for (int i = 0; i < m_TextList.GetSize(); i++) {
|
| + CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| + for (int j = 0; j < pOther->m_TextList.GetSize(); j++) {
|
| + CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j);
|
| + FX_FLOAT inter_left, inter_right;
|
| + if (!GetIntersection(pText->m_Left, pText->m_Right, pOtherText->m_Left,
|
| + pOtherText->m_Right, inter_left, inter_right)) {
|
| + continue;
|
| + }
|
| + FX_FLOAT inter_w = inter_right - inter_left;
|
| + if (inter_w < pText->m_SpaceWidth / 2 &&
|
| + inter_w < pOtherText->m_SpaceWidth / 2) {
|
| + continue;
|
| + }
|
| + if (dy >= (pText->m_Bottom - pText->m_Top) / 2 ||
|
| + dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) {
|
| return FALSE;
|
| + }
|
| }
|
| - FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine);
|
| - for (int i = 0; i < m_TextList.GetSize(); i ++) {
|
| - CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| - for (int j = 0; j < pOther->m_TextList.GetSize(); j ++) {
|
| - CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j);
|
| - FX_FLOAT inter_left, inter_right;
|
| - if (!GetIntersection(pText->m_Left, pText->m_Right,
|
| - pOtherText->m_Left, pOtherText->m_Right, inter_left, inter_right)) {
|
| - continue;
|
| - }
|
| - FX_FLOAT inter_w = inter_right - inter_left;
|
| - if (inter_w < pText->m_SpaceWidth / 2 && inter_w < pOtherText->m_SpaceWidth / 2) {
|
| - continue;
|
| - }
|
| - if (dy >= (pText->m_Bottom - pText->m_Top) / 2 ||
|
| - dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) {
|
| - return FALSE;
|
| - }
|
| - }
|
| - }
|
| - return TRUE;
|
| + }
|
| + return TRUE;
|
| }
|
| -void CTextBaseLine::Merge(CTextBaseLine* pOther)
|
| -{
|
| - for (int i = 0; i < pOther->m_TextList.GetSize(); i ++) {
|
| - CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i);
|
| - InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bottom,
|
| - pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text);
|
| - }
|
| +void CTextBaseLine::Merge(CTextBaseLine* pOther) {
|
| + for (int i = 0; i < pOther->m_TextList.GetSize(); i++) {
|
| + CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i);
|
| + InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bottom,
|
| + pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text);
|
| + }
|
| }
|
| -FX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx)
|
| -{
|
| - int i;
|
| - for (i = 0; i < m_TextList.GetSize(); i ++) {
|
| - CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| - if (pText->m_Text != L" ") {
|
| - break;
|
| - }
|
| - }
|
| - if (i == m_TextList.GetSize()) {
|
| - return FALSE;
|
| - }
|
| +FX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) {
|
| + int i;
|
| + for (i = 0; i < m_TextList.GetSize(); i++) {
|
| CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| - leftx = pText->m_Left;
|
| - for (i = m_TextList.GetSize() - 1; i >= 0; i --) {
|
| - CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| - if (pText->m_Text != L" ") {
|
| - break;
|
| - }
|
| + if (pText->m_Text != L" ") {
|
| + break;
|
| + }
|
| + }
|
| + if (i == m_TextList.GetSize()) {
|
| + return FALSE;
|
| + }
|
| + CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| + leftx = pText->m_Left;
|
| + for (i = m_TextList.GetSize() - 1; i >= 0; i--) {
|
| + CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| + if (pText->m_Text != L" ") {
|
| + break;
|
| }
|
| - pText = (CTextBox*)m_TextList.GetAt(i);
|
| - rightx = pText->m_Right;
|
| - return TRUE;
|
| + }
|
| + pText = (CTextBox*)m_TextList.GetAt(i);
|
| + rightx = pText->m_Right;
|
| + return TRUE;
|
| }
|
| -void CTextBaseLine::MergeBoxes()
|
| -{
|
| - int i = 0;
|
| - while (1) {
|
| - if (i >= m_TextList.GetSize() - 1) {
|
| - break;
|
| - }
|
| - CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i);
|
| - CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1);
|
| - FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right;
|
| - FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0) ?
|
| - pNextText->m_SpaceWidth : pThisText->m_SpaceWidth;
|
| - if (spacew > 0.0 && dx < spacew * 2) {
|
| - pThisText->m_Right = pNextText->m_Right;
|
| - if (dx > spacew * 1.5) {
|
| - pThisText->m_Text += L" ";
|
| - } else if (dx > spacew / 3) {
|
| - pThisText->m_Text += L' ';
|
| - }
|
| - pThisText->m_Text += pNextText->m_Text;
|
| - pThisText->m_SpaceWidth = pNextText->m_SpaceWidth == 0.0 ?
|
| - spacew : pNextText->m_SpaceWidth;
|
| - m_TextList.RemoveAt(i + 1);
|
| - delete pNextText;
|
| - } else {
|
| - i ++;
|
| - }
|
| +void CTextBaseLine::MergeBoxes() {
|
| + int i = 0;
|
| + while (1) {
|
| + if (i >= m_TextList.GetSize() - 1) {
|
| + break;
|
| + }
|
| + CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i);
|
| + CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1);
|
| + FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right;
|
| + FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0)
|
| + ? pNextText->m_SpaceWidth
|
| + : pThisText->m_SpaceWidth;
|
| + if (spacew > 0.0 && dx < spacew * 2) {
|
| + pThisText->m_Right = pNextText->m_Right;
|
| + if (dx > spacew * 1.5) {
|
| + pThisText->m_Text += L" ";
|
| + } else if (dx > spacew / 3) {
|
| + pThisText->m_Text += L' ';
|
| + }
|
| + pThisText->m_Text += pNextText->m_Text;
|
| + pThisText->m_SpaceWidth =
|
| + pNextText->m_SpaceWidth == 0.0 ? spacew : pNextText->m_SpaceWidth;
|
| + m_TextList.RemoveAt(i + 1);
|
| + delete pNextText;
|
| + } else {
|
| + i++;
|
| }
|
| + }
|
| }
|
| -void CTextBaseLine::WriteOutput(CFX_WideString& str, FX_FLOAT leftx, FX_FLOAT pagewidth,
|
| - int iTextWidth)
|
| -{
|
| - int lastpos = -1;
|
| - for (int i = 0; i < m_TextList.GetSize(); i ++) {
|
| - CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| - int xpos;
|
| - if (pText->m_pColumn) {
|
| - xpos = (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pagewidth + 0.5);
|
| - xpos -= pText->m_Text.GetLength();
|
| - } else {
|
| - xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5);
|
| - }
|
| - if (xpos <= lastpos) {
|
| - xpos = lastpos + 1;
|
| - }
|
| - for (int j = lastpos + 1; j < xpos; j ++) {
|
| - str += ' ';
|
| - }
|
| - CFX_WideString sSrc(pText->m_Text);
|
| - NormalizeString(sSrc);
|
| - str += sSrc;
|
| - str += ' ';
|
| - lastpos = xpos + pText->m_Text.GetLength();
|
| +void CTextBaseLine::WriteOutput(CFX_WideString& str,
|
| + FX_FLOAT leftx,
|
| + FX_FLOAT pagewidth,
|
| + int iTextWidth) {
|
| + int lastpos = -1;
|
| + for (int i = 0; i < m_TextList.GetSize(); i++) {
|
| + CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| + int xpos;
|
| + if (pText->m_pColumn) {
|
| + xpos =
|
| + (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pagewidth +
|
| + 0.5);
|
| + xpos -= pText->m_Text.GetLength();
|
| + } else {
|
| + xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5);
|
| + }
|
| + if (xpos <= lastpos) {
|
| + xpos = lastpos + 1;
|
| }
|
| + for (int j = lastpos + 1; j < xpos; j++) {
|
| + str += ' ';
|
| + }
|
| + CFX_WideString sSrc(pText->m_Text);
|
| + NormalizeString(sSrc);
|
| + str += sSrc;
|
| + str += ' ';
|
| + lastpos = xpos + pText->m_Text.GetLength();
|
| + }
|
| }
|
| -void CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars)
|
| -{
|
| - minchars = 0;
|
| - for (int i = 0; i < m_TextList.GetSize(); i ++) {
|
| - CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| - if (pText->m_Right - pText->m_Left < 0.002) {
|
| - continue;
|
| - }
|
| - count += pText->m_Text.GetLength();
|
| - width += pText->m_Right - pText->m_Left;
|
| - minchars += pText->m_Text.GetLength() + 1;
|
| +void CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars) {
|
| + minchars = 0;
|
| + for (int i = 0; i < m_TextList.GetSize(); i++) {
|
| + CTextBox* pText = (CTextBox*)m_TextList.GetAt(i);
|
| + if (pText->m_Right - pText->m_Left < 0.002) {
|
| + continue;
|
| }
|
| + count += pText->m_Text.GetLength();
|
| + width += pText->m_Right - pText->m_Left;
|
| + minchars += pText->m_Text.GetLength() + 1;
|
| + }
|
| }
|
| #define PI 3.1415926535897932384626433832795
|
| -static void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox)
|
| -{
|
| - int total_count = 0, rotated_count[3] = {0, 0, 0};
|
| - FX_POSITION pos = page.GetFirstObjectPosition();
|
| - while (pos) {
|
| - CPDF_PageObject* pObj = page.GetNextObject(pos);
|
| - if (pObj->m_Type != PDFPAGE_TEXT) {
|
| - continue;
|
| - }
|
| - total_count ++;
|
| - CPDF_TextObject* pText = (CPDF_TextObject*)pObj;
|
| - FX_FLOAT angle = pText->m_TextState.GetBaselineAngle();
|
| - if (angle == 0.0) {
|
| - continue;
|
| - }
|
| - int degree = (int)(angle * 180 / PI + 0.5);
|
| - if (degree % 90) {
|
| - continue;
|
| - }
|
| - if (degree < 0) {
|
| - degree += 360;
|
| - }
|
| - int index = degree / 90 % 3 - 1;
|
| - if (index < 0) {
|
| - continue;
|
| - }
|
| - rotated_count[index] ++;
|
| - }
|
| - if (total_count == 0) {
|
| - return;
|
| - }
|
| - CFX_AffineMatrix matrix;
|
| - if (rotated_count[0] > total_count * 2 / 3) {
|
| - matrix.Set(0, -1, 1, 0, 0, page.GetPageHeight());
|
| - } else if (rotated_count[1] > total_count * 2 / 3) {
|
| - matrix.Set(-1, 0, 0, -1, page.GetPageWidth(), page.GetPageHeight());
|
| - } else if (rotated_count[2] > total_count * 2 / 3) {
|
| - matrix.Set(0, 1, -1, 0, page.GetPageWidth(), 0);
|
| - } else {
|
| - return;
|
| - }
|
| - page.Transform(matrix);
|
| - page_bbox.Transform(&matrix);
|
| +static void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox) {
|
| + int total_count = 0, rotated_count[3] = {0, 0, 0};
|
| + FX_POSITION pos = page.GetFirstObjectPosition();
|
| + while (pos) {
|
| + CPDF_PageObject* pObj = page.GetNextObject(pos);
|
| + if (pObj->m_Type != PDFPAGE_TEXT) {
|
| + continue;
|
| + }
|
| + total_count++;
|
| + CPDF_TextObject* pText = (CPDF_TextObject*)pObj;
|
| + FX_FLOAT angle = pText->m_TextState.GetBaselineAngle();
|
| + if (angle == 0.0) {
|
| + continue;
|
| + }
|
| + int degree = (int)(angle * 180 / PI + 0.5);
|
| + if (degree % 90) {
|
| + continue;
|
| + }
|
| + if (degree < 0) {
|
| + degree += 360;
|
| + }
|
| + int index = degree / 90 % 3 - 1;
|
| + if (index < 0) {
|
| + continue;
|
| + }
|
| + rotated_count[index]++;
|
| + }
|
| + if (total_count == 0) {
|
| + return;
|
| + }
|
| + CFX_AffineMatrix matrix;
|
| + if (rotated_count[0] > total_count * 2 / 3) {
|
| + matrix.Set(0, -1, 1, 0, 0, page.GetPageHeight());
|
| + } else if (rotated_count[1] > total_count * 2 / 3) {
|
| + matrix.Set(-1, 0, 0, -1, page.GetPageWidth(), page.GetPageHeight());
|
| + } else if (rotated_count[2] > total_count * 2 / 3) {
|
| + matrix.Set(0, 1, -1, 0, page.GetPageWidth(), 0);
|
| + } else {
|
| + return;
|
| + }
|
| + page.Transform(matrix);
|
| + page_bbox.Transform(&matrix);
|
| }
|
| -void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
|
| - int iMinWidth, FX_DWORD flags)
|
| -{
|
| - lines.RemoveAll();
|
| - if (pPage == NULL) {
|
| - return;
|
| - }
|
| - CPDF_Page page;
|
| - page.Load(pDoc, pPage);
|
| - CPDF_ParseOptions options;
|
| - options.m_bTextOnly = TRUE;
|
| - options.m_bSeparateForm = FALSE;
|
| - page.ParseContent(&options);
|
| - CFX_FloatRect page_bbox = page.GetPageBBox();
|
| - if (flags & PDF2TXT_AUTO_ROTATE) {
|
| - CheckRotate(page, page_bbox);
|
| - }
|
| - CTextPage texts;
|
| - texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH;
|
| - texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN;
|
| - texts.m_bBreakSpace = TRUE;
|
| - FX_POSITION pos = page.GetFirstObjectPosition();
|
| - while (pos) {
|
| - CPDF_PageObject* pObject = page.GetNextObject(pos);
|
| - if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) {
|
| - CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Right, pObject->m_Top);
|
| - if (!page_bbox.Contains(rect)) {
|
| - continue;
|
| - }
|
| - }
|
| - texts.ProcessObject(pObject);
|
| - }
|
| - texts.WriteOutput(lines, iMinWidth);
|
| +void PDF_GetPageText_Unicode(CFX_WideStringArray& lines,
|
| + CPDF_Document* pDoc,
|
| + CPDF_Dictionary* pPage,
|
| + int iMinWidth,
|
| + FX_DWORD flags) {
|
| + lines.RemoveAll();
|
| + if (pPage == NULL) {
|
| + return;
|
| + }
|
| + CPDF_Page page;
|
| + page.Load(pDoc, pPage);
|
| + CPDF_ParseOptions options;
|
| + options.m_bTextOnly = TRUE;
|
| + options.m_bSeparateForm = FALSE;
|
| + page.ParseContent(&options);
|
| + CFX_FloatRect page_bbox = page.GetPageBBox();
|
| + if (flags & PDF2TXT_AUTO_ROTATE) {
|
| + CheckRotate(page, page_bbox);
|
| + }
|
| + CTextPage texts;
|
| + texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH;
|
| + texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN;
|
| + texts.m_bBreakSpace = TRUE;
|
| + FX_POSITION pos = page.GetFirstObjectPosition();
|
| + while (pos) {
|
| + CPDF_PageObject* pObject = page.GetNextObject(pos);
|
| + if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) {
|
| + CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Right,
|
| + pObject->m_Top);
|
| + if (!page_bbox.Contains(rect)) {
|
| + continue;
|
| + }
|
| + }
|
| + texts.ProcessObject(pObject);
|
| + }
|
| + texts.WriteOutput(lines, iMinWidth);
|
| }
|
| -void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
|
| - int iMinWidth, FX_DWORD flags)
|
| -{
|
| - lines.RemoveAll();
|
| - CFX_WideStringArray wlines;
|
| - PDF_GetPageText_Unicode(wlines, pDoc, pPage, iMinWidth, flags);
|
| - for (int i = 0; i < wlines.GetSize(); i ++) {
|
| - CFX_WideString wstr = wlines[i];
|
| - CFX_ByteString str;
|
| - for (int c = 0; c < wstr.GetLength(); c ++) {
|
| - str += CharFromUnicodeAlt(wstr[c], FXSYS_GetACP(), "?");
|
| - }
|
| - lines.Add(str);
|
| - }
|
| +void PDF_GetPageText(CFX_ByteStringArray& lines,
|
| + CPDF_Document* pDoc,
|
| + CPDF_Dictionary* pPage,
|
| + int iMinWidth,
|
| + FX_DWORD flags) {
|
| + lines.RemoveAll();
|
| + CFX_WideStringArray wlines;
|
| + PDF_GetPageText_Unicode(wlines, pDoc, pPage, iMinWidth, flags);
|
| + for (int i = 0; i < wlines.GetSize(); i++) {
|
| + CFX_WideString wstr = wlines[i];
|
| + CFX_ByteString str;
|
| + for (int c = 0; c < wstr.GetLength(); c++) {
|
| + str += CharFromUnicodeAlt(wstr[c], FXSYS_GetACP(), "?");
|
| + }
|
| + lines.Add(str);
|
| + }
|
| }
|
| -extern void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage, FX_BOOL bUseLF,
|
| +extern void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer,
|
| + CPDF_PageObjects* pPage,
|
| + FX_BOOL bUseLF,
|
| CFX_PtrArray* pObjArray);
|
| -void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPDF_Dictionary* pPage, FX_DWORD flags)
|
| -{
|
| - buffer.EstimateSize(0, 10240);
|
| - CPDF_Page page;
|
| - page.Load(pDoc, pPage);
|
| - CPDF_ParseOptions options;
|
| - options.m_bTextOnly = TRUE;
|
| - options.m_bSeparateForm = FALSE;
|
| - page.ParseContent(&options);
|
| - _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL);
|
| +void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer,
|
| + CPDF_Document* pDoc,
|
| + CPDF_Dictionary* pPage,
|
| + FX_DWORD flags) {
|
| + buffer.EstimateSize(0, 10240);
|
| + CPDF_Page page;
|
| + page.Load(pDoc, pPage);
|
| + CPDF_ParseOptions options;
|
| + options.m_bTextOnly = TRUE;
|
| + options.m_bSeparateForm = FALSE;
|
| + page.ParseContent(&options);
|
| + _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL);
|
| }
|
|
|