Index: core/src/fpdftext/fpdf_text.cpp |
diff --git a/core/src/fpdftext/fpdf_text.cpp b/core/src/fpdftext/fpdf_text.cpp |
index 7203a694abc5f1dc26f1a26cf4602c188e07d24b..52fcb5abd7508f57354d6b8dd5e36b4c0840f31c 100644 |
--- a/core/src/fpdftext/fpdf_text.cpp |
+++ b/core/src/fpdftext/fpdf_text.cpp |
@@ -14,765 +14,791 @@ |
#include "text_int.h" |
#include "txtproc.h" |
-CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, int destcp, const FX_CHAR* defchar) |
-{ |
- if (destcp == 0) { |
- if (unicode < 0x80) { |
- return CFX_ByteString((char)unicode); |
- } |
- const FX_CHAR* altstr = FCS_GetAltStr(unicode); |
- if (altstr) { |
- return CFX_ByteString(altstr, -1); |
- } |
- return CFX_ByteString(defchar, -1); |
- } |
- char buf[10]; |
- int iDef = 0; |
- int ret = FXSYS_WideCharToMultiByte(destcp, 0, (wchar_t*)&unicode, 1, buf, 10, NULL, &iDef); |
- if (ret && !iDef) { |
- return CFX_ByteString(buf, ret); |
+// Converts one Unicode character into a byte string. |
+// With destcp == 0, characters below 0x80 are returned as a single byte; |
+// otherwise FXSYS_WideCharToMultiByte is tried with code page destcp. |
+// On either path the fallbacks are FCS_GetAltStr(unicode), then defchar. |
+CFX_ByteString CharFromUnicodeAlt(FX_WCHAR unicode, |
+ int destcp, |
+ const FX_CHAR* defchar) { |
+ if (destcp == 0) { |
+ if (unicode < 0x80) { |
+ return CFX_ByteString((char)unicode); |
 } |
 const FX_CHAR* altstr = FCS_GetAltStr(unicode); |
 if (altstr) { |
- return CFX_ByteString(altstr, -1); |
+ return CFX_ByteString(altstr, -1); |
 } |
 return CFX_ByteString(defchar, -1); |
+ } |
+ char buf[10]; |
+ int iDef = 0; |
+ int ret = FXSYS_WideCharToMultiByte(destcp, 0, (wchar_t*)&unicode, 1, buf, 10, |
+ NULL, &iDef); |
+ // Accept the conversion only when ret is non-zero and iDef stayed zero |
+ // (iDef is presumably the "default character was used" flag - confirm). |
+ if (ret && !iDef) { |
+ return CFX_ByteString(buf, ret); |
+ } |
+ const FX_CHAR* altstr = FCS_GetAltStr(unicode); |
+ if (altstr) { |
+ return CFX_ByteString(altstr, -1); |
+ } |
+ return CFX_ByteString(defchar, -1); |
}
-CTextPage::CTextPage() |
-{ |
+// Default constructor; the body performs no initialization. |
+CTextPage::CTextPage() {} |
+// Deletes every CTextBaseLine stored in m_BaseLines and every CTextColumn |
+// stored in m_TextColumns; both arrays hold untyped pointers, hence the casts. |
+CTextPage::~CTextPage() { |
+ int i; |
+ for (i = 0; i < m_BaseLines.GetSize(); i++) { |
+ CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
+ delete pBaseLine; |
+ } |
+ for (i = 0; i < m_TextColumns.GetSize(); i++) { |
+ CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); |
+ delete pTextColumn; |
+ } |
 }
-CTextPage::~CTextPage() |
-{ |
- int i; |
- for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
- CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
- delete pBaseLine; |
- } |
- for (i = 0; i < m_TextColumns.GetSize(); i ++) { |
- CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); |
- delete pTextColumn; |
- } |
-} |
-void CTextPage::ProcessObject(CPDF_PageObject* pObject) |
-{ |
- if (pObject->m_Type != PDFPAGE_TEXT) { |
- return; |
- } |
- CPDF_TextObject* pText = (CPDF_TextObject*)pObject; |
- CPDF_Font* pFont = pText->m_TextState.GetFont(); |
- int count = pText->CountItems(); |
- FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2); |
- pText->CalcCharPos(pPosArray); |
+// Extracts the characters of a text object and inserts them as text boxes. |
+// Page objects whose m_Type is not PDFPAGE_TEXT are ignored. When the |
+// baseline angle is non-zero, each character is transformed by the text |
+// matrix and inserted as its own box. Otherwise characters are accumulated |
+// into segments, and a segment is flushed when a character starts left of the |
+// previous one or the gap to the previous character exceeds spacew / 2. |
+void CTextPage::ProcessObject(CPDF_PageObject* pObject) { |
+ if (pObject->m_Type != PDFPAGE_TEXT) { |
+ return; |
+ } |
+ CPDF_TextObject* pText = (CPDF_TextObject*)pObject; |
+ CPDF_Font* pFont = pText->m_TextState.GetFont(); |
+ int count = pText->CountItems(); |
+ FX_FLOAT* pPosArray = FX_Alloc2D(FX_FLOAT, count, 2); |
+ pText->CalcCharPos(pPosArray); |
- FX_FLOAT fontsize_h = pText->m_TextState.GetFontSizeH(); |
- FX_FLOAT fontsize_v = pText->m_TextState.GetFontSizeV(); |
- FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); |
- FX_FLOAT spacew = 0; |
- if (space_charcode != -1) { |
- spacew = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000; |
- } |
- if (spacew == 0) { |
- spacew = fontsize_h / 4; |
- } |
- if (pText->m_TextState.GetBaselineAngle() != 0) { |
- int cc = 0; |
- CFX_AffineMatrix matrix; |
- pText->GetTextMatrix(&matrix); |
- for (int i = 0; i < pText->m_nChars; i ++) { |
- FX_DWORD charcode = pText->m_nChars == 1 ? (FX_DWORD)(uintptr_t)pText->m_pCharCodes : pText->m_pCharCodes[i]; |
- if (charcode == (FX_DWORD) - 1) { |
- continue; |
- } |
- FX_RECT char_box; |
- pFont->GetCharBBox(charcode, char_box); |
- FX_FLOAT char_left = pPosArray ? pPosArray[cc * 2] : char_box.left * pText->m_TextState.GetFontSize() / 1000; |
- FX_FLOAT char_right = pPosArray ? pPosArray[cc * 2 + 1] : char_box.right * pText->m_TextState.GetFontSize() / 1000; |
- FX_FLOAT char_top = char_box.top * pText->m_TextState.GetFontSize() / 1000; |
- FX_FLOAT char_bottom = char_box.bottom * pText->m_TextState.GetFontSize() / 1000; |
- cc ++; |
- FX_FLOAT char_origx, char_origy; |
- matrix.Transform(char_left, 0, char_origx, char_origy); |
- matrix.TransformRect(char_left, char_right, char_top, char_bottom); |
- CFX_ByteString str; |
- pFont->AppendChar(str, charcode); |
- InsertTextBox(NULL, char_origy, char_left, char_right, char_top, |
- char_bottom, spacew, fontsize_v, str, pFont); |
- } |
- if (pPosArray) { |
- FX_Free(pPosArray); |
- } |
- return; |
- } |
- FX_FLOAT ratio_h = fontsize_h / pText->m_TextState.GetFontSize(); |
- for (int ii = 0; ii < count * 2; ii ++) { |
- pPosArray[ii] *= ratio_h; |
- } |
- FX_FLOAT baseline = pText->m_PosY; |
- CTextBaseLine* pBaseLine = NULL; |
- FX_FLOAT topy = pText->m_Top; |
- FX_FLOAT bottomy = pText->m_Bottom; |
- FX_FLOAT leftx = pText->m_Left; |
+ FX_FLOAT fontsize_h = pText->m_TextState.GetFontSizeH(); |
+ FX_FLOAT fontsize_v = pText->m_TextState.GetFontSizeV(); |
+ FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); |
+ FX_FLOAT spacew = 0; |
+ if (space_charcode != -1) { |
+ spacew = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000; |
+ } |
+ // Fall back to a quarter of the horizontal font size when the font gave no |
+ // usable width for the space character. |
+ if (spacew == 0) { |
+ spacew = fontsize_h / 4; |
+ } |
+ if (pText->m_TextState.GetBaselineAngle() != 0) { |
 int cc = 0; |
- CFX_ByteString segment; |
- int space_count = 0; |
- FX_FLOAT last_left = 0, last_right = 0, segment_left = 0, segment_right = 0; |
- for (int i = 0; i < pText->m_nChars; i ++) { |
- FX_DWORD charcode = pText->m_nChars == 1 ? (FX_DWORD)(uintptr_t)pText->m_pCharCodes : pText->m_pCharCodes[i]; |
- if (charcode == (FX_DWORD) - 1) { |
- continue; |
- } |
- FX_FLOAT char_left = pPosArray[cc * 2]; |
- FX_FLOAT char_right = pPosArray[cc * 2 + 1]; |
- cc ++; |
- if (char_left < last_left || (char_left - last_right) > spacew / 2) { |
- pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right, |
- topy, bottomy, spacew, fontsize_v, segment, pFont); |
- segment_left = char_left; |
- segment = ""; |
- } |
- if (space_count > 1) { |
- pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right, |
- topy, bottomy, spacew, fontsize_v, segment, pFont); |
- segment = ""; |
- } else if (space_count == 1) { |
- pFont->AppendChar(segment, ' '); |
- } |
- if (segment.GetLength() == 0) { |
- segment_left = char_left; |
- } |
- segment_right = char_right; |
- pFont->AppendChar(segment, charcode); |
- space_count = 0; |
- last_left = char_left; |
- last_right = char_right; |
- } |
- if (segment.GetLength()) |
- pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, leftx + segment_right, |
- topy, bottomy, spacew, fontsize_v, segment, pFont); |
- FX_Free(pPosArray); |
+ CFX_AffineMatrix matrix; |
+ pText->GetTextMatrix(&matrix); |
+ for (int i = 0; i < pText->m_nChars; i++) { |
+ // With exactly one character, m_pCharCodes is read as the character code |
+ // itself rather than as a pointer to an array of codes. |
+ FX_DWORD charcode = pText->m_nChars == 1 |
+ ? (FX_DWORD)(uintptr_t)pText->m_pCharCodes |
+ : pText->m_pCharCodes[i]; |
+ if (charcode == (FX_DWORD)-1) { |
+ continue; |
+ } |
+ FX_RECT char_box; |
+ pFont->GetCharBBox(charcode, char_box); |
+ FX_FLOAT char_left = |
+ pPosArray ? pPosArray[cc * 2] |
+ : char_box.left * pText->m_TextState.GetFontSize() / 1000; |
+ FX_FLOAT char_right = |
+ pPosArray ? pPosArray[cc * 2 + 1] |
+ : char_box.right * pText->m_TextState.GetFontSize() / 1000; |
+ FX_FLOAT char_top = |
+ char_box.top * pText->m_TextState.GetFontSize() / 1000; |
+ FX_FLOAT char_bottom = |
+ char_box.bottom * pText->m_TextState.GetFontSize() / 1000; |
+ cc++; |
+ FX_FLOAT char_origx, char_origy; |
+ matrix.Transform(char_left, 0, char_origx, char_origy); |
+ matrix.TransformRect(char_left, char_right, char_top, char_bottom); |
+ CFX_ByteString str; |
+ pFont->AppendChar(str, charcode); |
+ InsertTextBox(NULL, char_origy, char_left, char_right, char_top, |
+ char_bottom, spacew, fontsize_v, str, pFont); |
+ } |
+ if (pPosArray) { |
+ FX_Free(pPosArray); |
+ } |
+ return; |
+ } |
+ FX_FLOAT ratio_h = fontsize_h / pText->m_TextState.GetFontSize(); |
+ for (int ii = 0; ii < count * 2; ii++) { |
+ pPosArray[ii] *= ratio_h; |
+ } |
+ FX_FLOAT baseline = pText->m_PosY; |
+ CTextBaseLine* pBaseLine = NULL; |
+ FX_FLOAT topy = pText->m_Top; |
+ FX_FLOAT bottomy = pText->m_Bottom; |
+ FX_FLOAT leftx = pText->m_Left; |
+ int cc = 0; |
+ CFX_ByteString segment; |
+ int space_count = 0; |
+ FX_FLOAT last_left = 0, last_right = 0, segment_left = 0, segment_right = 0; |
+ for (int i = 0; i < pText->m_nChars; i++) { |
+ FX_DWORD charcode = pText->m_nChars == 1 |
+ ? (FX_DWORD)(uintptr_t)pText->m_pCharCodes |
+ : pText->m_pCharCodes[i]; |
+ if (charcode == (FX_DWORD)-1) { |
+ continue; |
+ } |
+ FX_FLOAT char_left = pPosArray[cc * 2]; |
+ FX_FLOAT char_right = pPosArray[cc * 2 + 1]; |
+ cc++; |
+ if (char_left < last_left || (char_left - last_right) > spacew / 2) { |
+ pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, |
+ leftx + segment_right, topy, bottomy, spacew, |
+ fontsize_v, segment, pFont); |
+ segment_left = char_left; |
+ segment = ""; |
+ } |
+ // NOTE(review): space_count is only ever assigned 0 in this function, so |
+ // neither branch below can currently be taken - confirm whether the |
+ // increment was lost. |
+ if (space_count > 1) { |
+ pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, |
+ leftx + segment_right, topy, bottomy, spacew, |
+ fontsize_v, segment, pFont); |
+ segment = ""; |
+ } else if (space_count == 1) { |
+ pFont->AppendChar(segment, ' '); |
+ } |
+ if (segment.GetLength() == 0) { |
+ segment_left = char_left; |
+ } |
+ segment_right = char_right; |
+ pFont->AppendChar(segment, charcode); |
+ space_count = 0; |
+ last_left = char_left; |
+ last_right = char_right; |
+ } |
+ if (segment.GetLength()) |
+ pBaseLine = InsertTextBox(pBaseLine, baseline, leftx + segment_left, |
+ leftx + segment_right, topy, bottomy, spacew, |
+ fontsize_v, segment, pFont); |
+ FX_Free(pPosArray); |
 }
-CTextBaseLine* CTextPage::InsertTextBox(CTextBaseLine* pBaseLine, FX_FLOAT basey, FX_FLOAT leftx, |
- FX_FLOAT rightx, FX_FLOAT topy, FX_FLOAT bottomy, FX_FLOAT spacew, FX_FLOAT fontsize_v, |
- CFX_ByteString& str, CPDF_Font* pFont) |
-{ |
- if (str.GetLength() == 0) { |
- return NULL; |
+// Decodes str with pFont into wide text and adds it as a text box to a base |
+// line. If pBaseLine is NULL, m_BaseLines is searched for a line whose |
+// m_BaseLine equals basey, and a new line is created when none matches. |
+// Returns the base line that received the box, or NULL when str is empty. |
+CTextBaseLine* CTextPage::InsertTextBox(CTextBaseLine* pBaseLine, |
+ FX_FLOAT basey, |
+ FX_FLOAT leftx, |
+ FX_FLOAT rightx, |
+ FX_FLOAT topy, |
+ FX_FLOAT bottomy, |
+ FX_FLOAT spacew, |
+ FX_FLOAT fontsize_v, |
+ CFX_ByteString& str, |
+ CPDF_Font* pFont) { |
+ if (str.GetLength() == 0) { |
+ return NULL; |
+ } |
+ if (pBaseLine == NULL) { |
+ int i; |
+ for (i = 0; i < m_BaseLines.GetSize(); i++) { |
+ CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
+ if (pExistLine->m_BaseLine == basey) { |
+ pBaseLine = pExistLine; |
+ break; |
+ } |
+ // Stop at the first line with a smaller base line; i is then the index |
+ // at which a new line is inserted below. |
+ if (pExistLine->m_BaseLine < basey) { |
+ break; |
+ } |
 } |
 if (pBaseLine == NULL) { |
- int i; |
- for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
- CTextBaseLine* pExistLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
- if (pExistLine->m_BaseLine == basey) { |
- pBaseLine = pExistLine; |
- break; |
- } |
- if (pExistLine->m_BaseLine < basey) { |
- break; |
- } |
- } |
- if (pBaseLine == NULL) { |
- pBaseLine = new CTextBaseLine; |
- pBaseLine->m_BaseLine = basey; |
- m_BaseLines.InsertAt(i, pBaseLine); |
- } |
- } |
- CFX_WideString text; |
- const FX_CHAR* pStr = str; |
- int len = str.GetLength(), offset = 0; |
- while (offset < len) { |
- FX_DWORD ch = pFont->GetNextChar(pStr, len, offset); |
- CFX_WideString unicode_str = pFont->UnicodeFromCharCode(ch); |
- if (unicode_str.IsEmpty()) { |
- text += (FX_WCHAR)ch; |
- } |
- else { |
- text += unicode_str; |
- } |
+ pBaseLine = new CTextBaseLine; |
+ pBaseLine->m_BaseLine = basey; |
+ m_BaseLines.InsertAt(i, pBaseLine); |
+ } |
+ } |
+ CFX_WideString text; |
+ const FX_CHAR* pStr = str; |
+ int len = str.GetLength(), offset = 0; |
+ while (offset < len) { |
+ FX_DWORD ch = pFont->GetNextChar(pStr, len, offset); |
+ CFX_WideString unicode_str = pFont->UnicodeFromCharCode(ch); |
+ // When the font yields no Unicode string for the code, the raw character |
+ // code is appended instead. |
+ if (unicode_str.IsEmpty()) { |
+ text += (FX_WCHAR)ch; |
+ } else { |
+ text += unicode_str; |
 } |
- pBaseLine->InsertTextBox(leftx, rightx, topy, bottomy, spacew, fontsize_v, text); |
- return pBaseLine; |
+ } |
+ pBaseLine->InsertTextBox(leftx, rightx, topy, bottomy, spacew, fontsize_v, |
+ text); |
+ return pBaseLine; |
}
-void CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth) |
-{ |
- FX_FLOAT lastheight = -1; |
- FX_FLOAT lastbaseline = -1; |
- FX_FLOAT MinLeftX = 1000000; |
- FX_FLOAT MaxRightX = 0; |
- int i; |
- for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
- CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
- FX_FLOAT leftx, rightx; |
- if (pBaseLine->GetWidth(leftx, rightx)) { |
- if (leftx < MinLeftX) { |
- MinLeftX = leftx; |
- } |
- if (rightx > MaxRightX) { |
- MaxRightX = rightx; |
- } |
- } |
- } |
- for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
- CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
- pBaseLine->MergeBoxes(); |
- } |
- for (i = 1; i < m_BaseLines.GetSize(); i ++) { |
- CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
- CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1); |
- if (pBaseLine->CanMerge(pPrevLine)) { |
- pPrevLine->Merge(pBaseLine); |
- delete pBaseLine; |
- m_BaseLines.RemoveAt(i); |
- i --; |
- } |
- } |
- if (m_bAutoWidth) { |
- int* widths = FX_Alloc(int, m_BaseLines.GetSize()); |
- for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
- widths[i] = 0; |
- CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
- int TotalChars = 0; |
- FX_FLOAT TotalWidth = 0; |
- int minchars; |
- pBaseLine->CountChars(TotalChars, TotalWidth, minchars); |
- if (TotalChars) { |
- FX_FLOAT charwidth = TotalWidth / TotalChars; |
- widths[i] = (int)((MaxRightX - MinLeftX) / charwidth); |
- } |
- if (widths[i] > 1000) { |
- widths[i] = 1000; |
- } |
- if (widths[i] < minchars) { |
- widths[i] = minchars; |
- } |
- } |
- int AvgWidth = 0, widthcount = 0; |
- for (i = 0; i < m_BaseLines.GetSize(); i ++) |
- if (widths[i]) { |
- AvgWidth += widths[i]; |
- widthcount ++; |
- } |
- AvgWidth = int((FX_FLOAT)AvgWidth / widthcount + 0.5); |
- int MaxWidth = 0; |
- for (i = 0; i < m_BaseLines.GetSize(); i ++) |
- if (MaxWidth < widths[i]) { |
- MaxWidth = widths[i]; |
- } |
- if (MaxWidth > AvgWidth * 6 / 5) { |
- MaxWidth = AvgWidth * 6 / 5; |
- } |
- FX_Free(widths); |
- if (iMinWidth < MaxWidth) { |
- iMinWidth = MaxWidth; |
- } |
- } |
- for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
- CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
- pBaseLine->MergeBoxes(); |
- } |
- if (m_bKeepColumn) { |
- FindColumns(); |
- } |
- for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
- CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
- if (lastheight >= 0) { |
- FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine; |
- if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.5) { |
- lines.Add(L""); |
- } |
- } |
- lastheight = pBaseLine->m_MaxFontSizeV; |
- lastbaseline = pBaseLine->m_BaseLine; |
- CFX_WideString str; |
- pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth); |
- lines.Add(str); |
- } |
+// Appends the page text to lines, one string per base line. |
+// Steps: compute the horizontal extent of all base lines, merge boxes and |
+// mergeable adjacent base lines, optionally raise iMinWidth from the estimated |
+// characters per line (m_bAutoWidth), optionally detect columns |
+// (m_bKeepColumn), then write each base line. An empty string is added before |
+// a line when the drop from the previous base line is at least 1.5 times |
+// either line's m_MaxFontSizeV. |
+void CTextPage::WriteOutput(CFX_WideStringArray& lines, int iMinWidth) { |
+ FX_FLOAT lastheight = -1; |
+ FX_FLOAT lastbaseline = -1; |
+ FX_FLOAT MinLeftX = 1000000; |
+ FX_FLOAT MaxRightX = 0; |
+ int i; |
+ for (i = 0; i < m_BaseLines.GetSize(); i++) { |
+ CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
+ FX_FLOAT leftx, rightx; |
+ if (pBaseLine->GetWidth(leftx, rightx)) { |
+ if (leftx < MinLeftX) { |
+ MinLeftX = leftx; |
+ } |
+ if (rightx > MaxRightX) { |
+ MaxRightX = rightx; |
+ } |
+ } |
+ } |
+ for (i = 0; i < m_BaseLines.GetSize(); i++) { |
+ CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
+ pBaseLine->MergeBoxes(); |
+ } |
+ for (i = 1; i < m_BaseLines.GetSize(); i++) { |
+ CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
+ CTextBaseLine* pPrevLine = (CTextBaseLine*)m_BaseLines.GetAt(i - 1); |
+ if (pBaseLine->CanMerge(pPrevLine)) { |
+ pPrevLine->Merge(pBaseLine); |
+ delete pBaseLine; |
+ m_BaseLines.RemoveAt(i); |
+ // Re-examine the same index, which now holds the following line. |
+ i--; |
+ } |
+ } |
+ if (m_bAutoWidth) { |
+ int* widths = FX_Alloc(int, m_BaseLines.GetSize()); |
+ for (i = 0; i < m_BaseLines.GetSize(); i++) { |
+ widths[i] = 0; |
+ CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
+ int TotalChars = 0; |
+ FX_FLOAT TotalWidth = 0; |
+ int minchars; |
+ pBaseLine->CountChars(TotalChars, TotalWidth, minchars); |
+ if (TotalChars) { |
+ FX_FLOAT charwidth = TotalWidth / TotalChars; |
+ widths[i] = (int)((MaxRightX - MinLeftX) / charwidth); |
+ } |
+ if (widths[i] > 1000) { |
+ widths[i] = 1000; |
+ } |
+ if (widths[i] < minchars) { |
+ widths[i] = minchars; |
+ } |
+ } |
+ int AvgWidth = 0, widthcount = 0; |
+ for (i = 0; i < m_BaseLines.GetSize(); i++) |
+ if (widths[i]) { |
+ AvgWidth += widths[i]; |
+ widthcount++; |
+ } |
+ // Only average when at least one line has a non-zero width; dividing by a |
+ // zero widthcount would yield NaN and make the int conversion undefined. |
+ // AvgWidth stays 0 in that case, which leaves iMinWidth untouched below. |
+ if (widthcount) { |
+ AvgWidth = int((FX_FLOAT)AvgWidth / widthcount + 0.5); |
+ } |
+ int MaxWidth = 0; |
+ for (i = 0; i < m_BaseLines.GetSize(); i++) |
+ if (MaxWidth < widths[i]) { |
+ MaxWidth = widths[i]; |
+ } |
+ // Cap the widest line at 120% of the average width. |
+ if (MaxWidth > AvgWidth * 6 / 5) { |
+ MaxWidth = AvgWidth * 6 / 5; |
+ } |
+ FX_Free(widths); |
+ if (iMinWidth < MaxWidth) { |
+ iMinWidth = MaxWidth; |
+ } |
+ } |
+ for (i = 0; i < m_BaseLines.GetSize(); i++) { |
+ CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
+ pBaseLine->MergeBoxes(); |
+ } |
+ if (m_bKeepColumn) { |
+ FindColumns(); |
+ } |
+ for (i = 0; i < m_BaseLines.GetSize(); i++) { |
+ CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
+ if (lastheight >= 0) { |
+ FX_FLOAT dy = lastbaseline - pBaseLine->m_BaseLine; |
+ if (dy >= (pBaseLine->m_MaxFontSizeV) * 1.5 || dy >= lastheight * 1.5) { |
+ lines.Add(L""); |
+ } |
+ } |
+ lastheight = pBaseLine->m_MaxFontSizeV; |
+ lastbaseline = pBaseLine->m_BaseLine; |
+ CFX_WideString str; |
+ pBaseLine->WriteOutput(str, MinLeftX, MaxRightX - MinLeftX, iMinWidth); |
+ lines.Add(str); |
+ } |
}
-void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest) |
-{ |
- wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); |
- FX_WCHAR* pDst = NULL; |
- FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
- if (nCount < 1 ) { |
- sDest += wChar; |
- return; |
- } |
- pDst = new FX_WCHAR[nCount]; |
- FX_Unicode_GetNormalization(wChar, pDst); |
- for (int nIndex = 0; nIndex < nCount; nIndex++) { |
- sDest += pDst[nIndex]; |
- } |
- delete[] pDst; |
+// Appends the normalized form of wChar to sDest. |
+// wChar is first passed through FX_GetMirrorChar. FX_Unicode_GetNormalization |
+// is then called once with a NULL buffer to obtain the element count and, if |
+// that count is at least 1, a second time to fill a buffer of that size. |
+// When the count is below 1 the (mirrored) character is appended unchanged. |
+void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest) { |
+ wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); |
+ FX_WCHAR* pDst = NULL; |
+ FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst); |
+ if (nCount < 1) { |
+ sDest += wChar; |
+ return; |
+ } |
+ pDst = new FX_WCHAR[nCount]; |
+ FX_Unicode_GetNormalization(wChar, pDst); |
+ for (int nIndex = 0; nIndex < nCount; nIndex++) { |
+ sDest += pDst[nIndex]; |
+ } |
+ delete[] pDst; |
}
-void NormalizeString(CFX_WideString& str) |
-{ |
- if (str.GetLength() <= 0) { |
- return; |
- } |
- CFX_WideString sBuffer; |
- nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); |
- CFX_WordArray order; |
- FX_BOOL bR2L = FALSE; |
- int32_t start = 0, count = 0, i = 0; |
- int nR2L = 0, nL2R = 0; |
- for (i = 0; i < str.GetLength(); i++) { |
- if(pBidiChar->AppendChar(str.GetAt(i))) { |
- int32_t ret = pBidiChar->GetBidiInfo(start, count); |
- order.Add(start); |
- order.Add(count); |
- order.Add(ret); |
- if(!bR2L) { |
- if(ret == 2) { |
- nR2L++; |
- } else if (ret == 1) { |
- nL2R++; |
- } |
- } |
- } |
- } |
- if(pBidiChar->EndChar()) { |
- int32_t ret = pBidiChar->GetBidiInfo(start, count); |
- order.Add(start); |
- order.Add(count); |
- order.Add(ret); |
- if(!bR2L) { |
- if(ret == 2) { |
- nR2L++; |
- } else if(ret == 1) { |
- nL2R++; |
- } |
- } |
- } |
- if(nR2L > 0 && nR2L >= nL2R) { |
- bR2L = TRUE; |
- } |
- if(bR2L) { |
- int count = order.GetSize(); |
- for(int j = count - 1; j > 0; j -= 3) { |
- int ret = order.GetAt(j); |
- int start = order.GetAt(j - 2); |
- int count1 = order.GetAt(j - 1); |
- if(ret == 2 || ret == 0) { |
- for(int i = start + count1 - 1; i >= start; i--) { |
- NormalizeCompositeChar(str[i], sBuffer); |
- } |
- } else { |
- i = j; |
- FX_BOOL bSymbol = FALSE; |
- while(i > 0 && order.GetAt(i) != 2) { |
- bSymbol = !order.GetAt(i); |
- i -= 3; |
- } |
- int end = start + count1 ; |
- int n = 0; |
- if(bSymbol) { |
- n = i + 6; |
- } else { |
- n = i + 3; |
- } |
- if(n >= j) { |
- for(int m = start; m < end; m++) { |
- sBuffer += str[m]; |
- } |
- } else { |
- i = j; |
- j = n; |
- for(; n <= i; n += 3) { |
- int start = order.GetAt(n - 2); |
- int count1 = order.GetAt(n - 1); |
- int end = start + count1 ; |
- for(int m = start; m < end; m++) { |
- sBuffer += str[m]; |
- } |
- } |
- } |
- } |
+// Rewrites str in place by reordering its bidirectional runs. |
+// The string is fed to an IFX_BidiChar, and each reported run is stored in |
+// order as a (start, count, ret) triple. Runs with ret == 2 are counted as |
+// right-to-left and runs with ret == 1 as left-to-right; the string is treated |
+// as right-to-left when it has at least one R2L run and no fewer R2L than L2R |
+// runs. Characters of reversed runs go through NormalizeCompositeChar. |
+void NormalizeString(CFX_WideString& str) { |
+ if (str.GetLength() <= 0) { |
+ return; |
+ } |
+ CFX_WideString sBuffer; |
+ nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); |
+ CFX_WordArray order; |
+ FX_BOOL bR2L = FALSE; |
+ int32_t start = 0, count = 0, i = 0; |
+ int nR2L = 0, nL2R = 0; |
+ for (i = 0; i < str.GetLength(); i++) { |
+ if (pBidiChar->AppendChar(str.GetAt(i))) { |
+ int32_t ret = pBidiChar->GetBidiInfo(start, count); |
+ order.Add(start); |
+ order.Add(count); |
+ order.Add(ret); |
+ if (!bR2L) { |
+ if (ret == 2) { |
+ nR2L++; |
+ } else if (ret == 1) { |
+ nL2R++; |
+ } |
+ } |
+ } |
+ } |
+ if (pBidiChar->EndChar()) { |
+ int32_t ret = pBidiChar->GetBidiInfo(start, count); |
+ order.Add(start); |
+ order.Add(count); |
+ order.Add(ret); |
+ if (!bR2L) { |
+ if (ret == 2) { |
+ nR2L++; |
+ } else if (ret == 1) { |
+ nL2R++; |
+ } |
+ } |
+ } |
+ if (nR2L > 0 && nR2L >= nL2R) { |
+ bR2L = TRUE; |
+ } |
+ if (bR2L) { |
+ // Right-to-left text: walk the runs from last to first; j indexes the ret |
+ // field of each triple. |
+ int count = order.GetSize(); |
+ for (int j = count - 1; j > 0; j -= 3) { |
+ int ret = order.GetAt(j); |
+ int start = order.GetAt(j - 2); |
+ int count1 = order.GetAt(j - 1); |
+ if (ret == 2 || ret == 0) { |
+ for (int i = start + count1 - 1; i >= start; i--) { |
+ NormalizeCompositeChar(str[i], sBuffer); |
+ } |
+ } else { |
+ i = j; |
+ FX_BOOL bSymbol = FALSE; |
+ while (i > 0 && order.GetAt(i) != 2) { |
+ bSymbol = !order.GetAt(i); |
+ i -= 3; |
+ } |
+ int end = start + count1; |
+ int n = 0; |
+ if (bSymbol) { |
+ n = i + 6; |
+ } else { |
+ n = i + 3; |
 } |
- } else { |
- int count = order.GetSize(); |
- FX_BOOL bL2R = FALSE; |
- for(int j = 0; j < count; j += 3) { |
- int ret = order.GetAt(j + 2); |
- int start = order.GetAt(j); |
- int count1 = order.GetAt(j + 1); |
- if(ret == 2 || (j == 0 && ret == 0 && !bL2R)) { |
- int i = j + 3; |
- while(bR2L && i < count) { |
- if(order.GetAt(i + 2) == 1) { |
- break; |
- } else { |
- i += 3; |
- } |
- } |
- if(i == 3) { |
- j = -3; |
- bL2R = TRUE; |
- continue; |
- } |
- int end = str.GetLength() - 1; |
- if(i < count) { |
- end = order.GetAt(i) - 1; |
- } |
- j = i - 3; |
- for(int n = end; n >= start; n--) { |
- NormalizeCompositeChar(str[i], sBuffer); |
- } |
- } else { |
- int end = start + count1 ; |
- for(int i = start; i < end; i++) { |
- sBuffer += str[i]; |
- } |
+ if (n >= j) { |
+ for (int m = start; m < end; m++) { |
+ sBuffer += str[m]; |
+ } |
+ } else { |
+ i = j; |
+ j = n; |
+ for (; n <= i; n += 3) { |
+ int start = order.GetAt(n - 2); |
+ int count1 = order.GetAt(n - 1); |
+ int end = start + count1; |
+ for (int m = start; m < end; m++) { |
+ sBuffer += str[m]; |
 } |
+ } |
+ } |
+ } |
+ } |
+ } else { |
+ // Left-to-right text: walk the runs first to last; j indexes the start |
+ // field of each triple. |
+ int count = order.GetSize(); |
+ FX_BOOL bL2R = FALSE; |
+ for (int j = 0; j < count; j += 3) { |
+ int ret = order.GetAt(j + 2); |
+ int start = order.GetAt(j); |
+ int count1 = order.GetAt(j + 1); |
+ if (ret == 2 || (j == 0 && ret == 0 && !bL2R)) { |
+ int i = j + 3; |
+ while (bR2L && i < count) { |
+ if (order.GetAt(i + 2) == 1) { |
+ break; |
+ } else { |
+ i += 3; |
+ } |
 } |
- } |
- str.Empty(); |
- str += sBuffer; |
-} |
-static FX_BOOL IsNumber(CFX_WideString& str) |
-{ |
- for (int i = 0; i < str.GetLength(); i ++) { |
- FX_WCHAR ch = str[i]; |
- if ((ch < '0' || ch > '9') && ch != '-' && ch != '+' && ch != '.' && ch != ' ') { |
- return FALSE; |
+ // Restart once from the first run with bL2R set. The !bL2R guard is |
+ // required: without it a leading run with ret == 2 re-enters this |
+ // branch after every restart and the loop never terminates. |
+ if (i == 3 && !bL2R) { |
+ j = -3; |
+ bL2R = TRUE; |
+ continue; |
 } |
- } |
- return TRUE; |
-} |
-void CTextPage::FindColumns() |
-{ |
- int i; |
- for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
- CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
- for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) { |
- CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); |
- CTextColumn* pColumn = FindColumn(pTextBox->m_Right); |
- if (pColumn == NULL) { |
- pColumn = new CTextColumn; |
- pColumn->m_Count = 1; |
- pColumn->m_AvgPos = pTextBox->m_Right; |
- pColumn->m_TextPos = -1; |
- m_TextColumns.Add(pColumn); |
- } else { |
- pColumn->m_AvgPos = (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / |
- (pColumn->m_Count + 1); |
- pColumn->m_Count ++; |
- } |
+ int end = str.GetLength() - 1; |
+ if (i < count) { |
+ end = order.GetAt(i) - 1; |
 } |
- } |
- int mincount = m_BaseLines.GetSize() / 4; |
- for (i = 0; i < m_TextColumns.GetSize(); i ++) { |
- CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); |
- if (pTextColumn->m_Count >= mincount) { |
- continue; |
+ j = i - 3; |
+ // Emit the run back to front. The index must be n (a position in str); |
+ // i is an index into order and would repeat one unrelated character. |
+ for (int n = end; n >= start; n--) { |
+ NormalizeCompositeChar(str[n], sBuffer); |
 } |
- delete pTextColumn; |
- m_TextColumns.RemoveAt(i); |
- i --; |
- } |
- for (i = 0; i < m_BaseLines.GetSize(); i ++) { |
- CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
- for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j ++) { |
- CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); |
- if (IsNumber(pTextBox->m_Text)) { |
- pTextBox->m_pColumn = FindColumn(pTextBox->m_Right); |
- } |
+ } else { |
+ int end = start + count1; |
+ for (int i = start; i < end; i++) { |
+ sBuffer += str[i]; |
 } |
+ } |
 } |
+ } |
+ str.Empty(); |
+ str += sBuffer; |
}
-CTextColumn* CTextPage::FindColumn(FX_FLOAT xpos) |
-{ |
- for (int i = 0; i < m_TextColumns.GetSize(); i ++) { |
- CTextColumn* pColumn = (CTextColumn*)m_TextColumns.GetAt(i); |
- if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) { |
- return pColumn; |
- } |
- } |
- return NULL; |
+// Returns TRUE when every character of str is a decimal digit, '-', '+', '.' |
+// or a space. An empty string therefore also yields TRUE. |
+static FX_BOOL IsNumber(CFX_WideString& str) { |
+ for (int i = 0; i < str.GetLength(); i++) { |
+ FX_WCHAR ch = str[i]; |
+ if ((ch < '0' || ch > '9') && ch != '-' && ch != '+' && ch != '.' && |
+ ch != ' ') { |
+ return FALSE; |
+ } |
+ } |
+ return TRUE; |
}
-void CTextPage::BreakSpace(CPDF_TextObject* pTextObj) |
-{ |
+// Builds m_TextColumns from the right edges of all text boxes. |
+// Pass 1 groups boxes by m_Right (via FindColumn) and keeps a running average |
+// position and a box count per column. Pass 2 deletes columns whose count is |
+// below m_BaseLines.GetSize() / 4. Pass 3 assigns a column to every text box |
+// whose text satisfies IsNumber. |
+void CTextPage::FindColumns() { |
+ int i; |
+ for (i = 0; i < m_BaseLines.GetSize(); i++) { |
+ CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
+ for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { |
+ CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); |
+ CTextColumn* pColumn = FindColumn(pTextBox->m_Right); |
+ if (pColumn == NULL) { |
+ pColumn = new CTextColumn; |
+ pColumn->m_Count = 1; |
+ pColumn->m_AvgPos = pTextBox->m_Right; |
+ pColumn->m_TextPos = -1; |
+ m_TextColumns.Add(pColumn); |
+ } else { |
+ // Incremental mean of the right edges seen so far for this column. |
+ pColumn->m_AvgPos = |
+ (pColumn->m_Count * pColumn->m_AvgPos + pTextBox->m_Right) / |
+ (pColumn->m_Count + 1); |
+ pColumn->m_Count++; |
+ } |
+ } |
+ } |
+ int mincount = m_BaseLines.GetSize() / 4; |
+ for (i = 0; i < m_TextColumns.GetSize(); i++) { |
+ CTextColumn* pTextColumn = (CTextColumn*)m_TextColumns.GetAt(i); |
+ if (pTextColumn->m_Count >= mincount) { |
+ continue; |
+ } |
+ delete pTextColumn; |
+ m_TextColumns.RemoveAt(i); |
+ // Stay on the same index after removal. |
+ i--; |
+ } |
+ for (i = 0; i < m_BaseLines.GetSize(); i++) { |
+ CTextBaseLine* pBaseLine = (CTextBaseLine*)m_BaseLines.GetAt(i); |
+ for (int j = 0; j < pBaseLine->m_TextList.GetSize(); j++) { |
+ CTextBox* pTextBox = (CTextBox*)pBaseLine->m_TextList.GetAt(j); |
+ if (IsNumber(pTextBox->m_Text)) { |
+ pTextBox->m_pColumn = FindColumn(pTextBox->m_Right); |
+ } |
+ } |
+ } |
 }
-CTextBaseLine::CTextBaseLine() |
-{ |
- m_Top = -100000; |
- m_Bottom = 100000; |
- m_MaxFontSizeV = 0; |
+// Returns the first column in m_TextColumns whose m_AvgPos lies strictly |
+// within 1 unit of xpos, or NULL when there is none. |
+CTextColumn* CTextPage::FindColumn(FX_FLOAT xpos) { |
+ for (int i = 0; i < m_TextColumns.GetSize(); i++) { |
+ CTextColumn* pColumn = (CTextColumn*)m_TextColumns.GetAt(i); |
+ if (pColumn->m_AvgPos < xpos + 1 && pColumn->m_AvgPos > xpos - 1) { |
+ return pColumn; |
+ } |
+ } |
+ return NULL; |
 }
-CTextBaseLine::~CTextBaseLine() |
-{ |
- for (int i = 0; i < m_TextList.GetSize(); i ++) { |
- CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
- delete pText; |
- } |
+// Currently a no-op: the body is empty and pTextObj is not used. |
+void CTextPage::BreakSpace(CPDF_TextObject* pTextObj) {} |
+// Starts with an inverted vertical extent (m_Top far below m_Bottom) and a |
+// zero maximum font size; m_BaseLine is not set here. |
+CTextBaseLine::CTextBaseLine() { |
+ m_Top = -100000; |
+ m_Bottom = 100000; |
+ m_MaxFontSizeV = 0; |
 }
-void CTextBaseLine::InsertTextBox(FX_FLOAT leftx, FX_FLOAT rightx, FX_FLOAT topy, FX_FLOAT bottomy, |
- FX_FLOAT spacew, FX_FLOAT fontsize_v, const CFX_WideString& text) |
-{ |
- if (m_Top < topy) { |
- m_Top = topy; |
- } |
- if (m_Bottom > bottomy) { |
- m_Bottom = bottomy; |
- } |
- if (m_MaxFontSizeV < fontsize_v) { |
- m_MaxFontSizeV = fontsize_v; |
- } |
- int i; |
- for (i = 0; i < m_TextList.GetSize(); i ++) { |
- CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
- if (pText->m_Left > leftx) { |
- break; |
- } |
- } |
- CTextBox* pText = new CTextBox; |
- pText->m_Text = text; |
- pText->m_Left = leftx; |
- pText->m_Right = rightx; |
- pText->m_Top = topy; |
- pText->m_Bottom = bottomy; |
- pText->m_SpaceWidth = spacew; |
- pText->m_FontSizeV = fontsize_v; |
- pText->m_pColumn = NULL; |
- m_TextList.InsertAt(i, pText); |
+// Deletes every CTextBox stored in m_TextList. |
+CTextBaseLine::~CTextBaseLine() { |
+ for (int i = 0; i < m_TextList.GetSize(); i++) { |
+ CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
+ delete pText; |
+ } |
 }
-FX_BOOL GetIntersection(FX_FLOAT low1, FX_FLOAT high1, FX_FLOAT low2, FX_FLOAT high2, |
- FX_FLOAT& interlow, FX_FLOAT& interhigh); |
-FX_BOOL CTextBaseLine::CanMerge(CTextBaseLine* pOther) |
-{ |
- FX_FLOAT inter_top, inter_bottom; |
- if (!GetIntersection(m_Bottom, m_Top, pOther->m_Bottom, pOther->m_Top, |
- inter_bottom, inter_top)) { |
- return FALSE; |
- } |
- FX_FLOAT inter_h = inter_top - inter_bottom; |
- if (inter_h < (m_Top - m_Bottom) / 2 && inter_h < (pOther->m_Top - pOther->m_Bottom) / 2) { |
+// Adds a new text box to this base line. |
+// The line's m_Top, m_Bottom and m_MaxFontSizeV are widened to include the |
+// box. The box is inserted before the first existing box whose m_Left is |
+// greater than leftx, or at the end when there is none. |
+void CTextBaseLine::InsertTextBox(FX_FLOAT leftx, |
+ FX_FLOAT rightx, |
+ FX_FLOAT topy, |
+ FX_FLOAT bottomy, |
+ FX_FLOAT spacew, |
+ FX_FLOAT fontsize_v, |
+ const CFX_WideString& text) { |
+ if (m_Top < topy) { |
+ m_Top = topy; |
+ } |
+ if (m_Bottom > bottomy) { |
+ m_Bottom = bottomy; |
+ } |
+ if (m_MaxFontSizeV < fontsize_v) { |
+ m_MaxFontSizeV = fontsize_v; |
+ } |
+ int i; |
+ for (i = 0; i < m_TextList.GetSize(); i++) { |
+ CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
+ if (pText->m_Left > leftx) { |
+ break; |
+ } |
+ } |
+ CTextBox* pText = new CTextBox; |
+ pText->m_Text = text; |
+ pText->m_Left = leftx; |
+ pText->m_Right = rightx; |
+ pText->m_Top = topy; |
+ pText->m_Bottom = bottomy; |
+ pText->m_SpaceWidth = spacew; |
+ pText->m_FontSizeV = fontsize_v; |
+ pText->m_pColumn = NULL; |
+ m_TextList.InsertAt(i, pText); |
+} |
+FX_BOOL GetIntersection(FX_FLOAT low1, |
+ FX_FLOAT high1, |
+ FX_FLOAT low2, |
+ FX_FLOAT high2, |
+ FX_FLOAT& interlow, |
+ FX_FLOAT& interhigh); |
+// Decides whether this base line can be merged with pOther. |
+// Returns FALSE when the vertical ranges do not intersect, or when the |
+// intersection is shorter than half the height of both lines. Otherwise every |
+// pair of horizontally overlapping boxes (overlap of at least half a space |
+// width of either box) is checked against the base line distance dy. |
+FX_BOOL CTextBaseLine::CanMerge(CTextBaseLine* pOther) { |
+ FX_FLOAT inter_top, inter_bottom; |
+ if (!GetIntersection(m_Bottom, m_Top, pOther->m_Bottom, pOther->m_Top, |
+ inter_bottom, inter_top)) { |
+ return FALSE; |
+ } |
+ FX_FLOAT inter_h = inter_top - inter_bottom; |
+ if (inter_h < (m_Top - m_Bottom) / 2 && |
+ inter_h < (pOther->m_Top - pOther->m_Bottom) / 2) { |
+ return FALSE; |
+ } |
+ FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine); |
+ for (int i = 0; i < m_TextList.GetSize(); i++) { |
+ CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
+ for (int j = 0; j < pOther->m_TextList.GetSize(); j++) { |
+ CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j); |
+ FX_FLOAT inter_left, inter_right; |
+ if (!GetIntersection(pText->m_Left, pText->m_Right, pOtherText->m_Left, |
+ pOtherText->m_Right, inter_left, inter_right)) { |
+ continue; |
+ } |
+ FX_FLOAT inter_w = inter_right - inter_left; |
+ if (inter_w < pText->m_SpaceWidth / 2 && |
+ inter_w < pOtherText->m_SpaceWidth / 2) { |
+ continue; |
+ } |
+ // NOTE(review): the line-height check above uses m_Top - m_Bottom, but |
+ // this test uses m_Bottom - m_Top. If m_Top >= m_Bottom for a box, the |
+ // right-hand side is non-positive and the test is always true because |
+ // dy >= 0 - confirm the operand order is intended. |
+ if (dy >= (pText->m_Bottom - pText->m_Top) / 2 || |
+ dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) { |
 return FALSE; |
+ } |
 } |
- FX_FLOAT dy = (FX_FLOAT)FXSYS_fabs(m_BaseLine - pOther->m_BaseLine); |
- for (int i = 0; i < m_TextList.GetSize(); i ++) { |
- CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
- for (int j = 0; j < pOther->m_TextList.GetSize(); j ++) { |
- CTextBox* pOtherText = (CTextBox*)pOther->m_TextList.GetAt(j); |
- FX_FLOAT inter_left, inter_right; |
- if (!GetIntersection(pText->m_Left, pText->m_Right, |
- pOtherText->m_Left, pOtherText->m_Right, inter_left, inter_right)) { |
- continue; |
- } |
- FX_FLOAT inter_w = inter_right - inter_left; |
- if (inter_w < pText->m_SpaceWidth / 2 && inter_w < pOtherText->m_SpaceWidth / 2) { |
- continue; |
- } |
- if (dy >= (pText->m_Bottom - pText->m_Top) / 2 || |
- dy >= (pOtherText->m_Bottom - pOtherText->m_Top) / 2) { |
- return FALSE; |
- } |
- } |
- } |
- return TRUE; |
+ } |
+ return TRUE; |
 }
-void CTextBaseLine::Merge(CTextBaseLine* pOther) |
-{ |
- for (int i = 0; i < pOther->m_TextList.GetSize(); i ++) { |
- CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i); |
- InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bottom, |
- pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text); |
- } |
+void CTextBaseLine::Merge(CTextBaseLine* pOther) { |
+ for (int i = 0; i < pOther->m_TextList.GetSize(); i++) { |
+ CTextBox* pText = (CTextBox*)pOther->m_TextList.GetAt(i); |
+ InsertTextBox(pText->m_Left, pText->m_Right, pText->m_Top, pText->m_Bottom, |
+ pText->m_SpaceWidth, pText->m_FontSizeV, pText->m_Text); |
+ } |
} |
-FX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) |
-{ |
- int i; |
- for (i = 0; i < m_TextList.GetSize(); i ++) { |
- CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
- if (pText->m_Text != L" ") { |
- break; |
- } |
- } |
- if (i == m_TextList.GetSize()) { |
- return FALSE; |
- } |
+FX_BOOL CTextBaseLine::GetWidth(FX_FLOAT& leftx, FX_FLOAT& rightx) { |
+ int i; |
+ for (i = 0; i < m_TextList.GetSize(); i++) { |
CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
- leftx = pText->m_Left; |
- for (i = m_TextList.GetSize() - 1; i >= 0; i --) { |
- CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
- if (pText->m_Text != L" ") { |
- break; |
- } |
+ if (pText->m_Text != L" ") { |
+ break; |
+ } |
+ } |
+ if (i == m_TextList.GetSize()) { |
+ return FALSE; |
+ } |
+ CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
+ leftx = pText->m_Left; |
+ for (i = m_TextList.GetSize() - 1; i >= 0; i--) { |
+ CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
+ if (pText->m_Text != L" ") { |
+ break; |
} |
- pText = (CTextBox*)m_TextList.GetAt(i); |
- rightx = pText->m_Right; |
- return TRUE; |
+ } |
+ pText = (CTextBox*)m_TextList.GetAt(i); |
+ rightx = pText->m_Right; |
+ return TRUE; |
} |
-void CTextBaseLine::MergeBoxes() |
-{ |
- int i = 0; |
- while (1) { |
- if (i >= m_TextList.GetSize() - 1) { |
- break; |
- } |
- CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i); |
- CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1); |
- FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right; |
- FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0) ? |
- pNextText->m_SpaceWidth : pThisText->m_SpaceWidth; |
- if (spacew > 0.0 && dx < spacew * 2) { |
- pThisText->m_Right = pNextText->m_Right; |
- if (dx > spacew * 1.5) { |
- pThisText->m_Text += L" "; |
- } else if (dx > spacew / 3) { |
- pThisText->m_Text += L' '; |
- } |
- pThisText->m_Text += pNextText->m_Text; |
- pThisText->m_SpaceWidth = pNextText->m_SpaceWidth == 0.0 ? |
- spacew : pNextText->m_SpaceWidth; |
- m_TextList.RemoveAt(i + 1); |
- delete pNextText; |
- } else { |
- i ++; |
- } |
+void CTextBaseLine::MergeBoxes() { |
+ int i = 0; |
+ while (1) { |
+ if (i >= m_TextList.GetSize() - 1) { |
+ break; |
+ } |
+ CTextBox* pThisText = (CTextBox*)m_TextList.GetAt(i); |
+ CTextBox* pNextText = (CTextBox*)m_TextList.GetAt(i + 1); |
+ FX_FLOAT dx = pNextText->m_Left - pThisText->m_Right; |
+ FX_FLOAT spacew = (pThisText->m_SpaceWidth == 0.0) |
+ ? pNextText->m_SpaceWidth |
+ : pThisText->m_SpaceWidth; |
+ if (spacew > 0.0 && dx < spacew * 2) { |
+ pThisText->m_Right = pNextText->m_Right; |
+ if (dx > spacew * 1.5) { |
+ pThisText->m_Text += L" "; |
+ } else if (dx > spacew / 3) { |
+ pThisText->m_Text += L' '; |
+ } |
+ pThisText->m_Text += pNextText->m_Text; |
+ pThisText->m_SpaceWidth = |
+ pNextText->m_SpaceWidth == 0.0 ? spacew : pNextText->m_SpaceWidth; |
+ m_TextList.RemoveAt(i + 1); |
+ delete pNextText; |
+ } else { |
+ i++; |
} |
+ } |
} |
-void CTextBaseLine::WriteOutput(CFX_WideString& str, FX_FLOAT leftx, FX_FLOAT pagewidth, |
- int iTextWidth) |
-{ |
- int lastpos = -1; |
- for (int i = 0; i < m_TextList.GetSize(); i ++) { |
- CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
- int xpos; |
- if (pText->m_pColumn) { |
- xpos = (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pagewidth + 0.5); |
- xpos -= pText->m_Text.GetLength(); |
- } else { |
- xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5); |
- } |
- if (xpos <= lastpos) { |
- xpos = lastpos + 1; |
- } |
- for (int j = lastpos + 1; j < xpos; j ++) { |
- str += ' '; |
- } |
- CFX_WideString sSrc(pText->m_Text); |
- NormalizeString(sSrc); |
- str += sSrc; |
- str += ' '; |
- lastpos = xpos + pText->m_Text.GetLength(); |
+void CTextBaseLine::WriteOutput(CFX_WideString& str, |
+ FX_FLOAT leftx, |
+ FX_FLOAT pagewidth, |
+ int iTextWidth) { |
+ int lastpos = -1; |
+ for (int i = 0; i < m_TextList.GetSize(); i++) { |
+ CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
+ int xpos; |
+ if (pText->m_pColumn) { |
+ xpos = |
+ (int)((pText->m_pColumn->m_AvgPos - leftx) * iTextWidth / pagewidth + |
+ 0.5); |
+ xpos -= pText->m_Text.GetLength(); |
+ } else { |
+ xpos = (int)((pText->m_Left - leftx) * iTextWidth / pagewidth + 0.5); |
+ } |
+ if (xpos <= lastpos) { |
+ xpos = lastpos + 1; |
} |
+ for (int j = lastpos + 1; j < xpos; j++) { |
+ str += ' '; |
+ } |
+ CFX_WideString sSrc(pText->m_Text); |
+ NormalizeString(sSrc); |
+ str += sSrc; |
+ str += ' '; |
+ lastpos = xpos + pText->m_Text.GetLength(); |
+ } |
} |
-void CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars) |
-{ |
- minchars = 0; |
- for (int i = 0; i < m_TextList.GetSize(); i ++) { |
- CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
- if (pText->m_Right - pText->m_Left < 0.002) { |
- continue; |
- } |
- count += pText->m_Text.GetLength(); |
- width += pText->m_Right - pText->m_Left; |
- minchars += pText->m_Text.GetLength() + 1; |
+void CTextBaseLine::CountChars(int& count, FX_FLOAT& width, int& minchars) { |
+ minchars = 0; |
+ for (int i = 0; i < m_TextList.GetSize(); i++) { |
+ CTextBox* pText = (CTextBox*)m_TextList.GetAt(i); |
+ if (pText->m_Right - pText->m_Left < 0.002) { |
+ continue; |
} |
+ count += pText->m_Text.GetLength(); |
+ width += pText->m_Right - pText->m_Left; |
+ minchars += pText->m_Text.GetLength() + 1; |
+ } |
} |
#define PI 3.1415926535897932384626433832795 |
-static void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox) |
-{ |
- int total_count = 0, rotated_count[3] = {0, 0, 0}; |
- FX_POSITION pos = page.GetFirstObjectPosition(); |
- while (pos) { |
- CPDF_PageObject* pObj = page.GetNextObject(pos); |
- if (pObj->m_Type != PDFPAGE_TEXT) { |
- continue; |
- } |
- total_count ++; |
- CPDF_TextObject* pText = (CPDF_TextObject*)pObj; |
- FX_FLOAT angle = pText->m_TextState.GetBaselineAngle(); |
- if (angle == 0.0) { |
- continue; |
- } |
- int degree = (int)(angle * 180 / PI + 0.5); |
- if (degree % 90) { |
- continue; |
- } |
- if (degree < 0) { |
- degree += 360; |
- } |
- int index = degree / 90 % 3 - 1; |
- if (index < 0) { |
- continue; |
- } |
- rotated_count[index] ++; |
- } |
- if (total_count == 0) { |
- return; |
- } |
- CFX_AffineMatrix matrix; |
- if (rotated_count[0] > total_count * 2 / 3) { |
- matrix.Set(0, -1, 1, 0, 0, page.GetPageHeight()); |
- } else if (rotated_count[1] > total_count * 2 / 3) { |
- matrix.Set(-1, 0, 0, -1, page.GetPageWidth(), page.GetPageHeight()); |
- } else if (rotated_count[2] > total_count * 2 / 3) { |
- matrix.Set(0, 1, -1, 0, page.GetPageWidth(), 0); |
- } else { |
- return; |
- } |
- page.Transform(matrix); |
- page_bbox.Transform(&matrix); |
+static void CheckRotate(CPDF_Page& page, CFX_FloatRect& page_bbox) { |
+ int total_count = 0, rotated_count[3] = {0, 0, 0}; |
+ FX_POSITION pos = page.GetFirstObjectPosition(); |
+ while (pos) { |
+ CPDF_PageObject* pObj = page.GetNextObject(pos); |
+ if (pObj->m_Type != PDFPAGE_TEXT) { |
+ continue; |
+ } |
+ total_count++; |
+ CPDF_TextObject* pText = (CPDF_TextObject*)pObj; |
+ FX_FLOAT angle = pText->m_TextState.GetBaselineAngle(); |
+ if (angle == 0.0) { |
+ continue; |
+ } |
+ int degree = (int)(angle * 180 / PI + 0.5); |
+ if (degree % 90) { |
+ continue; |
+ } |
+ if (degree < 0) { |
+ degree += 360; |
+ } |
+ int index = degree / 90 % 3 - 1; |
+ if (index < 0) { |
+ continue; |
+ } |
+ rotated_count[index]++; |
+ } |
+ if (total_count == 0) { |
+ return; |
+ } |
+ CFX_AffineMatrix matrix; |
+ if (rotated_count[0] > total_count * 2 / 3) { |
+ matrix.Set(0, -1, 1, 0, 0, page.GetPageHeight()); |
+ } else if (rotated_count[1] > total_count * 2 / 3) { |
+ matrix.Set(-1, 0, 0, -1, page.GetPageWidth(), page.GetPageHeight()); |
+ } else if (rotated_count[2] > total_count * 2 / 3) { |
+ matrix.Set(0, 1, -1, 0, page.GetPageWidth(), 0); |
+ } else { |
+ return; |
+ } |
+ page.Transform(matrix); |
+ page_bbox.Transform(&matrix); |
} |
-void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage, |
- int iMinWidth, FX_DWORD flags) |
-{ |
- lines.RemoveAll(); |
- if (pPage == NULL) { |
- return; |
- } |
- CPDF_Page page; |
- page.Load(pDoc, pPage); |
- CPDF_ParseOptions options; |
- options.m_bTextOnly = TRUE; |
- options.m_bSeparateForm = FALSE; |
- page.ParseContent(&options); |
- CFX_FloatRect page_bbox = page.GetPageBBox(); |
- if (flags & PDF2TXT_AUTO_ROTATE) { |
- CheckRotate(page, page_bbox); |
- } |
- CTextPage texts; |
- texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH; |
- texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN; |
- texts.m_bBreakSpace = TRUE; |
- FX_POSITION pos = page.GetFirstObjectPosition(); |
- while (pos) { |
- CPDF_PageObject* pObject = page.GetNextObject(pos); |
- if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) { |
- CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Right, pObject->m_Top); |
- if (!page_bbox.Contains(rect)) { |
- continue; |
- } |
- } |
- texts.ProcessObject(pObject); |
- } |
- texts.WriteOutput(lines, iMinWidth); |
+void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, |
+ CPDF_Document* pDoc, |
+ CPDF_Dictionary* pPage, |
+ int iMinWidth, |
+ FX_DWORD flags) { |
+ lines.RemoveAll(); |
+ if (pPage == NULL) { |
+ return; |
+ } |
+ CPDF_Page page; |
+ page.Load(pDoc, pPage); |
+ CPDF_ParseOptions options; |
+ options.m_bTextOnly = TRUE; |
+ options.m_bSeparateForm = FALSE; |
+ page.ParseContent(&options); |
+ CFX_FloatRect page_bbox = page.GetPageBBox(); |
+ if (flags & PDF2TXT_AUTO_ROTATE) { |
+ CheckRotate(page, page_bbox); |
+ } |
+ CTextPage texts; |
+ texts.m_bAutoWidth = flags & PDF2TXT_AUTO_WIDTH; |
+ texts.m_bKeepColumn = flags & PDF2TXT_KEEP_COLUMN; |
+ texts.m_bBreakSpace = TRUE; |
+ FX_POSITION pos = page.GetFirstObjectPosition(); |
+ while (pos) { |
+ CPDF_PageObject* pObject = page.GetNextObject(pos); |
+ if (!(flags & PDF2TXT_INCLUDE_INVISIBLE)) { |
+ CFX_FloatRect rect(pObject->m_Left, pObject->m_Bottom, pObject->m_Right, |
+ pObject->m_Top); |
+ if (!page_bbox.Contains(rect)) { |
+ continue; |
+ } |
+ } |
+ texts.ProcessObject(pObject); |
+ } |
+ texts.WriteOutput(lines, iMinWidth); |
} |
-void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage, |
- int iMinWidth, FX_DWORD flags) |
-{ |
- lines.RemoveAll(); |
- CFX_WideStringArray wlines; |
- PDF_GetPageText_Unicode(wlines, pDoc, pPage, iMinWidth, flags); |
- for (int i = 0; i < wlines.GetSize(); i ++) { |
- CFX_WideString wstr = wlines[i]; |
- CFX_ByteString str; |
- for (int c = 0; c < wstr.GetLength(); c ++) { |
- str += CharFromUnicodeAlt(wstr[c], FXSYS_GetACP(), "?"); |
- } |
- lines.Add(str); |
- } |
+void PDF_GetPageText(CFX_ByteStringArray& lines, |
+ CPDF_Document* pDoc, |
+ CPDF_Dictionary* pPage, |
+ int iMinWidth, |
+ FX_DWORD flags) { |
+ lines.RemoveAll(); |
+ CFX_WideStringArray wlines; |
+ PDF_GetPageText_Unicode(wlines, pDoc, pPage, iMinWidth, flags); |
+ for (int i = 0; i < wlines.GetSize(); i++) { |
+ CFX_WideString wstr = wlines[i]; |
+ CFX_ByteString str; |
+ for (int c = 0; c < wstr.GetLength(); c++) { |
+ str += CharFromUnicodeAlt(wstr[c], FXSYS_GetACP(), "?"); |
+ } |
+ lines.Add(str); |
+ } |
} |
-extern void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage, FX_BOOL bUseLF, |
+extern void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, |
+ CPDF_PageObjects* pPage, |
+ FX_BOOL bUseLF, |
CFX_PtrArray* pObjArray); |
-void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPDF_Dictionary* pPage, FX_DWORD flags) |
-{ |
- buffer.EstimateSize(0, 10240); |
- CPDF_Page page; |
- page.Load(pDoc, pPage); |
- CPDF_ParseOptions options; |
- options.m_bTextOnly = TRUE; |
- options.m_bSeparateForm = FALSE; |
- page.ParseContent(&options); |
- _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL); |
+void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, |
+ CPDF_Document* pDoc, |
+ CPDF_Dictionary* pPage, |
+ FX_DWORD flags) { |
+ buffer.EstimateSize(0, 10240); |
+ CPDF_Page page; |
+ page.Load(pDoc, pPage); |
+ CPDF_ParseOptions options; |
+ options.m_bTextOnly = TRUE; |
+ options.m_bSeparateForm = FALSE; |
+ page.ParseContent(&options); |
+ _PDF_GetTextStream_Unicode(buffer, &page, TRUE, NULL); |
} |