| Index: core/src/fpdftext/fpdf_text_int.cpp
|
| diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp
|
| index 3b633a623f6361fdf52c630513489e23e38ff43d..67411e31b8481fd4230cbfffcca9dc28a5dfc9e4 100644
|
| --- a/core/src/fpdftext/fpdf_text_int.cpp
|
| +++ b/core/src/fpdftext/fpdf_text_int.cpp
|
| @@ -81,11 +81,6 @@ const FX_FLOAT kDefaultFontSize = 1.0f;
|
|
|
| } // namespace
|
|
|
| -CPDFText_ParseOptions::CPDFText_ParseOptions()
|
| - : m_bGetCharCodeOnly(FALSE),
|
| - m_bNormalizeObjs(TRUE),
|
| - m_bOutputHyphen(FALSE) {}
|
| -
|
| IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage,
|
| int flags) {
|
| return new CPDF_TextPage(pPage, flags);
|
| @@ -123,9 +118,6 @@ CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags)
|
| (int)pPage->GetPageHeight(), 0);
|
| }
|
|
|
| -void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) {
|
| - m_ParseOptions.m_bNormalizeObjs = bNormalize;
|
| -}
|
| bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) {
|
| switch (charInfo.m_Unicode) {
|
| case 0x2:
|
| @@ -151,55 +143,49 @@ FX_BOOL CPDF_TextPage::ParseTextPage() {
|
| m_pPreTextObj = NULL;
|
| ProcessObject();
|
| m_bIsParsed = true;
|
| - if (!m_ParseOptions.m_bGetCharCodeOnly) {
|
| - m_CharIndex.RemoveAll();
|
| - int nCount = m_charList.GetSize();
|
| - if (nCount) {
|
| - m_CharIndex.Add(0);
|
| - }
|
| - for (int i = 0; i < nCount; i++) {
|
| - int indexSize = m_CharIndex.GetSize();
|
| - FX_BOOL bNormal = FALSE;
|
| - PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i);
|
| - if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
|
| - bNormal = TRUE;
|
| - } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) {
|
| - bNormal = FALSE;
|
| + m_CharIndex.RemoveAll();
|
| + int nCount = m_charList.GetSize();
|
| + if (nCount) {
|
| + m_CharIndex.Add(0);
|
| + }
|
| + for (int i = 0; i < nCount; i++) {
|
| + int indexSize = m_CharIndex.GetSize();
|
| + FX_BOOL bNormal = FALSE;
|
| + PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i);
|
| + if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
|
| + bNormal = TRUE;
|
| + } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) {
|
| + bNormal = FALSE;
|
| + } else {
|
| + bNormal = TRUE;
|
| + }
|
| + if (bNormal) {
|
| + if (indexSize % 2) {
|
| + m_CharIndex.Add(1);
|
| } else {
|
| - bNormal = TRUE;
|
| + if (indexSize <= 0) {
|
| + continue;
|
| + }
|
| + m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1);
|
| }
|
| - if (bNormal) {
|
| - if (indexSize % 2) {
|
| - m_CharIndex.Add(1);
|
| - } else {
|
| - if (indexSize <= 0) {
|
| - continue;
|
| - }
|
| - m_CharIndex.SetAt(indexSize - 1,
|
| - m_CharIndex.GetAt(indexSize - 1) + 1);
|
| + } else {
|
| + if (indexSize % 2) {
|
| + if (indexSize <= 0) {
|
| + continue;
|
| }
|
| + m_CharIndex.SetAt(indexSize - 1, i + 1);
|
| } else {
|
| - if (indexSize % 2) {
|
| - if (indexSize <= 0) {
|
| - continue;
|
| - }
|
| - m_CharIndex.SetAt(indexSize - 1, i + 1);
|
| - } else {
|
| - m_CharIndex.Add(i + 1);
|
| - }
|
| + m_CharIndex.Add(i + 1);
|
| }
|
| }
|
| - int indexSize = m_CharIndex.GetSize();
|
| - if (indexSize % 2) {
|
| - m_CharIndex.RemoveAt(indexSize - 1);
|
| - }
|
| + }
|
| + int indexSize = m_CharIndex.GetSize();
|
| + if (indexSize % 2) {
|
| + m_CharIndex.RemoveAt(indexSize - 1);
|
| }
|
| return TRUE;
|
| }
|
| int CPDF_TextPage::CountChars() const {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly) {
|
| - return m_TextBuf.GetSize();
|
| - }
|
| return m_charList.GetSize();
|
| }
|
| int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const {
|
| @@ -232,9 +218,6 @@ int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const {
|
| void CPDF_TextPage::GetRectArray(int start,
|
| int nCount,
|
| CFX_RectArray& rectArray) const {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly) {
|
| - return;
|
| - }
|
| if (start < 0 || nCount == 0) {
|
| return;
|
| }
|
| @@ -321,7 +304,7 @@ void CPDF_TextPage::GetRectArray(int start,
|
| int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
|
| FX_FLOAT xTolerance,
|
| FX_FLOAT yTolerance) const {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
|
| + if (!m_bIsParsed)
|
| return -3;
|
|
|
| int pos = 0;
|
| @@ -366,7 +349,7 @@ int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
|
| }
|
| CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
|
| CFX_WideString strText;
|
| - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
|
| + if (!m_bIsParsed)
|
| return strText;
|
|
|
| int nCount = m_charList.GetSize();
|
| @@ -404,7 +387,7 @@ CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
|
| }
|
| void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect,
|
| CFX_RectArray& resRectArray) const {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
|
| + if (!m_bIsParsed)
|
| return;
|
|
|
| CFX_FloatRect curRect;
|
| @@ -454,15 +437,12 @@ int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x,
|
| FX_FLOAT y,
|
| FX_FLOAT xTolerance,
|
| FX_FLOAT yTolerance) const {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly) {
|
| - return -3;
|
| - }
|
| CPDF_Point point(x, y);
|
| return GetIndexAtPos(point, xTolerance, yTolerance);
|
| }
|
|
|
| void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
|
| + if (!m_bIsParsed)
|
| return;
|
|
|
| if (index < 0 || index >= m_charList.GetSize())
|
| @@ -573,7 +553,7 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
|
| return m_TextBuf.GetWideString().Mid(startindex, nCount);
|
| }
|
| int CPDF_TextPage::CountRects(int start, int nCount) {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0)
|
| + if (!m_bIsParsed || start < 0)
|
| return -1;
|
|
|
| if (nCount == -1 || nCount + start > m_charList.GetSize()) {
|
| @@ -588,7 +568,7 @@ void CPDF_TextPage::GetRect(int rectIndex,
|
| FX_FLOAT& top,
|
| FX_FLOAT& right,
|
| FX_FLOAT& bottom) const {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
|
| + if (!m_bIsParsed)
|
| return;
|
|
|
| if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
|
| @@ -601,9 +581,6 @@ void CPDF_TextPage::GetRect(int rectIndex,
|
| }
|
|
|
| FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly) {
|
| - return FALSE;
|
| - }
|
| if (end == start) {
|
| return FALSE;
|
| }
|
| @@ -641,9 +618,6 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) {
|
|
|
| FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect,
|
| int& Rotate) {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly) {
|
| - return FALSE;
|
| - }
|
| int start, end, count,
|
| n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom,
|
| TRUE);
|
| @@ -661,10 +635,10 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect,
|
| return GetBaselineRotate(start, end, Rotate);
|
| }
|
| FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
|
| + if (!m_bIsParsed)
|
| return FALSE;
|
|
|
| - if (rectIndex < 0 || rectIndex > m_SelRects.GetSize())
|
| + if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
|
| return FALSE;
|
|
|
| CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
|
| @@ -675,9 +649,6 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
|
| FX_FLOAT right,
|
| FX_FLOAT bottom,
|
| FX_BOOL bContains) {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly)
|
| - return -1;
|
| -
|
| m_Segment.RemoveAll();
|
| if (!m_bIsParsed)
|
| return -1;
|
| @@ -751,9 +722,6 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
|
| return m_Segment.GetSize();
|
| }
|
| void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly) {
|
| - return;
|
| - }
|
| if (index < 0 || index >= m_Segment.GetSize()) {
|
| return;
|
| }
|
| @@ -761,7 +729,7 @@ void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
|
| count = m_Segment.GetAt(index).m_nCount;
|
| }
|
| int CPDF_TextPage::GetWordBreak(int index, int direction) const {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
|
| + if (!m_bIsParsed)
|
| return -1;
|
|
|
| if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT)
|
| @@ -1007,9 +975,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) {
|
| Info2.m_Unicode = pDst[nIndex];
|
| Info2.m_Flag = FPDFTEXT_CHAR_PIECE;
|
| m_TextBuf.AppendChar(Info2.m_Unicode);
|
| - if (!m_ParseOptions.m_bGetCharCodeOnly) {
|
| - m_charList.Add(Info2);
|
| - }
|
| + m_charList.Add(Info2);
|
| }
|
| FX_Free(pDst);
|
| return;
|
| @@ -1019,9 +985,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) {
|
| } else {
|
| Info.m_Index = -1;
|
| }
|
| - if (!m_ParseOptions.m_bGetCharCodeOnly) {
|
| - m_charList.Add(Info);
|
| - }
|
| + m_charList.Add(Info);
|
| }
|
| void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) {
|
| PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
|
| @@ -1038,9 +1002,7 @@ void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) {
|
| Info2.m_Unicode = pDst[nIndex];
|
| Info2.m_Flag = FPDFTEXT_CHAR_PIECE;
|
| m_TextBuf.AppendChar(Info2.m_Unicode);
|
| - if (!m_ParseOptions.m_bGetCharCodeOnly) {
|
| - m_charList.Add(Info2);
|
| - }
|
| + m_charList.Add(Info2);
|
| }
|
| FX_Free(pDst);
|
| return;
|
| @@ -1050,9 +1012,7 @@ void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) {
|
| } else {
|
| Info.m_Index = -1;
|
| }
|
| - if (!m_ParseOptions.m_bGetCharCodeOnly) {
|
| - m_charList.Add(Info);
|
| - }
|
| + m_charList.Add(Info);
|
| }
|
| void CPDF_TextPage::CloseTempLine() {
|
| int count1 = m_TempCharList.GetSize();
|
| @@ -1251,31 +1211,28 @@ void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj,
|
| return;
|
| }
|
| int i = 0;
|
| - if (m_ParseOptions.m_bNormalizeObjs) {
|
| - for (i = count - 1; i >= 0; i--) {
|
| - PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i);
|
| - CFX_Matrix prev_matrix;
|
| - prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix);
|
| - FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(),
|
| - Prev_y = prev_Obj.m_pTextObj->GetPosY();
|
| - prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y);
|
| - m_DisplayMatrix.Transform(Prev_x, Prev_y);
|
| - if (this_x >= Prev_x) {
|
| - if (i == count - 1) {
|
| - m_LineObj.Add(Obj);
|
| - } else {
|
| - m_LineObj.InsertAt(i + 1, Obj);
|
| - }
|
| - break;
|
| + for (i = count - 1; i >= 0; i--) {
|
| + PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i);
|
| + CFX_Matrix prev_matrix;
|
| + prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix);
|
| + FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(),
|
| + Prev_y = prev_Obj.m_pTextObj->GetPosY();
|
| + prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y);
|
| + m_DisplayMatrix.Transform(Prev_x, Prev_y);
|
| + if (this_x >= Prev_x) {
|
| + if (i == count - 1) {
|
| + m_LineObj.Add(Obj);
|
| + } else {
|
| + m_LineObj.InsertAt(i + 1, Obj);
|
| }
|
| + break;
|
| }
|
| - if (i < 0) {
|
| - m_LineObj.InsertAt(0, Obj);
|
| - }
|
| - } else {
|
| - m_LineObj.Add(Obj);
|
| + }
|
| + if (i < 0) {
|
| + m_LineObj.InsertAt(0, Obj);
|
| }
|
| }
|
| +
|
| int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
|
| CPDF_TextObject* pTextObj = Obj.m_pTextObj;
|
| CPDF_ContentMarkData* pMarkData =
|
| @@ -1524,27 +1481,22 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
|
| } else if (result == 2) {
|
| CloseTempLine();
|
| if (m_TextBuf.GetSize()) {
|
| - if (m_ParseOptions.m_bGetCharCodeOnly) {
|
| + if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) {
|
| m_TextBuf.AppendChar(TEXT_RETURN_CHAR);
|
| - m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
|
| - } else {
|
| - if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) {
|
| - m_TextBuf.AppendChar(TEXT_RETURN_CHAR);
|
| - if (!formMatrix.IsIdentity()) {
|
| - generateChar.m_Matrix.Copy(formMatrix);
|
| - }
|
| - m_charList.Add(generateChar);
|
| + if (!formMatrix.IsIdentity()) {
|
| + generateChar.m_Matrix.Copy(formMatrix);
|
| }
|
| - if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) {
|
| - m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
|
| - if (!formMatrix.IsIdentity()) {
|
| - generateChar.m_Matrix.Copy(formMatrix);
|
| - }
|
| - m_charList.Add(generateChar);
|
| + m_charList.Add(generateChar);
|
| + }
|
| + if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) {
|
| + m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
|
| + if (!formMatrix.IsIdentity()) {
|
| + generateChar.m_Matrix.Copy(formMatrix);
|
| }
|
| + m_charList.Add(generateChar);
|
| }
|
| }
|
| - } else if (result == 3 && !m_ParseOptions.m_bOutputHyphen) {
|
| + } else if (result == 3) {
|
| int32_t nChars = pTextObj->CountChars();
|
| if (nChars == 1) {
|
| CPDF_TextObjectItem item;
|
| @@ -1985,8 +1937,7 @@ FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
|
| pTextObj2->m_Right, pTextObj2->m_Top);
|
| CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom,
|
| pTextObj1->m_Right, pTextObj1->m_Top);
|
| - if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty() &&
|
| - !m_ParseOptions.m_bGetCharCodeOnly) {
|
| + if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) {
|
| FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left);
|
| int nCount = m_charList.GetSize();
|
| if (nCount >= 2) {
|
|
|