Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2379)

Unified Diff: core/fpdftext/fpdf_text_int.cpp

Issue 2064223002: Clean up CPDF_TextPage. (Closed) Base URL: https://pdfium.googlesource.com/pdfium@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | core/fpdftext/include/cpdf_textpage.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: core/fpdftext/fpdf_text_int.cpp
diff --git a/core/fpdftext/fpdf_text_int.cpp b/core/fpdftext/fpdf_text_int.cpp
index 8ae2c1ef356d3ab842d570a2a0dbe4093f1d12ea..2aa13bf8a2468eb7cbc57c2fbdadf54315d8336d 100644
--- a/core/fpdftext/fpdf_text_int.cpp
+++ b/core/fpdftext/fpdf_text_int.cpp
@@ -150,7 +150,7 @@ CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags)
m_parserflag(flags),
m_pPreTextObj(nullptr),
m_bIsParsed(false),
- m_TextlineDir(-1),
+ m_TextlineDir(TextOrientation::Unknown),
m_CurlineRect(0, 0, 0, 0) {
m_TextBuf.EstimateSize(0, 10240);
pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
@@ -183,34 +183,25 @@ void CPDF_TextPage::ParseTextPage() {
m_bIsParsed = true;
m_CharIndex.clear();
int nCount = pdfium::CollectionSize<int>(m_CharList);
- if (nCount) {
+ if (nCount)
m_CharIndex.push_back(0);
- }
+
for (int i = 0; i < nCount; i++) {
int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
- FX_BOOL bNormal = FALSE;
const PAGECHAR_INFO& charinfo = m_CharList[i];
- if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
- bNormal = TRUE;
- } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) {
- bNormal = FALSE;
- } else {
- bNormal = TRUE;
- }
- if (bNormal) {
+ if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED ||
+ (charinfo.m_Unicode != 0 && !IsControlChar(charinfo))) {
if (indexSize % 2) {
m_CharIndex.push_back(1);
} else {
- if (indexSize <= 0) {
+ if (indexSize <= 0)
continue;
- }
m_CharIndex[indexSize - 1] += 1;
}
} else {
if (indexSize % 2) {
- if (indexSize <= 0) {
+ if (indexSize <= 0)
continue;
- }
m_CharIndex[indexSize - 1] = i + 1;
} else {
m_CharIndex.push_back(i + 1);
@@ -218,9 +209,8 @@ void CPDF_TextPage::ParseTextPage() {
}
}
int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
- if (indexSize % 2) {
+ if (indexSize % 2)
m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1);
- }
}
int CPDF_TextPage::CountChars() const {
@@ -685,7 +675,8 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
return m_Segments.GetSize();
}
-int32_t CPDF_TextPage::FindTextlineFlowDirection() {
+CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation()
dsinclair 2016/06/14 19:40:13 Is the CPDF_TextPage:: needed here?
Lei Zhang 2016/06/14 20:01:15 Yes
+ const {
const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth());
const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight());
std::vector<uint8_t> nHorizontalMask(nPageWidth);
@@ -695,7 +686,7 @@ int32_t CPDF_TextPage::FindTextlineFlowDirection() {
int32_t index = 0;
FX_FLOAT fLineHeight = 0.0f;
if (m_pPage->GetPageObjectList()->empty())
- return -1;
+ return TextOrientation::Unknown;
for (auto& pPageObj : *m_pPage->GetPageObjectList()) {
if (!pPageObj || !pPageObj->IsText())
@@ -749,33 +740,41 @@ int32_t CPDF_TextPage::FindTextlineFlowDirection() {
break;
}
nEndV = index;
- for (index = nStartV; index < nEndV; index++) {
+ for (index = nStartV; index < nEndV; index++)
nSumV += nVerticalMask[index];
- }
nSumV /= nEndV - nStartV;
- if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) {
- return 0;
- }
- if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) {
- return 1;
- }
- if (nSumH > 0.8f) {
- return 0;
- }
- if (nSumH - nSumV > 0.0f) {
- return 0;
- }
- if (nSumV - nSumH > 0.0f) {
- return 1;
+
+ if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight))
+ return TextOrientation::Horizontal;
+ if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight))
+ return TextOrientation::Vertical;
+
+ if (nSumH > 0.8f)
+ return TextOrientation::Horizontal;
+
+ if (nSumH > nSumV)
+ return TextOrientation::Horizontal;
+ if (nSumH < nSumV)
+ return TextOrientation::Vertical;
+ return TextOrientation::Unknown;
+}
+
+void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode,
+ const CFX_Matrix& formMatrix) {
+ PAGECHAR_INFO generateChar;
+ if (GenerateCharInfo(unicode, generateChar)) {
dsinclair 2016/06/14 19:40:12 if (!GenerateCharInfo(unicode, generateChar)) re[turn; — i.e. use an early return instead of nesting the body; comment truncated in this view]
Lei Zhang 2016/06/14 20:01:16 Done.
+ if (!formMatrix.IsIdentity())
+ generateChar.m_Matrix.Copy(formMatrix);
+ m_TempTextBuf.AppendChar(unicode);
+ m_TempCharList.push_back(generateChar);
}
- return -1;
}
void CPDF_TextPage::ProcessObject() {
if (m_pPage->GetPageObjectList()->empty())
return;
- m_TextlineDir = FindTextlineFlowDirection();
+ m_TextlineDir = FindTextlineFlowOrientation();
const CPDF_PageObjectList* pObjList = m_pPage->GetPageObjectList();
for (auto it = pObjList->begin(); it != pObjList->end(); ++it) {
if (CPDF_PageObject* pObj = it->get()) {
@@ -833,37 +832,14 @@ int CPDF_TextPage::GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const {
void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
PAGECHAR_INFO info) {
- if (!IsControlChar(info)) {
- info.m_Index = m_TextBuf.GetLength();
- if (wChar >= 0xFB00 && wChar <= 0xFB06) {
- FX_WCHAR* pDst = nullptr;
- FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
- if (nCount >= 1) {
- pDst = FX_Alloc(FX_WCHAR, nCount);
- Unicode_GetNormalization(wChar, pDst);
- for (int nIndex = 0; nIndex < nCount; nIndex++) {
- PAGECHAR_INFO info2 = info;
- info2.m_Unicode = pDst[nIndex];
- info2.m_Flag = FPDFTEXT_CHAR_PIECE;
- m_TextBuf.AppendChar(info2.m_Unicode);
- m_CharList.push_back(info2);
- }
- FX_Free(pDst);
- return;
- }
- }
- m_TextBuf.AppendChar(wChar);
- } else {
+ if (IsControlChar(info)) {
info.m_Index = -1;
+ m_CharList.push_back(info);
+ return;
}
- m_CharList.push_back(info);
-}
-void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
- PAGECHAR_INFO info) {
- if (!IsControlChar(info)) {
- info.m_Index = m_TextBuf.GetLength();
- wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
+ info.m_Index = m_TextBuf.GetLength();
+ if (wChar >= 0xFB00 && wChar <= 0xFB06) {
FX_WCHAR* pDst = nullptr;
FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
if (nCount >= 1) {
@@ -879,11 +855,38 @@ void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
FX_Free(pDst);
return;
}
- info.m_Unicode = wChar;
- m_TextBuf.AppendChar(info.m_Unicode);
- } else {
+ }
+ m_TextBuf.AppendChar(wChar);
+ m_CharList.push_back(info);
+}
+
+void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
+ PAGECHAR_INFO info) {
+ if (IsControlChar(info)) {
info.m_Index = -1;
+ m_CharList.push_back(info);
+ return;
+ }
+
+ info.m_Index = m_TextBuf.GetLength();
+ wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
+ FX_WCHAR* pDst = nullptr;
+ FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
+ if (nCount >= 1) {
+ pDst = FX_Alloc(FX_WCHAR, nCount);
+ Unicode_GetNormalization(wChar, pDst);
+ for (int nIndex = 0; nIndex < nCount; nIndex++) {
+ PAGECHAR_INFO info2 = info;
+ info2.m_Unicode = pDst[nIndex];
+ info2.m_Flag = FPDFTEXT_CHAR_PIECE;
+ m_TextBuf.AppendChar(info2.m_Unicode);
+ m_CharList.push_back(info2);
+ }
+ FX_Free(pDst);
+ return;
}
+ info.m_Unicode = wChar;
+ m_TextBuf.AppendChar(info.m_Unicode);
m_CharList.push_back(info);
}
@@ -1145,6 +1148,7 @@ void CPDF_TextPage::FindPreviousTextObject() {
if (preChar.m_pTextObj)
m_pPreTextObj = preChar.m_pTextObj;
}
+
void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend,
int32_t iBufStartAppend) {
int32_t i = iCharListStartAppend;
@@ -1198,10 +1202,10 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
m_perMatrix.Copy(formMatrix);
return;
}
- int result = 0;
+ GenerateCharacter result = GenerateCharacter::None;
if (m_pPreTextObj) {
result = ProcessInsertObject(pTextObj, formMatrix);
- if (2 == result) {
+ if (result == GenerateCharacter::LineBreak) {
m_CurlineRect =
CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom,
Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top);
@@ -1210,59 +1214,43 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom,
Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top));
}
- PAGECHAR_INFO generateChar;
- if (result == 1) {
- if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) {
- if (!formMatrix.IsIdentity()) {
- generateChar.m_Matrix.Copy(formMatrix);
+ switch (result) {
+ case GenerateCharacter::None:
+ break;
+ case GenerateCharacter::Blank:
+ AppendGeneratedCharacter(TEXT_BLANK_CHAR, formMatrix);
+ break;
+ case GenerateCharacter::LineBreak:
+ CloseTempLine();
+ if (m_TextBuf.GetSize()) {
+ AppendGeneratedCharacter(TEXT_RETURN_CHAR, formMatrix);
+ AppendGeneratedCharacter(TEXT_LINEFEED_CHAR, formMatrix);
}
- m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
- m_TempCharList.push_back(generateChar);
- }
- } else if (result == 2) {
- CloseTempLine();
- if (m_TextBuf.GetSize()) {
- if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) {
- m_TextBuf.AppendChar(TEXT_RETURN_CHAR);
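dsinclair 2016/06/14 19:40:13 This is different now. Previously it would append [to m_TextBuf / m_CharList; via AppendGeneratedCharacter() it now appends to m_TempTextBuf / m_TempCharList — comment truncated in this view]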
Lei Zhang 2016/06/14 20:01:16 Whoops, bad refactoring.
- if (!formMatrix.IsIdentity()) {
- generateChar.m_Matrix.Copy(formMatrix);
+ case GenerateCharacter::Hyphen:
+ if (pTextObj->CountChars() == 1) {
+ CPDF_TextObjectItem item;
+ pTextObj->GetCharInfo(0, &item);
+ CFX_WideString wstrItem =
+ pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
+ if (wstrItem.IsEmpty()) {
+ wstrItem += (FX_WCHAR)item.m_CharCode;
}
- m_CharList.push_back(generateChar);
- }
- if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) {
- m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
- if (!formMatrix.IsIdentity()) {
- generateChar.m_Matrix.Copy(formMatrix);
+ FX_WCHAR curChar = wstrItem.GetAt(0);
+ if (0x2D == curChar || 0xAD == curChar) {
dsinclair 2016/06/14 19:40:13 nit: {}'s not needed
Lei Zhang 2016/06/14 20:01:15 Done.
+ return;
}
- m_CharList.push_back(generateChar);
- }
- }
- } else if (result == 3) {
- int32_t nChars = pTextObj->CountChars();
- if (nChars == 1) {
- CPDF_TextObjectItem item;
- pTextObj->GetCharInfo(0, &item);
- CFX_WideString wstrItem =
- pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
- if (wstrItem.IsEmpty()) {
- wstrItem += (FX_WCHAR)item.m_CharCode;
}
- FX_WCHAR curChar = wstrItem.GetAt(0);
- if (0x2D == curChar || 0xAD == curChar) {
- return;
+ while (m_TempTextBuf.GetSize() > 0 &&
+ m_TempTextBuf.AsStringC().GetAt(m_TempTextBuf.GetLength() - 1) ==
+ 0x20) {
+ m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
+ m_TempCharList.pop_back();
}
- }
- while (m_TempTextBuf.GetSize() > 0 &&
- m_TempTextBuf.AsStringC().GetAt(m_TempTextBuf.GetLength() - 1) ==
- 0x20) {
+ PAGECHAR_INFO* charinfo = &m_TempCharList.back();
m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
- m_TempCharList.pop_back();
- }
- PAGECHAR_INFO* charinfo = &m_TempCharList.back();
- m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
- charinfo->m_Unicode = 0x2;
- charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN;
- m_TempTextBuf.AppendChar(0xfffe);
+ charinfo->m_Unicode = 0x2;
+ charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN;
+ m_TempTextBuf.AppendChar(0xfffe);
}
} else {
m_CurlineRect =
@@ -1445,12 +1433,12 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
}
}
-int32_t CPDF_TextPage::GetTextObjectWritingMode(
- const CPDF_TextObject* pTextObj) {
+CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode(
dsinclair 2016/06/14 19:40:13 nit: CPDF_TextPage:: needed?
Lei Zhang 2016/06/14 20:01:15 Yes
+ const CPDF_TextObject* pTextObj) const {
int32_t nChars = pTextObj->CountChars();
- if (nChars == 1) {
+ if (nChars == 1)
return m_TextlineDir;
- }
+
CPDF_TextObjectItem first, last;
pTextObj->GetCharInfo(0, &first);
pTextObj->GetCharInfo(nChars - 1, &last);
@@ -1460,19 +1448,20 @@ int32_t CPDF_TextPage::GetTextObjectWritingMode(
textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY);
FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX);
FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY);
- if (dX <= 0.0001f && dY <= 0.0001f) {
- return -1;
- }
+ if (dX <= 0.0001f && dY <= 0.0001f)
+ return TextOrientation::Unknown;
+
CFX_VectorF v(dX, dY);
v.Normalize();
- if (v.y <= 0.0872f) {
- return v.x <= 0.0872f ? m_TextlineDir : 0;
- }
- if (v.x <= 0.0872f) {
- return 1;
- }
+ if (v.y <= 0.0872f)
+ return v.x <= 0.0872f ? m_TextlineDir : TextOrientation::Horizontal;
+
+ if (v.x <= 0.0872f)
+ return TextOrientation::Vertical;
+
return m_TextlineDir;
}
+
FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) {
CFX_WideString strCurText = m_TempTextBuf.MakeString();
if (strCurText.GetLength() == 0) {
@@ -1510,14 +1499,15 @@ FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) {
return FALSE;
}
-int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
- const CFX_Matrix& formMatrix) {
+CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
dsinclair 2016/06/14 19:40:13 ditto
Lei Zhang 2016/06/14 20:01:15 ditto ditto
+ const CPDF_TextObject* pObj,
+ const CFX_Matrix& formMatrix) {
FindPreviousTextObject();
FX_BOOL bNewline = FALSE;
- int WritingMode = GetTextObjectWritingMode(pObj);
- if (WritingMode == -1) {
+ TextOrientation WritingMode = GetTextObjectWritingMode(pObj);
+ if (WritingMode == TextOrientation::Unknown)
WritingMode = GetTextObjectWritingMode(m_pPreTextObj);
- }
+
CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right,
pObj->m_Top);
CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom,
@@ -1532,20 +1522,18 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
wstrItem += (FX_WCHAR)item.m_CharCode;
}
FX_WCHAR curChar = wstrItem.GetAt(0);
- if (WritingMode == 0) {
+ if (WritingMode == TextOrientation::Horizontal) {
if (this_rect.Height() > 4.5 && prev_rect.Height() > 4.5) {
FX_FLOAT top =
this_rect.top < prev_rect.top ? this_rect.top : prev_rect.top;
FX_FLOAT bottom = this_rect.bottom > prev_rect.bottom ? this_rect.bottom
: prev_rect.bottom;
if (bottom >= top) {
- if (IsHyphen(curChar)) {
- return 3;
- }
- return 2;
+ return IsHyphen(curChar) ? GenerateCharacter::Hyphen
+ : GenerateCharacter::LineBreak;
}
}
- } else if (WritingMode == 1) {
+ } else if (WritingMode == TextOrientation::Vertical) {
if (this_rect.Width() > pObj->GetFontSize() * 0.1f &&
prev_rect.Width() > m_pPreTextObj->GetFontSize() * 0.1f) {
FX_FLOAT left = this_rect.left > m_CurlineRect.left ? this_rect.left
@@ -1554,10 +1542,8 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
? this_rect.right
: m_CurlineRect.right;
if (right <= left) {
- if (IsHyphen(curChar)) {
- return 3;
- }
- return 2;
+ return IsHyphen(curChar) ? GenerateCharacter::Hyphen
+ : GenerateCharacter::LineBreak;
}
}
}
@@ -1587,7 +1573,7 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
m_pPreTextObj->m_Right, m_pPreTextObj->m_Top);
CFX_FloatRect rect3 = rect1;
rect1.Intersect(rect2);
- if (WritingMode == 0) {
+ if (WritingMode == TextOrientation::Horizontal) {
if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5) ||
((y > threshold * 2 || y < threshold * -3) &&
(FXSYS_fabs(y) < 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : TRUE))) {
@@ -1616,13 +1602,15 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
}
}
}
- if (bNewline)
- return IsHyphen(curChar) ? 3 : 2;
+ if (bNewline) {
+ return IsHyphen(curChar) ? GenerateCharacter::Hyphen
+ : GenerateCharacter::LineBreak;
+ }
int32_t nChars = pObj->CountChars();
if (nChars == 1 && (0x2D == curChar || 0xAD == curChar) &&
IsHyphen(curChar)) {
- return 3;
+ return GenerateCharacter::Hyphen;
}
CFX_WideString PrevStr =
m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode);
@@ -1653,18 +1641,18 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
if (curChar != L' ' && preChar != L' ') {
if ((x - last_pos - last_width) > threshold ||
(last_pos - x - last_width) > threshold) {
- return 1;
+ return GenerateCharacter::Blank;
dsinclair 2016/06/14 19:40:13 Does blank mean '' or ' '? If the latter, would Sp[ace be a clearer name? — comment truncated in this view]
Lei Zhang 2016/06/14 20:01:15 I was going along with TEXT_BLANK_FOO. Renamed the
}
if (x < 0 && (last_pos - x - last_width) > threshold) {
- return 1;
+ return GenerateCharacter::Blank;
}
if ((x - last_pos - last_width) > this_width ||
(x - last_pos - this_width) > last_width) {
- return 1;
+ return GenerateCharacter::Blank;
}
}
}
- return 0;
+ return GenerateCharacter::None;
}
FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
« no previous file with comments | « no previous file | core/fpdftext/include/cpdf_textpage.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698