Chromium Code Reviews| Index: core/src/fpdftext/fpdf_text_int.cpp |
| diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp |
| index dea5722aa822f4625a85dcd0e1d3555d528f03e5..de78b82ee23287dcc806ff50b14d964a4b9c3b09 100644 |
| --- a/core/src/fpdftext/fpdf_text_int.cpp |
| +++ b/core/src/fpdftext/fpdf_text_int.cpp |
| @@ -10,7 +10,11 @@ |
| #include "../../include/fpdfapi/fpdf_page.h" |
| #include "../../include/fpdfapi/fpdf_module.h" |
| #include <ctype.h> |
| +#include <algorithm> |
| #include "text_int.h" |
| + |
| +namespace { |
| + |
| FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) |
| { |
| if(curChar < 255 ) { |
| @@ -29,6 +33,49 @@ FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) |
| } |
| return TRUE; |
| } |
| + |
| +FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) |
|
Tom Sepez
2014/12/18 19:33:36
Ok, lets just follow their naming convention even
Lei Zhang
2014/12/18 21:07:10
I'm a bit unclear what the style guide is exactly,
|
| +{ |
| + int nDivide = 6; |
|
Tom Sepez
2014/12/18 19:33:36
Why we need nDivide local? Why is it int? I reali
Lei Zhang
2014/12/18 21:07:09
Done. I was just shuffling code around.
|
| + if (threshold < 300) { |
| + nDivide = 2; |
| + } else if (threshold < 500) { |
| + nDivide = 4; |
| + } else if (threshold < 700) { |
| + nDivide = 5; |
| + } |
| + return threshold / nDivide; |
| +} |
| + |
| +FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, |
| + const CFX_AffineMatrix& matrix) |
| +{ |
| + FX_FLOAT baseSpace = 0.0; |
| + const int nItems = pTextObj->CountItems(); |
| + if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { |
| + FX_BOOL bAllChar = TRUE; |
| + FX_FLOAT spacing = matrix.TransformDistance( |
| + pTextObj->m_TextState.GetObject()->m_CharSpace); |
| + baseSpace = spacing; |
| + for (int i = 0; i < nItems; i++) { |
| + CPDF_TextObjectItem item; |
| + pTextObj->GetItemInfo(i, &item); |
| + if (item.m_CharCode == (FX_DWORD) - 1) { |
| + FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); |
| + FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; |
| + baseSpace = std::min(baseSpace, kerning + spacing); |
| + bAllChar = FALSE; |
| + } |
| + } |
| + if (baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { |
| + baseSpace = 0.0; |
| + } |
| + } |
| + return baseSpace; |
| +} |
| + |
| +} // namespace |
| + |
| CPDFText_ParseOptions::CPDFText_ParseOptions() |
| : m_bGetCharCodeOnly(FALSE), m_bNormalizeObjs(TRUE), m_bOutputHyphen(FALSE) |
| { |
| @@ -1642,29 +1689,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) |
| m_pPreTextObj = pTextObj; |
| m_perMatrix.Copy(formMatrix); |
| int nItems = pTextObj->CountItems(); |
| - FX_FLOAT spacing = 0; |
| - FX_FLOAT baseSpace = 0.0; |
| - FX_BOOL bAllChar = TRUE; |
| - if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) { |
| - spacing = matrix.TransformDistance(pTextObj->m_TextState.GetObject()->m_CharSpace); |
| - baseSpace = spacing; |
| - for (int i = 0; i < nItems; i++) { |
| - CPDF_TextObjectItem item; |
| - pTextObj->GetItemInfo(i, &item); |
| - if (item.m_CharCode == (FX_DWORD) - 1) { |
| - FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); |
| - FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000; |
| - if(kerning + spacing < baseSpace) { |
| - baseSpace = kerning + spacing; |
| - } |
| - bAllChar = FALSE; |
| - } |
| - } |
| - spacing = 0; |
| - if(baseSpace < 0.0 || (nItems == 3 && !bAllChar)) { |
| - baseSpace = 0.0; |
| - } |
| - } |
| + FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix); |
| FX_BOOL bIsBidiAndMirrosInverse = FALSE; |
| IFX_BidiChar* BidiChar = IFX_BidiChar::Create(); |
| @@ -1712,6 +1737,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) |
| FX_INT32 iBufStartAppend = m_TempTextBuf.GetLength(); |
| FX_INT32 iCharListStartAppend = m_TempCharList.GetSize(); |
| + FX_FLOAT spacing = 0; |
|
Tom Sepez
2014/12/18 19:33:36
nit: 0.0
Lei Zhang
2014/12/18 21:07:09
Does that actually make a difference? There's too
|
| for (int i = 0; i < nItems; i++) { |
| CPDF_TextObjectItem item; |
| PAGECHAR_INFO charinfo; |
| @@ -1754,15 +1780,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) |
| threshold = fontsize_h; |
| int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); |
| threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX_FLOAT)last_width; |
| - int nDivide = 6; |
| - if (threshold < 300) { |
| - nDivide = 2; |
| - } else if (threshold < 500) { |
| - nDivide = 4; |
| - } else if (threshold < 700) { |
| - nDivide = 5; |
| - } |
| - threshold = threshold / nDivide; |
| + threshold = _NormalizeThreshold(threshold); |
| threshold = fontsize_h * threshold / 1000; |
| } |
| if (threshold && (spacing && spacing >= threshold) ) { |
| @@ -1827,9 +1845,11 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) |
| int nTotal = wstrItem.GetLength(); |
| int n = 0; |
| FX_BOOL bDel = FALSE; |
| - while (n < m_TempCharList.GetSize() && n < 7) { |
| + const int count = std::min(m_TempCharList.GetSize(), 7); |
| + while (n < count) { |
| n++; |
| - PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - n); |
| + int index = m_TempCharList.GetSize() - n; |
|
Tom Sepez
2014/12/18 19:33:36
why not just count down instead? Is n used somewhe
Lei Zhang
2014/12/18 21:07:09
Done.
|
| + PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt(index); |
|
Tom Sepez
2014/12/18 19:33:36
Doesn't the first iteration do ... .GetAt(... .Get
Lei Zhang
2014/12/18 21:07:10
No, it's confusing because the first thing we do w
|
| if(charinfo1->m_CharCode == charinfo.m_CharCode && |
| charinfo1->m_pTextObj->GetFont() == charinfo.m_pTextObj->GetFont() && |
| FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) < TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize() && |
| @@ -1862,24 +1882,16 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) |
| FX_INT32 i, j; |
| i = iCharListStartAppend; |
| j = m_TempCharList.GetSize() - 1; |
| - PAGECHAR_INFO tempCharInfo; |
| - FX_INT32 tempIndex = 0; |
| for (; i < j; i++, j--) { |
| - tempCharInfo = m_TempCharList[i]; |
| - m_TempCharList[i] = m_TempCharList[j]; |
| - m_TempCharList[j] = tempCharInfo; |
| - tempIndex = m_TempCharList[i].m_Index; |
| - m_TempCharList[i].m_Index = m_TempCharList[j].m_Index; |
| - m_TempCharList[j].m_Index = tempIndex; |
| + std::swap(m_TempCharList[i], m_TempCharList[j]); |
| + std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index); |
| } |
| FX_WCHAR * pTempBuffer = m_TempTextBuf.GetBuffer(); |
| i = iBufStartAppend; |
| j = m_TempTextBuf.GetLength() - 1; |
| FX_WCHAR wTemp; |
| for (; i < j; i++, j--) { |
| - wTemp = pTempBuffer[i]; |
| - pTempBuffer[i] = pTempBuffer[j]; |
| - pTempBuffer[j] = wTemp; |
| + std::swap(pTempBuffer[i], pTempBuffer[j]); |
| } |
| } |
| } |