| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "../../include/fpdfapi/fpdf_pageobj.h" | 7 #include "../../include/fpdfapi/fpdf_pageobj.h" |
| 8 #include "../../include/fpdfapi/fpdf_page.h" | 8 #include "../../include/fpdfapi/fpdf_page.h" |
| 9 class CPDF_TextStream | 9 class CPDF_TextStream |
| 10 { | 10 { |
| 11 public: | 11 public: |
| 12 CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjA
rray); | 12 CPDF_TextStream(CFX_WideTextBuf& buffer, bool bUseLF, CFX_PtrArray* pObjArra
y); |
| 13 ~CPDF_TextStream() {} | 13 ~CPDF_TextStream() {} |
| 14 FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine); | 14 bool ProcessObject(const CPDF_TextObject* pObj, bool bFirstLine); |
| 15 CFX_WideTextBuf& m_Buffer; | 15 CFX_WideTextBuf& m_Buffer; |
| 16 FX_BOOL» » » » m_bUseLF; | 16 bool» » » » m_bUseLF; |
| 17 CFX_PtrArray* m_pObjArray; | 17 CFX_PtrArray* m_pObjArray; |
| 18 const CPDF_TextObject* m_pLastObj; | 18 const CPDF_TextObject* m_pLastObj; |
| 19 }; | 19 }; |
| 20 CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_Pt
rArray* pObjArray) : m_Buffer(buffer) | 20 CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, bool bUseLF, CFX_PtrAr
ray* pObjArray) : m_Buffer(buffer) |
| 21 { | 21 { |
| 22 m_pLastObj = NULL; | 22 m_pLastObj = NULL; |
| 23 m_bUseLF = bUseLF; | 23 m_bUseLF = bUseLF; |
| 24 m_pObjArray = pObjArray; | 24 m_pObjArray = pObjArray; |
| 25 } | 25 } |
| 26 FX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, const CPDF_T
extObject* pTextObj2) | 26 bool FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, const CPDF_Text
Object* pTextObj2) |
| 27 { | 27 { |
| 28 if (!pTextObj1 || !pTextObj2) { | 28 if (!pTextObj1 || !pTextObj2) { |
| 29 return FALSE; | 29 return false; |
| 30 } | 30 } |
| 31 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_
Right, pTextObj2->m_Top); | 31 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_
Right, pTextObj2->m_Top); |
| 32 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_
Right, pTextObj1->m_Top); | 32 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_
Right, pTextObj1->m_Top); |
| 33 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { | 33 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { |
| 34 return TRUE; | 34 return true; |
| 35 } | 35 } |
| 36 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { | 36 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { |
| 37 rcPreObj.Intersect(rcCurObj); | 37 rcPreObj.Intersect(rcCurObj); |
| 38 if (rcPreObj.IsEmpty()) { | 38 if (rcPreObj.IsEmpty()) { |
| 39 return FALSE; | 39 return false; |
| 40 } | 40 } |
| 41 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() /
2) { | 41 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() /
2) { |
| 42 return FALSE; | 42 return false; |
| 43 } | 43 } |
| 44 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) { | 44 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) { |
| 45 return FALSE; | 45 return false; |
| 46 } | 46 } |
| 47 } | 47 } |
| 48 int nPreCount = pTextObj2->CountItems(); | 48 int nPreCount = pTextObj2->CountItems(); |
| 49 int nCurCount = pTextObj1->CountItems(); | 49 int nCurCount = pTextObj1->CountItems(); |
| 50 if (nPreCount != nCurCount) { | 50 if (nPreCount != nCurCount) { |
| 51 return FALSE; | 51 return false; |
| 52 } | 52 } |
| 53 for (int i = 0; i < nPreCount; i++) { | 53 for (int i = 0; i < nPreCount; i++) { |
| 54 CPDF_TextObjectItem itemPer, itemCur; | 54 CPDF_TextObjectItem itemPer, itemCur; |
| 55 pTextObj2->GetItemInfo(i, &itemPer); | 55 pTextObj2->GetItemInfo(i, &itemPer); |
| 56 pTextObj1->GetItemInfo(i, &itemCur); | 56 pTextObj1->GetItemInfo(i, &itemCur); |
| 57 if (itemCur.m_CharCode != itemPer.m_CharCode) { | 57 if (itemCur.m_CharCode != itemPer.m_CharCode) { |
| 58 return FALSE; | 58 return false; |
| 59 } | 59 } |
| 60 } | 60 } |
| 61 return TRUE; | 61 return true; |
| 62 } | 62 } |
| 63 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) | 63 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) |
| 64 { | 64 { |
| 65 if(charCode == -1) { | 65 if(charCode == -1) { |
| 66 return 0; | 66 return 0; |
| 67 } | 67 } |
| 68 int w = pFont->GetCharWidthF(charCode); | 68 int w = pFont->GetCharWidthF(charCode); |
| 69 if(w == 0) { | 69 if(w == 0) { |
| 70 CFX_ByteString str; | 70 CFX_ByteString str; |
| 71 pFont->AppendChar(str, charCode); | 71 pFont->AppendChar(str, charCode); |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 126 } | 126 } |
| 127 if((x - last_pos - last_width) > this_width || (x - last_pos - this_
width) > last_width ) { | 127 if((x - last_pos - last_width) > this_width || (x - last_pos - this_
width) > last_width ) { |
| 128 return 1; | 128 return 1; |
| 129 } | 129 } |
| 130 } | 130 } |
| 131 if(last_pos + last_width > x + this_width && curChar == L' ') { | 131 if(last_pos + last_width > x + this_width && curChar == L' ') { |
| 132 return 3; | 132 return 3; |
| 133 } | 133 } |
| 134 return 0; | 134 return 0; |
| 135 } | 135 } |
| 136 FX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFir
stLine) | 136 bool CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, bool bFirstLine
) |
| 137 { | 137 { |
| 138 CPDF_Font* pFont = pObj->GetFont(); | 138 CPDF_Font* pFont = pObj->GetFont(); |
| 139 CFX_AffineMatrix matrix; | 139 CFX_AffineMatrix matrix; |
| 140 pObj->GetTextMatrix(&matrix); | 140 pObj->GetTextMatrix(&matrix); |
| 141 int item_index = 0; | 141 int item_index = 0; |
| 142 if (m_pLastObj) { | 142 if (m_pLastObj) { |
| 143 int result = FPDFText_ProcessInterObj(m_pLastObj, pObj); | 143 int result = FPDFText_ProcessInterObj(m_pLastObj, pObj); |
| 144 if (result == 2) { | 144 if (result == 2) { |
| 145 int len = m_Buffer.GetLength(); | 145 int len = m_Buffer.GetLength(); |
| 146 if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') { | 146 if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') { |
| 147 m_Buffer.Delete(len - 1, 1); | 147 m_Buffer.Delete(len - 1, 1); |
| 148 if (m_pObjArray) { | 148 if (m_pObjArray) { |
| 149 m_pObjArray->RemoveAt((len - 1) * 2, 2); | 149 m_pObjArray->RemoveAt((len - 1) * 2, 2); |
| 150 } | 150 } |
| 151 } else { | 151 } else { |
| 152 if (bFirstLine) { | 152 if (bFirstLine) { |
| 153 return TRUE; | 153 return true; |
| 154 } | 154 } |
| 155 if (m_bUseLF) { | 155 if (m_bUseLF) { |
| 156 m_Buffer.AppendChar(L'\r'); | 156 m_Buffer.AppendChar(L'\r'); |
| 157 m_Buffer.AppendChar(L'\n'); | 157 m_Buffer.AppendChar(L'\n'); |
| 158 if (m_pObjArray) { | 158 if (m_pObjArray) { |
| 159 for (int i = 0; i < 4; i ++) { | 159 for (int i = 0; i < 4; i ++) { |
| 160 m_pObjArray->Add(NULL); | 160 m_pObjArray->Add(NULL); |
| 161 } | 161 } |
| 162 } | 162 } |
| 163 } else { | 163 } else { |
| 164 m_Buffer.AppendChar(' '); | 164 m_Buffer.AppendChar(' '); |
| 165 if (m_pObjArray) { | 165 if (m_pObjArray) { |
| 166 m_pObjArray->Add(NULL); | 166 m_pObjArray->Add(NULL); |
| 167 m_pObjArray->Add(NULL); | 167 m_pObjArray->Add(NULL); |
| 168 } | 168 } |
| 169 } | 169 } |
| 170 } | 170 } |
| 171 } else if (result == 1) { | 171 } else if (result == 1) { |
| 172 m_Buffer.AppendChar(L' '); | 172 m_Buffer.AppendChar(L' '); |
| 173 if (m_pObjArray) { | 173 if (m_pObjArray) { |
| 174 m_pObjArray->Add(NULL); | 174 m_pObjArray->Add(NULL); |
| 175 m_pObjArray->Add(NULL); | 175 m_pObjArray->Add(NULL); |
| 176 } | 176 } |
| 177 } else if (result == -1) { | 177 } else if (result == -1) { |
| 178 m_pLastObj = pObj; | 178 m_pLastObj = pObj; |
| 179 return FALSE; | 179 return false; |
| 180 } else if (result == 3) { | 180 } else if (result == 3) { |
| 181 item_index = 1; | 181 item_index = 1; |
| 182 } | 182 } |
| 183 } | 183 } |
| 184 m_pLastObj = pObj; | 184 m_pLastObj = pObj; |
| 185 int nItems = pObj->CountItems(); | 185 int nItems = pObj->CountItems(); |
| 186 FX_FLOAT Ignorekerning = 0; | 186 FX_FLOAT Ignorekerning = 0; |
| 187 for(int i = 1; i < nItems - 1; i += 2) { | 187 for(int i = 1; i < nItems - 1; i += 2) { |
| 188 CPDF_TextObjectItem item; | 188 CPDF_TextObjectItem item; |
| 189 pObj->GetItemInfo(i, &item); | 189 pObj->GetItemInfo(i, &item); |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 270 } else { | 270 } else { |
| 271 m_Buffer << unicode_str; | 271 m_Buffer << unicode_str; |
| 272 if (m_pObjArray) { | 272 if (m_pObjArray) { |
| 273 for (int i = 0; i < unicode_str.GetLength(); i ++) { | 273 for (int i = 0; i < unicode_str.GetLength(); i ++) { |
| 274 m_pObjArray->Add((void*)pObj); | 274 m_pObjArray->Add((void*)pObj); |
| 275 m_pObjArray->Add((void*)(intptr_t)item_index); | 275 m_pObjArray->Add((void*)(intptr_t)item_index); |
| 276 } | 276 } |
| 277 } | 277 } |
| 278 } | 278 } |
| 279 } | 279 } |
| 280 return FALSE; | 280 return false; |
| 281 } | 281 } |
| 282 void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage
, FX_BOOL bUseLF, | 282 void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage
, bool bUseLF, |
| 283 CFX_PtrArray* pObjArray) | 283 CFX_PtrArray* pObjArray) |
| 284 { | 284 { |
| 285 CPDF_TextStream textstream(buffer, bUseLF, pObjArray); | 285 CPDF_TextStream textstream(buffer, bUseLF, pObjArray); |
| 286 FX_POSITION pos = pPage->GetFirstObjectPosition(); | 286 FX_POSITION pos = pPage->GetFirstObjectPosition(); |
| 287 while (pos) { | 287 while (pos) { |
| 288 CPDF_PageObject* pObject = pPage->GetNextObject(pos); | 288 CPDF_PageObject* pObject = pPage->GetNextObject(pos); |
| 289 if (pObject == NULL) { | 289 if (pObject == NULL) { |
| 290 continue; | 290 continue; |
| 291 } | 291 } |
| 292 if (pObject->m_Type != PDFPAGE_TEXT) { | 292 if (pObject->m_Type != PDFPAGE_TEXT) { |
| 293 continue; | 293 continue; |
| 294 } | 294 } |
| 295 textstream.ProcessObject((CPDF_TextObject*)pObject, FALSE); | 295 textstream.ProcessObject((CPDF_TextObject*)pObject, false); |
| 296 } | 296 } |
| 297 } | 297 } |
| 298 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary
* pPage) | 298 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary
* pPage) |
| 299 { | 299 { |
| 300 CFX_WideTextBuf buffer; | 300 CFX_WideTextBuf buffer; |
| 301 buffer.EstimateSize(0, 1024); | 301 buffer.EstimateSize(0, 1024); |
| 302 CPDF_Page page; | 302 CPDF_Page page; |
| 303 page.Load(pDoc, pPage); | 303 page.Load(pDoc, pPage); |
| 304 CPDF_ParseOptions options; | 304 CPDF_ParseOptions options; |
| 305 options.m_bTextOnly = TRUE; | 305 options.m_bTextOnly = true; |
| 306 options.m_bSeparateForm = FALSE; | 306 options.m_bSeparateForm = false; |
| 307 page.ParseContent(&options); | 307 page.ParseContent(&options); |
| 308 CPDF_TextStream textstream(buffer, FALSE, NULL); | 308 CPDF_TextStream textstream(buffer, false, NULL); |
| 309 FX_POSITION pos = page.GetFirstObjectPosition(); | 309 FX_POSITION pos = page.GetFirstObjectPosition(); |
| 310 while (pos) { | 310 while (pos) { |
| 311 CPDF_PageObject* pObject = page.GetNextObject(pos); | 311 CPDF_PageObject* pObject = page.GetNextObject(pos); |
| 312 if (pObject->m_Type != PDFPAGE_TEXT) { | 312 if (pObject->m_Type != PDFPAGE_TEXT) { |
| 313 continue; | 313 continue; |
| 314 } | 314 } |
| 315 if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) { | 315 if (textstream.ProcessObject((CPDF_TextObject*)pObject, true)) { |
| 316 break; | 316 break; |
| 317 } | 317 } |
| 318 } | 318 } |
| 319 return buffer.GetWideString(); | 319 return buffer.GetWideString(); |
| 320 } | 320 } |
| OLD | NEW |