OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "../../include/fpdfapi/fpdf_pageobj.h" | 7 #include "../../include/fpdfapi/fpdf_pageobj.h" |
8 #include "../../include/fpdfapi/fpdf_page.h" | 8 #include "../../include/fpdfapi/fpdf_page.h" |
// CPDF_TextStream: state carried while page text objects are fed one at a
// time through ProcessObject(). (Left column is OLD, right column is NEW; in
// this class the two differ only in FX_BOOL being replaced by bool.)
//   m_Buffer    - reference to the caller-supplied wide-text buffer that
//                 ProcessObject() appends to.
//   m_bUseLF    - copied from the constructor argument; ProcessObject() uses
//                 it to choose between appending "\r\n" and a single space
//                 when FPDFText_ProcessInterObj() returns 2.
//   m_pObjArray - optional (may be NULL) pointer array; ProcessObject() adds
//                 two entries per appended character (the text object and an
//                 item index, or two NULLs for inserted separators).
//   m_pLastObj  - the text object seen by the previous ProcessObject() call,
//                 NULL before the first call.
9 class CPDF_TextStream | 9 class CPDF_TextStream |
10 { | 10 { |
11 public: | 11 public: |
12 CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjA
rray); | 12 CPDF_TextStream(CFX_WideTextBuf& buffer, bool bUseLF, CFX_PtrArray* pObjArra
y); |
13 ~CPDF_TextStream() {} | 13 ~CPDF_TextStream() {} |
14 FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine); | 14 bool ProcessObject(const CPDF_TextObject* pObj, bool bFirstLine); |
15 CFX_WideTextBuf& m_Buffer; | 15 CFX_WideTextBuf& m_Buffer; |
16 FX_BOOL» » » » m_bUseLF; | 16 bool» » » » m_bUseLF; |
17 CFX_PtrArray* m_pObjArray; | 17 CFX_PtrArray* m_pObjArray; |
18 const CPDF_TextObject* m_pLastObj; | 18 const CPDF_TextObject* m_pLastObj; |
19 }; | 19 }; |
// Constructor: binds m_Buffer to the caller's buffer, stores bUseLF and
// pObjArray unchanged, and starts with no previous text object
// (m_pLastObj = NULL). pObjArray may be NULL; the visible callers pass either
// their own array or NULL.
20 CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_Pt
rArray* pObjArray) : m_Buffer(buffer) | 20 CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, bool bUseLF, CFX_PtrAr
ray* pObjArray) : m_Buffer(buffer) |
21 { | 21 { |
22 m_pLastObj = NULL; | 22 m_pLastObj = NULL; |
23 m_bUseLF = bUseLF; | 23 m_bUseLF = bUseLF; |
24 m_pObjArray = pObjArray; | 24 m_pObjArray = pObjArray; |
25 } | 25 } |
// Decides whether two text objects should be treated as the same text.
//   pTextObj1 - the "current" object (its rectangle is rcCurObj).
//   pTextObj2 - the "previous" object (its rectangle is rcPreObj).
// Returns false if either pointer is NULL. Rectangles are built from each
// object's m_Left/m_Bottom/m_Right/m_Top:
//   * both rectangles empty -> true immediately (item contents are not
//     compared in that case);
//   * otherwise rcPreObj is replaced by its intersection with rcCurObj, and
//     the result is false when the intersection is empty, when the
//     intersection width differs from rcCurObj's width by more than half of
//     rcCurObj's width, or when the two font sizes differ.
// Finally the objects must have the same item count and identical
// m_CharCode for every item index.
// NOTE(review): the second rectangle test (!a || !b) is always true at that
// point because the both-empty case has already returned.
26 FX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, const CPDF_T
extObject* pTextObj2) | 26 bool FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, const CPDF_Text
Object* pTextObj2) |
27 { | 27 { |
28 if (!pTextObj1 || !pTextObj2) { | 28 if (!pTextObj1 || !pTextObj2) { |
29 return FALSE; | 29 return false; |
30 } | 30 } |
31 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_
Right, pTextObj2->m_Top); | 31 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_
Right, pTextObj2->m_Top); |
32 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_
Right, pTextObj1->m_Top); | 32 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_
Right, pTextObj1->m_Top); |
33 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { | 33 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { |
34 return TRUE; | 34 return true; |
35 } | 35 } |
36 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { | 36 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { |
37 rcPreObj.Intersect(rcCurObj); | 37 rcPreObj.Intersect(rcCurObj); |
38 if (rcPreObj.IsEmpty()) { | 38 if (rcPreObj.IsEmpty()) { |
39 return FALSE; | 39 return false; |
40 } | 40 } |
41 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() /
2) { | 41 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() /
2) { |
42 return FALSE; | 42 return false; |
43 } | 43 } |
44 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) { | 44 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) { |
45 return FALSE; | 45 return false; |
46 } | 46 } |
47 } | 47 } |
48 int nPreCount = pTextObj2->CountItems(); | 48 int nPreCount = pTextObj2->CountItems(); |
49 int nCurCount = pTextObj1->CountItems(); | 49 int nCurCount = pTextObj1->CountItems(); |
50 if (nPreCount != nCurCount) { | 50 if (nPreCount != nCurCount) { |
51 return FALSE; | 51 return false; |
52 } | 52 } |
53 for (int i = 0; i < nPreCount; i++) { | 53 for (int i = 0; i < nPreCount; i++) { |
54 CPDF_TextObjectItem itemPer, itemCur; | 54 CPDF_TextObjectItem itemPer, itemCur; |
55 pTextObj2->GetItemInfo(i, &itemPer); | 55 pTextObj2->GetItemInfo(i, &itemPer); |
56 pTextObj1->GetItemInfo(i, &itemCur); | 56 pTextObj1->GetItemInfo(i, &itemCur); |
57 if (itemCur.m_CharCode != itemPer.m_CharCode) { | 57 if (itemCur.m_CharCode != itemPer.m_CharCode) { |
58 return FALSE; | 58 return false; |
59 } | 59 } |
60 } | 60 } |
61 return TRUE; | 61 return true; |
62 } | 62 } |
63 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) | 63 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) |
64 { | 64 { |
65 if(charCode == -1) { | 65 if(charCode == -1) { |
66 return 0; | 66 return 0; |
67 } | 67 } |
68 int w = pFont->GetCharWidthF(charCode); | 68 int w = pFont->GetCharWidthF(charCode); |
69 if(w == 0) { | 69 if(w == 0) { |
70 CFX_ByteString str; | 70 CFX_ByteString str; |
71 pFont->AppendChar(str, charCode); | 71 pFont->AppendChar(str, charCode); |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
126 } | 126 } |
127 if((x - last_pos - last_width) > this_width || (x - last_pos - this_
width) > last_width ) { | 127 if((x - last_pos - last_width) > this_width || (x - last_pos - this_
width) > last_width ) { |
128 return 1; | 128 return 1; |
129 } | 129 } |
130 } | 130 } |
131 if(last_pos + last_width > x + this_width && curChar == L' ') { | 131 if(last_pos + last_width > x + this_width && curChar == L' ') { |
132 return 3; | 132 return 3; |
133 } | 133 } |
134 return 0; | 134 return 0; |
135 } | 135 } |
// Appends the text of one text object to m_Buffer, first inserting a
// separator based on how pObj relates to the previously processed object.
// NOTE(review): about 80 lines in the middle of this function are elided in
// this view; the description below covers only the visible parts.
//
// When a previous object exists, FPDFText_ProcessInterObj(m_pLastObj, pObj)
// is called and its result is handled as follows:
//   2  - if the buffer is non-empty, m_bUseLF is set and the last buffered
//        character is '-', that character is deleted (and its two entries
//        removed from m_pObjArray). Otherwise: return true right away when
//        bFirstLine is set; else append "\r\n" (four NULL array entries) when
//        m_bUseLF is set, or a single space (two NULL entries) when it is not.
//   1  - append a single space (two NULL array entries).
//   -1 - remember pObj as m_pLastObj and return false without adding text.
//   3  - set item_index to 1 instead of 0.
// At the visible end, unicode_str is appended to the buffer and, for each of
// its characters, pObj and item_index are added to m_pObjArray.
//
// Returns: true only on the bFirstLine early exit described above (as far as
// the visible code shows); false otherwise.
136 FX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFir
stLine) | 136 bool CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, bool bFirstLine
) |
137 { | 137 { |
138 CPDF_Font* pFont = pObj->GetFont(); | 138 CPDF_Font* pFont = pObj->GetFont(); |
139 CFX_AffineMatrix matrix; | 139 CFX_AffineMatrix matrix; |
140 pObj->GetTextMatrix(&matrix); | 140 pObj->GetTextMatrix(&matrix); |
141 int item_index = 0; | 141 int item_index = 0; |
142 if (m_pLastObj) { | 142 if (m_pLastObj) { |
143 int result = FPDFText_ProcessInterObj(m_pLastObj, pObj); | 143 int result = FPDFText_ProcessInterObj(m_pLastObj, pObj); |
144 if (result == 2) { | 144 if (result == 2) { |
145 int len = m_Buffer.GetLength(); | 145 int len = m_Buffer.GetLength(); |
146 if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') { | 146 if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') { |
147 m_Buffer.Delete(len - 1, 1); | 147 m_Buffer.Delete(len - 1, 1); |
148 if (m_pObjArray) { | 148 if (m_pObjArray) { |
149 m_pObjArray->RemoveAt((len - 1) * 2, 2); | 149 m_pObjArray->RemoveAt((len - 1) * 2, 2); |
150 } | 150 } |
151 } else { | 151 } else { |
152 if (bFirstLine) { | 152 if (bFirstLine) { |
153 return TRUE; | 153 return true; |
154 } | 154 } |
155 if (m_bUseLF) { | 155 if (m_bUseLF) { |
156 m_Buffer.AppendChar(L'\r'); | 156 m_Buffer.AppendChar(L'\r'); |
157 m_Buffer.AppendChar(L'\n'); | 157 m_Buffer.AppendChar(L'\n'); |
158 if (m_pObjArray) { | 158 if (m_pObjArray) { |
159 for (int i = 0; i < 4; i ++) { | 159 for (int i = 0; i < 4; i ++) { |
160 m_pObjArray->Add(NULL); | 160 m_pObjArray->Add(NULL); |
161 } | 161 } |
162 } | 162 } |
163 } else { | 163 } else { |
164 m_Buffer.AppendChar(' '); | 164 m_Buffer.AppendChar(' '); |
165 if (m_pObjArray) { | 165 if (m_pObjArray) { |
166 m_pObjArray->Add(NULL); | 166 m_pObjArray->Add(NULL); |
167 m_pObjArray->Add(NULL); | 167 m_pObjArray->Add(NULL); |
168 } | 168 } |
169 } | 169 } |
170 } | 170 } |
171 } else if (result == 1) { | 171 } else if (result == 1) { |
172 m_Buffer.AppendChar(L' '); | 172 m_Buffer.AppendChar(L' '); |
173 if (m_pObjArray) { | 173 if (m_pObjArray) { |
174 m_pObjArray->Add(NULL); | 174 m_pObjArray->Add(NULL); |
175 m_pObjArray->Add(NULL); | 175 m_pObjArray->Add(NULL); |
176 } | 176 } |
177 } else if (result == -1) { | 177 } else if (result == -1) { |
178 m_pLastObj = pObj; | 178 m_pLastObj = pObj; |
179 return FALSE; | 179 return false; |
180 } else if (result == 3) { | 180 } else if (result == 3) { |
181 item_index = 1; | 181 item_index = 1; |
182 } | 182 } |
183 } | 183 } |
184 m_pLastObj = pObj; | 184 m_pLastObj = pObj; |
185 int nItems = pObj->CountItems(); | 185 int nItems = pObj->CountItems(); |
186 FX_FLOAT Ignorekerning = 0; | 186 FX_FLOAT Ignorekerning = 0; |
187 for(int i = 1; i < nItems - 1; i += 2) { | 187 for(int i = 1; i < nItems - 1; i += 2) { |
188 CPDF_TextObjectItem item; | 188 CPDF_TextObjectItem item; |
189 pObj->GetItemInfo(i, &item); | 189 pObj->GetItemInfo(i, &item); |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
270 } else { | 270 } else { |
271 m_Buffer << unicode_str; | 271 m_Buffer << unicode_str; |
272 if (m_pObjArray) { | 272 if (m_pObjArray) { |
273 for (int i = 0; i < unicode_str.GetLength(); i ++) { | 273 for (int i = 0; i < unicode_str.GetLength(); i ++) { |
274 m_pObjArray->Add((void*)pObj); | 274 m_pObjArray->Add((void*)pObj); |
275 m_pObjArray->Add((void*)(intptr_t)item_index); | 275 m_pObjArray->Add((void*)(intptr_t)item_index); |
276 } | 276 } |
277 } | 277 } |
278 } | 278 } |
279 } | 279 } |
280 return FALSE; | 280 return false; |
281 } | 281 } |
// Extracts the text of every text object in pPage into `buffer`.
//   buffer    - output buffer; text is appended through a CPDF_TextStream.
//   pPage     - page object container that is iterated from its first object.
//   bUseLF    - forwarded to CPDF_TextStream (selects "\r\n" vs. space as the
//               separator inside ProcessObject()).
//   pObjArray - optional array forwarded to CPDF_TextStream; may be NULL.
// NULL objects and objects whose m_Type is not PDFPAGE_TEXT are skipped.
// ProcessObject() is always called with bFirstLine = false and its return
// value is ignored, so the whole object list is processed.
282 void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage
, FX_BOOL bUseLF, | 282 void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage
, bool bUseLF, |
283 CFX_PtrArray* pObjArray) | 283 CFX_PtrArray* pObjArray) |
284 { | 284 { |
285 CPDF_TextStream textstream(buffer, bUseLF, pObjArray); | 285 CPDF_TextStream textstream(buffer, bUseLF, pObjArray); |
286 FX_POSITION pos = pPage->GetFirstObjectPosition(); | 286 FX_POSITION pos = pPage->GetFirstObjectPosition(); |
287 while (pos) { | 287 while (pos) { |
288 CPDF_PageObject* pObject = pPage->GetNextObject(pos); | 288 CPDF_PageObject* pObject = pPage->GetNextObject(pos); |
289 if (pObject == NULL) { | 289 if (pObject == NULL) { |
290 continue; | 290 continue; |
291 } | 291 } |
292 if (pObject->m_Type != PDFPAGE_TEXT) { | 292 if (pObject->m_Type != PDFPAGE_TEXT) { |
293 continue; | 293 continue; |
294 } | 294 } |
295 textstream.ProcessObject((CPDF_TextObject*)pObject, FALSE); | 295 textstream.ProcessObject((CPDF_TextObject*)pObject, false); |
296 } | 296 } |
297 } | 297 } |
// Returns the text gathered from a page up to the first point where
// ProcessObject() reports true (its bFirstLine early exit).
//   pDoc  - document passed to CPDF_Page::Load().
//   pPage - page dictionary passed to CPDF_Page::Load().
// The page is parsed with m_bTextOnly = true and m_bSeparateForm = false.
// Text objects are fed to a CPDF_TextStream created with bUseLF = false and
// no object array; non-text objects are skipped. The loop stops as soon as
// ProcessObject(..., true) returns true, and the buffer contents accumulated
// so far are returned as a wide string.
// NOTE(review): unlike _PDF_GetTextStream_Unicode, pObject is dereferenced
// here without a NULL check - confirm GetNextObject() cannot return NULL.
298 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary
* pPage) | 298 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary
* pPage) |
299 { | 299 { |
300 CFX_WideTextBuf buffer; | 300 CFX_WideTextBuf buffer; |
301 buffer.EstimateSize(0, 1024); | 301 buffer.EstimateSize(0, 1024); |
302 CPDF_Page page; | 302 CPDF_Page page; |
303 page.Load(pDoc, pPage); | 303 page.Load(pDoc, pPage); |
304 CPDF_ParseOptions options; | 304 CPDF_ParseOptions options; |
305 options.m_bTextOnly = TRUE; | 305 options.m_bTextOnly = true; |
306 options.m_bSeparateForm = FALSE; | 306 options.m_bSeparateForm = false; |
307 page.ParseContent(&options); | 307 page.ParseContent(&options); |
308 CPDF_TextStream textstream(buffer, FALSE, NULL); | 308 CPDF_TextStream textstream(buffer, false, NULL); |
309 FX_POSITION pos = page.GetFirstObjectPosition(); | 309 FX_POSITION pos = page.GetFirstObjectPosition(); |
310 while (pos) { | 310 while (pos) { |
311 CPDF_PageObject* pObject = page.GetNextObject(pos); | 311 CPDF_PageObject* pObject = page.GetNextObject(pos); |
312 if (pObject->m_Type != PDFPAGE_TEXT) { | 312 if (pObject->m_Type != PDFPAGE_TEXT) { |
313 continue; | 313 continue; |
314 } | 314 } |
315 if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) { | 315 if (textstream.ProcessObject((CPDF_TextObject*)pObject, true)) { |
316 break; | 316 break; |
317 } | 317 } |
318 } | 318 } |
319 return buffer.GetWideString(); | 319 return buffer.GetWideString(); |
320 } | 320 } |
OLD | NEW |