OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "../../include/fpdfapi/fpdf_pageobj.h" | 7 #include "../../include/fpdfapi/fpdf_pageobj.h" |
8 #include "../../include/fpdfapi/fpdf_page.h" | 8 #include "../../include/fpdfapi/fpdf_page.h" |
9 class CPDF_TextStream | 9 class CPDF_TextStream { |
10 { | 10 public: |
11 public: | 11 CPDF_TextStream(CFX_WideTextBuf& buffer, |
12 CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjA
rray); | 12 FX_BOOL bUseLF, |
13 ~CPDF_TextStream() {} | 13 CFX_PtrArray* pObjArray); |
14 FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine); | 14 ~CPDF_TextStream() {} |
15 CFX_WideTextBuf&» m_Buffer; | 15 FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine); |
16 FX_BOOL» » » » m_bUseLF; | 16 CFX_WideTextBuf& m_Buffer; |
17 CFX_PtrArray*» » m_pObjArray; | 17 FX_BOOL m_bUseLF; |
18 const CPDF_TextObject*» m_pLastObj; | 18 CFX_PtrArray* m_pObjArray; |
| 19 const CPDF_TextObject* m_pLastObj; |
19 }; | 20 }; |
20 CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_Pt
rArray* pObjArray) : m_Buffer(buffer) | 21 CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, |
21 { | 22 FX_BOOL bUseLF, |
22 m_pLastObj = NULL; | 23 CFX_PtrArray* pObjArray) |
23 m_bUseLF = bUseLF; | 24 : m_Buffer(buffer) { |
24 m_pObjArray = pObjArray; | 25 m_pLastObj = NULL; |
25 } | 26 m_bUseLF = bUseLF; |
26 FX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, const CPDF_T
extObject* pTextObj2) | 27 m_pObjArray = pObjArray; |
27 { | 28 } |
28 if (!pTextObj1 || !pTextObj2) { | 29 FX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, |
29 return FALSE; | 30 const CPDF_TextObject* pTextObj2) { |
30 } | 31 if (!pTextObj1 || !pTextObj2) { |
31 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_
Right, pTextObj2->m_Top); | 32 return FALSE; |
32 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_
Right, pTextObj1->m_Top); | 33 } |
33 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { | 34 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, |
34 return TRUE; | 35 pTextObj2->m_Right, pTextObj2->m_Top); |
35 } | 36 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, |
36 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { | 37 pTextObj1->m_Right, pTextObj1->m_Top); |
37 rcPreObj.Intersect(rcCurObj); | 38 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { |
38 if (rcPreObj.IsEmpty()) { | |
39 return FALSE; | |
40 } | |
41 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() /
2) { | |
42 return FALSE; | |
43 } | |
44 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) { | |
45 return FALSE; | |
46 } | |
47 } | |
48 int nPreCount = pTextObj2->CountItems(); | |
49 int nCurCount = pTextObj1->CountItems(); | |
50 if (nPreCount != nCurCount) { | |
51 return FALSE; | |
52 } | |
53 for (int i = 0; i < nPreCount; i++) { | |
54 CPDF_TextObjectItem itemPer, itemCur; | |
55 pTextObj2->GetItemInfo(i, &itemPer); | |
56 pTextObj1->GetItemInfo(i, &itemCur); | |
57 if (itemCur.m_CharCode != itemPer.m_CharCode) { | |
58 return FALSE; | |
59 } | |
60 } | |
61 return TRUE; | 39 return TRUE; |
62 } | 40 } |
63 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) | 41 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { |
64 { | 42 rcPreObj.Intersect(rcCurObj); |
65 if(charCode == -1) { | 43 if (rcPreObj.IsEmpty()) { |
66 return 0; | 44 return FALSE; |
67 } | 45 } |
68 int w = pFont->GetCharWidthF(charCode); | 46 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > |
69 if(w == 0) { | 47 rcCurObj.Width() / 2) { |
70 CFX_ByteString str; | 48 return FALSE; |
71 pFont->AppendChar(str, charCode); | 49 } |
72 w = pFont->GetStringWidth(str, 1); | 50 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) { |
73 if(w == 0) { | 51 return FALSE; |
74 FX_RECT BBox; | 52 } |
75 pFont->GetCharBBox(charCode, BBox); | 53 } |
76 w = BBox.right - BBox.left; | 54 int nPreCount = pTextObj2->CountItems(); |
77 } | 55 int nCurCount = pTextObj1->CountItems(); |
78 } | 56 if (nPreCount != nCurCount) { |
79 return w; | 57 return FALSE; |
80 } | 58 } |
81 int FPDFText_ProcessInterObj(const CPDF_TextObject* pPrevObj, const CPDF_TextObj
ect* pObj) | 59 for (int i = 0; i < nPreCount; i++) { |
82 { | 60 CPDF_TextObjectItem itemPer, itemCur; |
83 if(FPDFText_IsSameTextObject(pPrevObj, pObj)) { | 61 pTextObj2->GetItemInfo(i, &itemPer); |
84 return -1; | 62 pTextObj1->GetItemInfo(i, &itemCur); |
85 } | 63 if (itemCur.m_CharCode != itemPer.m_CharCode) { |
| 64 return FALSE; |
| 65 } |
| 66 } |
| 67 return TRUE; |
| 68 } |
| 69 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) { |
| 70 if (charCode == -1) { |
| 71 return 0; |
| 72 } |
| 73 int w = pFont->GetCharWidthF(charCode); |
| 74 if (w == 0) { |
| 75 CFX_ByteString str; |
| 76 pFont->AppendChar(str, charCode); |
| 77 w = pFont->GetStringWidth(str, 1); |
| 78 if (w == 0) { |
| 79 FX_RECT BBox; |
| 80 pFont->GetCharBBox(charCode, BBox); |
| 81 w = BBox.right - BBox.left; |
| 82 } |
| 83 } |
| 84 return w; |
| 85 } |
| 86 int FPDFText_ProcessInterObj(const CPDF_TextObject* pPrevObj, |
| 87 const CPDF_TextObject* pObj) { |
| 88 if (FPDFText_IsSameTextObject(pPrevObj, pObj)) { |
| 89 return -1; |
| 90 } |
| 91 CPDF_TextObjectItem item; |
| 92 int nItem = pPrevObj->CountItems(); |
| 93 pPrevObj->GetItemInfo(nItem - 1, &item); |
| 94 FX_WCHAR preChar = 0, curChar = 0; |
| 95 CFX_WideString wstr = |
| 96 pPrevObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); |
| 97 if (wstr.GetLength()) { |
| 98 preChar = wstr.GetAt(0); |
| 99 } |
| 100 FX_FLOAT last_pos = item.m_OriginX; |
| 101 int nLastWidth = GetCharWidth(item.m_CharCode, pPrevObj->GetFont()); |
| 102 FX_FLOAT last_width = nLastWidth * pPrevObj->GetFontSize() / 1000; |
| 103 last_width = FXSYS_fabs(last_width); |
| 104 pObj->GetItemInfo(0, &item); |
| 105 wstr = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); |
| 106 if (wstr.GetLength()) { |
| 107 curChar = wstr.GetAt(0); |
| 108 } |
| 109 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont()); |
| 110 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000; |
| 111 this_width = FXSYS_fabs(this_width); |
| 112 FX_FLOAT threshold = |
| 113 last_width > this_width ? last_width / 4 : this_width / 4; |
| 114 CFX_AffineMatrix prev_matrix, prev_reverse; |
| 115 pPrevObj->GetTextMatrix(&prev_matrix); |
| 116 prev_reverse.SetReverse(prev_matrix); |
| 117 FX_FLOAT x = pObj->GetPosX(), y = pObj->GetPosY(); |
| 118 prev_reverse.Transform(x, y); |
| 119 if (FXSYS_fabs(y) > threshold * 2) { |
| 120 return 2; |
| 121 } |
| 122 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth); |
| 123 threshold = threshold > 400 |
| 124 ? (threshold < 700 ? threshold / 4 : threshold / 5) |
| 125 : (threshold / 2); |
| 126 threshold *= nLastWidth > nThisWidth ? FXSYS_fabs(pPrevObj->GetFontSize()) |
| 127 : FXSYS_fabs(pObj->GetFontSize()); |
| 128 threshold /= 1000; |
| 129 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && |
| 130 preChar != L' ') |
| 131 if (curChar != L' ' && preChar != L' ') { |
| 132 if ((x - last_pos - last_width) > threshold || |
| 133 (last_pos - x - last_width) > threshold) { |
| 134 return 1; |
| 135 } |
| 136 if (x < 0 && (last_pos - x - last_width) > threshold) { |
| 137 return 1; |
| 138 } |
| 139 if ((x - last_pos - last_width) > this_width || |
| 140 (x - last_pos - this_width) > last_width) { |
| 141 return 1; |
| 142 } |
| 143 } |
| 144 if (last_pos + last_width > x + this_width && curChar == L' ') { |
| 145 return 3; |
| 146 } |
| 147 return 0; |
| 148 } |
| 149 FX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, |
| 150 FX_BOOL bFirstLine) { |
| 151 CPDF_Font* pFont = pObj->GetFont(); |
| 152 CFX_AffineMatrix matrix; |
| 153 pObj->GetTextMatrix(&matrix); |
| 154 int item_index = 0; |
| 155 if (m_pLastObj) { |
| 156 int result = FPDFText_ProcessInterObj(m_pLastObj, pObj); |
| 157 if (result == 2) { |
| 158 int len = m_Buffer.GetLength(); |
| 159 if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') { |
| 160 m_Buffer.Delete(len - 1, 1); |
| 161 if (m_pObjArray) { |
| 162 m_pObjArray->RemoveAt((len - 1) * 2, 2); |
| 163 } |
| 164 } else { |
| 165 if (bFirstLine) { |
| 166 return TRUE; |
| 167 } |
| 168 if (m_bUseLF) { |
| 169 m_Buffer.AppendChar(L'\r'); |
| 170 m_Buffer.AppendChar(L'\n'); |
| 171 if (m_pObjArray) { |
| 172 for (int i = 0; i < 4; i++) { |
| 173 m_pObjArray->Add(NULL); |
| 174 } |
| 175 } |
| 176 } else { |
| 177 m_Buffer.AppendChar(' '); |
| 178 if (m_pObjArray) { |
| 179 m_pObjArray->Add(NULL); |
| 180 m_pObjArray->Add(NULL); |
| 181 } |
| 182 } |
| 183 } |
| 184 } else if (result == 1) { |
| 185 m_Buffer.AppendChar(L' '); |
| 186 if (m_pObjArray) { |
| 187 m_pObjArray->Add(NULL); |
| 188 m_pObjArray->Add(NULL); |
| 189 } |
| 190 } else if (result == -1) { |
| 191 m_pLastObj = pObj; |
| 192 return FALSE; |
| 193 } else if (result == 3) { |
| 194 item_index = 1; |
| 195 } |
| 196 } |
| 197 m_pLastObj = pObj; |
| 198 int nItems = pObj->CountItems(); |
| 199 FX_FLOAT Ignorekerning = 0; |
| 200 for (int i = 1; i < nItems - 1; i += 2) { |
86 CPDF_TextObjectItem item; | 201 CPDF_TextObjectItem item; |
87 int nItem = pPrevObj->CountItems(); | 202 pObj->GetItemInfo(i, &item); |
88 pPrevObj->GetItemInfo(nItem - 1, &item); | 203 if (item.m_CharCode == (FX_DWORD)-1) { |
89 FX_WCHAR preChar = 0, curChar = 0; | 204 if (i == 1) { |
90 CFX_WideString wstr = pPrevObj->GetFont()->UnicodeFromCharCode(item.m_CharCo
de); | 205 Ignorekerning = item.m_OriginX; |
91 if(wstr.GetLength()) { | 206 } else if (Ignorekerning > item.m_OriginX) { |
92 preChar = wstr.GetAt(0); | 207 Ignorekerning = item.m_OriginX; |
93 } | 208 } |
94 FX_FLOAT last_pos = item.m_OriginX; | 209 } else { |
95 int nLastWidth = GetCharWidth(item.m_CharCode, pPrevObj->GetFont()); | 210 Ignorekerning = 0; |
96 FX_FLOAT last_width = nLastWidth * pPrevObj->GetFontSize() / 1000; | 211 break; |
97 last_width = FXSYS_fabs(last_width); | 212 } |
98 pObj->GetItemInfo(0, &item); | 213 } |
99 wstr = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); | 214 FX_FLOAT spacing = 0; |
100 if(wstr.GetLength()) { | 215 for (; item_index < nItems; item_index++) { |
101 curChar = wstr.GetAt(0); | 216 CPDF_TextObjectItem item; |
102 } | 217 pObj->GetItemInfo(item_index, &item); |
103 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont()); | 218 if (item.m_CharCode == (FX_DWORD)-1) { |
104 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000; | 219 CFX_WideString wstr = m_Buffer.GetWideString(); |
105 this_width = FXSYS_fabs(this_width); | 220 if (wstr.IsEmpty() || wstr.GetAt(wstr.GetLength() - 1) == L' ') { |
106 FX_FLOAT threshold = last_width > this_width ? last_width / 4 : this_width /
4; | 221 continue; |
107 CFX_AffineMatrix prev_matrix, prev_reverse; | 222 } |
108 pPrevObj->GetTextMatrix(&prev_matrix); | 223 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); |
109 prev_reverse.SetReverse(prev_matrix); | 224 spacing = -fontsize_h * (item.m_OriginX - Ignorekerning) / 1000; |
110 FX_FLOAT x = pObj->GetPosX(), y = pObj->GetPosY(); | 225 continue; |
111 prev_reverse.Transform(x, y); | 226 } |
112 if (FXSYS_fabs(y) > threshold * 2) { | 227 FX_FLOAT charSpace = pObj->m_TextState.GetObject()->m_CharSpace; |
113 return 2; | 228 if (nItems > 3 && !spacing) { |
114 } | 229 charSpace = 0; |
115 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth); | 230 } |
116 threshold = threshold > 400 ? (threshold < 700 ? threshold / 4 : threshold
/ 5) : (threshold / 2); | 231 if ((spacing || charSpace) && item_index > 0) { |
117 threshold *= nLastWidth > nThisWidth ? FXSYS_fabs(pPrevObj->GetFontSize()) :
FXSYS_fabs(pObj->GetFontSize()); | 232 int last_width = 0; |
118 threshold /= 1000; | 233 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); |
119 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' &&
preChar != L' ') | 234 FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); |
120 if(curChar != L' ' && preChar != L' ') { | 235 FX_FLOAT threshold = 0; |
121 if((x - last_pos - last_width) > threshold || (last_pos - x - last_w
idth) > threshold) { | 236 if (space_charcode != -1) { |
122 return 1; | 237 threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000; |
123 } | 238 } |
124 if(x < 0 && (last_pos - x - last_width) > threshold) { | 239 if (threshold > fontsize_h / 3) { |
125 return 1; | 240 threshold = 0; |
126 } | 241 } else { |
127 if((x - last_pos - last_width) > this_width || (x - last_pos - this_
width) > last_width ) { | 242 threshold /= 2; |
128 return 1; | 243 } |
129 } | 244 if (threshold == 0) { |
130 } | 245 threshold = fontsize_h; |
131 if(last_pos + last_width > x + this_width && curChar == L' ') { | 246 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); |
132 return 3; | 247 threshold = this_width > last_width ? (FX_FLOAT)this_width |
133 } | 248 : (FX_FLOAT)last_width; |
134 return 0; | 249 int nDivide = 6; |
135 } | 250 if (threshold < 300) { |
136 FX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFir
stLine) | 251 nDivide = 2; |
137 { | 252 } else if (threshold < 500) { |
138 CPDF_Font* pFont = pObj->GetFont(); | 253 nDivide = 4; |
139 CFX_AffineMatrix matrix; | 254 } else if (threshold < 700) { |
140 pObj->GetTextMatrix(&matrix); | 255 nDivide = 5; |
141 int item_index = 0; | 256 } |
142 if (m_pLastObj) { | 257 threshold = threshold / nDivide; |
143 int result = FPDFText_ProcessInterObj(m_pLastObj, pObj); | 258 threshold = fontsize_h * threshold / 1000; |
144 if (result == 2) { | 259 } |
145 int len = m_Buffer.GetLength(); | 260 if (charSpace > 0.001) { |
146 if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') { | 261 spacing += matrix.TransformDistance(charSpace); |
147 m_Buffer.Delete(len - 1, 1); | 262 } else if (charSpace < -0.001) { |
148 if (m_pObjArray) { | 263 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace)); |
149 m_pObjArray->RemoveAt((len - 1) * 2, 2); | 264 } |
150 } | 265 if (threshold && (spacing && spacing >= threshold)) { |
151 } else { | 266 m_Buffer.AppendChar(L' '); |
152 if (bFirstLine) { | 267 if (m_pObjArray) { |
153 return TRUE; | 268 m_pObjArray->Add(NULL); |
154 } | 269 m_pObjArray->Add(NULL); |
155 if (m_bUseLF) { | 270 } |
156 m_Buffer.AppendChar(L'\r'); | 271 } |
157 m_Buffer.AppendChar(L'\n'); | 272 if (item.m_CharCode == (FX_DWORD)-1) { |
158 if (m_pObjArray) { | 273 continue; |
159 for (int i = 0; i < 4; i ++) { | 274 } |
160 m_pObjArray->Add(NULL); | 275 spacing = 0; |
161 } | 276 } |
162 } | 277 CFX_WideString unicode_str = pFont->UnicodeFromCharCode(item.m_CharCode); |
163 } else { | 278 if (unicode_str.IsEmpty()) { |
164 m_Buffer.AppendChar(' '); | 279 m_Buffer.AppendChar((FX_WCHAR)item.m_CharCode); |
165 if (m_pObjArray) { | 280 if (m_pObjArray) { |
166 m_pObjArray->Add(NULL); | 281 m_pObjArray->Add((void*)pObj); |
167 m_pObjArray->Add(NULL); | 282 m_pObjArray->Add((void*)(intptr_t)item_index); |
168 } | 283 } |
169 } | 284 } else { |
170 } | 285 m_Buffer << unicode_str; |
171 } else if (result == 1) { | 286 if (m_pObjArray) { |
172 m_Buffer.AppendChar(L' '); | 287 for (int i = 0; i < unicode_str.GetLength(); i++) { |
173 if (m_pObjArray) { | 288 m_pObjArray->Add((void*)pObj); |
174 m_pObjArray->Add(NULL); | 289 m_pObjArray->Add((void*)(intptr_t)item_index); |
175 m_pObjArray->Add(NULL); | 290 } |
176 } | 291 } |
177 } else if (result == -1) { | 292 } |
178 m_pLastObj = pObj; | 293 } |
179 return FALSE; | 294 return FALSE; |
180 } else if (result == 3) { | 295 } |
181 item_index = 1; | 296 void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, |
182 } | 297 CPDF_PageObjects* pPage, |
183 } | 298 FX_BOOL bUseLF, |
184 m_pLastObj = pObj; | 299 CFX_PtrArray* pObjArray) { |
185 int nItems = pObj->CountItems(); | 300 CPDF_TextStream textstream(buffer, bUseLF, pObjArray); |
186 FX_FLOAT Ignorekerning = 0; | 301 FX_POSITION pos = pPage->GetFirstObjectPosition(); |
187 for(int i = 1; i < nItems - 1; i += 2) { | 302 while (pos) { |
188 CPDF_TextObjectItem item; | 303 CPDF_PageObject* pObject = pPage->GetNextObject(pos); |
189 pObj->GetItemInfo(i, &item); | 304 if (pObject == NULL) { |
190 if (item.m_CharCode == (FX_DWORD) - 1) { | 305 continue; |
191 if(i == 1) { | 306 } |
192 Ignorekerning = item.m_OriginX; | 307 if (pObject->m_Type != PDFPAGE_TEXT) { |
193 } else if(Ignorekerning > item.m_OriginX) { | 308 continue; |
194 Ignorekerning = item.m_OriginX; | 309 } |
195 } | 310 textstream.ProcessObject((CPDF_TextObject*)pObject, FALSE); |
196 } else { | 311 } |
197 Ignorekerning = 0; | 312 } |
198 break; | 313 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, |
199 } | 314 CPDF_Dictionary* pPage) { |
200 } | 315 CFX_WideTextBuf buffer; |
201 FX_FLOAT spacing = 0; | 316 buffer.EstimateSize(0, 1024); |
202 for (; item_index < nItems; item_index ++) { | 317 CPDF_Page page; |
203 CPDF_TextObjectItem item; | 318 page.Load(pDoc, pPage); |
204 pObj->GetItemInfo(item_index, &item); | 319 CPDF_ParseOptions options; |
205 if (item.m_CharCode == (FX_DWORD) - 1) { | 320 options.m_bTextOnly = TRUE; |
206 CFX_WideString wstr = m_Buffer.GetWideString(); | 321 options.m_bSeparateForm = FALSE; |
207 if (wstr.IsEmpty() || wstr.GetAt(wstr.GetLength() - 1) == L' ') { | 322 page.ParseContent(&options); |
208 continue; | 323 CPDF_TextStream textstream(buffer, FALSE, NULL); |
209 } | 324 FX_POSITION pos = page.GetFirstObjectPosition(); |
210 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); | 325 while (pos) { |
211 spacing = -fontsize_h * (item.m_OriginX - Ignorekerning) / 1000; | 326 CPDF_PageObject* pObject = page.GetNextObject(pos); |
212 continue; | 327 if (pObject->m_Type != PDFPAGE_TEXT) { |
213 } | 328 continue; |
214 FX_FLOAT charSpace = pObj->m_TextState.GetObject()->m_CharSpace; | 329 } |
215 if(nItems > 3 && !spacing) { | 330 if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) { |
216 charSpace = 0; | 331 break; |
217 } | 332 } |
218 if((spacing || charSpace) && item_index > 0) { | 333 } |
219 int last_width = 0; | 334 return buffer.GetWideString(); |
220 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); | 335 } |
221 FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); | |
222 FX_FLOAT threshold = 0; | |
223 if (space_charcode != -1) { | |
224 threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) /
1000 ; | |
225 } | |
226 if(threshold > fontsize_h / 3) { | |
227 threshold = 0; | |
228 } else { | |
229 threshold /= 2; | |
230 } | |
231 if (threshold == 0) { | |
232 threshold = fontsize_h; | |
233 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont))
; | |
234 threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX
_FLOAT)last_width; | |
235 int nDivide = 6; | |
236 if (threshold < 300) { | |
237 nDivide = 2; | |
238 } else if (threshold < 500) { | |
239 nDivide = 4; | |
240 } else if (threshold < 700) { | |
241 nDivide = 5; | |
242 } | |
243 threshold = threshold / nDivide; | |
244 threshold = fontsize_h * threshold / 1000; | |
245 } | |
246 if(charSpace > 0.001) { | |
247 spacing += matrix.TransformDistance(charSpace); | |
248 } else if(charSpace < -0.001) { | |
249 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace)); | |
250 } | |
251 if (threshold && (spacing && spacing >= threshold) ) { | |
252 m_Buffer.AppendChar(L' '); | |
253 if (m_pObjArray) { | |
254 m_pObjArray->Add(NULL); | |
255 m_pObjArray->Add(NULL); | |
256 } | |
257 } | |
258 if (item.m_CharCode == (FX_DWORD) - 1) { | |
259 continue; | |
260 } | |
261 spacing = 0; | |
262 } | |
263 CFX_WideString unicode_str = pFont->UnicodeFromCharCode(item.m_CharCode)
; | |
264 if (unicode_str.IsEmpty()) { | |
265 m_Buffer.AppendChar((FX_WCHAR)item.m_CharCode); | |
266 if (m_pObjArray) { | |
267 m_pObjArray->Add((void*)pObj); | |
268 m_pObjArray->Add((void*)(intptr_t)item_index); | |
269 } | |
270 } else { | |
271 m_Buffer << unicode_str; | |
272 if (m_pObjArray) { | |
273 for (int i = 0; i < unicode_str.GetLength(); i ++) { | |
274 m_pObjArray->Add((void*)pObj); | |
275 m_pObjArray->Add((void*)(intptr_t)item_index); | |
276 } | |
277 } | |
278 } | |
279 } | |
280 return FALSE; | |
281 } | |
282 void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage
, FX_BOOL bUseLF, | |
283 CFX_PtrArray* pObjArray) | |
284 { | |
285 CPDF_TextStream textstream(buffer, bUseLF, pObjArray); | |
286 FX_POSITION pos = pPage->GetFirstObjectPosition(); | |
287 while (pos) { | |
288 CPDF_PageObject* pObject = pPage->GetNextObject(pos); | |
289 if (pObject == NULL) { | |
290 continue; | |
291 } | |
292 if (pObject->m_Type != PDFPAGE_TEXT) { | |
293 continue; | |
294 } | |
295 textstream.ProcessObject((CPDF_TextObject*)pObject, FALSE); | |
296 } | |
297 } | |
298 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary
* pPage) | |
299 { | |
300 CFX_WideTextBuf buffer; | |
301 buffer.EstimateSize(0, 1024); | |
302 CPDF_Page page; | |
303 page.Load(pDoc, pPage); | |
304 CPDF_ParseOptions options; | |
305 options.m_bTextOnly = TRUE; | |
306 options.m_bSeparateForm = FALSE; | |
307 page.ParseContent(&options); | |
308 CPDF_TextStream textstream(buffer, FALSE, NULL); | |
309 FX_POSITION pos = page.GetFirstObjectPosition(); | |
310 while (pos) { | |
311 CPDF_PageObject* pObject = page.GetNextObject(pos); | |
312 if (pObject->m_Type != PDFPAGE_TEXT) { | |
313 continue; | |
314 } | |
315 if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) { | |
316 break; | |
317 } | |
318 } | |
319 return buffer.GetWideString(); | |
320 } | |
OLD | NEW |