Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Side by Side Diff: core/src/fpdftext/fpdf_text_search.cpp

Issue 453133004: clang-format all code (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "../../include/fpdfapi/fpdf_pageobj.h" 7 #include "../../include/fpdfapi/fpdf_pageobj.h"
8 #include "../../include/fpdftext/fpdf_text.h" 8 #include "../../include/fpdftext/fpdf_text.h"
9 #include "../../include/fpdfapi/fpdf_page.h" 9 #include "../../include/fpdfapi/fpdf_page.h"
10 class CPDF_TextStream : public CFX_Object 10 class CPDF_TextStream : public CFX_Object {
11 { 11 public:
12 public: 12 CPDF_TextStream(CFX_WideTextBuf& buffer,
13 CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjA rray); 13 FX_BOOL bUseLF,
14 ~CPDF_TextStream() {} 14 CFX_PtrArray* pObjArray);
15 FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine); 15 ~CPDF_TextStream() {}
16 CFX_WideTextBuf&» m_Buffer; 16 FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine);
17 FX_BOOL» » » » m_bUseLF; 17 CFX_WideTextBuf& m_Buffer;
18 CFX_PtrArray*» » m_pObjArray; 18 FX_BOOL m_bUseLF;
19 const CPDF_TextObject*» m_pLastObj; 19 CFX_PtrArray* m_pObjArray;
20 const CPDF_TextObject* m_pLastObj;
20 }; 21 };
21 CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_Pt rArray* pObjArray) : m_Buffer(buffer) 22 CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer,
22 { 23 FX_BOOL bUseLF,
23 m_pLastObj = NULL; 24 CFX_PtrArray* pObjArray)
24 m_bUseLF = bUseLF; 25 : m_Buffer(buffer) {
25 m_pObjArray = pObjArray; 26 m_pLastObj = NULL;
26 } 27 m_bUseLF = bUseLF;
27 FX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, const CPDF_T extObject* pTextObj2) 28 m_pObjArray = pObjArray;
28 { 29 }
29 if (!pTextObj1 || !pTextObj2) { 30 FX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1,
30 return FALSE; 31 const CPDF_TextObject* pTextObj2) {
31 } 32 if (!pTextObj1 || !pTextObj2) {
32 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_ Right, pTextObj2->m_Top); 33 return FALSE;
33 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_ Right, pTextObj1->m_Top); 34 }
34 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { 35 CFX_FloatRect rcPreObj(pTextObj2->m_Left,
35 return TRUE; 36 pTextObj2->m_Bottom,
36 } 37 pTextObj2->m_Right,
37 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { 38 pTextObj2->m_Top);
38 rcPreObj.Intersect(rcCurObj); 39 CFX_FloatRect rcCurObj(pTextObj1->m_Left,
39 if (rcPreObj.IsEmpty()) { 40 pTextObj1->m_Bottom,
40 return FALSE; 41 pTextObj1->m_Right,
41 } 42 pTextObj1->m_Top);
42 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() / 2) { 43 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) {
43 return FALSE;
44 }
45 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) {
46 return FALSE;
47 }
48 }
49 int nPreCount = pTextObj2->CountItems();
50 int nCurCount = pTextObj1->CountItems();
51 if (nPreCount != nCurCount) {
52 return FALSE;
53 }
54 for (int i = 0; i < nPreCount; i++) {
55 CPDF_TextObjectItem itemPer, itemCur;
56 pTextObj2->GetItemInfo(i, &itemPer);
57 pTextObj1->GetItemInfo(i, &itemCur);
58 if (itemCur.m_CharCode != itemPer.m_CharCode) {
59 return FALSE;
60 }
61 }
62 return TRUE; 44 return TRUE;
63 } 45 }
64 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) 46 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) {
65 { 47 rcPreObj.Intersect(rcCurObj);
66 if(charCode == -1) { 48 if (rcPreObj.IsEmpty()) {
67 return 0; 49 return FALSE;
68 } 50 }
69 int w = pFont->GetCharWidthF(charCode); 51 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) >
70 if(w == 0) { 52 rcCurObj.Width() / 2) {
71 CFX_ByteString str; 53 return FALSE;
72 pFont->AppendChar(str, charCode); 54 }
73 w = pFont->GetStringWidth(str, 1); 55 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) {
74 if(w == 0) { 56 return FALSE;
75 FX_RECT BBox; 57 }
76 pFont->GetCharBBox(charCode, BBox); 58 }
77 w = BBox.right - BBox.left; 59 int nPreCount = pTextObj2->CountItems();
78 } 60 int nCurCount = pTextObj1->CountItems();
79 } 61 if (nPreCount != nCurCount) {
80 return w; 62 return FALSE;
81 } 63 }
82 int FPDFText_ProcessInterObj(const CPDF_TextObject* pPrevObj, const CPDF_TextObj ect* pObj) 64 for (int i = 0; i < nPreCount; i++) {
83 { 65 CPDF_TextObjectItem itemPer, itemCur;
84 if(FPDFText_IsSameTextObject(pPrevObj, pObj)) { 66 pTextObj2->GetItemInfo(i, &itemPer);
85 return -1; 67 pTextObj1->GetItemInfo(i, &itemCur);
86 } 68 if (itemCur.m_CharCode != itemPer.m_CharCode) {
69 return FALSE;
70 }
71 }
72 return TRUE;
73 }
74 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) {
75 if (charCode == -1) {
76 return 0;
77 }
78 int w = pFont->GetCharWidthF(charCode);
79 if (w == 0) {
80 CFX_ByteString str;
81 pFont->AppendChar(str, charCode);
82 w = pFont->GetStringWidth(str, 1);
83 if (w == 0) {
84 FX_RECT BBox;
85 pFont->GetCharBBox(charCode, BBox);
86 w = BBox.right - BBox.left;
87 }
88 }
89 return w;
90 }
91 int FPDFText_ProcessInterObj(const CPDF_TextObject* pPrevObj,
92 const CPDF_TextObject* pObj) {
93 if (FPDFText_IsSameTextObject(pPrevObj, pObj)) {
94 return -1;
95 }
96 CPDF_TextObjectItem item;
97 int nItem = pPrevObj->CountItems();
98 pPrevObj->GetItemInfo(nItem - 1, &item);
99 FX_WCHAR preChar = 0, curChar = 0;
100 CFX_WideString wstr =
101 pPrevObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
102 if (wstr.GetLength()) {
103 preChar = wstr.GetAt(0);
104 }
105 FX_FLOAT last_pos = item.m_OriginX;
106 int nLastWidth = GetCharWidth(item.m_CharCode, pPrevObj->GetFont());
107 FX_FLOAT last_width = nLastWidth * pPrevObj->GetFontSize() / 1000;
108 last_width = FXSYS_fabs(last_width);
109 pObj->GetItemInfo(0, &item);
110 wstr = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
111 if (wstr.GetLength()) {
112 curChar = wstr.GetAt(0);
113 }
114 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont());
115 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000;
116 this_width = FXSYS_fabs(this_width);
117 FX_FLOAT threshold =
118 last_width > this_width ? last_width / 4 : this_width / 4;
119 CFX_AffineMatrix prev_matrix, prev_reverse;
120 pPrevObj->GetTextMatrix(&prev_matrix);
121 prev_reverse.SetReverse(prev_matrix);
122 FX_FLOAT x = pObj->GetPosX(), y = pObj->GetPosY();
123 prev_reverse.Transform(x, y);
124 if (FXSYS_fabs(y) > threshold * 2) {
125 return 2;
126 }
127 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth);
128 threshold = threshold > 400
129 ? (threshold < 700 ? threshold / 4 : threshold / 5)
130 : (threshold / 2);
131 threshold *= nLastWidth > nThisWidth ? FXSYS_fabs(pPrevObj->GetFontSize())
132 : FXSYS_fabs(pObj->GetFontSize());
133 threshold /= 1000;
134 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' &&
135 preChar != L' ')
136 if (curChar != L' ' && preChar != L' ') {
137 if ((x - last_pos - last_width) > threshold ||
138 (last_pos - x - last_width) > threshold) {
139 return 1;
140 }
141 if (x < 0 && (last_pos - x - last_width) > threshold) {
142 return 1;
143 }
144 if ((x - last_pos - last_width) > this_width ||
145 (x - last_pos - this_width) > last_width) {
146 return 1;
147 }
148 }
149 if (last_pos + last_width > x + this_width && curChar == L' ') {
150 return 3;
151 }
152 return 0;
153 }
154 FX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj,
155 FX_BOOL bFirstLine) {
156 CPDF_Font* pFont = pObj->GetFont();
157 CFX_AffineMatrix matrix;
158 pObj->GetTextMatrix(&matrix);
159 int item_index = 0;
160 if (m_pLastObj) {
161 int result = FPDFText_ProcessInterObj(m_pLastObj, pObj);
162 if (result == 2) {
163 int len = m_Buffer.GetLength();
164 if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') {
165 m_Buffer.Delete(len - 1, 1);
166 if (m_pObjArray) {
167 m_pObjArray->RemoveAt((len - 1) * 2, 2);
168 }
169 } else {
170 if (bFirstLine) {
171 return TRUE;
172 }
173 if (m_bUseLF) {
174 m_Buffer.AppendChar(L'\r');
175 m_Buffer.AppendChar(L'\n');
176 if (m_pObjArray) {
177 for (int i = 0; i < 4; i++) {
178 m_pObjArray->Add(NULL);
179 }
180 }
181 } else {
182 m_Buffer.AppendChar(' ');
183 if (m_pObjArray) {
184 m_pObjArray->Add(NULL);
185 m_pObjArray->Add(NULL);
186 }
187 }
188 }
189 } else if (result == 1) {
190 m_Buffer.AppendChar(L' ');
191 if (m_pObjArray) {
192 m_pObjArray->Add(NULL);
193 m_pObjArray->Add(NULL);
194 }
195 } else if (result == -1) {
196 m_pLastObj = pObj;
197 return FALSE;
198 } else if (result == 3) {
199 item_index = 1;
200 }
201 }
202 m_pLastObj = pObj;
203 int nItems = pObj->CountItems();
204 FX_FLOAT Ignorekerning = 0;
205 for (int i = 1; i < nItems - 1; i += 2) {
87 CPDF_TextObjectItem item; 206 CPDF_TextObjectItem item;
88 int nItem = pPrevObj->CountItems(); 207 pObj->GetItemInfo(i, &item);
89 pPrevObj->GetItemInfo(nItem - 1, &item); 208 if (item.m_CharCode == (FX_DWORD)-1) {
90 FX_WCHAR preChar = 0, curChar = 0; 209 if (i == 1) {
91 CFX_WideString wstr = pPrevObj->GetFont()->UnicodeFromCharCode(item.m_CharCo de); 210 Ignorekerning = item.m_OriginX;
92 if(wstr.GetLength()) { 211 } else if (Ignorekerning > item.m_OriginX) {
93 preChar = wstr.GetAt(0); 212 Ignorekerning = item.m_OriginX;
94 } 213 }
95 FX_FLOAT last_pos = item.m_OriginX; 214 } else {
96 int nLastWidth = GetCharWidth(item.m_CharCode, pPrevObj->GetFont()); 215 Ignorekerning = 0;
97 FX_FLOAT last_width = nLastWidth * pPrevObj->GetFontSize() / 1000; 216 break;
98 last_width = FXSYS_fabs(last_width); 217 }
99 pObj->GetItemInfo(0, &item); 218 }
100 wstr = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); 219 FX_FLOAT spacing = 0;
101 if(wstr.GetLength()) { 220 for (; item_index < nItems; item_index++) {
102 curChar = wstr.GetAt(0); 221 CPDF_TextObjectItem item;
103 } 222 pObj->GetItemInfo(item_index, &item);
104 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont()); 223 if (item.m_CharCode == (FX_DWORD)-1) {
105 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000; 224 CFX_WideString wstr = m_Buffer.GetWideString();
106 this_width = FXSYS_fabs(this_width); 225 if (wstr.IsEmpty() || wstr.GetAt(wstr.GetLength() - 1) == L' ') {
107 FX_FLOAT threshold = last_width > this_width ? last_width / 4 : this_width / 4; 226 continue;
108 CFX_AffineMatrix prev_matrix, prev_reverse; 227 }
109 pPrevObj->GetTextMatrix(&prev_matrix); 228 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH();
110 prev_reverse.SetReverse(prev_matrix); 229 spacing = -fontsize_h * (item.m_OriginX - Ignorekerning) / 1000;
111 FX_FLOAT x = pObj->GetPosX(), y = pObj->GetPosY(); 230 continue;
112 prev_reverse.Transform(x, y); 231 }
113 if (FXSYS_fabs(y) > threshold * 2) { 232 FX_FLOAT charSpace = pObj->m_TextState.GetObject()->m_CharSpace;
114 return 2; 233 if (nItems > 3 && !spacing) {
115 } 234 charSpace = 0;
116 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth); 235 }
117 threshold = threshold > 400 ? (threshold < 700 ? threshold / 4 : threshold / 5) : (threshold / 2); 236 if ((spacing || charSpace) && item_index > 0) {
118 threshold *= nLastWidth > nThisWidth ? FXSYS_fabs(pPrevObj->GetFontSize()) : FXSYS_fabs(pObj->GetFontSize()); 237 int last_width = 0;
119 threshold /= 1000; 238 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH();
120 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && preChar != L' ') 239 FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' ');
121 if(curChar != L' ' && preChar != L' ') { 240 FX_FLOAT threshold = 0;
122 if((x - last_pos - last_width) > threshold || (last_pos - x - last_w idth) > threshold) { 241 if (space_charcode != -1) {
123 return 1; 242 threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000;
124 } 243 }
125 if(x < 0 && (last_pos - x - last_width) > threshold) { 244 if (threshold > fontsize_h / 3) {
126 return 1; 245 threshold = 0;
127 } 246 } else {
128 if((x - last_pos - last_width) > this_width || (x - last_pos - this_ width) > last_width ) { 247 threshold /= 2;
129 return 1; 248 }
130 } 249 if (threshold == 0) {
131 } 250 threshold = fontsize_h;
132 if(last_pos + last_width > x + this_width && curChar == L' ') { 251 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont));
133 return 3; 252 threshold = this_width > last_width ? (FX_FLOAT)this_width
134 } 253 : (FX_FLOAT)last_width;
135 return 0; 254 int nDivide = 6;
136 } 255 if (threshold < 300) {
137 FX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFir stLine) 256 nDivide = 2;
138 { 257 } else if (threshold < 500) {
139 CPDF_Font* pFont = pObj->GetFont(); 258 nDivide = 4;
140 CFX_AffineMatrix matrix; 259 } else if (threshold < 700) {
141 pObj->GetTextMatrix(&matrix); 260 nDivide = 5;
142 int item_index = 0; 261 }
143 if (m_pLastObj) { 262 threshold = threshold / nDivide;
144 int result = FPDFText_ProcessInterObj(m_pLastObj, pObj); 263 threshold = fontsize_h * threshold / 1000;
145 if (result == 2) { 264 }
146 int len = m_Buffer.GetLength(); 265 if (charSpace > 0.001) {
147 if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') { 266 spacing += matrix.TransformDistance(charSpace);
148 m_Buffer.Delete(len - 1, 1); 267 } else if (charSpace < -0.001) {
149 if (m_pObjArray) { 268 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace));
150 m_pObjArray->RemoveAt((len - 1) * 2, 2); 269 }
151 } 270 if (threshold && (spacing && spacing >= threshold)) {
152 } else { 271 m_Buffer.AppendChar(L' ');
153 if (bFirstLine) { 272 if (m_pObjArray) {
154 return TRUE; 273 m_pObjArray->Add(NULL);
155 } 274 m_pObjArray->Add(NULL);
156 if (m_bUseLF) { 275 }
157 m_Buffer.AppendChar(L'\r'); 276 }
158 m_Buffer.AppendChar(L'\n'); 277 if (item.m_CharCode == (FX_DWORD)-1) {
159 if (m_pObjArray) { 278 continue;
160 for (int i = 0; i < 4; i ++) { 279 }
161 m_pObjArray->Add(NULL); 280 spacing = 0;
162 } 281 }
163 } 282 CFX_WideString unicode_str = pFont->UnicodeFromCharCode(item.m_CharCode);
164 } else { 283 if (unicode_str.IsEmpty()) {
165 m_Buffer.AppendChar(' '); 284 m_Buffer.AppendChar((FX_WCHAR)item.m_CharCode);
166 if (m_pObjArray) { 285 if (m_pObjArray) {
167 m_pObjArray->Add(NULL); 286 m_pObjArray->Add((void*)pObj);
168 m_pObjArray->Add(NULL); 287 m_pObjArray->Add((void*)(FX_INTPTR) item_index);
169 } 288 }
170 } 289 } else {
171 } 290 m_Buffer << unicode_str;
172 } else if (result == 1) { 291 if (m_pObjArray) {
173 m_Buffer.AppendChar(L' '); 292 for (int i = 0; i < unicode_str.GetLength(); i++) {
174 if (m_pObjArray) { 293 m_pObjArray->Add((void*)pObj);
175 m_pObjArray->Add(NULL); 294 m_pObjArray->Add((void*)(FX_INTPTR) item_index);
176 m_pObjArray->Add(NULL); 295 }
177 } 296 }
178 } else if (result == -1) { 297 }
179 m_pLastObj = pObj; 298 }
180 return FALSE; 299 return FALSE;
181 } else if (result == 3) { 300 }
182 item_index = 1; 301 void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer,
183 } 302 CPDF_PageObjects* pPage,
184 } 303 FX_BOOL bUseLF,
185 m_pLastObj = pObj; 304 CFX_PtrArray* pObjArray) {
186 int nItems = pObj->CountItems(); 305 CPDF_TextStream textstream(buffer, bUseLF, pObjArray);
187 FX_FLOAT Ignorekerning = 0; 306 FX_POSITION pos = pPage->GetFirstObjectPosition();
188 for(int i = 1; i < nItems - 1; i += 2) { 307 while (pos) {
189 CPDF_TextObjectItem item; 308 CPDF_PageObject* pObject = pPage->GetNextObject(pos);
190 pObj->GetItemInfo(i, &item); 309 if (pObject == NULL) {
191 if (item.m_CharCode == (FX_DWORD) - 1) { 310 continue;
192 if(i == 1) { 311 }
193 Ignorekerning = item.m_OriginX; 312 if (pObject->m_Type != PDFPAGE_TEXT) {
194 } else if(Ignorekerning > item.m_OriginX) { 313 continue;
195 Ignorekerning = item.m_OriginX; 314 }
196 } 315 textstream.ProcessObject((CPDF_TextObject*)pObject, FALSE);
197 } else { 316 }
198 Ignorekerning = 0; 317 }
199 break; 318 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc,
200 } 319 CPDF_Dictionary* pPage) {
201 } 320 CFX_WideTextBuf buffer;
202 FX_FLOAT spacing = 0; 321 buffer.EstimateSize(0, 1024);
203 for (; item_index < nItems; item_index ++) { 322 CPDF_Page page;
204 CPDF_TextObjectItem item; 323 page.Load(pDoc, pPage);
205 pObj->GetItemInfo(item_index, &item); 324 CPDF_ParseOptions options;
206 if (item.m_CharCode == (FX_DWORD) - 1) { 325 options.m_bTextOnly = TRUE;
207 CFX_WideString wstr = m_Buffer.GetWideString(); 326 options.m_bSeparateForm = FALSE;
208 if (wstr.IsEmpty() || wstr.GetAt(wstr.GetLength() - 1) == L' ') { 327 page.ParseContent(&options);
209 continue; 328 CPDF_TextStream textstream(buffer, FALSE, NULL);
210 } 329 FX_POSITION pos = page.GetFirstObjectPosition();
211 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); 330 while (pos) {
212 spacing = -fontsize_h * (item.m_OriginX - Ignorekerning) / 1000; 331 CPDF_PageObject* pObject = page.GetNextObject(pos);
213 continue; 332 if (pObject->m_Type != PDFPAGE_TEXT) {
214 } 333 continue;
215 FX_FLOAT charSpace = pObj->m_TextState.GetObject()->m_CharSpace; 334 }
216 if(nItems > 3 && !spacing) { 335 if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) {
217 charSpace = 0; 336 break;
218 } 337 }
219 if((spacing || charSpace) && item_index > 0) { 338 }
220 int last_width = 0; 339 return buffer.GetWideString();
221 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); 340 }
222 FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' ');
223 FX_FLOAT threshold = 0;
224 if (space_charcode != -1) {
225 threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000 ;
226 }
227 if(threshold > fontsize_h / 3) {
228 threshold = 0;
229 } else {
230 threshold /= 2;
231 }
232 if (threshold == 0) {
233 threshold = fontsize_h;
234 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)) ;
235 threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX _FLOAT)last_width;
236 int nDivide = 6;
237 if (threshold < 300) {
238 nDivide = 2;
239 } else if (threshold < 500) {
240 nDivide = 4;
241 } else if (threshold < 700) {
242 nDivide = 5;
243 }
244 threshold = threshold / nDivide;
245 threshold = fontsize_h * threshold / 1000;
246 }
247 if(charSpace > 0.001) {
248 spacing += matrix.TransformDistance(charSpace);
249 } else if(charSpace < -0.001) {
250 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace));
251 }
252 if (threshold && (spacing && spacing >= threshold) ) {
253 m_Buffer.AppendChar(L' ');
254 if (m_pObjArray) {
255 m_pObjArray->Add(NULL);
256 m_pObjArray->Add(NULL);
257 }
258 }
259 if (item.m_CharCode == (FX_DWORD) - 1) {
260 continue;
261 }
262 spacing = 0;
263 }
264 CFX_WideString unicode_str = pFont->UnicodeFromCharCode(item.m_CharCode) ;
265 if (unicode_str.IsEmpty()) {
266 m_Buffer.AppendChar((FX_WCHAR)item.m_CharCode);
267 if (m_pObjArray) {
268 m_pObjArray->Add((void*)pObj);
269 m_pObjArray->Add((void*)(FX_INTPTR)item_index);
270 }
271 } else {
272 m_Buffer << unicode_str;
273 if (m_pObjArray) {
274 for (int i = 0; i < unicode_str.GetLength(); i ++) {
275 m_pObjArray->Add((void*)pObj);
276 m_pObjArray->Add((void*)(FX_INTPTR)item_index);
277 }
278 }
279 }
280 }
281 return FALSE;
282 }
283 void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage , FX_BOOL bUseLF,
284 CFX_PtrArray* pObjArray)
285 {
286 CPDF_TextStream textstream(buffer, bUseLF, pObjArray);
287 FX_POSITION pos = pPage->GetFirstObjectPosition();
288 while (pos) {
289 CPDF_PageObject* pObject = pPage->GetNextObject(pos);
290 if (pObject == NULL) {
291 continue;
292 }
293 if (pObject->m_Type != PDFPAGE_TEXT) {
294 continue;
295 }
296 textstream.ProcessObject((CPDF_TextObject*)pObject, FALSE);
297 }
298 }
299 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary * pPage)
300 {
301 CFX_WideTextBuf buffer;
302 buffer.EstimateSize(0, 1024);
303 CPDF_Page page;
304 page.Load(pDoc, pPage);
305 CPDF_ParseOptions options;
306 options.m_bTextOnly = TRUE;
307 options.m_bSeparateForm = FALSE;
308 page.ParseContent(&options);
309 CPDF_TextStream textstream(buffer, FALSE, NULL);
310 FX_POSITION pos = page.GetFirstObjectPosition();
311 while (pos) {
312 CPDF_PageObject* pObject = page.GetNextObject(pos);
313 if (pObject->m_Type != PDFPAGE_TEXT) {
314 continue;
315 }
316 if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) {
317 break;
318 }
319 }
320 return buffer.GetWideString();
321 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698