| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "../../include/fpdfapi/fpdf_pageobj.h" | 7 #include "../../include/fpdfapi/fpdf_pageobj.h" |
| 8 #include "../../include/fpdftext/fpdf_text.h" | 8 #include "../../include/fpdftext/fpdf_text.h" |
| 9 #include "../../include/fpdfapi/fpdf_page.h" | 9 #include "../../include/fpdfapi/fpdf_page.h" |
| 10 class CPDF_TextStream : public CFX_Object | 10 class CPDF_TextStream : public CFX_Object { |
| 11 { | 11 public: |
| 12 public: | 12 CPDF_TextStream(CFX_WideTextBuf& buffer, |
| 13 CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjA
rray); | 13 FX_BOOL bUseLF, |
| 14 ~CPDF_TextStream() {} | 14 CFX_PtrArray* pObjArray); |
| 15 FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine); | 15 ~CPDF_TextStream() {} |
| 16 CFX_WideTextBuf&» m_Buffer; | 16 FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine); |
| 17 FX_BOOL» » » » m_bUseLF; | 17 CFX_WideTextBuf& m_Buffer; |
| 18 CFX_PtrArray*» » m_pObjArray; | 18 FX_BOOL m_bUseLF; |
| 19 const CPDF_TextObject*» m_pLastObj; | 19 CFX_PtrArray* m_pObjArray; |
| 20 const CPDF_TextObject* m_pLastObj; |
| 20 }; | 21 }; |
| 21 CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_Pt
rArray* pObjArray) : m_Buffer(buffer) | 22 CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, |
| 22 { | 23 FX_BOOL bUseLF, |
| 23 m_pLastObj = NULL; | 24 CFX_PtrArray* pObjArray) |
| 24 m_bUseLF = bUseLF; | 25 : m_Buffer(buffer) { |
| 25 m_pObjArray = pObjArray; | 26 m_pLastObj = NULL; |
| 26 } | 27 m_bUseLF = bUseLF; |
| 27 FX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, const CPDF_T
extObject* pTextObj2) | 28 m_pObjArray = pObjArray; |
| 28 { | 29 } |
| 29 if (!pTextObj1 || !pTextObj2) { | 30 FX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, |
| 30 return FALSE; | 31 const CPDF_TextObject* pTextObj2) { |
| 31 } | 32 if (!pTextObj1 || !pTextObj2) { |
| 32 CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_
Right, pTextObj2->m_Top); | 33 return FALSE; |
| 33 CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_
Right, pTextObj1->m_Top); | 34 } |
| 34 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { | 35 CFX_FloatRect rcPreObj(pTextObj2->m_Left, |
| 35 return TRUE; | 36 pTextObj2->m_Bottom, |
| 36 } | 37 pTextObj2->m_Right, |
| 37 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { | 38 pTextObj2->m_Top); |
| 38 rcPreObj.Intersect(rcCurObj); | 39 CFX_FloatRect rcCurObj(pTextObj1->m_Left, |
| 39 if (rcPreObj.IsEmpty()) { | 40 pTextObj1->m_Bottom, |
| 40 return FALSE; | 41 pTextObj1->m_Right, |
| 41 } | 42 pTextObj1->m_Top); |
| 42 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() /
2) { | 43 if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { |
| 43 return FALSE; | |
| 44 } | |
| 45 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) { | |
| 46 return FALSE; | |
| 47 } | |
| 48 } | |
| 49 int nPreCount = pTextObj2->CountItems(); | |
| 50 int nCurCount = pTextObj1->CountItems(); | |
| 51 if (nPreCount != nCurCount) { | |
| 52 return FALSE; | |
| 53 } | |
| 54 for (int i = 0; i < nPreCount; i++) { | |
| 55 CPDF_TextObjectItem itemPer, itemCur; | |
| 56 pTextObj2->GetItemInfo(i, &itemPer); | |
| 57 pTextObj1->GetItemInfo(i, &itemCur); | |
| 58 if (itemCur.m_CharCode != itemPer.m_CharCode) { | |
| 59 return FALSE; | |
| 60 } | |
| 61 } | |
| 62 return TRUE; | 44 return TRUE; |
| 63 } | 45 } |
| 64 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) | 46 if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) { |
| 65 { | 47 rcPreObj.Intersect(rcCurObj); |
| 66 if(charCode == -1) { | 48 if (rcPreObj.IsEmpty()) { |
| 67 return 0; | 49 return FALSE; |
| 68 } | 50 } |
| 69 int w = pFont->GetCharWidthF(charCode); | 51 if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > |
| 70 if(w == 0) { | 52 rcCurObj.Width() / 2) { |
| 71 CFX_ByteString str; | 53 return FALSE; |
| 72 pFont->AppendChar(str, charCode); | 54 } |
| 73 w = pFont->GetStringWidth(str, 1); | 55 if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) { |
| 74 if(w == 0) { | 56 return FALSE; |
| 75 FX_RECT BBox; | 57 } |
| 76 pFont->GetCharBBox(charCode, BBox); | 58 } |
| 77 w = BBox.right - BBox.left; | 59 int nPreCount = pTextObj2->CountItems(); |
| 78 } | 60 int nCurCount = pTextObj1->CountItems(); |
| 79 } | 61 if (nPreCount != nCurCount) { |
| 80 return w; | 62 return FALSE; |
| 81 } | 63 } |
| 82 int FPDFText_ProcessInterObj(const CPDF_TextObject* pPrevObj, const CPDF_TextObj
ect* pObj) | 64 for (int i = 0; i < nPreCount; i++) { |
| 83 { | 65 CPDF_TextObjectItem itemPer, itemCur; |
| 84 if(FPDFText_IsSameTextObject(pPrevObj, pObj)) { | 66 pTextObj2->GetItemInfo(i, &itemPer); |
| 85 return -1; | 67 pTextObj1->GetItemInfo(i, &itemCur); |
| 86 } | 68 if (itemCur.m_CharCode != itemPer.m_CharCode) { |
| 69 return FALSE; |
| 70 } |
| 71 } |
| 72 return TRUE; |
| 73 } |
| 74 int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) { |
| 75 if (charCode == -1) { |
| 76 return 0; |
| 77 } |
| 78 int w = pFont->GetCharWidthF(charCode); |
| 79 if (w == 0) { |
| 80 CFX_ByteString str; |
| 81 pFont->AppendChar(str, charCode); |
| 82 w = pFont->GetStringWidth(str, 1); |
| 83 if (w == 0) { |
| 84 FX_RECT BBox; |
| 85 pFont->GetCharBBox(charCode, BBox); |
| 86 w = BBox.right - BBox.left; |
| 87 } |
| 88 } |
| 89 return w; |
| 90 } |
| 91 int FPDFText_ProcessInterObj(const CPDF_TextObject* pPrevObj, |
| 92 const CPDF_TextObject* pObj) { |
| 93 if (FPDFText_IsSameTextObject(pPrevObj, pObj)) { |
| 94 return -1; |
| 95 } |
| 96 CPDF_TextObjectItem item; |
| 97 int nItem = pPrevObj->CountItems(); |
| 98 pPrevObj->GetItemInfo(nItem - 1, &item); |
| 99 FX_WCHAR preChar = 0, curChar = 0; |
| 100 CFX_WideString wstr = |
| 101 pPrevObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); |
| 102 if (wstr.GetLength()) { |
| 103 preChar = wstr.GetAt(0); |
| 104 } |
| 105 FX_FLOAT last_pos = item.m_OriginX; |
| 106 int nLastWidth = GetCharWidth(item.m_CharCode, pPrevObj->GetFont()); |
| 107 FX_FLOAT last_width = nLastWidth * pPrevObj->GetFontSize() / 1000; |
| 108 last_width = FXSYS_fabs(last_width); |
| 109 pObj->GetItemInfo(0, &item); |
| 110 wstr = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); |
| 111 if (wstr.GetLength()) { |
| 112 curChar = wstr.GetAt(0); |
| 113 } |
| 114 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont()); |
| 115 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000; |
| 116 this_width = FXSYS_fabs(this_width); |
| 117 FX_FLOAT threshold = |
| 118 last_width > this_width ? last_width / 4 : this_width / 4; |
| 119 CFX_AffineMatrix prev_matrix, prev_reverse; |
| 120 pPrevObj->GetTextMatrix(&prev_matrix); |
| 121 prev_reverse.SetReverse(prev_matrix); |
| 122 FX_FLOAT x = pObj->GetPosX(), y = pObj->GetPosY(); |
| 123 prev_reverse.Transform(x, y); |
| 124 if (FXSYS_fabs(y) > threshold * 2) { |
| 125 return 2; |
| 126 } |
| 127 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth); |
| 128 threshold = threshold > 400 |
| 129 ? (threshold < 700 ? threshold / 4 : threshold / 5) |
| 130 : (threshold / 2); |
| 131 threshold *= nLastWidth > nThisWidth ? FXSYS_fabs(pPrevObj->GetFontSize()) |
| 132 : FXSYS_fabs(pObj->GetFontSize()); |
| 133 threshold /= 1000; |
| 134 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && |
| 135 preChar != L' ') |
| 136 if (curChar != L' ' && preChar != L' ') { |
| 137 if ((x - last_pos - last_width) > threshold || |
| 138 (last_pos - x - last_width) > threshold) { |
| 139 return 1; |
| 140 } |
| 141 if (x < 0 && (last_pos - x - last_width) > threshold) { |
| 142 return 1; |
| 143 } |
| 144 if ((x - last_pos - last_width) > this_width || |
| 145 (x - last_pos - this_width) > last_width) { |
| 146 return 1; |
| 147 } |
| 148 } |
| 149 if (last_pos + last_width > x + this_width && curChar == L' ') { |
| 150 return 3; |
| 151 } |
| 152 return 0; |
| 153 } |
| 154 FX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, |
| 155 FX_BOOL bFirstLine) { |
| 156 CPDF_Font* pFont = pObj->GetFont(); |
| 157 CFX_AffineMatrix matrix; |
| 158 pObj->GetTextMatrix(&matrix); |
| 159 int item_index = 0; |
| 160 if (m_pLastObj) { |
| 161 int result = FPDFText_ProcessInterObj(m_pLastObj, pObj); |
| 162 if (result == 2) { |
| 163 int len = m_Buffer.GetLength(); |
| 164 if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') { |
| 165 m_Buffer.Delete(len - 1, 1); |
| 166 if (m_pObjArray) { |
| 167 m_pObjArray->RemoveAt((len - 1) * 2, 2); |
| 168 } |
| 169 } else { |
| 170 if (bFirstLine) { |
| 171 return TRUE; |
| 172 } |
| 173 if (m_bUseLF) { |
| 174 m_Buffer.AppendChar(L'\r'); |
| 175 m_Buffer.AppendChar(L'\n'); |
| 176 if (m_pObjArray) { |
| 177 for (int i = 0; i < 4; i++) { |
| 178 m_pObjArray->Add(NULL); |
| 179 } |
| 180 } |
| 181 } else { |
| 182 m_Buffer.AppendChar(' '); |
| 183 if (m_pObjArray) { |
| 184 m_pObjArray->Add(NULL); |
| 185 m_pObjArray->Add(NULL); |
| 186 } |
| 187 } |
| 188 } |
| 189 } else if (result == 1) { |
| 190 m_Buffer.AppendChar(L' '); |
| 191 if (m_pObjArray) { |
| 192 m_pObjArray->Add(NULL); |
| 193 m_pObjArray->Add(NULL); |
| 194 } |
| 195 } else if (result == -1) { |
| 196 m_pLastObj = pObj; |
| 197 return FALSE; |
| 198 } else if (result == 3) { |
| 199 item_index = 1; |
| 200 } |
| 201 } |
| 202 m_pLastObj = pObj; |
| 203 int nItems = pObj->CountItems(); |
| 204 FX_FLOAT Ignorekerning = 0; |
| 205 for (int i = 1; i < nItems - 1; i += 2) { |
| 87 CPDF_TextObjectItem item; | 206 CPDF_TextObjectItem item; |
| 88 int nItem = pPrevObj->CountItems(); | 207 pObj->GetItemInfo(i, &item); |
| 89 pPrevObj->GetItemInfo(nItem - 1, &item); | 208 if (item.m_CharCode == (FX_DWORD)-1) { |
| 90 FX_WCHAR preChar = 0, curChar = 0; | 209 if (i == 1) { |
| 91 CFX_WideString wstr = pPrevObj->GetFont()->UnicodeFromCharCode(item.m_CharCo
de); | 210 Ignorekerning = item.m_OriginX; |
| 92 if(wstr.GetLength()) { | 211 } else if (Ignorekerning > item.m_OriginX) { |
| 93 preChar = wstr.GetAt(0); | 212 Ignorekerning = item.m_OriginX; |
| 94 } | 213 } |
| 95 FX_FLOAT last_pos = item.m_OriginX; | 214 } else { |
| 96 int nLastWidth = GetCharWidth(item.m_CharCode, pPrevObj->GetFont()); | 215 Ignorekerning = 0; |
| 97 FX_FLOAT last_width = nLastWidth * pPrevObj->GetFontSize() / 1000; | 216 break; |
| 98 last_width = FXSYS_fabs(last_width); | 217 } |
| 99 pObj->GetItemInfo(0, &item); | 218 } |
| 100 wstr = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); | 219 FX_FLOAT spacing = 0; |
| 101 if(wstr.GetLength()) { | 220 for (; item_index < nItems; item_index++) { |
| 102 curChar = wstr.GetAt(0); | 221 CPDF_TextObjectItem item; |
| 103 } | 222 pObj->GetItemInfo(item_index, &item); |
| 104 int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont()); | 223 if (item.m_CharCode == (FX_DWORD)-1) { |
| 105 FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000; | 224 CFX_WideString wstr = m_Buffer.GetWideString(); |
| 106 this_width = FXSYS_fabs(this_width); | 225 if (wstr.IsEmpty() || wstr.GetAt(wstr.GetLength() - 1) == L' ') { |
| 107 FX_FLOAT threshold = last_width > this_width ? last_width / 4 : this_width /
4; | 226 continue; |
| 108 CFX_AffineMatrix prev_matrix, prev_reverse; | 227 } |
| 109 pPrevObj->GetTextMatrix(&prev_matrix); | 228 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); |
| 110 prev_reverse.SetReverse(prev_matrix); | 229 spacing = -fontsize_h * (item.m_OriginX - Ignorekerning) / 1000; |
| 111 FX_FLOAT x = pObj->GetPosX(), y = pObj->GetPosY(); | 230 continue; |
| 112 prev_reverse.Transform(x, y); | 231 } |
| 113 if (FXSYS_fabs(y) > threshold * 2) { | 232 FX_FLOAT charSpace = pObj->m_TextState.GetObject()->m_CharSpace; |
| 114 return 2; | 233 if (nItems > 3 && !spacing) { |
| 115 } | 234 charSpace = 0; |
| 116 threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth); | 235 } |
| 117 threshold = threshold > 400 ? (threshold < 700 ? threshold / 4 : threshold
/ 5) : (threshold / 2); | 236 if ((spacing || charSpace) && item_index > 0) { |
| 118 threshold *= nLastWidth > nThisWidth ? FXSYS_fabs(pPrevObj->GetFontSize()) :
FXSYS_fabs(pObj->GetFontSize()); | 237 int last_width = 0; |
| 119 threshold /= 1000; | 238 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); |
| 120 if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' &&
preChar != L' ') | 239 FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); |
| 121 if(curChar != L' ' && preChar != L' ') { | 240 FX_FLOAT threshold = 0; |
| 122 if((x - last_pos - last_width) > threshold || (last_pos - x - last_w
idth) > threshold) { | 241 if (space_charcode != -1) { |
| 123 return 1; | 242 threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000; |
| 124 } | 243 } |
| 125 if(x < 0 && (last_pos - x - last_width) > threshold) { | 244 if (threshold > fontsize_h / 3) { |
| 126 return 1; | 245 threshold = 0; |
| 127 } | 246 } else { |
| 128 if((x - last_pos - last_width) > this_width || (x - last_pos - this_
width) > last_width ) { | 247 threshold /= 2; |
| 129 return 1; | 248 } |
| 130 } | 249 if (threshold == 0) { |
| 131 } | 250 threshold = fontsize_h; |
| 132 if(last_pos + last_width > x + this_width && curChar == L' ') { | 251 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont)); |
| 133 return 3; | 252 threshold = this_width > last_width ? (FX_FLOAT)this_width |
| 134 } | 253 : (FX_FLOAT)last_width; |
| 135 return 0; | 254 int nDivide = 6; |
| 136 } | 255 if (threshold < 300) { |
| 137 FX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFir
stLine) | 256 nDivide = 2; |
| 138 { | 257 } else if (threshold < 500) { |
| 139 CPDF_Font* pFont = pObj->GetFont(); | 258 nDivide = 4; |
| 140 CFX_AffineMatrix matrix; | 259 } else if (threshold < 700) { |
| 141 pObj->GetTextMatrix(&matrix); | 260 nDivide = 5; |
| 142 int item_index = 0; | 261 } |
| 143 if (m_pLastObj) { | 262 threshold = threshold / nDivide; |
| 144 int result = FPDFText_ProcessInterObj(m_pLastObj, pObj); | 263 threshold = fontsize_h * threshold / 1000; |
| 145 if (result == 2) { | 264 } |
| 146 int len = m_Buffer.GetLength(); | 265 if (charSpace > 0.001) { |
| 147 if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') { | 266 spacing += matrix.TransformDistance(charSpace); |
| 148 m_Buffer.Delete(len - 1, 1); | 267 } else if (charSpace < -0.001) { |
| 149 if (m_pObjArray) { | 268 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace)); |
| 150 m_pObjArray->RemoveAt((len - 1) * 2, 2); | 269 } |
| 151 } | 270 if (threshold && (spacing && spacing >= threshold)) { |
| 152 } else { | 271 m_Buffer.AppendChar(L' '); |
| 153 if (bFirstLine) { | 272 if (m_pObjArray) { |
| 154 return TRUE; | 273 m_pObjArray->Add(NULL); |
| 155 } | 274 m_pObjArray->Add(NULL); |
| 156 if (m_bUseLF) { | 275 } |
| 157 m_Buffer.AppendChar(L'\r'); | 276 } |
| 158 m_Buffer.AppendChar(L'\n'); | 277 if (item.m_CharCode == (FX_DWORD)-1) { |
| 159 if (m_pObjArray) { | 278 continue; |
| 160 for (int i = 0; i < 4; i ++) { | 279 } |
| 161 m_pObjArray->Add(NULL); | 280 spacing = 0; |
| 162 } | 281 } |
| 163 } | 282 CFX_WideString unicode_str = pFont->UnicodeFromCharCode(item.m_CharCode); |
| 164 } else { | 283 if (unicode_str.IsEmpty()) { |
| 165 m_Buffer.AppendChar(' '); | 284 m_Buffer.AppendChar((FX_WCHAR)item.m_CharCode); |
| 166 if (m_pObjArray) { | 285 if (m_pObjArray) { |
| 167 m_pObjArray->Add(NULL); | 286 m_pObjArray->Add((void*)pObj); |
| 168 m_pObjArray->Add(NULL); | 287 m_pObjArray->Add((void*)(FX_INTPTR) item_index); |
| 169 } | 288 } |
| 170 } | 289 } else { |
| 171 } | 290 m_Buffer << unicode_str; |
| 172 } else if (result == 1) { | 291 if (m_pObjArray) { |
| 173 m_Buffer.AppendChar(L' '); | 292 for (int i = 0; i < unicode_str.GetLength(); i++) { |
| 174 if (m_pObjArray) { | 293 m_pObjArray->Add((void*)pObj); |
| 175 m_pObjArray->Add(NULL); | 294 m_pObjArray->Add((void*)(FX_INTPTR) item_index); |
| 176 m_pObjArray->Add(NULL); | 295 } |
| 177 } | 296 } |
| 178 } else if (result == -1) { | 297 } |
| 179 m_pLastObj = pObj; | 298 } |
| 180 return FALSE; | 299 return FALSE; |
| 181 } else if (result == 3) { | 300 } |
| 182 item_index = 1; | 301 void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, |
| 183 } | 302 CPDF_PageObjects* pPage, |
| 184 } | 303 FX_BOOL bUseLF, |
| 185 m_pLastObj = pObj; | 304 CFX_PtrArray* pObjArray) { |
| 186 int nItems = pObj->CountItems(); | 305 CPDF_TextStream textstream(buffer, bUseLF, pObjArray); |
| 187 FX_FLOAT Ignorekerning = 0; | 306 FX_POSITION pos = pPage->GetFirstObjectPosition(); |
| 188 for(int i = 1; i < nItems - 1; i += 2) { | 307 while (pos) { |
| 189 CPDF_TextObjectItem item; | 308 CPDF_PageObject* pObject = pPage->GetNextObject(pos); |
| 190 pObj->GetItemInfo(i, &item); | 309 if (pObject == NULL) { |
| 191 if (item.m_CharCode == (FX_DWORD) - 1) { | 310 continue; |
| 192 if(i == 1) { | 311 } |
| 193 Ignorekerning = item.m_OriginX; | 312 if (pObject->m_Type != PDFPAGE_TEXT) { |
| 194 } else if(Ignorekerning > item.m_OriginX) { | 313 continue; |
| 195 Ignorekerning = item.m_OriginX; | 314 } |
| 196 } | 315 textstream.ProcessObject((CPDF_TextObject*)pObject, FALSE); |
| 197 } else { | 316 } |
| 198 Ignorekerning = 0; | 317 } |
| 199 break; | 318 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, |
| 200 } | 319 CPDF_Dictionary* pPage) { |
| 201 } | 320 CFX_WideTextBuf buffer; |
| 202 FX_FLOAT spacing = 0; | 321 buffer.EstimateSize(0, 1024); |
| 203 for (; item_index < nItems; item_index ++) { | 322 CPDF_Page page; |
| 204 CPDF_TextObjectItem item; | 323 page.Load(pDoc, pPage); |
| 205 pObj->GetItemInfo(item_index, &item); | 324 CPDF_ParseOptions options; |
| 206 if (item.m_CharCode == (FX_DWORD) - 1) { | 325 options.m_bTextOnly = TRUE; |
| 207 CFX_WideString wstr = m_Buffer.GetWideString(); | 326 options.m_bSeparateForm = FALSE; |
| 208 if (wstr.IsEmpty() || wstr.GetAt(wstr.GetLength() - 1) == L' ') { | 327 page.ParseContent(&options); |
| 209 continue; | 328 CPDF_TextStream textstream(buffer, FALSE, NULL); |
| 210 } | 329 FX_POSITION pos = page.GetFirstObjectPosition(); |
| 211 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); | 330 while (pos) { |
| 212 spacing = -fontsize_h * (item.m_OriginX - Ignorekerning) / 1000; | 331 CPDF_PageObject* pObject = page.GetNextObject(pos); |
| 213 continue; | 332 if (pObject->m_Type != PDFPAGE_TEXT) { |
| 214 } | 333 continue; |
| 215 FX_FLOAT charSpace = pObj->m_TextState.GetObject()->m_CharSpace; | 334 } |
| 216 if(nItems > 3 && !spacing) { | 335 if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) { |
| 217 charSpace = 0; | 336 break; |
| 218 } | 337 } |
| 219 if((spacing || charSpace) && item_index > 0) { | 338 } |
| 220 int last_width = 0; | 339 return buffer.GetWideString(); |
| 221 FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH(); | 340 } |
| 222 FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' '); | |
| 223 FX_FLOAT threshold = 0; | |
| 224 if (space_charcode != -1) { | |
| 225 threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) /
1000 ; | |
| 226 } | |
| 227 if(threshold > fontsize_h / 3) { | |
| 228 threshold = 0; | |
| 229 } else { | |
| 230 threshold /= 2; | |
| 231 } | |
| 232 if (threshold == 0) { | |
| 233 threshold = fontsize_h; | |
| 234 int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont))
; | |
| 235 threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX
_FLOAT)last_width; | |
| 236 int nDivide = 6; | |
| 237 if (threshold < 300) { | |
| 238 nDivide = 2; | |
| 239 } else if (threshold < 500) { | |
| 240 nDivide = 4; | |
| 241 } else if (threshold < 700) { | |
| 242 nDivide = 5; | |
| 243 } | |
| 244 threshold = threshold / nDivide; | |
| 245 threshold = fontsize_h * threshold / 1000; | |
| 246 } | |
| 247 if(charSpace > 0.001) { | |
| 248 spacing += matrix.TransformDistance(charSpace); | |
| 249 } else if(charSpace < -0.001) { | |
| 250 spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace)); | |
| 251 } | |
| 252 if (threshold && (spacing && spacing >= threshold) ) { | |
| 253 m_Buffer.AppendChar(L' '); | |
| 254 if (m_pObjArray) { | |
| 255 m_pObjArray->Add(NULL); | |
| 256 m_pObjArray->Add(NULL); | |
| 257 } | |
| 258 } | |
| 259 if (item.m_CharCode == (FX_DWORD) - 1) { | |
| 260 continue; | |
| 261 } | |
| 262 spacing = 0; | |
| 263 } | |
| 264 CFX_WideString unicode_str = pFont->UnicodeFromCharCode(item.m_CharCode)
; | |
| 265 if (unicode_str.IsEmpty()) { | |
| 266 m_Buffer.AppendChar((FX_WCHAR)item.m_CharCode); | |
| 267 if (m_pObjArray) { | |
| 268 m_pObjArray->Add((void*)pObj); | |
| 269 m_pObjArray->Add((void*)(FX_INTPTR)item_index); | |
| 270 } | |
| 271 } else { | |
| 272 m_Buffer << unicode_str; | |
| 273 if (m_pObjArray) { | |
| 274 for (int i = 0; i < unicode_str.GetLength(); i ++) { | |
| 275 m_pObjArray->Add((void*)pObj); | |
| 276 m_pObjArray->Add((void*)(FX_INTPTR)item_index); | |
| 277 } | |
| 278 } | |
| 279 } | |
| 280 } | |
| 281 return FALSE; | |
| 282 } | |
| 283 void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage
, FX_BOOL bUseLF, | |
| 284 CFX_PtrArray* pObjArray) | |
| 285 { | |
| 286 CPDF_TextStream textstream(buffer, bUseLF, pObjArray); | |
| 287 FX_POSITION pos = pPage->GetFirstObjectPosition(); | |
| 288 while (pos) { | |
| 289 CPDF_PageObject* pObject = pPage->GetNextObject(pos); | |
| 290 if (pObject == NULL) { | |
| 291 continue; | |
| 292 } | |
| 293 if (pObject->m_Type != PDFPAGE_TEXT) { | |
| 294 continue; | |
| 295 } | |
| 296 textstream.ProcessObject((CPDF_TextObject*)pObject, FALSE); | |
| 297 } | |
| 298 } | |
| 299 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary
* pPage) | |
| 300 { | |
| 301 CFX_WideTextBuf buffer; | |
| 302 buffer.EstimateSize(0, 1024); | |
| 303 CPDF_Page page; | |
| 304 page.Load(pDoc, pPage); | |
| 305 CPDF_ParseOptions options; | |
| 306 options.m_bTextOnly = TRUE; | |
| 307 options.m_bSeparateForm = FALSE; | |
| 308 page.ParseContent(&options); | |
| 309 CPDF_TextStream textstream(buffer, FALSE, NULL); | |
| 310 FX_POSITION pos = page.GetFirstObjectPosition(); | |
| 311 while (pos) { | |
| 312 CPDF_PageObject* pObject = page.GetNextObject(pos); | |
| 313 if (pObject->m_Type != PDFPAGE_TEXT) { | |
| 314 continue; | |
| 315 } | |
| 316 if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) { | |
| 317 break; | |
| 318 } | |
| 319 } | |
| 320 return buffer.GetWideString(); | |
| 321 } | |
| OLD | NEW |