OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 |
| 7 #include <map> |
| 8 |
| 9 #include "core/fpdfapi/fpdf_parser/include/cpdf_array.h" |
| 10 #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h" |
| 11 #include "core/fpdfapi/fpdf_parser/include/cpdf_document.h" |
| 12 #include "core/fpdfapi/fpdf_parser/include/cpdf_reference.h" |
| 13 #include "core/fpdfapi/fpdf_parser/include/cpdf_stream.h" |
| 14 #include "core/fpdfdoc/cpdf_numbertree.h" |
| 15 #include "core/fpdfdoc/include/fpdf_tagged.h" |
| 16 #include "core/fpdfdoc/tagged_int.h" |
| 17 |
| 18 namespace { |
| 19 |
// Depth cap applied to the recursive walks in this file (parent-chain
// resolution, nested attribute arrays and inherited attribute lookup).
constexpr int nMaxRecursion = 32;
| 21 |
| 22 bool IsTagged(const CPDF_Document* pDoc) { |
| 23 CPDF_Dictionary* pCatalog = pDoc->GetRoot(); |
| 24 CPDF_Dictionary* pMarkInfo = pCatalog->GetDictBy("MarkInfo"); |
| 25 return pMarkInfo && pMarkInfo->GetIntegerBy("Marked"); |
| 26 } |
| 27 |
| 28 } // namespace |
| 29 |
| 30 // static |
| 31 IPDF_StructTree* IPDF_StructTree::LoadPage(const CPDF_Document* pDoc, |
| 32 const CPDF_Dictionary* pPageDict) { |
| 33 if (!IsTagged(pDoc)) |
| 34 return nullptr; |
| 35 |
| 36 CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc); |
| 37 pTree->LoadPageTree(pPageDict); |
| 38 return pTree; |
| 39 } |
| 40 |
| 41 // static. |
| 42 IPDF_StructTree* IPDF_StructTree::LoadDoc(const CPDF_Document* pDoc) { |
| 43 if (!IsTagged(pDoc)) |
| 44 return nullptr; |
| 45 |
| 46 CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc); |
| 47 pTree->LoadDocTree(); |
| 48 return pTree; |
| 49 } |
| 50 |
| 51 CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc) |
| 52 : m_pTreeRoot(pDoc->GetRoot()->GetDictBy("StructTreeRoot")), |
| 53 m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictBy("RoleMap") : nullptr), |
| 54 m_pPage(nullptr) {} |
| 55 |
| 56 CPDF_StructTreeImpl::~CPDF_StructTreeImpl() {} |
| 57 |
| 58 int CPDF_StructTreeImpl::CountTopElements() const { |
| 59 return pdfium::CollectionSize<int>(m_Kids); |
| 60 } |
| 61 |
| 62 IPDF_StructElement* CPDF_StructTreeImpl::GetTopElement(int i) const { |
| 63 return m_Kids[i].Get(); |
| 64 } |
| 65 |
| 66 void CPDF_StructTreeImpl::LoadDocTree() { |
| 67 m_pPage = nullptr; |
| 68 if (!m_pTreeRoot) |
| 69 return; |
| 70 |
| 71 CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectBy("K"); |
| 72 if (!pKids) |
| 73 return; |
| 74 |
| 75 if (CPDF_Dictionary* pDict = pKids->AsDictionary()) { |
| 76 m_Kids.push_back(CFX_RetainPtr<CPDF_StructElementImpl>( |
| 77 new CPDF_StructElementImpl(this, nullptr, pDict))); |
| 78 return; |
| 79 } |
| 80 |
| 81 CPDF_Array* pArray = pKids->AsArray(); |
| 82 if (!pArray) |
| 83 return; |
| 84 |
| 85 for (size_t i = 0; i < pArray->GetCount(); i++) { |
| 86 m_Kids.push_back(CFX_RetainPtr<CPDF_StructElementImpl>( |
| 87 new CPDF_StructElementImpl(this, nullptr, pArray->GetDictAt(i)))); |
| 88 } |
| 89 } |
| 90 |
| 91 void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict) { |
| 92 m_pPage = pPageDict; |
| 93 if (!m_pTreeRoot) |
| 94 return; |
| 95 |
| 96 CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectBy("K"); |
| 97 if (!pKids) |
| 98 return; |
| 99 |
| 100 uint32_t dwKids = 0; |
| 101 if (pKids->IsDictionary()) |
| 102 dwKids = 1; |
| 103 else if (CPDF_Array* pArray = pKids->AsArray()) |
| 104 dwKids = pArray->GetCount(); |
| 105 else |
| 106 return; |
| 107 |
| 108 m_Kids.clear(); |
| 109 m_Kids.resize(dwKids); |
| 110 CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictBy("ParentTree"); |
| 111 if (!pParentTree) |
| 112 return; |
| 113 |
| 114 CPDF_NumberTree parent_tree(pParentTree); |
| 115 int parents_id = pPageDict->GetIntegerBy("StructParents", -1); |
| 116 if (parents_id < 0) |
| 117 return; |
| 118 |
| 119 CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id)); |
| 120 if (!pParentArray) |
| 121 return; |
| 122 |
| 123 std::map<CPDF_Dictionary*, CPDF_StructElementImpl*> element_map; |
| 124 for (size_t i = 0; i < pParentArray->GetCount(); i++) { |
| 125 if (CPDF_Dictionary* pParent = pParentArray->GetDictAt(i)) |
| 126 AddPageNode(pParent, element_map); |
| 127 } |
| 128 } |
| 129 |
| 130 CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode( |
| 131 CPDF_Dictionary* pDict, |
| 132 std::map<CPDF_Dictionary*, CPDF_StructElementImpl*>& map, |
| 133 int nLevel) { |
| 134 if (nLevel > nMaxRecursion) |
| 135 return nullptr; |
| 136 |
| 137 auto it = map.find(pDict); |
| 138 if (it != map.end()) |
| 139 return it->second; |
| 140 |
| 141 CPDF_StructElementImpl* pElement = |
| 142 new CPDF_StructElementImpl(this, nullptr, pDict); |
| 143 map[pDict] = pElement; |
| 144 CPDF_Dictionary* pParent = pDict->GetDictBy("P"); |
| 145 if (!pParent || pParent->GetStringBy("Type") == "StructTreeRoot") { |
| 146 if (!AddTopLevelNode(pDict, pElement)) { |
| 147 pElement->Release(); |
| 148 map.erase(pDict); |
| 149 } |
| 150 } else { |
| 151 CPDF_StructElementImpl* pParentElement = |
| 152 AddPageNode(pParent, map, nLevel + 1); |
| 153 FX_BOOL bSave = FALSE; |
| 154 for (CPDF_StructKid& kid : pParentElement->m_Kids) { |
| 155 if (kid.m_Type != CPDF_StructKid::Element) |
| 156 continue; |
| 157 if (kid.m_Element.m_pDict != pDict) |
| 158 continue; |
| 159 kid.m_Element.m_pElement = pElement->Retain(); |
| 160 bSave = TRUE; |
| 161 } |
| 162 if (!bSave) { |
| 163 pElement->Release(); |
| 164 map.erase(pDict); |
| 165 } |
| 166 } |
| 167 return pElement; |
| 168 } |
| 169 FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, |
| 170 CPDF_StructElementImpl* pElement) { |
| 171 CPDF_Object* pObj = m_pTreeRoot->GetDirectObjectBy("K"); |
| 172 if (!pObj) |
| 173 return FALSE; |
| 174 |
| 175 if (pObj->IsDictionary()) { |
| 176 if (pObj->GetObjNum() != pDict->GetObjNum()) |
| 177 return FALSE; |
| 178 m_Kids[0].Reset(pElement); |
| 179 } |
| 180 if (CPDF_Array* pTopKids = pObj->AsArray()) { |
| 181 bool bSave = false; |
| 182 for (size_t i = 0; i < pTopKids->GetCount(); i++) { |
| 183 CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i)); |
| 184 if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) { |
| 185 m_Kids[i].Reset(pElement); |
| 186 bSave = true; |
| 187 } |
| 188 } |
| 189 if (!bSave) |
| 190 return FALSE; |
| 191 } |
| 192 return TRUE; |
| 193 } |
| 194 |
| 195 CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, |
| 196 CPDF_StructElementImpl* pParent, |
| 197 CPDF_Dictionary* pDict) |
| 198 : m_RefCount(0), |
| 199 m_pTree(pTree), |
| 200 m_pParent(pParent), |
| 201 m_pDict(pDict), |
| 202 m_Type(pDict->GetStringBy("S")) { |
| 203 if (pTree->m_pRoleMap) { |
| 204 CFX_ByteString mapped = pTree->m_pRoleMap->GetStringBy(m_Type); |
| 205 if (!mapped.IsEmpty()) |
| 206 m_Type = mapped; |
| 207 } |
| 208 LoadKids(pDict); |
| 209 } |
| 210 |
| 211 IPDF_StructTree* CPDF_StructElementImpl::GetTree() const { |
| 212 return m_pTree; |
| 213 } |
| 214 |
| 215 const CFX_ByteString& CPDF_StructElementImpl::GetType() const { |
| 216 return m_Type; |
| 217 } |
| 218 |
| 219 IPDF_StructElement* CPDF_StructElementImpl::GetParent() const { |
| 220 return m_pParent; |
| 221 } |
| 222 |
| 223 CPDF_Dictionary* CPDF_StructElementImpl::GetDict() const { |
| 224 return m_pDict; |
| 225 } |
| 226 |
| 227 int CPDF_StructElementImpl::CountKids() const { |
| 228 return pdfium::CollectionSize<int>(m_Kids); |
| 229 } |
| 230 |
| 231 const CPDF_StructKid& CPDF_StructElementImpl::GetKid(int index) const { |
| 232 return m_Kids[index]; |
| 233 } |
| 234 |
| 235 CPDF_StructElementImpl::~CPDF_StructElementImpl() { |
| 236 for (CPDF_StructKid& kid : m_Kids) { |
| 237 if (kid.m_Type == CPDF_StructKid::Element && kid.m_Element.m_pElement) |
| 238 static_cast<CPDF_StructElementImpl*>(kid.m_Element.m_pElement)->Release(); |
| 239 } |
| 240 } |
| 241 |
| 242 CPDF_StructElementImpl* CPDF_StructElementImpl::Retain() { |
| 243 m_RefCount++; |
| 244 return this; |
| 245 } |
| 246 void CPDF_StructElementImpl::Release() { |
| 247 if (--m_RefCount < 1) { |
| 248 delete this; |
| 249 } |
| 250 } |
| 251 void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict) { |
| 252 CPDF_Object* pObj = pDict->GetObjectBy("Pg"); |
| 253 uint32_t PageObjNum = 0; |
| 254 if (CPDF_Reference* pRef = ToReference(pObj)) |
| 255 PageObjNum = pRef->GetRefObjNum(); |
| 256 |
| 257 CPDF_Object* pKids = pDict->GetDirectObjectBy("K"); |
| 258 if (!pKids) |
| 259 return; |
| 260 |
| 261 m_Kids.clear(); |
| 262 if (CPDF_Array* pArray = pKids->AsArray()) { |
| 263 m_Kids.resize(pArray->GetCount()); |
| 264 for (uint32_t i = 0; i < pArray->GetCount(); i++) { |
| 265 CPDF_Object* pKid = pArray->GetDirectObjectAt(i); |
| 266 LoadKid(PageObjNum, pKid, &m_Kids[i]); |
| 267 } |
| 268 } else { |
| 269 m_Kids.resize(1); |
| 270 LoadKid(PageObjNum, pKids, &m_Kids[0]); |
| 271 } |
| 272 } |
| 273 void CPDF_StructElementImpl::LoadKid(uint32_t PageObjNum, |
| 274 CPDF_Object* pKidObj, |
| 275 CPDF_StructKid* pKid) { |
| 276 pKid->m_Type = CPDF_StructKid::Invalid; |
| 277 if (!pKidObj) |
| 278 return; |
| 279 |
| 280 if (pKidObj->IsNumber()) { |
| 281 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { |
| 282 return; |
| 283 } |
| 284 pKid->m_Type = CPDF_StructKid::PageContent; |
| 285 pKid->m_PageContent.m_ContentId = pKidObj->GetInteger(); |
| 286 pKid->m_PageContent.m_PageObjNum = PageObjNum; |
| 287 return; |
| 288 } |
| 289 |
| 290 CPDF_Dictionary* pKidDict = pKidObj->AsDictionary(); |
| 291 if (!pKidDict) |
| 292 return; |
| 293 |
| 294 if (CPDF_Reference* pRef = ToReference(pKidDict->GetObjectBy("Pg"))) |
| 295 PageObjNum = pRef->GetRefObjNum(); |
| 296 |
| 297 CFX_ByteString type = pKidDict->GetStringBy("Type"); |
| 298 if (type == "MCR") { |
| 299 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { |
| 300 return; |
| 301 } |
| 302 pKid->m_Type = CPDF_StructKid::StreamContent; |
| 303 if (CPDF_Reference* pRef = ToReference(pKidDict->GetObjectBy("Stm"))) { |
| 304 pKid->m_StreamContent.m_RefObjNum = pRef->GetRefObjNum(); |
| 305 } else { |
| 306 pKid->m_StreamContent.m_RefObjNum = 0; |
| 307 } |
| 308 pKid->m_StreamContent.m_PageObjNum = PageObjNum; |
| 309 pKid->m_StreamContent.m_ContentId = pKidDict->GetIntegerBy("MCID"); |
| 310 } else if (type == "OBJR") { |
| 311 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { |
| 312 return; |
| 313 } |
| 314 pKid->m_Type = CPDF_StructKid::Object; |
| 315 if (CPDF_Reference* pObj = ToReference(pKidDict->GetObjectBy("Obj"))) { |
| 316 pKid->m_Object.m_RefObjNum = pObj->GetRefObjNum(); |
| 317 } else { |
| 318 pKid->m_Object.m_RefObjNum = 0; |
| 319 } |
| 320 pKid->m_Object.m_PageObjNum = PageObjNum; |
| 321 } else { |
| 322 pKid->m_Type = CPDF_StructKid::Element; |
| 323 pKid->m_Element.m_pDict = pKidDict; |
| 324 if (!m_pTree->m_pPage) { |
| 325 pKid->m_Element.m_pElement = |
| 326 new CPDF_StructElementImpl(m_pTree, this, pKidDict); |
| 327 } else { |
| 328 pKid->m_Element.m_pElement = nullptr; |
| 329 } |
| 330 } |
| 331 } |
| 332 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, |
| 333 const CFX_ByteStringC& owner, |
| 334 FX_FLOAT nLevel = 0.0F) { |
| 335 if (nLevel > nMaxRecursion) |
| 336 return nullptr; |
| 337 if (!pAttrs) |
| 338 return nullptr; |
| 339 |
| 340 CPDF_Dictionary* pDict = nullptr; |
| 341 if (pAttrs->IsDictionary()) { |
| 342 pDict = pAttrs->AsDictionary(); |
| 343 } else if (CPDF_Stream* pStream = pAttrs->AsStream()) { |
| 344 pDict = pStream->GetDict(); |
| 345 } else if (CPDF_Array* pArray = pAttrs->AsArray()) { |
| 346 for (uint32_t i = 0; i < pArray->GetCount(); i++) { |
| 347 CPDF_Object* pElement = pArray->GetDirectObjectAt(i); |
| 348 pDict = FindAttrDict(pElement, owner, nLevel + 1); |
| 349 if (pDict) |
| 350 return pDict; |
| 351 } |
| 352 } |
| 353 if (pDict && pDict->GetStringBy("O") == owner) |
| 354 return pDict; |
| 355 return nullptr; |
| 356 } |
| 357 CPDF_Object* CPDF_StructElementImpl::GetAttr(const CFX_ByteStringC& owner, |
| 358 const CFX_ByteStringC& name, |
| 359 FX_BOOL bInheritable, |
| 360 FX_FLOAT fLevel) { |
| 361 if (fLevel > nMaxRecursion) { |
| 362 return nullptr; |
| 363 } |
| 364 if (bInheritable) { |
| 365 CPDF_Object* pAttr = GetAttr(owner, name, FALSE); |
| 366 if (pAttr) { |
| 367 return pAttr; |
| 368 } |
| 369 if (!m_pParent) { |
| 370 return nullptr; |
| 371 } |
| 372 return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1); |
| 373 } |
| 374 CPDF_Object* pA = m_pDict->GetDirectObjectBy("A"); |
| 375 if (pA) { |
| 376 CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner); |
| 377 if (pAttrDict) { |
| 378 CPDF_Object* pAttr = pAttrDict->GetDirectObjectBy(CFX_ByteString(name)); |
| 379 if (pAttr) { |
| 380 return pAttr; |
| 381 } |
| 382 } |
| 383 } |
| 384 CPDF_Object* pC = m_pDict->GetDirectObjectBy("C"); |
| 385 if (!pC) |
| 386 return nullptr; |
| 387 |
| 388 CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDictBy("ClassMap"); |
| 389 if (!pClassMap) |
| 390 return nullptr; |
| 391 |
| 392 if (CPDF_Array* pArray = pC->AsArray()) { |
| 393 for (uint32_t i = 0; i < pArray->GetCount(); i++) { |
| 394 CFX_ByteString class_name = pArray->GetStringAt(i); |
| 395 CPDF_Dictionary* pClassDict = pClassMap->GetDictBy(class_name); |
| 396 if (pClassDict && pClassDict->GetStringBy("O") == owner) |
| 397 return pClassDict->GetDirectObjectBy(CFX_ByteString(name)); |
| 398 } |
| 399 return nullptr; |
| 400 } |
| 401 CFX_ByteString class_name = pC->GetString(); |
| 402 CPDF_Dictionary* pClassDict = pClassMap->GetDictBy(class_name); |
| 403 if (pClassDict && pClassDict->GetStringBy("O") == owner) |
| 404 return pClassDict->GetDirectObjectBy(CFX_ByteString(name)); |
| 405 return nullptr; |
| 406 } |
| 407 CPDF_Object* CPDF_StructElementImpl::GetAttr(const CFX_ByteStringC& owner, |
| 408 const CFX_ByteStringC& name, |
| 409 FX_BOOL bInheritable, |
| 410 int subindex) { |
| 411 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable); |
| 412 CPDF_Array* pArray = ToArray(pAttr); |
| 413 if (!pArray || subindex == -1) |
| 414 return pAttr; |
| 415 |
| 416 if (subindex >= static_cast<int>(pArray->GetCount())) |
| 417 return pAttr; |
| 418 return pArray->GetDirectObjectAt(subindex); |
| 419 } |
| 420 CFX_ByteString CPDF_StructElementImpl::GetName( |
| 421 const CFX_ByteStringC& owner, |
| 422 const CFX_ByteStringC& name, |
| 423 const CFX_ByteStringC& default_value, |
| 424 FX_BOOL bInheritable, |
| 425 int subindex) { |
| 426 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); |
| 427 if (ToName(pAttr)) |
| 428 return pAttr->GetString(); |
| 429 return CFX_ByteString(default_value); |
| 430 } |
| 431 |
| 432 FX_ARGB CPDF_StructElementImpl::GetColor(const CFX_ByteStringC& owner, |
| 433 const CFX_ByteStringC& name, |
| 434 FX_ARGB default_value, |
| 435 FX_BOOL bInheritable, |
| 436 int subindex) { |
| 437 CPDF_Array* pArray = ToArray(GetAttr(owner, name, bInheritable, subindex)); |
| 438 if (!pArray) |
| 439 return default_value; |
| 440 return 0xff000000 | ((int)(pArray->GetNumberAt(0) * 255) << 16) | |
| 441 ((int)(pArray->GetNumberAt(1) * 255) << 8) | |
| 442 (int)(pArray->GetNumberAt(2) * 255); |
| 443 } |
| 444 FX_FLOAT CPDF_StructElementImpl::GetNumber(const CFX_ByteStringC& owner, |
| 445 const CFX_ByteStringC& name, |
| 446 FX_FLOAT default_value, |
| 447 FX_BOOL bInheritable, |
| 448 int subindex) { |
| 449 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); |
| 450 return ToNumber(pAttr) ? pAttr->GetNumber() : default_value; |
| 451 } |
| 452 int CPDF_StructElementImpl::GetInteger(const CFX_ByteStringC& owner, |
| 453 const CFX_ByteStringC& name, |
| 454 int default_value, |
| 455 FX_BOOL bInheritable, |
| 456 int subindex) { |
| 457 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); |
| 458 return ToNumber(pAttr) ? pAttr->GetInteger() : default_value; |
| 459 } |
OLD | NEW |