OLD | NEW |
---|---|
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "core/fxcrt/xml_int.h" | 7 #include "core/fxcrt/xml_int.h" |
8 | 8 |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
11 #include "core/fxcrt/fx_ext.h" | 11 #include "core/fxcrt/fx_ext.h" |
12 #include "core/fxcrt/fx_xml.h" | 12 #include "core/fxcrt/fx_xml.h" |
13 #include "third_party/base/ptr_util.h" | 13 #include "third_party/base/ptr_util.h" |
14 #include "third_party/base/stl_util.h" | 14 #include "third_party/base/stl_util.h" |
15 | 15 |
16 namespace { | |
17 | |
18 #define FXCRTM_XML_CHARTYPE_Normal 0x00 | |
npm
2016/10/26 18:26:04
Some of these appear to be unused. Remove them.
Tom Sepez
2016/10/26 20:47:03
Yes, but they document what the bits are in the by…
| |
19 #define FXCRTM_XML_CHARTYPE_SpaceChar 0x01 | |
20 #define FXCRTM_XML_CHARTYPE_Letter 0x02 | |
21 #define FXCRTM_XML_CHARTYPE_Digital 0x04 | |
22 #define FXCRTM_XML_CHARTYPE_NameIntro 0x08 | |
23 #define FXCRTM_XML_CHARTYPE_NameChar 0x10 | |
24 #define FXCRTM_XML_CHARTYPE_HexDigital 0x20 | |
25 #define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40 | |
26 #define FXCRTM_XML_CHARTYPE_HexUpperLetter 0x60 | |
27 #define FXCRTM_XML_CHARTYPE_HexChar 0x60 | |
28 | |
29 const uint8_t g_FXCRT_XML_ByteTypes[256] = { | |
30 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, | |
31 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, | |
32 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, | |
33 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00, | |
34 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00, | |
35 0x00, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A, | |
36 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
37 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18, | |
38 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
39 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
40 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x1A, 0x1A, 0x1A, | |
41 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
42 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
43 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
44 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
45 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
46 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
47 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
48 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
49 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
50 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
51 0x1A, 0x1A, 0x01, 0x01, | |
52 }; | |
53 | |
54 bool g_FXCRT_XML_IsWhiteSpace(uint8_t ch) { | |
55 return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar); | |
56 } | |
57 | |
58 bool g_FXCRT_XML_IsDigital(uint8_t ch) { | |
59 return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital); | |
60 } | |
61 | |
62 bool g_FXCRT_XML_IsNameIntro(uint8_t ch) { | |
63 return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro); | |
64 } | |
65 | |
66 bool g_FXCRT_XML_IsNameChar(uint8_t ch) { | |
67 return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar); | |
68 } | |
69 | |
70 } // namespace | |
71 | |
16 CXML_DataBufAcc::CXML_DataBufAcc(const uint8_t* pBuffer, size_t size) | 72 CXML_DataBufAcc::CXML_DataBufAcc(const uint8_t* pBuffer, size_t size) |
17 : m_pBuffer(pBuffer), m_dwSize(size), m_dwCurPos(0) {} | 73 : m_pBuffer(pBuffer), m_dwSize(size), m_dwCurPos(0) {} |
18 | 74 |
19 CXML_DataBufAcc::~CXML_DataBufAcc() {} | 75 CXML_DataBufAcc::~CXML_DataBufAcc() {} |
20 | 76 |
21 void CXML_DataBufAcc::Release() { | 77 void CXML_DataBufAcc::Release() { |
22 delete this; | 78 delete this; |
23 } | 79 } |
24 | 80 |
25 FX_BOOL CXML_DataBufAcc::IsEOF() { | 81 FX_BOOL CXML_DataBufAcc::IsEOF() { |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
121 m_dwBufferSize(0), | 177 m_dwBufferSize(0), |
122 m_nBufferOffset(0), | 178 m_nBufferOffset(0), |
123 m_dwIndex(0) {} | 179 m_dwIndex(0) {} |
124 | 180 |
125 CXML_Parser::~CXML_Parser() { | 181 CXML_Parser::~CXML_Parser() { |
126 if (m_bOwnedStream) { | 182 if (m_bOwnedStream) { |
127 m_pDataAcc->Release(); | 183 m_pDataAcc->Release(); |
128 } | 184 } |
129 } | 185 } |
130 | 186 |
131 FX_BOOL CXML_Parser::Init(uint8_t* pBuffer, size_t size) { | 187 bool CXML_Parser::Init(uint8_t* pBuffer, size_t size) { |
132 m_pDataAcc = new CXML_DataBufAcc(pBuffer, size); | 188 m_pDataAcc = new CXML_DataBufAcc(pBuffer, size); |
133 return Init(TRUE); | 189 return Init(true); |
134 } | 190 } |
135 FX_BOOL CXML_Parser::Init(IFX_SeekableReadStream* pFileRead) { | 191 |
192 bool CXML_Parser::Init(IFX_SeekableReadStream* pFileRead) { | |
136 m_pDataAcc = new CXML_DataStmAcc(pFileRead); | 193 m_pDataAcc = new CXML_DataStmAcc(pFileRead); |
137 return Init(TRUE); | 194 return Init(true); |
138 } | 195 } |
139 FX_BOOL CXML_Parser::Init(IFX_BufferRead* pBuffer) { | 196 |
140 if (!pBuffer) { | 197 bool CXML_Parser::Init(IFX_BufferRead* pBuffer) { |
141 return FALSE; | 198 if (!pBuffer) |
142 } | 199 return false; |
200 | |
143 m_pDataAcc = pBuffer; | 201 m_pDataAcc = pBuffer; |
144 return Init(FALSE); | 202 return Init(false); |
145 } | 203 } |
146 FX_BOOL CXML_Parser::Init(FX_BOOL bOwndedStream) { | 204 |
205 bool CXML_Parser::Init(bool bOwndedStream) { | |
147 m_bOwnedStream = bOwndedStream; | 206 m_bOwnedStream = bOwndedStream; |
148 m_nOffset = 0; | 207 m_nOffset = 0; |
149 return ReadNextBlock(); | 208 return ReadNextBlock(); |
150 } | 209 } |
151 FX_BOOL CXML_Parser::ReadNextBlock() { | 210 |
152 if (!m_pDataAcc->ReadNextBlock()) { | 211 bool CXML_Parser::ReadNextBlock() { |
153 return FALSE; | 212 if (!m_pDataAcc->ReadNextBlock()) |
154 } | 213 return false; |
214 | |
155 m_pBuffer = m_pDataAcc->GetBlockBuffer(); | 215 m_pBuffer = m_pDataAcc->GetBlockBuffer(); |
156 m_dwBufferSize = m_pDataAcc->GetBlockSize(); | 216 m_dwBufferSize = m_pDataAcc->GetBlockSize(); |
157 m_nBufferOffset = m_pDataAcc->GetBlockOffset(); | 217 m_nBufferOffset = m_pDataAcc->GetBlockOffset(); |
158 m_dwIndex = 0; | 218 m_dwIndex = 0; |
159 return m_dwBufferSize > 0; | 219 return m_dwBufferSize > 0; |
160 } | 220 } |
161 FX_BOOL CXML_Parser::IsEOF() { | 221 |
162 if (!m_pDataAcc->IsEOF()) { | 222 bool CXML_Parser::IsEOF() { |
163 return FALSE; | 223 return m_pDataAcc->IsEOF() && m_dwIndex >= m_dwBufferSize; |
164 } | |
165 return m_dwIndex >= m_dwBufferSize; | |
166 } | 224 } |
167 #define FXCRTM_XML_CHARTYPE_Normal 0x00 | 225 |
168 #define FXCRTM_XML_CHARTYPE_SpaceChar 0x01 | |
169 #define FXCRTM_XML_CHARTYPE_Letter 0x02 | |
170 #define FXCRTM_XML_CHARTYPE_Digital 0x04 | |
171 #define FXCRTM_XML_CHARTYPE_NameIntro 0x08 | |
172 #define FXCRTM_XML_CHARTYPE_NameChar 0x10 | |
173 #define FXCRTM_XML_CHARTYPE_HexDigital 0x20 | |
174 #define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40 | |
175 #define FXCRTM_XML_CHARTYPE_HexUpperLetter 0x60 | |
176 #define FXCRTM_XML_CHARTYPE_HexChar 0x60 | |
177 uint8_t g_FXCRT_XML_ByteTypes[256] = { | |
178 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, | |
179 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, | |
180 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, | |
181 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00, | |
182 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00, | |
183 0x00, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A, | |
184 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
185 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18, | |
186 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
187 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
188 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x1A, 0x1A, 0x1A, | |
189 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
190 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
191 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
192 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
193 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
194 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
195 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
196 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
197 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
198 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, | |
199 0x1A, 0x1A, 0x01, 0x01, | |
200 }; | |
201 FX_BOOL g_FXCRT_XML_IsWhiteSpace(uint8_t ch) { | |
202 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar) != 0; | |
203 } | |
204 FX_BOOL g_FXCRT_XML_IsLetter(uint8_t ch) { | |
205 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Letter) != 0; | |
206 } | |
207 FX_BOOL g_FXCRT_XML_IsDigital(uint8_t ch) { | |
208 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital) != 0; | |
209 } | |
210 FX_BOOL g_FXCRT_XML_IsNameIntro(uint8_t ch) { | |
211 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro) != 0; | |
212 } | |
213 FX_BOOL g_FXCRT_XML_IsNameChar(uint8_t ch) { | |
214 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar) != 0; | |
215 } | |
216 FX_BOOL g_FXCRT_XML_IsHexChar(uint8_t ch) { | |
217 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar) != 0; | |
218 } | |
219 void CXML_Parser::SkipWhiteSpaces() { | 226 void CXML_Parser::SkipWhiteSpaces() { |
220 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; | 227 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; |
221 if (IsEOF()) { | 228 if (IsEOF()) { |
222 return; | 229 return; |
223 } | 230 } |
224 do { | 231 do { |
225 while (m_dwIndex < m_dwBufferSize && | 232 while (m_dwIndex < m_dwBufferSize && |
226 g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) { | 233 g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) { |
227 m_dwIndex++; | 234 m_dwIndex++; |
228 } | 235 } |
(...skipping 678 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
907 m_pMap->push_back({space, name, CFX_WideString(value)}); | 914 m_pMap->push_back({space, name, CFX_WideString(value)}); |
908 } | 915 } |
909 | 916 |
910 int CXML_AttrMap::GetSize() const { | 917 int CXML_AttrMap::GetSize() const { |
911 return m_pMap ? pdfium::CollectionSize<int>(*m_pMap) : 0; | 918 return m_pMap ? pdfium::CollectionSize<int>(*m_pMap) : 0; |
912 } | 919 } |
913 | 920 |
914 CXML_AttrItem& CXML_AttrMap::GetAt(int index) const { | 921 CXML_AttrItem& CXML_AttrMap::GetAt(int index) const { |
915 return (*m_pMap)[index]; | 922 return (*m_pMap)[index]; |
916 } | 923 } |
OLD | NEW |