core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp - Issue 1773103003: Split off CPDF_Parser and CPDF_SimpleParser into .h/.cpp files

Side by Side Diff: core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp

Issue 1773103003: Split off CPDF_Parser and CPDF_SimpleParser into .h/.cpp files (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master

Patch Set: Then address C#3. Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2014 PDFium Authors. All rights reserved.	1 // Copyright 2014 PDFium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com	5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

6	6

	7 #include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h"

	8

7 #include "core/include/fpdfapi/fpdf_parser.h"	9 #include "core/include/fpdfapi/fpdf_parser.h"

8

9 #include "core/include/fxcrt/fx_ext.h"	10 #include "core/include/fxcrt/fx_ext.h"

10	11

11 // Indexed by 8-bit character code, contains either:	12 // Indexed by 8-bit character code, contains either:

12 // 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff	13 // 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff

13 // 'N' - for numeric: 0123456789+-.	14 // 'N' - for numeric: 0123456789+-.

14 // 'D' - for delimiter: %()/<>[]{}	15 // 'D' - for delimiter: %()/<>[]{}

15 // 'R' - otherwise.	16 // 'R' - otherwise.

16 const char PDF_CharType[256] = {	17 const char PDF_CharType[256] = {

17 // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO	18 // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO

18 // SI	19 // SI

(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
53 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',	54 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

54 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',	55 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

55 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',	56 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

56 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',	57 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

57 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',	58 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

58 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',	59 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

59 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',	60 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

60 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',	61 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

61 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};	62 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};

62	63

63 CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize) {	64 int32_t GetHeaderOffset(IFX_FileRead* pFile) {

64 m_pData = pData;	65 // TODO(dsinclair): This is a complicated way of saying %PDF, simplify?

65 m_dwSize = dwSize;	66 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);

66 m_dwCurPos = 0;	67

	68 const size_t kBufSize = 4;

	69 uint8_t buf[kBufSize];

	70 int32_t offset = 0;

	71 while (offset <= 1024) {

	72 if (!pFile->ReadBlock(buf, offset, kBufSize))

	73 return -1;

	74

	75 if (*(FX_DWORD*)buf == tag)

	76 return offset;

	77

	78 ++offset;

	79 }

	80 return -1;

67 }	81 }

68	82

69 CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str) {	83 int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) {

70 m_pData = str.GetPtr();	84 CPDF_Number* pObj = ToNumber(pDict->GetElement(key));

71 m_dwSize = str.GetLength();	85 return pObj ? pObj->GetInteger() : 0;

72 m_dwCurPos = 0;

73 }

74

75 void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) {

76 pStart = NULL;

77 dwSize = 0;

78 uint8_t ch;

79 while (1) {

80 if (m_dwSize <= m_dwCurPos)

81 return;

82 ch = m_pData[m_dwCurPos++];

83 while (PDFCharIsWhitespace(ch)) {

84 if (m_dwSize <= m_dwCurPos)

85 return;

86 ch = m_pData[m_dwCurPos++];

87 }

88

89 if (ch != '%')

90 break;

91

92 while (1) {

93 if (m_dwSize <= m_dwCurPos)

94 return;

95 ch = m_pData[m_dwCurPos++];

96 if (ch == '\r' || ch == '\n')

97 break;

98 }

99 }

100

101 FX_DWORD start_pos = m_dwCurPos - 1;

102 pStart = m_pData + start_pos;

103 if (PDFCharIsDelimiter(ch)) {

104 if (ch == '/') {

105 while (1) {

106 if (m_dwSize <= m_dwCurPos)

107 return;

108 ch = m_pData[m_dwCurPos++];

109 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {

110 m_dwCurPos--;

111 dwSize = m_dwCurPos - start_pos;

112 return;

113 }

114 }

115 } else {

116 dwSize = 1;

117 if (ch == '<') {

118 if (m_dwSize <= m_dwCurPos)

119 return;

120 ch = m_pData[m_dwCurPos++];

121 if (ch == '<')

122 dwSize = 2;

123 else

124 m_dwCurPos--;

125 } else if (ch == '>') {

126 if (m_dwSize <= m_dwCurPos)

127 return;

128 ch = m_pData[m_dwCurPos++];

129 if (ch == '>')

130 dwSize = 2;

131 else

132 m_dwCurPos--;

133 }

134 }

135 return;

136 }

137

138 dwSize = 1;

139 while (1) {

140 if (m_dwSize <= m_dwCurPos)

141 return;

142 ch = m_pData[m_dwCurPos++];

143

144 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {

145 m_dwCurPos--;

146 break;

147 }

148 dwSize++;

149 }

150 }

151

152 CFX_ByteStringC CPDF_SimpleParser::GetWord() {

153 const uint8_t* pStart;

154 FX_DWORD dwSize;

155 ParseWord(pStart, dwSize);

156 if (dwSize == 1 && pStart[0] == '<') {

157 while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') {

158 m_dwCurPos++;

159 }

160 if (m_dwCurPos < m_dwSize) {

161 m_dwCurPos++;

162 }

163 return CFX_ByteStringC(pStart,

164 (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));

165 }

166 if (dwSize == 1 && pStart[0] == '(') {

167 int level = 1;

168 while (m_dwCurPos < m_dwSize) {

169 if (m_pData[m_dwCurPos] == ')') {

170 level--;

171 if (level == 0) {

172 break;

173 }

174 }

175 if (m_pData[m_dwCurPos] == '\\') {

176 if (m_dwSize <= m_dwCurPos) {

177 break;

178 }

179 m_dwCurPos++;

180 } else if (m_pData[m_dwCurPos] == '(') {

181 level++;

182 }

183 if (m_dwSize <= m_dwCurPos) {

184 break;

185 }

186 m_dwCurPos++;

187 }

188 if (m_dwCurPos < m_dwSize) {

189 m_dwCurPos++;

190 }

191 return CFX_ByteStringC(pStart,

192 (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));

193 }

194 return CFX_ByteStringC(pStart, dwSize);

195 }

196

197 bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token,

198 int nParams) {

199 nParams++;

200 FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams);

201 int buf_index = 0;

202 int buf_count = 0;

203 m_dwCurPos = 0;

204 while (1) {

205 pBuf[buf_index++] = m_dwCurPos;

206 if (buf_index == nParams) {

207 buf_index = 0;

208 }

209 buf_count++;

210 if (buf_count > nParams) {

211 buf_count = nParams;

212 }

213 CFX_ByteStringC word = GetWord();

214 if (word.IsEmpty()) {

215 FX_Free(pBuf);

216 return false;

217 }

218 if (word == token) {

219 if (buf_count < nParams) {

220 continue;

221 }

222 m_dwCurPos = pBuf[buf_index];

223 FX_Free(pBuf);

224 return true;

225 }

226 }

227 return false;

228 }	86 }

229	87

230 CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) {	88 CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) {

231 int size = bstr.GetLength();	89 int size = bstr.GetLength();

232 const FX_CHAR* pSrc = bstr.GetCStr();	90 const FX_CHAR* pSrc = bstr.GetCStr();

233 if (!FXSYS_memchr(pSrc, '#', size)) {	91 if (!FXSYS_memchr(pSrc, '#', size)) {

234 return bstr;	92 return bstr;

235 }	93 }

236 CFX_ByteString result;	94 CFX_ByteString result;

237 FX_CHAR* pDestStart = result.GetBuffer(size);	95 FX_CHAR* pDestStart = result.GetBuffer(size);

(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
396 if (pFound) {	254 if (pFound) {

397 return pFound;	255 return pFound;

398 }	256 }

399 }	257 }

400 return NULL;	258 return NULL;

401 }	259 }

402	260

403 CPDF_Object* CPDF_NumberTree::LookupValue(int num) {	261 CPDF_Object* CPDF_NumberTree::LookupValue(int num) {

404 return SearchNumberNode(m_pRoot, num);	262 return SearchNumberNode(m_pRoot, num);

405 }	263 }

OLD	NEW