Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname in Settings | Help | Chromium Project | Gerrit Changes | Sign out
(249)

Side by Side Diff: third_party/expat/files/lib/xmltok_impl.c

Issue 2761253002: Update expat to 2.2.0 to fix CVE vulnerability. (Closed)
Patch Set: update README.chromium Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2 See the file COPYING for copying permission. 2 See the file COPYING for copying permission.
3 */ 3 */
4 4
5 /* This file is included! */ 5 /* This file is included! */
6 #ifdef XML_TOK_IMPL_C 6 #ifdef XML_TOK_IMPL_C
7 7
8 #ifndef IS_INVALID_CHAR 8 #ifndef IS_INVALID_CHAR
9 #define IS_INVALID_CHAR(enc, ptr, n) (0) 9 #define IS_INVALID_CHAR(enc, ptr, n) (0)
10 #endif 10 #endif
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
80 ptr += MINBPC(enc); \ 80 ptr += MINBPC(enc); \
81 break; \ 81 break; \
82 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ 82 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
83 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ 83 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
84 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) 84 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
85 85
86 #ifndef PREFIX 86 #ifndef PREFIX
87 #define PREFIX(ident) ident 87 #define PREFIX(ident) ident
88 #endif 88 #endif
89 89
90
91 #define HAS_CHARS(enc, ptr, end, count) \
92 (end - ptr >= count * MINBPC(enc))
93
94 #define HAS_CHAR(enc, ptr, end) \
95 HAS_CHARS(enc, ptr, end, 1)
96
97 #define REQUIRE_CHARS(enc, ptr, end, count) \
98 { \
99 if (! HAS_CHARS(enc, ptr, end, count)) { \
100 return XML_TOK_PARTIAL; \
101 } \
102 }
103
104 #define REQUIRE_CHAR(enc, ptr, end) \
105 REQUIRE_CHARS(enc, ptr, end, 1)
106
107
90 /* ptr points to character following "<!-" */ 108 /* ptr points to character following "<!-" */
91 109
92 static int PTRCALL 110 static int PTRCALL
93 PREFIX(scanComment)(const ENCODING *enc, const char *ptr, 111 PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
94 const char *end, const char **nextTokPtr) 112 const char *end, const char **nextTokPtr)
95 { 113 {
96 if (ptr != end) { 114 if (HAS_CHAR(enc, ptr, end)) {
97 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 115 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
98 *nextTokPtr = ptr; 116 *nextTokPtr = ptr;
99 return XML_TOK_INVALID; 117 return XML_TOK_INVALID;
100 } 118 }
101 ptr += MINBPC(enc); 119 ptr += MINBPC(enc);
102 while (ptr != end) { 120 while (HAS_CHAR(enc, ptr, end)) {
103 switch (BYTE_TYPE(enc, ptr)) { 121 switch (BYTE_TYPE(enc, ptr)) {
104 INVALID_CASES(ptr, nextTokPtr) 122 INVALID_CASES(ptr, nextTokPtr)
105 case BT_MINUS: 123 case BT_MINUS:
106 if ((ptr += MINBPC(enc)) == end) 124 ptr += MINBPC(enc);
107 return XML_TOK_PARTIAL; 125 REQUIRE_CHAR(enc, ptr, end);
108 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 126 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
109 if ((ptr += MINBPC(enc)) == end) 127 ptr += MINBPC(enc);
110 return XML_TOK_PARTIAL; 128 REQUIRE_CHAR(enc, ptr, end);
111 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 129 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
112 *nextTokPtr = ptr; 130 *nextTokPtr = ptr;
113 return XML_TOK_INVALID; 131 return XML_TOK_INVALID;
114 } 132 }
115 *nextTokPtr = ptr + MINBPC(enc); 133 *nextTokPtr = ptr + MINBPC(enc);
116 return XML_TOK_COMMENT; 134 return XML_TOK_COMMENT;
117 } 135 }
118 break; 136 break;
119 default: 137 default:
120 ptr += MINBPC(enc); 138 ptr += MINBPC(enc);
121 break; 139 break;
122 } 140 }
123 } 141 }
124 } 142 }
125 return XML_TOK_PARTIAL; 143 return XML_TOK_PARTIAL;
126 } 144 }
127 145
128 /* ptr points to character following "<!" */ 146 /* ptr points to character following "<!" */
129 147
130 static int PTRCALL 148 static int PTRCALL
131 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, 149 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
132 const char *end, const char **nextTokPtr) 150 const char *end, const char **nextTokPtr)
133 { 151 {
134 if (ptr == end) 152 REQUIRE_CHAR(enc, ptr, end);
135 return XML_TOK_PARTIAL;
136 switch (BYTE_TYPE(enc, ptr)) { 153 switch (BYTE_TYPE(enc, ptr)) {
137 case BT_MINUS: 154 case BT_MINUS:
138 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 155 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
139 case BT_LSQB: 156 case BT_LSQB:
140 *nextTokPtr = ptr + MINBPC(enc); 157 *nextTokPtr = ptr + MINBPC(enc);
141 return XML_TOK_COND_SECT_OPEN; 158 return XML_TOK_COND_SECT_OPEN;
142 case BT_NMSTRT: 159 case BT_NMSTRT:
143 case BT_HEX: 160 case BT_HEX:
144 ptr += MINBPC(enc); 161 ptr += MINBPC(enc);
145 break; 162 break;
146 default: 163 default:
147 *nextTokPtr = ptr; 164 *nextTokPtr = ptr;
148 return XML_TOK_INVALID; 165 return XML_TOK_INVALID;
149 } 166 }
150 while (ptr != end) { 167 while (HAS_CHAR(enc, ptr, end)) {
151 switch (BYTE_TYPE(enc, ptr)) { 168 switch (BYTE_TYPE(enc, ptr)) {
152 case BT_PERCNT: 169 case BT_PERCNT:
153 if (ptr + MINBPC(enc) == end) 170 REQUIRE_CHARS(enc, ptr, end, 2);
154 return XML_TOK_PARTIAL;
155 /* don't allow <!ENTITY% foo "whatever"> */ 171 /* don't allow <!ENTITY% foo "whatever"> */
156 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { 172 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
157 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: 173 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
158 *nextTokPtr = ptr; 174 *nextTokPtr = ptr;
159 return XML_TOK_INVALID; 175 return XML_TOK_INVALID;
160 } 176 }
161 /* fall through */ 177 /* fall through */
162 case BT_S: case BT_CR: case BT_LF: 178 case BT_S: case BT_CR: case BT_LF:
163 *nextTokPtr = ptr; 179 *nextTokPtr = ptr;
164 return XML_TOK_DECL_OPEN; 180 return XML_TOK_DECL_OPEN;
165 case BT_NMSTRT: 181 case BT_NMSTRT:
166 case BT_HEX: 182 case BT_HEX:
167 ptr += MINBPC(enc); 183 ptr += MINBPC(enc);
168 break; 184 break;
169 default: 185 default:
170 *nextTokPtr = ptr; 186 *nextTokPtr = ptr;
171 return XML_TOK_INVALID; 187 return XML_TOK_INVALID;
172 } 188 }
173 } 189 }
174 return XML_TOK_PARTIAL; 190 return XML_TOK_PARTIAL;
175 } 191 }
176 192
177 static int PTRCALL 193 static int PTRCALL
178 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, 194 PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
179 const char *end, int *tokPtr) 195 const char *end, int *tokPtr)
180 { 196 {
181 int upper = 0; 197 int upper = 0;
182 *tokPtr = XML_TOK_PI; 198 *tokPtr = XML_TOK_PI;
183 if (end - ptr != MINBPC(enc)*3) 199 if (end - ptr != MINBPC(enc)*3)
184 return 1; 200 return 1;
185 switch (BYTE_TO_ASCII(enc, ptr)) { 201 switch (BYTE_TO_ASCII(enc, ptr)) {
186 case ASCII_x: 202 case ASCII_x:
187 break; 203 break;
188 case ASCII_X: 204 case ASCII_X:
(...skipping 29 matching lines...) Expand all
218 } 234 }
219 235
220 /* ptr points to character following "<?" */ 236 /* ptr points to character following "<?" */
221 237
222 static int PTRCALL 238 static int PTRCALL
223 PREFIX(scanPi)(const ENCODING *enc, const char *ptr, 239 PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
224 const char *end, const char **nextTokPtr) 240 const char *end, const char **nextTokPtr)
225 { 241 {
226 int tok; 242 int tok;
227 const char *target = ptr; 243 const char *target = ptr;
228 if (ptr == end) 244 REQUIRE_CHAR(enc, ptr, end);
229 return XML_TOK_PARTIAL;
230 switch (BYTE_TYPE(enc, ptr)) { 245 switch (BYTE_TYPE(enc, ptr)) {
231 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 246 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
232 default: 247 default:
233 *nextTokPtr = ptr; 248 *nextTokPtr = ptr;
234 return XML_TOK_INVALID; 249 return XML_TOK_INVALID;
235 } 250 }
236 while (ptr != end) { 251 while (HAS_CHAR(enc, ptr, end)) {
237 switch (BYTE_TYPE(enc, ptr)) { 252 switch (BYTE_TYPE(enc, ptr)) {
238 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 253 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
239 case BT_S: case BT_CR: case BT_LF: 254 case BT_S: case BT_CR: case BT_LF:
240 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 255 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
241 *nextTokPtr = ptr; 256 *nextTokPtr = ptr;
242 return XML_TOK_INVALID; 257 return XML_TOK_INVALID;
243 } 258 }
244 ptr += MINBPC(enc); 259 ptr += MINBPC(enc);
245 while (ptr != end) { 260 while (HAS_CHAR(enc, ptr, end)) {
246 switch (BYTE_TYPE(enc, ptr)) { 261 switch (BYTE_TYPE(enc, ptr)) {
247 INVALID_CASES(ptr, nextTokPtr) 262 INVALID_CASES(ptr, nextTokPtr)
248 case BT_QUEST: 263 case BT_QUEST:
249 ptr += MINBPC(enc); 264 ptr += MINBPC(enc);
250 if (ptr == end) 265 REQUIRE_CHAR(enc, ptr, end);
251 return XML_TOK_PARTIAL;
252 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 266 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
253 *nextTokPtr = ptr + MINBPC(enc); 267 *nextTokPtr = ptr + MINBPC(enc);
254 return tok; 268 return tok;
255 } 269 }
256 break; 270 break;
257 default: 271 default:
258 ptr += MINBPC(enc); 272 ptr += MINBPC(enc);
259 break; 273 break;
260 } 274 }
261 } 275 }
262 return XML_TOK_PARTIAL; 276 return XML_TOK_PARTIAL;
263 case BT_QUEST: 277 case BT_QUEST:
264 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 278 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
265 *nextTokPtr = ptr; 279 *nextTokPtr = ptr;
266 return XML_TOK_INVALID; 280 return XML_TOK_INVALID;
267 } 281 }
268 ptr += MINBPC(enc); 282 ptr += MINBPC(enc);
269 if (ptr == end) 283 REQUIRE_CHAR(enc, ptr, end);
270 return XML_TOK_PARTIAL;
271 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 284 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
272 *nextTokPtr = ptr + MINBPC(enc); 285 *nextTokPtr = ptr + MINBPC(enc);
273 return tok; 286 return tok;
274 } 287 }
275 /* fall through */ 288 /* fall through */
276 default: 289 default:
277 *nextTokPtr = ptr; 290 *nextTokPtr = ptr;
278 return XML_TOK_INVALID; 291 return XML_TOK_INVALID;
279 } 292 }
280 } 293 }
281 return XML_TOK_PARTIAL; 294 return XML_TOK_PARTIAL;
282 } 295 }
283 296
284 static int PTRCALL 297 static int PTRCALL
285 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, 298 PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
286 const char *end, const char **nextTokPtr) 299 const char *end, const char **nextTokPtr)
287 { 300 {
288 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, 301 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
289 ASCII_T, ASCII_A, ASCII_LSQB }; 302 ASCII_T, ASCII_A, ASCII_LSQB };
290 int i; 303 int i;
291 /* CDATA[ */ 304 /* CDATA[ */
292 if (end - ptr < 6 * MINBPC(enc)) 305 REQUIRE_CHARS(enc, ptr, end, 6);
293 return XML_TOK_PARTIAL;
294 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { 306 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
295 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { 307 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
296 *nextTokPtr = ptr; 308 *nextTokPtr = ptr;
297 return XML_TOK_INVALID; 309 return XML_TOK_INVALID;
298 } 310 }
299 } 311 }
300 *nextTokPtr = ptr; 312 *nextTokPtr = ptr;
301 return XML_TOK_CDATA_SECT_OPEN; 313 return XML_TOK_CDATA_SECT_OPEN;
302 } 314 }
303 315
304 static int PTRCALL 316 static int PTRCALL
305 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, 317 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
306 const char *end, const char **nextTokPtr) 318 const char *end, const char **nextTokPtr)
307 { 319 {
308 if (ptr == end) 320 if (ptr >= end)
309 return XML_TOK_NONE; 321 return XML_TOK_NONE;
310 if (MINBPC(enc) > 1) { 322 if (MINBPC(enc) > 1) {
311 size_t n = end - ptr; 323 size_t n = end - ptr;
312 if (n & (MINBPC(enc) - 1)) { 324 if (n & (MINBPC(enc) - 1)) {
313 n &= ~(MINBPC(enc) - 1); 325 n &= ~(MINBPC(enc) - 1);
314 if (n == 0) 326 if (n == 0)
315 return XML_TOK_PARTIAL; 327 return XML_TOK_PARTIAL;
316 end = ptr + n; 328 end = ptr + n;
317 } 329 }
318 } 330 }
319 switch (BYTE_TYPE(enc, ptr)) { 331 switch (BYTE_TYPE(enc, ptr)) {
320 case BT_RSQB: 332 case BT_RSQB:
321 ptr += MINBPC(enc); 333 ptr += MINBPC(enc);
322 if (ptr == end) 334 REQUIRE_CHAR(enc, ptr, end);
323 return XML_TOK_PARTIAL;
324 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 335 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
325 break; 336 break;
326 ptr += MINBPC(enc); 337 ptr += MINBPC(enc);
327 if (ptr == end) 338 REQUIRE_CHAR(enc, ptr, end);
328 return XML_TOK_PARTIAL;
329 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 339 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
330 ptr -= MINBPC(enc); 340 ptr -= MINBPC(enc);
331 break; 341 break;
332 } 342 }
333 *nextTokPtr = ptr + MINBPC(enc); 343 *nextTokPtr = ptr + MINBPC(enc);
334 return XML_TOK_CDATA_SECT_CLOSE; 344 return XML_TOK_CDATA_SECT_CLOSE;
335 case BT_CR: 345 case BT_CR:
336 ptr += MINBPC(enc); 346 ptr += MINBPC(enc);
337 if (ptr == end) 347 REQUIRE_CHAR(enc, ptr, end);
338 return XML_TOK_PARTIAL;
339 if (BYTE_TYPE(enc, ptr) == BT_LF) 348 if (BYTE_TYPE(enc, ptr) == BT_LF)
340 ptr += MINBPC(enc); 349 ptr += MINBPC(enc);
341 *nextTokPtr = ptr; 350 *nextTokPtr = ptr;
342 return XML_TOK_DATA_NEWLINE; 351 return XML_TOK_DATA_NEWLINE;
343 case BT_LF: 352 case BT_LF:
344 *nextTokPtr = ptr + MINBPC(enc); 353 *nextTokPtr = ptr + MINBPC(enc);
345 return XML_TOK_DATA_NEWLINE; 354 return XML_TOK_DATA_NEWLINE;
346 INVALID_CASES(ptr, nextTokPtr) 355 INVALID_CASES(ptr, nextTokPtr)
347 default: 356 default:
348 ptr += MINBPC(enc); 357 ptr += MINBPC(enc);
349 break; 358 break;
350 } 359 }
351 while (ptr != end) { 360 while (HAS_CHAR(enc, ptr, end)) {
352 switch (BYTE_TYPE(enc, ptr)) { 361 switch (BYTE_TYPE(enc, ptr)) {
353 #define LEAD_CASE(n) \ 362 #define LEAD_CASE(n) \
354 case BT_LEAD ## n: \ 363 case BT_LEAD ## n: \
355 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 364 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
356 *nextTokPtr = ptr; \ 365 *nextTokPtr = ptr; \
357 return XML_TOK_DATA_CHARS; \ 366 return XML_TOK_DATA_CHARS; \
358 } \ 367 } \
359 ptr += n; \ 368 ptr += n; \
360 break; 369 break;
361 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 370 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
(...skipping 14 matching lines...) Expand all
376 *nextTokPtr = ptr; 385 *nextTokPtr = ptr;
377 return XML_TOK_DATA_CHARS; 386 return XML_TOK_DATA_CHARS;
378 } 387 }
379 388
380 /* ptr points to character following "</" */ 389 /* ptr points to character following "</" */
381 390
382 static int PTRCALL 391 static int PTRCALL
383 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, 392 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
384 const char *end, const char **nextTokPtr) 393 const char *end, const char **nextTokPtr)
385 { 394 {
386 if (ptr == end) 395 REQUIRE_CHAR(enc, ptr, end);
387 return XML_TOK_PARTIAL;
388 switch (BYTE_TYPE(enc, ptr)) { 396 switch (BYTE_TYPE(enc, ptr)) {
389 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 397 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
390 default: 398 default:
391 *nextTokPtr = ptr; 399 *nextTokPtr = ptr;
392 return XML_TOK_INVALID; 400 return XML_TOK_INVALID;
393 } 401 }
394 while (ptr != end) { 402 while (HAS_CHAR(enc, ptr, end)) {
395 switch (BYTE_TYPE(enc, ptr)) { 403 switch (BYTE_TYPE(enc, ptr)) {
396 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 404 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
397 case BT_S: case BT_CR: case BT_LF: 405 case BT_S: case BT_CR: case BT_LF:
398 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 406 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
399 switch (BYTE_TYPE(enc, ptr)) { 407 switch (BYTE_TYPE(enc, ptr)) {
400 case BT_S: case BT_CR: case BT_LF: 408 case BT_S: case BT_CR: case BT_LF:
401 break; 409 break;
402 case BT_GT: 410 case BT_GT:
403 *nextTokPtr = ptr + MINBPC(enc); 411 *nextTokPtr = ptr + MINBPC(enc);
404 return XML_TOK_END_TAG; 412 return XML_TOK_END_TAG;
405 default: 413 default:
406 *nextTokPtr = ptr; 414 *nextTokPtr = ptr;
407 return XML_TOK_INVALID; 415 return XML_TOK_INVALID;
408 } 416 }
(...skipping 16 matching lines...) Expand all
425 } 433 }
426 return XML_TOK_PARTIAL; 434 return XML_TOK_PARTIAL;
427 } 435 }
428 436
429 /* ptr points to character following "&#X" */ 437 /* ptr points to character following "&#X" */
430 438
431 static int PTRCALL 439 static int PTRCALL
432 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, 440 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
433 const char *end, const char **nextTokPtr) 441 const char *end, const char **nextTokPtr)
434 { 442 {
435 if (ptr != end) { 443 if (HAS_CHAR(enc, ptr, end)) {
436 switch (BYTE_TYPE(enc, ptr)) { 444 switch (BYTE_TYPE(enc, ptr)) {
437 case BT_DIGIT: 445 case BT_DIGIT:
438 case BT_HEX: 446 case BT_HEX:
439 break; 447 break;
440 default: 448 default:
441 *nextTokPtr = ptr; 449 *nextTokPtr = ptr;
442 return XML_TOK_INVALID; 450 return XML_TOK_INVALID;
443 } 451 }
444 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 452 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
445 switch (BYTE_TYPE(enc, ptr)) { 453 switch (BYTE_TYPE(enc, ptr)) {
446 case BT_DIGIT: 454 case BT_DIGIT:
447 case BT_HEX: 455 case BT_HEX:
448 break; 456 break;
449 case BT_SEMI: 457 case BT_SEMI:
450 *nextTokPtr = ptr + MINBPC(enc); 458 *nextTokPtr = ptr + MINBPC(enc);
451 return XML_TOK_CHAR_REF; 459 return XML_TOK_CHAR_REF;
452 default: 460 default:
453 *nextTokPtr = ptr; 461 *nextTokPtr = ptr;
454 return XML_TOK_INVALID; 462 return XML_TOK_INVALID;
455 } 463 }
456 } 464 }
457 } 465 }
458 return XML_TOK_PARTIAL; 466 return XML_TOK_PARTIAL;
459 } 467 }
460 468
461 /* ptr points to character following "&#" */ 469 /* ptr points to character following "&#" */
462 470
463 static int PTRCALL 471 static int PTRCALL
464 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, 472 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
465 const char *end, const char **nextTokPtr) 473 const char *end, const char **nextTokPtr)
466 { 474 {
467 if (ptr != end) { 475 if (HAS_CHAR(enc, ptr, end)) {
468 if (CHAR_MATCHES(enc, ptr, ASCII_x)) 476 if (CHAR_MATCHES(enc, ptr, ASCII_x))
469 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 477 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
470 switch (BYTE_TYPE(enc, ptr)) { 478 switch (BYTE_TYPE(enc, ptr)) {
471 case BT_DIGIT: 479 case BT_DIGIT:
472 break; 480 break;
473 default: 481 default:
474 *nextTokPtr = ptr; 482 *nextTokPtr = ptr;
475 return XML_TOK_INVALID; 483 return XML_TOK_INVALID;
476 } 484 }
477 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 485 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
478 switch (BYTE_TYPE(enc, ptr)) { 486 switch (BYTE_TYPE(enc, ptr)) {
479 case BT_DIGIT: 487 case BT_DIGIT:
480 break; 488 break;
481 case BT_SEMI: 489 case BT_SEMI:
482 *nextTokPtr = ptr + MINBPC(enc); 490 *nextTokPtr = ptr + MINBPC(enc);
483 return XML_TOK_CHAR_REF; 491 return XML_TOK_CHAR_REF;
484 default: 492 default:
485 *nextTokPtr = ptr; 493 *nextTokPtr = ptr;
486 return XML_TOK_INVALID; 494 return XML_TOK_INVALID;
487 } 495 }
488 } 496 }
489 } 497 }
490 return XML_TOK_PARTIAL; 498 return XML_TOK_PARTIAL;
491 } 499 }
492 500
493 /* ptr points to character following "&" */ 501 /* ptr points to character following "&" */
494 502
495 static int PTRCALL 503 static int PTRCALL
496 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, 504 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
497 const char **nextTokPtr) 505 const char **nextTokPtr)
498 { 506 {
499 if (ptr == end) 507 REQUIRE_CHAR(enc, ptr, end);
500 return XML_TOK_PARTIAL;
501 switch (BYTE_TYPE(enc, ptr)) { 508 switch (BYTE_TYPE(enc, ptr)) {
502 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 509 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
503 case BT_NUM: 510 case BT_NUM:
504 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 511 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
505 default: 512 default:
506 *nextTokPtr = ptr; 513 *nextTokPtr = ptr;
507 return XML_TOK_INVALID; 514 return XML_TOK_INVALID;
508 } 515 }
509 while (ptr != end) { 516 while (HAS_CHAR(enc, ptr, end)) {
510 switch (BYTE_TYPE(enc, ptr)) { 517 switch (BYTE_TYPE(enc, ptr)) {
511 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 518 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
512 case BT_SEMI: 519 case BT_SEMI:
513 *nextTokPtr = ptr + MINBPC(enc); 520 *nextTokPtr = ptr + MINBPC(enc);
514 return XML_TOK_ENTITY_REF; 521 return XML_TOK_ENTITY_REF;
515 default: 522 default:
516 *nextTokPtr = ptr; 523 *nextTokPtr = ptr;
517 return XML_TOK_INVALID; 524 return XML_TOK_INVALID;
518 } 525 }
519 } 526 }
520 return XML_TOK_PARTIAL; 527 return XML_TOK_PARTIAL;
521 } 528 }
522 529
523 /* ptr points to character following first character of attribute name */ 530 /* ptr points to character following first character of attribute name */
524 531
525 static int PTRCALL 532 static int PTRCALL
526 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, 533 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
527 const char **nextTokPtr) 534 const char **nextTokPtr)
528 { 535 {
529 #ifdef XML_NS 536 #ifdef XML_NS
530 int hadColon = 0; 537 int hadColon = 0;
531 #endif 538 #endif
532 while (ptr != end) { 539 while (HAS_CHAR(enc, ptr, end)) {
533 switch (BYTE_TYPE(enc, ptr)) { 540 switch (BYTE_TYPE(enc, ptr)) {
534 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 541 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
535 #ifdef XML_NS 542 #ifdef XML_NS
536 case BT_COLON: 543 case BT_COLON:
537 if (hadColon) { 544 if (hadColon) {
538 *nextTokPtr = ptr; 545 *nextTokPtr = ptr;
539 return XML_TOK_INVALID; 546 return XML_TOK_INVALID;
540 } 547 }
541 hadColon = 1; 548 hadColon = 1;
542 ptr += MINBPC(enc); 549 ptr += MINBPC(enc);
543 if (ptr == end) 550 REQUIRE_CHAR(enc, ptr, end);
544 return XML_TOK_PARTIAL;
545 switch (BYTE_TYPE(enc, ptr)) { 551 switch (BYTE_TYPE(enc, ptr)) {
546 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 552 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
547 default: 553 default:
548 *nextTokPtr = ptr; 554 *nextTokPtr = ptr;
549 return XML_TOK_INVALID; 555 return XML_TOK_INVALID;
550 } 556 }
551 break; 557 break;
552 #endif 558 #endif
553 case BT_S: case BT_CR: case BT_LF: 559 case BT_S: case BT_CR: case BT_LF:
554 for (;;) { 560 for (;;) {
555 int t; 561 int t;
556 562
557 ptr += MINBPC(enc); 563 ptr += MINBPC(enc);
558 if (ptr == end) 564 REQUIRE_CHAR(enc, ptr, end);
559 return XML_TOK_PARTIAL;
560 t = BYTE_TYPE(enc, ptr); 565 t = BYTE_TYPE(enc, ptr);
561 if (t == BT_EQUALS) 566 if (t == BT_EQUALS)
562 break; 567 break;
563 switch (t) { 568 switch (t) {
564 case BT_S: 569 case BT_S:
565 case BT_LF: 570 case BT_LF:
566 case BT_CR: 571 case BT_CR:
567 break; 572 break;
568 default: 573 default:
569 *nextTokPtr = ptr; 574 *nextTokPtr = ptr;
570 return XML_TOK_INVALID; 575 return XML_TOK_INVALID;
571 } 576 }
572 } 577 }
573 /* fall through */ 578 /* fall through */
574 case BT_EQUALS: 579 case BT_EQUALS:
575 { 580 {
576 int open; 581 int open;
577 #ifdef XML_NS 582 #ifdef XML_NS
578 hadColon = 0; 583 hadColon = 0;
579 #endif 584 #endif
580 for (;;) { 585 for (;;) {
581 ptr += MINBPC(enc); 586 ptr += MINBPC(enc);
582 if (ptr == end) 587 REQUIRE_CHAR(enc, ptr, end);
583 return XML_TOK_PARTIAL;
584 open = BYTE_TYPE(enc, ptr); 588 open = BYTE_TYPE(enc, ptr);
585 if (open == BT_QUOT || open == BT_APOS) 589 if (open == BT_QUOT || open == BT_APOS)
586 break; 590 break;
587 switch (open) { 591 switch (open) {
588 case BT_S: 592 case BT_S:
589 case BT_LF: 593 case BT_LF:
590 case BT_CR: 594 case BT_CR:
591 break; 595 break;
592 default: 596 default:
593 *nextTokPtr = ptr; 597 *nextTokPtr = ptr;
594 return XML_TOK_INVALID; 598 return XML_TOK_INVALID;
595 } 599 }
596 } 600 }
597 ptr += MINBPC(enc); 601 ptr += MINBPC(enc);
598 /* in attribute value */ 602 /* in attribute value */
599 for (;;) { 603 for (;;) {
600 int t; 604 int t;
601 if (ptr == end) 605 REQUIRE_CHAR(enc, ptr, end);
602 return XML_TOK_PARTIAL;
603 t = BYTE_TYPE(enc, ptr); 606 t = BYTE_TYPE(enc, ptr);
604 if (t == open) 607 if (t == open)
605 break; 608 break;
606 switch (t) { 609 switch (t) {
607 INVALID_CASES(ptr, nextTokPtr) 610 INVALID_CASES(ptr, nextTokPtr)
608 case BT_AMP: 611 case BT_AMP:
609 { 612 {
610 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); 613 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
611 if (tok <= 0) { 614 if (tok <= 0) {
612 if (tok == XML_TOK_INVALID) 615 if (tok == XML_TOK_INVALID)
613 *nextTokPtr = ptr; 616 *nextTokPtr = ptr;
614 return tok; 617 return tok;
615 } 618 }
616 break; 619 break;
617 } 620 }
618 case BT_LT: 621 case BT_LT:
619 *nextTokPtr = ptr; 622 *nextTokPtr = ptr;
620 return XML_TOK_INVALID; 623 return XML_TOK_INVALID;
621 default: 624 default:
622 ptr += MINBPC(enc); 625 ptr += MINBPC(enc);
623 break; 626 break;
624 } 627 }
625 } 628 }
626 ptr += MINBPC(enc); 629 ptr += MINBPC(enc);
627 if (ptr == end) 630 REQUIRE_CHAR(enc, ptr, end);
628 return XML_TOK_PARTIAL;
629 switch (BYTE_TYPE(enc, ptr)) { 631 switch (BYTE_TYPE(enc, ptr)) {
630 case BT_S: 632 case BT_S:
631 case BT_CR: 633 case BT_CR:
632 case BT_LF: 634 case BT_LF:
633 break; 635 break;
634 case BT_SOL: 636 case BT_SOL:
635 goto sol; 637 goto sol;
636 case BT_GT: 638 case BT_GT:
637 goto gt; 639 goto gt;
638 default: 640 default:
639 *nextTokPtr = ptr; 641 *nextTokPtr = ptr;
640 return XML_TOK_INVALID; 642 return XML_TOK_INVALID;
641 } 643 }
642 /* ptr points to closing quote */ 644 /* ptr points to closing quote */
643 for (;;) { 645 for (;;) {
644 ptr += MINBPC(enc); 646 ptr += MINBPC(enc);
645 if (ptr == end) 647 REQUIRE_CHAR(enc, ptr, end);
646 return XML_TOK_PARTIAL;
647 switch (BYTE_TYPE(enc, ptr)) { 648 switch (BYTE_TYPE(enc, ptr)) {
648 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 649 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
649 case BT_S: case BT_CR: case BT_LF: 650 case BT_S: case BT_CR: case BT_LF:
650 continue; 651 continue;
651 case BT_GT: 652 case BT_GT:
652 gt: 653 gt:
653 *nextTokPtr = ptr + MINBPC(enc); 654 *nextTokPtr = ptr + MINBPC(enc);
654 return XML_TOK_START_TAG_WITH_ATTS; 655 return XML_TOK_START_TAG_WITH_ATTS;
655 case BT_SOL: 656 case BT_SOL:
656 sol: 657 sol:
657 ptr += MINBPC(enc); 658 ptr += MINBPC(enc);
658 if (ptr == end) 659 REQUIRE_CHAR(enc, ptr, end);
659 return XML_TOK_PARTIAL;
660 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 660 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
661 *nextTokPtr = ptr; 661 *nextTokPtr = ptr;
662 return XML_TOK_INVALID; 662 return XML_TOK_INVALID;
663 } 663 }
664 *nextTokPtr = ptr + MINBPC(enc); 664 *nextTokPtr = ptr + MINBPC(enc);
665 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; 665 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
666 default: 666 default:
667 *nextTokPtr = ptr; 667 *nextTokPtr = ptr;
668 return XML_TOK_INVALID; 668 return XML_TOK_INVALID;
669 } 669 }
(...skipping 11 matching lines...) Expand all
681 681
682 /* ptr points to character following "<" */ 682 /* ptr points to character following "<" */
683 683
684 static int PTRCALL 684 static int PTRCALL
685 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, 685 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
686 const char **nextTokPtr) 686 const char **nextTokPtr)
687 { 687 {
688 #ifdef XML_NS 688 #ifdef XML_NS
689 int hadColon; 689 int hadColon;
690 #endif 690 #endif
691 if (ptr == end) 691 REQUIRE_CHAR(enc, ptr, end);
692 return XML_TOK_PARTIAL;
693 switch (BYTE_TYPE(enc, ptr)) { 692 switch (BYTE_TYPE(enc, ptr)) {
694 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 693 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
695 case BT_EXCL: 694 case BT_EXCL:
696 if ((ptr += MINBPC(enc)) == end) 695 ptr += MINBPC(enc);
697 return XML_TOK_PARTIAL; 696 REQUIRE_CHAR(enc, ptr, end);
698 switch (BYTE_TYPE(enc, ptr)) { 697 switch (BYTE_TYPE(enc, ptr)) {
699 case BT_MINUS: 698 case BT_MINUS:
700 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 699 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
701 case BT_LSQB: 700 case BT_LSQB:
702 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), 701 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
703 end, nextTokPtr); 702 end, nextTokPtr);
704 } 703 }
705 *nextTokPtr = ptr; 704 *nextTokPtr = ptr;
706 return XML_TOK_INVALID; 705 return XML_TOK_INVALID;
707 case BT_QUEST: 706 case BT_QUEST:
708 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 707 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
709 case BT_SOL: 708 case BT_SOL:
710 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); 709 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
711 default: 710 default:
712 *nextTokPtr = ptr; 711 *nextTokPtr = ptr;
713 return XML_TOK_INVALID; 712 return XML_TOK_INVALID;
714 } 713 }
715 #ifdef XML_NS 714 #ifdef XML_NS
716 hadColon = 0; 715 hadColon = 0;
717 #endif 716 #endif
718 /* we have a start-tag */ 717 /* we have a start-tag */
719 while (ptr != end) { 718 while (HAS_CHAR(enc, ptr, end)) {
720 switch (BYTE_TYPE(enc, ptr)) { 719 switch (BYTE_TYPE(enc, ptr)) {
721 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 720 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
722 #ifdef XML_NS 721 #ifdef XML_NS
723 case BT_COLON: 722 case BT_COLON:
724 if (hadColon) { 723 if (hadColon) {
725 *nextTokPtr = ptr; 724 *nextTokPtr = ptr;
726 return XML_TOK_INVALID; 725 return XML_TOK_INVALID;
727 } 726 }
728 hadColon = 1; 727 hadColon = 1;
729 ptr += MINBPC(enc); 728 ptr += MINBPC(enc);
730 if (ptr == end) 729 REQUIRE_CHAR(enc, ptr, end);
731 return XML_TOK_PARTIAL;
732 switch (BYTE_TYPE(enc, ptr)) { 730 switch (BYTE_TYPE(enc, ptr)) {
733 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 731 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
734 default: 732 default:
735 *nextTokPtr = ptr; 733 *nextTokPtr = ptr;
736 return XML_TOK_INVALID; 734 return XML_TOK_INVALID;
737 } 735 }
738 break; 736 break;
739 #endif 737 #endif
740 case BT_S: case BT_CR: case BT_LF: 738 case BT_S: case BT_CR: case BT_LF:
741 { 739 {
742 ptr += MINBPC(enc); 740 ptr += MINBPC(enc);
743 while (ptr != end) { 741 while (HAS_CHAR(enc, ptr, end)) {
744 switch (BYTE_TYPE(enc, ptr)) { 742 switch (BYTE_TYPE(enc, ptr)) {
745 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 743 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
746 case BT_GT: 744 case BT_GT:
747 goto gt; 745 goto gt;
748 case BT_SOL: 746 case BT_SOL:
749 goto sol; 747 goto sol;
750 case BT_S: case BT_CR: case BT_LF: 748 case BT_S: case BT_CR: case BT_LF:
751 ptr += MINBPC(enc); 749 ptr += MINBPC(enc);
752 continue; 750 continue;
753 default: 751 default:
754 *nextTokPtr = ptr; 752 *nextTokPtr = ptr;
755 return XML_TOK_INVALID; 753 return XML_TOK_INVALID;
756 } 754 }
757 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); 755 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
758 } 756 }
759 return XML_TOK_PARTIAL; 757 return XML_TOK_PARTIAL;
760 } 758 }
761 case BT_GT: 759 case BT_GT:
762 gt: 760 gt:
763 *nextTokPtr = ptr + MINBPC(enc); 761 *nextTokPtr = ptr + MINBPC(enc);
764 return XML_TOK_START_TAG_NO_ATTS; 762 return XML_TOK_START_TAG_NO_ATTS;
765 case BT_SOL: 763 case BT_SOL:
766 sol: 764 sol:
767 ptr += MINBPC(enc); 765 ptr += MINBPC(enc);
768 if (ptr == end) 766 REQUIRE_CHAR(enc, ptr, end);
769 return XML_TOK_PARTIAL;
770 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 767 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
771 *nextTokPtr = ptr; 768 *nextTokPtr = ptr;
772 return XML_TOK_INVALID; 769 return XML_TOK_INVALID;
773 } 770 }
774 *nextTokPtr = ptr + MINBPC(enc); 771 *nextTokPtr = ptr + MINBPC(enc);
775 return XML_TOK_EMPTY_ELEMENT_NO_ATTS; 772 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
776 default: 773 default:
777 *nextTokPtr = ptr; 774 *nextTokPtr = ptr;
778 return XML_TOK_INVALID; 775 return XML_TOK_INVALID;
779 } 776 }
780 } 777 }
781 return XML_TOK_PARTIAL; 778 return XML_TOK_PARTIAL;
782 } 779 }
783 780
784 static int PTRCALL 781 static int PTRCALL
785 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, 782 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
786 const char **nextTokPtr) 783 const char **nextTokPtr)
787 { 784 {
788 if (ptr == end) 785 if (ptr >= end)
789 return XML_TOK_NONE; 786 return XML_TOK_NONE;
790 if (MINBPC(enc) > 1) { 787 if (MINBPC(enc) > 1) {
791 size_t n = end - ptr; 788 size_t n = end - ptr;
792 if (n & (MINBPC(enc) - 1)) { 789 if (n & (MINBPC(enc) - 1)) {
793 n &= ~(MINBPC(enc) - 1); 790 n &= ~(MINBPC(enc) - 1);
794 if (n == 0) 791 if (n == 0)
795 return XML_TOK_PARTIAL; 792 return XML_TOK_PARTIAL;
796 end = ptr + n; 793 end = ptr + n;
797 } 794 }
798 } 795 }
799 switch (BYTE_TYPE(enc, ptr)) { 796 switch (BYTE_TYPE(enc, ptr)) {
800 case BT_LT: 797 case BT_LT:
801 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); 798 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
802 case BT_AMP: 799 case BT_AMP:
803 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 800 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
804 case BT_CR: 801 case BT_CR:
805 ptr += MINBPC(enc); 802 ptr += MINBPC(enc);
806 if (ptr == end) 803 if (! HAS_CHAR(enc, ptr, end))
807 return XML_TOK_TRAILING_CR; 804 return XML_TOK_TRAILING_CR;
808 if (BYTE_TYPE(enc, ptr) == BT_LF) 805 if (BYTE_TYPE(enc, ptr) == BT_LF)
809 ptr += MINBPC(enc); 806 ptr += MINBPC(enc);
810 *nextTokPtr = ptr; 807 *nextTokPtr = ptr;
811 return XML_TOK_DATA_NEWLINE; 808 return XML_TOK_DATA_NEWLINE;
812 case BT_LF: 809 case BT_LF:
813 *nextTokPtr = ptr + MINBPC(enc); 810 *nextTokPtr = ptr + MINBPC(enc);
814 return XML_TOK_DATA_NEWLINE; 811 return XML_TOK_DATA_NEWLINE;
815 case BT_RSQB: 812 case BT_RSQB:
816 ptr += MINBPC(enc); 813 ptr += MINBPC(enc);
817 if (ptr == end) 814 if (! HAS_CHAR(enc, ptr, end))
818 return XML_TOK_TRAILING_RSQB; 815 return XML_TOK_TRAILING_RSQB;
819 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 816 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
820 break; 817 break;
821 ptr += MINBPC(enc); 818 ptr += MINBPC(enc);
822 if (ptr == end) 819 if (! HAS_CHAR(enc, ptr, end))
823 return XML_TOK_TRAILING_RSQB; 820 return XML_TOK_TRAILING_RSQB;
824 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 821 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
825 ptr -= MINBPC(enc); 822 ptr -= MINBPC(enc);
826 break; 823 break;
827 } 824 }
828 *nextTokPtr = ptr; 825 *nextTokPtr = ptr;
829 return XML_TOK_INVALID; 826 return XML_TOK_INVALID;
830 INVALID_CASES(ptr, nextTokPtr) 827 INVALID_CASES(ptr, nextTokPtr)
831 default: 828 default:
832 ptr += MINBPC(enc); 829 ptr += MINBPC(enc);
833 break; 830 break;
834 } 831 }
835 while (ptr != end) { 832 while (HAS_CHAR(enc, ptr, end)) {
836 switch (BYTE_TYPE(enc, ptr)) { 833 switch (BYTE_TYPE(enc, ptr)) {
837 #define LEAD_CASE(n) \ 834 #define LEAD_CASE(n) \
838 case BT_LEAD ## n: \ 835 case BT_LEAD ## n: \
839 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 836 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
840 *nextTokPtr = ptr; \ 837 *nextTokPtr = ptr; \
841 return XML_TOK_DATA_CHARS; \ 838 return XML_TOK_DATA_CHARS; \
842 } \ 839 } \
843 ptr += n; \ 840 ptr += n; \
844 break; 841 break;
845 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 842 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
846 #undef LEAD_CASE 843 #undef LEAD_CASE
847 case BT_RSQB: 844 case BT_RSQB:
848 if (ptr + MINBPC(enc) != end) { 845 if (HAS_CHARS(enc, ptr, end, 2)) {
849 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { 846 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
850 ptr += MINBPC(enc); 847 ptr += MINBPC(enc);
851 break; 848 break;
852 } 849 }
853 if (ptr + 2*MINBPC(enc) != end) { 850 if (HAS_CHARS(enc, ptr, end, 3)) {
854 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { 851 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
855 ptr += MINBPC(enc); 852 ptr += MINBPC(enc);
856 break; 853 break;
857 } 854 }
858 *nextTokPtr = ptr + 2*MINBPC(enc); 855 *nextTokPtr = ptr + 2*MINBPC(enc);
859 return XML_TOK_INVALID; 856 return XML_TOK_INVALID;
860 } 857 }
861 } 858 }
862 /* fall through */ 859 /* fall through */
863 case BT_AMP: 860 case BT_AMP:
(...skipping 13 matching lines...) Expand all
877 *nextTokPtr = ptr; 874 *nextTokPtr = ptr;
878 return XML_TOK_DATA_CHARS; 875 return XML_TOK_DATA_CHARS;
879 } 876 }
880 877
881 /* ptr points to character following "%" */ 878 /* ptr points to character following "%" */
882 879
883 static int PTRCALL 880 static int PTRCALL
884 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, 881 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
885 const char **nextTokPtr) 882 const char **nextTokPtr)
886 { 883 {
887 if (ptr == end) 884 REQUIRE_CHAR(enc, ptr, end);
888 return XML_TOK_PARTIAL;
889 switch (BYTE_TYPE(enc, ptr)) { 885 switch (BYTE_TYPE(enc, ptr)) {
890 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 886 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
891 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: 887 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
892 *nextTokPtr = ptr; 888 *nextTokPtr = ptr;
893 return XML_TOK_PERCENT; 889 return XML_TOK_PERCENT;
894 default: 890 default:
895 *nextTokPtr = ptr; 891 *nextTokPtr = ptr;
896 return XML_TOK_INVALID; 892 return XML_TOK_INVALID;
897 } 893 }
898 while (ptr != end) { 894 while (HAS_CHAR(enc, ptr, end)) {
899 switch (BYTE_TYPE(enc, ptr)) { 895 switch (BYTE_TYPE(enc, ptr)) {
900 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 896 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
901 case BT_SEMI: 897 case BT_SEMI:
902 *nextTokPtr = ptr + MINBPC(enc); 898 *nextTokPtr = ptr + MINBPC(enc);
903 return XML_TOK_PARAM_ENTITY_REF; 899 return XML_TOK_PARAM_ENTITY_REF;
904 default: 900 default:
905 *nextTokPtr = ptr; 901 *nextTokPtr = ptr;
906 return XML_TOK_INVALID; 902 return XML_TOK_INVALID;
907 } 903 }
908 } 904 }
909 return XML_TOK_PARTIAL; 905 return XML_TOK_PARTIAL;
910 } 906 }
911 907
912 static int PTRCALL 908 static int PTRCALL
913 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, 909 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
914 const char **nextTokPtr) 910 const char **nextTokPtr)
915 { 911 {
916 if (ptr == end) 912 REQUIRE_CHAR(enc, ptr, end);
917 return XML_TOK_PARTIAL;
918 switch (BYTE_TYPE(enc, ptr)) { 913 switch (BYTE_TYPE(enc, ptr)) {
919 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 914 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
920 default: 915 default:
921 *nextTokPtr = ptr; 916 *nextTokPtr = ptr;
922 return XML_TOK_INVALID; 917 return XML_TOK_INVALID;
923 } 918 }
924 while (ptr != end) { 919 while (HAS_CHAR(enc, ptr, end)) {
925 switch (BYTE_TYPE(enc, ptr)) { 920 switch (BYTE_TYPE(enc, ptr)) {
926 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 921 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
927 case BT_CR: case BT_LF: case BT_S: 922 case BT_CR: case BT_LF: case BT_S:
928 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: 923 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
929 *nextTokPtr = ptr; 924 *nextTokPtr = ptr;
930 return XML_TOK_POUND_NAME; 925 return XML_TOK_POUND_NAME;
931 default: 926 default:
932 *nextTokPtr = ptr; 927 *nextTokPtr = ptr;
933 return XML_TOK_INVALID; 928 return XML_TOK_INVALID;
934 } 929 }
935 } 930 }
936 return -XML_TOK_POUND_NAME; 931 return -XML_TOK_POUND_NAME;
937 } 932 }
938 933
939 static int PTRCALL 934 static int PTRCALL
940 PREFIX(scanLit)(int open, const ENCODING *enc, 935 PREFIX(scanLit)(int open, const ENCODING *enc,
941 const char *ptr, const char *end, 936 const char *ptr, const char *end,
942 const char **nextTokPtr) 937 const char **nextTokPtr)
943 { 938 {
944 while (ptr != end) { 939 while (HAS_CHAR(enc, ptr, end)) {
945 int t = BYTE_TYPE(enc, ptr); 940 int t = BYTE_TYPE(enc, ptr);
946 switch (t) { 941 switch (t) {
947 INVALID_CASES(ptr, nextTokPtr) 942 INVALID_CASES(ptr, nextTokPtr)
948 case BT_QUOT: 943 case BT_QUOT:
949 case BT_APOS: 944 case BT_APOS:
950 ptr += MINBPC(enc); 945 ptr += MINBPC(enc);
951 if (t != open) 946 if (t != open)
952 break; 947 break;
953 if (ptr == end) 948 if (! HAS_CHAR(enc, ptr, end))
954 return -XML_TOK_LITERAL; 949 return -XML_TOK_LITERAL;
955 *nextTokPtr = ptr; 950 *nextTokPtr = ptr;
956 switch (BYTE_TYPE(enc, ptr)) { 951 switch (BYTE_TYPE(enc, ptr)) {
957 case BT_S: case BT_CR: case BT_LF: 952 case BT_S: case BT_CR: case BT_LF:
958 case BT_GT: case BT_PERCNT: case BT_LSQB: 953 case BT_GT: case BT_PERCNT: case BT_LSQB:
959 return XML_TOK_LITERAL; 954 return XML_TOK_LITERAL;
960 default: 955 default:
961 return XML_TOK_INVALID; 956 return XML_TOK_INVALID;
962 } 957 }
963 default: 958 default:
964 ptr += MINBPC(enc); 959 ptr += MINBPC(enc);
965 break; 960 break;
966 } 961 }
967 } 962 }
968 return XML_TOK_PARTIAL; 963 return XML_TOK_PARTIAL;
969 } 964 }
970 965
971 static int PTRCALL 966 static int PTRCALL
972 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, 967 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
973 const char **nextTokPtr) 968 const char **nextTokPtr)
974 { 969 {
975 int tok; 970 int tok;
976 if (ptr == end) 971 if (ptr >= end)
977 return XML_TOK_NONE; 972 return XML_TOK_NONE;
978 if (MINBPC(enc) > 1) { 973 if (MINBPC(enc) > 1) {
979 size_t n = end - ptr; 974 size_t n = end - ptr;
980 if (n & (MINBPC(enc) - 1)) { 975 if (n & (MINBPC(enc) - 1)) {
981 n &= ~(MINBPC(enc) - 1); 976 n &= ~(MINBPC(enc) - 1);
982 if (n == 0) 977 if (n == 0)
983 return XML_TOK_PARTIAL; 978 return XML_TOK_PARTIAL;
984 end = ptr + n; 979 end = ptr + n;
985 } 980 }
986 } 981 }
987 switch (BYTE_TYPE(enc, ptr)) { 982 switch (BYTE_TYPE(enc, ptr)) {
988 case BT_QUOT: 983 case BT_QUOT:
989 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); 984 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
990 case BT_APOS: 985 case BT_APOS:
991 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); 986 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
992 case BT_LT: 987 case BT_LT:
993 { 988 {
994 ptr += MINBPC(enc); 989 ptr += MINBPC(enc);
995 if (ptr == end) 990 REQUIRE_CHAR(enc, ptr, end);
996 return XML_TOK_PARTIAL;
997 switch (BYTE_TYPE(enc, ptr)) { 991 switch (BYTE_TYPE(enc, ptr)) {
998 case BT_EXCL: 992 case BT_EXCL:
999 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); 993 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1000 case BT_QUEST: 994 case BT_QUEST:
1001 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 995 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1002 case BT_NMSTRT: 996 case BT_NMSTRT:
1003 case BT_HEX: 997 case BT_HEX:
1004 case BT_NONASCII: 998 case BT_NONASCII:
1005 case BT_LEAD2: 999 case BT_LEAD2:
1006 case BT_LEAD3: 1000 case BT_LEAD3:
1007 case BT_LEAD4: 1001 case BT_LEAD4:
1008 *nextTokPtr = ptr - MINBPC(enc); 1002 *nextTokPtr = ptr - MINBPC(enc);
1009 return XML_TOK_INSTANCE_START; 1003 return XML_TOK_INSTANCE_START;
1010 } 1004 }
1011 *nextTokPtr = ptr; 1005 *nextTokPtr = ptr;
1012 return XML_TOK_INVALID; 1006 return XML_TOK_INVALID;
1013 } 1007 }
1014 case BT_CR: 1008 case BT_CR:
1015 if (ptr + MINBPC(enc) == end) { 1009 if (ptr + MINBPC(enc) == end) {
1016 *nextTokPtr = end; 1010 *nextTokPtr = end;
1017 /* indicate that this might be part of a CR/LF pair */ 1011 /* indicate that this might be part of a CR/LF pair */
1018 return -XML_TOK_PROLOG_S; 1012 return -XML_TOK_PROLOG_S;
1019 } 1013 }
1020 /* fall through */ 1014 /* fall through */
1021 case BT_S: case BT_LF: 1015 case BT_S: case BT_LF:
1022 for (;;) { 1016 for (;;) {
1023 ptr += MINBPC(enc); 1017 ptr += MINBPC(enc);
1024 if (ptr == end) 1018 if (! HAS_CHAR(enc, ptr, end))
1025 break; 1019 break;
1026 switch (BYTE_TYPE(enc, ptr)) { 1020 switch (BYTE_TYPE(enc, ptr)) {
1027 case BT_S: case BT_LF: 1021 case BT_S: case BT_LF:
1028 break; 1022 break;
1029 case BT_CR: 1023 case BT_CR:
1030 /* don't split CR/LF pair */ 1024 /* don't split CR/LF pair */
1031 if (ptr + MINBPC(enc) != end) 1025 if (ptr + MINBPC(enc) != end)
1032 break; 1026 break;
1033 /* fall through */ 1027 /* fall through */
1034 default: 1028 default:
1035 *nextTokPtr = ptr; 1029 *nextTokPtr = ptr;
1036 return XML_TOK_PROLOG_S; 1030 return XML_TOK_PROLOG_S;
1037 } 1031 }
1038 } 1032 }
1039 *nextTokPtr = ptr; 1033 *nextTokPtr = ptr;
1040 return XML_TOK_PROLOG_S; 1034 return XML_TOK_PROLOG_S;
1041 case BT_PERCNT: 1035 case BT_PERCNT:
1042 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1036 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1043 case BT_COMMA: 1037 case BT_COMMA:
1044 *nextTokPtr = ptr + MINBPC(enc); 1038 *nextTokPtr = ptr + MINBPC(enc);
1045 return XML_TOK_COMMA; 1039 return XML_TOK_COMMA;
1046 case BT_LSQB: 1040 case BT_LSQB:
1047 *nextTokPtr = ptr + MINBPC(enc); 1041 *nextTokPtr = ptr + MINBPC(enc);
1048 return XML_TOK_OPEN_BRACKET; 1042 return XML_TOK_OPEN_BRACKET;
1049 case BT_RSQB: 1043 case BT_RSQB:
1050 ptr += MINBPC(enc); 1044 ptr += MINBPC(enc);
1051 if (ptr == end) 1045 if (! HAS_CHAR(enc, ptr, end))
1052 return -XML_TOK_CLOSE_BRACKET; 1046 return -XML_TOK_CLOSE_BRACKET;
1053 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1047 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1054 if (ptr + MINBPC(enc) == end) 1048 REQUIRE_CHARS(enc, ptr, end, 2);
1055 return XML_TOK_PARTIAL;
1056 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { 1049 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1057 *nextTokPtr = ptr + 2*MINBPC(enc); 1050 *nextTokPtr = ptr + 2*MINBPC(enc);
1058 return XML_TOK_COND_SECT_CLOSE; 1051 return XML_TOK_COND_SECT_CLOSE;
1059 } 1052 }
1060 } 1053 }
1061 *nextTokPtr = ptr; 1054 *nextTokPtr = ptr;
1062 return XML_TOK_CLOSE_BRACKET; 1055 return XML_TOK_CLOSE_BRACKET;
1063 case BT_LPAR: 1056 case BT_LPAR:
1064 *nextTokPtr = ptr + MINBPC(enc); 1057 *nextTokPtr = ptr + MINBPC(enc);
1065 return XML_TOK_OPEN_PAREN; 1058 return XML_TOK_OPEN_PAREN;
1066 case BT_RPAR: 1059 case BT_RPAR:
1067 ptr += MINBPC(enc); 1060 ptr += MINBPC(enc);
1068 if (ptr == end) 1061 if (! HAS_CHAR(enc, ptr, end))
1069 return -XML_TOK_CLOSE_PAREN; 1062 return -XML_TOK_CLOSE_PAREN;
1070 switch (BYTE_TYPE(enc, ptr)) { 1063 switch (BYTE_TYPE(enc, ptr)) {
1071 case BT_AST: 1064 case BT_AST:
1072 *nextTokPtr = ptr + MINBPC(enc); 1065 *nextTokPtr = ptr + MINBPC(enc);
1073 return XML_TOK_CLOSE_PAREN_ASTERISK; 1066 return XML_TOK_CLOSE_PAREN_ASTERISK;
1074 case BT_QUEST: 1067 case BT_QUEST:
1075 *nextTokPtr = ptr + MINBPC(enc); 1068 *nextTokPtr = ptr + MINBPC(enc);
1076 return XML_TOK_CLOSE_PAREN_QUESTION; 1069 return XML_TOK_CLOSE_PAREN_QUESTION;
1077 case BT_PLUS: 1070 case BT_PLUS:
1078 *nextTokPtr = ptr + MINBPC(enc); 1071 *nextTokPtr = ptr + MINBPC(enc);
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
1134 if (IS_NAME_CHAR_MINBPC(enc, ptr)) { 1127 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1135 ptr += MINBPC(enc); 1128 ptr += MINBPC(enc);
1136 tok = XML_TOK_NMTOKEN; 1129 tok = XML_TOK_NMTOKEN;
1137 break; 1130 break;
1138 } 1131 }
1139 /* fall through */ 1132 /* fall through */
1140 default: 1133 default:
1141 *nextTokPtr = ptr; 1134 *nextTokPtr = ptr;
1142 return XML_TOK_INVALID; 1135 return XML_TOK_INVALID;
1143 } 1136 }
1144 while (ptr != end) { 1137 while (HAS_CHAR(enc, ptr, end)) {
1145 switch (BYTE_TYPE(enc, ptr)) { 1138 switch (BYTE_TYPE(enc, ptr)) {
1146 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1139 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1147 case BT_GT: case BT_RPAR: case BT_COMMA: 1140 case BT_GT: case BT_RPAR: case BT_COMMA:
1148 case BT_VERBAR: case BT_LSQB: case BT_PERCNT: 1141 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1149 case BT_S: case BT_CR: case BT_LF: 1142 case BT_S: case BT_CR: case BT_LF:
1150 *nextTokPtr = ptr; 1143 *nextTokPtr = ptr;
1151 return tok; 1144 return tok;
1152 #ifdef XML_NS 1145 #ifdef XML_NS
1153 case BT_COLON: 1146 case BT_COLON:
1154 ptr += MINBPC(enc); 1147 ptr += MINBPC(enc);
1155 switch (tok) { 1148 switch (tok) {
1156 case XML_TOK_NAME: 1149 case XML_TOK_NAME:
1157 if (ptr == end) 1150 REQUIRE_CHAR(enc, ptr, end);
1158 return XML_TOK_PARTIAL;
1159 tok = XML_TOK_PREFIXED_NAME; 1151 tok = XML_TOK_PREFIXED_NAME;
1160 switch (BYTE_TYPE(enc, ptr)) { 1152 switch (BYTE_TYPE(enc, ptr)) {
1161 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1153 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1162 default: 1154 default:
1163 tok = XML_TOK_NMTOKEN; 1155 tok = XML_TOK_NMTOKEN;
1164 break; 1156 break;
1165 } 1157 }
1166 break; 1158 break;
1167 case XML_TOK_PREFIXED_NAME: 1159 case XML_TOK_PREFIXED_NAME:
1168 tok = XML_TOK_NMTOKEN; 1160 tok = XML_TOK_NMTOKEN;
(...skipping 28 matching lines...) Expand all
1197 } 1189 }
1198 } 1190 }
1199 return -tok; 1191 return -tok;
1200 } 1192 }
1201 1193
1202 static int PTRCALL 1194 static int PTRCALL
1203 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, 1195 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
1204 const char *end, const char **nextTokPtr) 1196 const char *end, const char **nextTokPtr)
1205 { 1197 {
1206 const char *start; 1198 const char *start;
1207 if (ptr == end) 1199 if (ptr >= end)
1208 return XML_TOK_NONE; 1200 return XML_TOK_NONE;
1201 else if (! HAS_CHAR(enc, ptr, end))
1202 return XML_TOK_PARTIAL;
1209 start = ptr; 1203 start = ptr;
1210 while (ptr != end) { 1204 while (HAS_CHAR(enc, ptr, end)) {
1211 switch (BYTE_TYPE(enc, ptr)) { 1205 switch (BYTE_TYPE(enc, ptr)) {
1212 #define LEAD_CASE(n) \ 1206 #define LEAD_CASE(n) \
1213 case BT_LEAD ## n: ptr += n; break; 1207 case BT_LEAD ## n: ptr += n; break;
1214 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1208 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1215 #undef LEAD_CASE 1209 #undef LEAD_CASE
1216 case BT_AMP: 1210 case BT_AMP:
1217 if (ptr == start) 1211 if (ptr == start)
1218 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1212 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1219 *nextTokPtr = ptr; 1213 *nextTokPtr = ptr;
1220 return XML_TOK_DATA_CHARS; 1214 return XML_TOK_DATA_CHARS;
1221 case BT_LT: 1215 case BT_LT:
1222 /* this is for inside entity references */ 1216 /* this is for inside entity references */
1223 *nextTokPtr = ptr; 1217 *nextTokPtr = ptr;
1224 return XML_TOK_INVALID; 1218 return XML_TOK_INVALID;
1225 case BT_LF: 1219 case BT_LF:
1226 if (ptr == start) { 1220 if (ptr == start) {
1227 *nextTokPtr = ptr + MINBPC(enc); 1221 *nextTokPtr = ptr + MINBPC(enc);
1228 return XML_TOK_DATA_NEWLINE; 1222 return XML_TOK_DATA_NEWLINE;
1229 } 1223 }
1230 *nextTokPtr = ptr; 1224 *nextTokPtr = ptr;
1231 return XML_TOK_DATA_CHARS; 1225 return XML_TOK_DATA_CHARS;
1232 case BT_CR: 1226 case BT_CR:
1233 if (ptr == start) { 1227 if (ptr == start) {
1234 ptr += MINBPC(enc); 1228 ptr += MINBPC(enc);
1235 if (ptr == end) 1229 if (! HAS_CHAR(enc, ptr, end))
1236 return XML_TOK_TRAILING_CR; 1230 return XML_TOK_TRAILING_CR;
1237 if (BYTE_TYPE(enc, ptr) == BT_LF) 1231 if (BYTE_TYPE(enc, ptr) == BT_LF)
1238 ptr += MINBPC(enc); 1232 ptr += MINBPC(enc);
1239 *nextTokPtr = ptr; 1233 *nextTokPtr = ptr;
1240 return XML_TOK_DATA_NEWLINE; 1234 return XML_TOK_DATA_NEWLINE;
1241 } 1235 }
1242 *nextTokPtr = ptr; 1236 *nextTokPtr = ptr;
1243 return XML_TOK_DATA_CHARS; 1237 return XML_TOK_DATA_CHARS;
1244 case BT_S: 1238 case BT_S:
1245 if (ptr == start) { 1239 if (ptr == start) {
1246 *nextTokPtr = ptr + MINBPC(enc); 1240 *nextTokPtr = ptr + MINBPC(enc);
1247 return XML_TOK_ATTRIBUTE_VALUE_S; 1241 return XML_TOK_ATTRIBUTE_VALUE_S;
1248 } 1242 }
1249 *nextTokPtr = ptr; 1243 *nextTokPtr = ptr;
1250 return XML_TOK_DATA_CHARS; 1244 return XML_TOK_DATA_CHARS;
1251 default: 1245 default:
1252 ptr += MINBPC(enc); 1246 ptr += MINBPC(enc);
1253 break; 1247 break;
1254 } 1248 }
1255 } 1249 }
1256 *nextTokPtr = ptr; 1250 *nextTokPtr = ptr;
1257 return XML_TOK_DATA_CHARS; 1251 return XML_TOK_DATA_CHARS;
1258 } 1252 }
1259 1253
1260 static int PTRCALL 1254 static int PTRCALL
1261 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, 1255 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
1262 const char *end, const char **nextTokPtr) 1256 const char *end, const char **nextTokPtr)
1263 { 1257 {
1264 const char *start; 1258 const char *start;
1265 if (ptr == end) 1259 if (ptr >= end)
1266 return XML_TOK_NONE; 1260 return XML_TOK_NONE;
1261 else if (! HAS_CHAR(enc, ptr, end))
1262 return XML_TOK_PARTIAL;
1267 start = ptr; 1263 start = ptr;
1268 while (ptr != end) { 1264 while (HAS_CHAR(enc, ptr, end)) {
1269 switch (BYTE_TYPE(enc, ptr)) { 1265 switch (BYTE_TYPE(enc, ptr)) {
1270 #define LEAD_CASE(n) \ 1266 #define LEAD_CASE(n) \
1271 case BT_LEAD ## n: ptr += n; break; 1267 case BT_LEAD ## n: ptr += n; break;
1272 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1268 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1273 #undef LEAD_CASE 1269 #undef LEAD_CASE
1274 case BT_AMP: 1270 case BT_AMP:
1275 if (ptr == start) 1271 if (ptr == start)
1276 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1272 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1277 *nextTokPtr = ptr; 1273 *nextTokPtr = ptr;
1278 return XML_TOK_DATA_CHARS; 1274 return XML_TOK_DATA_CHARS;
1279 case BT_PERCNT: 1275 case BT_PERCNT:
1280 if (ptr == start) { 1276 if (ptr == start) {
1281 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), 1277 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1282 end, nextTokPtr); 1278 end, nextTokPtr);
1283 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; 1279 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1284 } 1280 }
1285 *nextTokPtr = ptr; 1281 *nextTokPtr = ptr;
1286 return XML_TOK_DATA_CHARS; 1282 return XML_TOK_DATA_CHARS;
1287 case BT_LF: 1283 case BT_LF:
1288 if (ptr == start) { 1284 if (ptr == start) {
1289 *nextTokPtr = ptr + MINBPC(enc); 1285 *nextTokPtr = ptr + MINBPC(enc);
1290 return XML_TOK_DATA_NEWLINE; 1286 return XML_TOK_DATA_NEWLINE;
1291 } 1287 }
1292 *nextTokPtr = ptr; 1288 *nextTokPtr = ptr;
1293 return XML_TOK_DATA_CHARS; 1289 return XML_TOK_DATA_CHARS;
1294 case BT_CR: 1290 case BT_CR:
1295 if (ptr == start) { 1291 if (ptr == start) {
1296 ptr += MINBPC(enc); 1292 ptr += MINBPC(enc);
1297 if (ptr == end) 1293 if (! HAS_CHAR(enc, ptr, end))
1298 return XML_TOK_TRAILING_CR; 1294 return XML_TOK_TRAILING_CR;
1299 if (BYTE_TYPE(enc, ptr) == BT_LF) 1295 if (BYTE_TYPE(enc, ptr) == BT_LF)
1300 ptr += MINBPC(enc); 1296 ptr += MINBPC(enc);
1301 *nextTokPtr = ptr; 1297 *nextTokPtr = ptr;
1302 return XML_TOK_DATA_NEWLINE; 1298 return XML_TOK_DATA_NEWLINE;
1303 } 1299 }
1304 *nextTokPtr = ptr; 1300 *nextTokPtr = ptr;
1305 return XML_TOK_DATA_CHARS; 1301 return XML_TOK_DATA_CHARS;
1306 default: 1302 default:
1307 ptr += MINBPC(enc); 1303 ptr += MINBPC(enc);
(...skipping 11 matching lines...) Expand all
1319 const char *end, const char **nextTokPtr) 1315 const char *end, const char **nextTokPtr)
1320 { 1316 {
1321 int level = 0; 1317 int level = 0;
1322 if (MINBPC(enc) > 1) { 1318 if (MINBPC(enc) > 1) {
1323 size_t n = end - ptr; 1319 size_t n = end - ptr;
1324 if (n & (MINBPC(enc) - 1)) { 1320 if (n & (MINBPC(enc) - 1)) {
1325 n &= ~(MINBPC(enc) - 1); 1321 n &= ~(MINBPC(enc) - 1);
1326 end = ptr + n; 1322 end = ptr + n;
1327 } 1323 }
1328 } 1324 }
1329 while (ptr != end) { 1325 while (HAS_CHAR(enc, ptr, end)) {
1330 switch (BYTE_TYPE(enc, ptr)) { 1326 switch (BYTE_TYPE(enc, ptr)) {
1331 INVALID_CASES(ptr, nextTokPtr) 1327 INVALID_CASES(ptr, nextTokPtr)
1332 case BT_LT: 1328 case BT_LT:
1333 if ((ptr += MINBPC(enc)) == end) 1329 ptr += MINBPC(enc);
1334 return XML_TOK_PARTIAL; 1330 REQUIRE_CHAR(enc, ptr, end);
1335 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { 1331 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1336 if ((ptr += MINBPC(enc)) == end) 1332 ptr += MINBPC(enc);
1337 return XML_TOK_PARTIAL; 1333 REQUIRE_CHAR(enc, ptr, end);
1338 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { 1334 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1339 ++level; 1335 ++level;
1340 ptr += MINBPC(enc); 1336 ptr += MINBPC(enc);
1341 } 1337 }
1342 } 1338 }
1343 break; 1339 break;
1344 case BT_RSQB: 1340 case BT_RSQB:
1345 if ((ptr += MINBPC(enc)) == end) 1341 ptr += MINBPC(enc);
1346 return XML_TOK_PARTIAL; 1342 REQUIRE_CHAR(enc, ptr, end);
1347 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1343 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1348 if ((ptr += MINBPC(enc)) == end) 1344 ptr += MINBPC(enc);
1349 return XML_TOK_PARTIAL; 1345 REQUIRE_CHAR(enc, ptr, end);
1350 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1346 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1351 ptr += MINBPC(enc); 1347 ptr += MINBPC(enc);
1352 if (level == 0) { 1348 if (level == 0) {
1353 *nextTokPtr = ptr; 1349 *nextTokPtr = ptr;
1354 return XML_TOK_IGNORE_SECT; 1350 return XML_TOK_IGNORE_SECT;
1355 } 1351 }
1356 --level; 1352 --level;
1357 } 1353 }
1358 } 1354 }
1359 break; 1355 break;
1360 default: 1356 default:
1361 ptr += MINBPC(enc); 1357 ptr += MINBPC(enc);
1362 break; 1358 break;
1363 } 1359 }
1364 } 1360 }
1365 return XML_TOK_PARTIAL; 1361 return XML_TOK_PARTIAL;
1366 } 1362 }
1367 1363
1368 #endif /* XML_DTD */ 1364 #endif /* XML_DTD */
1369 1365
1370 static int PTRCALL 1366 static int PTRCALL
1371 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, 1367 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1372 const char **badPtr) 1368 const char **badPtr)
1373 { 1369 {
1374 ptr += MINBPC(enc); 1370 ptr += MINBPC(enc);
1375 end -= MINBPC(enc); 1371 end -= MINBPC(enc);
1376 for (; ptr != end; ptr += MINBPC(enc)) { 1372 for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1377 switch (BYTE_TYPE(enc, ptr)) { 1373 switch (BYTE_TYPE(enc, ptr)) {
1378 case BT_DIGIT: 1374 case BT_DIGIT:
1379 case BT_HEX: 1375 case BT_HEX:
1380 case BT_MINUS: 1376 case BT_MINUS:
1381 case BT_APOS: 1377 case BT_APOS:
1382 case BT_LPAR: 1378 case BT_LPAR:
1383 case BT_RPAR: 1379 case BT_RPAR:
1384 case BT_PLUS: 1380 case BT_PLUS:
1385 case BT_COMMA: 1381 case BT_COMMA:
1386 case BT_SOL: 1382 case BT_SOL:
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after
1514 return nAtts; 1510 return nAtts;
1515 break; 1511 break;
1516 default: 1512 default:
1517 break; 1513 break;
1518 } 1514 }
1519 } 1515 }
1520 /* not reached */ 1516 /* not reached */
1521 } 1517 }
1522 1518
1523 static int PTRFASTCALL 1519 static int PTRFASTCALL
1524 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) 1520 PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
1525 { 1521 {
1526 int result = 0; 1522 int result = 0;
1527 /* skip &# */ 1523 /* skip &# */
1528 ptr += 2*MINBPC(enc); 1524 ptr += 2*MINBPC(enc);
1529 if (CHAR_MATCHES(enc, ptr, ASCII_x)) { 1525 if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1530 for (ptr += MINBPC(enc); 1526 for (ptr += MINBPC(enc);
1531 !CHAR_MATCHES(enc, ptr, ASCII_SEMI); 1527 !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1532 ptr += MINBPC(enc)) { 1528 ptr += MINBPC(enc)) {
1533 int c = BYTE_TO_ASCII(enc, ptr); 1529 int c = BYTE_TO_ASCII(enc, ptr);
1534 switch (c) { 1530 switch (c) {
(...skipping 23 matching lines...) Expand all
1558 result *= 10; 1554 result *= 10;
1559 result += (c - ASCII_0); 1555 result += (c - ASCII_0);
1560 if (result >= 0x110000) 1556 if (result >= 0x110000)
1561 return -1; 1557 return -1;
1562 } 1558 }
1563 } 1559 }
1564 return checkCharRefNumber(result); 1560 return checkCharRefNumber(result);
1565 } 1561 }
1566 1562
1567 static int PTRCALL 1563 static int PTRCALL
1568 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, 1564 PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
1569 const char *end) 1565 const char *end)
1570 { 1566 {
1571 switch ((end - ptr)/MINBPC(enc)) { 1567 switch ((end - ptr)/MINBPC(enc)) {
1572 case 2: 1568 case 2:
1573 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { 1569 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1574 switch (BYTE_TO_ASCII(enc, ptr)) { 1570 switch (BYTE_TO_ASCII(enc, ptr)) {
1575 case ASCII_l: 1571 case ASCII_l:
1576 return ASCII_LT; 1572 return ASCII_LT;
1577 case ASCII_g: 1573 case ASCII_g:
1578 return ASCII_GT; 1574 return ASCII_GT;
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
1676 return 0; 1672 return 0;
1677 default: 1673 default:
1678 return 1; 1674 return 1;
1679 } 1675 }
1680 } 1676 }
1681 } 1677 }
1682 /* not reached */ 1678 /* not reached */
1683 } 1679 }
1684 1680
1685 static int PTRCALL 1681 static int PTRCALL
1686 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, 1682 PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
1687 const char *end1, const char *ptr2) 1683 const char *end1, const char *ptr2)
1688 { 1684 {
1689 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { 1685 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1690 if (ptr1 == end1) 1686 if (end1 - ptr1 < MINBPC(enc))
1691 return 0; 1687 return 0;
1692 if (!CHAR_MATCHES(enc, ptr1, *ptr2)) 1688 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1693 return 0; 1689 return 0;
1694 } 1690 }
1695 return ptr1 == end1; 1691 return ptr1 == end1;
1696 } 1692 }
1697 1693
1698 static int PTRFASTCALL 1694 static int PTRFASTCALL
1699 PREFIX(nameLength)(const ENCODING *enc, const char *ptr) 1695 PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1700 { 1696 {
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1737 } 1733 }
1738 } 1734 }
1739 } 1735 }
1740 1736
1741 static void PTRCALL 1737 static void PTRCALL
1742 PREFIX(updatePosition)(const ENCODING *enc, 1738 PREFIX(updatePosition)(const ENCODING *enc,
1743 const char *ptr, 1739 const char *ptr,
1744 const char *end, 1740 const char *end,
1745 POSITION *pos) 1741 POSITION *pos)
1746 { 1742 {
1747 while (ptr < end) { 1743 while (HAS_CHAR(enc, ptr, end)) {
1748 switch (BYTE_TYPE(enc, ptr)) { 1744 switch (BYTE_TYPE(enc, ptr)) {
1749 #define LEAD_CASE(n) \ 1745 #define LEAD_CASE(n) \
1750 case BT_LEAD ## n: \ 1746 case BT_LEAD ## n: \
1751 ptr += n; \ 1747 ptr += n; \
1752 break; 1748 break;
1753 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1749 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1754 #undef LEAD_CASE 1750 #undef LEAD_CASE
1755 case BT_LF: 1751 case BT_LF:
1756 pos->columnNumber = (XML_Size)-1; 1752 pos->columnNumber = (XML_Size)-1;
1757 pos->lineNumber++; 1753 pos->lineNumber++;
1758 ptr += MINBPC(enc); 1754 ptr += MINBPC(enc);
1759 break; 1755 break;
1760 case BT_CR: 1756 case BT_CR:
1761 pos->lineNumber++; 1757 pos->lineNumber++;
1762 ptr += MINBPC(enc); 1758 ptr += MINBPC(enc);
1763 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) 1759 if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
1764 ptr += MINBPC(enc); 1760 ptr += MINBPC(enc);
1765 pos->columnNumber = (XML_Size)-1; 1761 pos->columnNumber = (XML_Size)-1;
1766 break; 1762 break;
1767 default: 1763 default:
1768 ptr += MINBPC(enc); 1764 ptr += MINBPC(enc);
1769 break; 1765 break;
1770 } 1766 }
1771 pos->columnNumber++; 1767 pos->columnNumber++;
1772 } 1768 }
1773 } 1769 }
1774 1770
1775 #undef DO_LEAD_CASE 1771 #undef DO_LEAD_CASE
1776 #undef MULTIBYTE_CASES 1772 #undef MULTIBYTE_CASES
1777 #undef INVALID_CASES 1773 #undef INVALID_CASES
1778 #undef CHECK_NAME_CASE 1774 #undef CHECK_NAME_CASE
1779 #undef CHECK_NAME_CASES 1775 #undef CHECK_NAME_CASES
1780 #undef CHECK_NMSTRT_CASE 1776 #undef CHECK_NMSTRT_CASE
1781 #undef CHECK_NMSTRT_CASES 1777 #undef CHECK_NMSTRT_CASES
1782 1778
1783 #endif /* XML_TOK_IMPL_C */ 1779 #endif /* XML_TOK_IMPL_C */
OLDNEW
« no previous file with comments | « third_party/expat/files/lib/xmltok.c.origin ('k') | third_party/expat/files/lib/xmltok_impl.c.original » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698