OLD | NEW |
1 /* | 1 /* |
2 * HTMLparser.c : an HTML 4.0 non-verifying parser | 2 * HTMLparser.c : an HTML 4.0 non-verifying parser |
3 * | 3 * |
4 * See Copyright for the status of this software. | 4 * See Copyright for the status of this software. |
5 * | 5 * |
6 * daniel@veillard.com | 6 * daniel@veillard.com |
7 */ | 7 */ |
8 | 8 |
9 #define IN_LIBXML | 9 #define IN_LIBXML |
10 #include "libxml.h" | 10 #include "libxml.h" |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
52 /* #define DEBUG_PUSH */ | 52 /* #define DEBUG_PUSH */ |
53 | 53 |
54 static int htmlOmittedDefaultValue = 1; | 54 static int htmlOmittedDefaultValue = 1; |
55 | 55 |
56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, | 56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, |
57 xmlChar end, xmlChar end2, xmlChar end3); | 57 xmlChar end, xmlChar end2, xmlChar end3); |
58 static void htmlParseComment(htmlParserCtxtPtr ctxt); | 58 static void htmlParseComment(htmlParserCtxtPtr ctxt); |
59 | 59 |
60 /************************************************************************ | 60 /************************************************************************ |
61 * * | 61 * * |
62 * » » Some factorized error routines» » » » * | 62 *» » Some factorized error routines» » » » * |
63 * * | 63 * * |
64 ************************************************************************/ | 64 ************************************************************************/ |
65 | 65 |
66 /** | 66 /** |
67 * htmlErrMemory: | 67 * htmlErrMemory: |
68 * @ctxt: an HTML parser context | 68 * @ctxt: an HTML parser context |
69 * @extra: extra information | 69 * @extra: extra information |
70 * | 70 * |
71 * Handle an out of memory condition | 71 * Handle an out of memory condition |
72 */ | 72 */ |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
140 ctxt->errNo = error; | 140 ctxt->errNo = error; |
141 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error, | 141 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error, |
142 XML_ERR_ERROR, NULL, 0, NULL, NULL, | 142 XML_ERR_ERROR, NULL, 0, NULL, NULL, |
143 NULL, val, 0, msg, val); | 143 NULL, val, 0, msg, val); |
144 if (ctxt != NULL) | 144 if (ctxt != NULL) |
145 ctxt->wellFormed = 0; | 145 ctxt->wellFormed = 0; |
146 } | 146 } |
147 | 147 |
148 /************************************************************************ | 148 /************************************************************************ |
149 * * | 149 * * |
150 * » » Parser stacks related functions and macros» » * | 150 *» Parser stacks related functions and macros» » * |
151 * * | 151 * * |
152 ************************************************************************/ | 152 ************************************************************************/ |
153 | 153 |
154 /** | 154 /** |
155 * htmlnamePush: | 155 * htmlnamePush: |
156 * @ctxt: an HTML parser context | 156 * @ctxt: an HTML parser context |
157 * @value: the element name | 157 * @value: the element name |
158 * | 158 * |
159 * Pushes a new element name on top of the name stack | 159 * Pushes a new element name on top of the name stack |
160 * | 160 * |
161 * Returns 0 in case of error, the index in the stack otherwise | 161 * Returns 0 in case of error, the index in the stack otherwise |
162 */ | 162 */ |
163 static int | 163 static int |
164 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value) | 164 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value) |
165 { | 165 { |
| 166 if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head"))) |
| 167 ctxt->html = 3; |
| 168 if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body"))) |
| 169 ctxt->html = 10; |
166 if (ctxt->nameNr >= ctxt->nameMax) { | 170 if (ctxt->nameNr >= ctxt->nameMax) { |
167 ctxt->nameMax *= 2; | 171 ctxt->nameMax *= 2; |
168 ctxt->nameTab = (const xmlChar * *) | 172 ctxt->nameTab = (const xmlChar * *) |
169 xmlRealloc((xmlChar * *)ctxt->nameTab, | 173 xmlRealloc((xmlChar * *)ctxt->nameTab, |
170 ctxt->nameMax * | 174 ctxt->nameMax * |
171 sizeof(ctxt->nameTab[0])); | 175 sizeof(ctxt->nameTab[0])); |
172 if (ctxt->nameTab == NULL) { | 176 if (ctxt->nameTab == NULL) { |
173 htmlErrMemory(ctxt, NULL); | 177 htmlErrMemory(ctxt, NULL); |
174 return (0); | 178 return (0); |
175 } | 179 } |
(...skipping 22 matching lines...) Expand all Loading... |
198 return (NULL); | 202 return (NULL); |
199 if (ctxt->nameNr > 0) | 203 if (ctxt->nameNr > 0) |
200 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; | 204 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; |
201 else | 205 else |
202 ctxt->name = NULL; | 206 ctxt->name = NULL; |
203 ret = ctxt->nameTab[ctxt->nameNr]; | 207 ret = ctxt->nameTab[ctxt->nameNr]; |
204 ctxt->nameTab[ctxt->nameNr] = NULL; | 208 ctxt->nameTab[ctxt->nameNr] = NULL; |
205 return (ret); | 209 return (ret); |
206 } | 210 } |
207 | 211 |
| 212 /** |
| 213 * htmlNodeInfoPush: |
| 214 * @ctxt: an HTML parser context |
| 215 * @value: the node info to copy onto the stack |
| 216 * |
| 217 * Pushes a copy of @value on top of the node info stack, growing the table when it is full, and points ctxt->nodeInfo at the new top entry |
| 218 * |
| 219 * Returns 0 in case of error, the index in the stack otherwise (a push onto an empty stack also yields index 0) |
| 220 */ |
| 221 static int |
| 222 htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *value) |
| 223 { |
| 224 if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) { |
| 225 if (ctxt->nodeInfoMax == 0) |
| 226 ctxt->nodeInfoMax = 5; |
| 227 ctxt->nodeInfoMax *= 2; /* capacity doubles, so a table starting from 0 gets 10 entries */ |
| 228 ctxt->nodeInfoTab = (htmlParserNodeInfo *) /* NOTE(review): the result overwrites the only pointer to the old table; if xmlRealloc fails like realloc() the old table leaks and nodeInfoMax stays doubled - confirm */ |
| 229 xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab, |
| 230 ctxt->nodeInfoMax * |
| 231 sizeof(ctxt->nodeInfoTab[0])); |
| 232 if (ctxt->nodeInfoTab == NULL) { |
| 233 htmlErrMemory(ctxt, NULL); |
| 234 return (0); |
| 235 } |
| 236 } |
| 237 ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *value; /* stored by value; the pointer itself is not retained */ |
| 238 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr]; |
| 239 return (ctxt->nodeInfoNr++); |
| 240 } |
| 241 |
| 242 /** |
| 243 * htmlNodeInfoPop: |
| 244 * @ctxt: an HTML parser context |
| 245 * |
| 246 * Pops the top entry from the node info stack and points ctxt->nodeInfo at the entry below it (NULL when the stack becomes empty) |
| 247 * |
| 248 * Returns NULL if the stack is empty, otherwise a pointer to the popped entry, which still lives inside ctxt->nodeInfoTab |
| 249 */ |
| 250 static htmlParserNodeInfo * |
| 251 htmlNodeInfoPop(htmlParserCtxtPtr ctxt) |
| 252 { |
| 253 if (ctxt->nodeInfoNr <= 0) |
| 254 return (NULL); |
| 255 ctxt->nodeInfoNr--; |
| 256 if (ctxt->nodeInfoNr < 0) /* NOTE(review): cannot be true after the <= 0 guard above; looks like dead code */ |
| 257 return (NULL); |
| 258 if (ctxt->nodeInfoNr > 0) |
| 259 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1]; |
| 260 else |
| 261 ctxt->nodeInfo = NULL; |
| 262 return &ctxt->nodeInfoTab[ctxt->nodeInfoNr]; /* the slot is not cleared; the next htmlNodeInfoPush writes to this same index */ |
| 263 } |
| 264 |
208 /* | 265 /* |
209 * Macros for accessing the content. Those should be used only by the parser, | 266 * Macros for accessing the content. Those should be used only by the parser, |
210 * and not exported. | 267 * and not exported. |
211 * | 268 * |
212 * Dirty macros, i.e. one needs to make assumptions about the context to use them | 269 * Dirty macros, i.e. one needs to make assumptions about the context to use them |
213 * | 270 * |
214 * CUR_PTR return the current pointer to the xmlChar to be parsed. | 271 * CUR_PTR return the current pointer to the xmlChar to be parsed. |
215 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled | 272 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled |
216 * in ISO-Latin or UTF-8, and the current 16 bit value if compiled | 273 * in ISO-Latin or UTF-8, and the current 16 bit value if compiled |
217 * in UNICODE mode. This should be used internally by the parser | 274 * in UNICODE mode. This should be used internally by the parser |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
256 | 313 |
257 #define SKIP_BLANKS htmlSkipBlankChars(ctxt) | 314 #define SKIP_BLANKS htmlSkipBlankChars(ctxt) |
258 | 315 |
259 /* Imported from XML */ | 316 /* Imported from XML */ |
260 | 317 |
261 /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */ | 318 /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */ |
262 #define CUR ((int) (*ctxt->input->cur)) | 319 #define CUR ((int) (*ctxt->input->cur)) |
263 #define NEXT xmlNextChar(ctxt) | 320 #define NEXT xmlNextChar(ctxt) |
264 | 321 |
265 #define RAW (ctxt->token ? -1 : (*ctxt->input->cur)) | 322 #define RAW (ctxt->token ? -1 : (*ctxt->input->cur)) |
266 #define NXT(val) ctxt->input->cur[(val)] | |
267 #define CUR_PTR ctxt->input->cur | |
268 | 323 |
269 | 324 |
270 #define NEXTL(l) do { \ | 325 #define NEXTL(l) do { \ |
271 if (*(ctxt->input->cur) == '\n') { \ | 326 if (*(ctxt->input->cur) == '\n') { \ |
272 ctxt->input->line++; ctxt->input->col = 1; \ | 327 ctxt->input->line++; ctxt->input->col = 1; \ |
273 } else ctxt->input->col++; \ | 328 } else ctxt->input->col++; \ |
274 ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \ | 329 ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \ |
275 } while (0) | 330 } while (0) |
276 | 331 |
277 /************ | 332 /************ |
278 \ | 333 \ |
279 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ | 334 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ |
280 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); | 335 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); |
281 ************/ | 336 ************/ |
282 | 337 |
283 #define CUR_CHAR(l) htmlCurrentChar(ctxt, &l) | 338 #define CUR_CHAR(l) htmlCurrentChar(ctxt, &l) |
284 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) | 339 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) |
285 | 340 |
286 #define COPY_BUF(l,b,i,v) \ | 341 #define COPY_BUF(l,b,i,v) \ |
287 if (l == 1) b[i++] = (xmlChar) v; \ | 342 if (l == 1) b[i++] = (xmlChar) v; \ |
288 else i += xmlCopyChar(l,&b[i],v) | 343 else i += xmlCopyChar(l,&b[i],v) |
289 | 344 |
290 /** | 345 /** |
| 346 * htmlFindEncoding: |
| 347 * @ctxt: the HTML parser context |
| 348 * |
| 349 * Try to find an encoding in the current data available in the input |
| 350 * buffer. This is needed to try to switch to the proper encoding when |
| 351 * one faces a character error. |
| 352 * This is a heuristic: since it operates outside of parsing it could |
| 353 * try to use a meta which had been commented out, which is the reason it |
| 354 * should only be used in case of error, not as a default. |
| 355 * |
| 356 * Returns an encoding string or NULL if not found, the string needs to |
| 357 * be freed by the caller |
| 358 */ |
| 359 static xmlChar * |
| 360 htmlFindEncoding(xmlParserCtxtPtr ctxt) { |
| 361 const xmlChar *start, *cur, *end; |
| 362 |
| 363 if ((ctxt == NULL) || (ctxt->input == NULL) || |
| 364 (ctxt->input->encoding != NULL) || (ctxt->input->buf == NULL) || |
| 365 (ctxt->input->buf->encoder != NULL)) |
| 366 return(NULL); /* nothing to guess without an input buffer, or when an encoding or an encoder is already set */ |
| 367 if ((ctxt->input->cur == NULL) || (ctxt->input->end == NULL)) |
| 368 return(NULL); |
| 369 |
| 370 start = ctxt->input->cur; /* the search starts at the current input position */ |
| 371 end = ctxt->input->end; |
| 372 /* we also expect the input buffer to be zero terminated */ |
| 373 if (*end != 0) |
| 374 return(NULL); |
| 375 |
| 376 cur = xmlStrcasestr(start, BAD_CAST "HTTP-EQUIV"); /* each search below resumes from the previous match */ |
| 377 if (cur == NULL) |
| 378 return(NULL); |
| 379 cur = xmlStrcasestr(cur, BAD_CAST "CONTENT"); |
| 380 if (cur == NULL) |
| 381 return(NULL); |
| 382 cur = xmlStrcasestr(cur, BAD_CAST "CHARSET="); |
| 383 if (cur == NULL) |
| 384 return(NULL); |
| 385 cur += 8; /* skip the 8 characters of "CHARSET=" */ |
| 386 start = cur; |
| 387 while (((*cur >= 'A') && (*cur <= 'Z')) || |
| 388 ((*cur >= 'a') && (*cur <= 'z')) || |
| 389 ((*cur >= '0') && (*cur <= '9')) || |
| 390 (*cur == '-') || (*cur == '_') || (*cur == ':') || (*cur == '/')) |
| 391 cur++; /* the name ends at the first character that is not a letter, a digit or one of - _ : / */ |
| 392 if (cur == start) |
| 393 return(NULL); |
| 394 return(xmlStrndup(start, cur - start)); |
| 395 } |
| 396 |
| 397 /** |
291 * htmlCurrentChar: | 398 * htmlCurrentChar: |
292 * @ctxt: the HTML parser context | 399 * @ctxt: the HTML parser context |
293 * @len: pointer to the length of the char read | 400 * @len: pointer to the length of the char read |
294 * | 401 * |
295 * The current char value, if using UTF-8 this may actually span multiple | 402 * The current char value, if using UTF-8 this may actually span multiple |
296 * bytes in the input buffer. Implement the end of line normalization: | 403 * bytes in the input buffer. Implement the end of line normalization: |
297 * 2.11 End-of-Line Handling | 404 * 2.11 End-of-Line Handling |
298 * If the encoding is unspecified, in the case we find an ISO-Latin-1 | 405 * If the encoding is unspecified and a byte >= 0x80 is found, the encoding |
299 * char, then the encoding converter is plugged in automatically. | 406 * guessed by htmlFindEncoding() (ISO-Latin-1 if none) is plugged in; an unsupported guess is reported as an error. |
300 * | 407 * |
301 * Returns the current char value and its length | 408 * Returns the current char value and its length |
302 */ | 409 */ |
303 | 410 |
304 static int | 411 static int |
305 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { | 412 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { |
306 if (ctxt->instate == XML_PARSER_EOF) | 413 if (ctxt->instate == XML_PARSER_EOF) |
307 return(0); | 414 return(0); |
308 | 415 |
309 if (ctxt->token != 0) { | 416 if (ctxt->token != 0) { |
310 *len = 0; | 417 *len = 0; |
311 return(ctxt->token); | 418 return(ctxt->token); |
312 }» | 419 } |
313 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { | 420 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { |
314 /* | 421 /* |
315 * We are supposed to handle UTF8, check it's valid | 422 * We are supposed to handle UTF8, check it's valid |
316 * From rfc2044: encoding of the Unicode values on UTF-8: | 423 * From rfc2044: encoding of the Unicode values on UTF-8: |
317 * | 424 * |
318 * UCS-4 range (hex.) UTF-8 octet sequence (binary) | 425 * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
319 * 0000 0000-0000 007F 0xxxxxxx | 426 * 0000 0000-0000 007F 0xxxxxxx |
320 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx | 427 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
321 » * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx | 428 » * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
322 * | 429 * |
323 * Check for the 0x110000 limit too | 430 * Check for the 0x110000 limit too |
324 */ | 431 */ |
325 const unsigned char *cur = ctxt->input->cur; | 432 const unsigned char *cur = ctxt->input->cur; |
326 unsigned char c; | 433 unsigned char c; |
327 unsigned int val; | 434 unsigned int val; |
328 | 435 |
329 c = *cur; | 436 c = *cur; |
330 if (c & 0x80) { | 437 if (c & 0x80) { |
331 » if (cur[1] == 0) | 438 » if (cur[1] == 0) { |
332 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 439 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| 440 cur = ctxt->input->cur; /* re-read the pointer; presumably xmlParserInputGrow can move the buffer - confirm */ |
| 441 } |
333 if ((cur[1] & 0xc0) != 0x80) | 442 if ((cur[1] & 0xc0) != 0x80) |
334 goto encoding_error; | 443 goto encoding_error; |
335 if ((c & 0xe0) == 0xe0) { | 444 if ((c & 0xe0) == 0xe0) { |
336 | 445 |
337 » » if (cur[2] == 0) | 446 » » if (cur[2] == 0) { |
338 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 447 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| 448 cur = ctxt->input->cur; |
| 449 } |
339 if ((cur[2] & 0xc0) != 0x80) | 450 if ((cur[2] & 0xc0) != 0x80) |
340 goto encoding_error; | 451 goto encoding_error; |
341 if ((c & 0xf0) == 0xf0) { | 452 if ((c & 0xf0) == 0xf0) { |
342 » » if (cur[3] == 0) | 453 » » if (cur[3] == 0) { |
343 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 454 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| 455 cur = ctxt->input->cur; |
| 456 } |
344 if (((c & 0xf8) != 0xf0) || | 457 if (((c & 0xf8) != 0xf0) || |
345 ((cur[3] & 0xc0) != 0x80)) | 458 ((cur[3] & 0xc0) != 0x80)) |
346 goto encoding_error; | 459 goto encoding_error; |
347 /* 4-byte code */ | 460 /* 4-byte code */ |
348 *len = 4; | 461 *len = 4; |
349 val = (cur[0] & 0x7) << 18; | 462 val = (cur[0] & 0x7) << 18; |
350 val |= (cur[1] & 0x3f) << 12; | 463 val |= (cur[1] & 0x3f) << 12; |
351 val |= (cur[2] & 0x3f) << 6; | 464 val |= (cur[2] & 0x3f) << 6; |
352 val |= cur[3] & 0x3f; | 465 val |= cur[3] & 0x3f; |
353 } else { | 466 } else { |
354 /* 3-byte code */ | 467 /* 3-byte code */ |
355 *len = 3; | 468 *len = 3; |
356 val = (cur[0] & 0xf) << 12; | 469 val = (cur[0] & 0xf) << 12; |
357 val |= (cur[1] & 0x3f) << 6; | 470 val |= (cur[1] & 0x3f) << 6; |
358 val |= cur[2] & 0x3f; | 471 val |= cur[2] & 0x3f; |
359 } | 472 } |
360 } else { | 473 } else { |
361 /* 2-byte code */ | 474 /* 2-byte code */ |
362 *len = 2; | 475 *len = 2; |
363 val = (cur[0] & 0x1f) << 6; | 476 val = (cur[0] & 0x1f) << 6; |
364 val |= cur[1] & 0x3f; | 477 val |= cur[1] & 0x3f; |
365 } | 478 } |
366 if (!IS_CHAR(val)) { | 479 if (!IS_CHAR(val)) { |
367 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, | 480 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, |
368 "Char 0x%X out of allowed range\n", val); | 481 "Char 0x%X out of allowed range\n", val); |
369 » } | 482 » } |
370 return(val); | 483 return(val); |
371 } else { | 484 } else { |
| 485 if ((*ctxt->input->cur == 0) && /* a NUL byte that is not the end of the input: report it and return a space instead */ |
| 486 (ctxt->input->cur < ctxt->input->end)) { |
| 487 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, |
| 488 "Char 0x%X out of allowed range\n", 0); |
| 489 *len = 1; |
| 490 return(' '); |
| 491 } |
372 /* 1-byte code */ | 492 /* 1-byte code */ |
373 *len = 1; | 493 *len = 1; |
374 return((int) *ctxt->input->cur); | 494 return((int) *ctxt->input->cur); |
375 } | 495 } |
376 } | 496 } |
377 /* | 497 /* |
378 * Assume it's a fixed length encoding (1) with | 498 * Assume it's a fixed length encoding (1) with |
379 * a compatible encoding for the ASCII set, since | 499 * a compatible encoding for the ASCII set, since |
380 * XML constructs only use < 128 chars | 500 * XML constructs only use < 128 chars |
381 */ | 501 */ |
382 *len = 1; | 502 *len = 1; |
383 if ((int) *ctxt->input->cur < 0x80) | 503 if ((int) *ctxt->input->cur < 0x80) |
384 return((int) *ctxt->input->cur); | 504 return((int) *ctxt->input->cur); |
385 | 505 |
386 /* | 506 /* |
387 * Humm this is bad, do an automatic flow conversion | 507 * Humm this is bad, do an automatic flow conversion |
388 */ | 508 */ |
389 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); | 509 { |
390 ctxt->charset = XML_CHAR_ENCODING_UTF8; | 510 xmlChar * guess; |
| 511 xmlCharEncodingHandlerPtr handler; |
| 512 |
| 513 guess = htmlFindEncoding(ctxt); /* when non-NULL the string is stored in ctxt->input->encoding below */ |
| 514 if (guess == NULL) { |
| 515 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); |
| 516 } else { |
| 517 if (ctxt->input->encoding != NULL) /* NOTE(review): htmlFindEncoding() returns NULL when input->encoding is already set, so this looks unreachable with a non-NULL guess - confirm */ |
| 518 xmlFree((xmlChar *) ctxt->input->encoding); |
| 519 ctxt->input->encoding = guess; |
| 520 handler = xmlFindCharEncodingHandler((const char *) guess); |
| 521 if (handler != NULL) { |
| 522 xmlSwitchToEncoding(ctxt, handler); |
| 523 } else { |
| 524 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
| 525 "Unsupported encoding %s", guess, NULL); |
| 526 } |
| 527 } |
| 528 ctxt->charset = XML_CHAR_ENCODING_UTF8; /* set on every path, including when no handler was found for the guess */ |
| 529 } |
| 530 |
391 return(xmlCurrentChar(ctxt, len)); | 531 return(xmlCurrentChar(ctxt, len)); |
392 | 532 |
393 encoding_error: | 533 encoding_error: |
394 /* | 534 /* |
395 * If we detect an UTF8 error that probably mean that the | 535 * If we detect an UTF8 error that probably mean that the |
396 * input encoding didn't get properly advertized in the | 536 * input encoding didn't get properly advertized in the |
397 * declaration header. Report the error and switch the encoding | 537 * declaration header. Report the error and switch the encoding |
398 * to ISO-Latin-1 (if you don't like this policy, just declare the | 538 * to ISO-Latin-1 (if you don't like this policy, just declare the |
399 * encoding !) | 539 * encoding !) |
400 */ | 540 */ |
401 { | 541 { |
402 char buffer[150]; | 542 char buffer[150]; |
403 | 543 |
404 if (ctxt->input->end - ctxt->input->cur >= 4) { | 544 if (ctxt->input->end - ctxt->input->cur >= 4) { |
405 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", | 545 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", |
406 ctxt->input->cur[0], ctxt->input->cur[1], | 546 ctxt->input->cur[0], ctxt->input->cur[1], |
407 ctxt->input->cur[2], ctxt->input->cur[3]); | 547 ctxt->input->cur[2], ctxt->input->cur[3]); |
408 } else { | 548 } else { |
409 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]); | 549 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]); |
410 } | 550 } |
411 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 551 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
412 "Input is not proper UTF-8, indicate encoding !\n", | 552 "Input is not proper UTF-8, indicate encoding !\n", |
413 BAD_CAST buffer, NULL); | 553 BAD_CAST buffer, NULL); |
414 } | 554 } |
415 | 555 |
416 ctxt->charset = XML_CHAR_ENCODING_8859_1; | 556 ctxt->charset = XML_CHAR_ENCODING_8859_1; |
417 *len = 1; | 557 *len = 1; |
418 return((int) *ctxt->input->cur); | 558 return((int) *ctxt->input->cur); |
419 } | 559 } |
420 | 560 |
421 /** | 561 /** |
422 * htmlSkipBlankChars: | 562 * htmlSkipBlankChars: |
423 * @ctxt: the HTML parser context | 563 * @ctxt: the HTML parser context |
424 * | 564 * |
425 * skip all blanks character found at that point in the input streams. | 565 * skip all blanks character found at that point in the input streams. |
426 * | 566 * |
(...skipping 19 matching lines...) Expand all Loading... |
446 } | 586 } |
447 res++; | 587 res++; |
448 } | 588 } |
449 return(res); | 589 return(res); |
450 } | 590 } |
451 | 591 |
452 | 592 |
453 | 593 |
454 /************************************************************************ | 594 /************************************************************************ |
455 * * | 595 * * |
456 * » » The list of HTML elements and their properties» » * | 596 *» The list of HTML elements and their properties» » * |
457 * * | 597 * * |
458 ************************************************************************/ | 598 ************************************************************************/ |
459 | 599 |
460 /* | 600 /* |
461 * Start Tag: 1 means the start tag can be omitted | 601 * Start Tag: 1 means the start tag can be omitted |
462 * End Tag: 1 means the end tag can be omitted | 602 * End Tag: 1 means the end tag can be omitted |
463 * 2 means it's forbidden (empty elements) | 603 * 2 means it's forbidden (empty elements) |
464 * 3 means the tag is stylistic and should be closed easily | 604 * 3 means the tag is stylistic and should be closed easily |
465 * Depr: this element is deprecated | 605 * Depr: this element is deprecated |
466 * DTD: 1 means that this element is valid only in the Loose DTD | 606 * DTD: 1 means that this element is valid only in the Loose DTD |
467 * 2 means that this element is valid only in the Frameset DTD | 607 * 2 means that this element is valid only in the Frameset DTD |
468 * | 608 * |
469 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description | 609 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description |
470 , subElements , impliedsubelt , Attributes, userdata | 610 , subElements , impliedsubelt , Attributes, userdata |
471 */ | 611 */ |
472 | 612 |
473 /* Definitions and a couple of vars for HTML Elements */ | 613 /* Definitions and a couple of vars for HTML Elements */ |
474 | 614 |
475 #define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small" | 615 #define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small" |
476 #define NB_FONTSTYLE 8 | 616 #define NB_FONTSTYLE 8 |
477 #define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abb
r", "acronym" | 617 #define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abb
r", "acronym" |
478 #define NB_PHRASE 10 | 618 #define NB_PHRASE 10 |
479 #define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br
", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe" | 619 #define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br
", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe" |
480 #define NB_SPECIAL 16 | 620 #define NB_SPECIAL 16 |
481 #define INLINE PCDATA FONTSTYLE PHRASE SPECIAL FORMCTRL | 621 #define INLINE FONTSTYLE, PHRASE, SPECIAL, FORMCTRL |
482 #define NB_INLINE NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTR
L | 622 #define NB_INLINE NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTR
L |
483 #define BLOCK HEADING, LIST "pre", "p", "dl", "div", "center", "noscript", "nofr
ames", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address" | 623 #define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "nof
rames", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address" |
484 #define NB_BLOCK NB_HEADING + NB_LIST + 14 | 624 #define NB_BLOCK NB_HEADING + NB_LIST + 14 |
485 #define FORMCTRL "input", "select", "textarea", "label", "button" | 625 #define FORMCTRL "input", "select", "textarea", "label", "button" |
486 #define NB_FORMCTRL 5 | 626 #define NB_FORMCTRL 5 |
487 #define PCDATA | 627 #define PCDATA |
488 #define NB_PCDATA 0 | 628 #define NB_PCDATA 0 |
489 #define HEADING "h1", "h2", "h3", "h4", "h5", "h6" | 629 #define HEADING "h1", "h2", "h3", "h4", "h5", "h6" |
490 #define NB_HEADING 6 | 630 #define NB_HEADING 6 |
491 #define LIST "ul", "ol", "dir", "menu" | 631 #define LIST "ul", "ol", "dir", "menu" |
492 #define NB_LIST 4 | 632 #define NB_LIST 4 |
493 #define MODIFIER | 633 #define MODIFIER |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
599 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ; | 739 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ; |
600 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selecte
d", "value", NULL } ; | 740 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selecte
d", "value", NULL } ; |
601 static const char* const param_attrs[] = { "id", "value", "valuetype", "type", N
ULL } ; | 741 static const char* const param_attrs[] = { "id", "value", "valuetype", "type", N
ULL } ; |
602 static const char* const width_attr[] = { "width", NULL } ; | 742 static const char* const width_attr[] = { "width", NULL } ; |
603 static const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "st
rike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ; | 743 static const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "st
rike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ; |
604 static const char* const script_attrs[] = { "charset", "src", "defer", "event",
"for", NULL } ; | 744 static const char* const script_attrs[] = { "charset", "src", "defer", "event",
"for", NULL } ; |
605 static const char* const language_attr[] = { "language", NULL } ; | 745 static const char* const language_attr[] = { "language", NULL } ; |
606 static const char* const select_content[] = { "optgroup", "option", NULL } ; | 746 static const char* const select_content[] = { "optgroup", "option", NULL } ; |
607 static const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "
disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ; | 747 static const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "
disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ; |
608 static const char* const style_attrs[] = { I18N, "media", "title", NULL } ; | 748 static const char* const style_attrs[] = { I18N, "media", "title", NULL } ; |
609 static const char* const table_attrs[] = { ATTRS "summary", "width", "border", "
frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ; | 749 static const char* const table_attrs[] = { ATTRS, "summary", "width", "border",
"frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ; |
610 static const char* const table_depr[] = { "align", "bgcolor", NULL } ; | 750 static const char* const table_depr[] = { "align", "bgcolor", NULL } ; |
611 static const char* const table_contents[] = { "caption", "col", "colgroup", "the
ad", "tfoot", "tbody", "tr", NULL} ; | 751 static const char* const table_contents[] = { "caption", "col", "colgroup", "the
ad", "tfoot", "tbody", "tr", NULL} ; |
612 static const char* const tr_elt[] = { "tr", NULL } ; | 752 static const char* const tr_elt[] = { "tr", NULL } ; |
613 static const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL}
; | 753 static const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL}
; |
614 static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height"
, NULL } ; | 754 static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height"
, NULL } ; |
615 static const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "sco
pe", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ; | 755 static const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "sco
pe", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ; |
616 static const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readon
ly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL
} ; | 756 static const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readon
ly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL
} ; |
617 static const char* const tr_contents[] = { "th", "td", NULL } ; | 757 static const char* const tr_contents[] = { "th", "td", NULL } ; |
618 static const char* const bgcolor_attr[] = { "bgcolor", NULL } ; | 758 static const char* const bgcolor_attr[] = { "bgcolor", NULL } ; |
619 static const char* const li_elt[] = { "li", NULL } ; | 759 static const char* const li_elt[] = { "li", NULL } ; |
(...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
931 "dl", "p", "dt", "menu", "dir", "address", "pre", "listing", | 1071 "dl", "p", "dt", "menu", "dir", "address", "pre", "listing", |
932 "xmp", "head", NULL, | 1072 "xmp", "head", NULL, |
933 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", | 1073 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", |
934 "head", "dd", NULL, | 1074 "head", "dd", NULL, |
935 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", | 1075 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", |
936 "head", "dt", NULL, | 1076 "head", "dt", NULL, |
937 "ul", "p", "head", "ol", "menu", "dir", "address", "pre", | 1077 "ul", "p", "head", "ol", "menu", "dir", "address", "pre", |
938 "listing", "xmp", NULL, | 1078 "listing", "xmp", NULL, |
939 "ol", "p", "head", "ul", NULL, | 1079 "ol", "p", "head", "ul", NULL, |
940 "menu", "p", "head", "ul", NULL, | 1080 "menu", "p", "head", "ul", NULL, |
941 "p",» » "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", NULL, | 1081 "p",» » "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL
, |
942 "div", "p", "head", NULL, | 1082 "div", "p", "head", NULL, |
943 "noscript", "p", "head", NULL, | 1083 "noscript", "p", "head", NULL, |
944 "center", "font", "b", "i", "p", "head", NULL, | 1084 "center", "font", "b", "i", "p", "head", NULL, |
945 "a", "a", NULL, | 1085 "a", "a", NULL, |
946 "caption", "p", NULL, | 1086 "caption", "p", NULL, |
947 "colgroup", "caption", "colgroup", "col", "p", NULL, | 1087 "colgroup", "caption", "colgroup", "col", "p", NULL, |
948 "col", "caption", "col", "p", NULL, | 1088 "col", "caption", "col", "p", NULL, |
949 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", | 1089 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", |
950 "listing", "xmp", "a", NULL, | 1090 "listing", "xmp", "a", NULL, |
951 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, | 1091 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, |
952 "td",» » "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, | 1092 "td",» » "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, |
953 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, | 1093 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, |
954 "thead", "caption", "col", "colgroup", NULL, | 1094 "thead", "caption", "col", "colgroup", NULL, |
955 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", | 1095 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", |
956 "tbody", "p", NULL, | 1096 "tbody", "p", NULL, |
957 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", | 1097 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", |
958 "tfoot", "tbody", "p", NULL, | 1098 "tfoot", "tbody", "p", NULL, |
959 "optgroup", "option", NULL, | 1099 "optgroup", "option", NULL, |
960 "option", "option", NULL, | 1100 "option", "option", NULL, |
961 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", | 1101 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", |
962 "pre", "listing", "xmp", "a", NULL, | 1102 "pre", "listing", "xmp", "a", NULL, |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1001 "onchange", | 1141 "onchange", |
1002 "onselect" | 1142 "onselect" |
1003 }; | 1143 }; |
1004 | 1144 |
1005 /* | 1145 /* |
1006 * This table is used by the htmlparser to know what to do with | 1146 * This table is used by the htmlparser to know what to do with |
1007 * broken html pages. By assigning different priorities to different | 1147 * broken html pages. By assigning different priorities to different |
1008 * elements the parser can decide how to handle extra endtags. | 1148 * elements the parser can decide how to handle extra endtags. |
1009 * Endtags are only allowed to close elements with lower or equal | 1149 * Endtags are only allowed to close elements with lower or equal |
1010 * priority. | 1150 * priority. |
1011 */ | 1151 */ |
1012 | 1152 |
1013 typedef struct { | 1153 typedef struct { |
1014 const char *name; | 1154 const char *name; |
1015 int priority; | 1155 int priority; |
1016 } elementPriority; | 1156 } elementPriority; |
1017 | 1157 |
1018 static const elementPriority htmlEndPriority[] = { | 1158 static const elementPriority htmlEndPriority[] = { |
1019 {"div", 150}, | 1159 {"div", 150}, |
1020 {"td", 160}, | 1160 {"td", 160}, |
1021 {"th", 160}, | 1161 {"th", 160}, |
1022 {"tr", 170}, | 1162 {"tr", 170}, |
1023 {"thead", 180}, | 1163 {"thead", 180}, |
1024 {"tbody", 180}, | 1164 {"tbody", 180}, |
1025 {"tfoot", 180}, | 1165 {"tfoot", 180}, |
1026 {"table", 190}, | 1166 {"table", 190}, |
1027 {"head", 200}, | 1167 {"head", 200}, |
1028 {"body", 200}, | 1168 {"body", 200}, |
1029 {"html", 220}, | 1169 {"html", 220}, |
1030 {NULL, 100} /* Default priority */ | 1170 {NULL, 100} /* Default priority */ |
1031 }; | 1171 }; |
1032 | 1172 |
1033 static const char** htmlStartCloseIndex[100]; | 1173 static const char** htmlStartCloseIndex[100]; |
1034 static int htmlStartCloseIndexinitialized = 0; | 1174 static int htmlStartCloseIndexinitialized = 0; |
1035 | 1175 |
1036 /************************************************************************ | 1176 /************************************************************************ |
1037 * * | 1177 * * |
1038 * » » functions to handle HTML specific data» » » * | 1178 *» functions to handle HTML specific data» » » * |
1039 * * | 1179 * * |
1040 ************************************************************************/ | 1180 ************************************************************************/ |
1041 | 1181 |
1042 /** | 1182 /** |
1043 * htmlInitAutoClose: | 1183 * htmlInitAutoClose: |
1044 * | 1184 * |
1045 * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. | 1185 * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. |
1046 * This is not reentrant. Call xmlInitParser() once before processing in | 1186 * This is not reentrant. Call xmlInitParser() once before processing in |
1047 * case of use in multithreaded programs. | 1187 * case of use in multithreaded programs. |
1048 */ | 1188 */ |
(...skipping 29 matching lines...) Expand all Loading... |
1078 sizeof(html40ElementTable[0]));i++) { | 1218 sizeof(html40ElementTable[0]));i++) { |
1079 if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name)) | 1219 if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name)) |
1080 return((htmlElemDescPtr) &html40ElementTable[i]); | 1220 return((htmlElemDescPtr) &html40ElementTable[i]); |
1081 } | 1221 } |
1082 return(NULL); | 1222 return(NULL); |
1083 } | 1223 } |
1084 | 1224 |
1085 /** | 1225 /** |
1086 * htmlGetEndPriority: | 1226 * htmlGetEndPriority: |
1087 * @name: The name of the element to look up the priority for. | 1227 * @name: The name of the element to look up the priority for. |
1088 * | 1228 * |
1089 * Return value: The "endtag" priority. | 1229 * Return value: The "endtag" priority. |
1090 **/ | 1230 **/ |
1091 static int | 1231 static int |
1092 htmlGetEndPriority (const xmlChar *name) { | 1232 htmlGetEndPriority (const xmlChar *name) { |
1093 int i = 0; | 1233 int i = 0; |
1094 | 1234 |
1095 while ((htmlEndPriority[i].name != NULL) && | 1235 while ((htmlEndPriority[i].name != NULL) && |
1096 (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name))) | 1236 (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name))) |
1097 i++; | 1237 i++; |
1098 | 1238 |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1157 priority = htmlGetEndPriority(newtag); | 1297 priority = htmlGetEndPriority(newtag); |
1158 | 1298 |
1159 for (i = (ctxt->nameNr - 1); i >= 0; i--) { | 1299 for (i = (ctxt->nameNr - 1); i >= 0; i--) { |
1160 | 1300 |
1161 if (xmlStrEqual(newtag, ctxt->nameTab[i])) | 1301 if (xmlStrEqual(newtag, ctxt->nameTab[i])) |
1162 break; | 1302 break; |
1163 /* | 1303 /* |
1164 * A misplaced endtag can only close elements with lower | 1304 * A misplaced endtag can only close elements with lower |
1165 * or equal priority, so if we find an element with higher | 1305 * or equal priority, so if we find an element with higher |
1166 * priority before we find an element with | 1306 * priority before we find an element with |
1167 * matching name, we just ignore this endtag | 1307 * matching name, we just ignore this endtag |
1168 */ | 1308 */ |
1169 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority) | 1309 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority) |
1170 return; | 1310 return; |
1171 } | 1311 } |
1172 if (i < 0) | 1312 if (i < 0) |
1173 return; | 1313 return; |
1174 | 1314 |
1175 while (!xmlStrEqual(newtag, ctxt->name)) { | 1315 while (!xmlStrEqual(newtag, ctxt->name)) { |
1176 info = htmlTagLookup(ctxt->name); | 1316 info = htmlTagLookup(ctxt->name); |
1177 if ((info != NULL) && (info->endTag == 3)) { | 1317 if ((info != NULL) && (info->endTag == 3)) { |
(...skipping 30 matching lines...) Expand all Loading... |
1208 /** | 1348 /** |
1209 * htmlAutoClose: | 1349 * htmlAutoClose: |
1210 * @ctxt: an HTML parser context | 1350 * @ctxt: an HTML parser context |
1211 * @newtag: The new tag name or NULL | 1351 * @newtag: The new tag name or NULL |
1212 * | 1352 * |
1213 * The HTML DTD allows a tag to implicitly close other tags. | 1353 * The HTML DTD allows a tag to implicitly close other tags. |
1214 * The list is kept in htmlStartClose array. This function is | 1354 * The list is kept in htmlStartClose array. This function is |
1215 * called when a new tag has been detected and generates the | 1355 * called when a new tag has been detected and generates the |
1216 * appropriate closes if possible/needed. | 1356 * appropriate closes if possible/needed. |
1217 * If newtag is NULL this means we are at the end of the resource | 1357 * If newtag is NULL this means we are at the end of the resource |
1218 * and we should check | 1358 * and we should check |
1219 */ | 1359 */ |
1220 static void | 1360 static void |
1221 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag) | 1361 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag) |
1222 { | 1362 { |
1223 while ((newtag != NULL) && (ctxt->name != NULL) && | 1363 while ((newtag != NULL) && (ctxt->name != NULL) && |
1224 (htmlCheckAutoClose(newtag, ctxt->name))) { | 1364 (htmlCheckAutoClose(newtag, ctxt->name))) { |
1225 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) | 1365 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
1226 ctxt->sax->endElement(ctxt->userData, ctxt->name); | 1366 ctxt->sax->endElement(ctxt->userData, ctxt->name); |
1227 htmlnamePop(ctxt); | 1367 htmlnamePop(ctxt); |
1228 } | 1368 } |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1296 * htmlCheckImplied: | 1436 * htmlCheckImplied: |
1297 * @ctxt: an HTML parser context | 1437 * @ctxt: an HTML parser context |
1298 * @newtag: The new tag name | 1438 * @newtag: The new tag name |
1299 * | 1439 * |
1300 * The HTML DTD allows a tag to exist only implicitly. | 1440 * The HTML DTD allows a tag to exist only implicitly. |
1301 * Called when a new tag has been detected and generates the | 1441 * Called when a new tag has been detected and generates the |
1302 * appropriate implicit tags if missing | 1442 * appropriate implicit tags if missing |
1303 */ | 1443 */ |
1304 static void | 1444 static void |
1305 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { | 1445 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { |
| 1446 int i; |
| 1447 |
| 1448 if (ctxt->options & HTML_PARSE_NOIMPLIED) |
| 1449 return; |
1306 if (!htmlOmittedDefaultValue) | 1450 if (!htmlOmittedDefaultValue) |
1307 return; | 1451 return; |
1308 if (xmlStrEqual(newtag, BAD_CAST"html")) | 1452 if (xmlStrEqual(newtag, BAD_CAST"html")) |
1309 return; | 1453 return; |
1310 if (ctxt->nameNr <= 0) { | 1454 if (ctxt->nameNr <= 0) { |
1311 htmlnamePush(ctxt, BAD_CAST"html"); | 1455 htmlnamePush(ctxt, BAD_CAST"html"); |
1312 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) | 1456 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) |
1313 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL); | 1457 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL); |
1314 } | 1458 } |
1315 if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"h
ead"))) | 1459 if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"h
ead"))) |
1316 return; | 1460 return; |
1317 if ((ctxt->nameNr <= 1) && | 1461 if ((ctxt->nameNr <= 1) && |
1318 ((xmlStrEqual(newtag, BAD_CAST"script")) || | 1462 ((xmlStrEqual(newtag, BAD_CAST"script")) || |
1319 (xmlStrEqual(newtag, BAD_CAST"style")) || | 1463 (xmlStrEqual(newtag, BAD_CAST"style")) || |
1320 (xmlStrEqual(newtag, BAD_CAST"meta")) || | 1464 (xmlStrEqual(newtag, BAD_CAST"meta")) || |
1321 (xmlStrEqual(newtag, BAD_CAST"link")) || | 1465 (xmlStrEqual(newtag, BAD_CAST"link")) || |
1322 (xmlStrEqual(newtag, BAD_CAST"title")) || | 1466 (xmlStrEqual(newtag, BAD_CAST"title")) || |
1323 (xmlStrEqual(newtag, BAD_CAST"base")))) { | 1467 (xmlStrEqual(newtag, BAD_CAST"base")))) { |
1324 » /* | 1468 if (ctxt->html >= 3) { |
1325 » * dropped OBJECT ... if you put it first BODY will be | 1469 /* we already saw or generated a <head> before */ |
1326 » * assumed ! | 1470 return; |
1327 » */ | 1471 } |
1328 » htmlnamePush(ctxt, BAD_CAST"head"); | 1472 /* |
1329 » if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) | 1473 * dropped OBJECT ... if you put it first BODY will be |
1330 » » ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL); | 1474 * assumed ! |
| 1475 */ |
| 1476 htmlnamePush(ctxt, BAD_CAST"head"); |
| 1477 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) |
| 1478 ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL); |
1331 } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) && | 1479 } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) && |
1332 (!xmlStrEqual(newtag, BAD_CAST"frame")) && | 1480 (!xmlStrEqual(newtag, BAD_CAST"frame")) && |
1333 (!xmlStrEqual(newtag, BAD_CAST"frameset"))) { | 1481 (!xmlStrEqual(newtag, BAD_CAST"frameset"))) { |
1334 » int i; | 1482 if (ctxt->html >= 10) { |
| 1483 /* we already saw or generated a <body> before */ |
| 1484 return; |
| 1485 } |
1335 for (i = 0;i < ctxt->nameNr;i++) { | 1486 for (i = 0;i < ctxt->nameNr;i++) { |
1336 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) { | 1487 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) { |
1337 return; | 1488 return; |
1338 } | 1489 } |
1339 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) { | 1490 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) { |
1340 return; | 1491 return; |
1341 } | 1492 } |
1342 } | 1493 } |
1343 » | 1494 |
1344 htmlnamePush(ctxt, BAD_CAST"body"); | 1495 htmlnamePush(ctxt, BAD_CAST"body"); |
1345 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) | 1496 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) |
1346 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL); | 1497 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL); |
1347 } | 1498 } |
1348 } | 1499 } |
1349 | 1500 |
1350 /** | 1501 /** |
1351 * htmlCheckParagraph | 1502 * htmlCheckParagraph |
1352 * @ctxt: an HTML parser context | 1503 * @ctxt: an HTML parser context |
1353 * | 1504 * |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1395 * | 1546 * |
1396 * Check if an attribute is of content type Script | 1547 * Check if an attribute is of content type Script |
1397 * | 1548 * |
1398 * Returns 1 if the attribute is a script, 0 otherwise | 1549 * Returns 1 if the attribute is a script, 0 otherwise |
1399 */ | 1550 */ |
1400 int | 1551 int |
1401 htmlIsScriptAttribute(const xmlChar *name) { | 1552 htmlIsScriptAttribute(const xmlChar *name) { |
1402 unsigned int i; | 1553 unsigned int i; |
1403 | 1554 |
1404 if (name == NULL) | 1555 if (name == NULL) |
1405 »return(0); | 1556 return(0); |
1406 /* | 1557 /* |
1407 * all script attributes start with 'on' | 1558 * all script attributes start with 'on' |
1408 */ | 1559 */ |
1409 if ((name[0] != 'o') || (name[1] != 'n')) | 1560 if ((name[0] != 'o') || (name[1] != 'n')) |
1410 »return(0); | 1561 return(0); |
1411 for (i = 0; | 1562 for (i = 0; |
1412 i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]); | 1563 i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]); |
1413 i++) { | 1564 i++) { |
1414 if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i])) | 1565 if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i])) |
1415 return(1); | 1566 return(1); |
1416 } | 1567 } |
1417 return(0); | 1568 return(0); |
1418 } | 1569 } |
1419 | 1570 |
1420 /************************************************************************ | 1571 /************************************************************************ |
1421 * * | 1572 * * |
1422 * » » The list of HTML predefined entities» » » * | 1573 *» The list of HTML predefined entities» » » * |
1423 * * | 1574 * * |
1424 ************************************************************************/ | 1575 ************************************************************************/ |
1425 | 1576 |
1426 | 1577 |
1427 static const htmlEntityDesc html40EntitiesTable[] = { | 1578 static const htmlEntityDesc html40EntitiesTable[] = { |
1428 /* | 1579 /* |
1429 * the 4 absolute ones, plus apostrophe. | 1580 * the 4 absolute ones, plus apostrophe. |
1430 */ | 1581 */ |
1431 { 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" }, | 1582 { 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" }, |
1432 { 38, "amp", "ampersand, U+0026 ISOnum" }, | 1583 { 38, "amp", "ampersand, U+0026 ISOnum" }, |
(...skipping 393 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1826 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } | 1977 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } |
1827 else { | 1978 else { |
1828 /* no chance for this in Ascii */ | 1979 /* no chance for this in Ascii */ |
1829 *outlen = out - outstart; | 1980 *outlen = out - outstart; |
1830 *inlen = processed - instart; | 1981 *inlen = processed - instart; |
1831 return(-2); | 1982 return(-2); |
1832 } | 1983 } |
1833 | 1984 |
1834 if (inend - in < trailing) { | 1985 if (inend - in < trailing) { |
1835 break; | 1986 break; |
1836 » } | 1987 » } |
1837 | 1988 |
1838 for ( ; trailing; trailing--) { | 1989 for ( ; trailing; trailing--) { |
1839 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) | 1990 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) |
1840 break; | 1991 break; |
1841 c <<= 6; | 1992 c <<= 6; |
1842 c |= d & 0x3F; | 1993 c |= d & 0x3F; |
1843 } | 1994 } |
1844 | 1995 |
1845 /* assertion: c is a single UCS-4 value */ | 1996 /* assertion: c is a single UCS-4 value */ |
1846 if (c < 0x80) { | 1997 if (c < 0x80) { |
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2016 return(input); | 2167 return(input); |
2017 } | 2168 } |
2018 | 2169 |
2019 | 2170 |
2020 /************************************************************************ | 2171 /************************************************************************ |
2021 * * | 2172 * * |
2022 * Commodity functions, cleanup needed ? * | 2173 * Commodity functions, cleanup needed ? * |
2023 * * | 2174 * * |
2024 ************************************************************************/ | 2175 ************************************************************************/ |
2025 /* | 2176 /* |
2026 * all tags allowing pc data from the html 4.01 loose dtd | 2177 * all tags allowing pc data from the html 4.01 loose dtd |
2027 * NOTE: it might be more appropriate to integrate this information | 2178 * NOTE: it might be more appropriate to integrate this information |
2028 * into the html40ElementTable array but I don't want to risk any | 2179 * into the html40ElementTable array but I don't want to risk any |
2029 * binary incompatibility | 2180 * binary incompatibility |
2030 */ | 2181 */ |
2031 static const char *allowPCData[] = { | 2182 static const char *allowPCData[] = { |
2032 "a", "abbr", "acronym", "address", "applet", "b", "bdo", "big", | 2183 "a", "abbr", "acronym", "address", "applet", "b", "bdo", "big", |
2033 "blockquote", "body", "button", "caption", "center", "cite", "code", | 2184 "blockquote", "body", "button", "caption", "center", "cite", "code", |
2034 "dd", "del", "dfn", "div", "dt", "em", "font", "form", "h1", "h2", | 2185 "dd", "del", "dfn", "div", "dt", "em", "font", "form", "h1", "h2", |
2035 "h3", "h4", "h5", "h6", "i", "iframe", "ins", "kbd", "label", "legend", | 2186 "h3", "h4", "h5", "h6", "i", "iframe", "ins", "kbd", "label", "legend", |
2036 "li", "noframes", "noscript", "object", "p", "pre", "q", "s", "samp", | 2187 "li", "noframes", "noscript", "object", "p", "pre", "q", "s", "samp", |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2076 } | 2227 } |
2077 } | 2228 } |
2078 | 2229 |
2079 if (ctxt->node == NULL) return(0); | 2230 if (ctxt->node == NULL) return(0); |
2080 lastChild = xmlGetLastChild(ctxt->node); | 2231 lastChild = xmlGetLastChild(ctxt->node); |
2081 while ((lastChild) && (lastChild->type == XML_COMMENT_NODE)) | 2232 while ((lastChild) && (lastChild->type == XML_COMMENT_NODE)) |
2082 lastChild = lastChild->prev; | 2233 lastChild = lastChild->prev; |
2083 if (lastChild == NULL) { | 2234 if (lastChild == NULL) { |
2084 if ((ctxt->node->type != XML_ELEMENT_NODE) && | 2235 if ((ctxt->node->type != XML_ELEMENT_NODE) && |
2085 (ctxt->node->content != NULL)) return(0); | 2236 (ctxt->node->content != NULL)) return(0); |
2086 » /* keep ws in constructs like ...<b> </b>... | 2237 » /* keep ws in constructs like ...<b> </b>... |
2087 for all tags "b" allowing PCDATA */ | 2238 for all tags "b" allowing PCDATA */ |
2088 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { | 2239 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { |
2089 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) { | 2240 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) { |
2090 return(0); | 2241 return(0); |
2091 } | 2242 } |
2092 } | 2243 } |
2093 } else if (xmlNodeIsText(lastChild)) { | 2244 } else if (xmlNodeIsText(lastChild)) { |
2094 return(0); | 2245 return(0); |
2095 } else { | 2246 } else { |
2096 » /* keep ws in constructs like <p><b>xy</b> <i>z</i><p> | 2247 » /* keep ws in constructs like <p><b>xy</b> <i>z</i><p> |
2097 for all tags "p" allowing PCDATA */ | 2248 for all tags "p" allowing PCDATA */ |
2098 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { | 2249 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { |
2099 if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) { | 2250 if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) { |
2100 return(0); | 2251 return(0); |
2101 } | 2252 } |
2102 } | 2253 } |
2103 } | 2254 } |
2104 return(1); | 2255 return(1); |
2105 } | 2256 } |
2106 | 2257 |
(...skipping 19 matching lines...) Expand all Loading... |
2126 htmlErrMemory(NULL, "HTML document creation failed\n"); | 2277 htmlErrMemory(NULL, "HTML document creation failed\n"); |
2127 return(NULL); | 2278 return(NULL); |
2128 } | 2279 } |
2129 memset(cur, 0, sizeof(xmlDoc)); | 2280 memset(cur, 0, sizeof(xmlDoc)); |
2130 | 2281 |
2131 cur->type = XML_HTML_DOCUMENT_NODE; | 2282 cur->type = XML_HTML_DOCUMENT_NODE; |
2132 cur->version = NULL; | 2283 cur->version = NULL; |
2133 cur->intSubset = NULL; | 2284 cur->intSubset = NULL; |
2134 cur->doc = cur; | 2285 cur->doc = cur; |
2135 cur->name = NULL; | 2286 cur->name = NULL; |
2136 cur->children = NULL; | 2287 cur->children = NULL; |
2137 cur->extSubset = NULL; | 2288 cur->extSubset = NULL; |
2138 cur->oldNs = NULL; | 2289 cur->oldNs = NULL; |
2139 cur->encoding = NULL; | 2290 cur->encoding = NULL; |
2140 cur->standalone = 1; | 2291 cur->standalone = 1; |
2141 cur->compression = 0; | 2292 cur->compression = 0; |
2142 cur->ids = NULL; | 2293 cur->ids = NULL; |
2143 cur->refs = NULL; | 2294 cur->refs = NULL; |
2144 cur->_private = NULL; | 2295 cur->_private = NULL; |
2145 cur->charset = XML_CHAR_ENCODING_UTF8; | 2296 cur->charset = XML_CHAR_ENCODING_UTF8; |
| 2297 cur->properties = XML_DOC_HTML | XML_DOC_USERBUILT; |
2146 if ((ExternalID != NULL) || | 2298 if ((ExternalID != NULL) || |
2147 (URI != NULL)) | 2299 (URI != NULL)) |
2148 xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI); | 2300 xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI); |
2149 return(cur); | 2301 return(cur); |
2150 } | 2302 } |
2151 | 2303 |
2152 /** | 2304 /** |
2153 * htmlNewDoc: | 2305 * htmlNewDoc: |
2154 * @URI: URI for the dtd, or NULL | 2306 * @URI: URI for the dtd, or NULL |
2155 * @ExternalID: the external ID of the DTD, or NULL | 2307 * @ExternalID: the external ID of the DTD, or NULL |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2193 * | 2345 * |
2194 * Returns the Tag Name parsed or NULL | 2346 * Returns the Tag Name parsed or NULL |
2195 */ | 2347 */ |
2196 | 2348 |
2197 static const xmlChar * | 2349 static const xmlChar * |
2198 htmlParseHTMLName(htmlParserCtxtPtr ctxt) { | 2350 htmlParseHTMLName(htmlParserCtxtPtr ctxt) { |
2199 int i = 0; | 2351 int i = 0; |
2200 xmlChar loc[HTML_PARSER_BUFFER_SIZE]; | 2352 xmlChar loc[HTML_PARSER_BUFFER_SIZE]; |
2201 | 2353 |
2202 if (!IS_ASCII_LETTER(CUR) && (CUR != '_') && | 2354 if (!IS_ASCII_LETTER(CUR) && (CUR != '_') && |
2203 (CUR != ':')) return(NULL); | 2355 (CUR != ':') && (CUR != '.')) return(NULL); |
2204 | 2356 |
2205 while ((i < HTML_PARSER_BUFFER_SIZE) && | 2357 while ((i < HTML_PARSER_BUFFER_SIZE) && |
2206 ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) || | 2358 ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) || |
2207 » (CUR == ':') || (CUR == '-') || (CUR == '_'))) { | 2359 » (CUR == ':') || (CUR == '-') || (CUR == '_') || |
| 2360 (CUR == '.'))) { |
2208 if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; | 2361 if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; |
2209 else loc[i] = CUR; | 2362 else loc[i] = CUR; |
2210 i++; | 2363 i++; |
2211 » | 2364 |
2212 NEXT; | 2365 NEXT; |
2213 } | 2366 } |
2214 | 2367 |
2215 return(xmlDictLookup(ctxt->dict, loc, i)); | 2368 return(xmlDictLookup(ctxt->dict, loc, i)); |
2216 } | 2369 } |
2217 | 2370 |
2218 | 2371 |
2219 /** | 2372 /** |
2220 * htmlParseHTMLName_nonInvasive: | 2373 * htmlParseHTMLName_nonInvasive: |
2221 * @ctxt: an HTML parser context | 2374 * @ctxt: an HTML parser context |
2222 * | 2375 * |
2223 * parse an HTML tag or attribute name, note that we convert it to lowercase | 2376 * parse an HTML tag or attribute name, note that we convert it to lowercase |
2224 * since HTML names are not case-sensitive, this doesn't consume the data | 2377 * since HTML names are not case-sensitive, this doesn't consume the data |
2225 * from the stream, it's a look-ahead | 2378 * from the stream, it's a look-ahead |
2226 * | 2379 * |
2227 * Returns the Tag Name parsed or NULL | 2380 * Returns the Tag Name parsed or NULL |
2228 */ | 2381 */ |
2229 | 2382 |
2230 static const xmlChar * | 2383 static const xmlChar * |
2231 htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) { | 2384 htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) { |
2232 int i = 0; | 2385 int i = 0; |
2233 xmlChar loc[HTML_PARSER_BUFFER_SIZE]; | 2386 xmlChar loc[HTML_PARSER_BUFFER_SIZE]; |
2234 | 2387 |
2235 if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') && | 2388 if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') && |
2236 (NXT(1) != ':')) return(NULL); | 2389 (NXT(1) != ':')) return(NULL); |
2237 | 2390 |
2238 while ((i < HTML_PARSER_BUFFER_SIZE) && | 2391 while ((i < HTML_PARSER_BUFFER_SIZE) && |
2239 ((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) || | 2392 ((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) || |
2240 (NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) { | 2393 (NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) { |
2241 if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20; | 2394 if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20; |
2242 else loc[i] = NXT(1+i); | 2395 else loc[i] = NXT(1+i); |
2243 i++; | 2396 i++; |
2244 } | 2397 } |
2245 | 2398 |
2246 return(xmlDictLookup(ctxt->dict, loc, i)); | 2399 return(xmlDictLookup(ctxt->dict, loc, i)); |
2247 } | 2400 } |
2248 | 2401 |
2249 | 2402 |
2250 /** | 2403 /** |
2251 * htmlParseName: | 2404 * htmlParseName: |
2252 * @ctxt: an HTML parser context | 2405 * @ctxt: an HTML parser context |
2253 * | 2406 * |
2254 * parse an HTML name, this routine is case sensitive. | 2407 * parse an HTML name, this routine is case sensitive. |
2255 * | 2408 * |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2303 c = CUR_CHAR(l); | 2456 c = CUR_CHAR(l); |
2304 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ | 2457 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
2305 (!IS_LETTER(c) && (c != '_') && | 2458 (!IS_LETTER(c) && (c != '_') && |
2306 (c != ':'))) { | 2459 (c != ':'))) { |
2307 return(NULL); | 2460 return(NULL); |
2308 } | 2461 } |
2309 | 2462 |
2310 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ | 2463 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ |
2311 ((IS_LETTER(c)) || (IS_DIGIT(c)) || | 2464 ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
2312 (c == '.') || (c == '-') || | 2465 (c == '.') || (c == '-') || |
2313 » (c == '_') || (c == ':') || | 2466 » (c == '_') || (c == ':') || |
2314 (IS_COMBINING(c)) || | 2467 (IS_COMBINING(c)) || |
2315 (IS_EXTENDER(c)))) { | 2468 (IS_EXTENDER(c)))) { |
2316 if (count++ > 100) { | 2469 if (count++ > 100) { |
2317 count = 0; | 2470 count = 0; |
2318 GROW; | 2471 GROW; |
2319 } | 2472 } |
2320 len += l; | 2473 len += l; |
2321 NEXTL(l); | 2474 NEXTL(l); |
2322 c = CUR_CHAR(l); | 2475 c = CUR_CHAR(l); |
2323 } | 2476 } |
2324 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); | 2477 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); |
2325 } | 2478 } |
2326 | 2479 |
2327 | 2480 |
2328 /** | 2481 /** |
2329 * htmlParseHTMLAttribute: | 2482 * htmlParseHTMLAttribute: |
2330 * @ctxt: an HTML parser context | 2483 * @ctxt: an HTML parser context |
2331 * @stop: a char stop value | 2484 * @stop: a char stop value |
2332 * | 2485 * |
2333 * parse an HTML attribute value till the stop (quote), if | 2486 * parse an HTML attribute value till the stop (quote), if |
2334 * stop is 0 then it stops at the first space | 2487 * stop is 0 then it stops at the first space |
2335 * | 2488 * |
2336 * Returns the attribute parsed or NULL | 2489 * Returns the attribute parsed or NULL |
2337 */ | 2490 */ |
2338 | 2491 |
2339 static xmlChar * | 2492 static xmlChar * |
2340 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) { | 2493 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) { |
2341 xmlChar *buffer = NULL; | 2494 xmlChar *buffer = NULL; |
2342 int buffer_size = 0; | 2495 int buffer_size = 0; |
(...skipping 24 matching lines...) Expand all Loading... |
2367 unsigned int c; | 2520 unsigned int c; |
2368 int bits; | 2521 int bits; |
2369 | 2522 |
2370 c = htmlParseCharRef(ctxt); | 2523 c = htmlParseCharRef(ctxt); |
2371 if (c < 0x80) | 2524 if (c < 0x80) |
2372 { *out++ = c; bits= -6; } | 2525 { *out++ = c; bits= -6; } |
2373 else if (c < 0x800) | 2526 else if (c < 0x800) |
2374 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } | 2527 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
2375 else if (c < 0x10000) | 2528 else if (c < 0x10000) |
2376 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } | 2529 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
2377 » » else | 2530 » » else |
2378 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } | 2531 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } |
2379 » | 2532 |
2380 for ( ; bits >= 0; bits-= 6) { | 2533 for ( ; bits >= 0; bits-= 6) { |
2381 *out++ = ((c >> bits) & 0x3F) | 0x80; | 2534 *out++ = ((c >> bits) & 0x3F) | 0x80; |
2382 } | 2535 } |
2383 » » | 2536 |
2384 if (out - buffer > buffer_size - 100) { | 2537 if (out - buffer > buffer_size - 100) { |
2385 int indx = out - buffer; | 2538 int indx = out - buffer; |
2386 | 2539 |
2387 growBuffer(buffer); | 2540 growBuffer(buffer); |
2388 out = &buffer[indx]; | 2541 out = &buffer[indx]; |
2389 } | 2542 } |
2390 } else { | 2543 } else { |
2391 ent = htmlParseEntityRef(ctxt, &name); | 2544 ent = htmlParseEntityRef(ctxt, &name); |
2392 if (name == NULL) { | 2545 if (name == NULL) { |
2393 *out++ = '&'; | 2546 *out++ = '&'; |
(...skipping 25 matching lines...) Expand all Loading... |
2419 growBuffer(buffer); | 2572 growBuffer(buffer); |
2420 out = &buffer[indx]; | 2573 out = &buffer[indx]; |
2421 } | 2574 } |
2422 c = ent->value; | 2575 c = ent->value; |
2423 if (c < 0x80) | 2576 if (c < 0x80) |
2424 { *out++ = c; bits= -6; } | 2577 { *out++ = c; bits= -6; } |
2425 else if (c < 0x800) | 2578 else if (c < 0x800) |
2426 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } | 2579 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
2427 else if (c < 0x10000) | 2580 else if (c < 0x10000) |
2428 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } | 2581 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
2429 » » else | 2582 » » else |
2430 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } | 2583 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } |
2431 » | 2584 |
2432 for ( ; bits >= 0; bits-= 6) { | 2585 for ( ; bits >= 0; bits-= 6) { |
2433 *out++ = ((c >> bits) & 0x3F) | 0x80; | 2586 *out++ = ((c >> bits) & 0x3F) | 0x80; |
2434 } | 2587 } |
2435 } | 2588 } |
2436 } | 2589 } |
2437 } else { | 2590 } else { |
2438 unsigned int c; | 2591 unsigned int c; |
2439 int bits, l; | 2592 int bits, l; |
2440 | 2593 |
2441 if (out - buffer > buffer_size - 100) { | 2594 if (out - buffer > buffer_size - 100) { |
2442 int indx = out - buffer; | 2595 int indx = out - buffer; |
2443 | 2596 |
2444 growBuffer(buffer); | 2597 growBuffer(buffer); |
2445 out = &buffer[indx]; | 2598 out = &buffer[indx]; |
2446 } | 2599 } |
2447 c = CUR_CHAR(l); | 2600 c = CUR_CHAR(l); |
2448 if (c < 0x80) | 2601 if (c < 0x80) |
2449 { *out++ = c; bits= -6; } | 2602 { *out++ = c; bits= -6; } |
2450 else if (c < 0x800) | 2603 else if (c < 0x800) |
2451 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } | 2604 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
2452 else if (c < 0x10000) | 2605 else if (c < 0x10000) |
2453 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } | 2606 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
2454 » else | 2607 » else |
2455 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } | 2608 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } |
2456 | 2609 |
2457 for ( ; bits >= 0; bits-= 6) { | 2610 for ( ; bits >= 0; bits-= 6) { |
2458 *out++ = ((c >> bits) & 0x3F) | 0x80; | 2611 *out++ = ((c >> bits) & 0x3F) | 0x80; |
2459 } | 2612 } |
2460 NEXT; | 2613 NEXT; |
2461 } | 2614 } |
2462 } | 2615 } |
2463 *out++ = 0; | 2616 *out = 0; |
2464 return(buffer); | 2617 return(buffer); |
2465 } | 2618 } |
2466 | 2619 |
2467 /** | 2620 /** |
2468 * htmlParseEntityRef: | 2621 * htmlParseEntityRef: |
2469 * @ctxt: an HTML parser context | 2622 * @ctxt: an HTML parser context |
2470 * @str: location to store the entity name | 2623 * @str: location to store the entity name |
2471 * | 2624 * |
2472 * parse an HTML ENTITY reference | 2625 * parse an HTML ENTITY reference |
2473 * | 2626 * |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2514 return(ent); | 2667 return(ent); |
2515 } | 2668 } |
2516 | 2669 |
2517 /** | 2670 /** |
2518 * htmlParseAttValue: | 2671 * htmlParseAttValue: |
2519 * @ctxt: an HTML parser context | 2672 * @ctxt: an HTML parser context |
2520 * | 2673 * |
2521 * parse a value for an attribute | 2674 * parse a value for an attribute |
2522 * Note: the parser won't do substitution of entities here, this | 2675 * Note: the parser won't do substitution of entities here, this |
2523 * will be handled later in xmlStringGetNodeList, unless it was | 2676 * will be handled later in xmlStringGetNodeList, unless it was |
2524 * asked for ctxt->replaceEntities != 0 | 2677 * asked for ctxt->replaceEntities != 0 |
2525 * | 2678 * |
2526 * Returns the AttValue parsed or NULL. | 2679 * Returns the AttValue parsed or NULL. |
2527 */ | 2680 */ |
2528 | 2681 |
2529 static xmlChar * | 2682 static xmlChar * |
2530 htmlParseAttValue(htmlParserCtxtPtr ctxt) { | 2683 htmlParseAttValue(htmlParserCtxtPtr ctxt) { |
2531 xmlChar *ret = NULL; | 2684 xmlChar *ret = NULL; |
2532 | 2685 |
2533 if (CUR == '"') { | 2686 if (CUR == '"') { |
2534 NEXT; | 2687 NEXT; |
(...skipping 20 matching lines...) Expand all Loading... |
2555 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, | 2708 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, |
2556 "AttValue: no value found\n", NULL, NULL); | 2709 "AttValue: no value found\n", NULL, NULL); |
2557 } | 2710 } |
2558 } | 2711 } |
2559 return(ret); | 2712 return(ret); |
2560 } | 2713 } |
2561 | 2714 |
2562 /** | 2715 /** |
2563 * htmlParseSystemLiteral: | 2716 * htmlParseSystemLiteral: |
2564 * @ctxt: an HTML parser context | 2717 * @ctxt: an HTML parser context |
2565 * | 2718 * |
2566 * parse an HTML Literal | 2719 * parse an HTML Literal |
2567 * | 2720 * |
2568 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") | 2721 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") |
2569 * | 2722 * |
2570 * Returns the SystemLiteral parsed or NULL | 2723 * Returns the SystemLiteral parsed or NULL |
2571 */ | 2724 */ |
2572 | 2725 |
2573 static xmlChar * | 2726 static xmlChar * |
2574 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { | 2727 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { |
2575 const xmlChar *q; | 2728 const xmlChar *q; |
(...skipping 20 matching lines...) Expand all Loading... |
2596 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, | 2749 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, |
2597 "Unfinished SystemLiteral\n", NULL, NULL); | 2750 "Unfinished SystemLiteral\n", NULL, NULL); |
2598 } else { | 2751 } else { |
2599 ret = xmlStrndup(q, CUR_PTR - q); | 2752 ret = xmlStrndup(q, CUR_PTR - q); |
2600 NEXT; | 2753 NEXT; |
2601 } | 2754 } |
2602 } else { | 2755 } else { |
2603 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, | 2756 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, |
2604 " or ' expected\n", NULL, NULL); | 2757 " or ' expected\n", NULL, NULL); |
2605 } | 2758 } |
2606 | 2759 |
2607 return(ret); | 2760 return(ret); |
2608 } | 2761 } |
2609 | 2762 |
2610 /** | 2763 /** |
2611 * htmlParsePubidLiteral: | 2764 * htmlParsePubidLiteral: |
2612 * @ctxt: an HTML parser context | 2765 * @ctxt: an HTML parser context |
2613 * | 2766 * |
2614 * parse an HTML public literal | 2767 * parse an HTML public literal |
2615 * | 2768 * |
2616 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" | 2769 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" |
(...skipping 28 matching lines...) Expand all Loading... |
2645 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, | 2798 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, |
2646 "Unfinished PubidLiteral\n", NULL, NULL); | 2799 "Unfinished PubidLiteral\n", NULL, NULL); |
2647 } else { | 2800 } else { |
2648 ret = xmlStrndup(q, CUR_PTR - q); | 2801 ret = xmlStrndup(q, CUR_PTR - q); |
2649 NEXT; | 2802 NEXT; |
2650 } | 2803 } |
2651 } else { | 2804 } else { |
2652 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, | 2805 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, |
2653 "PubidLiteral \" or ' expected\n", NULL, NULL); | 2806 "PubidLiteral \" or ' expected\n", NULL, NULL); |
2654 } | 2807 } |
2655 | 2808 |
2656 return(ret); | 2809 return(ret); |
2657 } | 2810 } |
2658 | 2811 |
2659 /** | 2812 /** |
2660 * htmlParseScript: | 2813 * htmlParseScript: |
2661 * @ctxt: an HTML parser context | 2814 * @ctxt: an HTML parser context |
2662 * | 2815 * |
2663 * parse the content of an HTML SCRIPT or STYLE element | 2816 * parse the content of an HTML SCRIPT or STYLE element |
2664 * http://www.w3.org/TR/html4/sgml/dtd.html#Script | 2817 * http://www.w3.org/TR/html4/sgml/dtd.html#Script |
2665 * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet | 2818 * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet |
(...skipping 26 matching lines...) Expand all Loading... |
2692 * Authors should therefore escape "</" within the content. | 2845 * Authors should therefore escape "</" within the content. |
2693 * Escape mechanisms are specific to each scripting or | 2846 * Escape mechanisms are specific to each scripting or |
2694 * style sheet language. | 2847 * style sheet language. |
2695 * | 2848 * |
2696 * In recovery mode, only break if the end tag matches the | 2849 * In recovery mode, only break if the end tag matches the |
2697 * current tag, effectively ignoring all tags inside the | 2850 * current tag, effectively ignoring all tags inside the |
2698 * script/style block and treating the entire block as | 2851 * script/style block and treating the entire block as |
2699 * CDATA. | 2852 * CDATA. |
2700 */ | 2853 */ |
2701 if (ctxt->recovery) { | 2854 if (ctxt->recovery) { |
2702 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2, | 2855 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2, |
2703 » » » » xmlStrlen(ctxt->name)) == 0) | 2856 » » » » xmlStrlen(ctxt->name)) == 0) |
2704 { | 2857 { |
2705 break; /* while */ | 2858 break; /* while */ |
2706 } else { | 2859 } else { |
2707 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, | 2860 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, |
2708 "Element %s embeds close tag\n", | 2861 "Element %s embeds close tag\n", |
2709 ctxt->name, NULL); | 2862 ctxt->name, NULL); |
2710 } | 2863 } |
2711 } else { | 2864 } else { |
2712 if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) || | 2865 if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) || |
2713 ((NXT(2) >= 'a') && (NXT(2) <= 'z'))) | 2866 ((NXT(2) >= 'a') && (NXT(2) <= 'z'))) |
2714 { | 2867 { |
2715 break; /* while */ | 2868 break; /* while */ |
2716 } | 2869 } |
2717 } | 2870 } |
2718 } | 2871 } |
2719 COPY_BUF(l,buf,nbchar,cur); | 2872 COPY_BUF(l,buf,nbchar,cur); |
2720 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { | 2873 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { |
2721 if (ctxt->sax->cdataBlock!= NULL) { | 2874 if (ctxt->sax->cdataBlock!= NULL) { |
2722 /* | 2875 /* |
2723 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE | 2876 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2760 * if we are within a CDATA section ']]>' marks an end of section. | 2913 * if we are within a CDATA section ']]>' marks an end of section. |
2761 * | 2914 * |
2762 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) | 2915 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) |
2763 */ | 2916 */ |
2764 | 2917 |
2765 static void | 2918 static void |
2766 htmlParseCharData(htmlParserCtxtPtr ctxt) { | 2919 htmlParseCharData(htmlParserCtxtPtr ctxt) { |
2767 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5]; | 2920 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5]; |
2768 int nbchar = 0; | 2921 int nbchar = 0; |
2769 int cur, l; | 2922 int cur, l; |
| 2923 int chunk = 0; |
2770 | 2924 |
2771 SHRINK; | 2925 SHRINK; |
2772 cur = CUR_CHAR(l); | 2926 cur = CUR_CHAR(l); |
2773 while (((cur != '<') || (ctxt->token == '<')) && | 2927 while (((cur != '<') || (ctxt->token == '<')) && |
2774 ((cur != '&') || (ctxt->token == '&')) && | 2928 ((cur != '&') || (ctxt->token == '&')) && |
2775 (cur != 0)) { | 2929 (cur != 0)) { |
2776 if (!(IS_CHAR(cur))) { | 2930 if (!(IS_CHAR(cur))) { |
2777 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, | 2931 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, |
2778 "Invalid char in CDATA 0x%X\n", cur); | 2932 "Invalid char in CDATA 0x%X\n", cur); |
2779 } else { | 2933 } else { |
2780 COPY_BUF(l,buf,nbchar,cur); | 2934 COPY_BUF(l,buf,nbchar,cur); |
2781 } | 2935 } |
2782 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { | 2936 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { |
2783 /* | 2937 /* |
2784 * Ok the segment is to be consumed as chars. | 2938 * Ok the segment is to be consumed as chars. |
2785 */ | 2939 */ |
2786 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { | 2940 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
2787 if (areBlanks(ctxt, buf, nbchar)) { | 2941 if (areBlanks(ctxt, buf, nbchar)) { |
2788 if (ctxt->sax->ignorableWhitespace != NULL) | 2942 if (ctxt->sax->ignorableWhitespace != NULL) |
2789 ctxt->sax->ignorableWhitespace(ctxt->userData, | 2943 ctxt->sax->ignorableWhitespace(ctxt->userData, |
2790 buf, nbchar); | 2944 buf, nbchar); |
2791 } else { | 2945 } else { |
2792 htmlCheckParagraph(ctxt); | 2946 htmlCheckParagraph(ctxt); |
2793 if (ctxt->sax->characters != NULL) | 2947 if (ctxt->sax->characters != NULL) |
2794 ctxt->sax->characters(ctxt->userData, buf, nbchar); | 2948 ctxt->sax->characters(ctxt->userData, buf, nbchar); |
2795 } | 2949 } |
2796 } | 2950 } |
2797 nbchar = 0; | 2951 nbchar = 0; |
2798 } | 2952 } |
2799 NEXTL(l); | 2953 NEXTL(l); |
| 2954 chunk++; |
| 2955 if (chunk > HTML_PARSER_BUFFER_SIZE) { |
| 2956 chunk = 0; |
| 2957 SHRINK; |
| 2958 GROW; |
| 2959 } |
2800 cur = CUR_CHAR(l); | 2960 cur = CUR_CHAR(l); |
2801 if (cur == 0) { | 2961 if (cur == 0) { |
2802 SHRINK; | 2962 SHRINK; |
2803 GROW; | 2963 GROW; |
2804 cur = CUR_CHAR(l); | 2964 cur = CUR_CHAR(l); |
2805 } | 2965 } |
2806 } | 2966 } |
2807 if (nbchar != 0) { | 2967 if (nbchar != 0) { |
2808 buf[nbchar] = 0; | 2968 buf[nbchar] = 0; |
2809 | 2969 |
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2984 /* | 3144 /* |
2985 * SAX: PI detected. | 3145 * SAX: PI detected. |
2986 */ | 3146 */ |
2987 if ((ctxt->sax) && (!ctxt->disableSAX) && | 3147 if ((ctxt->sax) && (!ctxt->disableSAX) && |
2988 (ctxt->sax->processingInstruction != NULL)) | 3148 (ctxt->sax->processingInstruction != NULL)) |
2989 ctxt->sax->processingInstruction(ctxt->userData, | 3149 ctxt->sax->processingInstruction(ctxt->userData, |
2990 target, buf); | 3150 target, buf); |
2991 } | 3151 } |
2992 xmlFree(buf); | 3152 xmlFree(buf); |
2993 } else { | 3153 } else { |
2994 » htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED, | 3154 » htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED, |
2995 "PI is not started correctly", NULL, NULL); | 3155 "PI is not started correctly", NULL, NULL); |
2996 } | 3156 } |
2997 ctxt->instate = state; | 3157 ctxt->instate = state; |
2998 } | 3158 } |
2999 } | 3159 } |
3000 | 3160 |
3001 /** | 3161 /** |
3002 * htmlParseComment: | 3162 * htmlParseComment: |
3003 * @ctxt: an HTML parser context | 3163 * @ctxt: an HTML parser context |
3004 * | 3164 * |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3100 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 3260 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
3101 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 3261 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
3102 "htmlParseCharRef: context error\n", | 3262 "htmlParseCharRef: context error\n", |
3103 NULL, NULL); | 3263 NULL, NULL); |
3104 return(0); | 3264 return(0); |
3105 } | 3265 } |
3106 if ((CUR == '&') && (NXT(1) == '#') && | 3266 if ((CUR == '&') && (NXT(1) == '#') && |
3107 ((NXT(2) == 'x') || NXT(2) == 'X')) { | 3267 ((NXT(2) == 'x') || NXT(2) == 'X')) { |
3108 SKIP(3); | 3268 SKIP(3); |
3109 while (CUR != ';') { | 3269 while (CUR != ';') { |
3110 » if ((CUR >= '0') && (CUR <= '9')) | 3270 » if ((CUR >= '0') && (CUR <= '9')) |
3111 val = val * 16 + (CUR - '0'); | 3271 val = val * 16 + (CUR - '0'); |
3112 else if ((CUR >= 'a') && (CUR <= 'f')) | 3272 else if ((CUR >= 'a') && (CUR <= 'f')) |
3113 val = val * 16 + (CUR - 'a') + 10; | 3273 val = val * 16 + (CUR - 'a') + 10; |
3114 else if ((CUR >= 'A') && (CUR <= 'F')) | 3274 else if ((CUR >= 'A') && (CUR <= 'F')) |
3115 val = val * 16 + (CUR - 'A') + 10; | 3275 val = val * 16 + (CUR - 'A') + 10; |
3116 else { | 3276 else { |
3117 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, | 3277 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, |
3118 "htmlParseCharRef: missing semicolumn\n", | 3278 "htmlParseCharRef: missing semicolumn\n", |
3119 NULL, NULL); | 3279 NULL, NULL); |
3120 break; | 3280 break; |
3121 } | 3281 } |
3122 NEXT; | 3282 NEXT; |
3123 } | 3283 } |
3124 if (CUR == ';') | 3284 if (CUR == ';') |
3125 NEXT; | 3285 NEXT; |
3126 } else if ((CUR == '&') && (NXT(1) == '#')) { | 3286 } else if ((CUR == '&') && (NXT(1) == '#')) { |
3127 SKIP(2); | 3287 SKIP(2); |
3128 while (CUR != ';') { | 3288 while (CUR != ';') { |
3129 » if ((CUR >= '0') && (CUR <= '9')) | 3289 » if ((CUR >= '0') && (CUR <= '9')) |
3130 val = val * 10 + (CUR - '0'); | 3290 val = val * 10 + (CUR - '0'); |
3131 else { | 3291 else { |
3132 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, | 3292 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, |
3133 "htmlParseCharRef: missing semicolumn\n", | 3293 "htmlParseCharRef: missing semicolumn\n", |
3134 NULL, NULL); | 3294 NULL, NULL); |
3135 break; | 3295 break; |
3136 } | 3296 } |
3137 NEXT; | 3297 NEXT; |
3138 } | 3298 } |
3139 if (CUR == ';') | 3299 if (CUR == ';') |
(...skipping 15 matching lines...) Expand all Loading... |
3155 return(0); | 3315 return(0); |
3156 } | 3316 } |
3157 | 3317 |
3158 | 3318 |
3159 /** | 3319 /** |
3160 * htmlParseDocTypeDecl: | 3320 * htmlParseDocTypeDecl: |
3161 * @ctxt: an HTML parser context | 3321 * @ctxt: an HTML parser context |
3162 * | 3322 * |
3163 * parse a DOCTYPE declaration | 3323 * parse a DOCTYPE declaration |
3164 * | 3324 * |
3165 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? | 3325 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? |
3166 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' | 3326 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' |
3167 */ | 3327 */ |
3168 | 3328 |
3169 static void | 3329 static void |
3170 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) { | 3330 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) { |
3171 const xmlChar *name; | 3331 const xmlChar *name; |
3172 xmlChar *ExternalID = NULL; | 3332 xmlChar *ExternalID = NULL; |
3173 xmlChar *URI = NULL; | 3333 xmlChar *URI = NULL; |
3174 | 3334 |
3175 /* | 3335 /* |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3259 } | 3419 } |
3260 | 3420 |
3261 /* | 3421 /* |
3262 * read the value | 3422 * read the value |
3263 */ | 3423 */ |
3264 SKIP_BLANKS; | 3424 SKIP_BLANKS; |
3265 if (CUR == '=') { | 3425 if (CUR == '=') { |
3266 NEXT; | 3426 NEXT; |
3267 SKIP_BLANKS; | 3427 SKIP_BLANKS; |
3268 val = htmlParseAttValue(ctxt); | 3428 val = htmlParseAttValue(ctxt); |
3269 } else if (htmlIsBooleanAttr(name)) { | |
3270 /* | |
3271 * assume a minimized attribute | |
3272 */ | |
3273 val = xmlStrdup(name); | |
3274 } | 3429 } |
3275 | 3430 |
3276 *value = val; | 3431 *value = val; |
3277 return(name); | 3432 return(name); |
3278 } | 3433 } |
3279 | 3434 |
3280 /** | 3435 /** |
3281 * htmlCheckEncoding: | 3436 * htmlCheckEncoding: |
3282 * @ctxt: an HTML parser context | 3437 * @ctxt: an HTML parser context |
3283 * @attvalue: the attribute value | 3438 * @attvalue: the attribute value |
3284 * | 3439 * |
3285 * Checks an http-equiv attribute from a Meta tag to detect | 3440 * Checks an http-equiv attribute from a Meta tag to detect |
3286 * the encoding | 3441 * the encoding |
3287 * If a new encoding is detected the parser is switched to decode | 3442 * If a new encoding is detected the parser is switched to decode |
3288 * it and pass UTF8 | 3443 * it and pass UTF8 |
3289 */ | 3444 */ |
3290 static void | 3445 static void |
3291 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { | 3446 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { |
3292 const xmlChar *encoding; | 3447 const xmlChar *encoding; |
3293 | 3448 |
3294 if ((ctxt == NULL) || (attvalue == NULL)) | 3449 if ((ctxt == NULL) || (attvalue == NULL)) |
3295 return; | 3450 return; |
3296 | 3451 |
3297 /* do not change encoding */» | 3452 /* do not change encoding */ |
3298 if (ctxt->input->encoding != NULL) | 3453 if (ctxt->input->encoding != NULL) |
3299 return; | 3454 return; |
3300 | 3455 |
3301 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); | 3456 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); |
3302 if (encoding != NULL) { | 3457 if (encoding != NULL) { |
3303 encoding += 8; | 3458 encoding += 8; |
3304 } else { | 3459 } else { |
3305 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); | 3460 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); |
3306 if (encoding != NULL) | 3461 if (encoding != NULL) |
3307 encoding += 9; | 3462 encoding += 9; |
3308 } | 3463 } |
3309 if (encoding != NULL) { | 3464 if (encoding != NULL) { |
3310 xmlCharEncoding enc; | 3465 xmlCharEncoding enc; |
3311 xmlCharEncodingHandlerPtr handler; | 3466 xmlCharEncodingHandlerPtr handler; |
3312 | 3467 |
3313 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; | 3468 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; |
3314 | 3469 |
3315 if (ctxt->input->encoding != NULL) | 3470 if (ctxt->input->encoding != NULL) |
3316 xmlFree((xmlChar *) ctxt->input->encoding); | 3471 xmlFree((xmlChar *) ctxt->input->encoding); |
3317 ctxt->input->encoding = xmlStrdup(encoding); | 3472 ctxt->input->encoding = xmlStrdup(encoding); |
3318 | 3473 |
3319 enc = xmlParseCharEncoding((const char *) encoding); | 3474 enc = xmlParseCharEncoding((const char *) encoding); |
3320 /* | 3475 /* |
3321 * registered set of known encodings | 3476 * registered set of known encodings |
3322 */ | 3477 */ |
3323 if (enc != XML_CHAR_ENCODING_ERROR) { | 3478 if (enc != XML_CHAR_ENCODING_ERROR) { |
3324 » if (((enc == XML_CHAR_ENCODING_UTF16LE) || | 3479 » if (((enc == XML_CHAR_ENCODING_UTF16LE) || |
3325 (enc == XML_CHAR_ENCODING_UTF16BE) || | 3480 (enc == XML_CHAR_ENCODING_UTF16BE) || |
3326 (enc == XML_CHAR_ENCODING_UCS4LE) || | 3481 (enc == XML_CHAR_ENCODING_UCS4LE) || |
3327 (enc == XML_CHAR_ENCODING_UCS4BE)) && | 3482 (enc == XML_CHAR_ENCODING_UCS4BE)) && |
3328 (ctxt->input->buf != NULL) && | 3483 (ctxt->input->buf != NULL) && |
3329 (ctxt->input->buf->encoder == NULL)) { | 3484 (ctxt->input->buf->encoder == NULL)) { |
3330 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 3485 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
3331 "htmlCheckEncoding: wrong encoding meta\n", | 3486 "htmlCheckEncoding: wrong encoding meta\n", |
3332 NULL, NULL); | 3487 NULL, NULL); |
3333 } else { | 3488 } else { |
3334 xmlSwitchEncoding(ctxt, enc); | 3489 xmlSwitchEncoding(ctxt, enc); |
(...skipping 27 matching lines...) Expand all Loading... |
3362 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, | 3517 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, |
3363 ctxt->input->buf->buffer, | 3518 ctxt->input->buf->buffer, |
3364 ctxt->input->buf->raw); | 3519 ctxt->input->buf->raw); |
3365 if (nbchars < 0) { | 3520 if (nbchars < 0) { |
3366 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 3521 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
3367 "htmlCheckEncoding: encoder error\n", | 3522 "htmlCheckEncoding: encoder error\n", |
3368 NULL, NULL); | 3523 NULL, NULL); |
3369 } | 3524 } |
3370 ctxt->input->base = | 3525 ctxt->input->base = |
3371 ctxt->input->cur = ctxt->input->buf->buffer->content; | 3526 ctxt->input->cur = ctxt->input->buf->buffer->content; |
| 3527 ctxt->input->end = |
| 3528 &ctxt->input->base[ctxt->input->buf->buffer->use]; |
3372 } | 3529 } |
3373 } | 3530 } |
3374 } | 3531 } |
3375 | 3532 |
3376 /** | 3533 /** |
3377 * htmlCheckMeta: | 3534 * htmlCheckMeta: |
3378 * @ctxt: an HTML parser context | 3535 * @ctxt: an HTML parser context |
3379 * @atts: the attributes values | 3536 * @atts: the attributes values |
3380 * | 3537 * |
3381 * Checks the attributes of a Meta tag | 3538 * Checks the attributes of a Meta tag |
(...skipping 20 matching lines...) Expand all Loading... |
3402 att = atts[i++]; | 3559 att = atts[i++]; |
3403 } | 3560 } |
3404 if ((http) && (content != NULL)) | 3561 if ((http) && (content != NULL)) |
3405 htmlCheckEncoding(ctxt, content); | 3562 htmlCheckEncoding(ctxt, content); |
3406 | 3563 |
3407 } | 3564 } |
3408 | 3565 |
3409 /** | 3566 /** |
3410 * htmlParseStartTag: | 3567 * htmlParseStartTag: |
3411 * @ctxt: an HTML parser context | 3568 * @ctxt: an HTML parser context |
3412 * | 3569 * |
3413 * parse a start of tag either for rule element or | 3570 * parse a start of tag either for rule element or |
3414 * EmptyElement. In both cases we don't parse the tag closing chars. | 3571 * EmptyElement. In both cases we don't parse the tag closing chars. |
3415 * | 3572 * |
3416 * [40] STag ::= '<' Name (S Attribute)* S? '>' | 3573 * [40] STag ::= '<' Name (S Attribute)* S? '>' |
3417 * | 3574 * |
3418 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' | 3575 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' |
3419 * | 3576 * |
3420 * With namespace: | 3577 * With namespace: |
3421 * | 3578 * |
3422 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' | 3579 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' |
3423 * | 3580 * |
3424 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' | 3581 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' |
3425 * | 3582 * |
3426 * Returns 0 in case of success, -1 in case of error and 1 if discarded | 3583 * Returns 0 in case of success, -1 in case of error and 1 if discarded |
3427 */ | 3584 */ |
3428 | 3585 |
3429 static int | 3586 static int |
3430 htmlParseStartTag(htmlParserCtxtPtr ctxt) { | 3587 htmlParseStartTag(htmlParserCtxtPtr ctxt) { |
3431 const xmlChar *name; | 3588 const xmlChar *name; |
3432 const xmlChar *attname; | 3589 const xmlChar *attname; |
3433 xmlChar *attvalue; | 3590 xmlChar *attvalue; |
3434 const xmlChar **atts; | 3591 const xmlChar **atts; |
3435 int nbatts = 0; | 3592 int nbatts = 0; |
3436 int maxatts; | 3593 int maxatts; |
3437 int meta = 0; | 3594 int meta = 0; |
3438 int i; | 3595 int i; |
3439 int discardtag = 0; | 3596 int discardtag = 0; |
3440 | 3597 |
| 3598 if (ctxt->instate == XML_PARSER_EOF) |
| 3599 return(-1); |
3441 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 3600 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
3442 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 3601 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
3443 "htmlParseStartTag: context error\n", NULL, NULL); | 3602 "htmlParseStartTag: context error\n", NULL, NULL); |
3444 return -1; | 3603 return -1; |
3445 } | 3604 } |
3446 if (CUR != '<') return -1; | 3605 if (CUR != '<') return -1; |
3447 NEXT; | 3606 NEXT; |
3448 | 3607 |
3449 atts = ctxt->atts; | 3608 atts = ctxt->atts; |
3450 maxatts = ctxt->maxatts; | 3609 maxatts = ctxt->maxatts; |
3451 | 3610 |
3452 GROW; | 3611 GROW; |
3453 name = htmlParseHTMLName(ctxt); | 3612 name = htmlParseHTMLName(ctxt); |
3454 if (name == NULL) { | 3613 if (name == NULL) { |
3455 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, | 3614 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, |
3456 "htmlParseStartTag: invalid element name\n", | 3615 "htmlParseStartTag: invalid element name\n", |
3457 NULL, NULL); | 3616 NULL, NULL); |
3458 /* Dump the bogus tag like browsers do */ | 3617 /* Dump the bogus tag like browsers do */ |
3459 » while ((IS_CHAR_CH(CUR)) && (CUR != '>')) | 3618 » while ((IS_CHAR_CH(CUR)) && (CUR != '>') && |
| 3619 (ctxt->instate != XML_PARSER_EOF)) |
3460 NEXT; | 3620 NEXT; |
3461 return -1; | 3621 return -1; |
3462 } | 3622 } |
3463 if (xmlStrEqual(name, BAD_CAST"meta")) | 3623 if (xmlStrEqual(name, BAD_CAST"meta")) |
3464 meta = 1; | 3624 meta = 1; |
3465 | 3625 |
3466 /* | 3626 /* |
3467 * Check for auto-closure of HTML elements. | 3627 * Check for auto-closure of HTML elements. |
3468 */ | 3628 */ |
3469 htmlAutoClose(ctxt, name); | 3629 htmlAutoClose(ctxt, name); |
3470 | 3630 |
3471 /* | 3631 /* |
3472 * Check for implied HTML elements. | 3632 * Check for implied HTML elements. |
3473 */ | 3633 */ |
3474 htmlCheckImplied(ctxt, name); | 3634 htmlCheckImplied(ctxt, name); |
3475 | 3635 |
3476 /* | 3636 /* |
3477 * Avoid html at any level > 0, head at any level != 1 | 3637 * Avoid html at any level > 0, head at any level != 1 |
3478 * or any attempt to recurse body | 3638 * or any attempt to recurse body |
3479 */ | 3639 */ |
3480 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) { | 3640 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) { |
3481 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, | 3641 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, |
3482 "htmlParseStartTag: misplaced <html> tag\n", | 3642 "htmlParseStartTag: misplaced <html> tag\n", |
3483 name, NULL); | 3643 name, NULL); |
3484 discardtag = 1; | 3644 discardtag = 1; |
| 3645 ctxt->depth++; |
3485 } | 3646 } |
3486 if ((ctxt->nameNr != 1) && | 3647 if ((ctxt->nameNr != 1) && |
3487 (xmlStrEqual(name, BAD_CAST"head"))) { | 3648 (xmlStrEqual(name, BAD_CAST"head"))) { |
3488 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, | 3649 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, |
3489 "htmlParseStartTag: misplaced <head> tag\n", | 3650 "htmlParseStartTag: misplaced <head> tag\n", |
3490 name, NULL); | 3651 name, NULL); |
3491 discardtag = 1; | 3652 discardtag = 1; |
| 3653 ctxt->depth++; |
3492 } | 3654 } |
3493 if (xmlStrEqual(name, BAD_CAST"body")) { | 3655 if (xmlStrEqual(name, BAD_CAST"body")) { |
3494 int indx; | 3656 int indx; |
3495 for (indx = 0;indx < ctxt->nameNr;indx++) { | 3657 for (indx = 0;indx < ctxt->nameNr;indx++) { |
3496 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) { | 3658 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) { |
3497 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, | 3659 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, |
3498 "htmlParseStartTag: misplaced <body> tag\n", | 3660 "htmlParseStartTag: misplaced <body> tag\n", |
3499 name, NULL); | 3661 name, NULL); |
3500 discardtag = 1; | 3662 discardtag = 1; |
| 3663 ctxt->depth++; |
3501 } | 3664 } |
3502 } | 3665 } |
3503 } | 3666 } |
3504 | 3667 |
3505 /* | 3668 /* |
3506 * Now parse the attributes, it ends up with the ending | 3669 * Now parse the attributes, it ends up with the ending |
3507 * | 3670 * |
3508 * (S Attribute)* S? | 3671 * (S Attribute)* S? |
3509 */ | 3672 */ |
3510 SKIP_BLANKS; | 3673 SKIP_BLANKS; |
3511 while ((IS_CHAR_CH(CUR)) && | 3674 while ((IS_CHAR_CH(CUR)) && |
3512 (CUR != '>') && | 3675 (CUR != '>') && |
3513 ((CUR != '/') || (NXT(1) != '>'))) { | 3676 ((CUR != '/') || (NXT(1) != '>'))) { |
3514 long cons = ctxt->nbChars; | 3677 long cons = ctxt->nbChars; |
3515 | 3678 |
3516 GROW; | 3679 GROW; |
3517 attname = htmlParseAttribute(ctxt, &attvalue); | 3680 attname = htmlParseAttribute(ctxt, &attvalue); |
3518 if (attname != NULL) { | 3681 if (attname != NULL) { |
3519 | 3682 |
3520 /* | 3683 /* |
3521 * Well formedness requires at most one declaration of an attribute | 3684 * Well formedness requires at most one declaration of an attribute |
3522 */ | 3685 */ |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3641 if ((CUR != '<') || (NXT(1) != '/')) { | 3804 if ((CUR != '<') || (NXT(1) != '/')) { |
3642 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED, | 3805 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED, |
3643 "htmlParseEndTag: '</' not found\n", NULL, NULL); | 3806 "htmlParseEndTag: '</' not found\n", NULL, NULL); |
3644 return (0); | 3807 return (0); |
3645 } | 3808 } |
3646 SKIP(2); | 3809 SKIP(2); |
3647 | 3810 |
3648 name = htmlParseHTMLName(ctxt); | 3811 name = htmlParseHTMLName(ctxt); |
3649 if (name == NULL) | 3812 if (name == NULL) |
3650 return (0); | 3813 return (0); |
3651 | |
3652 /* | 3814 /* |
3653 * We should definitely be at the ending "S? '>'" part | 3815 * We should definitely be at the ending "S? '>'" part |
3654 */ | 3816 */ |
3655 SKIP_BLANKS; | 3817 SKIP_BLANKS; |
3656 if ((!IS_CHAR_CH(CUR)) || (CUR != '>')) { | 3818 if ((!IS_CHAR_CH(CUR)) || (CUR != '>')) { |
3657 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, | 3819 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, |
3658 "End tag : expected '>'\n", NULL, NULL); | 3820 "End tag : expected '>'\n", NULL, NULL); |
3659 if (ctxt->recovery) { | 3821 if (ctxt->recovery) { |
3660 /* | 3822 /* |
3661 * We're not at the ending > !! | 3823 * We're not at the ending > !! |
3662 * Error, unless in recover mode where we search forwards | 3824 * Error, unless in recover mode where we search forwards |
3663 * until we find a > | 3825 * until we find a > |
3664 */ | 3826 */ |
3665 while (CUR != '\0' && CUR != '>') NEXT; | 3827 while (CUR != '\0' && CUR != '>') NEXT; |
3666 NEXT; | 3828 NEXT; |
3667 } | 3829 } |
3668 } else | 3830 } else |
3669 NEXT; | 3831 NEXT; |
3670 | 3832 |
3671 /* | 3833 /* |
| 3834 * if we ignored misplaced tags in htmlParseStartTag don't pop them |
| 3835 * out now. |
| 3836 */ |
| 3837 if ((ctxt->depth > 0) && |
| 3838 (xmlStrEqual(name, BAD_CAST "html") || |
| 3839 xmlStrEqual(name, BAD_CAST "body") || |
| 3840 xmlStrEqual(name, BAD_CAST "head"))) { |
| 3841 ctxt->depth--; |
| 3842 return (0); |
| 3843 } |
| 3844 |
| 3845 /* |
3672 * If the name read is not one of the elements in the parsing stack | 3846 * If the name read is not one of the elements in the parsing stack |
3673 * then return, it's just an error. | 3847 * then return, it's just an error. |
3674 */ | 3848 */ |
3675 for (i = (ctxt->nameNr - 1); i >= 0; i--) { | 3849 for (i = (ctxt->nameNr - 1); i >= 0; i--) { |
3676 if (xmlStrEqual(name, ctxt->nameTab[i])) | 3850 if (xmlStrEqual(name, ctxt->nameTab[i])) |
3677 break; | 3851 break; |
3678 } | 3852 } |
3679 if (i < 0) { | 3853 if (i < 0) { |
3680 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, | 3854 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, |
3681 "Unexpected end tag : %s\n", name, NULL); | 3855 "Unexpected end tag : %s\n", name, NULL); |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3715 ret = 0; | 3889 ret = 0; |
3716 } | 3890 } |
3717 | 3891 |
3718 return (ret); | 3892 return (ret); |
3719 } | 3893 } |
3720 | 3894 |
3721 | 3895 |
3722 /** | 3896 /** |
3723 * htmlParseReference: | 3897 * htmlParseReference: |
3724 * @ctxt: an HTML parser context | 3898 * @ctxt: an HTML parser context |
3725 * | 3899 * |
3726 * parse and handle entity references in content, | 3900 * parse and handle entity references in content, |
3727 * this will end-up in a call to character() since this is either a | 3901 * this will end-up in a call to character() since this is either a |
3728 * CharRef, or a predefined entity. | 3902 * CharRef, or a predefined entity. |
3729 */ | 3903 */ |
3730 static void | 3904 static void |
3731 htmlParseReference(htmlParserCtxtPtr ctxt) { | 3905 htmlParseReference(htmlParserCtxtPtr ctxt) { |
3732 const htmlEntityDesc * ent; | 3906 const htmlEntityDesc * ent; |
3733 xmlChar out[6]; | 3907 xmlChar out[6]; |
3734 const xmlChar *name; | 3908 const xmlChar *name; |
3735 if (CUR != '&') return; | 3909 if (CUR != '&') return; |
3736 | 3910 |
3737 if (NXT(1) == '#') { | 3911 if (NXT(1) == '#') { |
3738 unsigned int c; | 3912 unsigned int c; |
3739 int bits, i = 0; | 3913 int bits, i = 0; |
3740 | 3914 |
3741 c = htmlParseCharRef(ctxt); | 3915 c = htmlParseCharRef(ctxt); |
3742 if (c == 0) | 3916 if (c == 0) |
3743 return; | 3917 return; |
3744 | 3918 |
3745 if (c < 0x80) { out[i++]= c; bits= -6; } | 3919 if (c < 0x80) { out[i++]= c; bits= -6; } |
3746 else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } | 3920 else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
3747 else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } | 3921 else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
3748 else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } | 3922 else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } |
3749 | 3923 |
3750 for ( ; bits >= 0; bits-= 6) { | 3924 for ( ; bits >= 0; bits-= 6) { |
3751 out[i++]= ((c >> bits) & 0x3F) | 0x80; | 3925 out[i++]= ((c >> bits) & 0x3F) | 0x80; |
3752 } | 3926 } |
3753 out[i] = 0; | 3927 out[i] = 0; |
3754 | 3928 |
3755 htmlCheckParagraph(ctxt); | 3929 htmlCheckParagraph(ctxt); |
3756 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) | 3930 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) |
3757 ctxt->sax->characters(ctxt->userData, out, i); | 3931 ctxt->sax->characters(ctxt->userData, out, i); |
3758 } else { | 3932 } else { |
3759 ent = htmlParseEntityRef(ctxt, &name); | 3933 ent = htmlParseEntityRef(ctxt, &name); |
(...skipping 14 matching lines...) Expand all Loading... |
3774 unsigned int c; | 3948 unsigned int c; |
3775 int bits, i = 0; | 3949 int bits, i = 0; |
3776 | 3950 |
3777 c = ent->value; | 3951 c = ent->value; |
3778 if (c < 0x80) | 3952 if (c < 0x80) |
3779 { out[i++]= c; bits= -6; } | 3953 { out[i++]= c; bits= -6; } |
3780 else if (c < 0x800) | 3954 else if (c < 0x800) |
3781 { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } | 3955 { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
3782 else if (c < 0x10000) | 3956 else if (c < 0x10000) |
3783 { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } | 3957 { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
3784 » else | 3958 » else |
3785 { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } | 3959 { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } |
3786 | 3960 |
3787 for ( ; bits >= 0; bits-= 6) { | 3961 for ( ; bits >= 0; bits-= 6) { |
3788 out[i++]= ((c >> bits) & 0x3F) | 0x80; | 3962 out[i++]= ((c >> bits) & 0x3F) | 0x80; |
3789 } | 3963 } |
3790 out[i] = 0; | 3964 out[i] = 0; |
3791 | 3965 |
3792 htmlCheckParagraph(ctxt); | 3966 htmlCheckParagraph(ctxt); |
3793 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) | 3967 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) |
3794 ctxt->sax->characters(ctxt->userData, out, i); | 3968 ctxt->sax->characters(ctxt->userData, out, i); |
3795 } | 3969 } |
3796 } | 3970 } |
3797 } | 3971 } |
3798 | 3972 |
3799 /** | 3973 /** |
3800 * htmlParseContent: | 3974 * htmlParseContent: |
3801 * @ctxt: an HTML parser context | 3975 * @ctxt: an HTML parser context |
3802 * | 3976 * |
3803 * Parse a content: comment, sub-element, reference or text. | 3977 * Parse a content: comment, sub-element, reference or text. |
| 3978 * Kept for compatibility with old code |
3804 */ | 3979 */ |
3805 | 3980 |
3806 static void | 3981 static void |
3807 htmlParseContent(htmlParserCtxtPtr ctxt) { | 3982 htmlParseContent(htmlParserCtxtPtr ctxt) { |
3808 xmlChar *currentNode; | 3983 xmlChar *currentNode; |
3809 int depth; | 3984 int depth; |
3810 const xmlChar *name; | 3985 const xmlChar *name; |
3811 | 3986 |
3812 currentNode = xmlStrdup(ctxt->name); | 3987 currentNode = xmlStrdup(ctxt->name); |
3813 depth = ctxt->nameNr; | 3988 depth = ctxt->nameNr; |
3814 while (1) { | 3989 while (1) { |
3815 long cons = ctxt->nbChars; | 3990 long cons = ctxt->nbChars; |
3816 | 3991 |
3817 GROW; | 3992 GROW; |
| 3993 |
| 3994 if (ctxt->instate == XML_PARSER_EOF) |
| 3995 break; |
| 3996 |
3818 /* | 3997 /* |
3819 * Our tag or one of it's parent or children is ending. | 3998 * Our tag or one of it's parent or children is ending. |
3820 */ | 3999 */ |
3821 if ((CUR == '<') && (NXT(1) == '/')) { | 4000 if ((CUR == '<') && (NXT(1) == '/')) { |
3822 if (htmlParseEndTag(ctxt) && | 4001 if (htmlParseEndTag(ctxt) && |
3823 ((currentNode != NULL) || (ctxt->nameNr == 0))) { | 4002 ((currentNode != NULL) || (ctxt->nameNr == 0))) { |
3824 if (currentNode != NULL) | 4003 if (currentNode != NULL) |
3825 xmlFree(currentNode); | 4004 xmlFree(currentNode); |
3826 return; | 4005 return; |
3827 } | 4006 } |
3828 continue; /* while */ | 4007 continue; /* while */ |
3829 } | 4008 } |
3830 | 4009 |
3831 else if ((CUR == '<') && | 4010 else if ((CUR == '<') && |
3832 ((IS_ASCII_LETTER(NXT(1))) || | 4011 ((IS_ASCII_LETTER(NXT(1))) || |
3833 (NXT(1) == '_') || (NXT(1) == ':'))) { | 4012 (NXT(1) == '_') || (NXT(1) == ':'))) { |
3834 name = htmlParseHTMLName_nonInvasive(ctxt); | 4013 name = htmlParseHTMLName_nonInvasive(ctxt); |
3835 if (name == NULL) { | 4014 if (name == NULL) { |
3836 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, | 4015 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, |
3837 "htmlParseStartTag: invalid element name\n", | 4016 "htmlParseStartTag: invalid element name\n", |
3838 NULL, NULL); | 4017 NULL, NULL); |
3839 /* Dump the bogus tag like browsers do */ | 4018 /* Dump the bogus tag like browsers do */ |
3840 » while ((IS_CHAR_CH(CUR)) && (CUR != '>')) | 4019 while ((IS_CHAR_CH(CUR)) && (CUR != '>')) |
3841 NEXT; | 4020 NEXT; |
3842 | 4021 |
3843 if (currentNode != NULL) | 4022 if (currentNode != NULL) |
3844 xmlFree(currentNode); | 4023 xmlFree(currentNode); |
3845 return; | 4024 return; |
3846 } | 4025 } |
3847 | 4026 |
3848 if (ctxt->name != NULL) { | 4027 if (ctxt->name != NULL) { |
3849 if (htmlCheckAutoClose(name, ctxt->name) == 1) { | 4028 if (htmlCheckAutoClose(name, ctxt->name) == 1) { |
3850 htmlAutoClose(ctxt, name); | 4029 htmlAutoClose(ctxt, name); |
3851 continue; | 4030 continue; |
3852 } | 4031 } |
3853 » }» | 4032 » } |
3854 } | 4033 } |
3855 | 4034 |
3856 /* | 4035 /* |
3857 * Has this node been popped out during parsing of | 4036 * Has this node been popped out during parsing of |
3858 * the next element | 4037 * the next element |
3859 */ | 4038 */ |
3860 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) && | 4039 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) && |
3861 (!xmlStrEqual(currentNode, ctxt->name))) | 4040 (!xmlStrEqual(currentNode, ctxt->name))) |
3862 { | 4041 { |
3863 if (currentNode != NULL) xmlFree(currentNode); | 4042 if (currentNode != NULL) xmlFree(currentNode); |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3902 | 4081 |
3903 /* | 4082 /* |
3904 * Third case : a sub-element. | 4083 * Third case : a sub-element. |
3905 */ | 4084 */ |
3906 else if (CUR == '<') { | 4085 else if (CUR == '<') { |
3907 htmlParseElement(ctxt); | 4086 htmlParseElement(ctxt); |
3908 } | 4087 } |
3909 | 4088 |
3910 /* | 4089 /* |
3911 * Fourth case : a reference. If if has not been resolved, | 4090 * Fourth case : a reference. If if has not been resolved, |
3912 » * parsing returns it's Name, create the node | 4091 » * parsing returns it's Name, create the node |
3913 */ | 4092 */ |
3914 else if (CUR == '&') { | 4093 else if (CUR == '&') { |
3915 htmlParseReference(ctxt); | 4094 htmlParseReference(ctxt); |
3916 } | 4095 } |
3917 | 4096 |
3918 /* | 4097 /* |
3919 * Fifth case : end of the resource | 4098 * Fifth case : end of the resource |
3920 */ | 4099 */ |
3921 else if (CUR == 0) { | 4100 else if (CUR == 0) { |
3922 htmlAutoCloseOnEnd(ctxt); | 4101 htmlAutoCloseOnEnd(ctxt); |
(...skipping 15 matching lines...) Expand all Loading... |
3938 } | 4117 } |
3939 break; | 4118 break; |
3940 } | 4119 } |
3941 } | 4120 } |
3942 GROW; | 4121 GROW; |
3943 } | 4122 } |
3944 if (currentNode != NULL) xmlFree(currentNode); | 4123 if (currentNode != NULL) xmlFree(currentNode); |
3945 } | 4124 } |
3946 | 4125 |
3947 /** | 4126 /** |
3948 * htmlParseContent: | |
3949 * @ctxt: an HTML parser context | |
3950 * | |
3951 * Parse a content: comment, sub-element, reference or text. | |
3952 */ | |
3953 | |
3954 void | |
3955 __htmlParseContent(void *ctxt) { | |
3956 if (ctxt != NULL) | |
3957 htmlParseContent((htmlParserCtxtPtr) ctxt); | |
3958 } | |
3959 | |
3960 /** | |
3961 * htmlParseElement: | 4127 * htmlParseElement: |
3962 * @ctxt: an HTML parser context | 4128 * @ctxt: an HTML parser context |
3963 * | 4129 * |
3964 * parse an HTML element, this is highly recursive | 4130 * parse an HTML element, this is highly recursive |
| 4131 * this is kept for compatibility with previous code versions |
3965 * | 4132 * |
3966 * [39] element ::= EmptyElemTag | STag content ETag | 4133 * [39] element ::= EmptyElemTag | STag content ETag |
3967 * | 4134 * |
3968 * [41] Attribute ::= Name Eq AttValue | 4135 * [41] Attribute ::= Name Eq AttValue |
3969 */ | 4136 */ |
3970 | 4137 |
3971 void | 4138 void |
3972 htmlParseElement(htmlParserCtxtPtr ctxt) { | 4139 htmlParseElement(htmlParserCtxtPtr ctxt) { |
3973 const xmlChar *name; | 4140 const xmlChar *name; |
3974 xmlChar *currentNode = NULL; | 4141 xmlChar *currentNode = NULL; |
3975 const htmlElemDesc * info; | 4142 const htmlElemDesc * info; |
3976 htmlParserNodeInfo node_info; | 4143 htmlParserNodeInfo node_info; |
3977 int failed; | 4144 int failed; |
3978 int depth; | 4145 int depth; |
3979 const xmlChar *oldptr; | 4146 const xmlChar *oldptr; |
3980 | 4147 |
3981 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 4148 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
3982 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 4149 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
3983 "htmlParseElement: context error\n", NULL, NULL); | 4150 "htmlParseElement: context error\n", NULL, NULL); |
3984 return; | 4151 return; |
3985 } | 4152 } |
| 4153 |
| 4154 if (ctxt->instate == XML_PARSER_EOF) |
| 4155 return; |
| 4156 |
3986 /* Capture start position */ | 4157 /* Capture start position */ |
3987 if (ctxt->record_info) { | 4158 if (ctxt->record_info) { |
3988 node_info.begin_pos = ctxt->input->consumed + | 4159 node_info.begin_pos = ctxt->input->consumed + |
3989 (CUR_PTR - ctxt->input->base); | 4160 (CUR_PTR - ctxt->input->base); |
3990 node_info.begin_line = ctxt->input->line; | 4161 node_info.begin_line = ctxt->input->line; |
3991 } | 4162 } |
3992 | 4163 |
3993 failed = htmlParseStartTag(ctxt); | 4164 failed = htmlParseStartTag(ctxt); |
3994 name = ctxt->name; | 4165 name = ctxt->name; |
3995 if ((failed == -1) || (name == NULL)) { | 4166 if ((failed == -1) || (name == NULL)) { |
(...skipping 24 matching lines...) Expand all Loading... |
4020 | 4191 |
4021 if (CUR == '>') { | 4192 if (CUR == '>') { |
4022 NEXT; | 4193 NEXT; |
4023 } else { | 4194 } else { |
4024 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, | 4195 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, |
4025 "Couldn't find end of Start Tag %s\n", name, NULL); | 4196 "Couldn't find end of Start Tag %s\n", name, NULL); |
4026 | 4197 |
4027 /* | 4198 /* |
4028 * end of parsing of this node. | 4199 * end of parsing of this node. |
4029 */ | 4200 */ |
4030 » if (xmlStrEqual(name, ctxt->name)) { | 4201 » if (xmlStrEqual(name, ctxt->name)) { |
4031 nodePop(ctxt); | 4202 nodePop(ctxt); |
4032 htmlnamePop(ctxt); | 4203 htmlnamePop(ctxt); |
4033 » } | 4204 » } |
4034 | 4205 |
4035 /* | 4206 /* |
4036 * Capture end position and add node | 4207 * Capture end position and add node |
4037 */ | 4208 */ |
4038 if (ctxt->record_info) { | 4209 if (ctxt->record_info) { |
4039 node_info.end_pos = ctxt->input->consumed + | 4210 node_info.end_pos = ctxt->input->consumed + |
4040 (CUR_PTR - ctxt->input->base); | 4211 (CUR_PTR - ctxt->input->base); |
4041 node_info.end_line = ctxt->input->line; | 4212 node_info.end_line = ctxt->input->line; |
4042 node_info.node = ctxt->node; | 4213 node_info.node = ctxt->node; |
4043 xmlParserAddNodeInfo(ctxt, &node_info); | 4214 xmlParserAddNodeInfo(ctxt, &node_info); |
(...skipping 13 matching lines...) Expand all Loading... |
4057 | 4228 |
4058 /* | 4229 /* |
4059 * Parse the content of the element: | 4230 * Parse the content of the element: |
4060 */ | 4231 */ |
4061 currentNode = xmlStrdup(ctxt->name); | 4232 currentNode = xmlStrdup(ctxt->name); |
4062 depth = ctxt->nameNr; | 4233 depth = ctxt->nameNr; |
4063 while (IS_CHAR_CH(CUR)) { | 4234 while (IS_CHAR_CH(CUR)) { |
4064 oldptr = ctxt->input->cur; | 4235 oldptr = ctxt->input->cur; |
4065 htmlParseContent(ctxt); | 4236 htmlParseContent(ctxt); |
4066 if (oldptr==ctxt->input->cur) break; | 4237 if (oldptr==ctxt->input->cur) break; |
4067 » if (ctxt->nameNr < depth) break; | 4238 » if (ctxt->nameNr < depth) break; |
4068 }» | 4239 } |
4069 | 4240 |
4070 /* | 4241 /* |
4071 * Capture end position and add node | 4242 * Capture end position and add node |
4072 */ | 4243 */ |
4073 if ( currentNode != NULL && ctxt->record_info ) { | 4244 if ( currentNode != NULL && ctxt->record_info ) { |
4074 node_info.end_pos = ctxt->input->consumed + | 4245 node_info.end_pos = ctxt->input->consumed + |
4075 (CUR_PTR - ctxt->input->base); | 4246 (CUR_PTR - ctxt->input->base); |
4076 node_info.end_line = ctxt->input->line; | 4247 node_info.end_line = ctxt->input->line; |
4077 node_info.node = ctxt->node; | 4248 node_info.node = ctxt->node; |
4078 xmlParserAddNodeInfo(ctxt, &node_info); | 4249 xmlParserAddNodeInfo(ctxt, &node_info); |
4079 } | 4250 } |
4080 if (!IS_CHAR_CH(CUR)) { | 4251 if (!IS_CHAR_CH(CUR)) { |
4081 htmlAutoCloseOnEnd(ctxt); | 4252 htmlAutoCloseOnEnd(ctxt); |
4082 } | 4253 } |
4083 | 4254 |
4084 if (currentNode != NULL) | 4255 if (currentNode != NULL) |
4085 xmlFree(currentNode); | 4256 xmlFree(currentNode); |
4086 } | 4257 } |
4087 | 4258 |
| 4259 static void |
| 4260 htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) { |
| 4261 /* |
| 4262 * Capture end position and add node |
| 4263 */ |
| 4264 if ( ctxt->node != NULL && ctxt->record_info ) { |
| 4265 ctxt->nodeInfo->end_pos = ctxt->input->consumed + |
| 4266 (CUR_PTR - ctxt->input->base); |
| 4267 ctxt->nodeInfo->end_line = ctxt->input->line; |
| 4268 ctxt->nodeInfo->node = ctxt->node; |
| 4269 xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo); |
| 4270 htmlNodeInfoPop(ctxt); |
| 4271 } |
| 4272 if (!IS_CHAR_CH(CUR)) { |
| 4273 htmlAutoCloseOnEnd(ctxt); |
| 4274 } |
| 4275 } |
| 4276 |
| 4277 /** |
| 4278 * htmlParseElementInternal: |
| 4279 * @ctxt: an HTML parser context |
| 4280 * |
| 4281 * parse an HTML element, new version, non recursive |
| 4282 * |
| 4283 * [39] element ::= EmptyElemTag | STag content ETag |
| 4284 * |
| 4285 * [41] Attribute ::= Name Eq AttValue |
| 4286 */ |
| 4287 |
| 4288 static void |
| 4289 htmlParseElementInternal(htmlParserCtxtPtr ctxt) { |
| 4290 const xmlChar *name; |
| 4291 const htmlElemDesc * info; |
| 4292 htmlParserNodeInfo node_info; |
| 4293 int failed; |
| 4294 |
| 4295 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
| 4296 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 4297 "htmlParseElementInternal: context error\n", NULL, NULL); |
| 4298 return; |
| 4299 } |
| 4300 |
| 4301 if (ctxt->instate == XML_PARSER_EOF) |
| 4302 return; |
| 4303 |
| 4304 /* Capture start position */ |
| 4305 if (ctxt->record_info) { |
| 4306 node_info.begin_pos = ctxt->input->consumed + |
| 4307 (CUR_PTR - ctxt->input->base); |
| 4308 node_info.begin_line = ctxt->input->line; |
| 4309 } |
| 4310 |
| 4311 failed = htmlParseStartTag(ctxt); |
| 4312 name = ctxt->name; |
| 4313 if ((failed == -1) || (name == NULL)) { |
| 4314 if (CUR == '>') |
| 4315 NEXT; |
| 4316 return; |
| 4317 } |
| 4318 |
| 4319 /* |
| 4320 * Lookup the info for that element. |
| 4321 */ |
| 4322 info = htmlTagLookup(name); |
| 4323 if (info == NULL) { |
| 4324 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG, |
| 4325 "Tag %s invalid\n", name, NULL); |
| 4326 } |
| 4327 |
| 4328 /* |
| 4329 * Check for an Empty Element labeled the XML/SGML way |
| 4330 */ |
| 4331 if ((CUR == '/') && (NXT(1) == '>')) { |
| 4332 SKIP(2); |
| 4333 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
| 4334 ctxt->sax->endElement(ctxt->userData, name); |
| 4335 htmlnamePop(ctxt); |
| 4336 return; |
| 4337 } |
| 4338 |
| 4339 if (CUR == '>') { |
| 4340 NEXT; |
| 4341 } else { |
| 4342 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, |
| 4343 "Couldn't find end of Start Tag %s\n", name, NULL); |
| 4344 |
| 4345 /* |
| 4346 * end of parsing of this node. |
| 4347 */ |
| 4348 if (xmlStrEqual(name, ctxt->name)) { |
| 4349 nodePop(ctxt); |
| 4350 htmlnamePop(ctxt); |
| 4351 } |
| 4352 |
| 4353 if (ctxt->record_info) |
| 4354 htmlNodeInfoPush(ctxt, &node_info); |
| 4355 htmlParserFinishElementParsing(ctxt); |
| 4356 return; |
| 4357 } |
| 4358 |
| 4359 /* |
| 4360 * Check for an Empty Element from DTD definition |
| 4361 */ |
| 4362 if ((info != NULL) && (info->empty)) { |
| 4363 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
| 4364 ctxt->sax->endElement(ctxt->userData, name); |
| 4365 htmlnamePop(ctxt); |
| 4366 return; |
| 4367 } |
| 4368 |
| 4369 if (ctxt->record_info) |
| 4370 htmlNodeInfoPush(ctxt, &node_info); |
| 4371 } |
| 4372 |
| 4373 /** |
| 4374 * htmlParseContentInternal: |
| 4375 * @ctxt: an HTML parser context |
| 4376 * |
| 4377 * Parse a content: comment, sub-element, reference or text. |
| 4378 * New version for non recursive htmlParseElementInternal |
| 4379 */ |
| 4380 |
| 4381 static void |
| 4382 htmlParseContentInternal(htmlParserCtxtPtr ctxt) { |
| 4383 xmlChar *currentNode; |
| 4384 int depth; |
| 4385 const xmlChar *name; |
| 4386 |
| 4387 currentNode = xmlStrdup(ctxt->name); |
| 4388 depth = ctxt->nameNr; |
| 4389 while (1) { |
| 4390 long cons = ctxt->nbChars; |
| 4391 |
| 4392 GROW; |
| 4393 |
| 4394 if (ctxt->instate == XML_PARSER_EOF) |
| 4395 break; |
| 4396 |
| 4397 /* |
| 4398 * Our tag or one of it's parent or children is ending. |
| 4399 */ |
| 4400 if ((CUR == '<') && (NXT(1) == '/')) { |
| 4401 if (htmlParseEndTag(ctxt) && |
| 4402 ((currentNode != NULL) || (ctxt->nameNr == 0))) { |
| 4403 if (currentNode != NULL) |
| 4404 xmlFree(currentNode); |
| 4405 |
| 4406 currentNode = xmlStrdup(ctxt->name); |
| 4407 depth = ctxt->nameNr; |
| 4408 } |
| 4409 continue; /* while */ |
| 4410 } |
| 4411 |
| 4412 else if ((CUR == '<') && |
| 4413 ((IS_ASCII_LETTER(NXT(1))) || |
| 4414 (NXT(1) == '_') || (NXT(1) == ':'))) { |
| 4415 name = htmlParseHTMLName_nonInvasive(ctxt); |
| 4416 if (name == NULL) { |
| 4417 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, |
| 4418 "htmlParseStartTag: invalid element name\n", |
| 4419 NULL, NULL); |
| 4420 /* Dump the bogus tag like browsers do */ |
| 4421 while ((IS_CHAR_CH(CUR)) && (CUR != '>')) |
| 4422 NEXT; |
| 4423 |
| 4424 htmlParserFinishElementParsing(ctxt); |
| 4425 if (currentNode != NULL) |
| 4426 xmlFree(currentNode); |
| 4427 |
| 4428 currentNode = xmlStrdup(ctxt->name); |
| 4429 depth = ctxt->nameNr; |
| 4430 continue; |
| 4431 } |
| 4432 |
| 4433 if (ctxt->name != NULL) { |
| 4434 if (htmlCheckAutoClose(name, ctxt->name) == 1) { |
| 4435 htmlAutoClose(ctxt, name); |
| 4436 continue; |
| 4437 } |
| 4438 } |
| 4439 } |
| 4440 |
| 4441 /* |
| 4442 * Has this node been popped out during parsing of |
| 4443 * the next element |
| 4444 */ |
| 4445 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) && |
| 4446 (!xmlStrEqual(currentNode, ctxt->name))) |
| 4447 { |
| 4448 htmlParserFinishElementParsing(ctxt); |
| 4449 if (currentNode != NULL) xmlFree(currentNode); |
| 4450 |
| 4451 currentNode = xmlStrdup(ctxt->name); |
| 4452 depth = ctxt->nameNr; |
| 4453 continue; |
| 4454 } |
| 4455 |
| 4456 if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) || |
| 4457 (xmlStrEqual(currentNode, BAD_CAST"style")))) { |
| 4458 /* |
| 4459 * Handle SCRIPT/STYLE separately |
| 4460 */ |
| 4461 htmlParseScript(ctxt); |
| 4462 } else { |
| 4463 /* |
| 4464 * Sometimes DOCTYPE arrives in the middle of the document |
| 4465 */ |
| 4466 if ((CUR == '<') && (NXT(1) == '!') && |
| 4467 (UPP(2) == 'D') && (UPP(3) == 'O') && |
| 4468 (UPP(4) == 'C') && (UPP(5) == 'T') && |
| 4469 (UPP(6) == 'Y') && (UPP(7) == 'P') && |
| 4470 (UPP(8) == 'E')) { |
| 4471 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, |
| 4472 "Misplaced DOCTYPE declaration\n", |
| 4473 BAD_CAST "DOCTYPE" , NULL); |
| 4474 htmlParseDocTypeDecl(ctxt); |
| 4475 } |
| 4476 |
| 4477 /* |
| 4478 * First case : a comment |
| 4479 */ |
| 4480 if ((CUR == '<') && (NXT(1) == '!') && |
| 4481 (NXT(2) == '-') && (NXT(3) == '-')) { |
| 4482 htmlParseComment(ctxt); |
| 4483 } |
| 4484 |
| 4485 /* |
| 4486 * Second case : a Processing Instruction. |
| 4487 */ |
| 4488 else if ((CUR == '<') && (NXT(1) == '?')) { |
| 4489 htmlParsePI(ctxt); |
| 4490 } |
| 4491 |
| 4492 /* |
| 4493 * Third case : a sub-element. |
| 4494 */ |
| 4495 else if (CUR == '<') { |
| 4496 htmlParseElementInternal(ctxt); |
| 4497 if (currentNode != NULL) xmlFree(currentNode); |
| 4498 |
| 4499 currentNode = xmlStrdup(ctxt->name); |
| 4500 depth = ctxt->nameNr; |
| 4501 } |
| 4502 |
| 4503 /* |
| 4504 * Fourth case : a reference. If if has not been resolved, |
| 4505 * parsing returns it's Name, create the node |
| 4506 */ |
| 4507 else if (CUR == '&') { |
| 4508 htmlParseReference(ctxt); |
| 4509 } |
| 4510 |
| 4511 /* |
| 4512 * Fifth case : end of the resource |
| 4513 */ |
| 4514 else if (CUR == 0) { |
| 4515 htmlAutoCloseOnEnd(ctxt); |
| 4516 break; |
| 4517 } |
| 4518 |
| 4519 /* |
| 4520 * Last case, text. Note that References are handled directly. |
| 4521 */ |
| 4522 else { |
| 4523 htmlParseCharData(ctxt); |
| 4524 } |
| 4525 |
| 4526 if (cons == ctxt->nbChars) { |
| 4527 if (ctxt->node != NULL) { |
| 4528 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 4529 "detected an error in element content\n", |
| 4530 NULL, NULL); |
| 4531 } |
| 4532 break; |
| 4533 } |
| 4534 } |
| 4535 GROW; |
| 4536 } |
| 4537 if (currentNode != NULL) xmlFree(currentNode); |
| 4538 } |
| 4539 |
| 4540 /** |
| 4541 * htmlParseContent: |
| 4542 * @ctxt: an HTML parser context |
| 4543 * |
| 4544 * Parse a content: comment, sub-element, reference or text. |
| 4545 * This is the entry point when called from parser.c |
| 4546 */ |
| 4547 |
| 4548 void |
| 4549 __htmlParseContent(void *ctxt) { |
| 4550 if (ctxt != NULL) |
| 4551 htmlParseContentInternal((htmlParserCtxtPtr) ctxt); |
| 4552 } |
| 4553 |
4088 /** | 4554 /** |
4089 * htmlParseDocument: | 4555 * htmlParseDocument: |
4090 * @ctxt: an HTML parser context | 4556 * @ctxt: an HTML parser context |
4091 * | 4557 * |
4092 * parse an HTML document (and build a tree if using the standard SAX | 4558 * parse an HTML document (and build a tree if using the standard SAX |
4093 * interface). | 4559 * interface). |
4094 * | 4560 * |
4095 * Returns 0, -1 in case of error. the parser context is augmented | 4561 * Returns 0, -1 in case of error. the parser context is augmented |
4096 * as a result of the parsing. | 4562 * as a result of the parsing. |
4097 */ | 4563 */ |
4098 | 4564 |
4099 int | 4565 int |
4100 htmlParseDocument(htmlParserCtxtPtr ctxt) { | 4566 htmlParseDocument(htmlParserCtxtPtr ctxt) { |
| 4567 xmlChar start[4]; |
| 4568 xmlCharEncoding enc; |
4101 xmlDtdPtr dtd; | 4569 xmlDtdPtr dtd; |
4102 | 4570 |
4103 xmlInitParser(); | 4571 xmlInitParser(); |
4104 | 4572 |
4105 htmlDefaultSAXHandlerInit(); | 4573 htmlDefaultSAXHandlerInit(); |
4106 | 4574 |
4107 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 4575 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
4108 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 4576 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
4109 "htmlParseDocument: context error\n", NULL, NULL); | 4577 "htmlParseDocument: context error\n", NULL, NULL); |
4110 return(XML_ERR_INTERNAL_ERROR); | 4578 return(XML_ERR_INTERNAL_ERROR); |
4111 } | 4579 } |
4112 ctxt->html = 1; | 4580 ctxt->html = 1; |
| 4581 ctxt->linenumbers = 1; |
4113 GROW; | 4582 GROW; |
4114 /* | 4583 /* |
4115 * SAX: beginning of the document processing. | 4584 * SAX: beginning of the document processing. |
4116 */ | 4585 */ |
4117 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) | 4586 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
4118 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); | 4587 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); |
4119 | 4588 |
| 4589 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && |
| 4590 ((ctxt->input->end - ctxt->input->cur) >= 4)) { |
| 4591 /* |
| 4592 * Get the 4 first bytes and decode the charset |
| 4593 * if enc != XML_CHAR_ENCODING_NONE |
| 4594 * plug some encoding conversion routines. |
| 4595 */ |
| 4596 start[0] = RAW; |
| 4597 start[1] = NXT(1); |
| 4598 start[2] = NXT(2); |
| 4599 start[3] = NXT(3); |
| 4600 enc = xmlDetectCharEncoding(&start[0], 4); |
| 4601 if (enc != XML_CHAR_ENCODING_NONE) { |
| 4602 xmlSwitchEncoding(ctxt, enc); |
| 4603 } |
| 4604 } |
| 4605 |
4120 /* | 4606 /* |
4121 * Wipe out everything which is before the first '<' | 4607 * Wipe out everything which is before the first '<' |
4122 */ | 4608 */ |
4123 SKIP_BLANKS; | 4609 SKIP_BLANKS; |
4124 if (CUR == 0) { | 4610 if (CUR == 0) { |
4125 » htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY, | 4611 » htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY, |
4126 "Document is empty\n", NULL, NULL); | 4612 "Document is empty\n", NULL, NULL); |
4127 } | 4613 } |
4128 | 4614 |
4129 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) | 4615 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) |
4130 ctxt->sax->startDocument(ctxt->userData); | 4616 ctxt->sax->startDocument(ctxt->userData); |
4131 | 4617 |
4132 | 4618 |
4133 /* | 4619 /* |
4134 * Parse possible comments and PIs before any content | 4620 * Parse possible comments and PIs before any content |
4135 */ | 4621 */ |
4136 while (((CUR == '<') && (NXT(1) == '!') && | 4622 while (((CUR == '<') && (NXT(1) == '!') && |
4137 (NXT(2) == '-') && (NXT(3) == '-')) || | 4623 (NXT(2) == '-') && (NXT(3) == '-')) || |
4138 ((CUR == '<') && (NXT(1) == '?'))) { | 4624 ((CUR == '<') && (NXT(1) == '?'))) { |
4139 htmlParseComment(ctxt);» | 4625 htmlParseComment(ctxt); |
4140 htmlParsePI(ctxt);» | 4626 htmlParsePI(ctxt); |
4141 SKIP_BLANKS; | 4627 SKIP_BLANKS; |
4142 }» | 4628 } |
4143 | 4629 |
4144 | 4630 |
4145 /* | 4631 /* |
4146 * Then possibly doc type declaration(s) and more Misc | 4632 * Then possibly doc type declaration(s) and more Misc |
4147 * (doctypedecl Misc*)? | 4633 * (doctypedecl Misc*)? |
4148 */ | 4634 */ |
4149 if ((CUR == '<') && (NXT(1) == '!') && | 4635 if ((CUR == '<') && (NXT(1) == '!') && |
4150 (UPP(2) == 'D') && (UPP(3) == 'O') && | 4636 (UPP(2) == 'D') && (UPP(3) == 'O') && |
4151 (UPP(4) == 'C') && (UPP(5) == 'T') && | 4637 (UPP(4) == 'C') && (UPP(5) == 'T') && |
4152 (UPP(6) == 'Y') && (UPP(7) == 'P') && | 4638 (UPP(6) == 'Y') && (UPP(7) == 'P') && |
4153 (UPP(8) == 'E')) { | 4639 (UPP(8) == 'E')) { |
4154 htmlParseDocTypeDecl(ctxt); | 4640 htmlParseDocTypeDecl(ctxt); |
4155 } | 4641 } |
4156 SKIP_BLANKS; | 4642 SKIP_BLANKS; |
4157 | 4643 |
4158 /* | 4644 /* |
4159 * Parse possible comments and PIs before any content | 4645 * Parse possible comments and PIs before any content |
4160 */ | 4646 */ |
4161 while (((CUR == '<') && (NXT(1) == '!') && | 4647 while (((CUR == '<') && (NXT(1) == '!') && |
4162 (NXT(2) == '-') && (NXT(3) == '-')) || | 4648 (NXT(2) == '-') && (NXT(3) == '-')) || |
4163 ((CUR == '<') && (NXT(1) == '?'))) { | 4649 ((CUR == '<') && (NXT(1) == '?'))) { |
4164 htmlParseComment(ctxt);» | 4650 htmlParseComment(ctxt); |
4165 htmlParsePI(ctxt);» | 4651 htmlParsePI(ctxt); |
4166 SKIP_BLANKS; | 4652 SKIP_BLANKS; |
4167 }» | 4653 } |
4168 | 4654 |
4169 /* | 4655 /* |
4170 * Time to start parsing the tree itself | 4656 * Time to start parsing the tree itself |
4171 */ | 4657 */ |
4172 htmlParseContent(ctxt); | 4658 htmlParseContentInternal(ctxt); |
4173 | 4659 |
4174 /* | 4660 /* |
4175 * autoclose | 4661 * autoclose |
4176 */ | 4662 */ |
4177 if (CUR == 0) | 4663 if (CUR == 0) |
4178 htmlAutoCloseOnEnd(ctxt); | 4664 htmlAutoCloseOnEnd(ctxt); |
4179 | 4665 |
4180 | 4666 |
4181 /* | 4667 /* |
4182 * SAX: end of the document processing. | 4668 * SAX: end of the document processing. |
4183 */ | 4669 */ |
4184 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 4670 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
4185 ctxt->sax->endDocument(ctxt->userData); | 4671 ctxt->sax->endDocument(ctxt->userData); |
4186 | 4672 |
4187 if (ctxt->myDoc != NULL) { | 4673 if (ctxt->myDoc != NULL) { |
4188 dtd = xmlGetIntSubset(ctxt->myDoc); | 4674 dtd = xmlGetIntSubset(ctxt->myDoc); |
4189 if (dtd == NULL) | 4675 if (dtd == NULL) |
4190 » ctxt->myDoc->intSubset = | 4676 » ctxt->myDoc->intSubset = |
4191 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", | 4677 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", |
4192 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", | 4678 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", |
4193 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); | 4679 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); |
4194 } | 4680 } |
4195 if (! ctxt->wellFormed) return(-1); | 4681 if (! ctxt->wellFormed) return(-1); |
4196 return(0); | 4682 return(0); |
4197 } | 4683 } |
4198 | 4684 |
4199 | 4685 |
4200 /************************************************************************ | 4686 /************************************************************************ |
4201 * * | 4687 * * |
(...skipping 25 matching lines...) Expand all Loading... |
4227 } | 4713 } |
4228 sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler)); | 4714 sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler)); |
4229 if (sax == NULL) { | 4715 if (sax == NULL) { |
4230 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); | 4716 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); |
4231 return(-1); | 4717 return(-1); |
4232 } | 4718 } |
4233 else | 4719 else |
4234 memset(sax, 0, sizeof(htmlSAXHandler)); | 4720 memset(sax, 0, sizeof(htmlSAXHandler)); |
4235 | 4721 |
4236 /* Allocate the Input stack */ | 4722 /* Allocate the Input stack */ |
4237 ctxt->inputTab = (htmlParserInputPtr *) | 4723 ctxt->inputTab = (htmlParserInputPtr *) |
4238 xmlMalloc(5 * sizeof(htmlParserInputPtr)); | 4724 xmlMalloc(5 * sizeof(htmlParserInputPtr)); |
4239 if (ctxt->inputTab == NULL) { | 4725 if (ctxt->inputTab == NULL) { |
4240 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); | 4726 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); |
4241 ctxt->inputNr = 0; | 4727 ctxt->inputNr = 0; |
4242 ctxt->inputMax = 0; | 4728 ctxt->inputMax = 0; |
4243 ctxt->input = NULL; | 4729 ctxt->input = NULL; |
4244 return(-1); | 4730 return(-1); |
4245 } | 4731 } |
4246 ctxt->inputNr = 0; | 4732 ctxt->inputNr = 0; |
4247 ctxt->inputMax = 5; | 4733 ctxt->inputMax = 5; |
(...skipping 17 matching lines...) Expand all Loading... |
4265 } | 4751 } |
4266 ctxt->nodeNr = 0; | 4752 ctxt->nodeNr = 0; |
4267 ctxt->nodeMax = 10; | 4753 ctxt->nodeMax = 10; |
4268 ctxt->node = NULL; | 4754 ctxt->node = NULL; |
4269 | 4755 |
4270 /* Allocate the Name stack */ | 4756 /* Allocate the Name stack */ |
4271 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); | 4757 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); |
4272 if (ctxt->nameTab == NULL) { | 4758 if (ctxt->nameTab == NULL) { |
4273 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); | 4759 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); |
4274 ctxt->nameNr = 0; | 4760 ctxt->nameNr = 0; |
4275 » ctxt->nameMax = 10; | 4761 » ctxt->nameMax = 0; |
4276 ctxt->name = NULL; | 4762 ctxt->name = NULL; |
4277 ctxt->nodeNr = 0; | 4763 ctxt->nodeNr = 0; |
4278 ctxt->nodeMax = 0; | 4764 ctxt->nodeMax = 0; |
4279 ctxt->node = NULL; | 4765 ctxt->node = NULL; |
4280 ctxt->inputNr = 0; | 4766 ctxt->inputNr = 0; |
4281 ctxt->inputMax = 0; | 4767 ctxt->inputMax = 0; |
4282 ctxt->input = NULL; | 4768 ctxt->input = NULL; |
4283 return(-1); | 4769 return(-1); |
4284 } | 4770 } |
4285 ctxt->nameNr = 0; | 4771 ctxt->nameNr = 0; |
4286 ctxt->nameMax = 10; | 4772 ctxt->nameMax = 10; |
4287 ctxt->name = NULL; | 4773 ctxt->name = NULL; |
4288 | 4774 |
| 4775 ctxt->nodeInfoTab = NULL; |
| 4776 ctxt->nodeInfoNr = 0; |
| 4777 ctxt->nodeInfoMax = 0; |
| 4778 |
4289 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler; | 4779 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler; |
4290 else { | 4780 else { |
4291 ctxt->sax = sax; | 4781 ctxt->sax = sax; |
4292 memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); | 4782 memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); |
4293 } | 4783 } |
4294 ctxt->userData = ctxt; | 4784 ctxt->userData = ctxt; |
4295 ctxt->myDoc = NULL; | 4785 ctxt->myDoc = NULL; |
4296 ctxt->wellFormed = 1; | 4786 ctxt->wellFormed = 1; |
4297 ctxt->replaceEntities = 0; | 4787 ctxt->replaceEntities = 0; |
4298 ctxt->linenumbers = xmlLineNumbersDefaultValue; | 4788 ctxt->linenumbers = xmlLineNumbersDefaultValue; |
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4425 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding); | 4915 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding); |
4426 | 4916 |
4427 enc = xmlParseCharEncoding(encoding); | 4917 enc = xmlParseCharEncoding(encoding); |
4428 /* | 4918 /* |
4429 * registered set of known encodings | 4919 * registered set of known encodings |
4430 */ | 4920 */ |
4431 if (enc != XML_CHAR_ENCODING_ERROR) { | 4921 if (enc != XML_CHAR_ENCODING_ERROR) { |
4432 xmlSwitchEncoding(ctxt, enc); | 4922 xmlSwitchEncoding(ctxt, enc); |
4433 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { | 4923 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { |
4434 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, | 4924 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, |
4435 » » "Unsupported encoding %s\n", | 4925 » » "Unsupported encoding %s\n", |
4436 (const xmlChar *) encoding, NULL); | 4926 (const xmlChar *) encoding, NULL); |
4437 } | 4927 } |
4438 } else { | 4928 } else { |
4439 /* | 4929 /* |
4440 * fallback for unknown encodings | 4930 * fallback for unknown encodings |
4441 */ | 4931 */ |
4442 handler = xmlFindCharEncodingHandler((const char *) encoding); | 4932 handler = xmlFindCharEncodingHandler((const char *) encoding); |
4443 if (handler != NULL) { | 4933 if (handler != NULL) { |
4444 xmlSwitchToEncoding(ctxt, handler); | 4934 xmlSwitchToEncoding(ctxt, handler); |
4445 } else { | 4935 } else { |
4446 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, | 4936 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, |
4447 "Unsupported encoding %s\n", | 4937 "Unsupported encoding %s\n", |
4448 (const xmlChar *) encoding, NULL); | 4938 (const xmlChar *) encoding, NULL); |
4449 } | 4939 } |
4450 } | 4940 } |
4451 } | 4941 } |
4452 return(ctxt); | 4942 return(ctxt); |
4453 } | 4943 } |
4454 | 4944 |
4455 #ifdef LIBXML_PUSH_ENABLED | 4945 #ifdef LIBXML_PUSH_ENABLED |
4456 /************************************************************************ | 4946 /************************************************************************ |
4457 * * | 4947 * * |
4458 * » » Progressive parsing interfaces» » » » * | 4948 *» Progressive parsing interfaces» » » » * |
4459 * * | 4949 * * |
4460 ************************************************************************/ | 4950 ************************************************************************/ |
4461 | 4951 |
4462 /** | 4952 /** |
4463 * htmlParseLookupSequence: | 4953 * htmlParseLookupSequence: |
4464 * @ctxt: an HTML parser context | 4954 * @ctxt: an HTML parser context |
4465 * @first: the first char to lookup | 4955 * @first: the first char to lookup |
4466 * @next: the next char to lookup or zero | 4956 * @next: the next char to lookup or zero |
4467 * @third: the next char to lookup or zero | 4957 * @third: the next char to lookup or zero |
4468 * @comment: flag to force checking inside comments | 4958 * @comment: flag to force checking inside comments |
4469 * | 4959 * |
4470 * Try to find if a sequence (first, next, third) or just (first next) or | 4960 * Try to find if a sequence (first, next, third) or just (first next) or |
4471 * (first) is available in the input stream. | 4961 * (first) is available in the input stream. |
4472 * This function has a side effect of (possibly) incrementing ctxt->checkIndex | 4962 * This function has a side effect of (possibly) incrementing ctxt->checkIndex |
4473 * to avoid rescanning sequences of bytes, it DOES change the state of the | 4963 * to avoid rescanning sequences of bytes, it DOES change the state of the |
4474 * parser, do not use liberally. | 4964 * parser, do not use liberally. |
4475 * This is basically similar to xmlParseLookupSequence() | 4965 * This is basically similar to xmlParseLookupSequence() |
4476 * | 4966 * |
4477 * Returns the index to the current parsing point if the full sequence | 4967 * Returns the index to the current parsing point if the full sequence |
4478 * is available, -1 otherwise. | 4968 * is available, -1 otherwise. |
4479 */ | 4969 */ |
4480 static int | 4970 static int |
4481 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, | 4971 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, |
4482 xmlChar next, xmlChar third, int iscomment) { | 4972 xmlChar next, xmlChar third, int iscomment, |
| 4973 int ignoreattrval) |
| 4974 { |
4483 int base, len; | 4975 int base, len; |
4484 htmlParserInputPtr in; | 4976 htmlParserInputPtr in; |
4485 const xmlChar *buf; | 4977 const xmlChar *buf; |
4486 int incomment = 0; | 4978 int incomment = 0; |
| 4979 int invalue = 0; |
| 4980 char valdellim = 0x0; |
4487 | 4981 |
4488 in = ctxt->input; | 4982 in = ctxt->input; |
4489 if (in == NULL) return(-1); | 4983 if (in == NULL) |
| 4984 return (-1); |
| 4985 |
4490 base = in->cur - in->base; | 4986 base = in->cur - in->base; |
4491 if (base < 0) return(-1); | 4987 if (base < 0) |
| 4988 return (-1); |
| 4989 |
4492 if (ctxt->checkIndex > base) | 4990 if (ctxt->checkIndex > base) |
4493 base = ctxt->checkIndex; | 4991 base = ctxt->checkIndex; |
| 4992 |
4494 if (in->buf == NULL) { | 4993 if (in->buf == NULL) { |
4495 » buf = in->base; | 4994 buf = in->base; |
4496 » len = in->length; | 4995 len = in->length; |
4497 } else { | 4996 } else { |
4498 » buf = in->buf->buffer->content; | 4997 buf = in->buf->buffer->content; |
4499 » len = in->buf->buffer->use; | 4998 len = in->buf->buffer->use; |
4500 } | 4999 } |
| 5000 |
4501 /* take into account the sequence length */ | 5001 /* take into account the sequence length */ |
4502 if (third) len -= 2; | 5002 if (third) |
4503 else if (next) len --; | 5003 len -= 2; |
4504 for (;base < len;base++) { | 5004 else if (next) |
4505 » if (!incomment && (base + 4 < len) && !iscomment) { | 5005 len--; |
4506 » if ((buf[base] == '<') && (buf[base + 1] == '!') && | 5006 for (; base < len; base++) { |
4507 » » (buf[base + 2] == '-') && (buf[base + 3] == '-')) { | 5007 if ((!incomment) && (base + 4 < len) && (!iscomment)) { |
4508 » » incomment = 1; | 5008 if ((buf[base] == '<') && (buf[base + 1] == '!') && |
4509 » » /* do not increment past <! - some people use <!--> */ | 5009 (buf[base + 2] == '-') && (buf[base + 3] == '-')) { |
4510 » » base += 2; | 5010 incomment = 1; |
4511 » } | 5011 /* do not increment past <! - some people use <!--> */ |
4512 » } | 5012 base += 2; |
4513 » if (incomment) { | 5013 } |
4514 » if (base + 3 > len) | 5014 } |
4515 » » return(-1); | 5015 if (ignoreattrval) { |
4516 » if ((buf[base] == '-') && (buf[base + 1] == '-') && | 5016 if (buf[base] == '"' || buf[base] == '\'') { |
4517 » » (buf[base + 2] == '>')) { | 5017 if (invalue) { |
4518 » » incomment = 0; | 5018 if (buf[base] == valdellim) { |
4519 » » base += 2; | 5019 invalue = 0; |
4520 » } | 5020 continue; |
4521 » continue; | 5021 } |
4522 » } | 5022 } else { |
| 5023 valdellim = buf[base]; |
| 5024 invalue = 1; |
| 5025 continue; |
| 5026 } |
| 5027 } else if (invalue) { |
| 5028 continue; |
| 5029 } |
| 5030 } |
| 5031 if (incomment) { |
| 5032 if (base + 3 > len) |
| 5033 return (-1); |
| 5034 if ((buf[base] == '-') && (buf[base + 1] == '-') && |
| 5035 (buf[base + 2] == '>')) { |
| 5036 incomment = 0; |
| 5037 base += 2; |
| 5038 } |
| 5039 continue; |
| 5040 } |
4523 if (buf[base] == first) { | 5041 if (buf[base] == first) { |
4524 » if (third != 0) { | 5042 if (third != 0) { |
4525 » » if ((buf[base + 1] != next) || | 5043 if ((buf[base + 1] != next) || (buf[base + 2] != third)) |
4526 » » (buf[base + 2] != third)) continue; | 5044 continue; |
4527 » } else if (next != 0) { | 5045 } else if (next != 0) { |
4528 » » if (buf[base + 1] != next) continue; | 5046 if (buf[base + 1] != next) |
4529 » } | 5047 continue; |
4530 » ctxt->checkIndex = 0; | 5048 } |
| 5049 ctxt->checkIndex = 0; |
4531 #ifdef DEBUG_PUSH | 5050 #ifdef DEBUG_PUSH |
4532 » if (next == 0) | 5051 if (next == 0) |
4533 » » xmlGenericError(xmlGenericErrorContext, | 5052 xmlGenericError(xmlGenericErrorContext, |
4534 » » » "HPP: lookup '%c' found at %d\n", | 5053 "HPP: lookup '%c' found at %d\n", |
4535 » » » first, base); | 5054 first, base); |
4536 » else if (third == 0) | 5055 else if (third == 0) |
4537 » » xmlGenericError(xmlGenericErrorContext, | 5056 xmlGenericError(xmlGenericErrorContext, |
4538 » » » "HPP: lookup '%c%c' found at %d\n", | 5057 "HPP: lookup '%c%c' found at %d\n", |
4539 » » » first, next, base); | 5058 first, next, base); |
4540 » else | 5059 else |
4541 » » xmlGenericError(xmlGenericErrorContext, | 5060 xmlGenericError(xmlGenericErrorContext, |
4542 » » » "HPP: lookup '%c%c%c' found at %d\n", | 5061 "HPP: lookup '%c%c%c' found at %d\n", |
4543 » » » first, next, third, base); | 5062 first, next, third, base); |
4544 #endif | 5063 #endif |
4545 » return(base - (in->cur - in->base)); | 5064 return (base - (in->cur - in->base)); |
4546 » } | 5065 } |
4547 } | 5066 } |
4548 ctxt->checkIndex = base; | 5067 if ((!incomment) && (!invalue)) |
| 5068 ctxt->checkIndex = base; |
4549 #ifdef DEBUG_PUSH | 5069 #ifdef DEBUG_PUSH |
4550 if (next == 0) | 5070 if (next == 0) |
4551 » xmlGenericError(xmlGenericErrorContext, | 5071 xmlGenericError(xmlGenericErrorContext, |
4552 » » "HPP: lookup '%c' failed\n", first); | 5072 "HPP: lookup '%c' failed\n", first); |
4553 else if (third == 0) | 5073 else if (third == 0) |
4554 » xmlGenericError(xmlGenericErrorContext, | 5074 xmlGenericError(xmlGenericErrorContext, |
4555 » » "HPP: lookup '%c%c' failed\n", first, next); | 5075 "HPP: lookup '%c%c' failed\n", first, next); |
4556 else» | 5076 else |
4557 » xmlGenericError(xmlGenericErrorContext, | 5077 xmlGenericError(xmlGenericErrorContext, |
4558 » » "HPP: lookup '%c%c%c' failed\n", first, next, third); | 5078 "HPP: lookup '%c%c%c' failed\n", first, next, |
| 5079 third); |
4559 #endif | 5080 #endif |
4560 return(-1); | 5081 return (-1); |
4561 } | 5082 } |
4562 | 5083 |
4563 /** | 5084 /** |
| 5085 * htmlParseLookupChars: |
| 5086 * @ctxt: an HTML parser context |
| 5087 * @stop: Array of chars, which stop the lookup. |
| 5088 * @stopLen: Length of stop-Array |
| 5089 * |
| 5090 * Try to find if any char of the stop-Array is available in the input |
| 5091 * stream. |
| 5092 * This function has a side effect of (possibly) incrementing ctxt->checkIndex |
| 5093 * to avoid rescanning sequences of bytes, it DOES change the state of the |
| 5094 * parser, do not use liberally. |
| 5095 * |
| 5096 * Returns the index to the current parsing point if a stopChar |
| 5097 * is available, -1 otherwise. |
| 5098 */ |
| 5099 static int |
| 5100 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop, |
| 5101 int stopLen) |
| 5102 { |
| 5103 int base, len; |
| 5104 htmlParserInputPtr in; |
| 5105 const xmlChar *buf; |
| 5106 int incomment = 0; |
| 5107 int i; |
| 5108 |
| 5109 in = ctxt->input; |
| 5110 if (in == NULL) |
| 5111 return (-1); |
| 5112 |
| 5113 base = in->cur - in->base; |
| 5114 if (base < 0) |
| 5115 return (-1); |
| 5116 |
| 5117 if (ctxt->checkIndex > base) |
| 5118 base = ctxt->checkIndex; |
| 5119 |
| 5120 if (in->buf == NULL) { |
| 5121 buf = in->base; |
| 5122 len = in->length; |
| 5123 } else { |
| 5124 buf = in->buf->buffer->content; |
| 5125 len = in->buf->buffer->use; |
| 5126 } |
| 5127 |
| 5128 for (; base < len; base++) { |
| 5129 if (!incomment && (base + 4 < len)) { |
| 5130 if ((buf[base] == '<') && (buf[base + 1] == '!') && |
| 5131 (buf[base + 2] == '-') && (buf[base + 3] == '-')) { |
| 5132 incomment = 1; |
| 5133 /* do not increment past <! - some people use <!--> */ |
| 5134 base += 2; |
| 5135 } |
| 5136 } |
| 5137 if (incomment) { |
| 5138 if (base + 3 > len) |
| 5139 return (-1); |
| 5140 if ((buf[base] == '-') && (buf[base + 1] == '-') && |
| 5141 (buf[base + 2] == '>')) { |
| 5142 incomment = 0; |
| 5143 base += 2; |
| 5144 } |
| 5145 continue; |
| 5146 } |
| 5147 for (i = 0; i < stopLen; ++i) { |
| 5148 if (buf[base] == stop[i]) { |
| 5149 ctxt->checkIndex = 0; |
| 5150 return (base - (in->cur - in->base)); |
| 5151 } |
| 5152 } |
| 5153 } |
| 5154 ctxt->checkIndex = base; |
| 5155 return (-1); |
| 5156 } |
| 5157 |
| 5158 /** |
4564 * htmlParseTryOrFinish: | 5159 * htmlParseTryOrFinish: |
4565 * @ctxt: an HTML parser context | 5160 * @ctxt: an HTML parser context |
4566 * @terminate: last chunk indicator | 5161 * @terminate: last chunk indicator |
4567 * | 5162 * |
4568 * Try to progress on parsing | 5163 * Try to progress on parsing |
4569 * | 5164 * |
4570 * Returns zero if no parsing was possible | 5165 * Returns zero if no parsing was possible |
4571 */ | 5166 */ |
4572 static int | 5167 static int |
4573 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | 5168 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4632 while (1) { | 5227 while (1) { |
4633 | 5228 |
4634 in = ctxt->input; | 5229 in = ctxt->input; |
4635 if (in == NULL) break; | 5230 if (in == NULL) break; |
4636 if (in->buf == NULL) | 5231 if (in->buf == NULL) |
4637 avail = in->length - (in->cur - in->base); | 5232 avail = in->length - (in->cur - in->base); |
4638 else | 5233 else |
4639 avail = in->buf->buffer->use - (in->cur - in->base); | 5234 avail = in->buf->buffer->use - (in->cur - in->base); |
4640 if ((avail == 0) && (terminate)) { | 5235 if ((avail == 0) && (terminate)) { |
4641 htmlAutoCloseOnEnd(ctxt); | 5236 htmlAutoCloseOnEnd(ctxt); |
4642 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { | 5237 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
4643 /* | 5238 /* |
4644 * SAX: end of the document processing. | 5239 * SAX: end of the document processing. |
4645 */ | 5240 */ |
4646 ctxt->instate = XML_PARSER_EOF; | 5241 ctxt->instate = XML_PARSER_EOF; |
4647 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5242 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
4648 ctxt->sax->endDocument(ctxt->userData); | 5243 ctxt->sax->endDocument(ctxt->userData); |
4649 } | 5244 } |
4650 } | 5245 } |
4651 if (avail < 1) | 5246 if (avail < 1) |
4652 goto done; | 5247 goto done; |
(...skipping 29 matching lines...) Expand all Loading... |
4682 ctxt->sax->startDocument(ctxt->userData); | 5277 ctxt->sax->startDocument(ctxt->userData); |
4683 | 5278 |
4684 cur = in->cur[0]; | 5279 cur = in->cur[0]; |
4685 next = in->cur[1]; | 5280 next = in->cur[1]; |
4686 if ((cur == '<') && (next == '!') && | 5281 if ((cur == '<') && (next == '!') && |
4687 (UPP(2) == 'D') && (UPP(3) == 'O') && | 5282 (UPP(2) == 'D') && (UPP(3) == 'O') && |
4688 (UPP(4) == 'C') && (UPP(5) == 'T') && | 5283 (UPP(4) == 'C') && (UPP(5) == 'T') && |
4689 (UPP(6) == 'Y') && (UPP(7) == 'P') && | 5284 (UPP(6) == 'Y') && (UPP(7) == 'P') && |
4690 (UPP(8) == 'E')) { | 5285 (UPP(8) == 'E')) { |
4691 if ((!terminate) && | 5286 if ((!terminate) && |
4692 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5287 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
4693 goto done; | 5288 goto done; |
4694 #ifdef DEBUG_PUSH | 5289 #ifdef DEBUG_PUSH |
4695 xmlGenericError(xmlGenericErrorContext, | 5290 xmlGenericError(xmlGenericErrorContext, |
4696 "HPP: Parsing internal subset\n"); | 5291 "HPP: Parsing internal subset\n"); |
4697 #endif | 5292 #endif |
4698 htmlParseDocTypeDecl(ctxt); | 5293 htmlParseDocTypeDecl(ctxt); |
4699 ctxt->instate = XML_PARSER_PROLOG; | 5294 ctxt->instate = XML_PARSER_PROLOG; |
4700 #ifdef DEBUG_PUSH | 5295 #ifdef DEBUG_PUSH |
4701 xmlGenericError(xmlGenericErrorContext, | 5296 xmlGenericError(xmlGenericErrorContext, |
4702 "HPP: entering PROLOG\n"); | 5297 "HPP: entering PROLOG\n"); |
(...skipping 12 matching lines...) Expand all Loading... |
4715 avail = in->length - (in->cur - in->base); | 5310 avail = in->length - (in->cur - in->base); |
4716 else | 5311 else |
4717 avail = in->buf->buffer->use - (in->cur - in->base); | 5312 avail = in->buf->buffer->use - (in->cur - in->base); |
4718 if (avail < 2) | 5313 if (avail < 2) |
4719 goto done; | 5314 goto done; |
4720 cur = in->cur[0]; | 5315 cur = in->cur[0]; |
4721 next = in->cur[1]; | 5316 next = in->cur[1]; |
4722 if ((cur == '<') && (next == '!') && | 5317 if ((cur == '<') && (next == '!') && |
4723 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5318 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
4724 if ((!terminate) && | 5319 if ((!terminate) && |
4725 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) | 5320 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)
) |
4726 goto done; | 5321 goto done; |
4727 #ifdef DEBUG_PUSH | 5322 #ifdef DEBUG_PUSH |
4728 xmlGenericError(xmlGenericErrorContext, | 5323 xmlGenericError(xmlGenericErrorContext, |
4729 "HPP: Parsing Comment\n"); | 5324 "HPP: Parsing Comment\n"); |
4730 #endif | 5325 #endif |
4731 htmlParseComment(ctxt); | 5326 htmlParseComment(ctxt); |
4732 ctxt->instate = XML_PARSER_MISC; | 5327 ctxt->instate = XML_PARSER_MISC; |
4733 } else if ((cur == '<') && (next == '?')) { | 5328 } else if ((cur == '<') && (next == '?')) { |
4734 if ((!terminate) && | 5329 if ((!terminate) && |
4735 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5330 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
4736 goto done; | 5331 goto done; |
4737 #ifdef DEBUG_PUSH | 5332 #ifdef DEBUG_PUSH |
4738 xmlGenericError(xmlGenericErrorContext, | 5333 xmlGenericError(xmlGenericErrorContext, |
4739 "HPP: Parsing PI\n"); | 5334 "HPP: Parsing PI\n"); |
4740 #endif | 5335 #endif |
4741 htmlParsePI(ctxt); | 5336 htmlParsePI(ctxt); |
4742 ctxt->instate = XML_PARSER_MISC; | 5337 ctxt->instate = XML_PARSER_MISC; |
4743 } else if ((cur == '<') && (next == '!') && | 5338 } else if ((cur == '<') && (next == '!') && |
4744 (UPP(2) == 'D') && (UPP(3) == 'O') && | 5339 (UPP(2) == 'D') && (UPP(3) == 'O') && |
4745 (UPP(4) == 'C') && (UPP(5) == 'T') && | 5340 (UPP(4) == 'C') && (UPP(5) == 'T') && |
4746 (UPP(6) == 'Y') && (UPP(7) == 'P') && | 5341 (UPP(6) == 'Y') && (UPP(7) == 'P') && |
4747 (UPP(8) == 'E')) { | 5342 (UPP(8) == 'E')) { |
4748 if ((!terminate) && | 5343 if ((!terminate) && |
4749 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5344 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
4750 goto done; | 5345 goto done; |
4751 #ifdef DEBUG_PUSH | 5346 #ifdef DEBUG_PUSH |
4752 xmlGenericError(xmlGenericErrorContext, | 5347 xmlGenericError(xmlGenericErrorContext, |
4753 "HPP: Parsing internal subset\n"); | 5348 "HPP: Parsing internal subset\n"); |
4754 #endif | 5349 #endif |
4755 htmlParseDocTypeDecl(ctxt); | 5350 htmlParseDocTypeDecl(ctxt); |
4756 ctxt->instate = XML_PARSER_PROLOG; | 5351 ctxt->instate = XML_PARSER_PROLOG; |
4757 #ifdef DEBUG_PUSH | 5352 #ifdef DEBUG_PUSH |
4758 xmlGenericError(xmlGenericErrorContext, | 5353 xmlGenericError(xmlGenericErrorContext, |
4759 "HPP: entering PROLOG\n"); | 5354 "HPP: entering PROLOG\n"); |
4760 #endif | 5355 #endif |
4761 } else if ((cur == '<') && (next == '!') && | 5356 } else if ((cur == '<') && (next == '!') && |
4762 (avail < 9)) { | 5357 (avail < 9)) { |
4763 goto done; | 5358 goto done; |
4764 } else { | 5359 } else { |
4765 ctxt->instate = XML_PARSER_START_TAG; | 5360 ctxt->instate = XML_PARSER_START_TAG; |
4766 #ifdef DEBUG_PUSH | 5361 #ifdef DEBUG_PUSH |
4767 xmlGenericError(xmlGenericErrorContext, | 5362 xmlGenericError(xmlGenericErrorContext, |
4768 "HPP: entering START_TAG\n"); | 5363 "HPP: entering START_TAG\n"); |
4769 #endif | 5364 #endif |
4770 } | 5365 } |
4771 break; | 5366 break; |
4772 case XML_PARSER_PROLOG: | 5367 case XML_PARSER_PROLOG: |
4773 SKIP_BLANKS; | 5368 SKIP_BLANKS; |
4774 if (in->buf == NULL) | 5369 if (in->buf == NULL) |
4775 avail = in->length - (in->cur - in->base); | 5370 avail = in->length - (in->cur - in->base); |
4776 else | 5371 else |
4777 avail = in->buf->buffer->use - (in->cur - in->base); | 5372 avail = in->buf->buffer->use - (in->cur - in->base); |
4778 » » if (avail < 2) | 5373 » » if (avail < 2) |
4779 goto done; | 5374 goto done; |
4780 cur = in->cur[0]; | 5375 cur = in->cur[0]; |
4781 next = in->cur[1]; | 5376 next = in->cur[1]; |
4782 if ((cur == '<') && (next == '!') && | 5377 if ((cur == '<') && (next == '!') && |
4783 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5378 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
4784 if ((!terminate) && | 5379 if ((!terminate) && |
4785 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) | 5380 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)
) |
4786 goto done; | 5381 goto done; |
4787 #ifdef DEBUG_PUSH | 5382 #ifdef DEBUG_PUSH |
4788 xmlGenericError(xmlGenericErrorContext, | 5383 xmlGenericError(xmlGenericErrorContext, |
4789 "HPP: Parsing Comment\n"); | 5384 "HPP: Parsing Comment\n"); |
4790 #endif | 5385 #endif |
4791 htmlParseComment(ctxt); | 5386 htmlParseComment(ctxt); |
4792 ctxt->instate = XML_PARSER_PROLOG; | 5387 ctxt->instate = XML_PARSER_PROLOG; |
4793 } else if ((cur == '<') && (next == '?')) { | 5388 } else if ((cur == '<') && (next == '?')) { |
4794 if ((!terminate) && | 5389 if ((!terminate) && |
4795 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5390 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
4796 goto done; | 5391 goto done; |
4797 #ifdef DEBUG_PUSH | 5392 #ifdef DEBUG_PUSH |
4798 xmlGenericError(xmlGenericErrorContext, | 5393 xmlGenericError(xmlGenericErrorContext, |
4799 "HPP: Parsing PI\n"); | 5394 "HPP: Parsing PI\n"); |
4800 #endif | 5395 #endif |
4801 htmlParsePI(ctxt); | 5396 htmlParsePI(ctxt); |
4802 ctxt->instate = XML_PARSER_PROLOG; | 5397 ctxt->instate = XML_PARSER_PROLOG; |
4803 } else if ((cur == '<') && (next == '!') && | 5398 } else if ((cur == '<') && (next == '!') && |
4804 (avail < 4)) { | 5399 (avail < 4)) { |
4805 goto done; | 5400 goto done; |
(...skipping 16 matching lines...) Expand all Loading... |
4822 if (IS_BLANK_CH(cur)) { | 5417 if (IS_BLANK_CH(cur)) { |
4823 htmlParseCharData(ctxt); | 5418 htmlParseCharData(ctxt); |
4824 goto done; | 5419 goto done; |
4825 } | 5420 } |
4826 if (avail < 2) | 5421 if (avail < 2) |
4827 goto done; | 5422 goto done; |
4828 next = in->cur[1]; | 5423 next = in->cur[1]; |
4829 if ((cur == '<') && (next == '!') && | 5424 if ((cur == '<') && (next == '!') && |
4830 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5425 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
4831 if ((!terminate) && | 5426 if ((!terminate) && |
4832 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) | 5427 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)
) |
4833 goto done; | 5428 goto done; |
4834 #ifdef DEBUG_PUSH | 5429 #ifdef DEBUG_PUSH |
4835 xmlGenericError(xmlGenericErrorContext, | 5430 xmlGenericError(xmlGenericErrorContext, |
4836 "HPP: Parsing Comment\n"); | 5431 "HPP: Parsing Comment\n"); |
4837 #endif | 5432 #endif |
4838 htmlParseComment(ctxt); | 5433 htmlParseComment(ctxt); |
4839 ctxt->instate = XML_PARSER_EPILOG; | 5434 ctxt->instate = XML_PARSER_EPILOG; |
4840 } else if ((cur == '<') && (next == '?')) { | 5435 } else if ((cur == '<') && (next == '?')) { |
4841 if ((!terminate) && | 5436 if ((!terminate) && |
4842 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5437 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
4843 goto done; | 5438 goto done; |
4844 #ifdef DEBUG_PUSH | 5439 #ifdef DEBUG_PUSH |
4845 xmlGenericError(xmlGenericErrorContext, | 5440 xmlGenericError(xmlGenericErrorContext, |
4846 "HPP: Parsing PI\n"); | 5441 "HPP: Parsing PI\n"); |
4847 #endif | 5442 #endif |
4848 htmlParsePI(ctxt); | 5443 htmlParsePI(ctxt); |
4849 ctxt->instate = XML_PARSER_EPILOG; | 5444 ctxt->instate = XML_PARSER_EPILOG; |
4850 } else if ((cur == '<') && (next == '!') && | 5445 } else if ((cur == '<') && (next == '!') && |
4851 (avail < 4)) { | 5446 (avail < 4)) { |
4852 goto done; | 5447 goto done; |
(...skipping 29 matching lines...) Expand all Loading... |
4882 if (in->cur[1] == '/') { | 5477 if (in->cur[1] == '/') { |
4883 ctxt->instate = XML_PARSER_END_TAG; | 5478 ctxt->instate = XML_PARSER_END_TAG; |
4884 ctxt->checkIndex = 0; | 5479 ctxt->checkIndex = 0; |
4885 #ifdef DEBUG_PUSH | 5480 #ifdef DEBUG_PUSH |
4886 xmlGenericError(xmlGenericErrorContext, | 5481 xmlGenericError(xmlGenericErrorContext, |
4887 "HPP: entering END_TAG\n"); | 5482 "HPP: entering END_TAG\n"); |
4888 #endif | 5483 #endif |
4889 break; | 5484 break; |
4890 } | 5485 } |
4891 if ((!terminate) && | 5486 if ((!terminate) && |
4892 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5487 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
4893 goto done; | 5488 goto done; |
4894 | 5489 |
4895 failed = htmlParseStartTag(ctxt); | 5490 failed = htmlParseStartTag(ctxt); |
4896 name = ctxt->name; | 5491 name = ctxt->name; |
4897 if ((failed == -1) || | 5492 if ((failed == -1) || |
4898 (name == NULL)) { | 5493 (name == NULL)) { |
4899 if (CUR == '>') | 5494 if (CUR == '>') |
4900 NEXT; | 5495 NEXT; |
4901 break; | 5496 break; |
4902 } | 5497 } |
(...skipping 26 matching lines...) Expand all Loading... |
4929 if (CUR == '>') { | 5524 if (CUR == '>') { |
4930 NEXT; | 5525 NEXT; |
4931 } else { | 5526 } else { |
4932 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, | 5527 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, |
4933 "Couldn't find end of Start Tag %s\n", | 5528 "Couldn't find end of Start Tag %s\n", |
4934 name, NULL); | 5529 name, NULL); |
4935 | 5530 |
4936 /* | 5531 /* |
4937 * end of parsing of this node. | 5532 * end of parsing of this node. |
4938 */ | 5533 */ |
4939 » » if (xmlStrEqual(name, ctxt->name)) { | 5534 » » if (xmlStrEqual(name, ctxt->name)) { |
4940 nodePop(ctxt); | 5535 nodePop(ctxt); |
4941 htmlnamePop(ctxt); | 5536 htmlnamePop(ctxt); |
4942 » » } | 5537 » » } |
4943 | 5538 |
4944 ctxt->instate = XML_PARSER_CONTENT; | 5539 ctxt->instate = XML_PARSER_CONTENT; |
4945 #ifdef DEBUG_PUSH | 5540 #ifdef DEBUG_PUSH |
4946 xmlGenericError(xmlGenericErrorContext, | 5541 xmlGenericError(xmlGenericErrorContext, |
4947 "HPP: entering CONTENT\n"); | 5542 "HPP: entering CONTENT\n"); |
4948 #endif | 5543 #endif |
4949 break; | 5544 break; |
4950 } | 5545 } |
4951 | 5546 |
4952 /* | 5547 /* |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5007 cons = ctxt->nbChars; | 5602 cons = ctxt->nbChars; |
5008 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || | 5603 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || |
5009 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { | 5604 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { |
5010 /* | 5605 /* |
5011 * Handle SCRIPT/STYLE separately | 5606 * Handle SCRIPT/STYLE separately |
5012 */ | 5607 */ |
5013 if (!terminate) { | 5608 if (!terminate) { |
5014 int idx; | 5609 int idx; |
5015 xmlChar val; | 5610 xmlChar val; |
5016 | 5611 |
5017 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0); | 5612 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1); |
5018 if (idx < 0) | 5613 if (idx < 0) |
5019 goto done; | 5614 goto done; |
5020 val = in->cur[idx + 2]; | 5615 val = in->cur[idx + 2]; |
5021 if (val == 0) /* bad cut of input */ | 5616 if (val == 0) /* bad cut of input */ |
5022 goto done; | 5617 goto done; |
5023 } | 5618 } |
5024 htmlParseScript(ctxt); | 5619 htmlParseScript(ctxt); |
5025 if ((cur == '<') && (next == '/')) { | 5620 if ((cur == '<') && (next == '/')) { |
5026 ctxt->instate = XML_PARSER_END_TAG; | 5621 ctxt->instate = XML_PARSER_END_TAG; |
5027 ctxt->checkIndex = 0; | 5622 ctxt->checkIndex = 0; |
5028 #ifdef DEBUG_PUSH | 5623 #ifdef DEBUG_PUSH |
5029 xmlGenericError(xmlGenericErrorContext, | 5624 xmlGenericError(xmlGenericErrorContext, |
5030 "HPP: entering END_TAG\n"); | 5625 "HPP: entering END_TAG\n"); |
5031 #endif | 5626 #endif |
5032 break; | 5627 break; |
5033 } | 5628 } |
5034 } else { | 5629 } else { |
5035 /* | 5630 /* |
5036 * Sometimes DOCTYPE arrives in the middle of the document | 5631 * Sometimes DOCTYPE arrives in the middle of the document |
5037 */ | 5632 */ |
5038 if ((cur == '<') && (next == '!') && | 5633 if ((cur == '<') && (next == '!') && |
5039 (UPP(2) == 'D') && (UPP(3) == 'O') && | 5634 (UPP(2) == 'D') && (UPP(3) == 'O') && |
5040 (UPP(4) == 'C') && (UPP(5) == 'T') && | 5635 (UPP(4) == 'C') && (UPP(5) == 'T') && |
5041 (UPP(6) == 'Y') && (UPP(7) == 'P') && | 5636 (UPP(6) == 'Y') && (UPP(7) == 'P') && |
5042 (UPP(8) == 'E')) { | 5637 (UPP(8) == 'E')) { |
5043 if ((!terminate) && | 5638 if ((!terminate) && |
5044 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5639 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)
) |
5045 goto done; | 5640 goto done; |
5046 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, | 5641 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, |
5047 "Misplaced DOCTYPE declaration\n", | 5642 "Misplaced DOCTYPE declaration\n", |
5048 BAD_CAST "DOCTYPE" , NULL); | 5643 BAD_CAST "DOCTYPE" , NULL); |
5049 htmlParseDocTypeDecl(ctxt); | 5644 htmlParseDocTypeDecl(ctxt); |
5050 } else if ((cur == '<') && (next == '!') && | 5645 } else if ((cur == '<') && (next == '!') && |
5051 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5646 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
5052 if ((!terminate) && | 5647 if ((!terminate) && |
5053 (htmlParseLookupSequence( | 5648 (htmlParseLookupSequence( |
5054 » » » » » ctxt, '-', '-', '>', 1) < 0)) | 5649 » » » » ctxt, '-', '-', '>', 1, 1) < 0)) |
5055 goto done; | 5650 goto done; |
5056 #ifdef DEBUG_PUSH | 5651 #ifdef DEBUG_PUSH |
5057 xmlGenericError(xmlGenericErrorContext, | 5652 xmlGenericError(xmlGenericErrorContext, |
5058 "HPP: Parsing Comment\n"); | 5653 "HPP: Parsing Comment\n"); |
5059 #endif | 5654 #endif |
5060 htmlParseComment(ctxt); | 5655 htmlParseComment(ctxt); |
5061 ctxt->instate = XML_PARSER_CONTENT; | 5656 ctxt->instate = XML_PARSER_CONTENT; |
5062 } else if ((cur == '<') && (next == '?')) { | 5657 } else if ((cur == '<') && (next == '?')) { |
5063 if ((!terminate) && | 5658 if ((!terminate) && |
5064 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5659 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)
) |
5065 goto done; | 5660 goto done; |
5066 #ifdef DEBUG_PUSH | 5661 #ifdef DEBUG_PUSH |
5067 xmlGenericError(xmlGenericErrorContext, | 5662 xmlGenericError(xmlGenericErrorContext, |
5068 "HPP: Parsing PI\n"); | 5663 "HPP: Parsing PI\n"); |
5069 #endif | 5664 #endif |
5070 htmlParsePI(ctxt); | 5665 htmlParsePI(ctxt); |
5071 ctxt->instate = XML_PARSER_CONTENT; | 5666 ctxt->instate = XML_PARSER_CONTENT; |
5072 } else if ((cur == '<') && (next == '!') && (avail < 4)) { | 5667 } else if ((cur == '<') && (next == '!') && (avail < 4)) { |
5073 goto done; | 5668 goto done; |
5074 } else if ((cur == '<') && (next == '/')) { | 5669 } else if ((cur == '<') && (next == '/')) { |
5075 ctxt->instate = XML_PARSER_END_TAG; | 5670 ctxt->instate = XML_PARSER_END_TAG; |
5076 ctxt->checkIndex = 0; | 5671 ctxt->checkIndex = 0; |
5077 #ifdef DEBUG_PUSH | 5672 #ifdef DEBUG_PUSH |
5078 xmlGenericError(xmlGenericErrorContext, | 5673 xmlGenericError(xmlGenericErrorContext, |
5079 "HPP: entering END_TAG\n"); | 5674 "HPP: entering END_TAG\n"); |
5080 #endif | 5675 #endif |
5081 break; | 5676 break; |
5082 } else if (cur == '<') { | 5677 } else if (cur == '<') { |
5083 ctxt->instate = XML_PARSER_START_TAG; | 5678 ctxt->instate = XML_PARSER_START_TAG; |
5084 ctxt->checkIndex = 0; | 5679 ctxt->checkIndex = 0; |
5085 #ifdef DEBUG_PUSH | 5680 #ifdef DEBUG_PUSH |
5086 xmlGenericError(xmlGenericErrorContext, | 5681 xmlGenericError(xmlGenericErrorContext, |
5087 "HPP: entering START_TAG\n"); | 5682 "HPP: entering START_TAG\n"); |
5088 #endif | 5683 #endif |
5089 break; | 5684 break; |
5090 } else if (cur == '&') { | 5685 } else if (cur == '&') { |
5091 if ((!terminate) && | 5686 if ((!terminate) && |
5092 » » » (htmlParseLookupSequence(ctxt, ';', 0, 0, 0) < 0)) | 5687 » » » (htmlParseLookupChars(ctxt, |
| 5688 BAD_CAST "; >/", 4) < 0)) |
5093 goto done; | 5689 goto done; |
5094 #ifdef DEBUG_PUSH | 5690 #ifdef DEBUG_PUSH |
5095 xmlGenericError(xmlGenericErrorContext, | 5691 xmlGenericError(xmlGenericErrorContext, |
5096 "HPP: Parsing Reference\n"); | 5692 "HPP: Parsing Reference\n"); |
5097 #endif | 5693 #endif |
5098 /* TODO: check generation of subtrees if noent !!! */ | 5694 /* TODO: check generation of subtrees if noent !!! */ |
5099 htmlParseReference(ctxt); | 5695 htmlParseReference(ctxt); |
5100 } else { | 5696 } else { |
5101 /* | 5697 /* |
5102 * check that the text sequence is complete | 5698 * check that the text sequence is complete |
5103 * before handing out the data to the parser | 5699 * before handing out the data to the parser |
5104 * to avoid problems with erroneous end of | 5700 * to avoid problems with erroneous end of |
5105 * data detection. | 5701 * data detection. |
5106 */ | 5702 */ |
5107 if ((!terminate) && | 5703 if ((!terminate) && |
5108 » » » (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0)) | 5704 (htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0)) |
5109 goto done; | 5705 goto done; |
5110 ctxt->checkIndex = 0; | 5706 ctxt->checkIndex = 0; |
5111 #ifdef DEBUG_PUSH | 5707 #ifdef DEBUG_PUSH |
5112 xmlGenericError(xmlGenericErrorContext, | 5708 xmlGenericError(xmlGenericErrorContext, |
5113 "HPP: Parsing char data\n"); | 5709 "HPP: Parsing char data\n"); |
5114 #endif | 5710 #endif |
5115 htmlParseCharData(ctxt); | 5711 htmlParseCharData(ctxt); |
5116 } | 5712 } |
5117 } | 5713 } |
5118 if (cons == ctxt->nbChars) { | 5714 if (cons == ctxt->nbChars) { |
5119 if (ctxt->node != NULL) { | 5715 if (ctxt->node != NULL) { |
5120 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 5716 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
5121 "detected an error in element content\n", | 5717 "detected an error in element content\n", |
5122 NULL, NULL); | 5718 NULL, NULL); |
5123 } | 5719 } |
5124 NEXT; | 5720 NEXT; |
5125 break; | 5721 break; |
5126 } | 5722 } |
5127 | 5723 |
5128 break; | 5724 break; |
5129 } | 5725 } |
5130 case XML_PARSER_END_TAG: | 5726 case XML_PARSER_END_TAG: |
5131 if (avail < 2) | 5727 if (avail < 2) |
5132 goto done; | 5728 goto done; |
5133 if ((!terminate) && | 5729 if ((!terminate) && |
5134 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5730 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
5135 goto done; | 5731 goto done; |
5136 htmlParseEndTag(ctxt); | 5732 htmlParseEndTag(ctxt); |
5137 if (ctxt->nameNr == 0) { | 5733 if (ctxt->nameNr == 0) { |
5138 ctxt->instate = XML_PARSER_EPILOG; | 5734 ctxt->instate = XML_PARSER_EPILOG; |
5139 } else { | 5735 } else { |
5140 ctxt->instate = XML_PARSER_CONTENT; | 5736 ctxt->instate = XML_PARSER_CONTENT; |
5141 } | 5737 } |
5142 ctxt->checkIndex = 0; | 5738 ctxt->checkIndex = 0; |
5143 #ifdef DEBUG_PUSH | 5739 #ifdef DEBUG_PUSH |
5144 xmlGenericError(xmlGenericErrorContext, | 5740 xmlGenericError(xmlGenericErrorContext, |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5251 ctxt->instate = XML_PARSER_CONTENT; | 5847 ctxt->instate = XML_PARSER_CONTENT; |
5252 ctxt->checkIndex = 0; | 5848 ctxt->checkIndex = 0; |
5253 #ifdef DEBUG_PUSH | 5849 #ifdef DEBUG_PUSH |
5254 xmlGenericError(xmlGenericErrorContext, | 5850 xmlGenericError(xmlGenericErrorContext, |
5255 "HPP: entering CONTENT\n"); | 5851 "HPP: entering CONTENT\n"); |
5256 #endif | 5852 #endif |
5257 break; | 5853 break; |
5258 | 5854 |
5259 } | 5855 } |
5260 } | 5856 } |
5261 done: | 5857 done: |
5262 if ((avail == 0) && (terminate)) { | 5858 if ((avail == 0) && (terminate)) { |
5263 htmlAutoCloseOnEnd(ctxt); | 5859 htmlAutoCloseOnEnd(ctxt); |
5264 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { | 5860 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
5265 /* | 5861 /* |
5266 * SAX: end of the document processing. | 5862 * SAX: end of the document processing. |
5267 */ | 5863 */ |
5268 ctxt->instate = XML_PARSER_EOF; | 5864 ctxt->instate = XML_PARSER_EOF; |
5269 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5865 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
5270 ctxt->sax->endDocument(ctxt->userData); | 5866 ctxt->sax->endDocument(ctxt->userData); |
5271 } | 5867 } |
5272 } | 5868 } |
5273 if ((ctxt->myDoc != NULL) && | 5869 if ((ctxt->myDoc != NULL) && |
5274 ((terminate) || (ctxt->instate == XML_PARSER_EOF) || | 5870 ((terminate) || (ctxt->instate == XML_PARSER_EOF) || |
5275 (ctxt->instate == XML_PARSER_EPILOG))) { | 5871 (ctxt->instate == XML_PARSER_EPILOG))) { |
5276 xmlDtdPtr dtd; | 5872 xmlDtdPtr dtd; |
5277 dtd = xmlGetIntSubset(ctxt->myDoc); | 5873 dtd = xmlGetIntSubset(ctxt->myDoc); |
5278 if (dtd == NULL) | 5874 if (dtd == NULL) |
5279 » ctxt->myDoc->intSubset = | 5875 » ctxt->myDoc->intSubset = |
5280 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", | 5876 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", |
5281 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", | 5877 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", |
5282 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); | 5878 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); |
5283 } | 5879 } |
5284 #ifdef DEBUG_PUSH | 5880 #ifdef DEBUG_PUSH |
5285 xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret); | 5881 xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret); |
5286 #endif | 5882 #endif |
5287 return(ret); | 5883 return(ret); |
5288 } | 5884 } |
5289 | 5885 |
5290 /** | 5886 /** |
(...skipping 13 matching lines...) Expand all Loading... |
5304 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 5900 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
5305 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 5901 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
5306 "htmlParseChunk: context error\n", NULL, NULL); | 5902 "htmlParseChunk: context error\n", NULL, NULL); |
5307 return(XML_ERR_INTERNAL_ERROR); | 5903 return(XML_ERR_INTERNAL_ERROR); |
5308 } | 5904 } |
5309 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && | 5905 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
5310 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { | 5906 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
5311 int base = ctxt->input->base - ctxt->input->buf->buffer->content; | 5907 int base = ctxt->input->base - ctxt->input->buf->buffer->content; |
5312 int cur = ctxt->input->cur - ctxt->input->base; | 5908 int cur = ctxt->input->cur - ctxt->input->base; |
5313 int res; | 5909 int res; |
5314 » | 5910 |
5315 » res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);» | 5911 » res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
5316 if (res < 0) { | 5912 if (res < 0) { |
5317 ctxt->errNo = XML_PARSER_EOF; | 5913 ctxt->errNo = XML_PARSER_EOF; |
5318 ctxt->disableSAX = 1; | 5914 ctxt->disableSAX = 1; |
5319 return (XML_PARSER_EOF); | 5915 return (XML_PARSER_EOF); |
5320 } | 5916 } |
5321 ctxt->input->base = ctxt->input->buf->buffer->content + base; | 5917 ctxt->input->base = ctxt->input->buf->buffer->content + base; |
5322 ctxt->input->cur = ctxt->input->base + cur; | 5918 ctxt->input->cur = ctxt->input->base + cur; |
5323 ctxt->input->end = | 5919 ctxt->input->end = |
5324 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | 5920 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; |
5325 #ifdef DEBUG_PUSH | 5921 #ifdef DEBUG_PUSH |
5326 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); | 5922 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
5327 #endif | 5923 #endif |
5328 | 5924 |
5329 #if 0 | 5925 #if 0 |
5330 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) | 5926 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) |
5331 htmlParseTryOrFinish(ctxt, terminate); | 5927 htmlParseTryOrFinish(ctxt, terminate); |
5332 #endif | 5928 #endif |
5333 } else if (ctxt->instate != XML_PARSER_EOF) { | 5929 } else if (ctxt->instate != XML_PARSER_EOF) { |
5334 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { | 5930 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { |
5335 xmlParserInputBufferPtr in = ctxt->input->buf; | 5931 xmlParserInputBufferPtr in = ctxt->input->buf; |
5336 if ((in->encoder != NULL) && (in->buffer != NULL) && | 5932 if ((in->encoder != NULL) && (in->buffer != NULL) && |
5337 (in->raw != NULL)) { | 5933 (in->raw != NULL)) { |
5338 int nbchars; | 5934 int nbchars; |
5339 » » | 5935 |
5340 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); | 5936 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); |
5341 if (nbchars < 0) { | 5937 if (nbchars < 0) { |
5342 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 5938 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
5343 "encoder error\n", NULL, NULL); | 5939 "encoder error\n", NULL, NULL); |
5344 return(XML_ERR_INVALID_ENCODING); | 5940 return(XML_ERR_INVALID_ENCODING); |
5345 } | 5941 } |
5346 } | 5942 } |
5347 } | 5943 } |
5348 } | 5944 } |
5349 htmlParseTryOrFinish(ctxt, terminate); | 5945 htmlParseTryOrFinish(ctxt, terminate); |
5350 if (terminate) { | 5946 if (terminate) { |
5351 if ((ctxt->instate != XML_PARSER_EOF) && | 5947 if ((ctxt->instate != XML_PARSER_EOF) && |
5352 (ctxt->instate != XML_PARSER_EPILOG) && | 5948 (ctxt->instate != XML_PARSER_EPILOG) && |
5353 (ctxt->instate != XML_PARSER_MISC)) { | 5949 (ctxt->instate != XML_PARSER_MISC)) { |
5354 ctxt->errNo = XML_ERR_DOCUMENT_END; | 5950 ctxt->errNo = XML_ERR_DOCUMENT_END; |
5355 ctxt->wellFormed = 0; | 5951 ctxt->wellFormed = 0; |
5356 » } | 5952 » } |
5357 if (ctxt->instate != XML_PARSER_EOF) { | 5953 if (ctxt->instate != XML_PARSER_EOF) { |
5358 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5954 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
5359 ctxt->sax->endDocument(ctxt->userData); | 5955 ctxt->sax->endDocument(ctxt->userData); |
5360 } | 5956 } |
5361 ctxt->instate = XML_PARSER_EOF; | 5957 ctxt->instate = XML_PARSER_EOF; |
5362 } | 5958 } |
5363 return((xmlParserErrors) ctxt->errNo);» | 5959 return((xmlParserErrors) ctxt->errNo); |
5364 } | 5960 } |
5365 | 5961 |
5366 /************************************************************************ | 5962 /************************************************************************ |
5367 * * | 5963 * * |
5368 * User entry points * | 5964 * User entry points * |
5369 * * | 5965 * * |
5370 ************************************************************************/ | 5966 ************************************************************************/ |
5371 | 5967 |
5372 /** | 5968 /** |
5373 * htmlCreatePushParserCtxt: | 5969 * htmlCreatePushParserCtxt: |
5374 * @sax: a SAX handler | 5970 * @sax: a SAX handler |
5375 * @user_data: The user data returned on SAX callbacks | 5971 * @user_data: The user data returned on SAX callbacks |
5376 * @chunk: a pointer to an array of chars | 5972 * @chunk: a pointer to an array of chars |
5377 * @size: number of chars in the array | 5973 * @size: number of chars in the array |
5378 * @filename: an optional file name or URI | 5974 * @filename: an optional file name or URI |
5379 * @enc: an optional encoding | 5975 * @enc: an optional encoding |
5380 * | 5976 * |
5381 * Create a parser context for using the HTML parser in push mode | 5977 * Create a parser context for using the HTML parser in push mode |
5382 * The value of @filename is used for fetching external entities | 5978 * The value of @filename is used for fetching external entities |
5383 * and error/warning reports. | 5979 * and error/warning reports. |
5384 * | 5980 * |
5385 * Returns the new parser context or NULL | 5981 * Returns the new parser context or NULL |
5386 */ | 5982 */ |
5387 htmlParserCtxtPtr | 5983 htmlParserCtxtPtr |
5388 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, | 5984 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, |
5389 const char *chunk, int size, const char *filename, | 5985 const char *chunk, int size, const char *filename, |
5390 xmlCharEncoding enc) { | 5986 xmlCharEncoding enc) { |
5391 htmlParserCtxtPtr ctxt; | 5987 htmlParserCtxtPtr ctxt; |
5392 htmlParserInputPtr inputStream; | 5988 htmlParserInputPtr inputStream; |
5393 xmlParserInputBufferPtr buf; | 5989 xmlParserInputBufferPtr buf; |
5394 | 5990 |
5395 xmlInitParser(); | 5991 xmlInitParser(); |
5396 | 5992 |
5397 buf = xmlAllocParserInputBuffer(enc); | 5993 buf = xmlAllocParserInputBuffer(enc); |
5398 if (buf == NULL) return(NULL); | 5994 if (buf == NULL) return(NULL); |
(...skipping 10 matching lines...) Expand all Loading... |
5409 xmlFree(ctxt->sax); | 6005 xmlFree(ctxt->sax); |
5410 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler)); | 6006 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler)); |
5411 if (ctxt->sax == NULL) { | 6007 if (ctxt->sax == NULL) { |
5412 xmlFree(buf); | 6008 xmlFree(buf); |
5413 xmlFree(ctxt); | 6009 xmlFree(ctxt); |
5414 return(NULL); | 6010 return(NULL); |
5415 } | 6011 } |
5416 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler)); | 6012 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler)); |
5417 if (user_data != NULL) | 6013 if (user_data != NULL) |
5418 ctxt->userData = user_data; | 6014 ctxt->userData = user_data; |
5419 }» | 6015 } |
5420 if (filename == NULL) { | 6016 if (filename == NULL) { |
5421 ctxt->directory = NULL; | 6017 ctxt->directory = NULL; |
5422 } else { | 6018 } else { |
5423 ctxt->directory = xmlParserGetDirectory(filename); | 6019 ctxt->directory = xmlParserGetDirectory(filename); |
5424 } | 6020 } |
5425 | 6021 |
5426 inputStream = htmlNewInputStream(ctxt); | 6022 inputStream = htmlNewInputStream(ctxt); |
5427 if (inputStream == NULL) { | 6023 if (inputStream == NULL) { |
5428 xmlFreeParserCtxt(ctxt); | 6024 xmlFreeParserCtxt(ctxt); |
5429 xmlFree(buf); | 6025 xmlFree(buf); |
5430 return(NULL); | 6026 return(NULL); |
5431 } | 6027 } |
5432 | 6028 |
5433 if (filename == NULL) | 6029 if (filename == NULL) |
5434 inputStream->filename = NULL; | 6030 inputStream->filename = NULL; |
5435 else | 6031 else |
5436 inputStream->filename = (char *) | 6032 inputStream->filename = (char *) |
5437 xmlCanonicPath((const xmlChar *) filename); | 6033 xmlCanonicPath((const xmlChar *) filename); |
5438 inputStream->buf = buf; | 6034 inputStream->buf = buf; |
5439 inputStream->base = inputStream->buf->buffer->content; | 6035 inputStream->base = inputStream->buf->buffer->content; |
5440 inputStream->cur = inputStream->buf->buffer->content; | 6036 inputStream->cur = inputStream->buf->buffer->content; |
5441 inputStream->end = | 6037 inputStream->end = |
5442 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; | 6038 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; |
5443 | 6039 |
5444 inputPush(ctxt, inputStream); | 6040 inputPush(ctxt, inputStream); |
5445 | 6041 |
5446 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && | 6042 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
5447 (ctxt->input->buf != NULL)) {» | 6043 (ctxt->input->buf != NULL)) { |
5448 int base = ctxt->input->base - ctxt->input->buf->buffer->content; | 6044 int base = ctxt->input->base - ctxt->input->buf->buffer->content; |
5449 int cur = ctxt->input->cur - ctxt->input->base; | 6045 int cur = ctxt->input->cur - ctxt->input->base; |
5450 | 6046 |
5451 » xmlParserInputBufferPush(ctxt->input->buf, size, chunk);» | 6047 » xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
5452 | 6048 |
5453 ctxt->input->base = ctxt->input->buf->buffer->content + base; | 6049 ctxt->input->base = ctxt->input->buf->buffer->content + base; |
5454 ctxt->input->cur = ctxt->input->base + cur; | 6050 ctxt->input->cur = ctxt->input->base + cur; |
5455 ctxt->input->end = | 6051 ctxt->input->end = |
5456 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | 6052 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; |
5457 #ifdef DEBUG_PUSH | 6053 #ifdef DEBUG_PUSH |
5458 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); | 6054 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
5459 #endif | 6055 #endif |
5460 } | 6056 } |
5461 ctxt->progressive = 1; | 6057 ctxt->progressive = 1; |
5462 | 6058 |
5463 return(ctxt); | 6059 return(ctxt); |
5464 } | 6060 } |
5465 #endif /* LIBXML_PUSH_ENABLED */ | 6061 #endif /* LIBXML_PUSH_ENABLED */ |
5466 | 6062 |
5467 /** | 6063 /** |
5468 * htmlSAXParseDoc: | 6064 * htmlSAXParseDoc: |
5469 * @cur: a pointer to an array of xmlChar | 6065 * @cur: a pointer to an array of xmlChar |
5470 * @encoding: a free form C string describing the HTML document encoding, or NU
LL | 6066 * @encoding: a free form C string describing the HTML document encoding, or NU
LL |
5471 * @sax: the SAX handler block | 6067 * @sax: the SAX handler block |
5472 * @userData: if using SAX, this pointer will be provided on callbacks. | 6068 * @userData: if using SAX, this pointer will be provided on callbacks. |
5473 * | 6069 * |
5474 * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks | 6070 * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks |
5475 * to handle parse events. If sax is NULL, fallback to the default DOM | 6071 * to handle parse events. If sax is NULL, fallback to the default DOM |
5476 * behavior and return a tree. | 6072 * behavior and return a tree. |
5477 * | 6073 * |
5478 * Returns the resulting document tree unless SAX is NULL or the document is | 6074 * Returns the resulting document tree unless SAX is NULL or the document is |
5479 * not well formed. | 6075 * not well formed. |
5480 */ | 6076 */ |
5481 | 6077 |
5482 htmlDocPtr | 6078 htmlDocPtr |
5483 htmlSAXParseDoc(xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void
*userData) { | 6079 htmlSAXParseDoc(xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void
*userData) { |
5484 htmlDocPtr ret; | 6080 htmlDocPtr ret; |
5485 htmlParserCtxtPtr ctxt; | 6081 htmlParserCtxtPtr ctxt; |
5486 | 6082 |
5487 xmlInitParser(); | 6083 xmlInitParser(); |
5488 | 6084 |
5489 if (cur == NULL) return(NULL); | 6085 if (cur == NULL) return(NULL); |
5490 | 6086 |
5491 | 6087 |
5492 ctxt = htmlCreateDocParserCtxt(cur, encoding); | 6088 ctxt = htmlCreateDocParserCtxt(cur, encoding); |
5493 if (ctxt == NULL) return(NULL); | 6089 if (ctxt == NULL) return(NULL); |
5494 if (sax != NULL) { | 6090 if (sax != NULL) { |
5495 if (ctxt->sax != NULL) xmlFree (ctxt->sax); | 6091 if (ctxt->sax != NULL) xmlFree (ctxt->sax); |
5496 ctxt->sax = sax; | 6092 ctxt->sax = sax; |
5497 ctxt->userData = userData; | 6093 ctxt->userData = userData; |
5498 } | 6094 } |
5499 | 6095 |
5500 htmlParseDocument(ctxt); | 6096 htmlParseDocument(ctxt); |
5501 ret = ctxt->myDoc; | 6097 ret = ctxt->myDoc; |
5502 if (sax != NULL) { | 6098 if (sax != NULL) { |
5503 ctxt->sax = NULL; | 6099 ctxt->sax = NULL; |
5504 ctxt->userData = NULL; | 6100 ctxt->userData = NULL; |
5505 } | 6101 } |
5506 htmlFreeParserCtxt(ctxt); | 6102 htmlFreeParserCtxt(ctxt); |
5507 | 6103 |
5508 return(ret); | 6104 return(ret); |
5509 } | 6105 } |
5510 | 6106 |
5511 /** | 6107 /** |
5512 * htmlParseDoc: | 6108 * htmlParseDoc: |
5513 * @cur: a pointer to an array of xmlChar | 6109 * @cur: a pointer to an array of xmlChar |
5514 * @encoding: a free form C string describing the HTML document encoding, or NU
LL | 6110 * @encoding: a free form C string describing the HTML document encoding, or NU
LL |
5515 * | 6111 * |
5516 * parse an HTML in-memory document and build a tree. | 6112 * parse an HTML in-memory document and build a tree. |
5517 * | 6113 * |
5518 * Returns the resulting document tree | 6114 * Returns the resulting document tree |
5519 */ | 6115 */ |
5520 | 6116 |
5521 htmlDocPtr | 6117 htmlDocPtr |
5522 htmlParseDoc(xmlChar *cur, const char *encoding) { | 6118 htmlParseDoc(xmlChar *cur, const char *encoding) { |
5523 return(htmlSAXParseDoc(cur, encoding, NULL, NULL)); | 6119 return(htmlSAXParseDoc(cur, encoding, NULL, NULL)); |
5524 } | 6120 } |
5525 | 6121 |
5526 | 6122 |
5527 /** | 6123 /** |
5528 * htmlCreateFileParserCtxt: | 6124 * htmlCreateFileParserCtxt: |
5529 * @filename: the filename | 6125 * @filename: the filename |
5530 * @encoding: a free form C string describing the HTML document encoding, or NU
LL | 6126 * @encoding: a free form C string describing the HTML document encoding, or NU
LL |
5531 * | 6127 * |
5532 * Create a parser context for a file content. | 6128 * Create a parser context for a file content. |
5533 * Automatic support for ZLIB/Compress compressed document is provided | 6129 * Automatic support for ZLIB/Compress compressed document is provided |
5534 * by default if found at compile-time. | 6130 * by default if found at compile-time. |
5535 * | 6131 * |
5536 * Returns the new parser context or NULL | 6132 * Returns the new parser context or NULL |
5537 */ | 6133 */ |
5538 htmlParserCtxtPtr | 6134 htmlParserCtxtPtr |
5539 htmlCreateFileParserCtxt(const char *filename, const char *encoding) | 6135 htmlCreateFileParserCtxt(const char *filename, const char *encoding) |
5540 { | 6136 { |
5541 htmlParserCtxtPtr ctxt; | 6137 htmlParserCtxtPtr ctxt; |
5542 htmlParserInputPtr inputStream; | 6138 htmlParserInputPtr inputStream; |
(...skipping 11 matching lines...) Expand all Loading... |
5554 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename); | 6150 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename); |
5555 if (canonicFilename == NULL) { | 6151 if (canonicFilename == NULL) { |
5556 #ifdef LIBXML_SAX1_ENABLED | 6152 #ifdef LIBXML_SAX1_ENABLED |
5557 if (xmlDefaultSAXHandler.error != NULL) { | 6153 if (xmlDefaultSAXHandler.error != NULL) { |
5558 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); | 6154 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); |
5559 } | 6155 } |
5560 #endif | 6156 #endif |
5561 xmlFreeParserCtxt(ctxt); | 6157 xmlFreeParserCtxt(ctxt); |
5562 return(NULL); | 6158 return(NULL); |
5563 } | 6159 } |
5564 | 6160 |
5565 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt); | 6161 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt); |
5566 xmlFree(canonicFilename); | 6162 xmlFree(canonicFilename); |
5567 if (inputStream == NULL) { | 6163 if (inputStream == NULL) { |
5568 xmlFreeParserCtxt(ctxt); | 6164 xmlFreeParserCtxt(ctxt); |
5569 return(NULL); | 6165 return(NULL); |
5570 } | 6166 } |
5571 | 6167 |
5572 inputPush(ctxt, inputStream); | 6168 inputPush(ctxt, inputStream); |
5573 | 6169 |
5574 /* set encoding */ | 6170 /* set encoding */ |
5575 if (encoding) { | 6171 if (encoding) { |
5576 content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) +
1); | 6172 content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) +
1); |
5577 » if (content) { | 6173 » if (content) { |
5578 strcpy ((char *)content, (char *)content_line); | 6174 strcpy ((char *)content, (char *)content_line); |
5579 strcat ((char *)content, (char *)encoding); | 6175 strcat ((char *)content, (char *)encoding); |
5580 htmlCheckEncoding (ctxt, content); | 6176 htmlCheckEncoding (ctxt, content); |
5581 xmlFree (content); | 6177 xmlFree (content); |
5582 } | 6178 } |
5583 } | 6179 } |
5584 | 6180 |
5585 return(ctxt); | 6181 return(ctxt); |
5586 } | 6182 } |
5587 | 6183 |
5588 /** | 6184 /** |
5589 * htmlSAXParseFile: | 6185 * htmlSAXParseFile: |
5590 * @filename: the filename | 6186 * @filename: the filename |
5591 * @encoding: a free form C string describing the HTML document encoding, or NU
LL | 6187 * @encoding: a free form C string describing the HTML document encoding, or NU
LL |
5592 * @sax: the SAX handler block | 6188 * @sax: the SAX handler block |
5593 * @userData: if using SAX, this pointer will be provided on callbacks. | 6189 * @userData: if using SAX, this pointer will be provided on callbacks. |
5594 * | 6190 * |
5595 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress | 6191 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress |
5596 * compressed document is provided by default if found at compile-time. | 6192 * compressed document is provided by default if found at compile-time. |
5597 * It use the given SAX function block to handle the parsing callback. | 6193 * It use the given SAX function block to handle the parsing callback. |
5598 * If sax is NULL, fallback to the default DOM tree building routines. | 6194 * If sax is NULL, fallback to the default DOM tree building routines. |
5599 * | 6195 * |
5600 * Returns the resulting document tree unless SAX is NULL or the document is | 6196 * Returns the resulting document tree unless SAX is NULL or the document is |
5601 * not well formed. | 6197 * not well formed. |
5602 */ | 6198 */ |
5603 | 6199 |
5604 htmlDocPtr | 6200 htmlDocPtr |
5605 htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr s
ax, | 6201 htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr s
ax, |
5606 void *userData) { | 6202 void *userData) { |
5607 htmlDocPtr ret; | 6203 htmlDocPtr ret; |
5608 htmlParserCtxtPtr ctxt; | 6204 htmlParserCtxtPtr ctxt; |
5609 htmlSAXHandlerPtr oldsax = NULL; | 6205 htmlSAXHandlerPtr oldsax = NULL; |
5610 | 6206 |
5611 xmlInitParser(); | 6207 xmlInitParser(); |
5612 | 6208 |
5613 ctxt = htmlCreateFileParserCtxt(filename, encoding); | 6209 ctxt = htmlCreateFileParserCtxt(filename, encoding); |
5614 if (ctxt == NULL) return(NULL); | 6210 if (ctxt == NULL) return(NULL); |
5615 if (sax != NULL) { | 6211 if (sax != NULL) { |
5616 oldsax = ctxt->sax; | 6212 oldsax = ctxt->sax; |
5617 ctxt->sax = sax; | 6213 ctxt->sax = sax; |
5618 ctxt->userData = userData; | 6214 ctxt->userData = userData; |
5619 } | 6215 } |
5620 | 6216 |
5621 htmlParseDocument(ctxt); | 6217 htmlParseDocument(ctxt); |
5622 | 6218 |
5623 ret = ctxt->myDoc; | 6219 ret = ctxt->myDoc; |
5624 if (sax != NULL) { | 6220 if (sax != NULL) { |
5625 ctxt->sax = oldsax; | 6221 ctxt->sax = oldsax; |
5626 ctxt->userData = NULL; | 6222 ctxt->userData = NULL; |
5627 } | 6223 } |
5628 htmlFreeParserCtxt(ctxt); | 6224 htmlFreeParserCtxt(ctxt); |
5629 | 6225 |
5630 return(ret); | 6226 return(ret); |
5631 } | 6227 } |
5632 | 6228 |
5633 /** | 6229 /** |
5634 * htmlParseFile: | 6230 * htmlParseFile: |
5635 * @filename: the filename | 6231 * @filename: the filename |
5636 * @encoding: a free form C string describing the HTML document encoding, or NU
LL | 6232 * @encoding: a free form C string describing the HTML document encoding, or NU
LL |
5637 * | 6233 * |
5638 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress | 6234 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress |
5639 * compressed document is provided by default if found at compile-time. | 6235 * compressed document is provided by default if found at compile-time. |
5640 * | 6236 * |
5641 * Returns the resulting document tree | 6237 * Returns the resulting document tree |
5642 */ | 6238 */ |
5643 | 6239 |
5644 htmlDocPtr | 6240 htmlDocPtr |
5645 htmlParseFile(const char *filename, const char *encoding) { | 6241 htmlParseFile(const char *filename, const char *encoding) { |
5646 return(htmlSAXParseFile(filename, encoding, NULL, NULL)); | 6242 return(htmlSAXParseFile(filename, encoding, NULL, NULL)); |
5647 } | 6243 } |
5648 | 6244 |
5649 /** | 6245 /** |
5650 * htmlHandleOmittedElem: | 6246 * htmlHandleOmittedElem: |
5651 * @val: int 0 or 1 | 6247 * @val: int 0 or 1 |
5652 * | 6248 * |
5653 * Set and return the previous value for handling HTML omitted tags. | 6249 * Set and return the previous value for handling HTML omitted tags. |
5654 * | 6250 * |
5655 * Returns the previous value: 0 for no handling, 1 for auto insertion. | 6251 * Returns the previous value: 0 for no handling, 1 for auto insertion. |
5656 */ | 6252 */ |
5657 | 6253 |
5658 int | 6254 int |
5659 htmlHandleOmittedElem(int val) { | 6255 htmlHandleOmittedElem(int val) { |
5660 int old = htmlOmittedDefaultValue; | 6256 int old = htmlOmittedDefaultValue; |
5661 | 6257 |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5781 * * | 6377 * * |
5782 ************************************************************************/ | 6378 ************************************************************************/ |
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Release @str with xmlFree() when it is non-NULL and is not owned by the
 * dictionary named "dict" in the scope where the macro is expanded, i.e.
 * when "dict" is NULL or xmlDictOwns() reports 0 for the string.
 *
 * The expansion is a bare "if" statement that already ends with a
 * semicolon, and it evaluates @str more than once.
 */
#define DICT_FREE(str)						\
	if (((str) != NULL) &&					\
	    (((dict) == NULL) ||				\
	     (!xmlDictOwns(dict, (const xmlChar *)(str)))))	\
	    xmlFree((char *)(str));
5794 | 6390 |
5795 /** | 6391 /** |
5796 * htmlCtxtReset: | 6392 * htmlCtxtReset: |
5797 * @ctxt: an HTML parser context | 6393 * @ctxt: an HTML parser context |
5798 * | 6394 * |
5799 * Reset a parser context | 6395 * Reset a parser context |
5800 */ | 6396 */ |
5801 void | 6397 void |
5802 htmlCtxtReset(htmlParserCtxtPtr ctxt) | 6398 htmlCtxtReset(htmlParserCtxtPtr ctxt) |
5803 { | 6399 { |
5804 xmlParserInputPtr input; | 6400 xmlParserInputPtr input; |
5805 xmlDictPtr dict; | 6401 xmlDictPtr dict; |
5806 | 6402 |
5807 if (ctxt == NULL) | 6403 if (ctxt == NULL) |
5808 return; | 6404 return; |
5809 | 6405 |
5810 xmlInitParser(); | 6406 xmlInitParser(); |
5811 dict = ctxt->dict; | 6407 dict = ctxt->dict; |
5812 | 6408 |
5813 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ | 6409 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ |
5814 xmlFreeInputStream(input); | 6410 xmlFreeInputStream(input); |
5815 } | 6411 } |
5816 ctxt->inputNr = 0; | 6412 ctxt->inputNr = 0; |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5923 ctxt->keepBlanks = 1; | 6519 ctxt->keepBlanks = 1; |
5924 if (options & HTML_PARSE_RECOVER) { | 6520 if (options & HTML_PARSE_RECOVER) { |
5925 ctxt->recovery = 1; | 6521 ctxt->recovery = 1; |
5926 options -= HTML_PARSE_RECOVER; | 6522 options -= HTML_PARSE_RECOVER; |
5927 } else | 6523 } else |
5928 ctxt->recovery = 0; | 6524 ctxt->recovery = 0; |
5929 if (options & HTML_PARSE_COMPACT) { | 6525 if (options & HTML_PARSE_COMPACT) { |
5930 ctxt->options |= HTML_PARSE_COMPACT; | 6526 ctxt->options |= HTML_PARSE_COMPACT; |
5931 options -= HTML_PARSE_COMPACT; | 6527 options -= HTML_PARSE_COMPACT; |
5932 } | 6528 } |
| 6529 if (options & XML_PARSE_HUGE) { |
| 6530 ctxt->options |= XML_PARSE_HUGE; |
| 6531 options -= XML_PARSE_HUGE; |
| 6532 } |
5933 ctxt->dictNames = 0; | 6533 ctxt->dictNames = 0; |
5934 return (options); | 6534 return (options); |
5935 } | 6535 } |
5936 | 6536 |
5937 /** | 6537 /** |
5938 * htmlDoRead: | 6538 * htmlDoRead: |
5939 * @ctxt: an HTML parser context | 6539 * @ctxt: an HTML parser context |
5940 * @URL: the base URL to use for the document, or NULL | 6540 * @URL: the base URL to use for the document, or NULL |
5941 * @encoding: the document encoding, or NULL | 6541 * @encoding: the document encoding, or NULL |
5942 * @options: a combination of htmlParserOption(s) | 6542 * @options: a combination of htmlParserOption(s) |
5943 * @reuse: if non-zero keep @ctxt for reuse, otherwise @ctxt is freed before returning | 6543 * @reuse: if non-zero keep @ctxt for reuse, otherwise @ctxt is freed before returning |
5944 * | 6544 * |
5945 * Common front-end for the htmlRead and htmlCtxtRead functions | 6545 * Common front-end for the htmlRead and htmlCtxtRead functions |
5946 * | 6546 * |
5947 * Returns the resulting document tree or NULL | 6547 * Returns the resulting document tree or NULL |
5948 */ | 6548 */ |
5949 static htmlDocPtr | 6549 static htmlDocPtr |
5950 htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding, | 6550 htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding, |
5951 int options, int reuse) | 6551 int options, int reuse) |
5952 { | 6552 { |
5953 htmlDocPtr ret; | 6553 htmlDocPtr ret; |
5954 | 6554 |
5955 htmlCtxtUseOptions(ctxt, options); | 6555 htmlCtxtUseOptions(ctxt, options); |
5956 ctxt->html = 1; | 6556 ctxt->html = 1; |
5957 if (encoding != NULL) { | 6557 if (encoding != NULL) { |
5958 xmlCharEncodingHandlerPtr hdlr; | 6558 xmlCharEncodingHandlerPtr hdlr; |
5959 | 6559 |
5960 hdlr = xmlFindCharEncodingHandler(encoding); | 6560 hdlr = xmlFindCharEncodingHandler(encoding); |
5961 » if (hdlr != NULL) | 6561 » if (hdlr != NULL) { |
5962 xmlSwitchToEncoding(ctxt, hdlr); | 6562 xmlSwitchToEncoding(ctxt, hdlr); |
| 6563 if (ctxt->input->encoding != NULL) |
| 6564 xmlFree((xmlChar *) ctxt->input->encoding); |
| 6565 ctxt->input->encoding = xmlStrdup((xmlChar *)encoding); |
| 6566 } |
5963 } | 6567 } |
5964 if ((URL != NULL) && (ctxt->input != NULL) && | 6568 if ((URL != NULL) && (ctxt->input != NULL) && |
5965 (ctxt->input->filename == NULL)) | 6569 (ctxt->input->filename == NULL)) |
5966 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); | 6570 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); |
5967 htmlParseDocument(ctxt); | 6571 htmlParseDocument(ctxt); |
5968 ret = ctxt->myDoc; | 6572 ret = ctxt->myDoc; |
5969 ctxt->myDoc = NULL; | 6573 ctxt->myDoc = NULL; |
5970 if (!reuse) { | 6574 if (!reuse) { |
5971 if ((ctxt->dictNames) && | 6575 if ((ctxt->dictNames) && |
5972 (ret != NULL) && | 6576 (ret != NULL) && |
5973 (ret->dict == ctxt->dict)) | 6577 (ret->dict == ctxt->dict)) |
5974 ctxt->dict = NULL; | 6578 ctxt->dict = NULL; |
5975 xmlFreeParserCtxt(ctxt); | 6579 xmlFreeParserCtxt(ctxt); |
5976 } | 6580 } |
5977 return (ret); | 6581 return (ret); |
5978 } | 6582 } |
5979 | 6583 |
5980 /** | 6584 /** |
5981 * htmlReadDoc: | 6585 * htmlReadDoc: |
5982 * @cur: a pointer to a zero terminated string | 6586 * @cur: a pointer to a zero terminated string |
5983 * @URL: the base URL to use for the document | 6587 * @URL: the base URL to use for the document |
5984 * @encoding: the document encoding, or NULL | 6588 * @encoding: the document encoding, or NULL |
5985 * @options: a combination of htmlParserOption(s) | 6589 * @options: a combination of htmlParserOption(s) |
5986 * | 6590 * |
5987 * parse an HTML in-memory document and build a tree. | 6591 * parse an HTML in-memory document and build a tree. |
5988 * | 6592 * |
5989 * Returns the resulting document tree, or NULL (e.g. when @cur is NULL) | 6593 * Returns the resulting document tree, or NULL (e.g. when @cur is NULL) |
5990 */ | 6594 */ |
5991 htmlDocPtr | 6595 htmlDocPtr |
5992 htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int opti
ons) | 6596 htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int opti
ons) |
5993 { | 6597 { |
5994 htmlParserCtxtPtr ctxt; | 6598 htmlParserCtxtPtr ctxt; |
5995 | 6599 |
5996 if (cur == NULL) | 6600 if (cur == NULL) |
5997 return (NULL); | 6601 return (NULL); |
5998 | 6602 |
5999 xmlInitParser(); | 6603 xmlInitParser(); |
6000 ctxt = htmlCreateDocParserCtxt(cur, NULL); | 6604 ctxt = htmlCreateDocParserCtxt(cur, NULL); |
6001 if (ctxt == NULL) | 6605 if (ctxt == NULL) |
6002 return (NULL); | 6606 return (NULL); |
6003 return (htmlDoRead(ctxt, URL, encoding, options, 0)); | 6607 return (htmlDoRead(ctxt, URL, encoding, options, 0)); |
6004 } | 6608 } |
6005 | 6609 |
6006 /** | 6610 /** |
6007 * htmlReadFile: | 6611 * htmlReadFile: |
6008 * @filename: a file or URL | 6612 * @filename: a file or URL |
6009 * @encoding: the document encoding, or NULL | 6613 * @encoding: the document encoding, or NULL |
6010 * @options: a combination of htmlParserOption(s) | 6614 * @options: a combination of htmlParserOption(s) |
6011 * | 6615 * |
6012 * parse an HTML file from the filesystem or the network. | 6616 * parse an HTML file from the filesystem or the network. |
6013 * | 6617 * |
6014 * Returns the resulting document tree, or NULL if no parser context could be created | 6618 * Returns the resulting document tree, or NULL if no parser context could be created |
6015 */ | 6619 */ |
6016 htmlDocPtr | 6620 htmlDocPtr |
6017 htmlReadFile(const char *filename, const char *encoding, int options) | 6621 htmlReadFile(const char *filename, const char *encoding, int options) |
6018 { | 6622 { |
6019 htmlParserCtxtPtr ctxt; | 6623 htmlParserCtxtPtr ctxt; |
6020 | 6624 |
6021 xmlInitParser(); | 6625 xmlInitParser(); |
6022 ctxt = htmlCreateFileParserCtxt(filename, encoding); | 6626 ctxt = htmlCreateFileParserCtxt(filename, encoding); |
6023 if (ctxt == NULL) | 6627 if (ctxt == NULL) |
6024 return (NULL); | 6628 return (NULL); |
6025 return (htmlDoRead(ctxt, NULL, NULL, options, 0)); | 6629 return (htmlDoRead(ctxt, NULL, NULL, options, 0)); |
6026 } | 6630 } |
6027 | 6631 |
6028 /** | 6632 /** |
6029 * htmlReadMemory: | 6633 * htmlReadMemory: |
6030 * @buffer: a pointer to a char array | 6634 * @buffer: a pointer to a char array |
6031 * @size: the size of the array | 6635 * @size: the size of the array |
6032 * @URL: the base URL to use for the document | 6636 * @URL: the base URL to use for the document |
6033 * @encoding: the document encoding, or NULL | 6637 * @encoding: the document encoding, or NULL |
6034 * @options: a combination of htmlParserOption(s) | 6638 * @options: a combination of htmlParserOption(s) |
6035 * | 6639 * |
6036 * parse an HTML in-memory document and build a tree. | 6640 * parse an HTML in-memory document and build a tree. |
6037 * | 6641 * |
6038 * Returns the resulting document tree, or NULL if no parser context could be created | 6642 * Returns the resulting document tree, or NULL if no parser context could be created |
6039 */ | 6643 */ |
6040 htmlDocPtr | 6644 htmlDocPtr |
6041 htmlReadMemory(const char *buffer, int size, const char *URL, const char *encodi
ng, int options) | 6645 htmlReadMemory(const char *buffer, int size, const char *URL, const char *encodi
ng, int options) |
6042 { | 6646 { |
6043 htmlParserCtxtPtr ctxt; | 6647 htmlParserCtxtPtr ctxt; |
6044 | 6648 |
6045 xmlInitParser(); | 6649 xmlInitParser(); |
6046 ctxt = xmlCreateMemoryParserCtxt(buffer, size); | 6650 ctxt = xmlCreateMemoryParserCtxt(buffer, size); |
6047 if (ctxt == NULL) | 6651 if (ctxt == NULL) |
6048 return (NULL); | 6652 return (NULL); |
6049 htmlDefaultSAXHandlerInit(); | 6653 htmlDefaultSAXHandlerInit(); |
6050 if (ctxt->sax != NULL) | 6654 if (ctxt->sax != NULL) |
6051 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); | 6655 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); |
6052 return (htmlDoRead(ctxt, URL, encoding, options, 0)); | 6656 return (htmlDoRead(ctxt, URL, encoding, options, 0)); |
6053 } | 6657 } |
6054 | 6658 |
6055 /** | 6659 /** |
6056 * htmlReadFd: | 6660 * htmlReadFd: |
6057 * @fd: an open file descriptor | 6661 * @fd: an open file descriptor |
6058 * @URL: the base URL to use for the document | 6662 * @URL: the base URL to use for the document |
6059 * @encoding: the document encoding, or NULL | 6663 * @encoding: the document encoding, or NULL |
6060 * @options: a combination of htmlParserOption(s) | 6664 * @options: a combination of htmlParserOption(s) |
6061 * | 6665 * |
6062 * parse an HTML document from a file descriptor and build a tree. | 6666 * parse an HTML document from a file descriptor and build a tree. |
6063 * | 6667 * |
6064 * Returns the resulting document tree | 6668 * Returns the resulting document tree |
6065 */ | 6669 */ |
6066 htmlDocPtr | 6670 htmlDocPtr |
6067 htmlReadFd(int fd, const char *URL, const char *encoding, int options) | 6671 htmlReadFd(int fd, const char *URL, const char *encoding, int options) |
6068 { | 6672 { |
6069 htmlParserCtxtPtr ctxt; | 6673 htmlParserCtxtPtr ctxt; |
6070 xmlParserInputBufferPtr input; | 6674 xmlParserInputBufferPtr input; |
6071 xmlParserInputPtr stream; | 6675 xmlParserInputPtr stream; |
6072 | 6676 |
6073 if (fd < 0) | 6677 if (fd < 0) |
(...skipping 21 matching lines...) Expand all Loading... |
6095 /** | 6699 /** |
6096 * htmlReadIO: | 6700 * htmlReadIO: |
6097 * @ioread: an I/O read function | 6701 * @ioread: an I/O read function |
6098 * @ioclose: an I/O close function | 6702 * @ioclose: an I/O close function |
6099 * @ioctx: an I/O handler | 6703 * @ioctx: an I/O handler |
6100 * @URL: the base URL to use for the document | 6704 * @URL: the base URL to use for the document |
6101 * @encoding: the document encoding, or NULL | 6705 * @encoding: the document encoding, or NULL |
6102 * @options: a combination of htmlParserOption(s) | 6706 * @options: a combination of htmlParserOption(s) |
6103 * | 6707 * |
6104 * parse an HTML document from I/O functions and source and build a tree. | 6708 * parse an HTML document from I/O functions and source and build a tree. |
6105 * | 6709 * |
6106 * Returns the resulting document tree | 6710 * Returns the resulting document tree |
6107 */ | 6711 */ |
6108 htmlDocPtr | 6712 htmlDocPtr |
6109 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, | 6713 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, |
6110 void *ioctx, const char *URL, const char *encoding, int options) | 6714 void *ioctx, const char *URL, const char *encoding, int options) |
6111 { | 6715 { |
6112 htmlParserCtxtPtr ctxt; | 6716 htmlParserCtxtPtr ctxt; |
6113 xmlParserInputBufferPtr input; | 6717 xmlParserInputBufferPtr input; |
6114 xmlParserInputPtr stream; | 6718 xmlParserInputPtr stream; |
6115 | 6719 |
(...skipping 23 matching lines...) Expand all Loading... |
6139 /** | 6743 /** |
6140 * htmlCtxtReadDoc: | 6744 * htmlCtxtReadDoc: |
6141 * @ctxt: an HTML parser context | 6745 * @ctxt: an HTML parser context |
6142 * @cur: a pointer to a zero terminated string | 6746 * @cur: a pointer to a zero terminated string |
6143 * @URL: the base URL to use for the document | 6747 * @URL: the base URL to use for the document |
6144 * @encoding: the document encoding, or NULL | 6748 * @encoding: the document encoding, or NULL |
6145 * @options: a combination of htmlParserOption(s) | 6749 * @options: a combination of htmlParserOption(s) |
6146 * | 6750 * |
6147 * parse an HTML in-memory document and build a tree. | 6751 * parse an HTML in-memory document and build a tree. |
6148 * This reuses the existing @ctxt parser context | 6752 * This reuses the existing @ctxt parser context |
6149 * | 6753 * |
6150 * Returns the resulting document tree | 6754 * Returns the resulting document tree |
6151 */ | 6755 */ |
6152 htmlDocPtr | 6756 htmlDocPtr |
6153 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, | 6757 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, |
6154 const char *URL, const char *encoding, int options) | 6758 const char *URL, const char *encoding, int options) |
6155 { | 6759 { |
6156 xmlParserInputPtr stream; | 6760 xmlParserInputPtr stream; |
6157 | 6761 |
6158 if (cur == NULL) | 6762 if (cur == NULL) |
6159 return (NULL); | 6763 return (NULL); |
(...skipping 12 matching lines...) Expand all Loading... |
6172 | 6776 |
6173 /** | 6777 /** |
6174 * htmlCtxtReadFile: | 6778 * htmlCtxtReadFile: |
6175 * @ctxt: an HTML parser context | 6779 * @ctxt: an HTML parser context |
6176 * @filename: a file or URL | 6780 * @filename: a file or URL |
6177 * @encoding: the document encoding, or NULL | 6781 * @encoding: the document encoding, or NULL |
6178 * @options: a combination of htmlParserOption(s) | 6782 * @options: a combination of htmlParserOption(s) |
6179 * | 6783 * |
6180 * parse an HTML file from the filesystem or the network. | 6784 * parse an HTML file from the filesystem or the network. |
6181 * This reuses the existing @ctxt parser context | 6785 * This reuses the existing @ctxt parser context |
6182 * | 6786 * |
6183 * Returns the resulting document tree | 6787 * Returns the resulting document tree |
6184 */ | 6788 */ |
6185 htmlDocPtr | 6789 htmlDocPtr |
6186 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, | 6790 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, |
6187 const char *encoding, int options) | 6791 const char *encoding, int options) |
6188 { | 6792 { |
6189 xmlParserInputPtr stream; | 6793 xmlParserInputPtr stream; |
6190 | 6794 |
6191 if (filename == NULL) | 6795 if (filename == NULL) |
6192 return (NULL); | 6796 return (NULL); |
(...skipping 14 matching lines...) Expand all Loading... |
6207 * htmlCtxtReadMemory: | 6811 * htmlCtxtReadMemory: |
6208 * @ctxt: an HTML parser context | 6812 * @ctxt: an HTML parser context |
6209 * @buffer: a pointer to a char array | 6813 * @buffer: a pointer to a char array |
6210 * @size: the size of the array | 6814 * @size: the size of the array |
6211 * @URL: the base URL to use for the document | 6815 * @URL: the base URL to use for the document |
6212 * @encoding: the document encoding, or NULL | 6816 * @encoding: the document encoding, or NULL |
6213 * @options: a combination of htmlParserOption(s) | 6817 * @options: a combination of htmlParserOption(s) |
6214 * | 6818 * |
6215 * parse an HTML in-memory document and build a tree. | 6819 * parse an HTML in-memory document and build a tree. |
6216 * This reuses the existing @ctxt parser context | 6820 * This reuses the existing @ctxt parser context |
6217 * | 6821 * |
6218 * Returns the resulting document tree | 6822 * Returns the resulting document tree |
6219 */ | 6823 */ |
6220 htmlDocPtr | 6824 htmlDocPtr |
6221 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, | 6825 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, |
6222 const char *URL, const char *encoding, int options) | 6826 const char *URL, const char *encoding, int options) |
6223 { | 6827 { |
6224 xmlParserInputBufferPtr input; | 6828 xmlParserInputBufferPtr input; |
6225 xmlParserInputPtr stream; | 6829 xmlParserInputPtr stream; |
6226 | 6830 |
6227 if (ctxt == NULL) | 6831 if (ctxt == NULL) |
(...skipping 21 matching lines...) Expand all Loading... |
6249 /** | 6853 /** |
6250 * htmlCtxtReadFd: | 6854 * htmlCtxtReadFd: |
6251 * @ctxt: an HTML parser context | 6855 * @ctxt: an HTML parser context |
6252 * @fd: an open file descriptor | 6856 * @fd: an open file descriptor |
6253 * @URL: the base URL to use for the document | 6857 * @URL: the base URL to use for the document |
6254 * @encoding: the document encoding, or NULL | 6858 * @encoding: the document encoding, or NULL |
6255 * @options: a combination of htmlParserOption(s) | 6859 * @options: a combination of htmlParserOption(s) |
6256 * | 6860 * |
6257 * parse an HTML document from a file descriptor and build a tree. | 6861 * parse an HTML document from a file descriptor and build a tree. |
6258 * This reuses the existing @ctxt parser context | 6862 * This reuses the existing @ctxt parser context |
6259 * | 6863 * |
6260 * Returns the resulting document tree | 6864 * Returns the resulting document tree |
6261 */ | 6865 */ |
6262 htmlDocPtr | 6866 htmlDocPtr |
6263 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, | 6867 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, |
6264 const char *URL, const char *encoding, int options) | 6868 const char *URL, const char *encoding, int options) |
6265 { | 6869 { |
6266 xmlParserInputBufferPtr input; | 6870 xmlParserInputBufferPtr input; |
6267 xmlParserInputPtr stream; | 6871 xmlParserInputPtr stream; |
6268 | 6872 |
6269 if (fd < 0) | 6873 if (fd < 0) |
(...skipping 21 matching lines...) Expand all Loading... |
6291 * @ctxt: an HTML parser context | 6895 * @ctxt: an HTML parser context |
6292 * @ioread: an I/O read function | 6896 * @ioread: an I/O read function |
6293 * @ioclose: an I/O close function | 6897 * @ioclose: an I/O close function |
6294 * @ioctx: an I/O handler | 6898 * @ioctx: an I/O handler |
6295 * @URL: the base URL to use for the document | 6899 * @URL: the base URL to use for the document |
6296 * @encoding: the document encoding, or NULL | 6900 * @encoding: the document encoding, or NULL |
6297 * @options: a combination of htmlParserOption(s) | 6901 * @options: a combination of htmlParserOption(s) |
6298 * | 6902 * |
6299 * parse an HTML document from I/O functions and source and build a tree. | 6903 * parse an HTML document from I/O functions and source and build a tree. |
6300 * This reuses the existing @ctxt parser context | 6904 * This reuses the existing @ctxt parser context |
6301 * | 6905 * |
6302 * Returns the resulting document tree | 6906 * Returns the resulting document tree |
6303 */ | 6907 */ |
6304 htmlDocPtr | 6908 htmlDocPtr |
6305 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, | 6909 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, |
6306 xmlInputCloseCallback ioclose, void *ioctx, | 6910 xmlInputCloseCallback ioclose, void *ioctx, |
6307 const char *URL, | 6911 const char *URL, |
6308 const char *encoding, int options) | 6912 const char *encoding, int options) |
6309 { | 6913 { |
6310 xmlParserInputBufferPtr input; | 6914 xmlParserInputBufferPtr input; |
6311 xmlParserInputPtr stream; | 6915 xmlParserInputPtr stream; |
(...skipping 14 matching lines...) Expand all Loading... |
6326 xmlFreeParserInputBuffer(input); | 6930 xmlFreeParserInputBuffer(input); |
6327 return (NULL); | 6931 return (NULL); |
6328 } | 6932 } |
6329 inputPush(ctxt, stream); | 6933 inputPush(ctxt, stream); |
6330 return (htmlDoRead(ctxt, URL, encoding, options, 1)); | 6934 return (htmlDoRead(ctxt, URL, encoding, options, 1)); |
6331 } | 6935 } |
6332 | 6936 |
6333 #define bottom_HTMLparser | 6937 #define bottom_HTMLparser |
6334 #include "elfgcchack.h" | 6938 #include "elfgcchack.h" |
6335 #endif /* LIBXML_HTML_ENABLED */ | 6939 #endif /* LIBXML_HTML_ENABLED */ |
OLD | NEW |