| OLD | NEW |
| 1 /* | 1 /* |
| 2 * HTMLparser.c : an HTML 4.0 non-verifying parser | 2 * HTMLparser.c : an HTML 4.0 non-verifying parser |
| 3 * | 3 * |
| 4 * See Copyright for the status of this software. | 4 * See Copyright for the status of this software. |
| 5 * | 5 * |
| 6 * daniel@veillard.com | 6 * daniel@veillard.com |
| 7 */ | 7 */ |
| 8 | 8 |
| 9 #define IN_LIBXML | 9 #define IN_LIBXML |
| 10 #include "libxml.h" | 10 #include "libxml.h" |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 52 /* #define DEBUG_PUSH */ | 52 /* #define DEBUG_PUSH */ |
| 53 | 53 |
| 54 static int htmlOmittedDefaultValue = 1; | 54 static int htmlOmittedDefaultValue = 1; |
| 55 | 55 |
| 56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, | 56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, |
| 57 xmlChar end, xmlChar end2, xmlChar end3); | 57 xmlChar end, xmlChar end2, xmlChar end3); |
| 58 static void htmlParseComment(htmlParserCtxtPtr ctxt); | 58 static void htmlParseComment(htmlParserCtxtPtr ctxt); |
| 59 | 59 |
| 60 /************************************************************************ | 60 /************************************************************************ |
| 61 * * | 61 * * |
| 62 * » » Some factorized error routines» » » » * | 62 *» » Some factorized error routines» » » » * |
| 63 * * | 63 * * |
| 64 ************************************************************************/ | 64 ************************************************************************/ |
| 65 | 65 |
| 66 /** | 66 /** |
| 67 * htmlErrMemory: | 67 * htmlErrMemory: |
| 68 * @ctxt: an HTML parser context | 68 * @ctxt: an HTML parser context |
| 69 * @extra: extra information | 69 * @extra: extra information |
| 70 * | 70 * |
| 71 * Handle an out of memory error | 71 * Handle an out of memory error |
| 72 */ | 72 */ |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 140 ctxt->errNo = error; | 140 ctxt->errNo = error; |
| 141 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error, | 141 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error, |
| 142 XML_ERR_ERROR, NULL, 0, NULL, NULL, | 142 XML_ERR_ERROR, NULL, 0, NULL, NULL, |
| 143 NULL, val, 0, msg, val); | 143 NULL, val, 0, msg, val); |
| 144 if (ctxt != NULL) | 144 if (ctxt != NULL) |
| 145 ctxt->wellFormed = 0; | 145 ctxt->wellFormed = 0; |
| 146 } | 146 } |
| 147 | 147 |
| 148 /************************************************************************ | 148 /************************************************************************ |
| 149 * * | 149 * * |
| 150 * » » Parser stacks related functions and macros» » * | 150 *» Parser stacks related functions and macros» » * |
| 151 * * | 151 * * |
| 152 ************************************************************************/ | 152 ************************************************************************/ |
| 153 | 153 |
| 154 /** | 154 /** |
| 155 * htmlnamePush: | 155 * htmlnamePush: |
| 156 * @ctxt: an HTML parser context | 156 * @ctxt: an HTML parser context |
| 157 * @value: the element name | 157 * @value: the element name |
| 158 * | 158 * |
| 159 * Pushes a new element name on top of the name stack | 159 * Pushes a new element name on top of the name stack |
| 160 * | 160 * |
| 161 * Returns 0 in case of error, the index in the stack otherwise | 161 * Returns 0 in case of error, the index in the stack otherwise |
| 162 */ | 162 */ |
| 163 static int | 163 static int |
| 164 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value) | 164 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value) |
| 165 { | 165 { |
| 166 if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head"))) |
| 167 ctxt->html = 3; |
| 168 if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body"))) |
| 169 ctxt->html = 10; |
| 166 if (ctxt->nameNr >= ctxt->nameMax) { | 170 if (ctxt->nameNr >= ctxt->nameMax) { |
| 167 ctxt->nameMax *= 2; | 171 ctxt->nameMax *= 2; |
| 168 ctxt->nameTab = (const xmlChar * *) | 172 ctxt->nameTab = (const xmlChar * *) |
| 169 xmlRealloc((xmlChar * *)ctxt->nameTab, | 173 xmlRealloc((xmlChar * *)ctxt->nameTab, |
| 170 ctxt->nameMax * | 174 ctxt->nameMax * |
| 171 sizeof(ctxt->nameTab[0])); | 175 sizeof(ctxt->nameTab[0])); |
| 172 if (ctxt->nameTab == NULL) { | 176 if (ctxt->nameTab == NULL) { |
| 173 htmlErrMemory(ctxt, NULL); | 177 htmlErrMemory(ctxt, NULL); |
| 174 return (0); | 178 return (0); |
| 175 } | 179 } |
| (...skipping 22 matching lines...) Expand all Loading... |
| 198 return (NULL); | 202 return (NULL); |
| 199 if (ctxt->nameNr > 0) | 203 if (ctxt->nameNr > 0) |
| 200 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; | 204 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; |
| 201 else | 205 else |
| 202 ctxt->name = NULL; | 206 ctxt->name = NULL; |
| 203 ret = ctxt->nameTab[ctxt->nameNr]; | 207 ret = ctxt->nameTab[ctxt->nameNr]; |
| 204 ctxt->nameTab[ctxt->nameNr] = NULL; | 208 ctxt->nameTab[ctxt->nameNr] = NULL; |
| 205 return (ret); | 209 return (ret); |
| 206 } | 210 } |
| 207 | 211 |
| 212 /** |
| 213 * htmlNodeInfoPush: |
| 214 * @ctxt: an HTML parser context |
| 215 * @value: the node info |
| 216 * |
| 217 * Pushes a new node info entry on top of the node info stack |
| 218 * |
| 219 * Returns 0 in case of error, the index in the stack otherwise (a push onto an empty stack also returns index 0) |
| 220 */ |
| 221 static int |
| 222 htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *value) |
| 223 { |
| 224 if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) { |
| 225 if (ctxt->nodeInfoMax == 0) |
| 226 ctxt->nodeInfoMax = 5; |
| 227 ctxt->nodeInfoMax *= 2; |
| 228 ctxt->nodeInfoTab = (htmlParserNodeInfo *) |
| 229 xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab, |
| 230 ctxt->nodeInfoMax * |
| 231 sizeof(ctxt->nodeInfoTab[0])); |
| 232 if (ctxt->nodeInfoTab == NULL) { |
| 233 htmlErrMemory(ctxt, NULL); |
| 234 return (0); |
| 235 } |
| 236 } |
| 237 ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *value; |
| 238 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr]; |
| 239 return (ctxt->nodeInfoNr++); |
| 240 } |
| 241 |
| 242 /** |
| 243 * htmlNodeInfoPop: |
| 244 * @ctxt: an HTML parser context |
| 245 * |
| 246 * Pops the top node info entry from the node info stack |
| 247 * |
| 248 * Returns NULL if the stack is empty, the pointer to the popped NodeInfo otherwise |
| 249 */ |
| 250 static htmlParserNodeInfo * |
| 251 htmlNodeInfoPop(htmlParserCtxtPtr ctxt) |
| 252 { |
| 253 if (ctxt->nodeInfoNr <= 0) |
| 254 return (NULL); |
| 255 ctxt->nodeInfoNr--; |
| 256 if (ctxt->nodeInfoNr < 0) |
| 257 return (NULL); |
| 258 if (ctxt->nodeInfoNr > 0) |
| 259 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1]; |
| 260 else |
| 261 ctxt->nodeInfo = NULL; |
| 262 return &ctxt->nodeInfoTab[ctxt->nodeInfoNr]; |
| 263 } |
| 264 |
| 208 /* | 265 /* |
| 209 * Macros for accessing the content. Those should be used only by the parser, | 266 * Macros for accessing the content. Those should be used only by the parser, |
| 210 * and not exported. | 267 * and not exported. |
| 211 * | 268 * |
| 212 * Dirty macros, i.e. one needs to make assumptions about the context to use them | 269 * Dirty macros, i.e. one needs to make assumptions about the context to use them |
| 213 * | 270 * |
| 214 * CUR_PTR return the current pointer to the xmlChar to be parsed. | 271 * CUR_PTR return the current pointer to the xmlChar to be parsed. |
| 215 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled | 272 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled |
| 216 * in ISO-Latin or UTF-8, and the current 16 bit value if compiled | 273 * in ISO-Latin or UTF-8, and the current 16 bit value if compiled |
| 217 * in UNICODE mode. This should be used internally by the parser | 274 * in UNICODE mode. This should be used internally by the parser |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 256 | 313 |
| 257 #define SKIP_BLANKS htmlSkipBlankChars(ctxt) | 314 #define SKIP_BLANKS htmlSkipBlankChars(ctxt) |
| 258 | 315 |
| 259 /* Imported from XML */ | 316 /* Imported from XML */ |
| 260 | 317 |
| 261 /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */ | 318 /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */ |
| 262 #define CUR ((int) (*ctxt->input->cur)) | 319 #define CUR ((int) (*ctxt->input->cur)) |
| 263 #define NEXT xmlNextChar(ctxt) | 320 #define NEXT xmlNextChar(ctxt) |
| 264 | 321 |
| 265 #define RAW (ctxt->token ? -1 : (*ctxt->input->cur)) | 322 #define RAW (ctxt->token ? -1 : (*ctxt->input->cur)) |
| 266 #define NXT(val) ctxt->input->cur[(val)] | |
| 267 #define CUR_PTR ctxt->input->cur | |
| 268 | 323 |
| 269 | 324 |
| 270 #define NEXTL(l) do { \ | 325 #define NEXTL(l) do { \ |
| 271 if (*(ctxt->input->cur) == '\n') { \ | 326 if (*(ctxt->input->cur) == '\n') { \ |
| 272 ctxt->input->line++; ctxt->input->col = 1; \ | 327 ctxt->input->line++; ctxt->input->col = 1; \ |
| 273 } else ctxt->input->col++; \ | 328 } else ctxt->input->col++; \ |
| 274 ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \ | 329 ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \ |
| 275 } while (0) | 330 } while (0) |
| 276 | 331 |
| 277 /************ | 332 /************ |
| 278 \ | 333 \ |
| 279 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ | 334 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ |
| 280 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); | 335 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); |
| 281 ************/ | 336 ************/ |
| 282 | 337 |
| 283 #define CUR_CHAR(l) htmlCurrentChar(ctxt, &l) | 338 #define CUR_CHAR(l) htmlCurrentChar(ctxt, &l) |
| 284 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) | 339 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) |
| 285 | 340 |
| 286 #define COPY_BUF(l,b,i,v) \ | 341 #define COPY_BUF(l,b,i,v) \ |
| 287 if (l == 1) b[i++] = (xmlChar) v; \ | 342 if (l == 1) b[i++] = (xmlChar) v; \ |
| 288 else i += xmlCopyChar(l,&b[i],v) | 343 else i += xmlCopyChar(l,&b[i],v) |
| 289 | 344 |
| 290 /** | 345 /** |
| 346 * htmlFindEncoding: |
| 347 * @ctxt: the HTML parser context |
| 348 * |
| 349 * Try to find an encoding in the current data available in the input |
| 350 * buffer; this is needed to try to switch to the proper encoding when |
| 351 * one faces a character error. |
| 352 * That's a heuristic: since it operates outside of parsing, it could |
| 353 * try to use a meta which had been commented out; that's the reason it |
| 354 * should only be used in case of error, not as a default. |
| 355 * |
| 356 * Returns an encoding string or NULL if not found; the string needs to |
| 357 * be freed |
| 358 */ |
| 359 static xmlChar * |
| 360 htmlFindEncoding(xmlParserCtxtPtr ctxt) { |
| 361 const xmlChar *start, *cur, *end; |
| 362 |
| 363 if ((ctxt == NULL) || (ctxt->input == NULL) || |
| 364 (ctxt->input->encoding != NULL) || (ctxt->input->buf == NULL) || |
| 365 (ctxt->input->buf->encoder != NULL)) |
| 366 return(NULL); |
| 367 if ((ctxt->input->cur == NULL) || (ctxt->input->end == NULL)) |
| 368 return(NULL); |
| 369 |
| 370 start = ctxt->input->cur; |
| 371 end = ctxt->input->end; |
| 372 /* we also expect the input buffer to be zero terminated */ |
| 373 if (*end != 0) |
| 374 return(NULL); |
| 375 |
| 376 cur = xmlStrcasestr(start, BAD_CAST "HTTP-EQUIV"); |
| 377 if (cur == NULL) |
| 378 return(NULL); |
| 379 cur = xmlStrcasestr(cur, BAD_CAST "CONTENT"); |
| 380 if (cur == NULL) |
| 381 return(NULL); |
| 382 cur = xmlStrcasestr(cur, BAD_CAST "CHARSET="); |
| 383 if (cur == NULL) |
| 384 return(NULL); |
| 385 cur += 8; |
| 386 start = cur; |
| 387 while (((*cur >= 'A') && (*cur <= 'Z')) || |
| 388 ((*cur >= 'a') && (*cur <= 'z')) || |
| 389 ((*cur >= '0') && (*cur <= '9')) || |
| 390 (*cur == '-') || (*cur == '_') || (*cur == ':') || (*cur == '/')) |
| 391 cur++; |
| 392 if (cur == start) |
| 393 return(NULL); |
| 394 return(xmlStrndup(start, cur - start)); |
| 395 } |
| 396 |
| 397 /** |
| 291 * htmlCurrentChar: | 398 * htmlCurrentChar: |
| 292 * @ctxt: the HTML parser context | 399 * @ctxt: the HTML parser context |
| 293 * @len: pointer to the length of the char read | 400 * @len: pointer to the length of the char read |
| 294 * | 401 * |
| 295 * The current char value, if using UTF-8 this may actually span multiple | 402 * The current char value, if using UTF-8 this may actually span multiple |
| 296 * bytes in the input buffer. Implement the end of line normalization: | 403 * bytes in the input buffer. Implement the end of line normalization: |
| 297 * 2.11 End-of-Line Handling | 404 * 2.11 End-of-Line Handling |
| 298 * If the encoding is unspecified, in the case we find an ISO-Latin-1 | 405 * If the encoding is unspecified, in the case we find an ISO-Latin-1 |
| 299 * char, then the encoding converter is plugged in automatically. | 406 * char, then the encoding converter is plugged in automatically. |
| 300 * | 407 * |
| 301 * Returns the current char value and its length | 408 * Returns the current char value and its length |
| 302 */ | 409 */ |
| 303 | 410 |
| 304 static int | 411 static int |
| 305 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { | 412 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { |
| 306 if (ctxt->instate == XML_PARSER_EOF) | 413 if (ctxt->instate == XML_PARSER_EOF) |
| 307 return(0); | 414 return(0); |
| 308 | 415 |
| 309 if (ctxt->token != 0) { | 416 if (ctxt->token != 0) { |
| 310 *len = 0; | 417 *len = 0; |
| 311 return(ctxt->token); | 418 return(ctxt->token); |
| 312 }» | 419 } |
| 313 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { | 420 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { |
| 314 /* | 421 /* |
| 315 * We are supposed to handle UTF8, check it's valid | 422 * We are supposed to handle UTF8, check it's valid |
| 316 * From rfc2044: encoding of the Unicode values on UTF-8: | 423 * From rfc2044: encoding of the Unicode values on UTF-8: |
| 317 * | 424 * |
| 318 * UCS-4 range (hex.) UTF-8 octet sequence (binary) | 425 * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
| 319 * 0000 0000-0000 007F 0xxxxxxx | 426 * 0000 0000-0000 007F 0xxxxxxx |
| 320 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx | 427 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
| 321 » * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx | 428 » * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
| 322 * | 429 * |
| 323 * Check for the 0x110000 limit too | 430 * Check for the 0x110000 limit too |
| 324 */ | 431 */ |
| 325 const unsigned char *cur = ctxt->input->cur; | 432 const unsigned char *cur = ctxt->input->cur; |
| 326 unsigned char c; | 433 unsigned char c; |
| 327 unsigned int val; | 434 unsigned int val; |
| 328 | 435 |
| 329 c = *cur; | 436 c = *cur; |
| 330 if (c & 0x80) { | 437 if (c & 0x80) { |
| 331 » if (cur[1] == 0) | 438 » if (cur[1] == 0) { |
| 332 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 439 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| 440 cur = ctxt->input->cur; |
| 441 } |
| 333 if ((cur[1] & 0xc0) != 0x80) | 442 if ((cur[1] & 0xc0) != 0x80) |
| 334 goto encoding_error; | 443 goto encoding_error; |
| 335 if ((c & 0xe0) == 0xe0) { | 444 if ((c & 0xe0) == 0xe0) { |
| 336 | 445 |
| 337 » » if (cur[2] == 0) | 446 » » if (cur[2] == 0) { |
| 338 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 447 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| 448 cur = ctxt->input->cur; |
| 449 } |
| 339 if ((cur[2] & 0xc0) != 0x80) | 450 if ((cur[2] & 0xc0) != 0x80) |
| 340 goto encoding_error; | 451 goto encoding_error; |
| 341 if ((c & 0xf0) == 0xf0) { | 452 if ((c & 0xf0) == 0xf0) { |
| 342 » » if (cur[3] == 0) | 453 » » if (cur[3] == 0) { |
| 343 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 454 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| 455 cur = ctxt->input->cur; |
| 456 } |
| 344 if (((c & 0xf8) != 0xf0) || | 457 if (((c & 0xf8) != 0xf0) || |
| 345 ((cur[3] & 0xc0) != 0x80)) | 458 ((cur[3] & 0xc0) != 0x80)) |
| 346 goto encoding_error; | 459 goto encoding_error; |
| 347 /* 4-byte code */ | 460 /* 4-byte code */ |
| 348 *len = 4; | 461 *len = 4; |
| 349 val = (cur[0] & 0x7) << 18; | 462 val = (cur[0] & 0x7) << 18; |
| 350 val |= (cur[1] & 0x3f) << 12; | 463 val |= (cur[1] & 0x3f) << 12; |
| 351 val |= (cur[2] & 0x3f) << 6; | 464 val |= (cur[2] & 0x3f) << 6; |
| 352 val |= cur[3] & 0x3f; | 465 val |= cur[3] & 0x3f; |
| 353 } else { | 466 } else { |
| 354 /* 3-byte code */ | 467 /* 3-byte code */ |
| 355 *len = 3; | 468 *len = 3; |
| 356 val = (cur[0] & 0xf) << 12; | 469 val = (cur[0] & 0xf) << 12; |
| 357 val |= (cur[1] & 0x3f) << 6; | 470 val |= (cur[1] & 0x3f) << 6; |
| 358 val |= cur[2] & 0x3f; | 471 val |= cur[2] & 0x3f; |
| 359 } | 472 } |
| 360 } else { | 473 } else { |
| 361 /* 2-byte code */ | 474 /* 2-byte code */ |
| 362 *len = 2; | 475 *len = 2; |
| 363 val = (cur[0] & 0x1f) << 6; | 476 val = (cur[0] & 0x1f) << 6; |
| 364 val |= cur[1] & 0x3f; | 477 val |= cur[1] & 0x3f; |
| 365 } | 478 } |
| 366 if (!IS_CHAR(val)) { | 479 if (!IS_CHAR(val)) { |
| 367 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, | 480 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, |
| 368 "Char 0x%X out of allowed range\n", val); | 481 "Char 0x%X out of allowed range\n", val); |
| 369 » } | 482 » } |
| 370 return(val); | 483 return(val); |
| 371 } else { | 484 } else { |
| 485 if ((*ctxt->input->cur == 0) && |
| 486 (ctxt->input->cur < ctxt->input->end)) { |
| 487 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, |
| 488 "Char 0x%X out of allowed range\n", 0); |
| 489 *len = 1; |
| 490 return(' '); |
| 491 } |
| 372 /* 1-byte code */ | 492 /* 1-byte code */ |
| 373 *len = 1; | 493 *len = 1; |
| 374 return((int) *ctxt->input->cur); | 494 return((int) *ctxt->input->cur); |
| 375 } | 495 } |
| 376 } | 496 } |
| 377 /* | 497 /* |
| 378 * Assume it's a fixed length encoding (1) with | 498 * Assume it's a fixed length encoding (1) with |
| 379 * a compatible encoding for the ASCII set, since | 499 * a compatible encoding for the ASCII set, since |
| 380 * XML constructs only use < 128 chars | 500 * XML constructs only use < 128 chars |
| 381 */ | 501 */ |
| 382 *len = 1; | 502 *len = 1; |
| 383 if ((int) *ctxt->input->cur < 0x80) | 503 if ((int) *ctxt->input->cur < 0x80) |
| 384 return((int) *ctxt->input->cur); | 504 return((int) *ctxt->input->cur); |
| 385 | 505 |
| 386 /* | 506 /* |
| 387 * Humm this is bad, do an automatic flow conversion | 507 * Humm this is bad, do an automatic flow conversion |
| 388 */ | 508 */ |
| 389 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); | 509 { |
| 390 ctxt->charset = XML_CHAR_ENCODING_UTF8; | 510 xmlChar * guess; |
| 511 xmlCharEncodingHandlerPtr handler; |
| 512 |
| 513 guess = htmlFindEncoding(ctxt); |
| 514 if (guess == NULL) { |
| 515 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); |
| 516 } else { |
| 517 if (ctxt->input->encoding != NULL) |
| 518 xmlFree((xmlChar *) ctxt->input->encoding); |
| 519 ctxt->input->encoding = guess; |
| 520 handler = xmlFindCharEncodingHandler((const char *) guess); |
| 521 if (handler != NULL) { |
| 522 xmlSwitchToEncoding(ctxt, handler); |
| 523 } else { |
| 524 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
| 525 "Unsupported encoding %s", guess, NULL); |
| 526 } |
| 527 } |
| 528 ctxt->charset = XML_CHAR_ENCODING_UTF8; |
| 529 } |
| 530 |
| 391 return(xmlCurrentChar(ctxt, len)); | 531 return(xmlCurrentChar(ctxt, len)); |
| 392 | 532 |
| 393 encoding_error: | 533 encoding_error: |
| 394 /* | 534 /* |
| 395 * If we detect a UTF-8 error, that probably means that the | 535 * If we detect a UTF-8 error, that probably means that the |
| 396 * input encoding didn't get properly advertised in the | 536 * input encoding didn't get properly advertised in the |
| 397 * declaration header. Report the error and switch the encoding | 537 * declaration header. Report the error and switch the encoding |
| 398 * to ISO-Latin-1 (if you don't like this policy, just declare the | 538 * to ISO-Latin-1 (if you don't like this policy, just declare the |
| 399 * encoding !) | 539 * encoding !) |
| 400 */ | 540 */ |
| 401 { | 541 { |
| 402 char buffer[150]; | 542 char buffer[150]; |
| 403 | 543 |
| 404 if (ctxt->input->end - ctxt->input->cur >= 4) { | 544 if (ctxt->input->end - ctxt->input->cur >= 4) { |
| 405 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", | 545 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", |
| 406 ctxt->input->cur[0], ctxt->input->cur[1], | 546 ctxt->input->cur[0], ctxt->input->cur[1], |
| 407 ctxt->input->cur[2], ctxt->input->cur[3]); | 547 ctxt->input->cur[2], ctxt->input->cur[3]); |
| 408 } else { | 548 } else { |
| 409 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]); | 549 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]); |
| 410 } | 550 } |
| 411 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 551 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
| 412 "Input is not proper UTF-8, indicate encoding !\n", | 552 "Input is not proper UTF-8, indicate encoding !\n", |
| 413 BAD_CAST buffer, NULL); | 553 BAD_CAST buffer, NULL); |
| 414 } | 554 } |
| 415 | 555 |
| 416 ctxt->charset = XML_CHAR_ENCODING_8859_1; | 556 ctxt->charset = XML_CHAR_ENCODING_8859_1; |
| 417 *len = 1; | 557 *len = 1; |
| 418 return((int) *ctxt->input->cur); | 558 return((int) *ctxt->input->cur); |
| 419 } | 559 } |
| 420 | 560 |
| 421 /** | 561 /** |
| 422 * htmlSkipBlankChars: | 562 * htmlSkipBlankChars: |
| 423 * @ctxt: the HTML parser context | 563 * @ctxt: the HTML parser context |
| 424 * | 564 * |
| 425 * Skip all blank characters found at that point in the input stream. | 565 * Skip all blank characters found at that point in the input stream. |
| 426 * | 566 * |
| (...skipping 19 matching lines...) Expand all Loading... |
| 446 } | 586 } |
| 447 res++; | 587 res++; |
| 448 } | 588 } |
| 449 return(res); | 589 return(res); |
| 450 } | 590 } |
| 451 | 591 |
| 452 | 592 |
| 453 | 593 |
| 454 /************************************************************************ | 594 /************************************************************************ |
| 455 * * | 595 * * |
| 456 * » » The list of HTML elements and their properties» » * | 596 *» The list of HTML elements and their properties» » * |
| 457 * * | 597 * * |
| 458 ************************************************************************/ | 598 ************************************************************************/ |
| 459 | 599 |
| 460 /* | 600 /* |
| 461 * Start Tag: 1 means the start tag can be omitted | 601 * Start Tag: 1 means the start tag can be omitted |
| 462 * End Tag: 1 means the end tag can be omitted | 602 * End Tag: 1 means the end tag can be omitted |
| 463 * 2 means it's forbidden (empty elements) | 603 * 2 means it's forbidden (empty elements) |
| 464 * 3 means the tag is stylistic and should be closed easily | 604 * 3 means the tag is stylistic and should be closed easily |
| 465 * Depr: this element is deprecated | 605 * Depr: this element is deprecated |
| 466 * DTD: 1 means that this element is valid only in the Loose DTD | 606 * DTD: 1 means that this element is valid only in the Loose DTD |
| 467 * 2 means that this element is valid only in the Frameset DTD | 607 * 2 means that this element is valid only in the Frameset DTD |
| 468 * | 608 * |
| 469 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description | 609 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description |
| 470 , subElements , impliedsubelt , Attributes, userdata | 610 , subElements , impliedsubelt , Attributes, userdata |
| 471 */ | 611 */ |
| 472 | 612 |
| 473 /* Definitions and a couple of vars for HTML Elements */ | 613 /* Definitions and a couple of vars for HTML Elements */ |
| 474 | 614 |
| 475 #define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small" | 615 #define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small" |
| 476 #define NB_FONTSTYLE 8 | 616 #define NB_FONTSTYLE 8 |
| 477 #define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abb
r", "acronym" | 617 #define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abb
r", "acronym" |
| 478 #define NB_PHRASE 10 | 618 #define NB_PHRASE 10 |
| 479 #define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br
", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe" | 619 #define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br
", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe" |
| 480 #define NB_SPECIAL 16 | 620 #define NB_SPECIAL 16 |
| 481 #define INLINE PCDATA FONTSTYLE PHRASE SPECIAL FORMCTRL | 621 #define INLINE FONTSTYLE, PHRASE, SPECIAL, FORMCTRL |
| 482 #define NB_INLINE NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTR
L | 622 #define NB_INLINE NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTR
L |
| 483 #define BLOCK HEADING, LIST "pre", "p", "dl", "div", "center", "noscript", "nofr
ames", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address" | 623 #define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "nof
rames", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address" |
| 484 #define NB_BLOCK NB_HEADING + NB_LIST + 14 | 624 #define NB_BLOCK NB_HEADING + NB_LIST + 14 |
| 485 #define FORMCTRL "input", "select", "textarea", "label", "button" | 625 #define FORMCTRL "input", "select", "textarea", "label", "button" |
| 486 #define NB_FORMCTRL 5 | 626 #define NB_FORMCTRL 5 |
| 487 #define PCDATA | 627 #define PCDATA |
| 488 #define NB_PCDATA 0 | 628 #define NB_PCDATA 0 |
| 489 #define HEADING "h1", "h2", "h3", "h4", "h5", "h6" | 629 #define HEADING "h1", "h2", "h3", "h4", "h5", "h6" |
| 490 #define NB_HEADING 6 | 630 #define NB_HEADING 6 |
| 491 #define LIST "ul", "ol", "dir", "menu" | 631 #define LIST "ul", "ol", "dir", "menu" |
| 492 #define NB_LIST 4 | 632 #define NB_LIST 4 |
| 493 #define MODIFIER | 633 #define MODIFIER |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 599 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ; | 739 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ; |
| 600 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selecte
d", "value", NULL } ; | 740 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selecte
d", "value", NULL } ; |
| 601 static const char* const param_attrs[] = { "id", "value", "valuetype", "type", N
ULL } ; | 741 static const char* const param_attrs[] = { "id", "value", "valuetype", "type", N
ULL } ; |
| 602 static const char* const width_attr[] = { "width", NULL } ; | 742 static const char* const width_attr[] = { "width", NULL } ; |
| 603 static const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "st
rike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ; | 743 static const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "st
rike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ; |
| 604 static const char* const script_attrs[] = { "charset", "src", "defer", "event",
"for", NULL } ; | 744 static const char* const script_attrs[] = { "charset", "src", "defer", "event",
"for", NULL } ; |
| 605 static const char* const language_attr[] = { "language", NULL } ; | 745 static const char* const language_attr[] = { "language", NULL } ; |
| 606 static const char* const select_content[] = { "optgroup", "option", NULL } ; | 746 static const char* const select_content[] = { "optgroup", "option", NULL } ; |
| 607 static const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "
disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ; | 747 static const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "
disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ; |
| 608 static const char* const style_attrs[] = { I18N, "media", "title", NULL } ; | 748 static const char* const style_attrs[] = { I18N, "media", "title", NULL } ; |
| 609 static const char* const table_attrs[] = { ATTRS "summary", "width", "border", "
frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ; | 749 static const char* const table_attrs[] = { ATTRS, "summary", "width", "border",
"frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ; |
| 610 static const char* const table_depr[] = { "align", "bgcolor", NULL } ; | 750 static const char* const table_depr[] = { "align", "bgcolor", NULL } ; |
| 611 static const char* const table_contents[] = { "caption", "col", "colgroup", "the
ad", "tfoot", "tbody", "tr", NULL} ; | 751 static const char* const table_contents[] = { "caption", "col", "colgroup", "the
ad", "tfoot", "tbody", "tr", NULL} ; |
| 612 static const char* const tr_elt[] = { "tr", NULL } ; | 752 static const char* const tr_elt[] = { "tr", NULL } ; |
| 613 static const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL}
; | 753 static const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL}
; |
| 614 static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height"
, NULL } ; | 754 static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height"
, NULL } ; |
| 615 static const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "sco
pe", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ; | 755 static const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "sco
pe", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ; |
| 616 static const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readon
ly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL
} ; | 756 static const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readon
ly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL
} ; |
| 617 static const char* const tr_contents[] = { "th", "td", NULL } ; | 757 static const char* const tr_contents[] = { "th", "td", NULL } ; |
| 618 static const char* const bgcolor_attr[] = { "bgcolor", NULL } ; | 758 static const char* const bgcolor_attr[] = { "bgcolor", NULL } ; |
| 619 static const char* const li_elt[] = { "li", NULL } ; | 759 static const char* const li_elt[] = { "li", NULL } ; |
| (...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 931 "dl", "p", "dt", "menu", "dir", "address", "pre", "listing", | 1071 "dl", "p", "dt", "menu", "dir", "address", "pre", "listing", |
| 932 "xmp", "head", NULL, | 1072 "xmp", "head", NULL, |
| 933 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", | 1073 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", |
| 934 "head", "dd", NULL, | 1074 "head", "dd", NULL, |
| 935 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", | 1075 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", |
| 936 "head", "dt", NULL, | 1076 "head", "dt", NULL, |
| 937 "ul", "p", "head", "ol", "menu", "dir", "address", "pre", | 1077 "ul", "p", "head", "ol", "menu", "dir", "address", "pre", |
| 938 "listing", "xmp", NULL, | 1078 "listing", "xmp", NULL, |
| 939 "ol", "p", "head", "ul", NULL, | 1079 "ol", "p", "head", "ul", NULL, |
| 940 "menu", "p", "head", "ul", NULL, | 1080 "menu", "p", "head", "ul", NULL, |
| 941 "p",» » "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", NULL, | 1081 "p",» » "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL
, |
| 942 "div", "p", "head", NULL, | 1082 "div", "p", "head", NULL, |
| 943 "noscript", "p", "head", NULL, | 1083 "noscript", "p", "head", NULL, |
| 944 "center", "font", "b", "i", "p", "head", NULL, | 1084 "center", "font", "b", "i", "p", "head", NULL, |
| 945 "a", "a", NULL, | 1085 "a", "a", NULL, |
| 946 "caption", "p", NULL, | 1086 "caption", "p", NULL, |
| 947 "colgroup", "caption", "colgroup", "col", "p", NULL, | 1087 "colgroup", "caption", "colgroup", "col", "p", NULL, |
| 948 "col", "caption", "col", "p", NULL, | 1088 "col", "caption", "col", "p", NULL, |
| 949 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", | 1089 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", |
| 950 "listing", "xmp", "a", NULL, | 1090 "listing", "xmp", "a", NULL, |
| 951 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, | 1091 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, |
| 952 "td",» » "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, | 1092 "td",» » "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, |
| 953 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, | 1093 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, |
| 954 "thead", "caption", "col", "colgroup", NULL, | 1094 "thead", "caption", "col", "colgroup", NULL, |
| 955 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", | 1095 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", |
| 956 "tbody", "p", NULL, | 1096 "tbody", "p", NULL, |
| 957 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", | 1097 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", |
| 958 "tfoot", "tbody", "p", NULL, | 1098 "tfoot", "tbody", "p", NULL, |
| 959 "optgroup", "option", NULL, | 1099 "optgroup", "option", NULL, |
| 960 "option", "option", NULL, | 1100 "option", "option", NULL, |
| 961 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", | 1101 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", |
| 962 "pre", "listing", "xmp", "a", NULL, | 1102 "pre", "listing", "xmp", "a", NULL, |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1001 "onchange", | 1141 "onchange", |
| 1002 "onselect" | 1142 "onselect" |
| 1003 }; | 1143 }; |
| 1004 | 1144 |
| 1005 /* | 1145 /* |
| 1006 * This table is used by the htmlparser to know what to do with | 1146 * This table is used by the htmlparser to know what to do with |
| 1007 * broken html pages. By assigning different priorities to different | 1147 * broken html pages. By assigning different priorities to different |
| 1008 * elements the parser can decide how to handle extra endtags. | 1148 * elements the parser can decide how to handle extra endtags. |
| 1009 * Endtags are only allowed to close elements with lower or equal | 1149 * Endtags are only allowed to close elements with lower or equal |
| 1010 * priority. | 1150 * priority. |
| 1011 */ | 1151 */ |
| 1012 | 1152 |
| 1013 typedef struct { | 1153 typedef struct { |
| 1014 const char *name; | 1154 const char *name; |
| 1015 int priority; | 1155 int priority; |
| 1016 } elementPriority; | 1156 } elementPriority; |
| 1017 | 1157 |
| 1018 static const elementPriority htmlEndPriority[] = { | 1158 static const elementPriority htmlEndPriority[] = { |
| 1019 {"div", 150}, | 1159 {"div", 150}, |
| 1020 {"td", 160}, | 1160 {"td", 160}, |
| 1021 {"th", 160}, | 1161 {"th", 160}, |
| 1022 {"tr", 170}, | 1162 {"tr", 170}, |
| 1023 {"thead", 180}, | 1163 {"thead", 180}, |
| 1024 {"tbody", 180}, | 1164 {"tbody", 180}, |
| 1025 {"tfoot", 180}, | 1165 {"tfoot", 180}, |
| 1026 {"table", 190}, | 1166 {"table", 190}, |
| 1027 {"head", 200}, | 1167 {"head", 200}, |
| 1028 {"body", 200}, | 1168 {"body", 200}, |
| 1029 {"html", 220}, | 1169 {"html", 220}, |
| 1030 {NULL, 100} /* Default priority */ | 1170 {NULL, 100} /* Default priority */ |
| 1031 }; | 1171 }; |
| 1032 | 1172 |
| 1033 static const char** htmlStartCloseIndex[100]; | 1173 static const char** htmlStartCloseIndex[100]; |
| 1034 static int htmlStartCloseIndexinitialized = 0; | 1174 static int htmlStartCloseIndexinitialized = 0; |
| 1035 | 1175 |
| 1036 /************************************************************************ | 1176 /************************************************************************ |
| 1037 * * | 1177 * * |
| 1038 * » » functions to handle HTML specific data» » » * | 1178 *» functions to handle HTML specific data» » » * |
| 1039 * * | 1179 * * |
| 1040 ************************************************************************/ | 1180 ************************************************************************/ |
| 1041 | 1181 |
| 1042 /** | 1182 /** |
| 1043 * htmlInitAutoClose: | 1183 * htmlInitAutoClose: |
| 1044 * | 1184 * |
| 1045 * Initialize the htmlStartCloseIndex for fast lookup of closing tag names. | 1185 * Initialize the htmlStartCloseIndex for fast lookup of closing tag names. |
| 1046 * This is not reentrant. Call xmlInitParser() once before processing in | 1186 * This is not reentrant. Call xmlInitParser() once before processing in |
| 1047 * case of use in multithreaded programs. | 1187 * case of use in multithreaded programs. |
| 1048 */ | 1188 */ |
| (...skipping 29 matching lines...) Expand all Loading... |
| 1078 sizeof(html40ElementTable[0]));i++) { | 1218 sizeof(html40ElementTable[0]));i++) { |
| 1079 if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name)) | 1219 if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name)) |
| 1080 return((htmlElemDescPtr) &html40ElementTable[i]); | 1220 return((htmlElemDescPtr) &html40ElementTable[i]); |
| 1081 } | 1221 } |
| 1082 return(NULL); | 1222 return(NULL); |
| 1083 } | 1223 } |
| 1084 | 1224 |
| 1085 /** | 1225 /** |
| 1086 * htmlGetEndPriority: | 1226 * htmlGetEndPriority: |
| 1087 * @name: The name of the element to look up the priority for. | 1227 * @name: The name of the element to look up the priority for. |
| 1088 * | 1228 * |
| 1089 * Return value: The "endtag" priority. | 1229 * Return value: The "endtag" priority. |
| 1090 **/ | 1230 **/ |
| 1091 static int | 1231 static int |
| 1092 htmlGetEndPriority (const xmlChar *name) { | 1232 htmlGetEndPriority (const xmlChar *name) { |
| 1093 int i = 0; | 1233 int i = 0; |
| 1094 | 1234 |
| 1095 while ((htmlEndPriority[i].name != NULL) && | 1235 while ((htmlEndPriority[i].name != NULL) && |
| 1096 (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name))) | 1236 (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name))) |
| 1097 i++; | 1237 i++; |
| 1098 | 1238 |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1157 priority = htmlGetEndPriority(newtag); | 1297 priority = htmlGetEndPriority(newtag); |
| 1158 | 1298 |
| 1159 for (i = (ctxt->nameNr - 1); i >= 0; i--) { | 1299 for (i = (ctxt->nameNr - 1); i >= 0; i--) { |
| 1160 | 1300 |
| 1161 if (xmlStrEqual(newtag, ctxt->nameTab[i])) | 1301 if (xmlStrEqual(newtag, ctxt->nameTab[i])) |
| 1162 break; | 1302 break; |
| 1163 /* | 1303 /* |
| 1164 * A misplaced endtag can only close elements with lower | 1304 * A misplaced endtag can only close elements with lower |
| 1165 * or equal priority, so if we find an element with higher | 1305 * or equal priority, so if we find an element with higher |
| 1166 * priority before we find an element with | 1306 * priority before we find an element with |
| 1167 * matching name, we just ignore this endtag | 1307 * matching name, we just ignore this endtag |
| 1168 */ | 1308 */ |
| 1169 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority) | 1309 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority) |
| 1170 return; | 1310 return; |
| 1171 } | 1311 } |
| 1172 if (i < 0) | 1312 if (i < 0) |
| 1173 return; | 1313 return; |
| 1174 | 1314 |
| 1175 while (!xmlStrEqual(newtag, ctxt->name)) { | 1315 while (!xmlStrEqual(newtag, ctxt->name)) { |
| 1176 info = htmlTagLookup(ctxt->name); | 1316 info = htmlTagLookup(ctxt->name); |
| 1177 if ((info != NULL) && (info->endTag == 3)) { | 1317 if ((info != NULL) && (info->endTag == 3)) { |
| (...skipping 30 matching lines...) Expand all Loading... |
| 1208 /** | 1348 /** |
| 1209 * htmlAutoClose: | 1349 * htmlAutoClose: |
| 1210 * @ctxt: an HTML parser context | 1350 * @ctxt: an HTML parser context |
| 1211 * @newtag: The new tag name or NULL | 1351 * @newtag: The new tag name or NULL |
| 1212 * | 1352 * |
| 1213 * The HTML DTD allows a tag to implicitly close other tags. | 1353 * The HTML DTD allows a tag to implicitly close other tags. |
| 1214 * The list is kept in htmlStartClose array. This function is | 1354 * The list is kept in htmlStartClose array. This function is |
| 1215 * called when a new tag has been detected and generates the | 1355 * called when a new tag has been detected and generates the |
| 1216 * appropriate closes if possible/needed. | 1356 * appropriate closes if possible/needed. |
| 1217 * If newtag is NULL this means we are at the end of the resource | 1357 * If newtag is NULL this means we are at the end of the resource |
| 1218 * and we should check | 1358 * and we should check |
| 1219 */ | 1359 */ |
| 1220 static void | 1360 static void |
| 1221 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag) | 1361 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag) |
| 1222 { | 1362 { |
| 1223 while ((newtag != NULL) && (ctxt->name != NULL) && | 1363 while ((newtag != NULL) && (ctxt->name != NULL) && |
| 1224 (htmlCheckAutoClose(newtag, ctxt->name))) { | 1364 (htmlCheckAutoClose(newtag, ctxt->name))) { |
| 1225 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) | 1365 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
| 1226 ctxt->sax->endElement(ctxt->userData, ctxt->name); | 1366 ctxt->sax->endElement(ctxt->userData, ctxt->name); |
| 1227 htmlnamePop(ctxt); | 1367 htmlnamePop(ctxt); |
| 1228 } | 1368 } |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1296 * htmlCheckImplied: | 1436 * htmlCheckImplied: |
| 1297 * @ctxt: an HTML parser context | 1437 * @ctxt: an HTML parser context |
| 1298 * @newtag: The new tag name | 1438 * @newtag: The new tag name |
| 1299 * | 1439 * |
| 1300 * The HTML DTD allows a tag to exist only implicitly | 1440 * The HTML DTD allows a tag to exist only implicitly |
| 1301 * called when a new tag has been detected and generates the | 1441 * called when a new tag has been detected and generates the |
| 1302 * appropriate implicit tags if missing | 1442 * appropriate implicit tags if missing |
| 1303 */ | 1443 */ |
| 1304 static void | 1444 static void |
| 1305 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { | 1445 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { |
| 1446 int i; |
| 1447 |
| 1448 if (ctxt->options & HTML_PARSE_NOIMPLIED) |
| 1449 return; |
| 1306 if (!htmlOmittedDefaultValue) | 1450 if (!htmlOmittedDefaultValue) |
| 1307 return; | 1451 return; |
| 1308 if (xmlStrEqual(newtag, BAD_CAST"html")) | 1452 if (xmlStrEqual(newtag, BAD_CAST"html")) |
| 1309 return; | 1453 return; |
| 1310 if (ctxt->nameNr <= 0) { | 1454 if (ctxt->nameNr <= 0) { |
| 1311 htmlnamePush(ctxt, BAD_CAST"html"); | 1455 htmlnamePush(ctxt, BAD_CAST"html"); |
| 1312 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) | 1456 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) |
| 1313 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL); | 1457 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL); |
| 1314 } | 1458 } |
| 1315 if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"h
ead"))) | 1459 if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"h
ead"))) |
| 1316 return; | 1460 return; |
| 1317 if ((ctxt->nameNr <= 1) && | 1461 if ((ctxt->nameNr <= 1) && |
| 1318 ((xmlStrEqual(newtag, BAD_CAST"script")) || | 1462 ((xmlStrEqual(newtag, BAD_CAST"script")) || |
| 1319 (xmlStrEqual(newtag, BAD_CAST"style")) || | 1463 (xmlStrEqual(newtag, BAD_CAST"style")) || |
| 1320 (xmlStrEqual(newtag, BAD_CAST"meta")) || | 1464 (xmlStrEqual(newtag, BAD_CAST"meta")) || |
| 1321 (xmlStrEqual(newtag, BAD_CAST"link")) || | 1465 (xmlStrEqual(newtag, BAD_CAST"link")) || |
| 1322 (xmlStrEqual(newtag, BAD_CAST"title")) || | 1466 (xmlStrEqual(newtag, BAD_CAST"title")) || |
| 1323 (xmlStrEqual(newtag, BAD_CAST"base")))) { | 1467 (xmlStrEqual(newtag, BAD_CAST"base")))) { |
| 1324 » /* | 1468 if (ctxt->html >= 3) { |
| 1325 » * dropped OBJECT ... if you put it first BODY will be | 1469 /* we already saw or generated a <head> before */ |
| 1326 » * assumed ! | 1470 return; |
| 1327 » */ | 1471 } |
| 1328 » htmlnamePush(ctxt, BAD_CAST"head"); | 1472 /* |
| 1329 » if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) | 1473 * dropped OBJECT ... if you put it first BODY will be |
| 1330 » » ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL); | 1474 * assumed ! |
| 1475 */ |
| 1476 htmlnamePush(ctxt, BAD_CAST"head"); |
| 1477 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) |
| 1478 ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL); |
| 1331 } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) && | 1479 } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) && |
| 1332 (!xmlStrEqual(newtag, BAD_CAST"frame")) && | 1480 (!xmlStrEqual(newtag, BAD_CAST"frame")) && |
| 1333 (!xmlStrEqual(newtag, BAD_CAST"frameset"))) { | 1481 (!xmlStrEqual(newtag, BAD_CAST"frameset"))) { |
| 1334 » int i; | 1482 if (ctxt->html >= 10) { |
| 1483 /* we already saw or generated a <body> before */ |
| 1484 return; |
| 1485 } |
| 1335 for (i = 0;i < ctxt->nameNr;i++) { | 1486 for (i = 0;i < ctxt->nameNr;i++) { |
| 1336 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) { | 1487 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) { |
| 1337 return; | 1488 return; |
| 1338 } | 1489 } |
| 1339 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) { | 1490 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) { |
| 1340 return; | 1491 return; |
| 1341 } | 1492 } |
| 1342 } | 1493 } |
| 1343 » | 1494 |
| 1344 htmlnamePush(ctxt, BAD_CAST"body"); | 1495 htmlnamePush(ctxt, BAD_CAST"body"); |
| 1345 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) | 1496 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) |
| 1346 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL); | 1497 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL); |
| 1347 } | 1498 } |
| 1348 } | 1499 } |
| 1349 | 1500 |
| 1350 /** | 1501 /** |
| 1351 * htmlCheckParagraph | 1502 * htmlCheckParagraph |
| 1352 * @ctxt: an HTML parser context | 1503 * @ctxt: an HTML parser context |
| 1353 * | 1504 * |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1395 * | 1546 * |
| 1396 * Check if an attribute is of content type Script | 1547 * Check if an attribute is of content type Script |
| 1397 * | 1548 * |
| 1398 * Returns 1 if the attribute is a script, 0 otherwise | 1549 * Returns 1 if the attribute is a script, 0 otherwise |
| 1399 */ | 1550 */ |
| 1400 int | 1551 int |
| 1401 htmlIsScriptAttribute(const xmlChar *name) { | 1552 htmlIsScriptAttribute(const xmlChar *name) { |
| 1402 unsigned int i; | 1553 unsigned int i; |
| 1403 | 1554 |
| 1404 if (name == NULL) | 1555 if (name == NULL) |
| 1405 »return(0); | 1556 return(0); |
| 1406 /* | 1557 /* |
| 1407 * all script attributes start with 'on' | 1558 * all script attributes start with 'on' |
| 1408 */ | 1559 */ |
| 1409 if ((name[0] != 'o') || (name[1] != 'n')) | 1560 if ((name[0] != 'o') || (name[1] != 'n')) |
| 1410 »return(0); | 1561 return(0); |
| 1411 for (i = 0; | 1562 for (i = 0; |
| 1412 i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]); | 1563 i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]); |
| 1413 i++) { | 1564 i++) { |
| 1414 if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i])) | 1565 if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i])) |
| 1415 return(1); | 1566 return(1); |
| 1416 } | 1567 } |
| 1417 return(0); | 1568 return(0); |
| 1418 } | 1569 } |
| 1419 | 1570 |
| 1420 /************************************************************************ | 1571 /************************************************************************ |
| 1421 * * | 1572 * * |
| 1422 * » » The list of HTML predefined entities» » » * | 1573 *» The list of HTML predefined entities» » » * |
| 1423 * * | 1574 * * |
| 1424 ************************************************************************/ | 1575 ************************************************************************/ |
| 1425 | 1576 |
| 1426 | 1577 |
| 1427 static const htmlEntityDesc html40EntitiesTable[] = { | 1578 static const htmlEntityDesc html40EntitiesTable[] = { |
| 1428 /* | 1579 /* |
| 1429 * the 4 absolute ones, plus apostrophe. | 1580 * the 4 absolute ones, plus apostrophe. |
| 1430 */ | 1581 */ |
| 1431 { 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" }, | 1582 { 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" }, |
| 1432 { 38, "amp", "ampersand, U+0026 ISOnum" }, | 1583 { 38, "amp", "ampersand, U+0026 ISOnum" }, |
| (...skipping 393 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1826 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } | 1977 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } |
| 1827 else { | 1978 else { |
| 1828 /* no chance for this in Ascii */ | 1979 /* no chance for this in Ascii */ |
| 1829 *outlen = out - outstart; | 1980 *outlen = out - outstart; |
| 1830 *inlen = processed - instart; | 1981 *inlen = processed - instart; |
| 1831 return(-2); | 1982 return(-2); |
| 1832 } | 1983 } |
| 1833 | 1984 |
| 1834 if (inend - in < trailing) { | 1985 if (inend - in < trailing) { |
| 1835 break; | 1986 break; |
| 1836 » } | 1987 » } |
| 1837 | 1988 |
| 1838 for ( ; trailing; trailing--) { | 1989 for ( ; trailing; trailing--) { |
| 1839 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) | 1990 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) |
| 1840 break; | 1991 break; |
| 1841 c <<= 6; | 1992 c <<= 6; |
| 1842 c |= d & 0x3F; | 1993 c |= d & 0x3F; |
| 1843 } | 1994 } |
| 1844 | 1995 |
| 1845 /* assertion: c is a single UCS-4 value */ | 1996 /* assertion: c is a single UCS-4 value */ |
| 1846 if (c < 0x80) { | 1997 if (c < 0x80) { |
| (...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2016 return(input); | 2167 return(input); |
| 2017 } | 2168 } |
| 2018 | 2169 |
| 2019 | 2170 |
| 2020 /************************************************************************ | 2171 /************************************************************************ |
| 2021 * * | 2172 * * |
| 2022 * Commodity functions, cleanup needed ? * | 2173 * Commodity functions, cleanup needed ? * |
| 2023 * * | 2174 * * |
| 2024 ************************************************************************/ | 2175 ************************************************************************/ |
| 2025 /* | 2176 /* |
| 2026 * all tags allowing pc data from the html 4.01 loose dtd | 2177 * all tags allowing pc data from the html 4.01 loose dtd |
| 2027 * NOTE: it might be more appropriate to integrate this information | 2178 * NOTE: it might be more appropriate to integrate this information |
| 2028 * into the html40ElementTable array but I don't want to risk any | 2179 * into the html40ElementTable array but I don't want to risk any |
| 2029 * binary incompatibility | 2180 * binary incompatibility |
| 2030 */ | 2181 */ |
| 2031 static const char *allowPCData[] = { | 2182 static const char *allowPCData[] = { |
| 2032 "a", "abbr", "acronym", "address", "applet", "b", "bdo", "big", | 2183 "a", "abbr", "acronym", "address", "applet", "b", "bdo", "big", |
| 2033 "blockquote", "body", "button", "caption", "center", "cite", "code", | 2184 "blockquote", "body", "button", "caption", "center", "cite", "code", |
| 2034 "dd", "del", "dfn", "div", "dt", "em", "font", "form", "h1", "h2", | 2185 "dd", "del", "dfn", "div", "dt", "em", "font", "form", "h1", "h2", |
| 2035 "h3", "h4", "h5", "h6", "i", "iframe", "ins", "kbd", "label", "legend", | 2186 "h3", "h4", "h5", "h6", "i", "iframe", "ins", "kbd", "label", "legend", |
| 2036 "li", "noframes", "noscript", "object", "p", "pre", "q", "s", "samp", | 2187 "li", "noframes", "noscript", "object", "p", "pre", "q", "s", "samp", |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2076 } | 2227 } |
| 2077 } | 2228 } |
| 2078 | 2229 |
| 2079 if (ctxt->node == NULL) return(0); | 2230 if (ctxt->node == NULL) return(0); |
| 2080 lastChild = xmlGetLastChild(ctxt->node); | 2231 lastChild = xmlGetLastChild(ctxt->node); |
| 2081 while ((lastChild) && (lastChild->type == XML_COMMENT_NODE)) | 2232 while ((lastChild) && (lastChild->type == XML_COMMENT_NODE)) |
| 2082 lastChild = lastChild->prev; | 2233 lastChild = lastChild->prev; |
| 2083 if (lastChild == NULL) { | 2234 if (lastChild == NULL) { |
| 2084 if ((ctxt->node->type != XML_ELEMENT_NODE) && | 2235 if ((ctxt->node->type != XML_ELEMENT_NODE) && |
| 2085 (ctxt->node->content != NULL)) return(0); | 2236 (ctxt->node->content != NULL)) return(0); |
| 2086 » /* keep ws in constructs like ...<b> </b>... | 2237 » /* keep ws in constructs like ...<b> </b>... |
| 2087 for all tags "b" allowing PCDATA */ | 2238 for all tags "b" allowing PCDATA */ |
| 2088 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { | 2239 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { |
| 2089 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) { | 2240 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) { |
| 2090 return(0); | 2241 return(0); |
| 2091 } | 2242 } |
| 2092 } | 2243 } |
| 2093 } else if (xmlNodeIsText(lastChild)) { | 2244 } else if (xmlNodeIsText(lastChild)) { |
| 2094 return(0); | 2245 return(0); |
| 2095 } else { | 2246 } else { |
| 2096 » /* keep ws in constructs like <p><b>xy</b> <i>z</i><p> | 2247 » /* keep ws in constructs like <p><b>xy</b> <i>z</i><p> |
| 2097 for all tags "p" allowing PCDATA */ | 2248 for all tags "p" allowing PCDATA */ |
| 2098 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { | 2249 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { |
| 2099 if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) { | 2250 if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) { |
| 2100 return(0); | 2251 return(0); |
| 2101 } | 2252 } |
| 2102 } | 2253 } |
| 2103 } | 2254 } |
| 2104 return(1); | 2255 return(1); |
| 2105 } | 2256 } |
| 2106 | 2257 |
| (...skipping 19 matching lines...) Expand all Loading... |
| 2126 htmlErrMemory(NULL, "HTML document creation failed\n"); | 2277 htmlErrMemory(NULL, "HTML document creation failed\n"); |
| 2127 return(NULL); | 2278 return(NULL); |
| 2128 } | 2279 } |
| 2129 memset(cur, 0, sizeof(xmlDoc)); | 2280 memset(cur, 0, sizeof(xmlDoc)); |
| 2130 | 2281 |
| 2131 cur->type = XML_HTML_DOCUMENT_NODE; | 2282 cur->type = XML_HTML_DOCUMENT_NODE; |
| 2132 cur->version = NULL; | 2283 cur->version = NULL; |
| 2133 cur->intSubset = NULL; | 2284 cur->intSubset = NULL; |
| 2134 cur->doc = cur; | 2285 cur->doc = cur; |
| 2135 cur->name = NULL; | 2286 cur->name = NULL; |
| 2136 cur->children = NULL; | 2287 cur->children = NULL; |
| 2137 cur->extSubset = NULL; | 2288 cur->extSubset = NULL; |
| 2138 cur->oldNs = NULL; | 2289 cur->oldNs = NULL; |
| 2139 cur->encoding = NULL; | 2290 cur->encoding = NULL; |
| 2140 cur->standalone = 1; | 2291 cur->standalone = 1; |
| 2141 cur->compression = 0; | 2292 cur->compression = 0; |
| 2142 cur->ids = NULL; | 2293 cur->ids = NULL; |
| 2143 cur->refs = NULL; | 2294 cur->refs = NULL; |
| 2144 cur->_private = NULL; | 2295 cur->_private = NULL; |
| 2145 cur->charset = XML_CHAR_ENCODING_UTF8; | 2296 cur->charset = XML_CHAR_ENCODING_UTF8; |
| 2297 cur->properties = XML_DOC_HTML | XML_DOC_USERBUILT; |
| 2146 if ((ExternalID != NULL) || | 2298 if ((ExternalID != NULL) || |
| 2147 (URI != NULL)) | 2299 (URI != NULL)) |
| 2148 xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI); | 2300 xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI); |
| 2149 return(cur); | 2301 return(cur); |
| 2150 } | 2302 } |
| 2151 | 2303 |
| 2152 /** | 2304 /** |
| 2153 * htmlNewDoc: | 2305 * htmlNewDoc: |
| 2154 * @URI: URI for the dtd, or NULL | 2306 * @URI: URI for the dtd, or NULL |
| 2155 * @ExternalID: the external ID of the DTD, or NULL | 2307 * @ExternalID: the external ID of the DTD, or NULL |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2193 * | 2345 * |
| 2194 * Returns the Tag Name parsed or NULL | 2346 * Returns the Tag Name parsed or NULL |
| 2195 */ | 2347 */ |
| 2196 | 2348 |
| 2197 static const xmlChar * | 2349 static const xmlChar * |
| 2198 htmlParseHTMLName(htmlParserCtxtPtr ctxt) { | 2350 htmlParseHTMLName(htmlParserCtxtPtr ctxt) { |
| 2199 int i = 0; | 2351 int i = 0; |
| 2200 xmlChar loc[HTML_PARSER_BUFFER_SIZE]; | 2352 xmlChar loc[HTML_PARSER_BUFFER_SIZE]; |
| 2201 | 2353 |
| 2202 if (!IS_ASCII_LETTER(CUR) && (CUR != '_') && | 2354 if (!IS_ASCII_LETTER(CUR) && (CUR != '_') && |
| 2203 (CUR != ':')) return(NULL); | 2355 (CUR != ':') && (CUR != '.')) return(NULL); |
| 2204 | 2356 |
| 2205 while ((i < HTML_PARSER_BUFFER_SIZE) && | 2357 while ((i < HTML_PARSER_BUFFER_SIZE) && |
| 2206 ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) || | 2358 ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) || |
| 2207 » (CUR == ':') || (CUR == '-') || (CUR == '_'))) { | 2359 » (CUR == ':') || (CUR == '-') || (CUR == '_') || |
| 2360 (CUR == '.'))) { |
| 2208 if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; | 2361 if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; |
| 2209 else loc[i] = CUR; | 2362 else loc[i] = CUR; |
| 2210 i++; | 2363 i++; |
| 2211 » | 2364 |
| 2212 NEXT; | 2365 NEXT; |
| 2213 } | 2366 } |
| 2214 | 2367 |
| 2215 return(xmlDictLookup(ctxt->dict, loc, i)); | 2368 return(xmlDictLookup(ctxt->dict, loc, i)); |
| 2216 } | 2369 } |
| 2217 | 2370 |
| 2218 | 2371 |
| 2219 /** | 2372 /** |
| 2220 * htmlParseHTMLName_nonInvasive: | 2373 * htmlParseHTMLName_nonInvasive: |
| 2221 * @ctxt: an HTML parser context | 2374 * @ctxt: an HTML parser context |
| 2222 * | 2375 * |
| 2223 * parse an HTML tag or attribute name, note that we convert it to lowercase | 2376 * parse an HTML tag or attribute name, note that we convert it to lowercase |
| 2224 * since HTML names are not case-sensitive, this doesn't consume the data | 2377 * since HTML names are not case-sensitive, this doesn't consume the data |
| 2225 * from the stream, it's a look-ahead | 2378 * from the stream, it's a look-ahead |
| 2226 * | 2379 * |
| 2227 * Returns the Tag Name parsed or NULL | 2380 * Returns the Tag Name parsed or NULL |
| 2228 */ | 2381 */ |
| 2229 | 2382 |
| 2230 static const xmlChar * | 2383 static const xmlChar * |
| 2231 htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) { | 2384 htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) { |
| 2232 int i = 0; | 2385 int i = 0; |
| 2233 xmlChar loc[HTML_PARSER_BUFFER_SIZE]; | 2386 xmlChar loc[HTML_PARSER_BUFFER_SIZE]; |
| 2234 | 2387 |
| 2235 if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') && | 2388 if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') && |
| 2236 (NXT(1) != ':')) return(NULL); | 2389 (NXT(1) != ':')) return(NULL); |
| 2237 | 2390 |
| 2238 while ((i < HTML_PARSER_BUFFER_SIZE) && | 2391 while ((i < HTML_PARSER_BUFFER_SIZE) && |
| 2239 ((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) || | 2392 ((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) || |
| 2240 (NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) { | 2393 (NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) { |
| 2241 if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20; | 2394 if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20; |
| 2242 else loc[i] = NXT(1+i); | 2395 else loc[i] = NXT(1+i); |
| 2243 i++; | 2396 i++; |
| 2244 } | 2397 } |
| 2245 | 2398 |
| 2246 return(xmlDictLookup(ctxt->dict, loc, i)); | 2399 return(xmlDictLookup(ctxt->dict, loc, i)); |
| 2247 } | 2400 } |
| 2248 | 2401 |
| 2249 | 2402 |
| 2250 /** | 2403 /** |
| 2251 * htmlParseName: | 2404 * htmlParseName: |
| 2252 * @ctxt: an HTML parser context | 2405 * @ctxt: an HTML parser context |
| 2253 * | 2406 * |
| 2254 * parse an HTML name, this routine is case sensitive. | 2407 * parse an HTML name, this routine is case sensitive. |
| 2255 * | 2408 * |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2303 c = CUR_CHAR(l); | 2456 c = CUR_CHAR(l); |
| 2304 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ | 2457 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
| 2305 (!IS_LETTER(c) && (c != '_') && | 2458 (!IS_LETTER(c) && (c != '_') && |
| 2306 (c != ':'))) { | 2459 (c != ':'))) { |
| 2307 return(NULL); | 2460 return(NULL); |
| 2308 } | 2461 } |
| 2309 | 2462 |
| 2310 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ | 2463 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ |
| 2311 ((IS_LETTER(c)) || (IS_DIGIT(c)) || | 2464 ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
| 2312 (c == '.') || (c == '-') || | 2465 (c == '.') || (c == '-') || |
| 2313 » (c == '_') || (c == ':') || | 2466 » (c == '_') || (c == ':') || |
| 2314 (IS_COMBINING(c)) || | 2467 (IS_COMBINING(c)) || |
| 2315 (IS_EXTENDER(c)))) { | 2468 (IS_EXTENDER(c)))) { |
| 2316 if (count++ > 100) { | 2469 if (count++ > 100) { |
| 2317 count = 0; | 2470 count = 0; |
| 2318 GROW; | 2471 GROW; |
| 2319 } | 2472 } |
| 2320 len += l; | 2473 len += l; |
| 2321 NEXTL(l); | 2474 NEXTL(l); |
| 2322 c = CUR_CHAR(l); | 2475 c = CUR_CHAR(l); |
| 2323 } | 2476 } |
| 2324 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); | 2477 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); |
| 2325 } | 2478 } |
| 2326 | 2479 |
| 2327 | 2480 |
| 2328 /** | 2481 /** |
| 2329 * htmlParseHTMLAttribute: | 2482 * htmlParseHTMLAttribute: |
| 2330 * @ctxt: an HTML parser context | 2483 * @ctxt: an HTML parser context |
| 2331 * @stop: a char stop value | 2484 * @stop: a char stop value |
| 2332 * | 2485 * |
| 2333 * parse an HTML attribute value till the stop (quote), if | 2486 * parse an HTML attribute value till the stop (quote), if |
| 2334 * stop is 0 then it stops at the first space | 2487 * stop is 0 then it stops at the first space |
| 2335 * | 2488 * |
| 2336 * Returns the attribute parsed or NULL | 2489 * Returns the attribute parsed or NULL |
| 2337 */ | 2490 */ |
| 2338 | 2491 |
| 2339 static xmlChar * | 2492 static xmlChar * |
| 2340 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) { | 2493 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) { |
| 2341 xmlChar *buffer = NULL; | 2494 xmlChar *buffer = NULL; |
| 2342 int buffer_size = 0; | 2495 int buffer_size = 0; |
| (...skipping 24 matching lines...) Expand all Loading... |
| 2367 unsigned int c; | 2520 unsigned int c; |
| 2368 int bits; | 2521 int bits; |
| 2369 | 2522 |
| 2370 c = htmlParseCharRef(ctxt); | 2523 c = htmlParseCharRef(ctxt); |
| 2371 if (c < 0x80) | 2524 if (c < 0x80) |
| 2372 { *out++ = c; bits= -6; } | 2525 { *out++ = c; bits= -6; } |
| 2373 else if (c < 0x800) | 2526 else if (c < 0x800) |
| 2374 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } | 2527 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
| 2375 else if (c < 0x10000) | 2528 else if (c < 0x10000) |
| 2376 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } | 2529 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
| 2377 » » else | 2530 » » else |
| 2378 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } | 2531 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } |
| 2379 » | 2532 |
| 2380 for ( ; bits >= 0; bits-= 6) { | 2533 for ( ; bits >= 0; bits-= 6) { |
| 2381 *out++ = ((c >> bits) & 0x3F) | 0x80; | 2534 *out++ = ((c >> bits) & 0x3F) | 0x80; |
| 2382 } | 2535 } |
| 2383 » » | 2536 |
| 2384 if (out - buffer > buffer_size - 100) { | 2537 if (out - buffer > buffer_size - 100) { |
| 2385 int indx = out - buffer; | 2538 int indx = out - buffer; |
| 2386 | 2539 |
| 2387 growBuffer(buffer); | 2540 growBuffer(buffer); |
| 2388 out = &buffer[indx]; | 2541 out = &buffer[indx]; |
| 2389 } | 2542 } |
| 2390 } else { | 2543 } else { |
| 2391 ent = htmlParseEntityRef(ctxt, &name); | 2544 ent = htmlParseEntityRef(ctxt, &name); |
| 2392 if (name == NULL) { | 2545 if (name == NULL) { |
| 2393 *out++ = '&'; | 2546 *out++ = '&'; |
| (...skipping 25 matching lines...) Expand all Loading... |
| 2419 growBuffer(buffer); | 2572 growBuffer(buffer); |
| 2420 out = &buffer[indx]; | 2573 out = &buffer[indx]; |
| 2421 } | 2574 } |
| 2422 c = ent->value; | 2575 c = ent->value; |
| 2423 if (c < 0x80) | 2576 if (c < 0x80) |
| 2424 { *out++ = c; bits= -6; } | 2577 { *out++ = c; bits= -6; } |
| 2425 else if (c < 0x800) | 2578 else if (c < 0x800) |
| 2426 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } | 2579 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
| 2427 else if (c < 0x10000) | 2580 else if (c < 0x10000) |
| 2428 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } | 2581 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
| 2429 » » else | 2582 » » else |
| 2430 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } | 2583 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } |
| 2431 » | 2584 |
| 2432 for ( ; bits >= 0; bits-= 6) { | 2585 for ( ; bits >= 0; bits-= 6) { |
| 2433 *out++ = ((c >> bits) & 0x3F) | 0x80; | 2586 *out++ = ((c >> bits) & 0x3F) | 0x80; |
| 2434 } | 2587 } |
| 2435 } | 2588 } |
| 2436 } | 2589 } |
| 2437 } else { | 2590 } else { |
| 2438 unsigned int c; | 2591 unsigned int c; |
| 2439 int bits, l; | 2592 int bits, l; |
| 2440 | 2593 |
| 2441 if (out - buffer > buffer_size - 100) { | 2594 if (out - buffer > buffer_size - 100) { |
| 2442 int indx = out - buffer; | 2595 int indx = out - buffer; |
| 2443 | 2596 |
| 2444 growBuffer(buffer); | 2597 growBuffer(buffer); |
| 2445 out = &buffer[indx]; | 2598 out = &buffer[indx]; |
| 2446 } | 2599 } |
| 2447 c = CUR_CHAR(l); | 2600 c = CUR_CHAR(l); |
| 2448 if (c < 0x80) | 2601 if (c < 0x80) |
| 2449 { *out++ = c; bits= -6; } | 2602 { *out++ = c; bits= -6; } |
| 2450 else if (c < 0x800) | 2603 else if (c < 0x800) |
| 2451 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } | 2604 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
| 2452 else if (c < 0x10000) | 2605 else if (c < 0x10000) |
| 2453 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } | 2606 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
| 2454 » else | 2607 » else |
| 2455 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } | 2608 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } |
| 2456 | 2609 |
| 2457 for ( ; bits >= 0; bits-= 6) { | 2610 for ( ; bits >= 0; bits-= 6) { |
| 2458 *out++ = ((c >> bits) & 0x3F) | 0x80; | 2611 *out++ = ((c >> bits) & 0x3F) | 0x80; |
| 2459 } | 2612 } |
| 2460 NEXT; | 2613 NEXT; |
| 2461 } | 2614 } |
| 2462 } | 2615 } |
| 2463 *out++ = 0; | 2616 *out = 0; |
| 2464 return(buffer); | 2617 return(buffer); |
| 2465 } | 2618 } |
| 2466 | 2619 |
| 2467 /** | 2620 /** |
| 2468 * htmlParseEntityRef: | 2621 * htmlParseEntityRef: |
| 2469 * @ctxt: an HTML parser context | 2622 * @ctxt: an HTML parser context |
| 2470 * @str: location to store the entity name | 2623 * @str: location to store the entity name |
| 2471 * | 2624 * |
| 2472 * parse an HTML ENTITY reference | 2625 * parse an HTML ENTITY reference |
| 2473 * | 2626 * |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2514 return(ent); | 2667 return(ent); |
| 2515 } | 2668 } |
| 2516 | 2669 |
| 2517 /** | 2670 /** |
| 2518 * htmlParseAttValue: | 2671 * htmlParseAttValue: |
| 2519 * @ctxt: an HTML parser context | 2672 * @ctxt: an HTML parser context |
| 2520 * | 2673 * |
| 2521 * parse a value for an attribute | 2674 * parse a value for an attribute |
| 2522 * Note: the parser won't do substitution of entities here, this | 2675 * Note: the parser won't do substitution of entities here, this |
| 2523 * will be handled later in xmlStringGetNodeList, unless it was | 2676 * will be handled later in xmlStringGetNodeList, unless it was |
| 2524 * asked for ctxt->replaceEntities != 0 | 2677 * asked for ctxt->replaceEntities != 0 |
| 2525 * | 2678 * |
| 2526 * Returns the AttValue parsed or NULL. | 2679 * Returns the AttValue parsed or NULL. |
| 2527 */ | 2680 */ |
| 2528 | 2681 |
| 2529 static xmlChar * | 2682 static xmlChar * |
| 2530 htmlParseAttValue(htmlParserCtxtPtr ctxt) { | 2683 htmlParseAttValue(htmlParserCtxtPtr ctxt) { |
| 2531 xmlChar *ret = NULL; | 2684 xmlChar *ret = NULL; |
| 2532 | 2685 |
| 2533 if (CUR == '"') { | 2686 if (CUR == '"') { |
| 2534 NEXT; | 2687 NEXT; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 2555 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, | 2708 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, |
| 2556 "AttValue: no value found\n", NULL, NULL); | 2709 "AttValue: no value found\n", NULL, NULL); |
| 2557 } | 2710 } |
| 2558 } | 2711 } |
| 2559 return(ret); | 2712 return(ret); |
| 2560 } | 2713 } |
| 2561 | 2714 |
| 2562 /** | 2715 /** |
| 2563 * htmlParseSystemLiteral: | 2716 * htmlParseSystemLiteral: |
| 2564 * @ctxt: an HTML parser context | 2717 * @ctxt: an HTML parser context |
| 2565 * | 2718 * |
| 2566 * parse an HTML Literal | 2719 * parse an HTML Literal |
| 2567 * | 2720 * |
| 2568 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") | 2721 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") |
| 2569 * | 2722 * |
| 2570 * Returns the SystemLiteral parsed or NULL | 2723 * Returns the SystemLiteral parsed or NULL |
| 2571 */ | 2724 */ |
| 2572 | 2725 |
| 2573 static xmlChar * | 2726 static xmlChar * |
| 2574 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { | 2727 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { |
| 2575 const xmlChar *q; | 2728 const xmlChar *q; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 2596 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, | 2749 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, |
| 2597 "Unfinished SystemLiteral\n", NULL, NULL); | 2750 "Unfinished SystemLiteral\n", NULL, NULL); |
| 2598 } else { | 2751 } else { |
| 2599 ret = xmlStrndup(q, CUR_PTR - q); | 2752 ret = xmlStrndup(q, CUR_PTR - q); |
| 2600 NEXT; | 2753 NEXT; |
| 2601 } | 2754 } |
| 2602 } else { | 2755 } else { |
| 2603 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, | 2756 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, |
| 2604 " or ' expected\n", NULL, NULL); | 2757 " or ' expected\n", NULL, NULL); |
| 2605 } | 2758 } |
| 2606 | 2759 |
| 2607 return(ret); | 2760 return(ret); |
| 2608 } | 2761 } |
| 2609 | 2762 |
| 2610 /** | 2763 /** |
| 2611 * htmlParsePubidLiteral: | 2764 * htmlParsePubidLiteral: |
| 2612 * @ctxt: an HTML parser context | 2765 * @ctxt: an HTML parser context |
| 2613 * | 2766 * |
| 2614 * parse an HTML public literal | 2767 * parse an HTML public literal |
| 2615 * | 2768 * |
| 2616 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" | 2769 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" |
| (...skipping 28 matching lines...) Expand all Loading... |
| 2645 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, | 2798 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, |
| 2646 "Unfinished PubidLiteral\n", NULL, NULL); | 2799 "Unfinished PubidLiteral\n", NULL, NULL); |
| 2647 } else { | 2800 } else { |
| 2648 ret = xmlStrndup(q, CUR_PTR - q); | 2801 ret = xmlStrndup(q, CUR_PTR - q); |
| 2649 NEXT; | 2802 NEXT; |
| 2650 } | 2803 } |
| 2651 } else { | 2804 } else { |
| 2652 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, | 2805 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, |
| 2653 "PubidLiteral \" or ' expected\n", NULL, NULL); | 2806 "PubidLiteral \" or ' expected\n", NULL, NULL); |
| 2654 } | 2807 } |
| 2655 | 2808 |
| 2656 return(ret); | 2809 return(ret); |
| 2657 } | 2810 } |
| 2658 | 2811 |
| 2659 /** | 2812 /** |
| 2660 * htmlParseScript: | 2813 * htmlParseScript: |
| 2661 * @ctxt: an HTML parser context | 2814 * @ctxt: an HTML parser context |
| 2662 * | 2815 * |
| 2663 * parse the content of an HTML SCRIPT or STYLE element | 2816 * parse the content of an HTML SCRIPT or STYLE element |
| 2664 * http://www.w3.org/TR/html4/sgml/dtd.html#Script | 2817 * http://www.w3.org/TR/html4/sgml/dtd.html#Script |
| 2665 * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet | 2818 * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet |
| (...skipping 26 matching lines...) Expand all Loading... |
| 2692 * Authors should therefore escape "</" within the content. | 2845 * Authors should therefore escape "</" within the content. |
| 2693 * Escape mechanisms are specific to each scripting or | 2846 * Escape mechanisms are specific to each scripting or |
| 2694 * style sheet language. | 2847 * style sheet language. |
| 2695 * | 2848 * |
| 2696 * In recovery mode, only break if end tag match the | 2849 * In recovery mode, only break if end tag match the |
| 2697 * current tag, effectively ignoring all tags inside the | 2850 * current tag, effectively ignoring all tags inside the |
| 2698 * script/style block and treating the entire block as | 2851 * script/style block and treating the entire block as |
| 2699 * CDATA. | 2852 * CDATA. |
| 2700 */ | 2853 */ |
| 2701 if (ctxt->recovery) { | 2854 if (ctxt->recovery) { |
| 2702 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2, | 2855 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2, |
| 2703 » » » » xmlStrlen(ctxt->name)) == 0) | 2856 » » » » xmlStrlen(ctxt->name)) == 0) |
| 2704 { | 2857 { |
| 2705 break; /* while */ | 2858 break; /* while */ |
| 2706 } else { | 2859 } else { |
| 2707 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, | 2860 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, |
| 2708 "Element %s embeds close tag\n", | 2861 "Element %s embeds close tag\n", |
| 2709 ctxt->name, NULL); | 2862 ctxt->name, NULL); |
| 2710 } | 2863 } |
| 2711 } else { | 2864 } else { |
| 2712 if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) || | 2865 if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) || |
| 2713 ((NXT(2) >= 'a') && (NXT(2) <= 'z'))) | 2866 ((NXT(2) >= 'a') && (NXT(2) <= 'z'))) |
| 2714 { | 2867 { |
| 2715 break; /* while */ | 2868 break; /* while */ |
| 2716 } | 2869 } |
| 2717 } | 2870 } |
| 2718 } | 2871 } |
| 2719 COPY_BUF(l,buf,nbchar,cur); | 2872 COPY_BUF(l,buf,nbchar,cur); |
| 2720 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { | 2873 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { |
| 2721 if (ctxt->sax->cdataBlock!= NULL) { | 2874 if (ctxt->sax->cdataBlock!= NULL) { |
| 2722 /* | 2875 /* |
| 2723 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE | 2876 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2760 * if we are within a CDATA section ']]>' marks an end of section. | 2913 * if we are within a CDATA section ']]>' marks an end of section. |
| 2761 * | 2914 * |
| 2762 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) | 2915 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) |
| 2763 */ | 2916 */ |
| 2764 | 2917 |
| 2765 static void | 2918 static void |
| 2766 htmlParseCharData(htmlParserCtxtPtr ctxt) { | 2919 htmlParseCharData(htmlParserCtxtPtr ctxt) { |
| 2767 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5]; | 2920 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5]; |
| 2768 int nbchar = 0; | 2921 int nbchar = 0; |
| 2769 int cur, l; | 2922 int cur, l; |
| 2923 int chunk = 0; |
| 2770 | 2924 |
| 2771 SHRINK; | 2925 SHRINK; |
| 2772 cur = CUR_CHAR(l); | 2926 cur = CUR_CHAR(l); |
| 2773 while (((cur != '<') || (ctxt->token == '<')) && | 2927 while (((cur != '<') || (ctxt->token == '<')) && |
| 2774 ((cur != '&') || (ctxt->token == '&')) && | 2928 ((cur != '&') || (ctxt->token == '&')) && |
| 2775 (cur != 0)) { | 2929 (cur != 0)) { |
| 2776 if (!(IS_CHAR(cur))) { | 2930 if (!(IS_CHAR(cur))) { |
| 2777 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, | 2931 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, |
| 2778 "Invalid char in CDATA 0x%X\n", cur); | 2932 "Invalid char in CDATA 0x%X\n", cur); |
| 2779 } else { | 2933 } else { |
| 2780 COPY_BUF(l,buf,nbchar,cur); | 2934 COPY_BUF(l,buf,nbchar,cur); |
| 2781 } | 2935 } |
| 2782 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { | 2936 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { |
| 2783 /* | 2937 /* |
| 2784 * Ok the segment is to be consumed as chars. | 2938 * Ok the segment is to be consumed as chars. |
| 2785 */ | 2939 */ |
| 2786 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { | 2940 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
| 2787 if (areBlanks(ctxt, buf, nbchar)) { | 2941 if (areBlanks(ctxt, buf, nbchar)) { |
| 2788 if (ctxt->sax->ignorableWhitespace != NULL) | 2942 if (ctxt->sax->ignorableWhitespace != NULL) |
| 2789 ctxt->sax->ignorableWhitespace(ctxt->userData, | 2943 ctxt->sax->ignorableWhitespace(ctxt->userData, |
| 2790 buf, nbchar); | 2944 buf, nbchar); |
| 2791 } else { | 2945 } else { |
| 2792 htmlCheckParagraph(ctxt); | 2946 htmlCheckParagraph(ctxt); |
| 2793 if (ctxt->sax->characters != NULL) | 2947 if (ctxt->sax->characters != NULL) |
| 2794 ctxt->sax->characters(ctxt->userData, buf, nbchar); | 2948 ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| 2795 } | 2949 } |
| 2796 } | 2950 } |
| 2797 nbchar = 0; | 2951 nbchar = 0; |
| 2798 } | 2952 } |
| 2799 NEXTL(l); | 2953 NEXTL(l); |
| 2954 chunk++; |
| 2955 if (chunk > HTML_PARSER_BUFFER_SIZE) { |
| 2956 chunk = 0; |
| 2957 SHRINK; |
| 2958 GROW; |
| 2959 } |
| 2800 cur = CUR_CHAR(l); | 2960 cur = CUR_CHAR(l); |
| 2801 if (cur == 0) { | 2961 if (cur == 0) { |
| 2802 SHRINK; | 2962 SHRINK; |
| 2803 GROW; | 2963 GROW; |
| 2804 cur = CUR_CHAR(l); | 2964 cur = CUR_CHAR(l); |
| 2805 } | 2965 } |
| 2806 } | 2966 } |
| 2807 if (nbchar != 0) { | 2967 if (nbchar != 0) { |
| 2808 buf[nbchar] = 0; | 2968 buf[nbchar] = 0; |
| 2809 | 2969 |
| (...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2984 /* | 3144 /* |
| 2985 * SAX: PI detected. | 3145 * SAX: PI detected. |
| 2986 */ | 3146 */ |
| 2987 if ((ctxt->sax) && (!ctxt->disableSAX) && | 3147 if ((ctxt->sax) && (!ctxt->disableSAX) && |
| 2988 (ctxt->sax->processingInstruction != NULL)) | 3148 (ctxt->sax->processingInstruction != NULL)) |
| 2989 ctxt->sax->processingInstruction(ctxt->userData, | 3149 ctxt->sax->processingInstruction(ctxt->userData, |
| 2990 target, buf); | 3150 target, buf); |
| 2991 } | 3151 } |
| 2992 xmlFree(buf); | 3152 xmlFree(buf); |
| 2993 } else { | 3153 } else { |
| 2994 » htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED, | 3154 » htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED, |
| 2995 "PI is not started correctly", NULL, NULL); | 3155 "PI is not started correctly", NULL, NULL); |
| 2996 } | 3156 } |
| 2997 ctxt->instate = state; | 3157 ctxt->instate = state; |
| 2998 } | 3158 } |
| 2999 } | 3159 } |
| 3000 | 3160 |
| 3001 /** | 3161 /** |
| 3002 * htmlParseComment: | 3162 * htmlParseComment: |
| 3003 * @ctxt: an HTML parser context | 3163 * @ctxt: an HTML parser context |
| 3004 * | 3164 * |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3100 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 3260 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
| 3101 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 3261 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 3102 "htmlParseCharRef: context error\n", | 3262 "htmlParseCharRef: context error\n", |
| 3103 NULL, NULL); | 3263 NULL, NULL); |
| 3104 return(0); | 3264 return(0); |
| 3105 } | 3265 } |
| 3106 if ((CUR == '&') && (NXT(1) == '#') && | 3266 if ((CUR == '&') && (NXT(1) == '#') && |
| 3107 ((NXT(2) == 'x') || NXT(2) == 'X')) { | 3267 ((NXT(2) == 'x') || NXT(2) == 'X')) { |
| 3108 SKIP(3); | 3268 SKIP(3); |
| 3109 while (CUR != ';') { | 3269 while (CUR != ';') { |
| 3110 » if ((CUR >= '0') && (CUR <= '9')) | 3270 » if ((CUR >= '0') && (CUR <= '9')) |
| 3111 val = val * 16 + (CUR - '0'); | 3271 val = val * 16 + (CUR - '0'); |
| 3112 else if ((CUR >= 'a') && (CUR <= 'f')) | 3272 else if ((CUR >= 'a') && (CUR <= 'f')) |
| 3113 val = val * 16 + (CUR - 'a') + 10; | 3273 val = val * 16 + (CUR - 'a') + 10; |
| 3114 else if ((CUR >= 'A') && (CUR <= 'F')) | 3274 else if ((CUR >= 'A') && (CUR <= 'F')) |
| 3115 val = val * 16 + (CUR - 'A') + 10; | 3275 val = val * 16 + (CUR - 'A') + 10; |
| 3116 else { | 3276 else { |
| 3117 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, | 3277 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, |
| 3118 "htmlParseCharRef: missing semicolumn\n", | 3278 "htmlParseCharRef: missing semicolumn\n", |
| 3119 NULL, NULL); | 3279 NULL, NULL); |
| 3120 break; | 3280 break; |
| 3121 } | 3281 } |
| 3122 NEXT; | 3282 NEXT; |
| 3123 } | 3283 } |
| 3124 if (CUR == ';') | 3284 if (CUR == ';') |
| 3125 NEXT; | 3285 NEXT; |
| 3126 } else if ((CUR == '&') && (NXT(1) == '#')) { | 3286 } else if ((CUR == '&') && (NXT(1) == '#')) { |
| 3127 SKIP(2); | 3287 SKIP(2); |
| 3128 while (CUR != ';') { | 3288 while (CUR != ';') { |
| 3129 » if ((CUR >= '0') && (CUR <= '9')) | 3289 » if ((CUR >= '0') && (CUR <= '9')) |
| 3130 val = val * 10 + (CUR - '0'); | 3290 val = val * 10 + (CUR - '0'); |
| 3131 else { | 3291 else { |
| 3132 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, | 3292 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, |
| 3133 "htmlParseCharRef: missing semicolumn\n", | 3293 "htmlParseCharRef: missing semicolumn\n", |
| 3134 NULL, NULL); | 3294 NULL, NULL); |
| 3135 break; | 3295 break; |
| 3136 } | 3296 } |
| 3137 NEXT; | 3297 NEXT; |
| 3138 } | 3298 } |
| 3139 if (CUR == ';') | 3299 if (CUR == ';') |
| (...skipping 15 matching lines...) Expand all Loading... |
| 3155 return(0); | 3315 return(0); |
| 3156 } | 3316 } |
| 3157 | 3317 |
| 3158 | 3318 |
| 3159 /** | 3319 /** |
| 3160 * htmlParseDocTypeDecl: | 3320 * htmlParseDocTypeDecl: |
| 3161 * @ctxt: an HTML parser context | 3321 * @ctxt: an HTML parser context |
| 3162 * | 3322 * |
| 3163 * parse a DOCTYPE declaration | 3323 * parse a DOCTYPE declaration |
| 3164 * | 3324 * |
| 3165 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? | 3325 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? |
| 3166 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' | 3326 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' |
| 3167 */ | 3327 */ |
| 3168 | 3328 |
| 3169 static void | 3329 static void |
| 3170 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) { | 3330 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) { |
| 3171 const xmlChar *name; | 3331 const xmlChar *name; |
| 3172 xmlChar *ExternalID = NULL; | 3332 xmlChar *ExternalID = NULL; |
| 3173 xmlChar *URI = NULL; | 3333 xmlChar *URI = NULL; |
| 3174 | 3334 |
| 3175 /* | 3335 /* |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3259 } | 3419 } |
| 3260 | 3420 |
| 3261 /* | 3421 /* |
| 3262 * read the value | 3422 * read the value |
| 3263 */ | 3423 */ |
| 3264 SKIP_BLANKS; | 3424 SKIP_BLANKS; |
| 3265 if (CUR == '=') { | 3425 if (CUR == '=') { |
| 3266 NEXT; | 3426 NEXT; |
| 3267 SKIP_BLANKS; | 3427 SKIP_BLANKS; |
| 3268 val = htmlParseAttValue(ctxt); | 3428 val = htmlParseAttValue(ctxt); |
| 3269 } else if (htmlIsBooleanAttr(name)) { | |
| 3270 /* | |
| 3271 * assume a minimized attribute | |
| 3272 */ | |
| 3273 val = xmlStrdup(name); | |
| 3274 } | 3429 } |
| 3275 | 3430 |
| 3276 *value = val; | 3431 *value = val; |
| 3277 return(name); | 3432 return(name); |
| 3278 } | 3433 } |
| 3279 | 3434 |
| 3280 /** | 3435 /** |
| 3281 * htmlCheckEncoding: | 3436 * htmlCheckEncoding: |
| 3282 * @ctxt: an HTML parser context | 3437 * @ctxt: an HTML parser context |
| 3283 * @attvalue: the attribute value | 3438 * @attvalue: the attribute value |
| 3284 * | 3439 * |
| 3285 * Checks an http-equiv attribute from a Meta tag to detect | 3440 * Checks an http-equiv attribute from a Meta tag to detect |
| 3286 * the encoding | 3441 * the encoding |
| 3287 * If a new encoding is detected the parser is switched to decode | 3442 * If a new encoding is detected the parser is switched to decode |
| 3288 * it and pass UTF8 | 3443 * it and pass UTF8 |
| 3289 */ | 3444 */ |
| 3290 static void | 3445 static void |
| 3291 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { | 3446 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { |
| 3292 const xmlChar *encoding; | 3447 const xmlChar *encoding; |
| 3293 | 3448 |
| 3294 if ((ctxt == NULL) || (attvalue == NULL)) | 3449 if ((ctxt == NULL) || (attvalue == NULL)) |
| 3295 return; | 3450 return; |
| 3296 | 3451 |
| 3297 /* do not change encoding */» | 3452 /* do not change encoding */ |
| 3298 if (ctxt->input->encoding != NULL) | 3453 if (ctxt->input->encoding != NULL) |
| 3299 return; | 3454 return; |
| 3300 | 3455 |
| 3301 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); | 3456 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); |
| 3302 if (encoding != NULL) { | 3457 if (encoding != NULL) { |
| 3303 encoding += 8; | 3458 encoding += 8; |
| 3304 } else { | 3459 } else { |
| 3305 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); | 3460 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); |
| 3306 if (encoding != NULL) | 3461 if (encoding != NULL) |
| 3307 encoding += 9; | 3462 encoding += 9; |
| 3308 } | 3463 } |
| 3309 if (encoding != NULL) { | 3464 if (encoding != NULL) { |
| 3310 xmlCharEncoding enc; | 3465 xmlCharEncoding enc; |
| 3311 xmlCharEncodingHandlerPtr handler; | 3466 xmlCharEncodingHandlerPtr handler; |
| 3312 | 3467 |
| 3313 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; | 3468 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; |
| 3314 | 3469 |
| 3315 if (ctxt->input->encoding != NULL) | 3470 if (ctxt->input->encoding != NULL) |
| 3316 xmlFree((xmlChar *) ctxt->input->encoding); | 3471 xmlFree((xmlChar *) ctxt->input->encoding); |
| 3317 ctxt->input->encoding = xmlStrdup(encoding); | 3472 ctxt->input->encoding = xmlStrdup(encoding); |
| 3318 | 3473 |
| 3319 enc = xmlParseCharEncoding((const char *) encoding); | 3474 enc = xmlParseCharEncoding((const char *) encoding); |
| 3320 /* | 3475 /* |
| 3321 * registered set of known encodings | 3476 * registered set of known encodings |
| 3322 */ | 3477 */ |
| 3323 if (enc != XML_CHAR_ENCODING_ERROR) { | 3478 if (enc != XML_CHAR_ENCODING_ERROR) { |
| 3324 » if (((enc == XML_CHAR_ENCODING_UTF16LE) || | 3479 » if (((enc == XML_CHAR_ENCODING_UTF16LE) || |
| 3325 (enc == XML_CHAR_ENCODING_UTF16BE) || | 3480 (enc == XML_CHAR_ENCODING_UTF16BE) || |
| 3326 (enc == XML_CHAR_ENCODING_UCS4LE) || | 3481 (enc == XML_CHAR_ENCODING_UCS4LE) || |
| 3327 (enc == XML_CHAR_ENCODING_UCS4BE)) && | 3482 (enc == XML_CHAR_ENCODING_UCS4BE)) && |
| 3328 (ctxt->input->buf != NULL) && | 3483 (ctxt->input->buf != NULL) && |
| 3329 (ctxt->input->buf->encoder == NULL)) { | 3484 (ctxt->input->buf->encoder == NULL)) { |
| 3330 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 3485 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
| 3331 "htmlCheckEncoding: wrong encoding meta\n", | 3486 "htmlCheckEncoding: wrong encoding meta\n", |
| 3332 NULL, NULL); | 3487 NULL, NULL); |
| 3333 } else { | 3488 } else { |
| 3334 xmlSwitchEncoding(ctxt, enc); | 3489 xmlSwitchEncoding(ctxt, enc); |
| (...skipping 27 matching lines...) Expand all Loading... |
| 3362 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, | 3517 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, |
| 3363 ctxt->input->buf->buffer, | 3518 ctxt->input->buf->buffer, |
| 3364 ctxt->input->buf->raw); | 3519 ctxt->input->buf->raw); |
| 3365 if (nbchars < 0) { | 3520 if (nbchars < 0) { |
| 3366 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 3521 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
| 3367 "htmlCheckEncoding: encoder error\n", | 3522 "htmlCheckEncoding: encoder error\n", |
| 3368 NULL, NULL); | 3523 NULL, NULL); |
| 3369 } | 3524 } |
| 3370 ctxt->input->base = | 3525 ctxt->input->base = |
| 3371 ctxt->input->cur = ctxt->input->buf->buffer->content; | 3526 ctxt->input->cur = ctxt->input->buf->buffer->content; |
| 3527 ctxt->input->end = |
| 3528 &ctxt->input->base[ctxt->input->buf->buffer->use]; |
| 3372 } | 3529 } |
| 3373 } | 3530 } |
| 3374 } | 3531 } |
| 3375 | 3532 |
| 3376 /** | 3533 /** |
| 3377 * htmlCheckMeta: | 3534 * htmlCheckMeta: |
| 3378 * @ctxt: an HTML parser context | 3535 * @ctxt: an HTML parser context |
| 3379 * @atts: the attributes values | 3536 * @atts: the attributes values |
| 3380 * | 3537 * |
| 3381 * Checks the attributes of a Meta tag | 3538 * Checks the attributes of a Meta tag |
| (...skipping 20 matching lines...) Expand all Loading... |
| 3402 att = atts[i++]; | 3559 att = atts[i++]; |
| 3403 } | 3560 } |
| 3404 if ((http) && (content != NULL)) | 3561 if ((http) && (content != NULL)) |
| 3405 htmlCheckEncoding(ctxt, content); | 3562 htmlCheckEncoding(ctxt, content); |
| 3406 | 3563 |
| 3407 } | 3564 } |
| 3408 | 3565 |
| 3409 /** | 3566 /** |
| 3410 * htmlParseStartTag: | 3567 * htmlParseStartTag: |
| 3411 * @ctxt: an HTML parser context | 3568 * @ctxt: an HTML parser context |
| 3412 * | 3569 * |
| 3413 * parse a start of tag either for rule element or | 3570 * parse a start of tag either for rule element or |
| 3414 * EmptyElement. In both cases we don't parse the tag closing chars. | 3571 * EmptyElement. In both cases we don't parse the tag closing chars. |
| 3415 * | 3572 * |
| 3416 * [40] STag ::= '<' Name (S Attribute)* S? '>' | 3573 * [40] STag ::= '<' Name (S Attribute)* S? '>' |
| 3417 * | 3574 * |
| 3418 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' | 3575 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' |
| 3419 * | 3576 * |
| 3420 * With namespace: | 3577 * With namespace: |
| 3421 * | 3578 * |
| 3422 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' | 3579 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' |
| 3423 * | 3580 * |
| 3424 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' | 3581 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' |
| 3425 * | 3582 * |
| 3426 * Returns 0 in case of success, -1 in case of error and 1 if discarded | 3583 * Returns 0 in case of success, -1 in case of error and 1 if discarded |
| 3427 */ | 3584 */ |
| 3428 | 3585 |
| 3429 static int | 3586 static int |
| 3430 htmlParseStartTag(htmlParserCtxtPtr ctxt) { | 3587 htmlParseStartTag(htmlParserCtxtPtr ctxt) { |
| 3431 const xmlChar *name; | 3588 const xmlChar *name; |
| 3432 const xmlChar *attname; | 3589 const xmlChar *attname; |
| 3433 xmlChar *attvalue; | 3590 xmlChar *attvalue; |
| 3434 const xmlChar **atts; | 3591 const xmlChar **atts; |
| 3435 int nbatts = 0; | 3592 int nbatts = 0; |
| 3436 int maxatts; | 3593 int maxatts; |
| 3437 int meta = 0; | 3594 int meta = 0; |
| 3438 int i; | 3595 int i; |
| 3439 int discardtag = 0; | 3596 int discardtag = 0; |
| 3440 | 3597 |
| 3598 if (ctxt->instate == XML_PARSER_EOF) |
| 3599 return(-1); |
| 3441 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 3600 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
| 3442 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 3601 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 3443 "htmlParseStartTag: context error\n", NULL, NULL); | 3602 "htmlParseStartTag: context error\n", NULL, NULL); |
| 3444 return -1; | 3603 return -1; |
| 3445 } | 3604 } |
| 3446 if (CUR != '<') return -1; | 3605 if (CUR != '<') return -1; |
| 3447 NEXT; | 3606 NEXT; |
| 3448 | 3607 |
| 3449 atts = ctxt->atts; | 3608 atts = ctxt->atts; |
| 3450 maxatts = ctxt->maxatts; | 3609 maxatts = ctxt->maxatts; |
| 3451 | 3610 |
| 3452 GROW; | 3611 GROW; |
| 3453 name = htmlParseHTMLName(ctxt); | 3612 name = htmlParseHTMLName(ctxt); |
| 3454 if (name == NULL) { | 3613 if (name == NULL) { |
| 3455 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, | 3614 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, |
| 3456 "htmlParseStartTag: invalid element name\n", | 3615 "htmlParseStartTag: invalid element name\n", |
| 3457 NULL, NULL); | 3616 NULL, NULL); |
| 3458 /* Dump the bogus tag like browsers do */ | 3617 /* Dump the bogus tag like browsers do */ |
| 3459 » while ((IS_CHAR_CH(CUR)) && (CUR != '>')) | 3618 » while ((IS_CHAR_CH(CUR)) && (CUR != '>') && |
| 3619 (ctxt->instate != XML_PARSER_EOF)) |
| 3460 NEXT; | 3620 NEXT; |
| 3461 return -1; | 3621 return -1; |
| 3462 } | 3622 } |
| 3463 if (xmlStrEqual(name, BAD_CAST"meta")) | 3623 if (xmlStrEqual(name, BAD_CAST"meta")) |
| 3464 meta = 1; | 3624 meta = 1; |
| 3465 | 3625 |
| 3466 /* | 3626 /* |
| 3467 * Check for auto-closure of HTML elements. | 3627 * Check for auto-closure of HTML elements. |
| 3468 */ | 3628 */ |
| 3469 htmlAutoClose(ctxt, name); | 3629 htmlAutoClose(ctxt, name); |
| 3470 | 3630 |
| 3471 /* | 3631 /* |
| 3472 * Check for implied HTML elements. | 3632 * Check for implied HTML elements. |
| 3473 */ | 3633 */ |
| 3474 htmlCheckImplied(ctxt, name); | 3634 htmlCheckImplied(ctxt, name); |
| 3475 | 3635 |
| 3476 /* | 3636 /* |
| 3477 * Avoid html at any level > 0, head at any level != 1 | 3637 * Avoid html at any level > 0, head at any level != 1 |
| 3478 * or any attempt to recurse body | 3638 * or any attempt to recurse body |
| 3479 */ | 3639 */ |
| 3480 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) { | 3640 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) { |
| 3481 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, | 3641 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, |
| 3482 "htmlParseStartTag: misplaced <html> tag\n", | 3642 "htmlParseStartTag: misplaced <html> tag\n", |
| 3483 name, NULL); | 3643 name, NULL); |
| 3484 discardtag = 1; | 3644 discardtag = 1; |
| 3645 ctxt->depth++; |
| 3485 } | 3646 } |
| 3486 if ((ctxt->nameNr != 1) && | 3647 if ((ctxt->nameNr != 1) && |
| 3487 (xmlStrEqual(name, BAD_CAST"head"))) { | 3648 (xmlStrEqual(name, BAD_CAST"head"))) { |
| 3488 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, | 3649 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, |
| 3489 "htmlParseStartTag: misplaced <head> tag\n", | 3650 "htmlParseStartTag: misplaced <head> tag\n", |
| 3490 name, NULL); | 3651 name, NULL); |
| 3491 discardtag = 1; | 3652 discardtag = 1; |
| 3653 ctxt->depth++; |
| 3492 } | 3654 } |
| 3493 if (xmlStrEqual(name, BAD_CAST"body")) { | 3655 if (xmlStrEqual(name, BAD_CAST"body")) { |
| 3494 int indx; | 3656 int indx; |
| 3495 for (indx = 0;indx < ctxt->nameNr;indx++) { | 3657 for (indx = 0;indx < ctxt->nameNr;indx++) { |
| 3496 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) { | 3658 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) { |
| 3497 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, | 3659 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, |
| 3498 "htmlParseStartTag: misplaced <body> tag\n", | 3660 "htmlParseStartTag: misplaced <body> tag\n", |
| 3499 name, NULL); | 3661 name, NULL); |
| 3500 discardtag = 1; | 3662 discardtag = 1; |
| 3663 ctxt->depth++; |
| 3501 } | 3664 } |
| 3502 } | 3665 } |
| 3503 } | 3666 } |
| 3504 | 3667 |
| 3505 /* | 3668 /* |
| 3506 * Now parse the attributes, it ends up with the ending | 3669 * Now parse the attributes, it ends up with the ending |
| 3507 * | 3670 * |
| 3508 * (S Attribute)* S? | 3671 * (S Attribute)* S? |
| 3509 */ | 3672 */ |
| 3510 SKIP_BLANKS; | 3673 SKIP_BLANKS; |
| 3511 while ((IS_CHAR_CH(CUR)) && | 3674 while ((IS_CHAR_CH(CUR)) && |
| 3512 (CUR != '>') && | 3675 (CUR != '>') && |
| 3513 ((CUR != '/') || (NXT(1) != '>'))) { | 3676 ((CUR != '/') || (NXT(1) != '>'))) { |
| 3514 long cons = ctxt->nbChars; | 3677 long cons = ctxt->nbChars; |
| 3515 | 3678 |
| 3516 GROW; | 3679 GROW; |
| 3517 attname = htmlParseAttribute(ctxt, &attvalue); | 3680 attname = htmlParseAttribute(ctxt, &attvalue); |
| 3518 if (attname != NULL) { | 3681 if (attname != NULL) { |
| 3519 | 3682 |
| 3520 /* | 3683 /* |
| 3521 * Well formedness requires at most one declaration of an attribute | 3684 * Well formedness requires at most one declaration of an attribute |
| 3522 */ | 3685 */ |
| (...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3641 if ((CUR != '<') || (NXT(1) != '/')) { | 3804 if ((CUR != '<') || (NXT(1) != '/')) { |
| 3642 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED, | 3805 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED, |
| 3643 "htmlParseEndTag: '</' not found\n", NULL, NULL); | 3806 "htmlParseEndTag: '</' not found\n", NULL, NULL); |
| 3644 return (0); | 3807 return (0); |
| 3645 } | 3808 } |
| 3646 SKIP(2); | 3809 SKIP(2); |
| 3647 | 3810 |
| 3648 name = htmlParseHTMLName(ctxt); | 3811 name = htmlParseHTMLName(ctxt); |
| 3649 if (name == NULL) | 3812 if (name == NULL) |
| 3650 return (0); | 3813 return (0); |
| 3651 | |
| 3652 /* | 3814 /* |
| 3653 * We should definitely be at the ending "S? '>'" part | 3815 * We should definitely be at the ending "S? '>'" part |
| 3654 */ | 3816 */ |
| 3655 SKIP_BLANKS; | 3817 SKIP_BLANKS; |
| 3656 if ((!IS_CHAR_CH(CUR)) || (CUR != '>')) { | 3818 if ((!IS_CHAR_CH(CUR)) || (CUR != '>')) { |
| 3657 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, | 3819 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, |
| 3658 "End tag : expected '>'\n", NULL, NULL); | 3820 "End tag : expected '>'\n", NULL, NULL); |
| 3659 if (ctxt->recovery) { | 3821 if (ctxt->recovery) { |
| 3660 /* | 3822 /* |
| 3661 * We're not at the ending > !! | 3823 * We're not at the ending > !! |
| 3662 * Error, unless in recover mode where we search forwards | 3824 * Error, unless in recover mode where we search forwards |
| 3663 * until we find a > | 3825 * until we find a > |
| 3664 */ | 3826 */ |
| 3665 while (CUR != '\0' && CUR != '>') NEXT; | 3827 while (CUR != '\0' && CUR != '>') NEXT; |
| 3666 NEXT; | 3828 NEXT; |
| 3667 } | 3829 } |
| 3668 } else | 3830 } else |
| 3669 NEXT; | 3831 NEXT; |
| 3670 | 3832 |
| 3671 /* | 3833 /* |
| 3834 * if we ignored misplaced tags in htmlParseStartTag don't pop them |
| 3835 * out now. |
| 3836 */ |
| 3837 if ((ctxt->depth > 0) && |
| 3838 (xmlStrEqual(name, BAD_CAST "html") || |
| 3839 xmlStrEqual(name, BAD_CAST "body") || |
| 3840 xmlStrEqual(name, BAD_CAST "head"))) { |
| 3841 ctxt->depth--; |
| 3842 return (0); |
| 3843 } |
| 3844 |
| 3845 /* |
| 3672 * If the name read is not one of the element in the parsing stack | 3846 * If the name read is not one of the element in the parsing stack |
| 3673 * then return, it's just an error. | 3847 * then return, it's just an error. |
| 3674 */ | 3848 */ |
| 3675 for (i = (ctxt->nameNr - 1); i >= 0; i--) { | 3849 for (i = (ctxt->nameNr - 1); i >= 0; i--) { |
| 3676 if (xmlStrEqual(name, ctxt->nameTab[i])) | 3850 if (xmlStrEqual(name, ctxt->nameTab[i])) |
| 3677 break; | 3851 break; |
| 3678 } | 3852 } |
| 3679 if (i < 0) { | 3853 if (i < 0) { |
| 3680 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, | 3854 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, |
| 3681 "Unexpected end tag : %s\n", name, NULL); | 3855 "Unexpected end tag : %s\n", name, NULL); |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3715 ret = 0; | 3889 ret = 0; |
| 3716 } | 3890 } |
| 3717 | 3891 |
| 3718 return (ret); | 3892 return (ret); |
| 3719 } | 3893 } |
| 3720 | 3894 |
| 3721 | 3895 |
| 3722 /** | 3896 /** |
| 3723 * htmlParseReference: | 3897 * htmlParseReference: |
| 3724 * @ctxt: an HTML parser context | 3898 * @ctxt: an HTML parser context |
| 3725 * | 3899 * |
| 3726 * parse and handle entity references in content, | 3900 * parse and handle entity references in content, |
| 3727 * this will end-up in a call to character() since this is either a | 3901 * this will end-up in a call to character() since this is either a |
| 3728 * CharRef, or a predefined entity. | 3902 * CharRef, or a predefined entity. |
| 3729 */ | 3903 */ |
| 3730 static void | 3904 static void |
| 3731 htmlParseReference(htmlParserCtxtPtr ctxt) { | 3905 htmlParseReference(htmlParserCtxtPtr ctxt) { |
| 3732 const htmlEntityDesc * ent; | 3906 const htmlEntityDesc * ent; |
| 3733 xmlChar out[6]; | 3907 xmlChar out[6]; |
| 3734 const xmlChar *name; | 3908 const xmlChar *name; |
| 3735 if (CUR != '&') return; | 3909 if (CUR != '&') return; |
| 3736 | 3910 |
| 3737 if (NXT(1) == '#') { | 3911 if (NXT(1) == '#') { |
| 3738 unsigned int c; | 3912 unsigned int c; |
| 3739 int bits, i = 0; | 3913 int bits, i = 0; |
| 3740 | 3914 |
| 3741 c = htmlParseCharRef(ctxt); | 3915 c = htmlParseCharRef(ctxt); |
| 3742 if (c == 0) | 3916 if (c == 0) |
| 3743 return; | 3917 return; |
| 3744 | 3918 |
| 3745 if (c < 0x80) { out[i++]= c; bits= -6; } | 3919 if (c < 0x80) { out[i++]= c; bits= -6; } |
| 3746 else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } | 3920 else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
| 3747 else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } | 3921 else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
| 3748 else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } | 3922 else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } |
| 3749 | 3923 |
| 3750 for ( ; bits >= 0; bits-= 6) { | 3924 for ( ; bits >= 0; bits-= 6) { |
| 3751 out[i++]= ((c >> bits) & 0x3F) | 0x80; | 3925 out[i++]= ((c >> bits) & 0x3F) | 0x80; |
| 3752 } | 3926 } |
| 3753 out[i] = 0; | 3927 out[i] = 0; |
| 3754 | 3928 |
| 3755 htmlCheckParagraph(ctxt); | 3929 htmlCheckParagraph(ctxt); |
| 3756 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) | 3930 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) |
| 3757 ctxt->sax->characters(ctxt->userData, out, i); | 3931 ctxt->sax->characters(ctxt->userData, out, i); |
| 3758 } else { | 3932 } else { |
| 3759 ent = htmlParseEntityRef(ctxt, &name); | 3933 ent = htmlParseEntityRef(ctxt, &name); |
| (...skipping 14 matching lines...) Expand all Loading... |
| 3774 unsigned int c; | 3948 unsigned int c; |
| 3775 int bits, i = 0; | 3949 int bits, i = 0; |
| 3776 | 3950 |
| 3777 c = ent->value; | 3951 c = ent->value; |
| 3778 if (c < 0x80) | 3952 if (c < 0x80) |
| 3779 { out[i++]= c; bits= -6; } | 3953 { out[i++]= c; bits= -6; } |
| 3780 else if (c < 0x800) | 3954 else if (c < 0x800) |
| 3781 { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } | 3955 { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
| 3782 else if (c < 0x10000) | 3956 else if (c < 0x10000) |
| 3783 { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } | 3957 { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
| 3784 » else | 3958 » else |
| 3785 { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } | 3959 { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } |
| 3786 | 3960 |
| 3787 for ( ; bits >= 0; bits-= 6) { | 3961 for ( ; bits >= 0; bits-= 6) { |
| 3788 out[i++]= ((c >> bits) & 0x3F) | 0x80; | 3962 out[i++]= ((c >> bits) & 0x3F) | 0x80; |
| 3789 } | 3963 } |
| 3790 out[i] = 0; | 3964 out[i] = 0; |
| 3791 | 3965 |
| 3792 htmlCheckParagraph(ctxt); | 3966 htmlCheckParagraph(ctxt); |
| 3793 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) | 3967 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) |
| 3794 ctxt->sax->characters(ctxt->userData, out, i); | 3968 ctxt->sax->characters(ctxt->userData, out, i); |
| 3795 } | 3969 } |
| 3796 } | 3970 } |
| 3797 } | 3971 } |
| 3798 | 3972 |
| 3799 /** | 3973 /** |
| 3800 * htmlParseContent: | 3974 * htmlParseContent: |
| 3801 * @ctxt: an HTML parser context | 3975 * @ctxt: an HTML parser context |
| 3802 * | 3976 * |
| 3803 * Parse a content: comment, sub-element, reference or text. | 3977 * Parse a content: comment, sub-element, reference or text. |
| 3978 * Kept for compatibility with old code |
| 3804 */ | 3979 */ |
| 3805 | 3980 |
| 3806 static void | 3981 static void |
| 3807 htmlParseContent(htmlParserCtxtPtr ctxt) { | 3982 htmlParseContent(htmlParserCtxtPtr ctxt) { |
| 3808 xmlChar *currentNode; | 3983 xmlChar *currentNode; |
| 3809 int depth; | 3984 int depth; |
| 3810 const xmlChar *name; | 3985 const xmlChar *name; |
| 3811 | 3986 |
| 3812 currentNode = xmlStrdup(ctxt->name); | 3987 currentNode = xmlStrdup(ctxt->name); |
| 3813 depth = ctxt->nameNr; | 3988 depth = ctxt->nameNr; |
| 3814 while (1) { | 3989 while (1) { |
| 3815 long cons = ctxt->nbChars; | 3990 long cons = ctxt->nbChars; |
| 3816 | 3991 |
| 3817 GROW; | 3992 GROW; |
| 3993 |
| 3994 if (ctxt->instate == XML_PARSER_EOF) |
| 3995 break; |
| 3996 |
| 3818 /* | 3997 /* |
| 3819 * Our tag or one of it's parent or children is ending. | 3998 * Our tag or one of it's parent or children is ending. |
| 3820 */ | 3999 */ |
| 3821 if ((CUR == '<') && (NXT(1) == '/')) { | 4000 if ((CUR == '<') && (NXT(1) == '/')) { |
| 3822 if (htmlParseEndTag(ctxt) && | 4001 if (htmlParseEndTag(ctxt) && |
| 3823 ((currentNode != NULL) || (ctxt->nameNr == 0))) { | 4002 ((currentNode != NULL) || (ctxt->nameNr == 0))) { |
| 3824 if (currentNode != NULL) | 4003 if (currentNode != NULL) |
| 3825 xmlFree(currentNode); | 4004 xmlFree(currentNode); |
| 3826 return; | 4005 return; |
| 3827 } | 4006 } |
| 3828 continue; /* while */ | 4007 continue; /* while */ |
| 3829 } | 4008 } |
| 3830 | 4009 |
| 3831 else if ((CUR == '<') && | 4010 else if ((CUR == '<') && |
| 3832 ((IS_ASCII_LETTER(NXT(1))) || | 4011 ((IS_ASCII_LETTER(NXT(1))) || |
| 3833 (NXT(1) == '_') || (NXT(1) == ':'))) { | 4012 (NXT(1) == '_') || (NXT(1) == ':'))) { |
| 3834 name = htmlParseHTMLName_nonInvasive(ctxt); | 4013 name = htmlParseHTMLName_nonInvasive(ctxt); |
| 3835 if (name == NULL) { | 4014 if (name == NULL) { |
| 3836 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, | 4015 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, |
| 3837 "htmlParseStartTag: invalid element name\n", | 4016 "htmlParseStartTag: invalid element name\n", |
| 3838 NULL, NULL); | 4017 NULL, NULL); |
| 3839 /* Dump the bogus tag like browsers do */ | 4018 /* Dump the bogus tag like browsers do */ |
| 3840 » while ((IS_CHAR_CH(CUR)) && (CUR != '>')) | 4019 while ((IS_CHAR_CH(CUR)) && (CUR != '>')) |
| 3841 NEXT; | 4020 NEXT; |
| 3842 | 4021 |
| 3843 if (currentNode != NULL) | 4022 if (currentNode != NULL) |
| 3844 xmlFree(currentNode); | 4023 xmlFree(currentNode); |
| 3845 return; | 4024 return; |
| 3846 } | 4025 } |
| 3847 | 4026 |
| 3848 if (ctxt->name != NULL) { | 4027 if (ctxt->name != NULL) { |
| 3849 if (htmlCheckAutoClose(name, ctxt->name) == 1) { | 4028 if (htmlCheckAutoClose(name, ctxt->name) == 1) { |
| 3850 htmlAutoClose(ctxt, name); | 4029 htmlAutoClose(ctxt, name); |
| 3851 continue; | 4030 continue; |
| 3852 } | 4031 } |
| 3853 » }» | 4032 » } |
| 3854 } | 4033 } |
| 3855 | 4034 |
| 3856 /* | 4035 /* |
| 3857 * Has this node been popped out during parsing of | 4036 * Has this node been popped out during parsing of |
| 3858 * the next element | 4037 * the next element |
| 3859 */ | 4038 */ |
| 3860 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) && | 4039 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) && |
| 3861 (!xmlStrEqual(currentNode, ctxt->name))) | 4040 (!xmlStrEqual(currentNode, ctxt->name))) |
| 3862 { | 4041 { |
| 3863 if (currentNode != NULL) xmlFree(currentNode); | 4042 if (currentNode != NULL) xmlFree(currentNode); |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3902 | 4081 |
| 3903 /* | 4082 /* |
| 3904 * Third case : a sub-element. | 4083 * Third case : a sub-element. |
| 3905 */ | 4084 */ |
| 3906 else if (CUR == '<') { | 4085 else if (CUR == '<') { |
| 3907 htmlParseElement(ctxt); | 4086 htmlParseElement(ctxt); |
| 3908 } | 4087 } |
| 3909 | 4088 |
| 3910 /* | 4089 /* |
| 3911 * Fourth case : a reference. If if has not been resolved, | 4090 * Fourth case : a reference. If if has not been resolved, |
| 3912 » * parsing returns it's Name, create the node | 4091 » * parsing returns it's Name, create the node |
| 3913 */ | 4092 */ |
| 3914 else if (CUR == '&') { | 4093 else if (CUR == '&') { |
| 3915 htmlParseReference(ctxt); | 4094 htmlParseReference(ctxt); |
| 3916 } | 4095 } |
| 3917 | 4096 |
| 3918 /* | 4097 /* |
| 3919 * Fifth case : end of the resource | 4098 * Fifth case : end of the resource |
| 3920 */ | 4099 */ |
| 3921 else if (CUR == 0) { | 4100 else if (CUR == 0) { |
| 3922 htmlAutoCloseOnEnd(ctxt); | 4101 htmlAutoCloseOnEnd(ctxt); |
| (...skipping 15 matching lines...) Expand all Loading... |
| 3938 } | 4117 } |
| 3939 break; | 4118 break; |
| 3940 } | 4119 } |
| 3941 } | 4120 } |
| 3942 GROW; | 4121 GROW; |
| 3943 } | 4122 } |
| 3944 if (currentNode != NULL) xmlFree(currentNode); | 4123 if (currentNode != NULL) xmlFree(currentNode); |
| 3945 } | 4124 } |
| 3946 | 4125 |
| 3947 /** | 4126 /** |
| 3948 * htmlParseContent: | |
| 3949 * @ctxt: an HTML parser context | |
| 3950 * | |
| 3951 * Parse a content: comment, sub-element, reference or text. | |
| 3952 */ | |
| 3953 | |
| 3954 void | |
| 3955 __htmlParseContent(void *ctxt) { | |
| 3956 if (ctxt != NULL) | |
| 3957 htmlParseContent((htmlParserCtxtPtr) ctxt); | |
| 3958 } | |
| 3959 | |
| 3960 /** | |
| 3961 * htmlParseElement: | 4127 * htmlParseElement: |
| 3962 * @ctxt: an HTML parser context | 4128 * @ctxt: an HTML parser context |
| 3963 * | 4129 * |
| 3964 * parse an HTML element, this is highly recursive | 4130 * parse an HTML element, this is highly recursive |
| 4131 * this is kept for compatibility with previous code versions |
| 3965 * | 4132 * |
| 3966 * [39] element ::= EmptyElemTag | STag content ETag | 4133 * [39] element ::= EmptyElemTag | STag content ETag |
| 3967 * | 4134 * |
| 3968 * [41] Attribute ::= Name Eq AttValue | 4135 * [41] Attribute ::= Name Eq AttValue |
| 3969 */ | 4136 */ |
| 3970 | 4137 |
| 3971 void | 4138 void |
| 3972 htmlParseElement(htmlParserCtxtPtr ctxt) { | 4139 htmlParseElement(htmlParserCtxtPtr ctxt) { |
| 3973 const xmlChar *name; | 4140 const xmlChar *name; |
| 3974 xmlChar *currentNode = NULL; | 4141 xmlChar *currentNode = NULL; |
| 3975 const htmlElemDesc * info; | 4142 const htmlElemDesc * info; |
| 3976 htmlParserNodeInfo node_info; | 4143 htmlParserNodeInfo node_info; |
| 3977 int failed; | 4144 int failed; |
| 3978 int depth; | 4145 int depth; |
| 3979 const xmlChar *oldptr; | 4146 const xmlChar *oldptr; |
| 3980 | 4147 |
| 3981 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 4148 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
| 3982 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 4149 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 3983 "htmlParseElement: context error\n", NULL, NULL); | 4150 "htmlParseElement: context error\n", NULL, NULL); |
| 3984 return; | 4151 return; |
| 3985 } | 4152 } |
| 4153 |
| 4154 if (ctxt->instate == XML_PARSER_EOF) |
| 4155 return; |
| 4156 |
| 3986 /* Capture start position */ | 4157 /* Capture start position */ |
| 3987 if (ctxt->record_info) { | 4158 if (ctxt->record_info) { |
| 3988 node_info.begin_pos = ctxt->input->consumed + | 4159 node_info.begin_pos = ctxt->input->consumed + |
| 3989 (CUR_PTR - ctxt->input->base); | 4160 (CUR_PTR - ctxt->input->base); |
| 3990 node_info.begin_line = ctxt->input->line; | 4161 node_info.begin_line = ctxt->input->line; |
| 3991 } | 4162 } |
| 3992 | 4163 |
| 3993 failed = htmlParseStartTag(ctxt); | 4164 failed = htmlParseStartTag(ctxt); |
| 3994 name = ctxt->name; | 4165 name = ctxt->name; |
| 3995 if ((failed == -1) || (name == NULL)) { | 4166 if ((failed == -1) || (name == NULL)) { |
| (...skipping 24 matching lines...) Expand all Loading... |
| 4020 | 4191 |
| 4021 if (CUR == '>') { | 4192 if (CUR == '>') { |
| 4022 NEXT; | 4193 NEXT; |
| 4023 } else { | 4194 } else { |
| 4024 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, | 4195 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, |
| 4025 "Couldn't find end of Start Tag %s\n", name, NULL); | 4196 "Couldn't find end of Start Tag %s\n", name, NULL); |
| 4026 | 4197 |
| 4027 /* | 4198 /* |
| 4028 * end of parsing of this node. | 4199 * end of parsing of this node. |
| 4029 */ | 4200 */ |
| 4030 » if (xmlStrEqual(name, ctxt->name)) { | 4201 » if (xmlStrEqual(name, ctxt->name)) { |
| 4031 nodePop(ctxt); | 4202 nodePop(ctxt); |
| 4032 htmlnamePop(ctxt); | 4203 htmlnamePop(ctxt); |
| 4033 » } | 4204 » } |
| 4034 | 4205 |
| 4035 /* | 4206 /* |
| 4036 * Capture end position and add node | 4207 * Capture end position and add node |
| 4037 */ | 4208 */ |
| 4038 if (ctxt->record_info) { | 4209 if (ctxt->record_info) { |
| 4039 node_info.end_pos = ctxt->input->consumed + | 4210 node_info.end_pos = ctxt->input->consumed + |
| 4040 (CUR_PTR - ctxt->input->base); | 4211 (CUR_PTR - ctxt->input->base); |
| 4041 node_info.end_line = ctxt->input->line; | 4212 node_info.end_line = ctxt->input->line; |
| 4042 node_info.node = ctxt->node; | 4213 node_info.node = ctxt->node; |
| 4043 xmlParserAddNodeInfo(ctxt, &node_info); | 4214 xmlParserAddNodeInfo(ctxt, &node_info); |
| (...skipping 13 matching lines...) Expand all Loading... |
| 4057 | 4228 |
| 4058 /* | 4229 /* |
| 4059 * Parse the content of the element: | 4230 * Parse the content of the element: |
| 4060 */ | 4231 */ |
| 4061 currentNode = xmlStrdup(ctxt->name); | 4232 currentNode = xmlStrdup(ctxt->name); |
| 4062 depth = ctxt->nameNr; | 4233 depth = ctxt->nameNr; |
| 4063 while (IS_CHAR_CH(CUR)) { | 4234 while (IS_CHAR_CH(CUR)) { |
| 4064 oldptr = ctxt->input->cur; | 4235 oldptr = ctxt->input->cur; |
| 4065 htmlParseContent(ctxt); | 4236 htmlParseContent(ctxt); |
| 4066 if (oldptr==ctxt->input->cur) break; | 4237 if (oldptr==ctxt->input->cur) break; |
| 4067 » if (ctxt->nameNr < depth) break; | 4238 » if (ctxt->nameNr < depth) break; |
| 4068 }» | 4239 } |
| 4069 | 4240 |
| 4070 /* | 4241 /* |
| 4071 * Capture end position and add node | 4242 * Capture end position and add node |
| 4072 */ | 4243 */ |
| 4073 if ( currentNode != NULL && ctxt->record_info ) { | 4244 if ( currentNode != NULL && ctxt->record_info ) { |
| 4074 node_info.end_pos = ctxt->input->consumed + | 4245 node_info.end_pos = ctxt->input->consumed + |
| 4075 (CUR_PTR - ctxt->input->base); | 4246 (CUR_PTR - ctxt->input->base); |
| 4076 node_info.end_line = ctxt->input->line; | 4247 node_info.end_line = ctxt->input->line; |
| 4077 node_info.node = ctxt->node; | 4248 node_info.node = ctxt->node; |
| 4078 xmlParserAddNodeInfo(ctxt, &node_info); | 4249 xmlParserAddNodeInfo(ctxt, &node_info); |
| 4079 } | 4250 } |
| 4080 if (!IS_CHAR_CH(CUR)) { | 4251 if (!IS_CHAR_CH(CUR)) { |
| 4081 htmlAutoCloseOnEnd(ctxt); | 4252 htmlAutoCloseOnEnd(ctxt); |
| 4082 } | 4253 } |
| 4083 | 4254 |
| 4084 if (currentNode != NULL) | 4255 if (currentNode != NULL) |
| 4085 xmlFree(currentNode); | 4256 xmlFree(currentNode); |
| 4086 } | 4257 } |
| 4087 | 4258 |
| 4259 static void |
| 4260 htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) { |
| 4261 /* |
| 4262 * Capture end position and add node |
| 4263 */ |
| 4264 if ( ctxt->node != NULL && ctxt->record_info ) { |
| 4265 ctxt->nodeInfo->end_pos = ctxt->input->consumed + |
| 4266 (CUR_PTR - ctxt->input->base); |
| 4267 ctxt->nodeInfo->end_line = ctxt->input->line; |
| 4268 ctxt->nodeInfo->node = ctxt->node; |
| 4269 xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo); |
| 4270 htmlNodeInfoPop(ctxt); |
| 4271 } |
| 4272 if (!IS_CHAR_CH(CUR)) { |
| 4273 htmlAutoCloseOnEnd(ctxt); |
| 4274 } |
| 4275 } |
| 4276 |
| 4277 /** |
| 4278 * htmlParseElementInternal: |
| 4279 * @ctxt: an HTML parser context |
| 4280 * |
| 4281 * parse an HTML element, new version, non recursive |
| 4282 * |
| 4283 * [39] element ::= EmptyElemTag | STag content ETag |
| 4284 * |
| 4285 * [41] Attribute ::= Name Eq AttValue |
| 4286 */ |
| 4287 |
| 4288 static void |
| 4289 htmlParseElementInternal(htmlParserCtxtPtr ctxt) { |
| 4290 const xmlChar *name; |
| 4291 const htmlElemDesc * info; |
| 4292 htmlParserNodeInfo node_info; |
| 4293 int failed; |
| 4294 |
| 4295 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
| 4296 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 4297 "htmlParseElementInternal: context error\n", NULL, NULL); |
| 4298 return; |
| 4299 } |
| 4300 |
| 4301 if (ctxt->instate == XML_PARSER_EOF) |
| 4302 return; |
| 4303 |
| 4304 /* Capture start position */ |
| 4305 if (ctxt->record_info) { |
| 4306 node_info.begin_pos = ctxt->input->consumed + |
| 4307 (CUR_PTR - ctxt->input->base); |
| 4308 node_info.begin_line = ctxt->input->line; |
| 4309 } |
| 4310 |
| 4311 failed = htmlParseStartTag(ctxt); |
| 4312 name = ctxt->name; |
| 4313 if ((failed == -1) || (name == NULL)) { |
| 4314 if (CUR == '>') |
| 4315 NEXT; |
| 4316 return; |
| 4317 } |
| 4318 |
| 4319 /* |
| 4320 * Lookup the info for that element. |
| 4321 */ |
| 4322 info = htmlTagLookup(name); |
| 4323 if (info == NULL) { |
| 4324 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG, |
| 4325 "Tag %s invalid\n", name, NULL); |
| 4326 } |
| 4327 |
| 4328 /* |
| 4329 * Check for an Empty Element labeled the XML/SGML way |
| 4330 */ |
| 4331 if ((CUR == '/') && (NXT(1) == '>')) { |
| 4332 SKIP(2); |
| 4333 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
| 4334 ctxt->sax->endElement(ctxt->userData, name); |
| 4335 htmlnamePop(ctxt); |
| 4336 return; |
| 4337 } |
| 4338 |
| 4339 if (CUR == '>') { |
| 4340 NEXT; |
| 4341 } else { |
| 4342 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, |
| 4343 "Couldn't find end of Start Tag %s\n", name, NULL); |
| 4344 |
| 4345 /* |
| 4346 * end of parsing of this node. |
| 4347 */ |
| 4348 if (xmlStrEqual(name, ctxt->name)) { |
| 4349 nodePop(ctxt); |
| 4350 htmlnamePop(ctxt); |
| 4351 } |
| 4352 |
| 4353 if (ctxt->record_info) |
| 4354 htmlNodeInfoPush(ctxt, &node_info); |
| 4355 htmlParserFinishElementParsing(ctxt); |
| 4356 return; |
| 4357 } |
| 4358 |
| 4359 /* |
| 4360 * Check for an Empty Element from DTD definition |
| 4361 */ |
| 4362 if ((info != NULL) && (info->empty)) { |
| 4363 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
| 4364 ctxt->sax->endElement(ctxt->userData, name); |
| 4365 htmlnamePop(ctxt); |
| 4366 return; |
| 4367 } |
| 4368 |
| 4369 if (ctxt->record_info) |
| 4370 htmlNodeInfoPush(ctxt, &node_info); |
| 4371 } |
| 4372 |
| 4373 /** |
| 4374 * htmlParseContentInternal: |
| 4375 * @ctxt: an HTML parser context |
| 4376 * |
| 4377 * Parse a content: comment, sub-element, reference or text. |
| 4378 * New version for non recursive htmlParseElementInternal |
| 4379 */ |
| 4380 |
| 4381 static void |
| 4382 htmlParseContentInternal(htmlParserCtxtPtr ctxt) { |
| 4383 xmlChar *currentNode; |
| 4384 int depth; |
| 4385 const xmlChar *name; |
| 4386 |
| 4387 currentNode = xmlStrdup(ctxt->name); |
| 4388 depth = ctxt->nameNr; |
| 4389 while (1) { |
| 4390 long cons = ctxt->nbChars; |
| 4391 |
| 4392 GROW; |
| 4393 |
| 4394 if (ctxt->instate == XML_PARSER_EOF) |
| 4395 break; |
| 4396 |
| 4397 /* |
| 4398 * Our tag or one of it's parent or children is ending. |
| 4399 */ |
| 4400 if ((CUR == '<') && (NXT(1) == '/')) { |
| 4401 if (htmlParseEndTag(ctxt) && |
| 4402 ((currentNode != NULL) || (ctxt->nameNr == 0))) { |
| 4403 if (currentNode != NULL) |
| 4404 xmlFree(currentNode); |
| 4405 |
| 4406 currentNode = xmlStrdup(ctxt->name); |
| 4407 depth = ctxt->nameNr; |
| 4408 } |
| 4409 continue; /* while */ |
| 4410 } |
| 4411 |
| 4412 else if ((CUR == '<') && |
| 4413 ((IS_ASCII_LETTER(NXT(1))) || |
| 4414 (NXT(1) == '_') || (NXT(1) == ':'))) { |
| 4415 name = htmlParseHTMLName_nonInvasive(ctxt); |
| 4416 if (name == NULL) { |
| 4417 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, |
| 4418 "htmlParseStartTag: invalid element name\n", |
| 4419 NULL, NULL); |
| 4420 /* Dump the bogus tag like browsers do */ |
| 4421 while ((IS_CHAR_CH(CUR)) && (CUR != '>')) |
| 4422 NEXT; |
| 4423 |
| 4424 htmlParserFinishElementParsing(ctxt); |
| 4425 if (currentNode != NULL) |
| 4426 xmlFree(currentNode); |
| 4427 |
| 4428 currentNode = xmlStrdup(ctxt->name); |
| 4429 depth = ctxt->nameNr; |
| 4430 continue; |
| 4431 } |
| 4432 |
| 4433 if (ctxt->name != NULL) { |
| 4434 if (htmlCheckAutoClose(name, ctxt->name) == 1) { |
| 4435 htmlAutoClose(ctxt, name); |
| 4436 continue; |
| 4437 } |
| 4438 } |
| 4439 } |
| 4440 |
| 4441 /* |
| 4442 * Has this node been popped out during parsing of |
| 4443 * the next element |
| 4444 */ |
| 4445 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) && |
| 4446 (!xmlStrEqual(currentNode, ctxt->name))) |
| 4447 { |
| 4448 htmlParserFinishElementParsing(ctxt); |
| 4449 if (currentNode != NULL) xmlFree(currentNode); |
| 4450 |
| 4451 currentNode = xmlStrdup(ctxt->name); |
| 4452 depth = ctxt->nameNr; |
| 4453 continue; |
| 4454 } |
| 4455 |
| 4456 if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) || |
| 4457 (xmlStrEqual(currentNode, BAD_CAST"style")))) { |
| 4458 /* |
| 4459 * Handle SCRIPT/STYLE separately |
| 4460 */ |
| 4461 htmlParseScript(ctxt); |
| 4462 } else { |
| 4463 /* |
| 4464 * Sometimes DOCTYPE arrives in the middle of the document |
| 4465 */ |
| 4466 if ((CUR == '<') && (NXT(1) == '!') && |
| 4467 (UPP(2) == 'D') && (UPP(3) == 'O') && |
| 4468 (UPP(4) == 'C') && (UPP(5) == 'T') && |
| 4469 (UPP(6) == 'Y') && (UPP(7) == 'P') && |
| 4470 (UPP(8) == 'E')) { |
| 4471 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, |
| 4472 "Misplaced DOCTYPE declaration\n", |
| 4473 BAD_CAST "DOCTYPE" , NULL); |
| 4474 htmlParseDocTypeDecl(ctxt); |
| 4475 } |
| 4476 |
| 4477 /* |
| 4478 * First case : a comment |
| 4479 */ |
| 4480 if ((CUR == '<') && (NXT(1) == '!') && |
| 4481 (NXT(2) == '-') && (NXT(3) == '-')) { |
| 4482 htmlParseComment(ctxt); |
| 4483 } |
| 4484 |
| 4485 /* |
| 4486 * Second case : a Processing Instruction. |
| 4487 */ |
| 4488 else if ((CUR == '<') && (NXT(1) == '?')) { |
| 4489 htmlParsePI(ctxt); |
| 4490 } |
| 4491 |
| 4492 /* |
| 4493 * Third case : a sub-element. |
| 4494 */ |
| 4495 else if (CUR == '<') { |
| 4496 htmlParseElementInternal(ctxt); |
| 4497 if (currentNode != NULL) xmlFree(currentNode); |
| 4498 |
| 4499 currentNode = xmlStrdup(ctxt->name); |
| 4500 depth = ctxt->nameNr; |
| 4501 } |
| 4502 |
| 4503 /* |
| 4504 * Fourth case : a reference. If if has not been resolved, |
| 4505 * parsing returns it's Name, create the node |
| 4506 */ |
| 4507 else if (CUR == '&') { |
| 4508 htmlParseReference(ctxt); |
| 4509 } |
| 4510 |
| 4511 /* |
| 4512 * Fifth case : end of the resource |
| 4513 */ |
| 4514 else if (CUR == 0) { |
| 4515 htmlAutoCloseOnEnd(ctxt); |
| 4516 break; |
| 4517 } |
| 4518 |
| 4519 /* |
| 4520 * Last case, text. Note that References are handled directly. |
| 4521 */ |
| 4522 else { |
| 4523 htmlParseCharData(ctxt); |
| 4524 } |
| 4525 |
| 4526 if (cons == ctxt->nbChars) { |
| 4527 if (ctxt->node != NULL) { |
| 4528 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 4529 "detected an error in element content\n", |
| 4530 NULL, NULL); |
| 4531 } |
| 4532 break; |
| 4533 } |
| 4534 } |
| 4535 GROW; |
| 4536 } |
| 4537 if (currentNode != NULL) xmlFree(currentNode); |
| 4538 } |
| 4539 |
| 4540 /** |
| 4541 * htmlParseContent: |
| 4542 * @ctxt: an HTML parser context |
| 4543 * |
| 4544 * Parse a content: comment, sub-element, reference or text. |
| 4545 * This is the entry point when called from parser.c |
| 4546 */ |
| 4547 |
| 4548 void |
| 4549 __htmlParseContent(void *ctxt) { |
| 4550 if (ctxt != NULL) |
| 4551 htmlParseContentInternal((htmlParserCtxtPtr) ctxt); |
| 4552 } |
| 4553 |
| 4088 /** | 4554 /** |
| 4089 * htmlParseDocument: | 4555 * htmlParseDocument: |
| 4090 * @ctxt: an HTML parser context | 4556 * @ctxt: an HTML parser context |
| 4091 * | 4557 * |
| 4092 * parse an HTML document (and build a tree if using the standard SAX | 4558 * parse an HTML document (and build a tree if using the standard SAX |
| 4093 * interface). | 4559 * interface). |
| 4094 * | 4560 * |
| 4095 * Returns 0, -1 in case of error. the parser context is augmented | 4561 * Returns 0, -1 in case of error. the parser context is augmented |
| 4096 * as a result of the parsing. | 4562 * as a result of the parsing. |
| 4097 */ | 4563 */ |
| 4098 | 4564 |
| 4099 int | 4565 int |
| 4100 htmlParseDocument(htmlParserCtxtPtr ctxt) { | 4566 htmlParseDocument(htmlParserCtxtPtr ctxt) { |
| 4567 xmlChar start[4]; |
| 4568 xmlCharEncoding enc; |
| 4101 xmlDtdPtr dtd; | 4569 xmlDtdPtr dtd; |
| 4102 | 4570 |
| 4103 xmlInitParser(); | 4571 xmlInitParser(); |
| 4104 | 4572 |
| 4105 htmlDefaultSAXHandlerInit(); | 4573 htmlDefaultSAXHandlerInit(); |
| 4106 | 4574 |
| 4107 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 4575 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
| 4108 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 4576 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 4109 "htmlParseDocument: context error\n", NULL, NULL); | 4577 "htmlParseDocument: context error\n", NULL, NULL); |
| 4110 return(XML_ERR_INTERNAL_ERROR); | 4578 return(XML_ERR_INTERNAL_ERROR); |
| 4111 } | 4579 } |
| 4112 ctxt->html = 1; | 4580 ctxt->html = 1; |
| 4581 ctxt->linenumbers = 1; |
| 4113 GROW; | 4582 GROW; |
| 4114 /* | 4583 /* |
| 4115 * SAX: beginning of the document processing. | 4584 * SAX: beginning of the document processing. |
| 4116 */ | 4585 */ |
| 4117 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) | 4586 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
| 4118 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); | 4587 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); |
| 4119 | 4588 |
| 4589 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && |
| 4590 ((ctxt->input->end - ctxt->input->cur) >= 4)) { |
| 4591 /* |
| 4592 * Get the 4 first bytes and decode the charset |
| 4593 * if enc != XML_CHAR_ENCODING_NONE |
| 4594 * plug some encoding conversion routines. |
| 4595 */ |
| 4596 start[0] = RAW; |
| 4597 start[1] = NXT(1); |
| 4598 start[2] = NXT(2); |
| 4599 start[3] = NXT(3); |
| 4600 enc = xmlDetectCharEncoding(&start[0], 4); |
| 4601 if (enc != XML_CHAR_ENCODING_NONE) { |
| 4602 xmlSwitchEncoding(ctxt, enc); |
| 4603 } |
| 4604 } |
| 4605 |
| 4120 /* | 4606 /* |
| 4121 * Wipe out everything which is before the first '<' | 4607 * Wipe out everything which is before the first '<' |
| 4122 */ | 4608 */ |
| 4123 SKIP_BLANKS; | 4609 SKIP_BLANKS; |
| 4124 if (CUR == 0) { | 4610 if (CUR == 0) { |
| 4125 » htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY, | 4611 » htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY, |
| 4126 "Document is empty\n", NULL, NULL); | 4612 "Document is empty\n", NULL, NULL); |
| 4127 } | 4613 } |
| 4128 | 4614 |
| 4129 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) | 4615 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) |
| 4130 ctxt->sax->startDocument(ctxt->userData); | 4616 ctxt->sax->startDocument(ctxt->userData); |
| 4131 | 4617 |
| 4132 | 4618 |
| 4133 /* | 4619 /* |
| 4134 * Parse possible comments and PIs before any content | 4620 * Parse possible comments and PIs before any content |
| 4135 */ | 4621 */ |
| 4136 while (((CUR == '<') && (NXT(1) == '!') && | 4622 while (((CUR == '<') && (NXT(1) == '!') && |
| 4137 (NXT(2) == '-') && (NXT(3) == '-')) || | 4623 (NXT(2) == '-') && (NXT(3) == '-')) || |
| 4138 ((CUR == '<') && (NXT(1) == '?'))) { | 4624 ((CUR == '<') && (NXT(1) == '?'))) { |
| 4139 htmlParseComment(ctxt);» | 4625 htmlParseComment(ctxt); |
| 4140 htmlParsePI(ctxt);» | 4626 htmlParsePI(ctxt); |
| 4141 SKIP_BLANKS; | 4627 SKIP_BLANKS; |
| 4142 }» | 4628 } |
| 4143 | 4629 |
| 4144 | 4630 |
| 4145 /* | 4631 /* |
| 4146 * Then possibly doc type declaration(s) and more Misc | 4632 * Then possibly doc type declaration(s) and more Misc |
| 4147 * (doctypedecl Misc*)? | 4633 * (doctypedecl Misc*)? |
| 4148 */ | 4634 */ |
| 4149 if ((CUR == '<') && (NXT(1) == '!') && | 4635 if ((CUR == '<') && (NXT(1) == '!') && |
| 4150 (UPP(2) == 'D') && (UPP(3) == 'O') && | 4636 (UPP(2) == 'D') && (UPP(3) == 'O') && |
| 4151 (UPP(4) == 'C') && (UPP(5) == 'T') && | 4637 (UPP(4) == 'C') && (UPP(5) == 'T') && |
| 4152 (UPP(6) == 'Y') && (UPP(7) == 'P') && | 4638 (UPP(6) == 'Y') && (UPP(7) == 'P') && |
| 4153 (UPP(8) == 'E')) { | 4639 (UPP(8) == 'E')) { |
| 4154 htmlParseDocTypeDecl(ctxt); | 4640 htmlParseDocTypeDecl(ctxt); |
| 4155 } | 4641 } |
| 4156 SKIP_BLANKS; | 4642 SKIP_BLANKS; |
| 4157 | 4643 |
| 4158 /* | 4644 /* |
| 4159 * Parse possible comments and PIs before any content | 4645 * Parse possible comments and PIs before any content |
| 4160 */ | 4646 */ |
| 4161 while (((CUR == '<') && (NXT(1) == '!') && | 4647 while (((CUR == '<') && (NXT(1) == '!') && |
| 4162 (NXT(2) == '-') && (NXT(3) == '-')) || | 4648 (NXT(2) == '-') && (NXT(3) == '-')) || |
| 4163 ((CUR == '<') && (NXT(1) == '?'))) { | 4649 ((CUR == '<') && (NXT(1) == '?'))) { |
| 4164 htmlParseComment(ctxt);» | 4650 htmlParseComment(ctxt); |
| 4165 htmlParsePI(ctxt);» | 4651 htmlParsePI(ctxt); |
| 4166 SKIP_BLANKS; | 4652 SKIP_BLANKS; |
| 4167 }» | 4653 } |
| 4168 | 4654 |
| 4169 /* | 4655 /* |
| 4170 * Time to start parsing the tree itself | 4656 * Time to start parsing the tree itself |
| 4171 */ | 4657 */ |
| 4172 htmlParseContent(ctxt); | 4658 htmlParseContentInternal(ctxt); |
| 4173 | 4659 |
| 4174 /* | 4660 /* |
| 4175 * autoclose | 4661 * autoclose |
| 4176 */ | 4662 */ |
| 4177 if (CUR == 0) | 4663 if (CUR == 0) |
| 4178 htmlAutoCloseOnEnd(ctxt); | 4664 htmlAutoCloseOnEnd(ctxt); |
| 4179 | 4665 |
| 4180 | 4666 |
| 4181 /* | 4667 /* |
| 4182 * SAX: end of the document processing. | 4668 * SAX: end of the document processing. |
| 4183 */ | 4669 */ |
| 4184 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 4670 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
| 4185 ctxt->sax->endDocument(ctxt->userData); | 4671 ctxt->sax->endDocument(ctxt->userData); |
| 4186 | 4672 |
| 4187 if (ctxt->myDoc != NULL) { | 4673 if (ctxt->myDoc != NULL) { |
| 4188 dtd = xmlGetIntSubset(ctxt->myDoc); | 4674 dtd = xmlGetIntSubset(ctxt->myDoc); |
| 4189 if (dtd == NULL) | 4675 if (dtd == NULL) |
| 4190 » ctxt->myDoc->intSubset = | 4676 » ctxt->myDoc->intSubset = |
| 4191 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", | 4677 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", |
| 4192 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", | 4678 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", |
| 4193 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); | 4679 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); |
| 4194 } | 4680 } |
| 4195 if (! ctxt->wellFormed) return(-1); | 4681 if (! ctxt->wellFormed) return(-1); |
| 4196 return(0); | 4682 return(0); |
| 4197 } | 4683 } |
| 4198 | 4684 |
| 4199 | 4685 |
| 4200 /************************************************************************ | 4686 /************************************************************************ |
| 4201 * * | 4687 * * |
| (...skipping 25 matching lines...) Expand all Loading... |
| 4227 } | 4713 } |
| 4228 sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler)); | 4714 sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler)); |
| 4229 if (sax == NULL) { | 4715 if (sax == NULL) { |
| 4230 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); | 4716 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); |
| 4231 return(-1); | 4717 return(-1); |
| 4232 } | 4718 } |
| 4233 else | 4719 else |
| 4234 memset(sax, 0, sizeof(htmlSAXHandler)); | 4720 memset(sax, 0, sizeof(htmlSAXHandler)); |
| 4235 | 4721 |
| 4236 /* Allocate the Input stack */ | 4722 /* Allocate the Input stack */ |
| 4237 ctxt->inputTab = (htmlParserInputPtr *) | 4723 ctxt->inputTab = (htmlParserInputPtr *) |
| 4238 xmlMalloc(5 * sizeof(htmlParserInputPtr)); | 4724 xmlMalloc(5 * sizeof(htmlParserInputPtr)); |
| 4239 if (ctxt->inputTab == NULL) { | 4725 if (ctxt->inputTab == NULL) { |
| 4240 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); | 4726 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); |
| 4241 ctxt->inputNr = 0; | 4727 ctxt->inputNr = 0; |
| 4242 ctxt->inputMax = 0; | 4728 ctxt->inputMax = 0; |
| 4243 ctxt->input = NULL; | 4729 ctxt->input = NULL; |
| 4244 return(-1); | 4730 return(-1); |
| 4245 } | 4731 } |
| 4246 ctxt->inputNr = 0; | 4732 ctxt->inputNr = 0; |
| 4247 ctxt->inputMax = 5; | 4733 ctxt->inputMax = 5; |
| (...skipping 17 matching lines...) Expand all Loading... |
| 4265 } | 4751 } |
| 4266 ctxt->nodeNr = 0; | 4752 ctxt->nodeNr = 0; |
| 4267 ctxt->nodeMax = 10; | 4753 ctxt->nodeMax = 10; |
| 4268 ctxt->node = NULL; | 4754 ctxt->node = NULL; |
| 4269 | 4755 |
| 4270 /* Allocate the Name stack */ | 4756 /* Allocate the Name stack */ |
| 4271 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); | 4757 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); |
| 4272 if (ctxt->nameTab == NULL) { | 4758 if (ctxt->nameTab == NULL) { |
| 4273 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); | 4759 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); |
| 4274 ctxt->nameNr = 0; | 4760 ctxt->nameNr = 0; |
| 4275 » ctxt->nameMax = 10; | 4761 » ctxt->nameMax = 0; |
| 4276 ctxt->name = NULL; | 4762 ctxt->name = NULL; |
| 4277 ctxt->nodeNr = 0; | 4763 ctxt->nodeNr = 0; |
| 4278 ctxt->nodeMax = 0; | 4764 ctxt->nodeMax = 0; |
| 4279 ctxt->node = NULL; | 4765 ctxt->node = NULL; |
| 4280 ctxt->inputNr = 0; | 4766 ctxt->inputNr = 0; |
| 4281 ctxt->inputMax = 0; | 4767 ctxt->inputMax = 0; |
| 4282 ctxt->input = NULL; | 4768 ctxt->input = NULL; |
| 4283 return(-1); | 4769 return(-1); |
| 4284 } | 4770 } |
| 4285 ctxt->nameNr = 0; | 4771 ctxt->nameNr = 0; |
| 4286 ctxt->nameMax = 10; | 4772 ctxt->nameMax = 10; |
| 4287 ctxt->name = NULL; | 4773 ctxt->name = NULL; |
| 4288 | 4774 |
| 4775 ctxt->nodeInfoTab = NULL; |
| 4776 ctxt->nodeInfoNr = 0; |
| 4777 ctxt->nodeInfoMax = 0; |
| 4778 |
| 4289 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler; | 4779 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler; |
| 4290 else { | 4780 else { |
| 4291 ctxt->sax = sax; | 4781 ctxt->sax = sax; |
| 4292 memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); | 4782 memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); |
| 4293 } | 4783 } |
| 4294 ctxt->userData = ctxt; | 4784 ctxt->userData = ctxt; |
| 4295 ctxt->myDoc = NULL; | 4785 ctxt->myDoc = NULL; |
| 4296 ctxt->wellFormed = 1; | 4786 ctxt->wellFormed = 1; |
| 4297 ctxt->replaceEntities = 0; | 4787 ctxt->replaceEntities = 0; |
| 4298 ctxt->linenumbers = xmlLineNumbersDefaultValue; | 4788 ctxt->linenumbers = xmlLineNumbersDefaultValue; |
| (...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4425 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding); | 4915 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding); |
| 4426 | 4916 |
| 4427 enc = xmlParseCharEncoding(encoding); | 4917 enc = xmlParseCharEncoding(encoding); |
| 4428 /* | 4918 /* |
| 4429 * registered set of known encodings | 4919 * registered set of known encodings |
| 4430 */ | 4920 */ |
| 4431 if (enc != XML_CHAR_ENCODING_ERROR) { | 4921 if (enc != XML_CHAR_ENCODING_ERROR) { |
| 4432 xmlSwitchEncoding(ctxt, enc); | 4922 xmlSwitchEncoding(ctxt, enc); |
| 4433 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { | 4923 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { |
| 4434 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, | 4924 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, |
| 4435 » » "Unsupported encoding %s\n", | 4925 » » "Unsupported encoding %s\n", |
| 4436 (const xmlChar *) encoding, NULL); | 4926 (const xmlChar *) encoding, NULL); |
| 4437 } | 4927 } |
| 4438 } else { | 4928 } else { |
| 4439 /* | 4929 /* |
| 4440 * fallback for unknown encodings | 4930 * fallback for unknown encodings |
| 4441 */ | 4931 */ |
| 4442 handler = xmlFindCharEncodingHandler((const char *) encoding); | 4932 handler = xmlFindCharEncodingHandler((const char *) encoding); |
| 4443 if (handler != NULL) { | 4933 if (handler != NULL) { |
| 4444 xmlSwitchToEncoding(ctxt, handler); | 4934 xmlSwitchToEncoding(ctxt, handler); |
| 4445 } else { | 4935 } else { |
| 4446 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, | 4936 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, |
| 4447 "Unsupported encoding %s\n", | 4937 "Unsupported encoding %s\n", |
| 4448 (const xmlChar *) encoding, NULL); | 4938 (const xmlChar *) encoding, NULL); |
| 4449 } | 4939 } |
| 4450 } | 4940 } |
| 4451 } | 4941 } |
| 4452 return(ctxt); | 4942 return(ctxt); |
| 4453 } | 4943 } |
| 4454 | 4944 |
| 4455 #ifdef LIBXML_PUSH_ENABLED | 4945 #ifdef LIBXML_PUSH_ENABLED |
| 4456 /************************************************************************ | 4946 /************************************************************************ |
| 4457 * * | 4947 * * |
| 4458 * » » Progressive parsing interfaces» » » » * | 4948 *» Progressive parsing interfaces» » » » * |
| 4459 * * | 4949 * * |
| 4460 ************************************************************************/ | 4950 ************************************************************************/ |
| 4461 | 4951 |
| 4462 /** | 4952 /** |
| 4463 * htmlParseLookupSequence: | 4953 * htmlParseLookupSequence: |
| 4464 * @ctxt: an HTML parser context | 4954 * @ctxt: an HTML parser context |
| 4465 * @first: the first char to lookup | 4955 * @first: the first char to lookup |
| 4466 * @next: the next char to lookup or zero | 4956 * @next: the next char to lookup or zero |
| 4467 * @third: the next char to lookup or zero | 4957 * @third: the next char to lookup or zero |
| 4468 * @comment: flag to force checking inside comments | 4958 * @comment: flag to force checking inside comments |
| 4469 * | 4959 * |
| 4470 * Try to find if a sequence (first, next, third) or just (first next) or | 4960 * Try to find if a sequence (first, next, third) or just (first next) or |
| 4471 * (first) is available in the input stream. | 4961 * (first) is available in the input stream. |
| 4472 * This function has a side effect of (possibly) incrementing ctxt->checkIndex | 4962 * This function has a side effect of (possibly) incrementing ctxt->checkIndex |
| 4473 * to avoid rescanning sequences of bytes, it DOES change the state of the | 4963 * to avoid rescanning sequences of bytes, it DOES change the state of the |
| 4474 * parser, do not use liberally. | 4964 * parser, do not use liberally. |
| 4475 * This is basically similar to xmlParseLookupSequence() | 4965 * This is basically similar to xmlParseLookupSequence() |
| 4476 * | 4966 * |
| 4477 * Returns the index to the current parsing point if the full sequence | 4967 * Returns the index to the current parsing point if the full sequence |
| 4478 * is available, -1 otherwise. | 4968 * is available, -1 otherwise. |
| 4479 */ | 4969 */ |
| 4480 static int | 4970 static int |
| 4481 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, | 4971 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, |
| 4482 xmlChar next, xmlChar third, int iscomment) { | 4972 xmlChar next, xmlChar third, int iscomment, |
| 4973 int ignoreattrval) |
| 4974 { |
| 4483 int base, len; | 4975 int base, len; |
| 4484 htmlParserInputPtr in; | 4976 htmlParserInputPtr in; |
| 4485 const xmlChar *buf; | 4977 const xmlChar *buf; |
| 4486 int incomment = 0; | 4978 int incomment = 0; |
| 4979 int invalue = 0; |
| 4980 char valdellim = 0x0; |
| 4487 | 4981 |
| 4488 in = ctxt->input; | 4982 in = ctxt->input; |
| 4489 if (in == NULL) return(-1); | 4983 if (in == NULL) |
| 4984 return (-1); |
| 4985 |
| 4490 base = in->cur - in->base; | 4986 base = in->cur - in->base; |
| 4491 if (base < 0) return(-1); | 4987 if (base < 0) |
| 4988 return (-1); |
| 4989 |
| 4492 if (ctxt->checkIndex > base) | 4990 if (ctxt->checkIndex > base) |
| 4493 base = ctxt->checkIndex; | 4991 base = ctxt->checkIndex; |
| 4992 |
| 4494 if (in->buf == NULL) { | 4993 if (in->buf == NULL) { |
| 4495 » buf = in->base; | 4994 buf = in->base; |
| 4496 » len = in->length; | 4995 len = in->length; |
| 4497 } else { | 4996 } else { |
| 4498 » buf = in->buf->buffer->content; | 4997 buf = in->buf->buffer->content; |
| 4499 » len = in->buf->buffer->use; | 4998 len = in->buf->buffer->use; |
| 4500 } | 4999 } |
| 5000 |
| 4501 /* take into account the sequence length */ | 5001 /* take into account the sequence length */ |
| 4502 if (third) len -= 2; | 5002 if (third) |
| 4503 else if (next) len --; | 5003 len -= 2; |
| 4504 for (;base < len;base++) { | 5004 else if (next) |
| 4505 » if (!incomment && (base + 4 < len) && !iscomment) { | 5005 len--; |
| 4506 » if ((buf[base] == '<') && (buf[base + 1] == '!') && | 5006 for (; base < len; base++) { |
| 4507 » » (buf[base + 2] == '-') && (buf[base + 3] == '-')) { | 5007 if ((!incomment) && (base + 4 < len) && (!iscomment)) { |
| 4508 » » incomment = 1; | 5008 if ((buf[base] == '<') && (buf[base + 1] == '!') && |
| 4509 » » /* do not increment past <! - some people use <!--> */ | 5009 (buf[base + 2] == '-') && (buf[base + 3] == '-')) { |
| 4510 » » base += 2; | 5010 incomment = 1; |
| 4511 » } | 5011 /* do not increment past <! - some people use <!--> */ |
| 4512 » } | 5012 base += 2; |
| 4513 » if (incomment) { | 5013 } |
| 4514 » if (base + 3 > len) | 5014 } |
| 4515 » » return(-1); | 5015 if (ignoreattrval) { |
| 4516 » if ((buf[base] == '-') && (buf[base + 1] == '-') && | 5016 if (buf[base] == '"' || buf[base] == '\'') { |
| 4517 » » (buf[base + 2] == '>')) { | 5017 if (invalue) { |
| 4518 » » incomment = 0; | 5018 if (buf[base] == valdellim) { |
| 4519 » » base += 2; | 5019 invalue = 0; |
| 4520 » } | 5020 continue; |
| 4521 » continue; | 5021 } |
| 4522 » } | 5022 } else { |
| 5023 valdellim = buf[base]; |
| 5024 invalue = 1; |
| 5025 continue; |
| 5026 } |
| 5027 } else if (invalue) { |
| 5028 continue; |
| 5029 } |
| 5030 } |
| 5031 if (incomment) { |
| 5032 if (base + 3 > len) |
| 5033 return (-1); |
| 5034 if ((buf[base] == '-') && (buf[base + 1] == '-') && |
| 5035 (buf[base + 2] == '>')) { |
| 5036 incomment = 0; |
| 5037 base += 2; |
| 5038 } |
| 5039 continue; |
| 5040 } |
| 4523 if (buf[base] == first) { | 5041 if (buf[base] == first) { |
| 4524 » if (third != 0) { | 5042 if (third != 0) { |
| 4525 » » if ((buf[base + 1] != next) || | 5043 if ((buf[base + 1] != next) || (buf[base + 2] != third)) |
| 4526 » » (buf[base + 2] != third)) continue; | 5044 continue; |
| 4527 » } else if (next != 0) { | 5045 } else if (next != 0) { |
| 4528 » » if (buf[base + 1] != next) continue; | 5046 if (buf[base + 1] != next) |
| 4529 » } | 5047 continue; |
| 4530 » ctxt->checkIndex = 0; | 5048 } |
| 5049 ctxt->checkIndex = 0; |
| 4531 #ifdef DEBUG_PUSH | 5050 #ifdef DEBUG_PUSH |
| 4532 » if (next == 0) | 5051 if (next == 0) |
| 4533 » » xmlGenericError(xmlGenericErrorContext, | 5052 xmlGenericError(xmlGenericErrorContext, |
| 4534 » » » "HPP: lookup '%c' found at %d\n", | 5053 "HPP: lookup '%c' found at %d\n", |
| 4535 » » » first, base); | 5054 first, base); |
| 4536 » else if (third == 0) | 5055 else if (third == 0) |
| 4537 » » xmlGenericError(xmlGenericErrorContext, | 5056 xmlGenericError(xmlGenericErrorContext, |
| 4538 » » » "HPP: lookup '%c%c' found at %d\n", | 5057 "HPP: lookup '%c%c' found at %d\n", |
| 4539 » » » first, next, base); | 5058 first, next, base); |
| 4540 » else | 5059 else |
| 4541 » » xmlGenericError(xmlGenericErrorContext, | 5060 xmlGenericError(xmlGenericErrorContext, |
| 4542 » » » "HPP: lookup '%c%c%c' found at %d\n", | 5061 "HPP: lookup '%c%c%c' found at %d\n", |
| 4543 » » » first, next, third, base); | 5062 first, next, third, base); |
| 4544 #endif | 5063 #endif |
| 4545 » return(base - (in->cur - in->base)); | 5064 return (base - (in->cur - in->base)); |
| 4546 » } | 5065 } |
| 4547 } | 5066 } |
| 4548 ctxt->checkIndex = base; | 5067 if ((!incomment) && (!invalue)) |
| 5068 ctxt->checkIndex = base; |
| 4549 #ifdef DEBUG_PUSH | 5069 #ifdef DEBUG_PUSH |
| 4550 if (next == 0) | 5070 if (next == 0) |
| 4551 » xmlGenericError(xmlGenericErrorContext, | 5071 xmlGenericError(xmlGenericErrorContext, |
| 4552 » » "HPP: lookup '%c' failed\n", first); | 5072 "HPP: lookup '%c' failed\n", first); |
| 4553 else if (third == 0) | 5073 else if (third == 0) |
| 4554 » xmlGenericError(xmlGenericErrorContext, | 5074 xmlGenericError(xmlGenericErrorContext, |
| 4555 » » "HPP: lookup '%c%c' failed\n", first, next); | 5075 "HPP: lookup '%c%c' failed\n", first, next); |
| 4556 else» | 5076 else |
| 4557 » xmlGenericError(xmlGenericErrorContext, | 5077 xmlGenericError(xmlGenericErrorContext, |
| 4558 » » "HPP: lookup '%c%c%c' failed\n", first, next, third); | 5078 "HPP: lookup '%c%c%c' failed\n", first, next, |
| 5079 third); |
| 4559 #endif | 5080 #endif |
| 4560 return(-1); | 5081 return (-1); |
| 4561 } | 5082 } |
| 4562 | 5083 |
| 4563 /** | 5084 /** |
| 5085 * htmlParseLookupChars: |
| 5086 * @ctxt: an HTML parser context |
| 5087 * @stop: Array of chars, which stop the lookup. |
| 5088 * @stopLen: Length of stop-Array |
| 5089 * |
| 5090 * Try to find if any char of the stop-Array is available in the input |
| 5091 * stream. |
| 5092 * This function has a side effect of (possibly) incrementing ctxt->checkIndex |
| 5093 * to avoid rescanning sequences of bytes, it DOES change the state of the |
| 5094 * parser, do not use liberally. |
| 5095 * |
| 5096 * Returns the index to the current parsing point if a stopChar |
| 5097 * is available, -1 otherwise. |
| 5098 */ |
| 5099 static int |
| 5100 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop, |
| 5101 int stopLen) |
| 5102 { |
| 5103 int base, len; |
| 5104 htmlParserInputPtr in; |
| 5105 const xmlChar *buf; |
| 5106 int incomment = 0; |
| 5107 int i; |
| 5108 |
| 5109 in = ctxt->input; |
| 5110 if (in == NULL) |
| 5111 return (-1); |
| 5112 |
| 5113 base = in->cur - in->base; |
| 5114 if (base < 0) |
| 5115 return (-1); |
| 5116 |
| 5117 if (ctxt->checkIndex > base) |
| 5118 base = ctxt->checkIndex; |
| 5119 |
| 5120 if (in->buf == NULL) { |
| 5121 buf = in->base; |
| 5122 len = in->length; |
| 5123 } else { |
| 5124 buf = in->buf->buffer->content; |
| 5125 len = in->buf->buffer->use; |
| 5126 } |
| 5127 |
| 5128 for (; base < len; base++) { |
| 5129 if (!incomment && (base + 4 < len)) { |
| 5130 if ((buf[base] == '<') && (buf[base + 1] == '!') && |
| 5131 (buf[base + 2] == '-') && (buf[base + 3] == '-')) { |
| 5132 incomment = 1; |
| 5133 /* do not increment past <! - some people use <!--> */ |
| 5134 base += 2; |
| 5135 } |
| 5136 } |
| 5137 if (incomment) { |
| 5138 if (base + 3 > len) |
| 5139 return (-1); |
| 5140 if ((buf[base] == '-') && (buf[base + 1] == '-') && |
| 5141 (buf[base + 2] == '>')) { |
| 5142 incomment = 0; |
| 5143 base += 2; |
| 5144 } |
| 5145 continue; |
| 5146 } |
| 5147 for (i = 0; i < stopLen; ++i) { |
| 5148 if (buf[base] == stop[i]) { |
| 5149 ctxt->checkIndex = 0; |
| 5150 return (base - (in->cur - in->base)); |
| 5151 } |
| 5152 } |
| 5153 } |
| 5154 ctxt->checkIndex = base; |
| 5155 return (-1); |
| 5156 } |
| 5157 |
| 5158 /** |
| 4564 * htmlParseTryOrFinish: | 5159 * htmlParseTryOrFinish: |
| 4565 * @ctxt: an HTML parser context | 5160 * @ctxt: an HTML parser context |
| 4566 * @terminate: last chunk indicator | 5161 * @terminate: last chunk indicator |
| 4567 * | 5162 * |
| 4568 * Try to progress on parsing | 5163 * Try to progress on parsing |
| 4569 * | 5164 * |
| 4570 * Returns zero if no parsing was possible | 5165 * Returns zero if no parsing was possible |
| 4571 */ | 5166 */ |
| 4572 static int | 5167 static int |
| 4573 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | 5168 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4632 while (1) { | 5227 while (1) { |
| 4633 | 5228 |
| 4634 in = ctxt->input; | 5229 in = ctxt->input; |
| 4635 if (in == NULL) break; | 5230 if (in == NULL) break; |
| 4636 if (in->buf == NULL) | 5231 if (in->buf == NULL) |
| 4637 avail = in->length - (in->cur - in->base); | 5232 avail = in->length - (in->cur - in->base); |
| 4638 else | 5233 else |
| 4639 avail = in->buf->buffer->use - (in->cur - in->base); | 5234 avail = in->buf->buffer->use - (in->cur - in->base); |
| 4640 if ((avail == 0) && (terminate)) { | 5235 if ((avail == 0) && (terminate)) { |
| 4641 htmlAutoCloseOnEnd(ctxt); | 5236 htmlAutoCloseOnEnd(ctxt); |
| 4642 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { | 5237 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
| 4643 /* | 5238 /* |
| 4644 * SAX: end of the document processing. | 5239 * SAX: end of the document processing. |
| 4645 */ | 5240 */ |
| 4646 ctxt->instate = XML_PARSER_EOF; | 5241 ctxt->instate = XML_PARSER_EOF; |
| 4647 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5242 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
| 4648 ctxt->sax->endDocument(ctxt->userData); | 5243 ctxt->sax->endDocument(ctxt->userData); |
| 4649 } | 5244 } |
| 4650 } | 5245 } |
| 4651 if (avail < 1) | 5246 if (avail < 1) |
| 4652 goto done; | 5247 goto done; |
| (...skipping 29 matching lines...) Expand all Loading... |
| 4682 ctxt->sax->startDocument(ctxt->userData); | 5277 ctxt->sax->startDocument(ctxt->userData); |
| 4683 | 5278 |
| 4684 cur = in->cur[0]; | 5279 cur = in->cur[0]; |
| 4685 next = in->cur[1]; | 5280 next = in->cur[1]; |
| 4686 if ((cur == '<') && (next == '!') && | 5281 if ((cur == '<') && (next == '!') && |
| 4687 (UPP(2) == 'D') && (UPP(3) == 'O') && | 5282 (UPP(2) == 'D') && (UPP(3) == 'O') && |
| 4688 (UPP(4) == 'C') && (UPP(5) == 'T') && | 5283 (UPP(4) == 'C') && (UPP(5) == 'T') && |
| 4689 (UPP(6) == 'Y') && (UPP(7) == 'P') && | 5284 (UPP(6) == 'Y') && (UPP(7) == 'P') && |
| 4690 (UPP(8) == 'E')) { | 5285 (UPP(8) == 'E')) { |
| 4691 if ((!terminate) && | 5286 if ((!terminate) && |
| 4692 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5287 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
| 4693 goto done; | 5288 goto done; |
| 4694 #ifdef DEBUG_PUSH | 5289 #ifdef DEBUG_PUSH |
| 4695 xmlGenericError(xmlGenericErrorContext, | 5290 xmlGenericError(xmlGenericErrorContext, |
| 4696 "HPP: Parsing internal subset\n"); | 5291 "HPP: Parsing internal subset\n"); |
| 4697 #endif | 5292 #endif |
| 4698 htmlParseDocTypeDecl(ctxt); | 5293 htmlParseDocTypeDecl(ctxt); |
| 4699 ctxt->instate = XML_PARSER_PROLOG; | 5294 ctxt->instate = XML_PARSER_PROLOG; |
| 4700 #ifdef DEBUG_PUSH | 5295 #ifdef DEBUG_PUSH |
| 4701 xmlGenericError(xmlGenericErrorContext, | 5296 xmlGenericError(xmlGenericErrorContext, |
| 4702 "HPP: entering PROLOG\n"); | 5297 "HPP: entering PROLOG\n"); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 4715 avail = in->length - (in->cur - in->base); | 5310 avail = in->length - (in->cur - in->base); |
| 4716 else | 5311 else |
| 4717 avail = in->buf->buffer->use - (in->cur - in->base); | 5312 avail = in->buf->buffer->use - (in->cur - in->base); |
| 4718 if (avail < 2) | 5313 if (avail < 2) |
| 4719 goto done; | 5314 goto done; |
| 4720 cur = in->cur[0]; | 5315 cur = in->cur[0]; |
| 4721 next = in->cur[1]; | 5316 next = in->cur[1]; |
| 4722 if ((cur == '<') && (next == '!') && | 5317 if ((cur == '<') && (next == '!') && |
| 4723 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5318 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
| 4724 if ((!terminate) && | 5319 if ((!terminate) && |
| 4725 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) | 5320 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) |
| 4726 goto done; | 5321 goto done; |
| 4727 #ifdef DEBUG_PUSH | 5322 #ifdef DEBUG_PUSH |
| 4728 xmlGenericError(xmlGenericErrorContext, | 5323 xmlGenericError(xmlGenericErrorContext, |
| 4729 "HPP: Parsing Comment\n"); | 5324 "HPP: Parsing Comment\n"); |
| 4730 #endif | 5325 #endif |
| 4731 htmlParseComment(ctxt); | 5326 htmlParseComment(ctxt); |
| 4732 ctxt->instate = XML_PARSER_MISC; | 5327 ctxt->instate = XML_PARSER_MISC; |
| 4733 } else if ((cur == '<') && (next == '?')) { | 5328 } else if ((cur == '<') && (next == '?')) { |
| 4734 if ((!terminate) && | 5329 if ((!terminate) && |
| 4735 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5330 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
| 4736 goto done; | 5331 goto done; |
| 4737 #ifdef DEBUG_PUSH | 5332 #ifdef DEBUG_PUSH |
| 4738 xmlGenericError(xmlGenericErrorContext, | 5333 xmlGenericError(xmlGenericErrorContext, |
| 4739 "HPP: Parsing PI\n"); | 5334 "HPP: Parsing PI\n"); |
| 4740 #endif | 5335 #endif |
| 4741 htmlParsePI(ctxt); | 5336 htmlParsePI(ctxt); |
| 4742 ctxt->instate = XML_PARSER_MISC; | 5337 ctxt->instate = XML_PARSER_MISC; |
| 4743 } else if ((cur == '<') && (next == '!') && | 5338 } else if ((cur == '<') && (next == '!') && |
| 4744 (UPP(2) == 'D') && (UPP(3) == 'O') && | 5339 (UPP(2) == 'D') && (UPP(3) == 'O') && |
| 4745 (UPP(4) == 'C') && (UPP(5) == 'T') && | 5340 (UPP(4) == 'C') && (UPP(5) == 'T') && |
| 4746 (UPP(6) == 'Y') && (UPP(7) == 'P') && | 5341 (UPP(6) == 'Y') && (UPP(7) == 'P') && |
| 4747 (UPP(8) == 'E')) { | 5342 (UPP(8) == 'E')) { |
| 4748 if ((!terminate) && | 5343 if ((!terminate) && |
| 4749 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5344 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
| 4750 goto done; | 5345 goto done; |
| 4751 #ifdef DEBUG_PUSH | 5346 #ifdef DEBUG_PUSH |
| 4752 xmlGenericError(xmlGenericErrorContext, | 5347 xmlGenericError(xmlGenericErrorContext, |
| 4753 "HPP: Parsing internal subset\n"); | 5348 "HPP: Parsing internal subset\n"); |
| 4754 #endif | 5349 #endif |
| 4755 htmlParseDocTypeDecl(ctxt); | 5350 htmlParseDocTypeDecl(ctxt); |
| 4756 ctxt->instate = XML_PARSER_PROLOG; | 5351 ctxt->instate = XML_PARSER_PROLOG; |
| 4757 #ifdef DEBUG_PUSH | 5352 #ifdef DEBUG_PUSH |
| 4758 xmlGenericError(xmlGenericErrorContext, | 5353 xmlGenericError(xmlGenericErrorContext, |
| 4759 "HPP: entering PROLOG\n"); | 5354 "HPP: entering PROLOG\n"); |
| 4760 #endif | 5355 #endif |
| 4761 } else if ((cur == '<') && (next == '!') && | 5356 } else if ((cur == '<') && (next == '!') && |
| 4762 (avail < 9)) { | 5357 (avail < 9)) { |
| 4763 goto done; | 5358 goto done; |
| 4764 } else { | 5359 } else { |
| 4765 ctxt->instate = XML_PARSER_START_TAG; | 5360 ctxt->instate = XML_PARSER_START_TAG; |
| 4766 #ifdef DEBUG_PUSH | 5361 #ifdef DEBUG_PUSH |
| 4767 xmlGenericError(xmlGenericErrorContext, | 5362 xmlGenericError(xmlGenericErrorContext, |
| 4768 "HPP: entering START_TAG\n"); | 5363 "HPP: entering START_TAG\n"); |
| 4769 #endif | 5364 #endif |
| 4770 } | 5365 } |
| 4771 break; | 5366 break; |
| 4772 case XML_PARSER_PROLOG: | 5367 case XML_PARSER_PROLOG: |
| 4773 SKIP_BLANKS; | 5368 SKIP_BLANKS; |
| 4774 if (in->buf == NULL) | 5369 if (in->buf == NULL) |
| 4775 avail = in->length - (in->cur - in->base); | 5370 avail = in->length - (in->cur - in->base); |
| 4776 else | 5371 else |
| 4777 avail = in->buf->buffer->use - (in->cur - in->base); | 5372 avail = in->buf->buffer->use - (in->cur - in->base); |
| 4778 » » if (avail < 2) | 5373 » » if (avail < 2) |
| 4779 goto done; | 5374 goto done; |
| 4780 cur = in->cur[0]; | 5375 cur = in->cur[0]; |
| 4781 next = in->cur[1]; | 5376 next = in->cur[1]; |
| 4782 if ((cur == '<') && (next == '!') && | 5377 if ((cur == '<') && (next == '!') && |
| 4783 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5378 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
| 4784 if ((!terminate) && | 5379 if ((!terminate) && |
| 4785 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) | 5380 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) |
| 4786 goto done; | 5381 goto done; |
| 4787 #ifdef DEBUG_PUSH | 5382 #ifdef DEBUG_PUSH |
| 4788 xmlGenericError(xmlGenericErrorContext, | 5383 xmlGenericError(xmlGenericErrorContext, |
| 4789 "HPP: Parsing Comment\n"); | 5384 "HPP: Parsing Comment\n"); |
| 4790 #endif | 5385 #endif |
| 4791 htmlParseComment(ctxt); | 5386 htmlParseComment(ctxt); |
| 4792 ctxt->instate = XML_PARSER_PROLOG; | 5387 ctxt->instate = XML_PARSER_PROLOG; |
| 4793 } else if ((cur == '<') && (next == '?')) { | 5388 } else if ((cur == '<') && (next == '?')) { |
| 4794 if ((!terminate) && | 5389 if ((!terminate) && |
| 4795 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5390 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
| 4796 goto done; | 5391 goto done; |
| 4797 #ifdef DEBUG_PUSH | 5392 #ifdef DEBUG_PUSH |
| 4798 xmlGenericError(xmlGenericErrorContext, | 5393 xmlGenericError(xmlGenericErrorContext, |
| 4799 "HPP: Parsing PI\n"); | 5394 "HPP: Parsing PI\n"); |
| 4800 #endif | 5395 #endif |
| 4801 htmlParsePI(ctxt); | 5396 htmlParsePI(ctxt); |
| 4802 ctxt->instate = XML_PARSER_PROLOG; | 5397 ctxt->instate = XML_PARSER_PROLOG; |
| 4803 } else if ((cur == '<') && (next == '!') && | 5398 } else if ((cur == '<') && (next == '!') && |
| 4804 (avail < 4)) { | 5399 (avail < 4)) { |
| 4805 goto done; | 5400 goto done; |
| (...skipping 16 matching lines...) Expand all Loading... |
| 4822 if (IS_BLANK_CH(cur)) { | 5417 if (IS_BLANK_CH(cur)) { |
| 4823 htmlParseCharData(ctxt); | 5418 htmlParseCharData(ctxt); |
| 4824 goto done; | 5419 goto done; |
| 4825 } | 5420 } |
| 4826 if (avail < 2) | 5421 if (avail < 2) |
| 4827 goto done; | 5422 goto done; |
| 4828 next = in->cur[1]; | 5423 next = in->cur[1]; |
| 4829 if ((cur == '<') && (next == '!') && | 5424 if ((cur == '<') && (next == '!') && |
| 4830 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5425 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
| 4831 if ((!terminate) && | 5426 if ((!terminate) && |
| 4832 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) | 5427 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) |
| 4833 goto done; | 5428 goto done; |
| 4834 #ifdef DEBUG_PUSH | 5429 #ifdef DEBUG_PUSH |
| 4835 xmlGenericError(xmlGenericErrorContext, | 5430 xmlGenericError(xmlGenericErrorContext, |
| 4836 "HPP: Parsing Comment\n"); | 5431 "HPP: Parsing Comment\n"); |
| 4837 #endif | 5432 #endif |
| 4838 htmlParseComment(ctxt); | 5433 htmlParseComment(ctxt); |
| 4839 ctxt->instate = XML_PARSER_EPILOG; | 5434 ctxt->instate = XML_PARSER_EPILOG; |
| 4840 } else if ((cur == '<') && (next == '?')) { | 5435 } else if ((cur == '<') && (next == '?')) { |
| 4841 if ((!terminate) && | 5436 if ((!terminate) && |
| 4842 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5437 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
| 4843 goto done; | 5438 goto done; |
| 4844 #ifdef DEBUG_PUSH | 5439 #ifdef DEBUG_PUSH |
| 4845 xmlGenericError(xmlGenericErrorContext, | 5440 xmlGenericError(xmlGenericErrorContext, |
| 4846 "HPP: Parsing PI\n"); | 5441 "HPP: Parsing PI\n"); |
| 4847 #endif | 5442 #endif |
| 4848 htmlParsePI(ctxt); | 5443 htmlParsePI(ctxt); |
| 4849 ctxt->instate = XML_PARSER_EPILOG; | 5444 ctxt->instate = XML_PARSER_EPILOG; |
| 4850 } else if ((cur == '<') && (next == '!') && | 5445 } else if ((cur == '<') && (next == '!') && |
| 4851 (avail < 4)) { | 5446 (avail < 4)) { |
| 4852 goto done; | 5447 goto done; |
| (...skipping 29 matching lines...) Expand all Loading... |
| 4882 if (in->cur[1] == '/') { | 5477 if (in->cur[1] == '/') { |
| 4883 ctxt->instate = XML_PARSER_END_TAG; | 5478 ctxt->instate = XML_PARSER_END_TAG; |
| 4884 ctxt->checkIndex = 0; | 5479 ctxt->checkIndex = 0; |
| 4885 #ifdef DEBUG_PUSH | 5480 #ifdef DEBUG_PUSH |
| 4886 xmlGenericError(xmlGenericErrorContext, | 5481 xmlGenericError(xmlGenericErrorContext, |
| 4887 "HPP: entering END_TAG\n"); | 5482 "HPP: entering END_TAG\n"); |
| 4888 #endif | 5483 #endif |
| 4889 break; | 5484 break; |
| 4890 } | 5485 } |
| 4891 if ((!terminate) && | 5486 if ((!terminate) && |
| 4892 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5487 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
| 4893 goto done; | 5488 goto done; |
| 4894 | 5489 |
| 4895 failed = htmlParseStartTag(ctxt); | 5490 failed = htmlParseStartTag(ctxt); |
| 4896 name = ctxt->name; | 5491 name = ctxt->name; |
| 4897 if ((failed == -1) || | 5492 if ((failed == -1) || |
| 4898 (name == NULL)) { | 5493 (name == NULL)) { |
| 4899 if (CUR == '>') | 5494 if (CUR == '>') |
| 4900 NEXT; | 5495 NEXT; |
| 4901 break; | 5496 break; |
| 4902 } | 5497 } |
| (...skipping 26 matching lines...) Expand all Loading... |
| 4929 if (CUR == '>') { | 5524 if (CUR == '>') { |
| 4930 NEXT; | 5525 NEXT; |
| 4931 } else { | 5526 } else { |
| 4932 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, | 5527 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, |
| 4933 "Couldn't find end of Start Tag %s\n", | 5528 "Couldn't find end of Start Tag %s\n", |
| 4934 name, NULL); | 5529 name, NULL); |
| 4935 | 5530 |
| 4936 /* | 5531 /* |
| 4937 * end of parsing of this node. | 5532 * end of parsing of this node. |
| 4938 */ | 5533 */ |
| 4939 » » if (xmlStrEqual(name, ctxt->name)) { | 5534 » » if (xmlStrEqual(name, ctxt->name)) { |
| 4940 nodePop(ctxt); | 5535 nodePop(ctxt); |
| 4941 htmlnamePop(ctxt); | 5536 htmlnamePop(ctxt); |
| 4942 » » } | 5537 » » } |
| 4943 | 5538 |
| 4944 ctxt->instate = XML_PARSER_CONTENT; | 5539 ctxt->instate = XML_PARSER_CONTENT; |
| 4945 #ifdef DEBUG_PUSH | 5540 #ifdef DEBUG_PUSH |
| 4946 xmlGenericError(xmlGenericErrorContext, | 5541 xmlGenericError(xmlGenericErrorContext, |
| 4947 "HPP: entering CONTENT\n"); | 5542 "HPP: entering CONTENT\n"); |
| 4948 #endif | 5543 #endif |
| 4949 break; | 5544 break; |
| 4950 } | 5545 } |
| 4951 | 5546 |
| 4952 /* | 5547 /* |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5007 cons = ctxt->nbChars; | 5602 cons = ctxt->nbChars; |
| 5008 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || | 5603 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || |
| 5009 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { | 5604 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { |
| 5010 /* | 5605 /* |
| 5011 * Handle SCRIPT/STYLE separately | 5606 * Handle SCRIPT/STYLE separately |
| 5012 */ | 5607 */ |
| 5013 if (!terminate) { | 5608 if (!terminate) { |
| 5014 int idx; | 5609 int idx; |
| 5015 xmlChar val; | 5610 xmlChar val; |
| 5016 | 5611 |
| 5017 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0); | 5612 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1); |
| 5018 if (idx < 0) | 5613 if (idx < 0) |
| 5019 goto done; | 5614 goto done; |
| 5020 val = in->cur[idx + 2]; | 5615 val = in->cur[idx + 2]; |
| 5021 if (val == 0) /* bad cut of input */ | 5616 if (val == 0) /* bad cut of input */ |
| 5022 goto done; | 5617 goto done; |
| 5023 } | 5618 } |
| 5024 htmlParseScript(ctxt); | 5619 htmlParseScript(ctxt); |
| 5025 if ((cur == '<') && (next == '/')) { | 5620 if ((cur == '<') && (next == '/')) { |
| 5026 ctxt->instate = XML_PARSER_END_TAG; | 5621 ctxt->instate = XML_PARSER_END_TAG; |
| 5027 ctxt->checkIndex = 0; | 5622 ctxt->checkIndex = 0; |
| 5028 #ifdef DEBUG_PUSH | 5623 #ifdef DEBUG_PUSH |
| 5029 xmlGenericError(xmlGenericErrorContext, | 5624 xmlGenericError(xmlGenericErrorContext, |
| 5030 "HPP: entering END_TAG\n"); | 5625 "HPP: entering END_TAG\n"); |
| 5031 #endif | 5626 #endif |
| 5032 break; | 5627 break; |
| 5033 } | 5628 } |
| 5034 } else { | 5629 } else { |
| 5035 /* | 5630 /* |
| 5036 * Sometimes DOCTYPE arrives in the middle of the document | 5631 * Sometimes DOCTYPE arrives in the middle of the document |
| 5037 */ | 5632 */ |
| 5038 if ((cur == '<') && (next == '!') && | 5633 if ((cur == '<') && (next == '!') && |
| 5039 (UPP(2) == 'D') && (UPP(3) == 'O') && | 5634 (UPP(2) == 'D') && (UPP(3) == 'O') && |
| 5040 (UPP(4) == 'C') && (UPP(5) == 'T') && | 5635 (UPP(4) == 'C') && (UPP(5) == 'T') && |
| 5041 (UPP(6) == 'Y') && (UPP(7) == 'P') && | 5636 (UPP(6) == 'Y') && (UPP(7) == 'P') && |
| 5042 (UPP(8) == 'E')) { | 5637 (UPP(8) == 'E')) { |
| 5043 if ((!terminate) && | 5638 if ((!terminate) && |
| 5044 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5639 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
| 5045 goto done; | 5640 goto done; |
| 5046 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, | 5641 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, |
| 5047 "Misplaced DOCTYPE declaration\n", | 5642 "Misplaced DOCTYPE declaration\n", |
| 5048 BAD_CAST "DOCTYPE" , NULL); | 5643 BAD_CAST "DOCTYPE" , NULL); |
| 5049 htmlParseDocTypeDecl(ctxt); | 5644 htmlParseDocTypeDecl(ctxt); |
| 5050 } else if ((cur == '<') && (next == '!') && | 5645 } else if ((cur == '<') && (next == '!') && |
| 5051 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5646 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
| 5052 if ((!terminate) && | 5647 if ((!terminate) && |
| 5053 (htmlParseLookupSequence( | 5648 (htmlParseLookupSequence( |
| 5054 » » » » » ctxt, '-', '-', '>', 1) < 0)) | 5649 » » » » ctxt, '-', '-', '>', 1, 1) < 0)) |
| 5055 goto done; | 5650 goto done; |
| 5056 #ifdef DEBUG_PUSH | 5651 #ifdef DEBUG_PUSH |
| 5057 xmlGenericError(xmlGenericErrorContext, | 5652 xmlGenericError(xmlGenericErrorContext, |
| 5058 "HPP: Parsing Comment\n"); | 5653 "HPP: Parsing Comment\n"); |
| 5059 #endif | 5654 #endif |
| 5060 htmlParseComment(ctxt); | 5655 htmlParseComment(ctxt); |
| 5061 ctxt->instate = XML_PARSER_CONTENT; | 5656 ctxt->instate = XML_PARSER_CONTENT; |
| 5062 } else if ((cur == '<') && (next == '?')) { | 5657 } else if ((cur == '<') && (next == '?')) { |
| 5063 if ((!terminate) && | 5658 if ((!terminate) && |
| 5064 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5659 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
| 5065 goto done; | 5660 goto done; |
| 5066 #ifdef DEBUG_PUSH | 5661 #ifdef DEBUG_PUSH |
| 5067 xmlGenericError(xmlGenericErrorContext, | 5662 xmlGenericError(xmlGenericErrorContext, |
| 5068 "HPP: Parsing PI\n"); | 5663 "HPP: Parsing PI\n"); |
| 5069 #endif | 5664 #endif |
| 5070 htmlParsePI(ctxt); | 5665 htmlParsePI(ctxt); |
| 5071 ctxt->instate = XML_PARSER_CONTENT; | 5666 ctxt->instate = XML_PARSER_CONTENT; |
| 5072 } else if ((cur == '<') && (next == '!') && (avail < 4)) { | 5667 } else if ((cur == '<') && (next == '!') && (avail < 4)) { |
| 5073 goto done; | 5668 goto done; |
| 5074 } else if ((cur == '<') && (next == '/')) { | 5669 } else if ((cur == '<') && (next == '/')) { |
| 5075 ctxt->instate = XML_PARSER_END_TAG; | 5670 ctxt->instate = XML_PARSER_END_TAG; |
| 5076 ctxt->checkIndex = 0; | 5671 ctxt->checkIndex = 0; |
| 5077 #ifdef DEBUG_PUSH | 5672 #ifdef DEBUG_PUSH |
| 5078 xmlGenericError(xmlGenericErrorContext, | 5673 xmlGenericError(xmlGenericErrorContext, |
| 5079 "HPP: entering END_TAG\n"); | 5674 "HPP: entering END_TAG\n"); |
| 5080 #endif | 5675 #endif |
| 5081 break; | 5676 break; |
| 5082 } else if (cur == '<') { | 5677 } else if (cur == '<') { |
| 5083 ctxt->instate = XML_PARSER_START_TAG; | 5678 ctxt->instate = XML_PARSER_START_TAG; |
| 5084 ctxt->checkIndex = 0; | 5679 ctxt->checkIndex = 0; |
| 5085 #ifdef DEBUG_PUSH | 5680 #ifdef DEBUG_PUSH |
| 5086 xmlGenericError(xmlGenericErrorContext, | 5681 xmlGenericError(xmlGenericErrorContext, |
| 5087 "HPP: entering START_TAG\n"); | 5682 "HPP: entering START_TAG\n"); |
| 5088 #endif | 5683 #endif |
| 5089 break; | 5684 break; |
| 5090 } else if (cur == '&') { | 5685 } else if (cur == '&') { |
| 5091 if ((!terminate) && | 5686 if ((!terminate) && |
| 5092 » » » (htmlParseLookupSequence(ctxt, ';', 0, 0, 0) < 0)) | 5687 » » » (htmlParseLookupChars(ctxt, |
| 5688 BAD_CAST "; >/", 4) < 0)) |
| 5093 goto done; | 5689 goto done; |
| 5094 #ifdef DEBUG_PUSH | 5690 #ifdef DEBUG_PUSH |
| 5095 xmlGenericError(xmlGenericErrorContext, | 5691 xmlGenericError(xmlGenericErrorContext, |
| 5096 "HPP: Parsing Reference\n"); | 5692 "HPP: Parsing Reference\n"); |
| 5097 #endif | 5693 #endif |
| 5098 /* TODO: check generation of subtrees if noent !!! */ | 5694 /* TODO: check generation of subtrees if noent !!! */ |
| 5099 htmlParseReference(ctxt); | 5695 htmlParseReference(ctxt); |
| 5100 } else { | 5696 } else { |
| 5101 /* | 5697 /* |
| 5102 * check that the text sequence is complete | 5698 * check that the text sequence is complete |
| 5103 * before handing out the data to the parser | 5699 * before handing out the data to the parser |
| 5104 * to avoid problems with erroneous end of | 5700 * to avoid problems with erroneous end of |
| 5105 * data detection. | 5701 * data detection. |
| 5106 */ | 5702 */ |
| 5107 if ((!terminate) && | 5703 if ((!terminate) && |
| 5108 » » » (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0)) | 5704 (htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0)) |
| 5109 goto done; | 5705 goto done; |
| 5110 ctxt->checkIndex = 0; | 5706 ctxt->checkIndex = 0; |
| 5111 #ifdef DEBUG_PUSH | 5707 #ifdef DEBUG_PUSH |
| 5112 xmlGenericError(xmlGenericErrorContext, | 5708 xmlGenericError(xmlGenericErrorContext, |
| 5113 "HPP: Parsing char data\n"); | 5709 "HPP: Parsing char data\n"); |
| 5114 #endif | 5710 #endif |
| 5115 htmlParseCharData(ctxt); | 5711 htmlParseCharData(ctxt); |
| 5116 } | 5712 } |
| 5117 } | 5713 } |
| 5118 if (cons == ctxt->nbChars) { | 5714 if (cons == ctxt->nbChars) { |
| 5119 if (ctxt->node != NULL) { | 5715 if (ctxt->node != NULL) { |
| 5120 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 5716 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 5121 "detected an error in element content\n", | 5717 "detected an error in element content\n", |
| 5122 NULL, NULL); | 5718 NULL, NULL); |
| 5123 } | 5719 } |
| 5124 NEXT; | 5720 NEXT; |
| 5125 break; | 5721 break; |
| 5126 } | 5722 } |
| 5127 | 5723 |
| 5128 break; | 5724 break; |
| 5129 } | 5725 } |
| 5130 case XML_PARSER_END_TAG: | 5726 case XML_PARSER_END_TAG: |
| 5131 if (avail < 2) | 5727 if (avail < 2) |
| 5132 goto done; | 5728 goto done; |
| 5133 if ((!terminate) && | 5729 if ((!terminate) && |
| 5134 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) | 5730 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
| 5135 goto done; | 5731 goto done; |
| 5136 htmlParseEndTag(ctxt); | 5732 htmlParseEndTag(ctxt); |
| 5137 if (ctxt->nameNr == 0) { | 5733 if (ctxt->nameNr == 0) { |
| 5138 ctxt->instate = XML_PARSER_EPILOG; | 5734 ctxt->instate = XML_PARSER_EPILOG; |
| 5139 } else { | 5735 } else { |
| 5140 ctxt->instate = XML_PARSER_CONTENT; | 5736 ctxt->instate = XML_PARSER_CONTENT; |
| 5141 } | 5737 } |
| 5142 ctxt->checkIndex = 0; | 5738 ctxt->checkIndex = 0; |
| 5143 #ifdef DEBUG_PUSH | 5739 #ifdef DEBUG_PUSH |
| 5144 xmlGenericError(xmlGenericErrorContext, | 5740 xmlGenericError(xmlGenericErrorContext, |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5251 ctxt->instate = XML_PARSER_CONTENT; | 5847 ctxt->instate = XML_PARSER_CONTENT; |
| 5252 ctxt->checkIndex = 0; | 5848 ctxt->checkIndex = 0; |
| 5253 #ifdef DEBUG_PUSH | 5849 #ifdef DEBUG_PUSH |
| 5254 xmlGenericError(xmlGenericErrorContext, | 5850 xmlGenericError(xmlGenericErrorContext, |
| 5255 "HPP: entering CONTENT\n"); | 5851 "HPP: entering CONTENT\n"); |
| 5256 #endif | 5852 #endif |
| 5257 break; | 5853 break; |
| 5258 | 5854 |
| 5259 } | 5855 } |
| 5260 } | 5856 } |
| 5261 done: | 5857 done: |
| 5262 if ((avail == 0) && (terminate)) { | 5858 if ((avail == 0) && (terminate)) { |
| 5263 htmlAutoCloseOnEnd(ctxt); | 5859 htmlAutoCloseOnEnd(ctxt); |
| 5264 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { | 5860 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
| 5265 /* | 5861 /* |
| 5266 * SAX: end of the document processing. | 5862 * SAX: end of the document processing. |
| 5267 */ | 5863 */ |
| 5268 ctxt->instate = XML_PARSER_EOF; | 5864 ctxt->instate = XML_PARSER_EOF; |
| 5269 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5865 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
| 5270 ctxt->sax->endDocument(ctxt->userData); | 5866 ctxt->sax->endDocument(ctxt->userData); |
| 5271 } | 5867 } |
| 5272 } | 5868 } |
| 5273 if ((ctxt->myDoc != NULL) && | 5869 if ((ctxt->myDoc != NULL) && |
| 5274 ((terminate) || (ctxt->instate == XML_PARSER_EOF) || | 5870 ((terminate) || (ctxt->instate == XML_PARSER_EOF) || |
| 5275 (ctxt->instate == XML_PARSER_EPILOG))) { | 5871 (ctxt->instate == XML_PARSER_EPILOG))) { |
| 5276 xmlDtdPtr dtd; | 5872 xmlDtdPtr dtd; |
| 5277 dtd = xmlGetIntSubset(ctxt->myDoc); | 5873 dtd = xmlGetIntSubset(ctxt->myDoc); |
| 5278 if (dtd == NULL) | 5874 if (dtd == NULL) |
| 5279 » ctxt->myDoc->intSubset = | 5875 » ctxt->myDoc->intSubset = |
| 5280 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", | 5876 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", |
| 5281 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", | 5877 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", |
| 5282 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); | 5878 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); |
| 5283 } | 5879 } |
| 5284 #ifdef DEBUG_PUSH | 5880 #ifdef DEBUG_PUSH |
| 5285 xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret); | 5881 xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret); |
| 5286 #endif | 5882 #endif |
| 5287 return(ret); | 5883 return(ret); |
| 5288 } | 5884 } |
| 5289 | 5885 |
| 5290 /** | 5886 /** |
| (...skipping 13 matching lines...) Expand all Loading... |
| 5304 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 5900 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
| 5305 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 5901 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 5306 "htmlParseChunk: context error\n", NULL, NULL); | 5902 "htmlParseChunk: context error\n", NULL, NULL); |
| 5307 return(XML_ERR_INTERNAL_ERROR); | 5903 return(XML_ERR_INTERNAL_ERROR); |
| 5308 } | 5904 } |
| 5309 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && | 5905 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
| 5310 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { | 5906 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
| 5311 int base = ctxt->input->base - ctxt->input->buf->buffer->content; | 5907 int base = ctxt->input->base - ctxt->input->buf->buffer->content; |
| 5312 int cur = ctxt->input->cur - ctxt->input->base; | 5908 int cur = ctxt->input->cur - ctxt->input->base; |
| 5313 int res; | 5909 int res; |
| 5314 » | 5910 |
| 5315 » res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);» | 5911 » res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
| 5316 if (res < 0) { | 5912 if (res < 0) { |
| 5317 ctxt->errNo = XML_PARSER_EOF; | 5913 ctxt->errNo = XML_PARSER_EOF; |
| 5318 ctxt->disableSAX = 1; | 5914 ctxt->disableSAX = 1; |
| 5319 return (XML_PARSER_EOF); | 5915 return (XML_PARSER_EOF); |
| 5320 } | 5916 } |
| 5321 ctxt->input->base = ctxt->input->buf->buffer->content + base; | 5917 ctxt->input->base = ctxt->input->buf->buffer->content + base; |
| 5322 ctxt->input->cur = ctxt->input->base + cur; | 5918 ctxt->input->cur = ctxt->input->base + cur; |
| 5323 ctxt->input->end = | 5919 ctxt->input->end = |
| 5324 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | 5920 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; |
| 5325 #ifdef DEBUG_PUSH | 5921 #ifdef DEBUG_PUSH |
| 5326 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); | 5922 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
| 5327 #endif | 5923 #endif |
| 5328 | 5924 |
| 5329 #if 0 | 5925 #if 0 |
| 5330 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) | 5926 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) |
| 5331 htmlParseTryOrFinish(ctxt, terminate); | 5927 htmlParseTryOrFinish(ctxt, terminate); |
| 5332 #endif | 5928 #endif |
| 5333 } else if (ctxt->instate != XML_PARSER_EOF) { | 5929 } else if (ctxt->instate != XML_PARSER_EOF) { |
| 5334 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { | 5930 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { |
| 5335 xmlParserInputBufferPtr in = ctxt->input->buf; | 5931 xmlParserInputBufferPtr in = ctxt->input->buf; |
| 5336 if ((in->encoder != NULL) && (in->buffer != NULL) && | 5932 if ((in->encoder != NULL) && (in->buffer != NULL) && |
| 5337 (in->raw != NULL)) { | 5933 (in->raw != NULL)) { |
| 5338 int nbchars; | 5934 int nbchars; |
| 5339 » » | 5935 |
| 5340 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); | 5936 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); |
| 5341 if (nbchars < 0) { | 5937 if (nbchars < 0) { |
| 5342 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 5938 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
| 5343 "encoder error\n", NULL, NULL); | 5939 "encoder error\n", NULL, NULL); |
| 5344 return(XML_ERR_INVALID_ENCODING); | 5940 return(XML_ERR_INVALID_ENCODING); |
| 5345 } | 5941 } |
| 5346 } | 5942 } |
| 5347 } | 5943 } |
| 5348 } | 5944 } |
| 5349 htmlParseTryOrFinish(ctxt, terminate); | 5945 htmlParseTryOrFinish(ctxt, terminate); |
| 5350 if (terminate) { | 5946 if (terminate) { |
| 5351 if ((ctxt->instate != XML_PARSER_EOF) && | 5947 if ((ctxt->instate != XML_PARSER_EOF) && |
| 5352 (ctxt->instate != XML_PARSER_EPILOG) && | 5948 (ctxt->instate != XML_PARSER_EPILOG) && |
| 5353 (ctxt->instate != XML_PARSER_MISC)) { | 5949 (ctxt->instate != XML_PARSER_MISC)) { |
| 5354 ctxt->errNo = XML_ERR_DOCUMENT_END; | 5950 ctxt->errNo = XML_ERR_DOCUMENT_END; |
| 5355 ctxt->wellFormed = 0; | 5951 ctxt->wellFormed = 0; |
| 5356 » } | 5952 » } |
| 5357 if (ctxt->instate != XML_PARSER_EOF) { | 5953 if (ctxt->instate != XML_PARSER_EOF) { |
| 5358 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5954 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
| 5359 ctxt->sax->endDocument(ctxt->userData); | 5955 ctxt->sax->endDocument(ctxt->userData); |
| 5360 } | 5956 } |
| 5361 ctxt->instate = XML_PARSER_EOF; | 5957 ctxt->instate = XML_PARSER_EOF; |
| 5362 } | 5958 } |
| 5363 return((xmlParserErrors) ctxt->errNo);» | 5959 return((xmlParserErrors) ctxt->errNo); |
| 5364 } | 5960 } |
| 5365 | 5961 |
| 5366 /************************************************************************ | 5962 /************************************************************************ |
| 5367 * * | 5963 * * |
| 5368 * User entry points * | 5964 * User entry points * |
| 5369 * * | 5965 * * |
| 5370 ************************************************************************/ | 5966 ************************************************************************/ |
| 5371 | 5967 |
| 5372 /** | 5968 /** |
| 5373 * htmlCreatePushParserCtxt: | 5969 * htmlCreatePushParserCtxt: |
| 5374 * @sax: a SAX handler | 5970 * @sax: a SAX handler |
| 5375 * @user_data: The user data returned on SAX callbacks | 5971 * @user_data: The user data returned on SAX callbacks |
| 5376 * @chunk: a pointer to an array of chars | 5972 * @chunk: a pointer to an array of chars |
| 5377 * @size: number of chars in the array | 5973 * @size: number of chars in the array |
| 5378 * @filename: an optional file name or URI | 5974 * @filename: an optional file name or URI |
| 5379 * @enc: an optional encoding | 5975 * @enc: an optional encoding |
| 5380 * | 5976 * |
| 5381 * Create a parser context for using the HTML parser in push mode | 5977 * Create a parser context for using the HTML parser in push mode |
| 5382 * The value of @filename is used for fetching external entities | 5978 * The value of @filename is used for fetching external entities |
| 5383 * and error/warning reports. | 5979 * and error/warning reports. |
| 5384 * | 5980 * |
| 5385 * Returns the new parser context or NULL | 5981 * Returns the new parser context or NULL |
| 5386 */ | 5982 */ |
| 5387 htmlParserCtxtPtr | 5983 htmlParserCtxtPtr |
| 5388 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, | 5984 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, |
| 5389 const char *chunk, int size, const char *filename, | 5985 const char *chunk, int size, const char *filename, |
| 5390 xmlCharEncoding enc) { | 5986 xmlCharEncoding enc) { |
| 5391 htmlParserCtxtPtr ctxt; | 5987 htmlParserCtxtPtr ctxt; |
| 5392 htmlParserInputPtr inputStream; | 5988 htmlParserInputPtr inputStream; |
| 5393 xmlParserInputBufferPtr buf; | 5989 xmlParserInputBufferPtr buf; |
| 5394 | 5990 |
| 5395 xmlInitParser(); | 5991 xmlInitParser(); |
| 5396 | 5992 |
| 5397 buf = xmlAllocParserInputBuffer(enc); | 5993 buf = xmlAllocParserInputBuffer(enc); |
| 5398 if (buf == NULL) return(NULL); | 5994 if (buf == NULL) return(NULL); |
| (...skipping 10 matching lines...) Expand all Loading... |
| 5409 xmlFree(ctxt->sax); | 6005 xmlFree(ctxt->sax); |
| 5410 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler)); | 6006 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler)); |
| 5411 if (ctxt->sax == NULL) { | 6007 if (ctxt->sax == NULL) { |
| 5412 xmlFree(buf); | 6008 xmlFree(buf); |
| 5413 xmlFree(ctxt); | 6009 xmlFree(ctxt); |
| 5414 return(NULL); | 6010 return(NULL); |
| 5415 } | 6011 } |
| 5416 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler)); | 6012 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler)); |
| 5417 if (user_data != NULL) | 6013 if (user_data != NULL) |
| 5418 ctxt->userData = user_data; | 6014 ctxt->userData = user_data; |
| 5419 }» | 6015 } |
| 5420 if (filename == NULL) { | 6016 if (filename == NULL) { |
| 5421 ctxt->directory = NULL; | 6017 ctxt->directory = NULL; |
| 5422 } else { | 6018 } else { |
| 5423 ctxt->directory = xmlParserGetDirectory(filename); | 6019 ctxt->directory = xmlParserGetDirectory(filename); |
| 5424 } | 6020 } |
| 5425 | 6021 |
| 5426 inputStream = htmlNewInputStream(ctxt); | 6022 inputStream = htmlNewInputStream(ctxt); |
| 5427 if (inputStream == NULL) { | 6023 if (inputStream == NULL) { |
| 5428 xmlFreeParserCtxt(ctxt); | 6024 xmlFreeParserCtxt(ctxt); |
| 5429 xmlFree(buf); | 6025 xmlFree(buf); |
| 5430 return(NULL); | 6026 return(NULL); |
| 5431 } | 6027 } |
| 5432 | 6028 |
| 5433 if (filename == NULL) | 6029 if (filename == NULL) |
| 5434 inputStream->filename = NULL; | 6030 inputStream->filename = NULL; |
| 5435 else | 6031 else |
| 5436 inputStream->filename = (char *) | 6032 inputStream->filename = (char *) |
| 5437 xmlCanonicPath((const xmlChar *) filename); | 6033 xmlCanonicPath((const xmlChar *) filename); |
| 5438 inputStream->buf = buf; | 6034 inputStream->buf = buf; |
| 5439 inputStream->base = inputStream->buf->buffer->content; | 6035 inputStream->base = inputStream->buf->buffer->content; |
| 5440 inputStream->cur = inputStream->buf->buffer->content; | 6036 inputStream->cur = inputStream->buf->buffer->content; |
| 5441 inputStream->end = | 6037 inputStream->end = |
| 5442 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; | 6038 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; |
| 5443 | 6039 |
| 5444 inputPush(ctxt, inputStream); | 6040 inputPush(ctxt, inputStream); |
| 5445 | 6041 |
| 5446 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && | 6042 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
| 5447 (ctxt->input->buf != NULL)) {» | 6043 (ctxt->input->buf != NULL)) { |
| 5448 int base = ctxt->input->base - ctxt->input->buf->buffer->content; | 6044 int base = ctxt->input->base - ctxt->input->buf->buffer->content; |
| 5449 int cur = ctxt->input->cur - ctxt->input->base; | 6045 int cur = ctxt->input->cur - ctxt->input->base; |
| 5450 | 6046 |
| 5451 » xmlParserInputBufferPush(ctxt->input->buf, size, chunk);» | 6047 » xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
| 5452 | 6048 |
| 5453 ctxt->input->base = ctxt->input->buf->buffer->content + base; | 6049 ctxt->input->base = ctxt->input->buf->buffer->content + base; |
| 5454 ctxt->input->cur = ctxt->input->base + cur; | 6050 ctxt->input->cur = ctxt->input->base + cur; |
| 5455 ctxt->input->end = | 6051 ctxt->input->end = |
| 5456 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | 6052 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; |
| 5457 #ifdef DEBUG_PUSH | 6053 #ifdef DEBUG_PUSH |
| 5458 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); | 6054 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
| 5459 #endif | 6055 #endif |
| 5460 } | 6056 } |
| 5461 ctxt->progressive = 1; | 6057 ctxt->progressive = 1; |
| 5462 | 6058 |
| 5463 return(ctxt); | 6059 return(ctxt); |
| 5464 } | 6060 } |
| 5465 #endif /* LIBXML_PUSH_ENABLED */ | 6061 #endif /* LIBXML_PUSH_ENABLED */ |
| 5466 | 6062 |
| 5467 /** | 6063 /** |
| 5468 * htmlSAXParseDoc: | 6064 * htmlSAXParseDoc: |
| 5469 * @cur: a pointer to an array of xmlChar | 6065 * @cur: a pointer to an array of xmlChar |
| 5470 * @encoding: a free form C string describing the HTML document encoding, or NULL | 6066 * @encoding: a free form C string describing the HTML document encoding, or NULL |
| 5471 * @sax: the SAX handler block | 6067 * @sax: the SAX handler block |
| 5472 * @userData: if using SAX, this pointer will be provided on callbacks. | 6068 * @userData: if using SAX, this pointer will be provided on callbacks. |
| 5473 * | 6069 * |
| 5474 * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks | 6070 * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks |
| 5475 * to handle parse events. If sax is NULL, fallback to the default DOM | 6071 * to handle parse events. If sax is NULL, fallback to the default DOM |
| 5476 * behavior and return a tree. | 6072 * behavior and return a tree. |
| 5477 * | 6073 * |
| 5478 * Returns the resulting document tree unless SAX is NULL or the document is | 6074 * Returns the resulting document tree unless SAX is NULL or the document is |
| 5479 * not well formed. | 6075 * not well formed. |
| 5480 */ | 6076 */ |
| 5481 | 6077 |
| 5482 htmlDocPtr | 6078 htmlDocPtr |
| 5483 htmlSAXParseDoc(xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData) { | 6079 htmlSAXParseDoc(xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData) { |
| 5484 htmlDocPtr ret; | 6080 htmlDocPtr ret; |
| 5485 htmlParserCtxtPtr ctxt; | 6081 htmlParserCtxtPtr ctxt; |
| 5486 | 6082 |
| 5487 xmlInitParser(); | 6083 xmlInitParser(); |
| 5488 | 6084 |
| 5489 if (cur == NULL) return(NULL); | 6085 if (cur == NULL) return(NULL); |
| 5490 | 6086 |
| 5491 | 6087 |
| 5492 ctxt = htmlCreateDocParserCtxt(cur, encoding); | 6088 ctxt = htmlCreateDocParserCtxt(cur, encoding); |
| 5493 if (ctxt == NULL) return(NULL); | 6089 if (ctxt == NULL) return(NULL); |
| 5494 if (sax != NULL) { | 6090 if (sax != NULL) { |
| 5495 if (ctxt->sax != NULL) xmlFree (ctxt->sax); | 6091 if (ctxt->sax != NULL) xmlFree (ctxt->sax); |
| 5496 ctxt->sax = sax; | 6092 ctxt->sax = sax; |
| 5497 ctxt->userData = userData; | 6093 ctxt->userData = userData; |
| 5498 } | 6094 } |
| 5499 | 6095 |
| 5500 htmlParseDocument(ctxt); | 6096 htmlParseDocument(ctxt); |
| 5501 ret = ctxt->myDoc; | 6097 ret = ctxt->myDoc; |
| 5502 if (sax != NULL) { | 6098 if (sax != NULL) { |
| 5503 ctxt->sax = NULL; | 6099 ctxt->sax = NULL; |
| 5504 ctxt->userData = NULL; | 6100 ctxt->userData = NULL; |
| 5505 } | 6101 } |
| 5506 htmlFreeParserCtxt(ctxt); | 6102 htmlFreeParserCtxt(ctxt); |
| 5507 | 6103 |
| 5508 return(ret); | 6104 return(ret); |
| 5509 } | 6105 } |
| 5510 | 6106 |
| 5511 /** | 6107 /** |
| 5512 * htmlParseDoc: | 6108 * htmlParseDoc: |
| 5513 * @cur: a pointer to an array of xmlChar | 6109 * @cur: a pointer to an array of xmlChar |
| 5514 * @encoding: a free form C string describing the HTML document encoding, or NULL | 6110 * @encoding: a free form C string describing the HTML document encoding, or NULL |
| 5515 * | 6111 * |
| 5516 * parse an HTML in-memory document and build a tree. | 6112 * parse an HTML in-memory document and build a tree. |
| 5517 * | 6113 * |
| 5518 * Returns the resulting document tree | 6114 * Returns the resulting document tree |
| 5519 */ | 6115 */ |
| 5520 | 6116 |
| 5521 htmlDocPtr | 6117 htmlDocPtr |
| 5522 htmlParseDoc(xmlChar *cur, const char *encoding) { | 6118 htmlParseDoc(xmlChar *cur, const char *encoding) { |
| 5523 return(htmlSAXParseDoc(cur, encoding, NULL, NULL)); | 6119 return(htmlSAXParseDoc(cur, encoding, NULL, NULL)); |
| 5524 } | 6120 } |
| 5525 | 6121 |
| 5526 | 6122 |
| 5527 /** | 6123 /** |
| 5528 * htmlCreateFileParserCtxt: | 6124 * htmlCreateFileParserCtxt: |
| 5529 * @filename: the filename | 6125 * @filename: the filename |
| 5530 * @encoding: a free form C string describing the HTML document encoding, or NULL | 6126 * @encoding: a free form C string describing the HTML document encoding, or NULL |
| 5531 * | 6127 * |
| 5532 * Create a parser context for a file content. | 6128 * Create a parser context for a file content. |
| 5533 * Automatic support for ZLIB/Compress compressed document is provided | 6129 * Automatic support for ZLIB/Compress compressed document is provided |
| 5534 * by default if found at compile-time. | 6130 * by default if found at compile-time. |
| 5535 * | 6131 * |
| 5536 * Returns the new parser context or NULL | 6132 * Returns the new parser context or NULL |
| 5537 */ | 6133 */ |
| 5538 htmlParserCtxtPtr | 6134 htmlParserCtxtPtr |
| 5539 htmlCreateFileParserCtxt(const char *filename, const char *encoding) | 6135 htmlCreateFileParserCtxt(const char *filename, const char *encoding) |
| 5540 { | 6136 { |
| 5541 htmlParserCtxtPtr ctxt; | 6137 htmlParserCtxtPtr ctxt; |
| 5542 htmlParserInputPtr inputStream; | 6138 htmlParserInputPtr inputStream; |
| (...skipping 11 matching lines...) Expand all Loading... |
| 5554 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename); | 6150 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename); |
| 5555 if (canonicFilename == NULL) { | 6151 if (canonicFilename == NULL) { |
| 5556 #ifdef LIBXML_SAX1_ENABLED | 6152 #ifdef LIBXML_SAX1_ENABLED |
| 5557 if (xmlDefaultSAXHandler.error != NULL) { | 6153 if (xmlDefaultSAXHandler.error != NULL) { |
| 5558 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); | 6154 xmlDefaultSAXHandler.error(NULL, "out of memory\n"); |
| 5559 } | 6155 } |
| 5560 #endif | 6156 #endif |
| 5561 xmlFreeParserCtxt(ctxt); | 6157 xmlFreeParserCtxt(ctxt); |
| 5562 return(NULL); | 6158 return(NULL); |
| 5563 } | 6159 } |
| 5564 | 6160 |
| 5565 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt); | 6161 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt); |
| 5566 xmlFree(canonicFilename); | 6162 xmlFree(canonicFilename); |
| 5567 if (inputStream == NULL) { | 6163 if (inputStream == NULL) { |
| 5568 xmlFreeParserCtxt(ctxt); | 6164 xmlFreeParserCtxt(ctxt); |
| 5569 return(NULL); | 6165 return(NULL); |
| 5570 } | 6166 } |
| 5571 | 6167 |
| 5572 inputPush(ctxt, inputStream); | 6168 inputPush(ctxt, inputStream); |
| 5573 | 6169 |
| 5574 /* set encoding */ | 6170 /* set encoding */ |
| 5575 if (encoding) { | 6171 if (encoding) { |
| 5576 content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1); | 6172 content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1); |
| 5577 » if (content) { | 6173 » if (content) { |
| 5578 strcpy ((char *)content, (char *)content_line); | 6174 strcpy ((char *)content, (char *)content_line); |
| 5579 strcat ((char *)content, (char *)encoding); | 6175 strcat ((char *)content, (char *)encoding); |
| 5580 htmlCheckEncoding (ctxt, content); | 6176 htmlCheckEncoding (ctxt, content); |
| 5581 xmlFree (content); | 6177 xmlFree (content); |
| 5582 } | 6178 } |
| 5583 } | 6179 } |
| 5584 | 6180 |
| 5585 return(ctxt); | 6181 return(ctxt); |
| 5586 } | 6182 } |
| 5587 | 6183 |
| 5588 /** | 6184 /** |
| 5589 * htmlSAXParseFile: | 6185 * htmlSAXParseFile: |
| 5590 * @filename: the filename | 6186 * @filename: the filename |
| 5591 * @encoding: a free form C string describing the HTML document encoding, or NULL | 6187 * @encoding: a free form C string describing the HTML document encoding, or NULL |
| 5592 * @sax: the SAX handler block | 6188 * @sax: the SAX handler block |
| 5593 * @userData: if using SAX, this pointer will be provided on callbacks. | 6189 * @userData: if using SAX, this pointer will be provided on callbacks. |
| 5594 * | 6190 * |
| 5595 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress | 6191 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress |
| 5596 * compressed document is provided by default if found at compile-time. | 6192 * compressed document is provided by default if found at compile-time. |
| 5597 * It use the given SAX function block to handle the parsing callback. | 6193 * It use the given SAX function block to handle the parsing callback. |
| 5598 * If sax is NULL, fallback to the default DOM tree building routines. | 6194 * If sax is NULL, fallback to the default DOM tree building routines. |
| 5599 * | 6195 * |
| 5600 * Returns the document tree held by the parser context after parsing | 6196 * Returns the document tree held by the parser context after parsing |
| 5601 * (ctxt->myDoc), which may be NULL if no tree was built. | 6197 * (ctxt->myDoc), which may be NULL if no tree was built. |
| 5602 */ | 6198 */ |
| 5603 | 6199 |
| 5604 htmlDocPtr | 6200 htmlDocPtr |
| 5605 htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr s
ax, | 6201 htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr s
ax, |
| 5606 void *userData) { | 6202 void *userData) { |
| 5607 htmlDocPtr ret; | 6203 htmlDocPtr ret; |
| 5608 htmlParserCtxtPtr ctxt; | 6204 htmlParserCtxtPtr ctxt; |
| 5609 htmlSAXHandlerPtr oldsax = NULL; | 6205 htmlSAXHandlerPtr oldsax = NULL; |
| 5610 | 6206 |
| 5611 xmlInitParser(); | 6207 xmlInitParser(); |
| 5612 | 6208 |
| 5613 ctxt = htmlCreateFileParserCtxt(filename, encoding); | 6209 ctxt = htmlCreateFileParserCtxt(filename, encoding); |
| 5614 if (ctxt == NULL) return(NULL); | 6210 if (ctxt == NULL) return(NULL); |
| 5615 if (sax != NULL) { | 6211 if (sax != NULL) { |
| 5616 oldsax = ctxt->sax; | 6212 oldsax = ctxt->sax; |
| 5617 ctxt->sax = sax; | 6213 ctxt->sax = sax; |
| 5618 ctxt->userData = userData; | 6214 ctxt->userData = userData; |
| 5619 } | 6215 } |
| 5620 | 6216 |
| 5621 htmlParseDocument(ctxt); | 6217 htmlParseDocument(ctxt); |
| 5622 | 6218 |
| 5623 ret = ctxt->myDoc; | 6219 ret = ctxt->myDoc; |
| 5624 if (sax != NULL) { | 6220 if (sax != NULL) { |
| 5625 ctxt->sax = oldsax; | 6221 ctxt->sax = oldsax; |
| 5626 ctxt->userData = NULL; | 6222 ctxt->userData = NULL; |
| 5627 } | 6223 } |
| 5628 htmlFreeParserCtxt(ctxt); | 6224 htmlFreeParserCtxt(ctxt); |
| 5629 | 6225 |
| 5630 return(ret); | 6226 return(ret); |
| 5631 } | 6227 } |
| 5632 | 6228 |
| 5633 /** | 6229 /** |
| 5634 * htmlParseFile: | 6230 * htmlParseFile: |
| 5635 * @filename: the filename | 6231 * @filename: the filename |
| 5636 * @encoding: a free form C string describing the HTML document encoding, or NULL | 6232 * @encoding: a free form C string describing the HTML document encoding, or NULL |
| 5637 * | 6233 * |
| 5638 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress | 6234 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress |
| 5639 * compressed document is provided by default if found at compile-time. | 6235 * compressed document is provided by default if found at compile-time. |
| 5640 * | 6236 * |
| 5641 * Returns the resulting document tree | 6237 * Returns the resulting document tree |
| 5642 */ | 6238 */ |
| 5643 | 6239 |
| 5644 htmlDocPtr | 6240 htmlDocPtr |
| 5645 htmlParseFile(const char *filename, const char *encoding) { | 6241 htmlParseFile(const char *filename, const char *encoding) { |
| 5646 return(htmlSAXParseFile(filename, encoding, NULL, NULL)); | 6242 return(htmlSAXParseFile(filename, encoding, NULL, NULL)); |
| 5647 } | 6243 } |
| 5648 | 6244 |
| 5649 /** | 6245 /** |
| 5650 * htmlHandleOmittedElem: | 6246 * htmlHandleOmittedElem: |
| 5651 * @val: int 0 or 1 | 6247 * @val: int 0 or 1 |
| 5652 * | 6248 * |
| 5653 * Set and return the previous value for handling HTML omitted tags. | 6249 * Set and return the previous value for handling HTML omitted tags. |
| 5654 * | 6250 * |
| 5655 * Returns the previous value: 0 for no handling, 1 for auto insertion. | 6251 * Returns the previous value: 0 for no handling, 1 for auto insertion. |
| 5656 */ | 6252 */ |
| 5657 | 6253 |
| 5658 int | 6254 int |
| 5659 htmlHandleOmittedElem(int val) { | 6255 htmlHandleOmittedElem(int val) { |
| 5660 int old = htmlOmittedDefaultValue; | 6256 int old = htmlOmittedDefaultValue; |
| 5661 | 6257 |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5781 * * | 6377 * * |
| 5782 ************************************************************************/ | 6378 ************************************************************************/ |
| 5783 /** | 6379 /** |
| 5784 * DICT_FREE: | 6380 * DICT_FREE: |
| 5785 * @str: a string | 6381 * @str: a string |
| 5786 * | 6382 * |
| 5787 * Free a string if it is not owned by the "dict" dictionary in the | 6383 * Free a string if it is not owned by the "dict" dictionary in the |
| 5788 * current scope | 6384 * current scope |
| 5789 */ | 6385 */ |
| 5790 #define DICT_FREE(str) \ | 6386 #define DICT_FREE(str) \ |
| 5791 » if ((str) && ((!dict) || » » » » \ | 6387 » if ((str) && ((!dict) ||» » » » \ |
| 5792 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ | 6388 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ |
| 5793 xmlFree((char *)(str)); | 6389 xmlFree((char *)(str)); |
| 5794 | 6390 |
| 5795 /** | 6391 /** |
| 5796 * htmlCtxtReset: | 6392 * htmlCtxtReset: |
| 5797 * @ctxt: an HTML parser context | 6393 * @ctxt: an HTML parser context |
| 5798 * | 6394 * |
| 5799 * Reset a parser context | 6395 * Reset a parser context |
| 5800 */ | 6396 */ |
| 5801 void | 6397 void |
| 5802 htmlCtxtReset(htmlParserCtxtPtr ctxt) | 6398 htmlCtxtReset(htmlParserCtxtPtr ctxt) |
| 5803 { | 6399 { |
| 5804 xmlParserInputPtr input; | 6400 xmlParserInputPtr input; |
| 5805 xmlDictPtr dict; | 6401 xmlDictPtr dict; |
| 5806 | 6402 |
| 5807 if (ctxt == NULL) | 6403 if (ctxt == NULL) |
| 5808 return; | 6404 return; |
| 5809 | 6405 |
| 5810 xmlInitParser(); | 6406 xmlInitParser(); |
| 5811 dict = ctxt->dict; | 6407 dict = ctxt->dict; |
| 5812 | 6408 |
| 5813 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ | 6409 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ |
| 5814 xmlFreeInputStream(input); | 6410 xmlFreeInputStream(input); |
| 5815 } | 6411 } |
| 5816 ctxt->inputNr = 0; | 6412 ctxt->inputNr = 0; |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5923 ctxt->keepBlanks = 1; | 6519 ctxt->keepBlanks = 1; |
| 5924 if (options & HTML_PARSE_RECOVER) { | 6520 if (options & HTML_PARSE_RECOVER) { |
| 5925 ctxt->recovery = 1; | 6521 ctxt->recovery = 1; |
| 5926 options -= HTML_PARSE_RECOVER; | 6522 options -= HTML_PARSE_RECOVER; |
| 5927 } else | 6523 } else |
| 5928 ctxt->recovery = 0; | 6524 ctxt->recovery = 0; |
| 5929 if (options & HTML_PARSE_COMPACT) { | 6525 if (options & HTML_PARSE_COMPACT) { |
| 5930 ctxt->options |= HTML_PARSE_COMPACT; | 6526 ctxt->options |= HTML_PARSE_COMPACT; |
| 5931 options -= HTML_PARSE_COMPACT; | 6527 options -= HTML_PARSE_COMPACT; |
| 5932 } | 6528 } |
| 6529 if (options & XML_PARSE_HUGE) { |
| 6530 ctxt->options |= XML_PARSE_HUGE; |
| 6531 options -= XML_PARSE_HUGE; |
| 6532 } |
| 5933 ctxt->dictNames = 0; | 6533 ctxt->dictNames = 0; |
| 5934 return (options); | 6534 return (options); |
| 5935 } | 6535 } |
| 5936 | 6536 |
| 5937 /** | 6537 /** |
| 5938 * htmlDoRead: | 6538 * htmlDoRead: |
| 5939 * @ctxt: an HTML parser context | 6539 * @ctxt: an HTML parser context |
| 5940 * @URL: the base URL to use for the document | 6540 * @URL: the base URL to use for the document |
| 5941 * @encoding: the document encoding, or NULL | 6541 * @encoding: the document encoding, or NULL |
| 5942 * @options: a combination of htmlParserOption(s) | 6542 * @options: a combination of htmlParserOption(s) |
| 5943 * @reuse: keep the context for reuse | 6543 * @reuse: keep the context for reuse |
| 5944 * | 6544 * |
| 5945 * Common front-end for the htmlRead functions | 6545 * Common front-end for the htmlRead functions |
| 5946 * | 6546 * |
| 5947 * Returns the resulting document tree or NULL | 6547 * Returns the resulting document tree or NULL |
| 5948 */ | 6548 */ |
| 5949 static htmlDocPtr | 6549 static htmlDocPtr |
| 5950 htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding, | 6550 htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding, |
| 5951 int options, int reuse) | 6551 int options, int reuse) |
| 5952 { | 6552 { |
| 5953 htmlDocPtr ret; | 6553 htmlDocPtr ret; |
| 5954 | 6554 |
| 5955 htmlCtxtUseOptions(ctxt, options); | 6555 htmlCtxtUseOptions(ctxt, options); |
| 5956 ctxt->html = 1; | 6556 ctxt->html = 1; |
| 5957 if (encoding != NULL) { | 6557 if (encoding != NULL) { |
| 5958 xmlCharEncodingHandlerPtr hdlr; | 6558 xmlCharEncodingHandlerPtr hdlr; |
| 5959 | 6559 |
| 5960 hdlr = xmlFindCharEncodingHandler(encoding); | 6560 hdlr = xmlFindCharEncodingHandler(encoding); |
| 5961 » if (hdlr != NULL) | 6561 » if (hdlr != NULL) { |
| 5962 xmlSwitchToEncoding(ctxt, hdlr); | 6562 xmlSwitchToEncoding(ctxt, hdlr); |
| 6563 if (ctxt->input->encoding != NULL) |
| 6564 xmlFree((xmlChar *) ctxt->input->encoding); |
| 6565 ctxt->input->encoding = xmlStrdup((xmlChar *)encoding); |
| 6566 } |
| 5963 } | 6567 } |
| 5964 if ((URL != NULL) && (ctxt->input != NULL) && | 6568 if ((URL != NULL) && (ctxt->input != NULL) && |
| 5965 (ctxt->input->filename == NULL)) | 6569 (ctxt->input->filename == NULL)) |
| 5966 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); | 6570 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); |
| 5967 htmlParseDocument(ctxt); | 6571 htmlParseDocument(ctxt); |
| 5968 ret = ctxt->myDoc; | 6572 ret = ctxt->myDoc; |
| 5969 ctxt->myDoc = NULL; | 6573 ctxt->myDoc = NULL; |
| 5970 if (!reuse) { | 6574 if (!reuse) { |
| 5971 if ((ctxt->dictNames) && | 6575 if ((ctxt->dictNames) && |
| 5972 (ret != NULL) && | 6576 (ret != NULL) && |
| 5973 (ret->dict == ctxt->dict)) | 6577 (ret->dict == ctxt->dict)) |
| 5974 ctxt->dict = NULL; | 6578 ctxt->dict = NULL; |
| 5975 xmlFreeParserCtxt(ctxt); | 6579 xmlFreeParserCtxt(ctxt); |
| 5976 } | 6580 } |
| 5977 return (ret); | 6581 return (ret); |
| 5978 } | 6582 } |
| 5979 | 6583 |
| 5980 /** | 6584 /** |
| 5981 * htmlReadDoc: | 6585 * htmlReadDoc: |
| 5982 * @cur: a pointer to a zero terminated string | 6586 * @cur: a pointer to a zero terminated string |
| 5983 * @URL: the base URL to use for the document | 6587 * @URL: the base URL to use for the document |
| 5984 * @encoding: the document encoding, or NULL | 6588 * @encoding: the document encoding, or NULL |
| 5985 * @options: a combination of htmlParserOption(s) | 6589 * @options: a combination of htmlParserOption(s) |
| 5986 * | 6590 * |
| 5987 * parse an HTML in-memory document and build a tree. | 6591 * parse an HTML in-memory document and build a tree. |
| 5988 * | 6592 * |
| 5989 * Returns the resulting document tree | 6593 * Returns the resulting document tree |
| 5990 */ | 6594 */ |
| 5991 htmlDocPtr | 6595 htmlDocPtr |
| 5992 htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int opti
ons) | 6596 htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int opti
ons) |
| 5993 { | 6597 { |
| 5994 htmlParserCtxtPtr ctxt; | 6598 htmlParserCtxtPtr ctxt; |
| 5995 | 6599 |
| 5996 if (cur == NULL) | 6600 if (cur == NULL) |
| 5997 return (NULL); | 6601 return (NULL); |
| 5998 | 6602 |
| 5999 xmlInitParser(); | 6603 xmlInitParser(); |
| 6000 ctxt = htmlCreateDocParserCtxt(cur, NULL); | 6604 ctxt = htmlCreateDocParserCtxt(cur, NULL); |
| 6001 if (ctxt == NULL) | 6605 if (ctxt == NULL) |
| 6002 return (NULL); | 6606 return (NULL); |
| 6003 return (htmlDoRead(ctxt, URL, encoding, options, 0)); | 6607 return (htmlDoRead(ctxt, URL, encoding, options, 0)); |
| 6004 } | 6608 } |
| 6005 | 6609 |
| 6006 /** | 6610 /** |
| 6007 * htmlReadFile: | 6611 * htmlReadFile: |
| 6008 * @filename: a file or URL | 6612 * @filename: a file or URL |
| 6009 * @encoding: the document encoding, or NULL | 6613 * @encoding: the document encoding, or NULL |
| 6010 * @options: a combination of htmlParserOption(s) | 6614 * @options: a combination of htmlParserOption(s) |
| 6011 * | 6615 * |
| 6012 * parse an HTML file from the filesystem or the network. | 6616 * parse an HTML file from the filesystem or the network. |
| 6013 * | 6617 * |
| 6014 * Returns the resulting document tree | 6618 * Returns the resulting document tree |
| 6015 */ | 6619 */ |
| 6016 htmlDocPtr | 6620 htmlDocPtr |
| 6017 htmlReadFile(const char *filename, const char *encoding, int options) | 6621 htmlReadFile(const char *filename, const char *encoding, int options) |
| 6018 { | 6622 { |
| 6019 htmlParserCtxtPtr ctxt; | 6623 htmlParserCtxtPtr ctxt; |
| 6020 | 6624 |
| 6021 xmlInitParser(); | 6625 xmlInitParser(); |
| 6022 ctxt = htmlCreateFileParserCtxt(filename, encoding); | 6626 ctxt = htmlCreateFileParserCtxt(filename, encoding); |
| 6023 if (ctxt == NULL) | 6627 if (ctxt == NULL) |
| 6024 return (NULL); | 6628 return (NULL); |
| 6025 return (htmlDoRead(ctxt, NULL, NULL, options, 0)); | 6629 return (htmlDoRead(ctxt, NULL, NULL, options, 0)); |
| 6026 } | 6630 } |
| 6027 | 6631 |
| 6028 /** | 6632 /** |
| 6029 * htmlReadMemory: | 6633 * htmlReadMemory: |
| 6030 * @buffer: a pointer to a char array | 6634 * @buffer: a pointer to a char array |
| 6031 * @size: the size of the array | 6635 * @size: the size of the array |
| 6032 * @URL: the base URL to use for the document | 6636 * @URL: the base URL to use for the document |
| 6033 * @encoding: the document encoding, or NULL | 6637 * @encoding: the document encoding, or NULL |
| 6034 * @options: a combination of htmlParserOption(s) | 6638 * @options: a combination of htmlParserOption(s) |
| 6035 * | 6639 * |
| 6036 * parse an HTML in-memory document and build a tree. | 6640 * parse an HTML in-memory document and build a tree. |
| 6037 * | 6641 * |
| 6038 * Returns the resulting document tree | 6642 * Returns the resulting document tree |
| 6039 */ | 6643 */ |
| 6040 htmlDocPtr | 6644 htmlDocPtr |
| 6041 htmlReadMemory(const char *buffer, int size, const char *URL, const char *encodi
ng, int options) | 6645 htmlReadMemory(const char *buffer, int size, const char *URL, const char *encodi
ng, int options) |
| 6042 { | 6646 { |
| 6043 htmlParserCtxtPtr ctxt; | 6647 htmlParserCtxtPtr ctxt; |
| 6044 | 6648 |
| 6045 xmlInitParser(); | 6649 xmlInitParser(); |
| 6046 ctxt = xmlCreateMemoryParserCtxt(buffer, size); | 6650 ctxt = xmlCreateMemoryParserCtxt(buffer, size); |
| 6047 if (ctxt == NULL) | 6651 if (ctxt == NULL) |
| 6048 return (NULL); | 6652 return (NULL); |
| 6049 htmlDefaultSAXHandlerInit(); | 6653 htmlDefaultSAXHandlerInit(); |
| 6050 if (ctxt->sax != NULL) | 6654 if (ctxt->sax != NULL) |
| 6051 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); | 6655 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); |
| 6052 return (htmlDoRead(ctxt, URL, encoding, options, 0)); | 6656 return (htmlDoRead(ctxt, URL, encoding, options, 0)); |
| 6053 } | 6657 } |
| 6054 | 6658 |
| 6055 /** | 6659 /** |
| 6056 * htmlReadFd: | 6660 * htmlReadFd: |
| 6057 * @fd: an open file descriptor | 6661 * @fd: an open file descriptor |
| 6058 * @URL: the base URL to use for the document | 6662 * @URL: the base URL to use for the document |
| 6059 * @encoding: the document encoding, or NULL | 6663 * @encoding: the document encoding, or NULL |
| 6060 * @options: a combination of htmlParserOption(s) | 6664 * @options: a combination of htmlParserOption(s) |
| 6061 * | 6665 * |
| 6062 * parse an HTML document from a file descriptor and build a tree. | 6666 * parse an HTML document from a file descriptor and build a tree. |
| 6063 * | 6667 * |
| 6064 * Returns the resulting document tree | 6668 * Returns the resulting document tree |
| 6065 */ | 6669 */ |
| 6066 htmlDocPtr | 6670 htmlDocPtr |
| 6067 htmlReadFd(int fd, const char *URL, const char *encoding, int options) | 6671 htmlReadFd(int fd, const char *URL, const char *encoding, int options) |
| 6068 { | 6672 { |
| 6069 htmlParserCtxtPtr ctxt; | 6673 htmlParserCtxtPtr ctxt; |
| 6070 xmlParserInputBufferPtr input; | 6674 xmlParserInputBufferPtr input; |
| 6071 xmlParserInputPtr stream; | 6675 xmlParserInputPtr stream; |
| 6072 | 6676 |
| 6073 if (fd < 0) | 6677 if (fd < 0) |
| (...skipping 21 matching lines...) Expand all Loading... |
| 6095 /** | 6699 /** |
| 6096 * htmlReadIO: | 6700 * htmlReadIO: |
| 6097 * @ioread: an I/O read function | 6701 * @ioread: an I/O read function |
| 6098 * @ioclose: an I/O close function | 6702 * @ioclose: an I/O close function |
| 6099 * @ioctx: an I/O handler | 6703 * @ioctx: an I/O handler |
| 6100 * @URL: the base URL to use for the document | 6704 * @URL: the base URL to use for the document |
| 6101 * @encoding: the document encoding, or NULL | 6705 * @encoding: the document encoding, or NULL |
| 6102 * @options: a combination of htmlParserOption(s) | 6706 * @options: a combination of htmlParserOption(s) |
| 6103 * | 6707 * |
| 6104 * parse an HTML document from I/O functions and source and build a tree. | 6708 * parse an HTML document from I/O functions and source and build a tree. |
| 6105 * | 6709 * |
| 6106 * Returns the resulting document tree | 6710 * Returns the resulting document tree |
| 6107 */ | 6711 */ |
| 6108 htmlDocPtr | 6712 htmlDocPtr |
| 6109 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, | 6713 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, |
| 6110 void *ioctx, const char *URL, const char *encoding, int options) | 6714 void *ioctx, const char *URL, const char *encoding, int options) |
| 6111 { | 6715 { |
| 6112 htmlParserCtxtPtr ctxt; | 6716 htmlParserCtxtPtr ctxt; |
| 6113 xmlParserInputBufferPtr input; | 6717 xmlParserInputBufferPtr input; |
| 6114 xmlParserInputPtr stream; | 6718 xmlParserInputPtr stream; |
| 6115 | 6719 |
| (...skipping 23 matching lines...) Expand all Loading... |
| 6139 /** | 6743 /** |
| 6140 * htmlCtxtReadDoc: | 6744 * htmlCtxtReadDoc: |
| 6141 * @ctxt: an HTML parser context | 6745 * @ctxt: an HTML parser context |
| 6142 * @cur: a pointer to a zero terminated string | 6746 * @cur: a pointer to a zero terminated string |
| 6143 * @URL: the base URL to use for the document | 6747 * @URL: the base URL to use for the document |
| 6144 * @encoding: the document encoding, or NULL | 6748 * @encoding: the document encoding, or NULL |
| 6145 * @options: a combination of htmlParserOption(s) | 6749 * @options: a combination of htmlParserOption(s) |
| 6146 * | 6750 * |
| 6147 * parse an HTML in-memory document and build a tree. | 6751 * parse an HTML in-memory document and build a tree. |
| 6148 * This reuses the existing @ctxt parser context | 6752 * This reuses the existing @ctxt parser context |
| 6149 * | 6753 * |
| 6150 * Returns the resulting document tree | 6754 * Returns the resulting document tree |
| 6151 */ | 6755 */ |
| 6152 htmlDocPtr | 6756 htmlDocPtr |
| 6153 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, | 6757 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, |
| 6154 const char *URL, const char *encoding, int options) | 6758 const char *URL, const char *encoding, int options) |
| 6155 { | 6759 { |
| 6156 xmlParserInputPtr stream; | 6760 xmlParserInputPtr stream; |
| 6157 | 6761 |
| 6158 if (cur == NULL) | 6762 if (cur == NULL) |
| 6159 return (NULL); | 6763 return (NULL); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 6172 | 6776 |
| 6173 /** | 6777 /** |
| 6174 * htmlCtxtReadFile: | 6778 * htmlCtxtReadFile: |
| 6175 * @ctxt: an HTML parser context | 6779 * @ctxt: an HTML parser context |
| 6176 * @filename: a file or URL | 6780 * @filename: a file or URL |
| 6177 * @encoding: the document encoding, or NULL | 6781 * @encoding: the document encoding, or NULL |
| 6178 * @options: a combination of htmlParserOption(s) | 6782 * @options: a combination of htmlParserOption(s) |
| 6179 * | 6783 * |
| 6180 * parse an HTML file from the filesystem or the network. | 6784 * parse an HTML file from the filesystem or the network. |
| 6181 * This reuses the existing @ctxt parser context | 6785 * This reuses the existing @ctxt parser context |
| 6182 * | 6786 * |
| 6183 * Returns the resulting document tree | 6787 * Returns the resulting document tree |
| 6184 */ | 6788 */ |
| 6185 htmlDocPtr | 6789 htmlDocPtr |
| 6186 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, | 6790 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, |
| 6187 const char *encoding, int options) | 6791 const char *encoding, int options) |
| 6188 { | 6792 { |
| 6189 xmlParserInputPtr stream; | 6793 xmlParserInputPtr stream; |
| 6190 | 6794 |
| 6191 if (filename == NULL) | 6795 if (filename == NULL) |
| 6192 return (NULL); | 6796 return (NULL); |
| (...skipping 14 matching lines...) Expand all Loading... |
| 6207 * htmlCtxtReadMemory: | 6811 * htmlCtxtReadMemory: |
| 6208 * @ctxt: an HTML parser context | 6812 * @ctxt: an HTML parser context |
| 6209 * @buffer: a pointer to a char array | 6813 * @buffer: a pointer to a char array |
| 6210 * @size: the size of the array | 6814 * @size: the size of the array |
| 6211 * @URL: the base URL to use for the document | 6815 * @URL: the base URL to use for the document |
| 6212 * @encoding: the document encoding, or NULL | 6816 * @encoding: the document encoding, or NULL |
| 6213 * @options: a combination of htmlParserOption(s) | 6817 * @options: a combination of htmlParserOption(s) |
| 6214 * | 6818 * |
| 6215 * parse an HTML in-memory document and build a tree. | 6819 * parse an HTML in-memory document and build a tree. |
| 6216 * This reuses the existing @ctxt parser context | 6820 * This reuses the existing @ctxt parser context |
| 6217 * | 6821 * |
| 6218 * Returns the resulting document tree | 6822 * Returns the resulting document tree |
| 6219 */ | 6823 */ |
| 6220 htmlDocPtr | 6824 htmlDocPtr |
| 6221 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, | 6825 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, |
| 6222 const char *URL, const char *encoding, int options) | 6826 const char *URL, const char *encoding, int options) |
| 6223 { | 6827 { |
| 6224 xmlParserInputBufferPtr input; | 6828 xmlParserInputBufferPtr input; |
| 6225 xmlParserInputPtr stream; | 6829 xmlParserInputPtr stream; |
| 6226 | 6830 |
| 6227 if (ctxt == NULL) | 6831 if (ctxt == NULL) |
| (...skipping 21 matching lines...) Expand all Loading... |
| 6249 /** | 6853 /** |
| 6250 * htmlCtxtReadFd: | 6854 * htmlCtxtReadFd: |
| 6251 * @ctxt: an HTML parser context | 6855 * @ctxt: an HTML parser context |
| 6252 * @fd: an open file descriptor | 6856 * @fd: an open file descriptor |
| 6253 * @URL: the base URL to use for the document | 6857 * @URL: the base URL to use for the document |
| 6254 * @encoding: the document encoding, or NULL | 6858 * @encoding: the document encoding, or NULL |
| 6255 * @options: a combination of htmlParserOption(s) | 6859 * @options: a combination of htmlParserOption(s) |
| 6256 * | 6860 * |
| 6257 * parse an HTML document from a file descriptor and build a tree. | 6861 * parse an HTML document from a file descriptor and build a tree. |
| 6258 * This reuses the existing @ctxt parser context | 6862 * This reuses the existing @ctxt parser context |
| 6259 * | 6863 * |
| 6260 * Returns the resulting document tree | 6864 * Returns the resulting document tree |
| 6261 */ | 6865 */ |
| 6262 htmlDocPtr | 6866 htmlDocPtr |
| 6263 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, | 6867 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, |
| 6264 const char *URL, const char *encoding, int options) | 6868 const char *URL, const char *encoding, int options) |
| 6265 { | 6869 { |
| 6266 xmlParserInputBufferPtr input; | 6870 xmlParserInputBufferPtr input; |
| 6267 xmlParserInputPtr stream; | 6871 xmlParserInputPtr stream; |
| 6268 | 6872 |
| 6269 if (fd < 0) | 6873 if (fd < 0) |
| (...skipping 21 matching lines...) Expand all Loading... |
| 6291 * @ctxt: an HTML parser context | 6895 * @ctxt: an HTML parser context |
| 6292 * @ioread: an I/O read function | 6896 * @ioread: an I/O read function |
| 6293 * @ioclose: an I/O close function | 6897 * @ioclose: an I/O close function |
| 6294 * @ioctx: an I/O handler | 6898 * @ioctx: an I/O handler |
| 6295 * @URL: the base URL to use for the document | 6899 * @URL: the base URL to use for the document |
| 6296 * @encoding: the document encoding, or NULL | 6900 * @encoding: the document encoding, or NULL |
| 6297 * @options: a combination of htmlParserOption(s) | 6901 * @options: a combination of htmlParserOption(s) |
| 6298 * | 6902 * |
| 6299 * parse an HTML document from I/O functions and source and build a tree. | 6903 * parse an HTML document from I/O functions and source and build a tree. |
| 6300 * This reuses the existing @ctxt parser context | 6904 * This reuses the existing @ctxt parser context |
| 6301 * | 6905 * |
| 6302 * Returns the resulting document tree | 6906 * Returns the resulting document tree |
| 6303 */ | 6907 */ |
| 6304 htmlDocPtr | 6908 htmlDocPtr |
| 6305 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, | 6909 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, |
| 6306 xmlInputCloseCallback ioclose, void *ioctx, | 6910 xmlInputCloseCallback ioclose, void *ioctx, |
| 6307 const char *URL, | 6911 const char *URL, |
| 6308 const char *encoding, int options) | 6912 const char *encoding, int options) |
| 6309 { | 6913 { |
| 6310 xmlParserInputBufferPtr input; | 6914 xmlParserInputBufferPtr input; |
| 6311 xmlParserInputPtr stream; | 6915 xmlParserInputPtr stream; |
| (...skipping 14 matching lines...) Expand all Loading... |
| 6326 xmlFreeParserInputBuffer(input); | 6930 xmlFreeParserInputBuffer(input); |
| 6327 return (NULL); | 6931 return (NULL); |
| 6328 } | 6932 } |
| 6329 inputPush(ctxt, stream); | 6933 inputPush(ctxt, stream); |
| 6330 return (htmlDoRead(ctxt, URL, encoding, options, 1)); | 6934 return (htmlDoRead(ctxt, URL, encoding, options, 1)); |
| 6331 } | 6935 } |
| 6332 | 6936 |
| 6333 #define bottom_HTMLparser | 6937 #define bottom_HTMLparser |
| 6334 #include "elfgcchack.h" | 6938 #include "elfgcchack.h" |
| 6335 #endif /* LIBXML_HTML_ENABLED */ | 6939 #endif /* LIBXML_HTML_ENABLED */ |
| OLD | NEW |