third_party/libxml/HTMLparser.c - Issue 2951008: Update libxml to 2.7.7.

Side by Side Diff: third_party/libxml/HTMLparser.c

Issue 2951008: Update libxml to 2.7.7. (Closed) Base URL: http://src.chromium.org/git/chromium.git

Patch Set: Created 10 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * HTMLparser.c : an HTML 4.0 non-verifying parser	2 * HTMLparser.c : an HTML 4.0 non-verifying parser

3 *	3 *

4 * See Copyright for the status of this software.	4 * See Copyright for the status of this software.

5 *	5 *

6 * daniel@veillard.com	6 * daniel@veillard.com

7 */	7 */

8	8

9 #define IN_LIBXML	9 #define IN_LIBXML

10 #include "libxml.h"	10 #include "libxml.h"

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
52 /* #define DEBUG_PUSH */	52 /* #define DEBUG_PUSH */

53	53

54 static int htmlOmittedDefaultValue = 1;	54 static int htmlOmittedDefaultValue = 1;

55	55

56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,	56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,

57 xmlChar end, xmlChar end2, xmlChar end3);	57 xmlChar end, xmlChar end2, xmlChar end3);

58 static void htmlParseComment(htmlParserCtxtPtr ctxt);	58 static void htmlParseComment(htmlParserCtxtPtr ctxt);

59	59

60 /************************************************************************	60 /************************************************************************

61 * *	61 * *

62 * » » Some factorized error routines» » » » *	62 » » Some factorized error routines» » » »

63 * *	63 * *

64 ************************************************************************/	64 ************************************************************************/

65	65

66 /**	66 /**

67 * htmlErrMemory:	67 * htmlErrMemory:

68 * @ctxt: an HTML parser context	68 * @ctxt: an HTML parser context

69 * @extra: extra informations	69 * @extra: extra informations

70 *	70 *

71 * Handle a redefinition of attribute error	71 * Handle a redefinition of attribute error

72 */	72 */

(...skipping 67 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
140 ctxt->errNo = error;	140 ctxt->errNo = error;

141 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,	141 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,

142 XML_ERR_ERROR, NULL, 0, NULL, NULL,	142 XML_ERR_ERROR, NULL, 0, NULL, NULL,

143 NULL, val, 0, msg, val);	143 NULL, val, 0, msg, val);

144 if (ctxt != NULL)	144 if (ctxt != NULL)

145 ctxt->wellFormed = 0;	145 ctxt->wellFormed = 0;

146 }	146 }

147	147

148 /************************************************************************	148 /************************************************************************

149 * *	149 * *

150 * » » Parser stacks related functions and macros» » *	150 » Parser stacks related functions and macros» »

151 * *	151 * *

152 ************************************************************************/	152 ************************************************************************/

153	153

154 /**	154 /**

155 * htmlnamePush:	155 * htmlnamePush:

156 * @ctxt: an HTML parser context	156 * @ctxt: an HTML parser context

157 * @value: the element name	157 * @value: the element name

158 *	158 *

159 * Pushes a new element name on top of the name stack	159 * Pushes a new element name on top of the name stack

160 *	160 *

161 * Returns 0 in case of error, the index in the stack otherwise	161 * Returns 0 in case of error, the index in the stack otherwise

162 */	162 */

163 static int	163 static int

164 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)	164 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)

165 {	165 {

	166 if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head")))

	167 ctxt->html = 3;

	168 if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body")))

	169 ctxt->html = 10;

166 if (ctxt->nameNr >= ctxt->nameMax) {	170 if (ctxt->nameNr >= ctxt->nameMax) {

167 ctxt->nameMax *= 2;	171 ctxt->nameMax *= 2;

168 ctxt->nameTab = (const xmlChar * *)	172 ctxt->nameTab = (const xmlChar * *)

169 xmlRealloc((xmlChar * *)ctxt->nameTab,	173 xmlRealloc((xmlChar * *)ctxt->nameTab,

170 ctxt->nameMax *	174 ctxt->nameMax *

171 sizeof(ctxt->nameTab[0]));	175 sizeof(ctxt->nameTab[0]));

172 if (ctxt->nameTab == NULL) {	176 if (ctxt->nameTab == NULL) {

173 htmlErrMemory(ctxt, NULL);	177 htmlErrMemory(ctxt, NULL);

174 return (0);	178 return (0);

175 }	179 }

(...skipping 22 matching lines...) Expand all Loading...
198 return (NULL);	202 return (NULL);

199 if (ctxt->nameNr > 0)	203 if (ctxt->nameNr > 0)

200 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];	204 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];

201 else	205 else

202 ctxt->name = NULL;	206 ctxt->name = NULL;

203 ret = ctxt->nameTab[ctxt->nameNr];	207 ret = ctxt->nameTab[ctxt->nameNr];

204 ctxt->nameTab[ctxt->nameNr] = NULL;	208 ctxt->nameTab[ctxt->nameNr] = NULL;

205 return (ret);	209 return (ret);

206 }	210 }

207	211

	212 /**

	213 * htmlNodeInfoPush:

	214 * @ctxt: an HTML parser context

	215 * @value: the node info

	216 *

	217 * Pushes a new element name on top of the node info stack

	218 *

	219 * Returns 0 in case of error, the index in the stack otherwise

	220 */

	221 static int

	222 htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *value)

	223 {

	224 if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) {

	225 if (ctxt->nodeInfoMax == 0)

	226 ctxt->nodeInfoMax = 5;

	227 ctxt->nodeInfoMax *= 2;

	228 ctxt->nodeInfoTab = (htmlParserNodeInfo *)

	229 xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab,

	230 ctxt->nodeInfoMax *

	231 sizeof(ctxt->nodeInfoTab[0]));

	232 if (ctxt->nodeInfoTab == NULL) {

	233 htmlErrMemory(ctxt, NULL);

	234 return (0);

	235 }

	236 }

	237 ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *value;

	238 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr];

	239 return (ctxt->nodeInfoNr++);

	240 }

	241

	242 /**

	243 * htmlNodeInfoPop:

	244 * @ctxt: an HTML parser context

	245 *

	246 * Pops the top element name from the node info stack

	247 *

	248 * Returns 0 in case of error, the pointer to NodeInfo otherwise

	249 */

	250 static htmlParserNodeInfo *

	251 htmlNodeInfoPop(htmlParserCtxtPtr ctxt)

	252 {

	253 if (ctxt->nodeInfoNr <= 0)

	254 return (NULL);

	255 ctxt->nodeInfoNr--;

	256 if (ctxt->nodeInfoNr < 0)

	257 return (NULL);

	258 if (ctxt->nodeInfoNr > 0)

	259 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1];

	260 else

	261 ctxt->nodeInfo = NULL;

	262 return &ctxt->nodeInfoTab[ctxt->nodeInfoNr];

	263 }

	264

208 /*	265 /*

209 * Macros for accessing the content. Those should be used only by the parser,	266 * Macros for accessing the content. Those should be used only by the parser,

210 * and not exported.	267 * and not exported.

211 *	268 *

212 * Dirty macros, i.e. one need to make assumption on the context to use them	269 * Dirty macros, i.e. one need to make assumption on the context to use them

213 *	270 *

214 * CUR_PTR return the current pointer to the xmlChar to be parsed.	271 * CUR_PTR return the current pointer to the xmlChar to be parsed.

215 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled	272 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled

216 * in ISO-Latin or UTF-8, and the current 16 bit value if compiled	273 * in ISO-Latin or UTF-8, and the current 16 bit value if compiled

217 * in UNICODE mode. This should be used internally by the parser	274 * in UNICODE mode. This should be used internally by the parser

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
256	313

257 #define SKIP_BLANKS htmlSkipBlankChars(ctxt)	314 #define SKIP_BLANKS htmlSkipBlankChars(ctxt)

258	315

259 /* Inported from XML */	316 /* Inported from XML */

260	317

261 /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */	318 /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */

262 #define CUR ((int) (*ctxt->input->cur))	319 #define CUR ((int) (*ctxt->input->cur))

263 #define NEXT xmlNextChar(ctxt)	320 #define NEXT xmlNextChar(ctxt)

264	321

265 #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))	322 #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))

266 #define NXT(val) ctxt->input->cur[(val)]

267 #define CUR_PTR ctxt->input->cur

268	323

269	324

270 #define NEXTL(l) do { \	325 #define NEXTL(l) do { \

271 if (*(ctxt->input->cur) == '\n') { \	326 if (*(ctxt->input->cur) == '\n') { \

272 ctxt->input->line++; ctxt->input->col = 1; \	327 ctxt->input->line++; ctxt->input->col = 1; \

273 } else ctxt->input->col++; \	328 } else ctxt->input->col++; \

274 ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \	329 ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \

275 } while (0)	330 } while (0)

276	331

277 /************	332 /************

278 \	333 \

279 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \	334 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \

280 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	335 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

281 ************/	336 ************/

282	337

283 #define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)	338 #define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)

284 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)	339 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

285	340

286 #define COPY_BUF(l,b,i,v) \	341 #define COPY_BUF(l,b,i,v) \

287 if (l == 1) b[i++] = (xmlChar) v; \	342 if (l == 1) b[i++] = (xmlChar) v; \

288 else i += xmlCopyChar(l,&b[i],v)	343 else i += xmlCopyChar(l,&b[i],v)

289	344

290 /**	345 /**

	346 * htmlFindEncoding:

	347 * @the HTML parser context

	348 *

	349 * Ty to find and encoding in the current data available in the input

	350 * buffer this is needed to try to switch to the proper encoding when

	351 * one face a character error.

	352 * That's an heuristic, since it's operating outside of parsing it could

	353 * try to use a meta which had been commented out, that's the reason it

	354 * should only be used in case of error, not as a default.

	355 *

	356 * Returns an encoding string or NULL if not found, the string need to

	357 * be freed

	358 */

	359 static xmlChar *

	360 htmlFindEncoding(xmlParserCtxtPtr ctxt) {

	361 const xmlChar *start, *cur, *end;

	362

	363 if ((ctxt == NULL) || (ctxt->input == NULL) ||

	364 (ctxt->input->encoding != NULL) || (ctxt->input->buf == NULL) ||

	365 (ctxt->input->buf->encoder != NULL))

	366 return(NULL);

	367 if ((ctxt->input->cur == NULL) || (ctxt->input->end == NULL))

	368 return(NULL);

	369

	370 start = ctxt->input->cur;

	371 end = ctxt->input->end;

	372 /* we also expect the input buffer to be zero terminated */

	373 if (*end != 0)

	374 return(NULL);

	375

	376 cur = xmlStrcasestr(start, BAD_CAST "HTTP-EQUIV");

	377 if (cur == NULL)

	378 return(NULL);

	379 cur = xmlStrcasestr(cur, BAD_CAST "CONTENT");

	380 if (cur == NULL)

	381 return(NULL);

	382 cur = xmlStrcasestr(cur, BAD_CAST "CHARSET=");

	383 if (cur == NULL)

	384 return(NULL);

	385 cur += 8;

	386 start = cur;

	387 while (((*cur >= 'A') && (*cur <= 'Z')) ||

	388 ((*cur >= 'a') && (*cur <= 'z')) ||

	389 ((*cur >= '0') && (*cur <= '9')) ||

	390 (*cur == '-') || (*cur == '_') || (*cur == ':') || (*cur == '/'))

	391 cur++;

	392 if (cur == start)

	393 return(NULL);

	394 return(xmlStrndup(start, cur - start));

	395 }

	396

	397 /**

291 * htmlCurrentChar:	398 * htmlCurrentChar:

292 * @ctxt: the HTML parser context	399 * @ctxt: the HTML parser context

293 * @len: pointer to the length of the char read	400 * @len: pointer to the length of the char read

294 *	401 *

295 * The current char value, if using UTF-8 this may actually span multiple	402 * The current char value, if using UTF-8 this may actually span multiple

296 * bytes in the input buffer. Implement the end of line normalization:	403 * bytes in the input buffer. Implement the end of line normalization:

297 * 2.11 End-of-Line Handling	404 * 2.11 End-of-Line Handling

298 * If the encoding is unspecified, in the case we find an ISO-Latin-1	405 * If the encoding is unspecified, in the case we find an ISO-Latin-1

299 * char, then the encoding converter is plugged in automatically.	406 * char, then the encoding converter is plugged in automatically.

300 *	407 *

301 * Returns the current char value and its length	408 * Returns the current char value and its length

302 */	409 */

303	410

304 static int	411 static int

305 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {	412 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {

306 if (ctxt->instate == XML_PARSER_EOF)	413 if (ctxt->instate == XML_PARSER_EOF)

307 return(0);	414 return(0);

308	415

309 if (ctxt->token != 0) {	416 if (ctxt->token != 0) {

310 *len = 0;	417 *len = 0;

311 return(ctxt->token);	418 return(ctxt->token);

312 }»	419 }

313 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {	420 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {

314 /*	421 /*

315 * We are supposed to handle UTF8, check it's valid	422 * We are supposed to handle UTF8, check it's valid

316 * From rfc2044: encoding of the Unicode values on UTF-8:	423 * From rfc2044: encoding of the Unicode values on UTF-8:

317 *	424 *

318 * UCS-4 range (hex.) UTF-8 octet sequence (binary)	425 * UCS-4 range (hex.) UTF-8 octet sequence (binary)

319 * 0000 0000-0000 007F 0xxxxxxx	426 * 0000 0000-0000 007F 0xxxxxxx

320 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx	427 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx

321 » * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx	428 » * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx

322 *	429 *

323 * Check for the 0x110000 limit too	430 * Check for the 0x110000 limit too

324 */	431 */

325 const unsigned char *cur = ctxt->input->cur;	432 const unsigned char *cur = ctxt->input->cur;

326 unsigned char c;	433 unsigned char c;

327 unsigned int val;	434 unsigned int val;

328	435

329 c = *cur;	436 c = *cur;

330 if (c & 0x80) {	437 if (c & 0x80) {

331 » if (cur[1] == 0)	438 » if (cur[1] == 0) {

332 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);	439 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

	440 cur = ctxt->input->cur;

	441 }

333 if ((cur[1] & 0xc0) != 0x80)	442 if ((cur[1] & 0xc0) != 0x80)

334 goto encoding_error;	443 goto encoding_error;

335 if ((c & 0xe0) == 0xe0) {	444 if ((c & 0xe0) == 0xe0) {

336	445

337 » » if (cur[2] == 0)	446 » » if (cur[2] == 0) {

338 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);	447 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

	448 cur = ctxt->input->cur;

	449 }

339 if ((cur[2] & 0xc0) != 0x80)	450 if ((cur[2] & 0xc0) != 0x80)

340 goto encoding_error;	451 goto encoding_error;

341 if ((c & 0xf0) == 0xf0) {	452 if ((c & 0xf0) == 0xf0) {

342 » » if (cur[3] == 0)	453 » » if (cur[3] == 0) {

343 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);	454 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

	455 cur = ctxt->input->cur;

	456 }

344 if (((c & 0xf8) != 0xf0) ||	457 if (((c & 0xf8) != 0xf0) ||

345 ((cur[3] & 0xc0) != 0x80))	458 ((cur[3] & 0xc0) != 0x80))

346 goto encoding_error;	459 goto encoding_error;

347 /* 4-byte code */	460 /* 4-byte code */

348 *len = 4;	461 *len = 4;

349 val = (cur[0] & 0x7) << 18;	462 val = (cur[0] & 0x7) << 18;

350 val |= (cur[1] & 0x3f) << 12;	463 val |= (cur[1] & 0x3f) << 12;

351 val |= (cur[2] & 0x3f) << 6;	464 val |= (cur[2] & 0x3f) << 6;

352 val |= cur[3] & 0x3f;	465 val |= cur[3] & 0x3f;

353 } else {	466 } else {

354 /* 3-byte code */	467 /* 3-byte code */

355 *len = 3;	468 *len = 3;

356 val = (cur[0] & 0xf) << 12;	469 val = (cur[0] & 0xf) << 12;

357 val |= (cur[1] & 0x3f) << 6;	470 val |= (cur[1] & 0x3f) << 6;

358 val |= cur[2] & 0x3f;	471 val |= cur[2] & 0x3f;

359 }	472 }

360 } else {	473 } else {

361 /* 2-byte code */	474 /* 2-byte code */

362 *len = 2;	475 *len = 2;

363 val = (cur[0] & 0x1f) << 6;	476 val = (cur[0] & 0x1f) << 6;

364 val |= cur[1] & 0x3f;	477 val |= cur[1] & 0x3f;

365 }	478 }

366 if (!IS_CHAR(val)) {	479 if (!IS_CHAR(val)) {

367 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,	480 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,

368 "Char 0x%X out of allowed range\n", val);	481 "Char 0x%X out of allowed range\n", val);

369 » }	482 » }

370 return(val);	483 return(val);

371 } else {	484 } else {

	485 if ((*ctxt->input->cur == 0) &&

	486 (ctxt->input->cur < ctxt->input->end)) {

	487 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,

	488 "Char 0x%X out of allowed range\n", 0);

	489 *len = 1;

	490 return(' ');

	491 }

372 /* 1-byte code */	492 /* 1-byte code */

373 *len = 1;	493 *len = 1;

374 return((int) *ctxt->input->cur);	494 return((int) *ctxt->input->cur);

375 }	495 }

376 }	496 }

377 /*	497 /*

378 * Assume it's a fixed length encoding (1) with	498 * Assume it's a fixed length encoding (1) with

379 * a compatible encoding for the ASCII set, since	499 * a compatible encoding for the ASCII set, since

380 * XML constructs only use < 128 chars	500 * XML constructs only use < 128 chars

381 */	501 */

382 *len = 1;	502 *len = 1;

383 if ((int) *ctxt->input->cur < 0x80)	503 if ((int) *ctxt->input->cur < 0x80)

384 return((int) *ctxt->input->cur);	504 return((int) *ctxt->input->cur);

385	505

386 /*	506 /*

387 * Humm this is bad, do an automatic flow conversion	507 * Humm this is bad, do an automatic flow conversion

388 */	508 */

389 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);	509 {

390 ctxt->charset = XML_CHAR_ENCODING_UTF8;	510 xmlChar * guess;

	511 xmlCharEncodingHandlerPtr handler;

	512

	513 guess = htmlFindEncoding(ctxt);

	514 if (guess == NULL) {

	515 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);

	516 } else {

	517 if (ctxt->input->encoding != NULL)

	518 xmlFree((xmlChar *) ctxt->input->encoding);

	519 ctxt->input->encoding = guess;

	520 handler = xmlFindCharEncodingHandler((const char *) guess);

	521 if (handler != NULL) {

	522 xmlSwitchToEncoding(ctxt, handler);

	523 } else {

	524 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,

	525 "Unsupported encoding %s", guess, NULL);

	526 }

	527 }

	528 ctxt->charset = XML_CHAR_ENCODING_UTF8;

	529 }

	530

391 return(xmlCurrentChar(ctxt, len));	531 return(xmlCurrentChar(ctxt, len));

392	532

393 encoding_error:	533 encoding_error:

394 /*	534 /*

395 * If we detect an UTF8 error that probably mean that the	535 * If we detect an UTF8 error that probably mean that the

396 * input encoding didn't get properly advertized in the	536 * input encoding didn't get properly advertized in the

397 * declaration header. Report the error and switch the encoding	537 * declaration header. Report the error and switch the encoding

398 * to ISO-Latin-1 (if you don't like this policy, just declare the	538 * to ISO-Latin-1 (if you don't like this policy, just declare the

399 * encoding !)	539 * encoding !)

400 */	540 */

401 {	541 {

402 char buffer[150];	542 char buffer[150];

403	543

404 if (ctxt->input->end - ctxt->input->cur >= 4) {	544 if (ctxt->input->end - ctxt->input->cur >= 4) {

405 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",	545 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",

406 ctxt->input->cur[0], ctxt->input->cur[1],	546 ctxt->input->cur[0], ctxt->input->cur[1],

407 ctxt->input->cur[2], ctxt->input->cur[3]);	547 ctxt->input->cur[2], ctxt->input->cur[3]);

408 } else {	548 } else {

409 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);	549 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);

410 }	550 }

411 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,	551 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,

412 "Input is not proper UTF-8, indicate encoding !\n",	552 "Input is not proper UTF-8, indicate encoding !\n",

413 BAD_CAST buffer, NULL);	553 BAD_CAST buffer, NULL);

414 }	554 }

415	555

416 ctxt->charset = XML_CHAR_ENCODING_8859_1;	556 ctxt->charset = XML_CHAR_ENCODING_8859_1;

417 *len = 1;	557 *len = 1;

418 return((int) *ctxt->input->cur);	558 return((int) *ctxt->input->cur);

419 }	559 }

420	560

421 /**	561 /**

422 * htmlSkipBlankChars:	562 * htmlSkipBlankChars:

423 * @ctxt: the HTML parser context	563 * @ctxt: the HTML parser context

424 *	564 *

425 * skip all blanks character found at that point in the input streams.	565 * skip all blanks character found at that point in the input streams.

426 *	566 *

(...skipping 19 matching lines...) Expand all Loading...
446 }	586 }

447 res++;	587 res++;

448 }	588 }

449 return(res);	589 return(res);

450 }	590 }

451	591

452	592

453	593

454 /************************************************************************	594 /************************************************************************

455 * *	595 * *

456 * » » The list of HTML elements and their properties» » *	596 » The list of HTML elements and their properties» »

457 * *	597 * *

458 ************************************************************************/	598 ************************************************************************/

459	599

460 /*	600 /*

461 * Start Tag: 1 means the start tag can be ommited	601 * Start Tag: 1 means the start tag can be ommited

462 * End Tag: 1 means the end tag can be ommited	602 * End Tag: 1 means the end tag can be ommited

463 * 2 means it's forbidden (empty elements)	603 * 2 means it's forbidden (empty elements)

464 * 3 means the tag is stylistic and should be closed easily	604 * 3 means the tag is stylistic and should be closed easily

465 * Depr: this element is deprecated	605 * Depr: this element is deprecated

466 * DTD: 1 means that this element is valid only in the Loose DTD	606 * DTD: 1 means that this element is valid only in the Loose DTD

467 * 2 means that this element is valid only in the Frameset DTD	607 * 2 means that this element is valid only in the Frameset DTD

468 *	608 *

469 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description	609 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description

470 , subElements , impliedsubelt , Attributes, userdata	610 , subElements , impliedsubelt , Attributes, userdata

471 */	611 */

472	612

473 /* Definitions and a couple of vars for HTML Elements */	613 /* Definitions and a couple of vars for HTML Elements */

474	614

475 #define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"	615 #define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"

476 #define NB_FONTSTYLE 8	616 #define NB_FONTSTYLE 8

477 #define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"	617 #define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"

478 #define NB_PHRASE 10	618 #define NB_PHRASE 10

479 #define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"	619 #define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"

480 #define NB_SPECIAL 16	620 #define NB_SPECIAL 16

481 #define INLINE PCDATA FONTSTYLE PHRASE SPECIAL FORMCTRL	621 #define INLINE FONTSTYLE, PHRASE, SPECIAL, FORMCTRL

482 #define NB_INLINE NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL	622 #define NB_INLINE NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL

483 #define BLOCK HEADING, LIST "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"	623 #define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"

484 #define NB_BLOCK NB_HEADING + NB_LIST + 14	624 #define NB_BLOCK NB_HEADING + NB_LIST + 14

485 #define FORMCTRL "input", "select", "textarea", "label", "button"	625 #define FORMCTRL "input", "select", "textarea", "label", "button"

486 #define NB_FORMCTRL 5	626 #define NB_FORMCTRL 5

487 #define PCDATA	627 #define PCDATA

488 #define NB_PCDATA 0	628 #define NB_PCDATA 0

489 #define HEADING "h1", "h2", "h3", "h4", "h5", "h6"	629 #define HEADING "h1", "h2", "h3", "h4", "h5", "h6"

490 #define NB_HEADING 6	630 #define NB_HEADING 6

491 #define LIST "ul", "ol", "dir", "menu"	631 #define LIST "ul", "ol", "dir", "menu"

492 #define NB_LIST 4	632 #define NB_LIST 4

493 #define MODIFIER	633 #define MODIFIER

(...skipping 105 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
599 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ;	739 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ;

600 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ;	740 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ;

601 static const char* const param_attrs[] = { "id", "value", "valuetype", "type", NULL } ;	741 static const char* const param_attrs[] = { "id", "value", "valuetype", "type", NULL } ;

602 static const char* const width_attr[] = { "width", NULL } ;	742 static const char* const width_attr[] = { "width", NULL } ;

603 static const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ;	743 static const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ;

604 static const char* const script_attrs[] = { "charset", "src", "defer", "event", "for", NULL } ;	744 static const char* const script_attrs[] = { "charset", "src", "defer", "event", "for", NULL } ;

605 static const char* const language_attr[] = { "language", NULL } ;	745 static const char* const language_attr[] = { "language", NULL } ;

606 static const char* const select_content[] = { "optgroup", "option", NULL } ;	746 static const char* const select_content[] = { "optgroup", "option", NULL } ;

607 static const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ;	747 static const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ;

608 static const char* const style_attrs[] = { I18N, "media", "title", NULL } ;	748 static const char* const style_attrs[] = { I18N, "media", "title", NULL } ;

609 static const char* const table_attrs[] = { ATTRS "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ;	749 static const char* const table_attrs[] = { ATTRS, "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ;

610 static const char* const table_depr[] = { "align", "bgcolor", NULL } ;	750 static const char* const table_depr[] = { "align", "bgcolor", NULL } ;

611 static const char* const table_contents[] = { "caption", "col", "colgroup", "thead", "tfoot", "tbody", "tr", NULL} ;	751 static const char* const table_contents[] = { "caption", "col", "colgroup", "thead", "tfoot", "tbody", "tr", NULL} ;

612 static const char* const tr_elt[] = { "tr", NULL } ;	752 static const char* const tr_elt[] = { "tr", NULL } ;

613 static const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL} ;	753 static const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL} ;

614 static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height", NULL } ;	754 static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height", NULL } ;

615 static const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "scope", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ;	755 static const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "scope", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ;

616 static const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readonly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL } ;	756 static const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readonly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL } ;

617 static const char* const tr_contents[] = { "th", "td", NULL } ;	757 static const char* const tr_contents[] = { "th", "td", NULL } ;

618 static const char* const bgcolor_attr[] = { "bgcolor", NULL } ;	758 static const char* const bgcolor_attr[] = { "bgcolor", NULL } ;

619 static const char* const li_elt[] = { "li", NULL } ;	759 static const char* const li_elt[] = { "li", NULL } ;

(...skipping 311 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
931 "dl", "p", "dt", "menu", "dir", "address", "pre", "listing",	1071 "dl", "p", "dt", "menu", "dir", "address", "pre", "listing",

932 "xmp", "head", NULL,	1072 "xmp", "head", NULL,

933 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp",	1073 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp",

934 "head", "dd", NULL,	1074 "head", "dd", NULL,

935 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp",	1075 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp",

936 "head", "dt", NULL,	1076 "head", "dt", NULL,

937 "ul", "p", "head", "ol", "menu", "dir", "address", "pre",	1077 "ul", "p", "head", "ol", "menu", "dir", "address", "pre",

938 "listing", "xmp", NULL,	1078 "listing", "xmp", NULL,

939 "ol", "p", "head", "ul", NULL,	1079 "ol", "p", "head", "ul", NULL,

940 "menu", "p", "head", "ul", NULL,	1080 "menu", "p", "head", "ul", NULL,

941 "p",» » "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", NULL,	1081 "p",» » "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL,

942 "div", "p", "head", NULL,	1082 "div", "p", "head", NULL,

943 "noscript", "p", "head", NULL,	1083 "noscript", "p", "head", NULL,

944 "center", "font", "b", "i", "p", "head", NULL,	1084 "center", "font", "b", "i", "p", "head", NULL,

945 "a", "a", NULL,	1085 "a", "a", NULL,

946 "caption", "p", NULL,	1086 "caption", "p", NULL,

947 "colgroup", "caption", "colgroup", "col", "p", NULL,	1087 "colgroup", "caption", "colgroup", "col", "p", NULL,

948 "col", "caption", "col", "p", NULL,	1088 "col", "caption", "col", "p", NULL,

949 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",	1089 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",

950 "listing", "xmp", "a", NULL,	1090 "listing", "xmp", "a", NULL,

951 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,	1091 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,

952 "td",» » "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,	1092 "td",» » "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,

953 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL,	1093 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL,

954 "thead", "caption", "col", "colgroup", NULL,	1094 "thead", "caption", "col", "colgroup", NULL,

955 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead",	1095 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead",

956 "tbody", "p", NULL,	1096 "tbody", "p", NULL,

957 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead",	1097 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead",

958 "tfoot", "tbody", "p", NULL,	1098 "tfoot", "tbody", "p", NULL,

959 "optgroup", "option", NULL,	1099 "optgroup", "option", NULL,

960 "option", "option", NULL,	1100 "option", "option", NULL,

961 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",	1101 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",

962 "pre", "listing", "xmp", "a", NULL,	1102 "pre", "listing", "xmp", "a", NULL,

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1001 "onchange",	1141 "onchange",

1002 "onselect"	1142 "onselect"

1003 };	1143 };

1004	1144

1005 /*	1145 /*

1006 * This table is used by the htmlparser to know what to do with	1146 * This table is used by the htmlparser to know what to do with

1007 * broken html pages. By assigning different priorities to different	1147 * broken html pages. By assigning different priorities to different

1008 * elements the parser can decide how to handle extra endtags.	1148 * elements the parser can decide how to handle extra endtags.

1009 * Endtags are only allowed to close elements with lower or equal	1149 * Endtags are only allowed to close elements with lower or equal

1010 * priority.	1150 * priority.

1011 */	1151 */

1012	1152

1013 typedef struct {	1153 typedef struct {

1014 const char *name;	1154 const char *name;

1015 int priority;	1155 int priority;

1016 } elementPriority;	1156 } elementPriority;

1017	1157

1018 static const elementPriority htmlEndPriority[] = {	1158 static const elementPriority htmlEndPriority[] = {

1019 {"div", 150},	1159 {"div", 150},

1020 {"td", 160},	1160 {"td", 160},

1021 {"th", 160},	1161 {"th", 160},

1022 {"tr", 170},	1162 {"tr", 170},

1023 {"thead", 180},	1163 {"thead", 180},

1024 {"tbody", 180},	1164 {"tbody", 180},

1025 {"tfoot", 180},	1165 {"tfoot", 180},

1026 {"table", 190},	1166 {"table", 190},

1027 {"head", 200},	1167 {"head", 200},

1028 {"body", 200},	1168 {"body", 200},

1029 {"html", 220},	1169 {"html", 220},

1030 {NULL, 100} /* Default priority */	1170 {NULL, 100} /* Default priority */

1031 };	1171 };

1032	1172

1033 static const char** htmlStartCloseIndex[100];	1173 static const char** htmlStartCloseIndex[100];

1034 static int htmlStartCloseIndexinitialized = 0;	1174 static int htmlStartCloseIndexinitialized = 0;

1035	1175

1036 /************************************************************************	1176 /************************************************************************

1037 * *	1177 * *

1038 * » » functions to handle HTML specific data» » » *	1178 » functions to handle HTML specific data» » »

1039 * *	1179 * *

1040 ************************************************************************/	1180 ************************************************************************/

1041	1181

1042 /**	1182 /**

1043 * htmlInitAutoClose:	1183 * htmlInitAutoClose:

1044 *	1184 *

1045 * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.	1185 * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.

1046 * This is not reentrant. Call xmlInitParser() once before processing in	1186 * This is not reentrant. Call xmlInitParser() once before processing in

1047 * case of use in multithreaded programs.	1187 * case of use in multithreaded programs.

1048 */	1188 */

(...skipping 29 matching lines...) Expand all Loading...
1078 sizeof(html40ElementTable[0]));i++) {	1218 sizeof(html40ElementTable[0]));i++) {

1079 if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name))	1219 if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name))

1080 return((htmlElemDescPtr) &html40ElementTable[i]);	1220 return((htmlElemDescPtr) &html40ElementTable[i]);

1081 }	1221 }

1082 return(NULL);	1222 return(NULL);

1083 }	1223 }

1084	1224

1085 /**	1225 /**

1086 * htmlGetEndPriority:	1226 * htmlGetEndPriority:

1087 * @name: The name of the element to look up the priority for.	1227 * @name: The name of the element to look up the priority for.

1088 *	1228 *

1089 * Return value: The "endtag" priority.	1229 * Return value: The "endtag" priority.

1090 **/	1230 **/

1091 static int	1231 static int

1092 htmlGetEndPriority (const xmlChar *name) {	1232 htmlGetEndPriority (const xmlChar *name) {

1093 int i = 0;	1233 int i = 0;

1094	1234

1095 while ((htmlEndPriority[i].name != NULL) &&	1235 while ((htmlEndPriority[i].name != NULL) &&

1096 (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))	1236 (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))

1097 i++;	1237 i++;

1098	1238

(...skipping 58 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1157 priority = htmlGetEndPriority(newtag);	1297 priority = htmlGetEndPriority(newtag);

1158	1298

1159 for (i = (ctxt->nameNr - 1); i >= 0; i--) {	1299 for (i = (ctxt->nameNr - 1); i >= 0; i--) {

1160	1300

1161 if (xmlStrEqual(newtag, ctxt->nameTab[i]))	1301 if (xmlStrEqual(newtag, ctxt->nameTab[i]))

1162 break;	1302 break;

1163 /*	1303 /*

1164 * A misplaced endtag can only close elements with lower	1304 * A misplaced endtag can only close elements with lower

1165 * or equal priority, so if we find an element with higher	1305 * or equal priority, so if we find an element with higher

1166 * priority before we find an element with	1306 * priority before we find an element with

1167 * matching name, we just ignore this endtag	1307 * matching name, we just ignore this endtag

1168 */	1308 */

1169 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)	1309 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)

1170 return;	1310 return;

1171 }	1311 }

1172 if (i < 0)	1312 if (i < 0)

1173 return;	1313 return;

1174	1314

1175 while (!xmlStrEqual(newtag, ctxt->name)) {	1315 while (!xmlStrEqual(newtag, ctxt->name)) {

1176 info = htmlTagLookup(ctxt->name);	1316 info = htmlTagLookup(ctxt->name);

1177 if ((info != NULL) && (info->endTag == 3)) {	1317 if ((info != NULL) && (info->endTag == 3)) {

(...skipping 30 matching lines...) Expand all Loading...
1208 /**	1348 /**

1209 * htmlAutoClose:	1349 * htmlAutoClose:

1210 * @ctxt: an HTML parser context	1350 * @ctxt: an HTML parser context

1211 * @newtag: The new tag name or NULL	1351 * @newtag: The new tag name or NULL

1212 *	1352 *

1213 * The HTML DTD allows a tag to implicitly close other tags.	1353 * The HTML DTD allows a tag to implicitly close other tags.

1214 * The list is kept in htmlStartClose array. This function is	1354 * The list is kept in htmlStartClose array. This function is

1215 * called when a new tag has been detected and generates the	1355 * called when a new tag has been detected and generates the

1216 * appropriate closes if possible/needed.	1356 * appropriate closes if possible/needed.

1217 * If newtag is NULL this means we are at the end of the resource	1357 * If newtag is NULL this means we are at the end of the resource

1218 * and we should check	1358 * and we should check

1219 */	1359 */

1220 static void	1360 static void

1221 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)	1361 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)

1222 {	1362 {

1223 while ((newtag != NULL) && (ctxt->name != NULL) &&	1363 while ((newtag != NULL) && (ctxt->name != NULL) &&

1224 (htmlCheckAutoClose(newtag, ctxt->name))) {	1364 (htmlCheckAutoClose(newtag, ctxt->name))) {

1225 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))	1365 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))

1226 ctxt->sax->endElement(ctxt->userData, ctxt->name);	1366 ctxt->sax->endElement(ctxt->userData, ctxt->name);

1227 htmlnamePop(ctxt);	1367 htmlnamePop(ctxt);

1228 }	1368 }

(...skipping 67 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1296 * htmlCheckImplied:	1436 * htmlCheckImplied:

1297 * @ctxt: an HTML parser context	1437 * @ctxt: an HTML parser context

1298 * @newtag: The new tag name	1438 * @newtag: The new tag name

1299 *	1439 *

1300 * The HTML DTD allows a tag to exist only implicitly	1440 * The HTML DTD allows a tag to exist only implicitly

1301 * called when a new tag has been detected and generates the	1441 * called when a new tag has been detected and generates the

1302 * appropriate implicit tags if missing	1442 * appropriate implicit tags if missing

1303 */	1443 */

1304 static void	1444 static void

1305 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {	1445 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {

	1446 int i;

	1447

	1448 if (ctxt->options & HTML_PARSE_NOIMPLIED)

	1449 return;

1306 if (!htmlOmittedDefaultValue)	1450 if (!htmlOmittedDefaultValue)

1307 return;	1451 return;

1308 if (xmlStrEqual(newtag, BAD_CAST"html"))	1452 if (xmlStrEqual(newtag, BAD_CAST"html"))

1309 return;	1453 return;

1310 if (ctxt->nameNr <= 0) {	1454 if (ctxt->nameNr <= 0) {

1311 htmlnamePush(ctxt, BAD_CAST"html");	1455 htmlnamePush(ctxt, BAD_CAST"html");

1312 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))	1456 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))

1313 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);	1457 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);

1314 }	1458 }

1315 if ((xmlStrEqual(newtag, BAD_CAST"body")) \|\| (xmlStrEqual(newtag, BAD_CAST"head")))	1459 if ((xmlStrEqual(newtag, BAD_CAST"body")) \|\| (xmlStrEqual(newtag, BAD_CAST"head")))

1316 return;	1460 return;

1317 if ((ctxt->nameNr <= 1) &&	1461 if ((ctxt->nameNr <= 1) &&

1318 ((xmlStrEqual(newtag, BAD_CAST"script")) \|\|	1462 ((xmlStrEqual(newtag, BAD_CAST"script")) \|\|

1319 (xmlStrEqual(newtag, BAD_CAST"style")) \|\|	1463 (xmlStrEqual(newtag, BAD_CAST"style")) \|\|

1320 (xmlStrEqual(newtag, BAD_CAST"meta")) \|\|	1464 (xmlStrEqual(newtag, BAD_CAST"meta")) \|\|

1321 (xmlStrEqual(newtag, BAD_CAST"link")) \|\|	1465 (xmlStrEqual(newtag, BAD_CAST"link")) \|\|

1322 (xmlStrEqual(newtag, BAD_CAST"title")) \|\|	1466 (xmlStrEqual(newtag, BAD_CAST"title")) \|\|

1323 (xmlStrEqual(newtag, BAD_CAST"base")))) {	1467 (xmlStrEqual(newtag, BAD_CAST"base")))) {

1324 » /*	1468 if (ctxt->html >= 3) {

1325 » * dropped OBJECT ... if you put it first BODY will be	1469 /* we already saw or generated a <head> before */

1326 » * assumed !	1470 return;

1327 » */	1471 }

1328 » htmlnamePush(ctxt, BAD_CAST"head");	1472 /*

1329 » if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))	1473 * dropped OBJECT ... if you put it first BODY will be

1330 » » ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);	1474 * assumed !

	1475 */

	1476 htmlnamePush(ctxt, BAD_CAST"head");

	1477 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))

	1478 ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);

1331 } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) &&	1479 } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) &&

1332 (!xmlStrEqual(newtag, BAD_CAST"frame")) &&	1480 (!xmlStrEqual(newtag, BAD_CAST"frame")) &&

1333 (!xmlStrEqual(newtag, BAD_CAST"frameset"))) {	1481 (!xmlStrEqual(newtag, BAD_CAST"frameset"))) {

1334 » int i;	1482 if (ctxt->html >= 10) {

	1483 /* we already saw or generated a <body> before */

	1484 return;

	1485 }

1335 for (i = 0;i < ctxt->nameNr;i++) {	1486 for (i = 0;i < ctxt->nameNr;i++) {

1336 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {	1487 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {

1337 return;	1488 return;

1338 }	1489 }

1339 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {	1490 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {

1340 return;	1491 return;

1341 }	1492 }

1342 }	1493 }

1343 »	1494

1344 htmlnamePush(ctxt, BAD_CAST"body");	1495 htmlnamePush(ctxt, BAD_CAST"body");

1345 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))	1496 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))

1346 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);	1497 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);

1347 }	1498 }

1348 }	1499 }

1349	1500

1350 /**	1501 /**

1351 * htmlCheckParagraph	1502 * htmlCheckParagraph

1352 * @ctxt: an HTML parser context	1503 * @ctxt: an HTML parser context

1353 *	1504 *

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1395 *	1546 *

1396 * Check if an attribute is of content type Script	1547 * Check if an attribute is of content type Script

1397 *	1548 *

1398 * Returns 1 if the attribute is a script, 0 otherwise	1549 * Returns 1 if the attribute is a script, 0 otherwise

1399 */	1550 */

1400 int	1551 int

1401 htmlIsScriptAttribute(const xmlChar *name) {	1552 htmlIsScriptAttribute(const xmlChar *name) {

1402 unsigned int i;	1553 unsigned int i;

1403	1554

1404 if (name == NULL)	1555 if (name == NULL)

1405 »return(0);	1556 return(0);

1406 /*	1557 /*

1407 * all script attributes start with 'on'	1558 * all script attributes start with 'on'

1408 */	1559 */

1409 if ((name[0] != 'o') \|\| (name[1] != 'n'))	1560 if ((name[0] != 'o') \|\| (name[1] != 'n'))

1410 »return(0);	1561 return(0);

1411 for (i = 0;	1562 for (i = 0;

1412 i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]);	1563 i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]);

1413 i++) {	1564 i++) {

1414 if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i]))	1565 if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i]))

1415 return(1);	1566 return(1);

1416 }	1567 }

1417 return(0);	1568 return(0);

1418 }	1569 }

1419	1570

1420 /************************************************************************	1571 /************************************************************************

1421 * *	1572 * *

1422 * » » The list of HTML predefined entities» » » *	1573 » The list of HTML predefined entities» » »

1423 * *	1574 * *

1424 ************************************************************************/	1575 ************************************************************************/

1425	1576

1426	1577

1427 static const htmlEntityDesc html40EntitiesTable[] = {	1578 static const htmlEntityDesc html40EntitiesTable[] = {

1428 /*	1579 /*

1429 * the 4 absolute ones, plus apostrophe.	1580 * the 4 absolute ones, plus apostrophe.

1430 */	1581 */

1431 { 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" },	1582 { 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" },

1432 { 38, "amp", "ampersand, U+0026 ISOnum" },	1583 { 38, "amp", "ampersand, U+0026 ISOnum" },

(...skipping 393 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1826 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }	1977 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }

1827 else {	1978 else {

1828 /* no chance for this in Ascii */	1979 /* no chance for this in Ascii */

1829 *outlen = out - outstart;	1980 *outlen = out - outstart;

1830 *inlen = processed - instart;	1981 *inlen = processed - instart;

1831 return(-2);	1982 return(-2);

1832 }	1983 }

1833	1984

1834 if (inend - in < trailing) {	1985 if (inend - in < trailing) {

1835 break;	1986 break;

1836 » }	1987 » }

1837	1988

1838 for ( ; trailing; trailing--) {	1989 for ( ; trailing; trailing--) {

1839 if ((in >= inend) \|\| (((d= *in++) & 0xC0) != 0x80))	1990 if ((in >= inend) \|\| (((d= *in++) & 0xC0) != 0x80))

1840 break;	1991 break;

1841 c <<= 6;	1992 c <<= 6;

1842 c \|= d & 0x3F;	1993 c \|= d & 0x3F;

1843 }	1994 }

1844	1995

1845 /* assertion: c is a single UCS-4 value */	1996 /* assertion: c is a single UCS-4 value */

1846 if (c < 0x80) {	1997 if (c < 0x80) {

(...skipping 169 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2016 return(input);	2167 return(input);

2017 }	2168 }

2018	2169

2019	2170

2020 /************************************************************************	2171 /************************************************************************

2021 * *	2172 * *

2022 * Commodity functions, cleanup needed ? *	2173 * Commodity functions, cleanup needed ? *

2023 * *	2174 * *

2024 ************************************************************************/	2175 ************************************************************************/

2025 /*	2176 /*

2026 * all tags allowing pc data from the html 4.01 loose dtd	2177 * all tags allowing pc data from the html 4.01 loose dtd

2027 * NOTE: it might be more apropriate to integrate this information	2178 * NOTE: it might be more apropriate to integrate this information

2028 * into the html40ElementTable array but I don't want to risk any	2179 * into the html40ElementTable array but I don't want to risk any

2029 * binary incomptibility	2180 * binary incomptibility

2030 */	2181 */

2031 static const char *allowPCData[] = {	2182 static const char *allowPCData[] = {

2032 "a", "abbr", "acronym", "address", "applet", "b", "bdo", "big",	2183 "a", "abbr", "acronym", "address", "applet", "b", "bdo", "big",

2033 "blockquote", "body", "button", "caption", "center", "cite", "code",	2184 "blockquote", "body", "button", "caption", "center", "cite", "code",

2034 "dd", "del", "dfn", "div", "dt", "em", "font", "form", "h1", "h2",	2185 "dd", "del", "dfn", "div", "dt", "em", "font", "form", "h1", "h2",

2035 "h3", "h4", "h5", "h6", "i", "iframe", "ins", "kbd", "label", "legend",	2186 "h3", "h4", "h5", "h6", "i", "iframe", "ins", "kbd", "label", "legend",

2036 "li", "noframes", "noscript", "object", "p", "pre", "q", "s", "samp",	2187 "li", "noframes", "noscript", "object", "p", "pre", "q", "s", "samp",

(...skipping 39 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2076 }	2227 }

2077 }	2228 }

2078	2229

2079 if (ctxt->node == NULL) return(0);	2230 if (ctxt->node == NULL) return(0);

2080 lastChild = xmlGetLastChild(ctxt->node);	2231 lastChild = xmlGetLastChild(ctxt->node);

2081 while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))	2232 while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))

2082 lastChild = lastChild->prev;	2233 lastChild = lastChild->prev;

2083 if (lastChild == NULL) {	2234 if (lastChild == NULL) {

2084 if ((ctxt->node->type != XML_ELEMENT_NODE) &&	2235 if ((ctxt->node->type != XML_ELEMENT_NODE) &&

2085 (ctxt->node->content != NULL)) return(0);	2236 (ctxt->node->content != NULL)) return(0);

2086 » /* keep ws in constructs like ...<b> </b>...	2237 » /* keep ws in constructs like ...<b> </b>...

2087 for all tags "b" allowing PCDATA */	2238 for all tags "b" allowing PCDATA */

2088 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {	2239 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {

2089 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {	2240 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {

2090 return(0);	2241 return(0);

2091 }	2242 }

2092 }	2243 }

2093 } else if (xmlNodeIsText(lastChild)) {	2244 } else if (xmlNodeIsText(lastChild)) {

2094 return(0);	2245 return(0);

2095 } else {	2246 } else {

2096 » /* keep ws in constructs like <p><b>xy</b> <i>z</i><p>	2247 » /* keep ws in constructs like <p><b>xy</b> <i>z</i><p>

2097 for all tags "p" allowing PCDATA */	2248 for all tags "p" allowing PCDATA */

2098 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {	2249 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {

2099 if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) {	2250 if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) {

2100 return(0);	2251 return(0);

2101 }	2252 }

2102 }	2253 }

2103 }	2254 }

2104 return(1);	2255 return(1);

2105 }	2256 }

2106	2257

(...skipping 19 matching lines...) Expand all Loading...
2126 htmlErrMemory(NULL, "HTML document creation failed\n");	2277 htmlErrMemory(NULL, "HTML document creation failed\n");

2127 return(NULL);	2278 return(NULL);

2128 }	2279 }

2129 memset(cur, 0, sizeof(xmlDoc));	2280 memset(cur, 0, sizeof(xmlDoc));

2130	2281

2131 cur->type = XML_HTML_DOCUMENT_NODE;	2282 cur->type = XML_HTML_DOCUMENT_NODE;

2132 cur->version = NULL;	2283 cur->version = NULL;

2133 cur->intSubset = NULL;	2284 cur->intSubset = NULL;

2134 cur->doc = cur;	2285 cur->doc = cur;

2135 cur->name = NULL;	2286 cur->name = NULL;

2136 cur->children = NULL;	2287 cur->children = NULL;

2137 cur->extSubset = NULL;	2288 cur->extSubset = NULL;

2138 cur->oldNs = NULL;	2289 cur->oldNs = NULL;

2139 cur->encoding = NULL;	2290 cur->encoding = NULL;

2140 cur->standalone = 1;	2291 cur->standalone = 1;

2141 cur->compression = 0;	2292 cur->compression = 0;

2142 cur->ids = NULL;	2293 cur->ids = NULL;

2143 cur->refs = NULL;	2294 cur->refs = NULL;

2144 cur->_private = NULL;	2295 cur->_private = NULL;

2145 cur->charset = XML_CHAR_ENCODING_UTF8;	2296 cur->charset = XML_CHAR_ENCODING_UTF8;

	2297 cur->properties = XML_DOC_HTML \| XML_DOC_USERBUILT;

2146 if ((ExternalID != NULL) \|\|	2298 if ((ExternalID != NULL) \|\|

2147 (URI != NULL))	2299 (URI != NULL))

2148 xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI);	2300 xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI);

2149 return(cur);	2301 return(cur);

2150 }	2302 }

2151	2303

2152 /**	2304 /**

2153 * htmlNewDoc:	2305 * htmlNewDoc:

2154 * @URI: URI for the dtd, or NULL	2306 * @URI: URI for the dtd, or NULL

2155 * @ExternalID: the external ID of the DTD, or NULL	2307 * @ExternalID: the external ID of the DTD, or NULL

(...skipping 37 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2193 *	2345 *

2194 * Returns the Tag Name parsed or NULL	2346 * Returns the Tag Name parsed or NULL

2195 */	2347 */

2196	2348

2197 static const xmlChar *	2349 static const xmlChar *

2198 htmlParseHTMLName(htmlParserCtxtPtr ctxt) {	2350 htmlParseHTMLName(htmlParserCtxtPtr ctxt) {

2199 int i = 0;	2351 int i = 0;

2200 xmlChar loc[HTML_PARSER_BUFFER_SIZE];	2352 xmlChar loc[HTML_PARSER_BUFFER_SIZE];

2201	2353

2202 if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&	2354 if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&

2203 (CUR != ':')) return(NULL);	2355 (CUR != ':') && (CUR != '.')) return(NULL);

2204	2356

2205 while ((i < HTML_PARSER_BUFFER_SIZE) &&	2357 while ((i < HTML_PARSER_BUFFER_SIZE) &&

2206 ((IS_ASCII_LETTER(CUR)) \|\| (IS_ASCII_DIGIT(CUR)) \|\|	2358 ((IS_ASCII_LETTER(CUR)) \|\| (IS_ASCII_DIGIT(CUR)) \|\|

2207 » (CUR == ':') \|\| (CUR == '-') \|\| (CUR == '_'))) {	2359 » (CUR == ':') \|\| (CUR == '-') \|\| (CUR == '_') \|\|

	2360 (CUR == '.'))) {

2208 if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;	2361 if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;

2209 else loc[i] = CUR;	2362 else loc[i] = CUR;

2210 i++;	2363 i++;

2211 »	2364

2212 NEXT;	2365 NEXT;

2213 }	2366 }

2214	2367

2215 return(xmlDictLookup(ctxt->dict, loc, i));	2368 return(xmlDictLookup(ctxt->dict, loc, i));

2216 }	2369 }

2217	2370

2218	2371

2219 /**	2372 /**

2220 * htmlParseHTMLName_nonInvasive:	2373 * htmlParseHTMLName_nonInvasive:

2221 * @ctxt: an HTML parser context	2374 * @ctxt: an HTML parser context

2222 *	2375 *

2223 * parse an HTML tag or attribute name, note that we convert it to lowercase	2376 * parse an HTML tag or attribute name, note that we convert it to lowercase

2224 * since HTML names are not case-sensitive, this doesn't consume the data	2377 * since HTML names are not case-sensitive, this doesn't consume the data

2225 * from the stream, it's a look-ahead	2378 * from the stream, it's a look-ahead

2226 *	2379 *

2227 * Returns the Tag Name parsed or NULL	2380 * Returns the Tag Name parsed or NULL

2228 */	2381 */

2229	2382

2230 static const xmlChar *	2383 static const xmlChar *

2231 htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {	2384 htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {

2232 int i = 0;	2385 int i = 0;

2233 xmlChar loc[HTML_PARSER_BUFFER_SIZE];	2386 xmlChar loc[HTML_PARSER_BUFFER_SIZE];

2234	2387

2235 if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') &&	2388 if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') &&

2236 (NXT(1) != ':')) return(NULL);	2389 (NXT(1) != ':')) return(NULL);

2237	2390

2238 while ((i < HTML_PARSER_BUFFER_SIZE) &&	2391 while ((i < HTML_PARSER_BUFFER_SIZE) &&

2239 ((IS_ASCII_LETTER(NXT(1+i))) \|\| (IS_ASCII_DIGIT(NXT(1+i))) \|\|	2392 ((IS_ASCII_LETTER(NXT(1+i))) \|\| (IS_ASCII_DIGIT(NXT(1+i))) \|\|

2240 (NXT(1+i) == ':') \|\| (NXT(1+i) == '-') \|\| (NXT(1+i) == '_'))) {	2393 (NXT(1+i) == ':') \|\| (NXT(1+i) == '-') \|\| (NXT(1+i) == '_'))) {

2241 if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20;	2394 if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20;

2242 else loc[i] = NXT(1+i);	2395 else loc[i] = NXT(1+i);

2243 i++;	2396 i++;

2244 }	2397 }

2245	2398

2246 return(xmlDictLookup(ctxt->dict, loc, i));	2399 return(xmlDictLookup(ctxt->dict, loc, i));

2247 }	2400 }

2248	2401

2249	2402

2250 /**	2403 /**

2251 * htmlParseName:	2404 * htmlParseName:

2252 * @ctxt: an HTML parser context	2405 * @ctxt: an HTML parser context

2253 *	2406 *

2254 * parse an HTML name, this routine is case sensitive.	2407 * parse an HTML name, this routine is case sensitive.

2255 *	2408 *

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2303 c = CUR_CHAR(l);	2456 c = CUR_CHAR(l);

2304 if ((c == ' ') \|\| (c == '>') \|\| (c == '/') \|\| /* accelerators */	2457 if ((c == ' ') \|\| (c == '>') \|\| (c == '/') \|\| /* accelerators */

2305 (!IS_LETTER(c) && (c != '_') &&	2458 (!IS_LETTER(c) && (c != '_') &&

2306 (c != ':'))) {	2459 (c != ':'))) {

2307 return(NULL);	2460 return(NULL);

2308 }	2461 }

2309	2462

2310 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */	2463 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */

2311 ((IS_LETTER(c)) \|\| (IS_DIGIT(c)) \|\|	2464 ((IS_LETTER(c)) \|\| (IS_DIGIT(c)) \|\|

2312 (c == '.') \|\| (c == '-') \|\|	2465 (c == '.') \|\| (c == '-') \|\|

2313 » (c == '_') \|\| (c == ':') \|\|	2466 » (c == '_') \|\| (c == ':') \|\|

2314 (IS_COMBINING(c)) \|\|	2467 (IS_COMBINING(c)) \|\|

2315 (IS_EXTENDER(c)))) {	2468 (IS_EXTENDER(c)))) {

2316 if (count++ > 100) {	2469 if (count++ > 100) {

2317 count = 0;	2470 count = 0;

2318 GROW;	2471 GROW;

2319 }	2472 }

2320 len += l;	2473 len += l;

2321 NEXTL(l);	2474 NEXTL(l);

2322 c = CUR_CHAR(l);	2475 c = CUR_CHAR(l);

2323 }	2476 }

2324 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));	2477 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));

2325 }	2478 }

2326	2479

2327	2480

2328 /**	2481 /**

2329 * htmlParseHTMLAttribute:	2482 * htmlParseHTMLAttribute:

2330 * @ctxt: an HTML parser context	2483 * @ctxt: an HTML parser context

2331 * @stop: a char stop value	2484 * @stop: a char stop value

2332 *	2485 *

2333 * parse an HTML attribute value till the stop (quote), if	2486 * parse an HTML attribute value till the stop (quote), if

2334 * stop is 0 then it stops at the first space	2487 * stop is 0 then it stops at the first space

2335 *	2488 *

2336 * Returns the attribute parsed or NULL	2489 * Returns the attribute parsed or NULL

2337 */	2490 */

2338	2491

2339 static xmlChar *	2492 static xmlChar *

2340 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {	2493 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {

2341 xmlChar *buffer = NULL;	2494 xmlChar *buffer = NULL;

2342 int buffer_size = 0;	2495 int buffer_size = 0;

(...skipping 24 matching lines...) Expand all Loading...
2367 unsigned int c;	2520 unsigned int c;

2368 int bits;	2521 int bits;

2369	2522

2370 c = htmlParseCharRef(ctxt);	2523 c = htmlParseCharRef(ctxt);

2371 if (c < 0x80)	2524 if (c < 0x80)

2372 { *out++ = c; bits= -6; }	2525 { *out++ = c; bits= -6; }

2373 else if (c < 0x800)	2526 else if (c < 0x800)

2374 { *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }	2527 { *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }

2375 else if (c < 0x10000)	2528 else if (c < 0x10000)

2376 { *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }	2529 { *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }

2377 » » else	2530 » » else

2378 { *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }	2531 { *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }

2379 »	2532

2380 for ( ; bits >= 0; bits-= 6) {	2533 for ( ; bits >= 0; bits-= 6) {

2381 *out++ = ((c >> bits) & 0x3F) \| 0x80;	2534 *out++ = ((c >> bits) & 0x3F) \| 0x80;

2382 }	2535 }

2383 » »	2536

2384 if (out - buffer > buffer_size - 100) {	2537 if (out - buffer > buffer_size - 100) {

2385 int indx = out - buffer;	2538 int indx = out - buffer;

2386	2539

2387 growBuffer(buffer);	2540 growBuffer(buffer);

2388 out = &buffer[indx];	2541 out = &buffer[indx];

2389 }	2542 }

2390 } else {	2543 } else {

2391 ent = htmlParseEntityRef(ctxt, &name);	2544 ent = htmlParseEntityRef(ctxt, &name);

2392 if (name == NULL) {	2545 if (name == NULL) {

2393 *out++ = '&';	2546 *out++ = '&';

(...skipping 25 matching lines...) Expand all Loading...
2419 growBuffer(buffer);	2572 growBuffer(buffer);

2420 out = &buffer[indx];	2573 out = &buffer[indx];

2421 }	2574 }

2422 c = ent->value;	2575 c = ent->value;

2423 if (c < 0x80)	2576 if (c < 0x80)

2424 { *out++ = c; bits= -6; }	2577 { *out++ = c; bits= -6; }

2425 else if (c < 0x800)	2578 else if (c < 0x800)

2426 { *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }	2579 { *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }

2427 else if (c < 0x10000)	2580 else if (c < 0x10000)

2428 { *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }	2581 { *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }

2429 » » else	2582 » » else

2430 { *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }	2583 { *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }

2431 »	2584

2432 for ( ; bits >= 0; bits-= 6) {	2585 for ( ; bits >= 0; bits-= 6) {

2433 *out++ = ((c >> bits) & 0x3F) \| 0x80;	2586 *out++ = ((c >> bits) & 0x3F) \| 0x80;

2434 }	2587 }

2435 }	2588 }

2436 }	2589 }

2437 } else {	2590 } else {

2438 unsigned int c;	2591 unsigned int c;

2439 int bits, l;	2592 int bits, l;

2440	2593

2441 if (out - buffer > buffer_size - 100) {	2594 if (out - buffer > buffer_size - 100) {

2442 int indx = out - buffer;	2595 int indx = out - buffer;

2443	2596

2444 growBuffer(buffer);	2597 growBuffer(buffer);

2445 out = &buffer[indx];	2598 out = &buffer[indx];

2446 }	2599 }

2447 c = CUR_CHAR(l);	2600 c = CUR_CHAR(l);

2448 if (c < 0x80)	2601 if (c < 0x80)

2449 { *out++ = c; bits= -6; }	2602 { *out++ = c; bits= -6; }

2450 else if (c < 0x800)	2603 else if (c < 0x800)

2451 { *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }	2604 { *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }

2452 else if (c < 0x10000)	2605 else if (c < 0x10000)

2453 { *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }	2606 { *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }

2454 » else	2607 » else

2455 { *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }	2608 { *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }

2456	2609

2457 for ( ; bits >= 0; bits-= 6) {	2610 for ( ; bits >= 0; bits-= 6) {

2458 *out++ = ((c >> bits) & 0x3F) \| 0x80;	2611 *out++ = ((c >> bits) & 0x3F) \| 0x80;

2459 }	2612 }

2460 NEXT;	2613 NEXT;

2461 }	2614 }

2462 }	2615 }

2463 *out++ = 0;	2616 *out = 0;

2464 return(buffer);	2617 return(buffer);

2465 }	2618 }

2466	2619

2467 /**	2620 /**

2468 * htmlParseEntityRef:	2621 * htmlParseEntityRef:

2469 * @ctxt: an HTML parser context	2622 * @ctxt: an HTML parser context

2470 * @str: location to store the entity name	2623 * @str: location to store the entity name

2471 *	2624 *

2472 * parse an HTML ENTITY references	2625 * parse an HTML ENTITY references

2473 *	2626 *

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2514 return(ent);	2667 return(ent);

2515 }	2668 }

2516	2669

2517 /**	2670 /**

2518 * htmlParseAttValue:	2671 * htmlParseAttValue:

2519 * @ctxt: an HTML parser context	2672 * @ctxt: an HTML parser context

2520 *	2673 *

2521 * parse a value for an attribute	2674 * parse a value for an attribute

2522 * Note: the parser won't do substitution of entities here, this	2675 * Note: the parser won't do substitution of entities here, this

2523 * will be handled later in xmlStringGetNodeList, unless it was	2676 * will be handled later in xmlStringGetNodeList, unless it was

2524 * asked for ctxt->replaceEntities != 0	2677 * asked for ctxt->replaceEntities != 0

2525 *	2678 *

2526 * Returns the AttValue parsed or NULL.	2679 * Returns the AttValue parsed or NULL.

2527 */	2680 */

2528	2681

2529 static xmlChar *	2682 static xmlChar *

2530 htmlParseAttValue(htmlParserCtxtPtr ctxt) {	2683 htmlParseAttValue(htmlParserCtxtPtr ctxt) {

2531 xmlChar *ret = NULL;	2684 xmlChar *ret = NULL;

2532	2685

2533 if (CUR == '"') {	2686 if (CUR == '"') {

2534 NEXT;	2687 NEXT;

(...skipping 20 matching lines...) Expand all Loading...
2555 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,	2708 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,

2556 "AttValue: no value found\n", NULL, NULL);	2709 "AttValue: no value found\n", NULL, NULL);

2557 }	2710 }

2558 }	2711 }

2559 return(ret);	2712 return(ret);

2560 }	2713 }

2561	2714

2562 /**	2715 /**

2563 * htmlParseSystemLiteral:	2716 * htmlParseSystemLiteral:

2564 * @ctxt: an HTML parser context	2717 * @ctxt: an HTML parser context

2565 *	2718 *

2566 * parse an HTML Literal	2719 * parse an HTML Literal

2567 *	2720 *

2568 * [11] SystemLiteral ::= ('"' [^"]* '"') \| ("'" [^']* "'")	2721 * [11] SystemLiteral ::= ('"' [^"]* '"') \| ("'" [^']* "'")

2569 *	2722 *

2570 * Returns the SystemLiteral parsed or NULL	2723 * Returns the SystemLiteral parsed or NULL

2571 */	2724 */

2572	2725

2573 static xmlChar *	2726 static xmlChar *

2574 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {	2727 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {

2575 const xmlChar *q;	2728 const xmlChar *q;

(...skipping 20 matching lines...) Expand all Loading...
2596 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,	2749 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,

2597 "Unfinished SystemLiteral\n", NULL, NULL);	2750 "Unfinished SystemLiteral\n", NULL, NULL);

2598 } else {	2751 } else {

2599 ret = xmlStrndup(q, CUR_PTR - q);	2752 ret = xmlStrndup(q, CUR_PTR - q);

2600 NEXT;	2753 NEXT;

2601 }	2754 }

2602 } else {	2755 } else {

2603 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,	2756 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,

2604 " or ' expected\n", NULL, NULL);	2757 " or ' expected\n", NULL, NULL);

2605 }	2758 }

2606	2759

2607 return(ret);	2760 return(ret);

2608 }	2761 }

2609	2762

2610 /**	2763 /**

2611 * htmlParsePubidLiteral:	2764 * htmlParsePubidLiteral:

2612 * @ctxt: an HTML parser context	2765 * @ctxt: an HTML parser context

2613 *	2766 *

2614 * parse an HTML public literal	2767 * parse an HTML public literal

2615 *	2768 *

2616 * [12] PubidLiteral ::= '"' PubidChar* '"' \| "'" (PubidChar - "'")* "'"	2769 * [12] PubidLiteral ::= '"' PubidChar* '"' \| "'" (PubidChar - "'")* "'"

(...skipping 28 matching lines...) Expand all Loading...
2645 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,	2798 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,

2646 "Unfinished PubidLiteral\n", NULL, NULL);	2799 "Unfinished PubidLiteral\n", NULL, NULL);

2647 } else {	2800 } else {

2648 ret = xmlStrndup(q, CUR_PTR - q);	2801 ret = xmlStrndup(q, CUR_PTR - q);

2649 NEXT;	2802 NEXT;

2650 }	2803 }

2651 } else {	2804 } else {

2652 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,	2805 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,

2653 "PubidLiteral \" or ' expected\n", NULL, NULL);	2806 "PubidLiteral \" or ' expected\n", NULL, NULL);

2654 }	2807 }

2655	2808

2656 return(ret);	2809 return(ret);

2657 }	2810 }

2658	2811

2659 /**	2812 /**

2660 * htmlParseScript:	2813 * htmlParseScript:

2661 * @ctxt: an HTML parser context	2814 * @ctxt: an HTML parser context

2662 *	2815 *

2663 * parse the content of an HTML SCRIPT or STYLE element	2816 * parse the content of an HTML SCRIPT or STYLE element

2664 * http://www.w3.org/TR/html4/sgml/dtd.html#Script	2817 * http://www.w3.org/TR/html4/sgml/dtd.html#Script

2665 * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet	2818 * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet

(...skipping 26 matching lines...) Expand all Loading...
2692 * Authors should therefore escape "</" within the content.	2845 * Authors should therefore escape "</" within the content.

2693 * Escape mechanisms are specific to each scripting or	2846 * Escape mechanisms are specific to each scripting or

2694 * style sheet language.	2847 * style sheet language.

2695 *	2848 *

2696 * In recovery mode, only break if end tag match the	2849 * In recovery mode, only break if end tag match the

2697 * current tag, effectively ignoring all tags inside the	2850 * current tag, effectively ignoring all tags inside the

2698 * script/style block and treating the entire block as	2851 * script/style block and treating the entire block as

2699 * CDATA.	2852 * CDATA.

2700 */	2853 */

2701 if (ctxt->recovery) {	2854 if (ctxt->recovery) {

2702 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,	2855 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,

2703 » » » » xmlStrlen(ctxt->name)) == 0)	2856 » » » » xmlStrlen(ctxt->name)) == 0)

2704 {	2857 {

2705 break; /* while */	2858 break; /* while */

2706 } else {	2859 } else {

2707 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,	2860 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,

2708 "Element %s embeds close tag\n",	2861 "Element %s embeds close tag\n",

2709 ctxt->name, NULL);	2862 ctxt->name, NULL);

2710 }	2863 }

2711 } else {	2864 } else {

2712 if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) \|\|	2865 if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) \|\|

2713 ((NXT(2) >= 'a') && (NXT(2) <= 'z')))	2866 ((NXT(2) >= 'a') && (NXT(2) <= 'z')))

2714 {	2867 {

2715 break; /* while */	2868 break; /* while */

2716 }	2869 }

2717 }	2870 }

2718 }	2871 }

2719 COPY_BUF(l,buf,nbchar,cur);	2872 COPY_BUF(l,buf,nbchar,cur);

2720 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {	2873 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {

2721 if (ctxt->sax->cdataBlock!= NULL) {	2874 if (ctxt->sax->cdataBlock!= NULL) {

2722 /*	2875 /*

2723 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE	2876 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2760 * if we are within a CDATA section ']]>' marks an end of section.	2913 * if we are within a CDATA section ']]>' marks an end of section.

2761 *	2914 *

2762 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)	2915 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)

2763 */	2916 */

2764	2917

2765 static void	2918 static void

2766 htmlParseCharData(htmlParserCtxtPtr ctxt) {	2919 htmlParseCharData(htmlParserCtxtPtr ctxt) {

2767 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];	2920 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];

2768 int nbchar = 0;	2921 int nbchar = 0;

2769 int cur, l;	2922 int cur, l;

	2923 int chunk = 0;

2770	2924

2771 SHRINK;	2925 SHRINK;

2772 cur = CUR_CHAR(l);	2926 cur = CUR_CHAR(l);

2773 while (((cur != '<') \|\| (ctxt->token == '<')) &&	2927 while (((cur != '<') \|\| (ctxt->token == '<')) &&

2774 ((cur != '&') \|\| (ctxt->token == '&')) &&	2928 ((cur != '&') \|\| (ctxt->token == '&')) &&

2775 (cur != 0)) {	2929 (cur != 0)) {

2776 if (!(IS_CHAR(cur))) {	2930 if (!(IS_CHAR(cur))) {

2777 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,	2931 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,

2778 "Invalid char in CDATA 0x%X\n", cur);	2932 "Invalid char in CDATA 0x%X\n", cur);

2779 } else {	2933 } else {

2780 COPY_BUF(l,buf,nbchar,cur);	2934 COPY_BUF(l,buf,nbchar,cur);

2781 }	2935 }

2782 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {	2936 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {

2783 /*	2937 /*

2784 * Ok the segment is to be consumed as chars.	2938 * Ok the segment is to be consumed as chars.

2785 */	2939 */

2786 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {	2940 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {

2787 if (areBlanks(ctxt, buf, nbchar)) {	2941 if (areBlanks(ctxt, buf, nbchar)) {

2788 if (ctxt->sax->ignorableWhitespace != NULL)	2942 if (ctxt->sax->ignorableWhitespace != NULL)

2789 ctxt->sax->ignorableWhitespace(ctxt->userData,	2943 ctxt->sax->ignorableWhitespace(ctxt->userData,

2790 buf, nbchar);	2944 buf, nbchar);

2791 } else {	2945 } else {

2792 htmlCheckParagraph(ctxt);	2946 htmlCheckParagraph(ctxt);

2793 if (ctxt->sax->characters != NULL)	2947 if (ctxt->sax->characters != NULL)

2794 ctxt->sax->characters(ctxt->userData, buf, nbchar);	2948 ctxt->sax->characters(ctxt->userData, buf, nbchar);

2795 }	2949 }

2796 }	2950 }

2797 nbchar = 0;	2951 nbchar = 0;

2798 }	2952 }

2799 NEXTL(l);	2953 NEXTL(l);

	2954 chunk++;

	2955 if (chunk > HTML_PARSER_BUFFER_SIZE) {

	2956 chunk = 0;

	2957 SHRINK;

	2958 GROW;

	2959 }

2800 cur = CUR_CHAR(l);	2960 cur = CUR_CHAR(l);

2801 if (cur == 0) {	2961 if (cur == 0) {

2802 SHRINK;	2962 SHRINK;

2803 GROW;	2963 GROW;

2804 cur = CUR_CHAR(l);	2964 cur = CUR_CHAR(l);

2805 }	2965 }

2806 }	2966 }

2807 if (nbchar != 0) {	2967 if (nbchar != 0) {

2808 buf[nbchar] = 0;	2968 buf[nbchar] = 0;

2809	2969

(...skipping 174 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2984 /*	3144 /*

2985 * SAX: PI detected.	3145 * SAX: PI detected.

2986 */	3146 */

2987 if ((ctxt->sax) && (!ctxt->disableSAX) &&	3147 if ((ctxt->sax) && (!ctxt->disableSAX) &&

2988 (ctxt->sax->processingInstruction != NULL))	3148 (ctxt->sax->processingInstruction != NULL))

2989 ctxt->sax->processingInstruction(ctxt->userData,	3149 ctxt->sax->processingInstruction(ctxt->userData,

2990 target, buf);	3150 target, buf);

2991 }	3151 }

2992 xmlFree(buf);	3152 xmlFree(buf);

2993 } else {	3153 } else {

2994 » htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,	3154 » htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,

2995 "PI is not started correctly", NULL, NULL);	3155 "PI is not started correctly", NULL, NULL);

2996 }	3156 }

2997 ctxt->instate = state;	3157 ctxt->instate = state;

2998 }	3158 }

2999 }	3159 }

3000	3160

3001 /**	3161 /**

3002 * htmlParseComment:	3162 * htmlParseComment:

3003 * @ctxt: an HTML parser context	3163 * @ctxt: an HTML parser context

3004 *	3164 *

(...skipping 95 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3100 if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {	3260 if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {

3101 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,	3261 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,

3102 "htmlParseCharRef: context error\n",	3262 "htmlParseCharRef: context error\n",

3103 NULL, NULL);	3263 NULL, NULL);

3104 return(0);	3264 return(0);

3105 }	3265 }

3106 if ((CUR == '&') && (NXT(1) == '#') &&	3266 if ((CUR == '&') && (NXT(1) == '#') &&

3107 ((NXT(2) == 'x') \|\| NXT(2) == 'X')) {	3267 ((NXT(2) == 'x') \|\| NXT(2) == 'X')) {

3108 SKIP(3);	3268 SKIP(3);

3109 while (CUR != ';') {	3269 while (CUR != ';') {

3110 » if ((CUR >= '0') && (CUR <= '9'))	3270 » if ((CUR >= '0') && (CUR <= '9'))

3111 val = val * 16 + (CUR - '0');	3271 val = val * 16 + (CUR - '0');

3112 else if ((CUR >= 'a') && (CUR <= 'f'))	3272 else if ((CUR >= 'a') && (CUR <= 'f'))

3113 val = val * 16 + (CUR - 'a') + 10;	3273 val = val * 16 + (CUR - 'a') + 10;

3114 else if ((CUR >= 'A') && (CUR <= 'F'))	3274 else if ((CUR >= 'A') && (CUR <= 'F'))

3115 val = val * 16 + (CUR - 'A') + 10;	3275 val = val * 16 + (CUR - 'A') + 10;

3116 else {	3276 else {

3117 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,	3277 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,

3118 "htmlParseCharRef: missing semicolumn\n",	3278 "htmlParseCharRef: missing semicolumn\n",

3119 NULL, NULL);	3279 NULL, NULL);

3120 break;	3280 break;

3121 }	3281 }

3122 NEXT;	3282 NEXT;

3123 }	3283 }

3124 if (CUR == ';')	3284 if (CUR == ';')

3125 NEXT;	3285 NEXT;

3126 } else if ((CUR == '&') && (NXT(1) == '#')) {	3286 } else if ((CUR == '&') && (NXT(1) == '#')) {

3127 SKIP(2);	3287 SKIP(2);

3128 while (CUR != ';') {	3288 while (CUR != ';') {

3129 » if ((CUR >= '0') && (CUR <= '9'))	3289 » if ((CUR >= '0') && (CUR <= '9'))

3130 val = val * 10 + (CUR - '0');	3290 val = val * 10 + (CUR - '0');

3131 else {	3291 else {

3132 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,	3292 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,

3133 "htmlParseCharRef: missing semicolumn\n",	3293 "htmlParseCharRef: missing semicolumn\n",

3134 NULL, NULL);	3294 NULL, NULL);

3135 break;	3295 break;

3136 }	3296 }

3137 NEXT;	3297 NEXT;

3138 }	3298 }

3139 if (CUR == ';')	3299 if (CUR == ';')

(...skipping 15 matching lines...) Expand all Loading...
3155 return(0);	3315 return(0);

3156 }	3316 }

3157	3317

3158	3318

3159 /**	3319 /**

3160 * htmlParseDocTypeDecl:	3320 * htmlParseDocTypeDecl:

3161 * @ctxt: an HTML parser context	3321 * @ctxt: an HTML parser context

3162 *	3322 *

3163 * parse a DOCTYPE declaration	3323 * parse a DOCTYPE declaration

3164 *	3324 *

3165 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?	3325 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?

3166 * ('[' (markupdecl \| PEReference \| S)* ']' S?)? '>'	3326 * ('[' (markupdecl \| PEReference \| S)* ']' S?)? '>'

3167 */	3327 */

3168	3328

3169 static void	3329 static void

3170 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {	3330 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {

3171 const xmlChar *name;	3331 const xmlChar *name;

3172 xmlChar *ExternalID = NULL;	3332 xmlChar *ExternalID = NULL;

3173 xmlChar *URI = NULL;	3333 xmlChar *URI = NULL;

3174	3334

3175 /*	3335 /*

(...skipping 83 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3259 }	3419 }

3260	3420

3261 /*	3421 /*

3262 * read the value	3422 * read the value

3263 */	3423 */

3264 SKIP_BLANKS;	3424 SKIP_BLANKS;

3265 if (CUR == '=') {	3425 if (CUR == '=') {

3266 NEXT;	3426 NEXT;

3267 SKIP_BLANKS;	3427 SKIP_BLANKS;

3268 val = htmlParseAttValue(ctxt);	3428 val = htmlParseAttValue(ctxt);

3269 } else if (htmlIsBooleanAttr(name)) {

3270 /*

3271 * assume a minimized attribute

3272 */

3273 val = xmlStrdup(name);

3274 }	3429 }

3275	3430

3276 *value = val;	3431 *value = val;

3277 return(name);	3432 return(name);

3278 }	3433 }

3279	3434

3280 /**	3435 /**

3281 * htmlCheckEncoding:	3436 * htmlCheckEncoding:

3282 * @ctxt: an HTML parser context	3437 * @ctxt: an HTML parser context

3283 * @attvalue: the attribute value	3438 * @attvalue: the attribute value

3284 *	3439 *

3285 * Checks an http-equiv attribute from a Meta tag to detect	3440 * Checks an http-equiv attribute from a Meta tag to detect

3286 * the encoding	3441 * the encoding

3287 * If a new encoding is detected the parser is switched to decode	3442 * If a new encoding is detected the parser is switched to decode

3288 * it and pass UTF8	3443 * it and pass UTF8

3289 */	3444 */

3290 static void	3445 static void

3291 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {	3446 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {

3292 const xmlChar *encoding;	3447 const xmlChar *encoding;

3293	3448

3294 if ((ctxt == NULL) \|\| (attvalue == NULL))	3449 if ((ctxt == NULL) \|\| (attvalue == NULL))

3295 return;	3450 return;

3296	3451

3297 /* do not change encoding */»	3452 /* do not change encoding */

3298 if (ctxt->input->encoding != NULL)	3453 if (ctxt->input->encoding != NULL)

3299 return;	3454 return;

3300	3455

3301 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset=");	3456 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset=");

3302 if (encoding != NULL) {	3457 if (encoding != NULL) {

3303 encoding += 8;	3458 encoding += 8;

3304 } else {	3459 } else {

3305 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset =");	3460 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset =");

3306 if (encoding != NULL)	3461 if (encoding != NULL)

3307 encoding += 9;	3462 encoding += 9;

3308 }	3463 }

3309 if (encoding != NULL) {	3464 if (encoding != NULL) {

3310 xmlCharEncoding enc;	3465 xmlCharEncoding enc;

3311 xmlCharEncodingHandlerPtr handler;	3466 xmlCharEncodingHandlerPtr handler;

3312	3467

3313 while ((*encoding == ' ') \|\| (*encoding == '\t')) encoding++;	3468 while ((*encoding == ' ') \|\| (*encoding == '\t')) encoding++;

3314	3469

3315 if (ctxt->input->encoding != NULL)	3470 if (ctxt->input->encoding != NULL)

3316 xmlFree((xmlChar *) ctxt->input->encoding);	3471 xmlFree((xmlChar *) ctxt->input->encoding);

3317 ctxt->input->encoding = xmlStrdup(encoding);	3472 ctxt->input->encoding = xmlStrdup(encoding);

3318	3473

3319 enc = xmlParseCharEncoding((const char *) encoding);	3474 enc = xmlParseCharEncoding((const char *) encoding);

3320 /*	3475 /*

3321 * registered set of known encodings	3476 * registered set of known encodings

3322 */	3477 */

3323 if (enc != XML_CHAR_ENCODING_ERROR) {	3478 if (enc != XML_CHAR_ENCODING_ERROR) {

3324 » if (((enc == XML_CHAR_ENCODING_UTF16LE) \|\|	3479 » if (((enc == XML_CHAR_ENCODING_UTF16LE) \|\|

3325 (enc == XML_CHAR_ENCODING_UTF16BE) \|\|	3480 (enc == XML_CHAR_ENCODING_UTF16BE) \|\|

3326 (enc == XML_CHAR_ENCODING_UCS4LE) \|\|	3481 (enc == XML_CHAR_ENCODING_UCS4LE) \|\|

3327 (enc == XML_CHAR_ENCODING_UCS4BE)) &&	3482 (enc == XML_CHAR_ENCODING_UCS4BE)) &&

3328 (ctxt->input->buf != NULL) &&	3483 (ctxt->input->buf != NULL) &&

3329 (ctxt->input->buf->encoder == NULL)) {	3484 (ctxt->input->buf->encoder == NULL)) {

3330 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,	3485 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,

3331 "htmlCheckEncoding: wrong encoding meta\n",	3486 "htmlCheckEncoding: wrong encoding meta\n",

3332 NULL, NULL);	3487 NULL, NULL);

3333 } else {	3488 } else {

3334 xmlSwitchEncoding(ctxt, enc);	3489 xmlSwitchEncoding(ctxt, enc);

(...skipping 27 matching lines...) Expand all Loading...
3362 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,	3517 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,

3363 ctxt->input->buf->buffer,	3518 ctxt->input->buf->buffer,

3364 ctxt->input->buf->raw);	3519 ctxt->input->buf->raw);

3365 if (nbchars < 0) {	3520 if (nbchars < 0) {

3366 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,	3521 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,

3367 "htmlCheckEncoding: encoder error\n",	3522 "htmlCheckEncoding: encoder error\n",

3368 NULL, NULL);	3523 NULL, NULL);

3369 }	3524 }

3370 ctxt->input->base =	3525 ctxt->input->base =

3371 ctxt->input->cur = ctxt->input->buf->buffer->content;	3526 ctxt->input->cur = ctxt->input->buf->buffer->content;

	3527 ctxt->input->end =

	3528 &ctxt->input->base[ctxt->input->buf->buffer->use];

3372 }	3529 }

3373 }	3530 }

3374 }	3531 }

3375	3532

3376 /**	3533 /**

3377 * htmlCheckMeta:	3534 * htmlCheckMeta:

3378 * @ctxt: an HTML parser context	3535 * @ctxt: an HTML parser context

3379 * @atts: the attributes values	3536 * @atts: the attributes values

3380 *	3537 *

3381 * Checks an attributes from a Meta tag	3538 * Checks an attributes from a Meta tag

(...skipping 20 matching lines...) Expand all Loading...
3402 att = atts[i++];	3559 att = atts[i++];

3403 }	3560 }

3404 if ((http) && (content != NULL))	3561 if ((http) && (content != NULL))

3405 htmlCheckEncoding(ctxt, content);	3562 htmlCheckEncoding(ctxt, content);

3406	3563

3407 }	3564 }

3408	3565

3409 /**	3566 /**

3410 * htmlParseStartTag:	3567 * htmlParseStartTag:

3411 * @ctxt: an HTML parser context	3568 * @ctxt: an HTML parser context

3412 *	3569 *

3413 * parse a start of tag either for rule element or	3570 * parse a start of tag either for rule element or

3414 * EmptyElement. In both cases we don't parse the tag closing chars.	3571 * EmptyElement. In both cases we don't parse the tag closing chars.

3415 *	3572 *

3416 * [40] STag ::= '<' Name (S Attribute)* S? '>'	3573 * [40] STag ::= '<' Name (S Attribute)* S? '>'

3417 *	3574 *

3418 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'	3575 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'

3419 *	3576 *

3420 * With namespace:	3577 * With namespace:

3421 *	3578 *

3422 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'	3579 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'

3423 *	3580 *

3424 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'	3581 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'

3425 *	3582 *

3426 * Returns 0 in case of success, -1 in case of error and 1 if discarded	3583 * Returns 0 in case of success, -1 in case of error and 1 if discarded

3427 */	3584 */

3428	3585

3429 static int	3586 static int

3430 htmlParseStartTag(htmlParserCtxtPtr ctxt) {	3587 htmlParseStartTag(htmlParserCtxtPtr ctxt) {

3431 const xmlChar *name;	3588 const xmlChar *name;

3432 const xmlChar *attname;	3589 const xmlChar *attname;

3433 xmlChar *attvalue;	3590 xmlChar *attvalue;

3434 const xmlChar **atts;	3591 const xmlChar **atts;

3435 int nbatts = 0;	3592 int nbatts = 0;

3436 int maxatts;	3593 int maxatts;

3437 int meta = 0;	3594 int meta = 0;

3438 int i;	3595 int i;

3439 int discardtag = 0;	3596 int discardtag = 0;

3440	3597

	3598 if (ctxt->instate == XML_PARSER_EOF)

	3599 return(-1);

3441 if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {	3600 if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {

3442 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,	3601 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,

3443 "htmlParseStartTag: context error\n", NULL, NULL);	3602 "htmlParseStartTag: context error\n", NULL, NULL);

3444 return -1;	3603 return -1;

3445 }	3604 }

3446 if (CUR != '<') return -1;	3605 if (CUR != '<') return -1;

3447 NEXT;	3606 NEXT;

3448	3607

3449 atts = ctxt->atts;	3608 atts = ctxt->atts;

3450 maxatts = ctxt->maxatts;	3609 maxatts = ctxt->maxatts;

3451	3610

3452 GROW;	3611 GROW;

3453 name = htmlParseHTMLName(ctxt);	3612 name = htmlParseHTMLName(ctxt);

3454 if (name == NULL) {	3613 if (name == NULL) {

3455 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,	3614 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,

3456 "htmlParseStartTag: invalid element name\n",	3615 "htmlParseStartTag: invalid element name\n",

3457 NULL, NULL);	3616 NULL, NULL);

3458 /* Dump the bogus tag like browsers do */	3617 /* Dump the bogus tag like browsers do */

3459 » while ((IS_CHAR_CH(CUR)) && (CUR != '>'))	3618 » while ((IS_CHAR_CH(CUR)) && (CUR != '>') &&

	3619 (ctxt->instate != XML_PARSER_EOF))

3460 NEXT;	3620 NEXT;

3461 return -1;	3621 return -1;

3462 }	3622 }

3463 if (xmlStrEqual(name, BAD_CAST"meta"))	3623 if (xmlStrEqual(name, BAD_CAST"meta"))

3464 meta = 1;	3624 meta = 1;

3465	3625

3466 /*	3626 /*

3467 * Check for auto-closure of HTML elements.	3627 * Check for auto-closure of HTML elements.

3468 */	3628 */

3469 htmlAutoClose(ctxt, name);	3629 htmlAutoClose(ctxt, name);

3470	3630

3471 /*	3631 /*

3472 * Check for implied HTML elements.	3632 * Check for implied HTML elements.

3473 */	3633 */

3474 htmlCheckImplied(ctxt, name);	3634 htmlCheckImplied(ctxt, name);

3475	3635

3476 /*	3636 /*

3477 * Avoid html at any level > 0, head at any level != 1	3637 * Avoid html at any level > 0, head at any level != 1

3478 * or any attempt to recurse body	3638 * or any attempt to recurse body

3479 */	3639 */

3480 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {	3640 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {

3481 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,	3641 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,

3482 "htmlParseStartTag: misplaced <html> tag\n",	3642 "htmlParseStartTag: misplaced <html> tag\n",

3483 name, NULL);	3643 name, NULL);

3484 discardtag = 1;	3644 discardtag = 1;

	3645 ctxt->depth++;

3485 }	3646 }

3486 if ((ctxt->nameNr != 1) &&	3647 if ((ctxt->nameNr != 1) &&

3487 (xmlStrEqual(name, BAD_CAST"head"))) {	3648 (xmlStrEqual(name, BAD_CAST"head"))) {

3488 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,	3649 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,

3489 "htmlParseStartTag: misplaced <head> tag\n",	3650 "htmlParseStartTag: misplaced <head> tag\n",

3490 name, NULL);	3651 name, NULL);

3491 discardtag = 1;	3652 discardtag = 1;

	3653 ctxt->depth++;

3492 }	3654 }

3493 if (xmlStrEqual(name, BAD_CAST"body")) {	3655 if (xmlStrEqual(name, BAD_CAST"body")) {

3494 int indx;	3656 int indx;

3495 for (indx = 0;indx < ctxt->nameNr;indx++) {	3657 for (indx = 0;indx < ctxt->nameNr;indx++) {

3496 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {	3658 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {

3497 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,	3659 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,

3498 "htmlParseStartTag: misplaced <body> tag\n",	3660 "htmlParseStartTag: misplaced <body> tag\n",

3499 name, NULL);	3661 name, NULL);

3500 discardtag = 1;	3662 discardtag = 1;

	3663 ctxt->depth++;

3501 }	3664 }

3502 }	3665 }

3503 }	3666 }

3504	3667

3505 /*	3668 /*

3506 * Now parse the attributes, it ends up with the ending	3669 * Now parse the attributes, it ends up with the ending

3507 *	3670 *

3508 * (S Attribute)* S?	3671 * (S Attribute)* S?

3509 */	3672 */

3510 SKIP_BLANKS;	3673 SKIP_BLANKS;

3511 while ((IS_CHAR_CH(CUR)) &&	3674 while ((IS_CHAR_CH(CUR)) &&

3512 (CUR != '>') &&	3675 (CUR != '>') &&

3513 ((CUR != '/') \|\| (NXT(1) != '>'))) {	3676 ((CUR != '/') \|\| (NXT(1) != '>'))) {

3514 long cons = ctxt->nbChars;	3677 long cons = ctxt->nbChars;

3515	3678

3516 GROW;	3679 GROW;

3517 attname = htmlParseAttribute(ctxt, &attvalue);	3680 attname = htmlParseAttribute(ctxt, &attvalue);

3518 if (attname != NULL) {	3681 if (attname != NULL) {

3519	3682

3520 /*	3683 /*

3521 * Well formedness requires at most one declaration of an attribute	3684 * Well formedness requires at most one declaration of an attribute

3522 */	3685 */

(...skipping 118 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3641 if ((CUR != '<') \|\| (NXT(1) != '/')) {	3804 if ((CUR != '<') \|\| (NXT(1) != '/')) {

3642 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,	3805 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,

3643 "htmlParseEndTag: '</' not found\n", NULL, NULL);	3806 "htmlParseEndTag: '</' not found\n", NULL, NULL);

3644 return (0);	3807 return (0);

3645 }	3808 }

3646 SKIP(2);	3809 SKIP(2);

3647	3810

3648 name = htmlParseHTMLName(ctxt);	3811 name = htmlParseHTMLName(ctxt);

3649 if (name == NULL)	3812 if (name == NULL)

3650 return (0);	3813 return (0);

3651

3652 /*	3814 /*

3653 * We should definitely be at the ending "S? '>'" part	3815 * We should definitely be at the ending "S? '>'" part

3654 */	3816 */

3655 SKIP_BLANKS;	3817 SKIP_BLANKS;

3656 if ((!IS_CHAR_CH(CUR)) \|\| (CUR != '>')) {	3818 if ((!IS_CHAR_CH(CUR)) \|\| (CUR != '>')) {

3657 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,	3819 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,

3658 "End tag : expected '>'\n", NULL, NULL);	3820 "End tag : expected '>'\n", NULL, NULL);

3659 if (ctxt->recovery) {	3821 if (ctxt->recovery) {

3660 /*	3822 /*

3661 * We're not at the ending > !!	3823 * We're not at the ending > !!

3662 * Error, unless in recover mode where we search forwards	3824 * Error, unless in recover mode where we search forwards

3663 * until we find a >	3825 * until we find a >

3664 */	3826 */

3665 while (CUR != '\0' && CUR != '>') NEXT;	3827 while (CUR != '\0' && CUR != '>') NEXT;

3666 NEXT;	3828 NEXT;

3667 }	3829 }

3668 } else	3830 } else

3669 NEXT;	3831 NEXT;

3670	3832

3671 /*	3833 /*

	3834 * if we ignored misplaced tags in htmlParseStartTag don't pop them

	3835 * out now.

	3836 */

	3837 if ((ctxt->depth > 0) &&

	3838 (xmlStrEqual(name, BAD_CAST "html") \|\|

	3839 xmlStrEqual(name, BAD_CAST "body") \|\|

	3840 xmlStrEqual(name, BAD_CAST "head"))) {

	3841 ctxt->depth--;

	3842 return (0);

	3843 }

	3844

	3845 /*

3672 * If the name read is not one of the element in the parsing stack	3846 * If the name read is not one of the element in the parsing stack

3673 * then return, it's just an error.	3847 * then return, it's just an error.

3674 */	3848 */

3675 for (i = (ctxt->nameNr - 1); i >= 0; i--) {	3849 for (i = (ctxt->nameNr - 1); i >= 0; i--) {

3676 if (xmlStrEqual(name, ctxt->nameTab[i]))	3850 if (xmlStrEqual(name, ctxt->nameTab[i]))

3677 break;	3851 break;

3678 }	3852 }

3679 if (i < 0) {	3853 if (i < 0) {

3680 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,	3854 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,

3681 "Unexpected end tag : %s\n", name, NULL);	3855 "Unexpected end tag : %s\n", name, NULL);

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3715 ret = 0;	3889 ret = 0;

3716 }	3890 }

3717	3891

3718 return (ret);	3892 return (ret);

3719 }	3893 }

3720	3894

3721	3895

3722 /**	3896 /**

3723 * htmlParseReference:	3897 * htmlParseReference:

3724 * @ctxt: an HTML parser context	3898 * @ctxt: an HTML parser context

3725 *	3899 *

3726 * parse and handle entity references in content,	3900 * parse and handle entity references in content,

3727 * this will end-up in a call to character() since this is either a	3901 * this will end-up in a call to character() since this is either a

3728 * CharRef, or a predefined entity.	3902 * CharRef, or a predefined entity.

3729 */	3903 */

3730 static void	3904 static void

3731 htmlParseReference(htmlParserCtxtPtr ctxt) {	3905 htmlParseReference(htmlParserCtxtPtr ctxt) {

3732 const htmlEntityDesc * ent;	3906 const htmlEntityDesc * ent;

3733 xmlChar out[6];	3907 xmlChar out[6];

3734 const xmlChar *name;	3908 const xmlChar *name;

3735 if (CUR != '&') return;	3909 if (CUR != '&') return;

3736	3910

3737 if (NXT(1) == '#') {	3911 if (NXT(1) == '#') {

3738 unsigned int c;	3912 unsigned int c;

3739 int bits, i = 0;	3913 int bits, i = 0;

3740	3914

3741 c = htmlParseCharRef(ctxt);	3915 c = htmlParseCharRef(ctxt);

3742 if (c == 0)	3916 if (c == 0)

3743 return;	3917 return;

3744	3918

3745 if (c < 0x80) { out[i++]= c; bits= -6; }	3919 if (c < 0x80) { out[i++]= c; bits= -6; }

3746 else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }	3920 else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }

3747 else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }	3921 else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }

3748 else { out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }	3922 else { out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }

3749	3923

3750 for ( ; bits >= 0; bits-= 6) {	3924 for ( ; bits >= 0; bits-= 6) {

3751 out[i++]= ((c >> bits) & 0x3F) \| 0x80;	3925 out[i++]= ((c >> bits) & 0x3F) \| 0x80;

3752 }	3926 }

3753 out[i] = 0;	3927 out[i] = 0;

3754	3928

3755 htmlCheckParagraph(ctxt);	3929 htmlCheckParagraph(ctxt);

3756 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))	3930 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))

3757 ctxt->sax->characters(ctxt->userData, out, i);	3931 ctxt->sax->characters(ctxt->userData, out, i);

3758 } else {	3932 } else {

3759 ent = htmlParseEntityRef(ctxt, &name);	3933 ent = htmlParseEntityRef(ctxt, &name);

(...skipping 14 matching lines...) Expand all Loading...
3774 unsigned int c;	3948 unsigned int c;

3775 int bits, i = 0;	3949 int bits, i = 0;

3776	3950

3777 c = ent->value;	3951 c = ent->value;

3778 if (c < 0x80)	3952 if (c < 0x80)

3779 { out[i++]= c; bits= -6; }	3953 { out[i++]= c; bits= -6; }

3780 else if (c < 0x800)	3954 else if (c < 0x800)

3781 { out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }	3955 { out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }

3782 else if (c < 0x10000)	3956 else if (c < 0x10000)

3783 { out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }	3957 { out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }

3784 » else	3958 » else

3785 { out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }	3959 { out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }

3786	3960

3787 for ( ; bits >= 0; bits-= 6) {	3961 for ( ; bits >= 0; bits-= 6) {

3788 out[i++]= ((c >> bits) & 0x3F) \| 0x80;	3962 out[i++]= ((c >> bits) & 0x3F) \| 0x80;

3789 }	3963 }

3790 out[i] = 0;	3964 out[i] = 0;

3791	3965

3792 htmlCheckParagraph(ctxt);	3966 htmlCheckParagraph(ctxt);

3793 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))	3967 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))

3794 ctxt->sax->characters(ctxt->userData, out, i);	3968 ctxt->sax->characters(ctxt->userData, out, i);

3795 }	3969 }

3796 }	3970 }

3797 }	3971 }

3798	3972

3799 /**	3973 /**

3800 * htmlParseContent:	3974 * htmlParseContent:

3801 * @ctxt: an HTML parser context	3975 * @ctxt: an HTML parser context

3802 *	3976 *

3803 * Parse a content: comment, sub-element, reference or text.	3977 * Parse a content: comment, sub-element, reference or text.

	3978 * Kept for compatibility with old code

3804 */	3979 */

3805	3980

3806 static void	3981 static void

3807 htmlParseContent(htmlParserCtxtPtr ctxt) {	3982 htmlParseContent(htmlParserCtxtPtr ctxt) {

3808 xmlChar *currentNode;	3983 xmlChar *currentNode;

3809 int depth;	3984 int depth;

3810 const xmlChar *name;	3985 const xmlChar *name;

3811	3986

3812 currentNode = xmlStrdup(ctxt->name);	3987 currentNode = xmlStrdup(ctxt->name);

3813 depth = ctxt->nameNr;	3988 depth = ctxt->nameNr;

3814 while (1) {	3989 while (1) {

3815 long cons = ctxt->nbChars;	3990 long cons = ctxt->nbChars;

3816	3991

3817 GROW;	3992 GROW;

	3993

	3994 if (ctxt->instate == XML_PARSER_EOF)

	3995 break;

	3996

3818 /*	3997 /*

3819 * Our tag or one of it's parent or children is ending.	3998 * Our tag or one of it's parent or children is ending.

3820 */	3999 */

3821 if ((CUR == '<') && (NXT(1) == '/')) {	4000 if ((CUR == '<') && (NXT(1) == '/')) {

3822 if (htmlParseEndTag(ctxt) &&	4001 if (htmlParseEndTag(ctxt) &&

3823 ((currentNode != NULL) \|\| (ctxt->nameNr == 0))) {	4002 ((currentNode != NULL) \|\| (ctxt->nameNr == 0))) {

3824 if (currentNode != NULL)	4003 if (currentNode != NULL)

3825 xmlFree(currentNode);	4004 xmlFree(currentNode);

3826 return;	4005 return;

3827 }	4006 }

3828 continue; /* while */	4007 continue; /* while */

3829 }	4008 }

3830	4009

3831 else if ((CUR == '<') &&	4010 else if ((CUR == '<') &&

3832 ((IS_ASCII_LETTER(NXT(1))) \|\|	4011 ((IS_ASCII_LETTER(NXT(1))) \|\|

3833 (NXT(1) == '_') \|\| (NXT(1) == ':'))) {	4012 (NXT(1) == '_') \|\| (NXT(1) == ':'))) {

3834 name = htmlParseHTMLName_nonInvasive(ctxt);	4013 name = htmlParseHTMLName_nonInvasive(ctxt);

3835 if (name == NULL) {	4014 if (name == NULL) {

3836 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,	4015 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,

3837 "htmlParseStartTag: invalid element name\n",	4016 "htmlParseStartTag: invalid element name\n",

3838 NULL, NULL);	4017 NULL, NULL);

3839 /* Dump the bogus tag like browsers do */	4018 /* Dump the bogus tag like browsers do */

3840 » while ((IS_CHAR_CH(CUR)) && (CUR != '>'))	4019 while ((IS_CHAR_CH(CUR)) && (CUR != '>'))

3841 NEXT;	4020 NEXT;

3842	4021

3843 if (currentNode != NULL)	4022 if (currentNode != NULL)

3844 xmlFree(currentNode);	4023 xmlFree(currentNode);

3845 return;	4024 return;

3846 }	4025 }

3847	4026

3848 if (ctxt->name != NULL) {	4027 if (ctxt->name != NULL) {

3849 if (htmlCheckAutoClose(name, ctxt->name) == 1) {	4028 if (htmlCheckAutoClose(name, ctxt->name) == 1) {

3850 htmlAutoClose(ctxt, name);	4029 htmlAutoClose(ctxt, name);

3851 continue;	4030 continue;

3852 }	4031 }

3853 » }»	4032 » }

3854 }	4033 }

3855	4034

3856 /*	4035 /*

3857 * Has this node been popped out during parsing of	4036 * Has this node been popped out during parsing of

3858 * the next element	4037 * the next element

3859 */	4038 */

3860 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&	4039 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&

3861 (!xmlStrEqual(currentNode, ctxt->name)))	4040 (!xmlStrEqual(currentNode, ctxt->name)))

3862 {	4041 {

3863 if (currentNode != NULL) xmlFree(currentNode);	4042 if (currentNode != NULL) xmlFree(currentNode);

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3902	4081

3903 /*	4082 /*

3904 * Third case : a sub-element.	4083 * Third case : a sub-element.

3905 */	4084 */

3906 else if (CUR == '<') {	4085 else if (CUR == '<') {

3907 htmlParseElement(ctxt);	4086 htmlParseElement(ctxt);

3908 }	4087 }

3909	4088

3910 /*	4089 /*

3911 * Fourth case : a reference. If if has not been resolved,	4090 * Fourth case : a reference. If if has not been resolved,

3912 » * parsing returns it's Name, create the node	4091 » * parsing returns it's Name, create the node

3913 */	4092 */

3914 else if (CUR == '&') {	4093 else if (CUR == '&') {

3915 htmlParseReference(ctxt);	4094 htmlParseReference(ctxt);

3916 }	4095 }

3917	4096

3918 /*	4097 /*

3919 * Fifth case : end of the resource	4098 * Fifth case : end of the resource

3920 */	4099 */

3921 else if (CUR == 0) {	4100 else if (CUR == 0) {

3922 htmlAutoCloseOnEnd(ctxt);	4101 htmlAutoCloseOnEnd(ctxt);

(...skipping 15 matching lines...) Expand all Loading...
3938 }	4117 }

3939 break;	4118 break;

3940 }	4119 }

3941 }	4120 }

3942 GROW;	4121 GROW;

3943 }	4122 }

3944 if (currentNode != NULL) xmlFree(currentNode);	4123 if (currentNode != NULL) xmlFree(currentNode);

3945 }	4124 }

3946	4125

3947 /**	4126 /**

3948 * htmlParseContent:

3949 * @ctxt: an HTML parser context

3950 *

3951 * Parse a content: comment, sub-element, reference or text.

3952 */

3953

3954 void

3955 __htmlParseContent(void *ctxt) {

3956 if (ctxt != NULL)

3957 htmlParseContent((htmlParserCtxtPtr) ctxt);

3958 }

3959

3960 /**

3961 * htmlParseElement:	4127 * htmlParseElement:

3962 * @ctxt: an HTML parser context	4128 * @ctxt: an HTML parser context

3963 *	4129 *

3964 * parse an HTML element, this is highly recursive	4130 * parse an HTML element, this is highly recursive

	4131 * this is kept for compatibility with previous code versions

3965 *	4132 *

3966 * [39] element ::= EmptyElemTag \| STag content ETag	4133 * [39] element ::= EmptyElemTag \| STag content ETag

3967 *	4134 *

3968 * [41] Attribute ::= Name Eq AttValue	4135 * [41] Attribute ::= Name Eq AttValue

3969 */	4136 */

3970	4137

3971 void	4138 void

3972 htmlParseElement(htmlParserCtxtPtr ctxt) {	4139 htmlParseElement(htmlParserCtxtPtr ctxt) {

3973 const xmlChar *name;	4140 const xmlChar *name;

3974 xmlChar *currentNode = NULL;	4141 xmlChar *currentNode = NULL;

3975 const htmlElemDesc * info;	4142 const htmlElemDesc * info;

3976 htmlParserNodeInfo node_info;	4143 htmlParserNodeInfo node_info;

3977 int failed;	4144 int failed;

3978 int depth;	4145 int depth;

3979 const xmlChar *oldptr;	4146 const xmlChar *oldptr;

3980	4147

3981 if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {	4148 if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {

3982 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,	4149 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,

3983 "htmlParseElement: context error\n", NULL, NULL);	4150 "htmlParseElement: context error\n", NULL, NULL);

3984 return;	4151 return;

3985 }	4152 }

	4153

	4154 if (ctxt->instate == XML_PARSER_EOF)

	4155 return;

	4156

3986 /* Capture start position */	4157 /* Capture start position */

3987 if (ctxt->record_info) {	4158 if (ctxt->record_info) {

3988 node_info.begin_pos = ctxt->input->consumed +	4159 node_info.begin_pos = ctxt->input->consumed +

3989 (CUR_PTR - ctxt->input->base);	4160 (CUR_PTR - ctxt->input->base);

3990 node_info.begin_line = ctxt->input->line;	4161 node_info.begin_line = ctxt->input->line;

3991 }	4162 }

3992	4163

3993 failed = htmlParseStartTag(ctxt);	4164 failed = htmlParseStartTag(ctxt);

3994 name = ctxt->name;	4165 name = ctxt->name;

3995 if ((failed == -1) \|\| (name == NULL)) {	4166 if ((failed == -1) \|\| (name == NULL)) {

(...skipping 24 matching lines...) Expand all Loading...
4020	4191

4021 if (CUR == '>') {	4192 if (CUR == '>') {

4022 NEXT;	4193 NEXT;

4023 } else {	4194 } else {

4024 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,	4195 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,

4025 "Couldn't find end of Start Tag %s\n", name, NULL);	4196 "Couldn't find end of Start Tag %s\n", name, NULL);

4026	4197

4027 /*	4198 /*

4028 * end of parsing of this node.	4199 * end of parsing of this node.

4029 */	4200 */

4030 » if (xmlStrEqual(name, ctxt->name)) {	4201 » if (xmlStrEqual(name, ctxt->name)) {

4031 nodePop(ctxt);	4202 nodePop(ctxt);

4032 htmlnamePop(ctxt);	4203 htmlnamePop(ctxt);

4033 » }	4204 » }

4034	4205

4035 /*	4206 /*

4036 * Capture end position and add node	4207 * Capture end position and add node

4037 */	4208 */

4038 if (ctxt->record_info) {	4209 if (ctxt->record_info) {

4039 node_info.end_pos = ctxt->input->consumed +	4210 node_info.end_pos = ctxt->input->consumed +

4040 (CUR_PTR - ctxt->input->base);	4211 (CUR_PTR - ctxt->input->base);

4041 node_info.end_line = ctxt->input->line;	4212 node_info.end_line = ctxt->input->line;

4042 node_info.node = ctxt->node;	4213 node_info.node = ctxt->node;

4043 xmlParserAddNodeInfo(ctxt, &node_info);	4214 xmlParserAddNodeInfo(ctxt, &node_info);

(...skipping 13 matching lines...) Expand all Loading...
4057	4228

4058 /*	4229 /*

4059 * Parse the content of the element:	4230 * Parse the content of the element:

4060 */	4231 */

4061 currentNode = xmlStrdup(ctxt->name);	4232 currentNode = xmlStrdup(ctxt->name);

4062 depth = ctxt->nameNr;	4233 depth = ctxt->nameNr;

4063 while (IS_CHAR_CH(CUR)) {	4234 while (IS_CHAR_CH(CUR)) {

4064 oldptr = ctxt->input->cur;	4235 oldptr = ctxt->input->cur;

4065 htmlParseContent(ctxt);	4236 htmlParseContent(ctxt);

4066 if (oldptr==ctxt->input->cur) break;	4237 if (oldptr==ctxt->input->cur) break;

4067 » if (ctxt->nameNr < depth) break;	4238 » if (ctxt->nameNr < depth) break;

4068 }»	4239 }

4069	4240

4070 /*	4241 /*

4071 * Capture end position and add node	4242 * Capture end position and add node

4072 */	4243 */

4073 if ( currentNode != NULL && ctxt->record_info ) {	4244 if ( currentNode != NULL && ctxt->record_info ) {

4074 node_info.end_pos = ctxt->input->consumed +	4245 node_info.end_pos = ctxt->input->consumed +

4075 (CUR_PTR - ctxt->input->base);	4246 (CUR_PTR - ctxt->input->base);

4076 node_info.end_line = ctxt->input->line;	4247 node_info.end_line = ctxt->input->line;

4077 node_info.node = ctxt->node;	4248 node_info.node = ctxt->node;

4078 xmlParserAddNodeInfo(ctxt, &node_info);	4249 xmlParserAddNodeInfo(ctxt, &node_info);

4079 }	4250 }

4080 if (!IS_CHAR_CH(CUR)) {	4251 if (!IS_CHAR_CH(CUR)) {

4081 htmlAutoCloseOnEnd(ctxt);	4252 htmlAutoCloseOnEnd(ctxt);

4082 }	4253 }

4083	4254

4084 if (currentNode != NULL)	4255 if (currentNode != NULL)

4085 xmlFree(currentNode);	4256 xmlFree(currentNode);

4086 }	4257 }

4087	4258

	4259 static void

	4260 htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {

	4261 /*

	4262 * Capture end position and add node

	4263 */

	4264 if ( ctxt->node != NULL && ctxt->record_info ) {

	4265 ctxt->nodeInfo->end_pos = ctxt->input->consumed +

	4266 (CUR_PTR - ctxt->input->base);

	4267 ctxt->nodeInfo->end_line = ctxt->input->line;

	4268 ctxt->nodeInfo->node = ctxt->node;

	4269 xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);

	4270 htmlNodeInfoPop(ctxt);

	4271 }

	4272 if (!IS_CHAR_CH(CUR)) {

	4273 htmlAutoCloseOnEnd(ctxt);

	4274 }

	4275 }

	4276

	4277 /**

	4278 * htmlParseElementInternal:

	4279 * @ctxt: an HTML parser context

	4280 *

	4281 * parse an HTML element, new version, non recursive

	4282 *

	4283 * [39] element ::= EmptyElemTag \| STag content ETag

	4284 *

	4285 * [41] Attribute ::= Name Eq AttValue

	4286 */

	4287

	4288 static void

	4289 htmlParseElementInternal(htmlParserCtxtPtr ctxt) {

	4290 const xmlChar *name;

	4291 const htmlElemDesc * info;

	4292 htmlParserNodeInfo node_info;

	4293 int failed;

	4294

	4295 if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {

	4296 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,

	4297 "htmlParseElementInternal: context error\n", NULL, NULL);

	4298 return;

	4299 }

	4300

	4301 if (ctxt->instate == XML_PARSER_EOF)

	4302 return;

	4303

	4304 /* Capture start position */

	4305 if (ctxt->record_info) {

	4306 node_info.begin_pos = ctxt->input->consumed +

	4307 (CUR_PTR - ctxt->input->base);

	4308 node_info.begin_line = ctxt->input->line;

	4309 }

	4310

	4311 failed = htmlParseStartTag(ctxt);

	4312 name = ctxt->name;

	4313 if ((failed == -1) \|\| (name == NULL)) {

	4314 if (CUR == '>')

	4315 NEXT;

	4316 return;

	4317 }

	4318

	4319 /*

	4320 * Lookup the info for that element.

	4321 */

	4322 info = htmlTagLookup(name);

	4323 if (info == NULL) {

	4324 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,

	4325 "Tag %s invalid\n", name, NULL);

	4326 }

	4327

	4328 /*

	4329 * Check for an Empty Element labeled the XML/SGML way

	4330 */

	4331 if ((CUR == '/') && (NXT(1) == '>')) {

	4332 SKIP(2);

	4333 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))

	4334 ctxt->sax->endElement(ctxt->userData, name);

	4335 htmlnamePop(ctxt);

	4336 return;

	4337 }

	4338

	4339 if (CUR == '>') {

	4340 NEXT;

	4341 } else {

	4342 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,

	4343 "Couldn't find end of Start Tag %s\n", name, NULL);

	4344

	4345 /*

	4346 * end of parsing of this node.

	4347 */

	4348 if (xmlStrEqual(name, ctxt->name)) {

	4349 nodePop(ctxt);

	4350 htmlnamePop(ctxt);

	4351 }

	4352

	4353 if (ctxt->record_info)

	4354 htmlNodeInfoPush(ctxt, &node_info);

	4355 htmlParserFinishElementParsing(ctxt);

	4356 return;

	4357 }

	4358

	4359 /*

	4360 * Check for an Empty Element from DTD definition

	4361 */

	4362 if ((info != NULL) && (info->empty)) {

	4363 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))

	4364 ctxt->sax->endElement(ctxt->userData, name);

	4365 htmlnamePop(ctxt);

	4366 return;

	4367 }

	4368

	4369 if (ctxt->record_info)

	4370 htmlNodeInfoPush(ctxt, &node_info);

	4371 }

	4372

	4373 /**

	4374 * htmlParseContentInternal:

	4375 * @ctxt: an HTML parser context

	4376 *

	4377 * Parse a content: comment, sub-element, reference or text.

	4378 * New version for non recursive htmlParseElementInternal

	4379 */

	4380

	4381 static void

	4382 htmlParseContentInternal(htmlParserCtxtPtr ctxt) {

	4383 xmlChar *currentNode;

	4384 int depth;

	4385 const xmlChar *name;

	4386

	4387 currentNode = xmlStrdup(ctxt->name);

	4388 depth = ctxt->nameNr;

	4389 while (1) {

	4390 long cons = ctxt->nbChars;

	4391

	4392 GROW;

	4393

	4394 if (ctxt->instate == XML_PARSER_EOF)

	4395 break;

	4396

	4397 /*

	4398 * Our tag or one of it's parent or children is ending.

	4399 */

	4400 if ((CUR == '<') && (NXT(1) == '/')) {

	4401 if (htmlParseEndTag(ctxt) &&

	4402 ((currentNode != NULL) \|\| (ctxt->nameNr == 0))) {

	4403 if (currentNode != NULL)

	4404 xmlFree(currentNode);

	4405

	4406 currentNode = xmlStrdup(ctxt->name);

	4407 depth = ctxt->nameNr;

	4408 }

	4409 continue; /* while */

	4410 }

	4411

	4412 else if ((CUR == '<') &&

	4413 ((IS_ASCII_LETTER(NXT(1))) \|\|

	4414 (NXT(1) == '_') \|\| (NXT(1) == ':'))) {

	4415 name = htmlParseHTMLName_nonInvasive(ctxt);

	4416 if (name == NULL) {

	4417 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,

	4418 "htmlParseStartTag: invalid element name\n",

	4419 NULL, NULL);

	4420 /* Dump the bogus tag like browsers do */

	4421 while ((IS_CHAR_CH(CUR)) && (CUR != '>'))

	4422 NEXT;

	4423

	4424 htmlParserFinishElementParsing(ctxt);

	4425 if (currentNode != NULL)

	4426 xmlFree(currentNode);

	4427

	4428 currentNode = xmlStrdup(ctxt->name);

	4429 depth = ctxt->nameNr;

	4430 continue;

	4431 }

	4432

	4433 if (ctxt->name != NULL) {

	4434 if (htmlCheckAutoClose(name, ctxt->name) == 1) {

	4435 htmlAutoClose(ctxt, name);

	4436 continue;

	4437 }

	4438 }

	4439 }

	4440

	4441 /*

	4442 * Has this node been popped out during parsing of

	4443 * the next element

	4444 */

	4445 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&

	4446 (!xmlStrEqual(currentNode, ctxt->name)))

	4447 {

	4448 htmlParserFinishElementParsing(ctxt);

	4449 if (currentNode != NULL) xmlFree(currentNode);

	4450

	4451 currentNode = xmlStrdup(ctxt->name);

	4452 depth = ctxt->nameNr;

	4453 continue;

	4454 }

	4455

	4456 if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) \|\|

	4457 (xmlStrEqual(currentNode, BAD_CAST"style")))) {

	4458 /*

	4459 * Handle SCRIPT/STYLE separately

	4460 */

	4461 htmlParseScript(ctxt);

	4462 } else {

	4463 /*

	4464 * Sometimes DOCTYPE arrives in the middle of the document

	4465 */

	4466 if ((CUR == '<') && (NXT(1) == '!') &&

	4467 (UPP(2) == 'D') && (UPP(3) == 'O') &&

	4468 (UPP(4) == 'C') && (UPP(5) == 'T') &&

	4469 (UPP(6) == 'Y') && (UPP(7) == 'P') &&

	4470 (UPP(8) == 'E')) {

	4471 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,

	4472 "Misplaced DOCTYPE declaration\n",

	4473 BAD_CAST "DOCTYPE" , NULL);

	4474 htmlParseDocTypeDecl(ctxt);

	4475 }

	4476

	4477 /*

	4478 * First case : a comment

	4479 */

	4480 if ((CUR == '<') && (NXT(1) == '!') &&

	4481 (NXT(2) == '-') && (NXT(3) == '-')) {

	4482 htmlParseComment(ctxt);

	4483 }

	4484

	4485 /*

	4486 * Second case : a Processing Instruction.

	4487 */

	4488 else if ((CUR == '<') && (NXT(1) == '?')) {

	4489 htmlParsePI(ctxt);

	4490 }

	4491

	4492 /*

	4493 * Third case : a sub-element.

	4494 */

	4495 else if (CUR == '<') {

	4496 htmlParseElementInternal(ctxt);

	4497 if (currentNode != NULL) xmlFree(currentNode);

	4498

	4499 currentNode = xmlStrdup(ctxt->name);

	4500 depth = ctxt->nameNr;

	4501 }

	4502

	4503 /*

	4504 * Fourth case : a reference. If if has not been resolved,

	4505 * parsing returns it's Name, create the node

	4506 */

	4507 else if (CUR == '&') {

	4508 htmlParseReference(ctxt);

	4509 }

	4510

	4511 /*

	4512 * Fifth case : end of the resource

	4513 */

	4514 else if (CUR == 0) {

	4515 htmlAutoCloseOnEnd(ctxt);

	4516 break;

	4517 }

	4518

	4519 /*

	4520 * Last case, text. Note that References are handled directly.

	4521 */

	4522 else {

	4523 htmlParseCharData(ctxt);

	4524 }

	4525

	4526 if (cons == ctxt->nbChars) {

	4527 if (ctxt->node != NULL) {

	4528 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,

	4529 "detected an error in element content\n",

	4530 NULL, NULL);

	4531 }

	4532 break;

	4533 }

	4534 }

	4535 GROW;

	4536 }

	4537 if (currentNode != NULL) xmlFree(currentNode);

	4538 }

	4539

	4540 /**

	4541 * htmlParseContent:

	4542 * @ctxt: an HTML parser context

	4543 *

	4544 * Parse a content: comment, sub-element, reference or text.

	4545 * This is the entry point when called from parser.c

	4546 */

	4547

	4548 void

	4549 __htmlParseContent(void *ctxt) {

	4550 if (ctxt != NULL)

	4551 htmlParseContentInternal((htmlParserCtxtPtr) ctxt);

	4552 }

	4553

4088 /**	4554 /**

4089 * htmlParseDocument:	4555 * htmlParseDocument:

4090 * @ctxt: an HTML parser context	4556 * @ctxt: an HTML parser context

4091 *	4557 *

4092 * parse an HTML document (and build a tree if using the standard SAX	4558 * parse an HTML document (and build a tree if using the standard SAX

4093 * interface).	4559 * interface).

4094 *	4560 *

4095 * Returns 0, -1 in case of error. the parser context is augmented	4561 * Returns 0, -1 in case of error. the parser context is augmented

4096 * as a result of the parsing.	4562 * as a result of the parsing.

4097 */	4563 */

4098	4564

4099 int	4565 int

4100 htmlParseDocument(htmlParserCtxtPtr ctxt) {	4566 htmlParseDocument(htmlParserCtxtPtr ctxt) {

	4567 xmlChar start[4];

	4568 xmlCharEncoding enc;

4101 xmlDtdPtr dtd;	4569 xmlDtdPtr dtd;

4102	4570

4103 xmlInitParser();	4571 xmlInitParser();

4104	4572

4105 htmlDefaultSAXHandlerInit();	4573 htmlDefaultSAXHandlerInit();

4106	4574

4107 if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {	4575 if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {

4108 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,	4576 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,

4109 "htmlParseDocument: context error\n", NULL, NULL);	4577 "htmlParseDocument: context error\n", NULL, NULL);

4110 return(XML_ERR_INTERNAL_ERROR);	4578 return(XML_ERR_INTERNAL_ERROR);

4111 }	4579 }

4112 ctxt->html = 1;	4580 ctxt->html = 1;

	4581 ctxt->linenumbers = 1;

4113 GROW;	4582 GROW;

4114 /*	4583 /*

4115 * SAX: beginning of the document processing.	4584 * SAX: beginning of the document processing.

4116 */	4585 */

4117 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))	4586 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))

4118 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);	4587 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

4119	4588

	4589 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&

	4590 ((ctxt->input->end - ctxt->input->cur) >= 4)) {

	4591 /*

	4592 * Get the 4 first bytes and decode the charset

	4593 * if enc != XML_CHAR_ENCODING_NONE

	4594 * plug some encoding conversion routines.

	4595 */

	4596 start[0] = RAW;

	4597 start[1] = NXT(1);

	4598 start[2] = NXT(2);

	4599 start[3] = NXT(3);

	4600 enc = xmlDetectCharEncoding(&start[0], 4);

	4601 if (enc != XML_CHAR_ENCODING_NONE) {

	4602 xmlSwitchEncoding(ctxt, enc);

	4603 }

	4604 }

	4605

4120 /*	4606 /*

4121 * Wipe out everything which is before the first '<'	4607 * Wipe out everything which is before the first '<'

4122 */	4608 */

4123 SKIP_BLANKS;	4609 SKIP_BLANKS;

4124 if (CUR == 0) {	4610 if (CUR == 0) {

4125 » htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,	4611 » htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,

4126 "Document is empty\n", NULL, NULL);	4612 "Document is empty\n", NULL, NULL);

4127 }	4613 }

4128	4614

4129 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))	4615 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))

4130 ctxt->sax->startDocument(ctxt->userData);	4616 ctxt->sax->startDocument(ctxt->userData);

4131	4617

4132	4618

4133 /*	4619 /*

4134 * Parse possible comments and PIs before any content	4620 * Parse possible comments and PIs before any content

4135 */	4621 */

4136 while (((CUR == '<') && (NXT(1) == '!') &&	4622 while (((CUR == '<') && (NXT(1) == '!') &&

4137 (NXT(2) == '-') && (NXT(3) == '-')) \|\|	4623 (NXT(2) == '-') && (NXT(3) == '-')) \|\|

4138 ((CUR == '<') && (NXT(1) == '?'))) {	4624 ((CUR == '<') && (NXT(1) == '?'))) {

4139 htmlParseComment(ctxt);»	4625 htmlParseComment(ctxt);

4140 htmlParsePI(ctxt);»	4626 htmlParsePI(ctxt);

4141 SKIP_BLANKS;	4627 SKIP_BLANKS;

4142 }»	4628 }

4143	4629

4144	4630

4145 /*	4631 /*

4146 * Then possibly doc type declaration(s) and more Misc	4632 * Then possibly doc type declaration(s) and more Misc

4147 * (doctypedecl Misc*)?	4633 * (doctypedecl Misc*)?

4148 */	4634 */

4149 if ((CUR == '<') && (NXT(1) == '!') &&	4635 if ((CUR == '<') && (NXT(1) == '!') &&

4150 (UPP(2) == 'D') && (UPP(3) == 'O') &&	4636 (UPP(2) == 'D') && (UPP(3) == 'O') &&

4151 (UPP(4) == 'C') && (UPP(5) == 'T') &&	4637 (UPP(4) == 'C') && (UPP(5) == 'T') &&

4152 (UPP(6) == 'Y') && (UPP(7) == 'P') &&	4638 (UPP(6) == 'Y') && (UPP(7) == 'P') &&

4153 (UPP(8) == 'E')) {	4639 (UPP(8) == 'E')) {

4154 htmlParseDocTypeDecl(ctxt);	4640 htmlParseDocTypeDecl(ctxt);

4155 }	4641 }

4156 SKIP_BLANKS;	4642 SKIP_BLANKS;

4157	4643

4158 /*	4644 /*

4159 * Parse possible comments and PIs before any content	4645 * Parse possible comments and PIs before any content

4160 */	4646 */

4161 while (((CUR == '<') && (NXT(1) == '!') &&	4647 while (((CUR == '<') && (NXT(1) == '!') &&

4162 (NXT(2) == '-') && (NXT(3) == '-')) \|\|	4648 (NXT(2) == '-') && (NXT(3) == '-')) \|\|

4163 ((CUR == '<') && (NXT(1) == '?'))) {	4649 ((CUR == '<') && (NXT(1) == '?'))) {

4164 htmlParseComment(ctxt);»	4650 htmlParseComment(ctxt);

4165 htmlParsePI(ctxt);»	4651 htmlParsePI(ctxt);

4166 SKIP_BLANKS;	4652 SKIP_BLANKS;

4167 }»	4653 }

4168	4654

4169 /*	4655 /*

4170 * Time to start parsing the tree itself	4656 * Time to start parsing the tree itself

4171 */	4657 */

4172 htmlParseContent(ctxt);	4658 htmlParseContentInternal(ctxt);

4173	4659

4174 /*	4660 /*

4175 * autoclose	4661 * autoclose

4176 */	4662 */

4177 if (CUR == 0)	4663 if (CUR == 0)

4178 htmlAutoCloseOnEnd(ctxt);	4664 htmlAutoCloseOnEnd(ctxt);

4179	4665

4180	4666

4181 /*	4667 /*

4182 * SAX: end of the document processing.	4668 * SAX: end of the document processing.

4183 */	4669 */

4184 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))	4670 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))

4185 ctxt->sax->endDocument(ctxt->userData);	4671 ctxt->sax->endDocument(ctxt->userData);

4186	4672

4187 if (ctxt->myDoc != NULL) {	4673 if (ctxt->myDoc != NULL) {

4188 dtd = xmlGetIntSubset(ctxt->myDoc);	4674 dtd = xmlGetIntSubset(ctxt->myDoc);

4189 if (dtd == NULL)	4675 if (dtd == NULL)

4190 » ctxt->myDoc->intSubset =	4676 » ctxt->myDoc->intSubset =

4191 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",	4677 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",

4192 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",	4678 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",

4193 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");	4679 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");

4194 }	4680 }

4195 if (! ctxt->wellFormed) return(-1);	4681 if (! ctxt->wellFormed) return(-1);

4196 return(0);	4682 return(0);

4197 }	4683 }

4198	4684

4199	4685

4200 /************************************************************************	4686 /************************************************************************

4201 * *	4687 * *

(...skipping 25 matching lines...) Expand all Loading...
4227 }	4713 }

4228 sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));	4714 sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));

4229 if (sax == NULL) {	4715 if (sax == NULL) {

4230 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");	4716 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");

4231 return(-1);	4717 return(-1);

4232 }	4718 }

4233 else	4719 else

4234 memset(sax, 0, sizeof(htmlSAXHandler));	4720 memset(sax, 0, sizeof(htmlSAXHandler));

4235	4721

4236 /* Allocate the Input stack */	4722 /* Allocate the Input stack */

4237 ctxt->inputTab = (htmlParserInputPtr *)	4723 ctxt->inputTab = (htmlParserInputPtr *)

4238 xmlMalloc(5 * sizeof(htmlParserInputPtr));	4724 xmlMalloc(5 * sizeof(htmlParserInputPtr));

4239 if (ctxt->inputTab == NULL) {	4725 if (ctxt->inputTab == NULL) {

4240 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");	4726 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");

4241 ctxt->inputNr = 0;	4727 ctxt->inputNr = 0;

4242 ctxt->inputMax = 0;	4728 ctxt->inputMax = 0;

4243 ctxt->input = NULL;	4729 ctxt->input = NULL;

4244 return(-1);	4730 return(-1);

4245 }	4731 }

4246 ctxt->inputNr = 0;	4732 ctxt->inputNr = 0;

4247 ctxt->inputMax = 5;	4733 ctxt->inputMax = 5;

(...skipping 17 matching lines...) Expand all Loading...
4265 }	4751 }

4266 ctxt->nodeNr = 0;	4752 ctxt->nodeNr = 0;

4267 ctxt->nodeMax = 10;	4753 ctxt->nodeMax = 10;

4268 ctxt->node = NULL;	4754 ctxt->node = NULL;

4269	4755

4270 /* Allocate the Name stack */	4756 /* Allocate the Name stack */

4271 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));	4757 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));

4272 if (ctxt->nameTab == NULL) {	4758 if (ctxt->nameTab == NULL) {

4273 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");	4759 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");

4274 ctxt->nameNr = 0;	4760 ctxt->nameNr = 0;

4275 » ctxt->nameMax = 10;	4761 » ctxt->nameMax = 0;

4276 ctxt->name = NULL;	4762 ctxt->name = NULL;

4277 ctxt->nodeNr = 0;	4763 ctxt->nodeNr = 0;

4278 ctxt->nodeMax = 0;	4764 ctxt->nodeMax = 0;

4279 ctxt->node = NULL;	4765 ctxt->node = NULL;

4280 ctxt->inputNr = 0;	4766 ctxt->inputNr = 0;

4281 ctxt->inputMax = 0;	4767 ctxt->inputMax = 0;

4282 ctxt->input = NULL;	4768 ctxt->input = NULL;

4283 return(-1);	4769 return(-1);

4284 }	4770 }

4285 ctxt->nameNr = 0;	4771 ctxt->nameNr = 0;

4286 ctxt->nameMax = 10;	4772 ctxt->nameMax = 10;

4287 ctxt->name = NULL;	4773 ctxt->name = NULL;

4288	4774

	4775 ctxt->nodeInfoTab = NULL;

	4776 ctxt->nodeInfoNr = 0;

	4777 ctxt->nodeInfoMax = 0;

	4778

4289 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;	4779 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;

4290 else {	4780 else {

4291 ctxt->sax = sax;	4781 ctxt->sax = sax;

4292 memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));	4782 memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));

4293 }	4783 }

4294 ctxt->userData = ctxt;	4784 ctxt->userData = ctxt;

4295 ctxt->myDoc = NULL;	4785 ctxt->myDoc = NULL;

4296 ctxt->wellFormed = 1;	4786 ctxt->wellFormed = 1;

4297 ctxt->replaceEntities = 0;	4787 ctxt->replaceEntities = 0;

4298 ctxt->linenumbers = xmlLineNumbersDefaultValue;	4788 ctxt->linenumbers = xmlLineNumbersDefaultValue;

(...skipping 126 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4425 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);	4915 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);

4426	4916

4427 enc = xmlParseCharEncoding(encoding);	4917 enc = xmlParseCharEncoding(encoding);

4428 /*	4918 /*

4429 * registered set of known encodings	4919 * registered set of known encodings

4430 */	4920 */

4431 if (enc != XML_CHAR_ENCODING_ERROR) {	4921 if (enc != XML_CHAR_ENCODING_ERROR) {

4432 xmlSwitchEncoding(ctxt, enc);	4922 xmlSwitchEncoding(ctxt, enc);

4433 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {	4923 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {

4434 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,	4924 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

4435 » » "Unsupported encoding %s\n",	4925 » » "Unsupported encoding %s\n",

4436 (const xmlChar *) encoding, NULL);	4926 (const xmlChar *) encoding, NULL);

4437 }	4927 }

4438 } else {	4928 } else {

4439 /*	4929 /*

4440 * fallback for unknown encodings	4930 * fallback for unknown encodings

4441 */	4931 */

4442 handler = xmlFindCharEncodingHandler((const char *) encoding);	4932 handler = xmlFindCharEncodingHandler((const char *) encoding);

4443 if (handler != NULL) {	4933 if (handler != NULL) {

4444 xmlSwitchToEncoding(ctxt, handler);	4934 xmlSwitchToEncoding(ctxt, handler);

4445 } else {	4935 } else {

4446 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,	4936 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

4447 "Unsupported encoding %s\n",	4937 "Unsupported encoding %s\n",

4448 (const xmlChar *) encoding, NULL);	4938 (const xmlChar *) encoding, NULL);

4449 }	4939 }

4450 }	4940 }

4451 }	4941 }

4452 return(ctxt);	4942 return(ctxt);

4453 }	4943 }

4454	4944

4455 #ifdef LIBXML_PUSH_ENABLED	4945 #ifdef LIBXML_PUSH_ENABLED

4456 /************************************************************************	4946 /************************************************************************

4457 * *	4947 * *

4458 * » » Progressive parsing interfaces» » » » *	4948 * » Progressive parsing interfaces» » » » *

4459 * *	4949 * *

4460 ************************************************************************/	4950 ************************************************************************/

4461	4951

4462 /**	4952 /**

4463 * htmlParseLookupSequence:	4953 * htmlParseLookupSequence:

4464 * @ctxt: an HTML parser context	4954 * @ctxt: an HTML parser context

4465 * @first: the first char to lookup	4955 * @first: the first char to lookup

4466 * @next: the next char to lookup or zero	4956 * @next: the next char to lookup or zero

4467 * @third: the next char to lookup or zero	4957 * @third: the next char to lookup or zero

4468 * @comment: flag to force checking inside comments	4958 * @comment: flag to force checking inside comments

4469 *	4959 *

4470 * Try to find if a sequence (first, next, third) or just (first next) or	4960 * Try to find if a sequence (first, next, third) or just (first next) or

4471 * (first) is available in the input stream.	4961 * (first) is available in the input stream.

4472 * This function has a side effect of (possibly) incrementing ctxt->checkIndex	4962 * This function has a side effect of (possibly) incrementing ctxt->checkIndex

4473 * to avoid rescanning sequences of bytes, it DOES change the state of the	4963 * to avoid rescanning sequences of bytes, it DOES change the state of the

4474 * parser, do not use liberally.	4964 * parser, do not use liberally.

4475 * This is basically similar to xmlParseLookupSequence()	4965 * This is basically similar to xmlParseLookupSequence()

4476 *	4966 *

4477 * Returns the index to the current parsing point if the full sequence	4967 * Returns the index to the current parsing point if the full sequence

4478 * is available, -1 otherwise.	4968 * is available, -1 otherwise.

4479 */	4969 */

4480 static int	4970 static int

4481 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,	4971 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,

4482 xmlChar next, xmlChar third, int iscomment) {	4972 xmlChar next, xmlChar third, int iscomment,

	4973 int ignoreattrval)

	4974 {

4483 int base, len;	4975 int base, len;

4484 htmlParserInputPtr in;	4976 htmlParserInputPtr in;

4485 const xmlChar *buf;	4977 const xmlChar *buf;

4486 int incomment = 0;	4978 int incomment = 0;

	4979 int invalue = 0;

	4980 char valdellim = 0x0;

4487	4981

4488 in = ctxt->input;	4982 in = ctxt->input;

4489 if (in == NULL) return(-1);	4983 if (in == NULL)

	4984 return (-1);

	4985

4490 base = in->cur - in->base;	4986 base = in->cur - in->base;

4491 if (base < 0) return(-1);	4987 if (base < 0)

	4988 return (-1);

	4989

4492 if (ctxt->checkIndex > base)	4990 if (ctxt->checkIndex > base)

4493 base = ctxt->checkIndex;	4991 base = ctxt->checkIndex;

	4992

4494 if (in->buf == NULL) {	4993 if (in->buf == NULL) {

4495 » buf = in->base;	4994 buf = in->base;

4496 » len = in->length;	4995 len = in->length;

4497 } else {	4996 } else {

4498 » buf = in->buf->buffer->content;	4997 buf = in->buf->buffer->content;

4499 » len = in->buf->buffer->use;	4998 len = in->buf->buffer->use;

4500 }	4999 }

	5000

4501 /* take into account the sequence length */	5001 /* take into account the sequence length */

4502 if (third) len -= 2;	5002 if (third)

4503 else if (next) len --;	5003 len -= 2;

4504 for (;base < len;base++) {	5004 else if (next)

4505 » if (!incomment && (base + 4 < len) && !iscomment) {	5005 len--;

4506 » if ((buf[base] == '<') && (buf[base + 1] == '!') &&	5006 for (; base < len; base++) {

4507 » » (buf[base + 2] == '-') && (buf[base + 3] == '-')) {	5007 if ((!incomment) && (base + 4 < len) && (!iscomment)) {

4508 » » incomment = 1;	5008 if ((buf[base] == '<') && (buf[base + 1] == '!') &&

4509 » » /* do not increment past <! - some people use <!--> */	5009 (buf[base + 2] == '-') && (buf[base + 3] == '-')) {

4510 » » base += 2;	5010 incomment = 1;

4511 » }	5011 /* do not increment past <! - some people use <!--> */

4512 » }	5012 base += 2;

4513 » if (incomment) {	5013 }

4514 » if (base + 3 > len)	5014 }

4515 » » return(-1);	5015 if (ignoreattrval) {

4516 » if ((buf[base] == '-') && (buf[base + 1] == '-') &&	5016 if (buf[base] == '"' \|\| buf[base] == '\'') {

4517 » » (buf[base + 2] == '>')) {	5017 if (invalue) {

4518 » » incomment = 0;	5018 if (buf[base] == valdellim) {

4519 » » base += 2;	5019 invalue = 0;

4520 » }	5020 continue;

4521 » continue;	5021 }

4522 » }	5022 } else {

	5023 valdellim = buf[base];

	5024 invalue = 1;

	5025 continue;

	5026 }

	5027 } else if (invalue) {

	5028 continue;

	5029 }

	5030 }

	5031 if (incomment) {

	5032 if (base + 3 > len)

	5033 return (-1);

	5034 if ((buf[base] == '-') && (buf[base + 1] == '-') &&

	5035 (buf[base + 2] == '>')) {

	5036 incomment = 0;

	5037 base += 2;

	5038 }

	5039 continue;

	5040 }

4523 if (buf[base] == first) {	5041 if (buf[base] == first) {

4524 » if (third != 0) {	5042 if (third != 0) {

4525 » » if ((buf[base + 1] != next) \|\|	5043 if ((buf[base + 1] != next) \|\| (buf[base + 2] != third))

4526 » » (buf[base + 2] != third)) continue;	5044 continue;

4527 » } else if (next != 0) {	5045 } else if (next != 0) {

4528 » » if (buf[base + 1] != next) continue;	5046 if (buf[base + 1] != next)

4529 » }	5047 continue;

4530 » ctxt->checkIndex = 0;	5048 }

	5049 ctxt->checkIndex = 0;

4531 #ifdef DEBUG_PUSH	5050 #ifdef DEBUG_PUSH

4532 » if (next == 0)	5051 if (next == 0)

4533 » » xmlGenericError(xmlGenericErrorContext,	5052 xmlGenericError(xmlGenericErrorContext,

4534 » » » "HPP: lookup '%c' found at %d\n",	5053 "HPP: lookup '%c' found at %d\n",

4535 » » » first, base);	5054 first, base);

4536 » else if (third == 0)	5055 else if (third == 0)

4537 » » xmlGenericError(xmlGenericErrorContext,	5056 xmlGenericError(xmlGenericErrorContext,

4538 » » » "HPP: lookup '%c%c' found at %d\n",	5057 "HPP: lookup '%c%c' found at %d\n",

4539 » » » first, next, base);	5058 first, next, base);

4540 » else	5059 else

4541 » » xmlGenericError(xmlGenericErrorContext,	5060 xmlGenericError(xmlGenericErrorContext,

4542 » » » "HPP: lookup '%c%c%c' found at %d\n",	5061 "HPP: lookup '%c%c%c' found at %d\n",

4543 » » » first, next, third, base);	5062 first, next, third, base);

4544 #endif	5063 #endif

4545 » return(base - (in->cur - in->base));	5064 return (base - (in->cur - in->base));

4546 » }	5065 }

4547 }	5066 }

4548 ctxt->checkIndex = base;	5067 if ((!incomment) && (!invalue))

	5068 ctxt->checkIndex = base;

4549 #ifdef DEBUG_PUSH	5069 #ifdef DEBUG_PUSH

4550 if (next == 0)	5070 if (next == 0)

4551 » xmlGenericError(xmlGenericErrorContext,	5071 xmlGenericError(xmlGenericErrorContext,

4552 » » "HPP: lookup '%c' failed\n", first);	5072 "HPP: lookup '%c' failed\n", first);

4553 else if (third == 0)	5073 else if (third == 0)

4554 » xmlGenericError(xmlGenericErrorContext,	5074 xmlGenericError(xmlGenericErrorContext,

4555 » » "HPP: lookup '%c%c' failed\n", first, next);	5075 "HPP: lookup '%c%c' failed\n", first, next);

4556 else»	5076 else

4557 » xmlGenericError(xmlGenericErrorContext,	5077 xmlGenericError(xmlGenericErrorContext,

4558 » » "HPP: lookup '%c%c%c' failed\n", first, next, third);	5078 "HPP: lookup '%c%c%c' failed\n", first, next,

	5079 third);

4559 #endif	5080 #endif

4560 return(-1);	5081 return (-1);

4561 }	5082 }

4562	5083

4563 /**	5084 /**

	5085 * htmlParseLookupChars:

	5086 * @ctxt: an HTML parser context

	5087 * @stop: Array of chars, which stop the lookup.

	5088 * @stopLen: Length of stop-Array

	5089 *

	5090 * Try to find if any char of the stop-Array is available in the input

	5091 * stream.

	5092 * This function has a side effect of (possibly) incrementing ctxt->checkIndex

	5093 * to avoid rescanning sequences of bytes, it DOES change the state of the

	5094 * parser, do not use liberally.

	5095 *

	5096 * Returns the index to the current parsing point if a stopChar

	5097 * is available, -1 otherwise.

	5098 */

	5099 static int

	5100 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,

	5101 int stopLen)

	5102 {

	5103 int base, len;

	5104 htmlParserInputPtr in;

	5105 const xmlChar *buf;

	5106 int incomment = 0;

	5107 int i;

	5108

	5109 in = ctxt->input;

	5110 if (in == NULL)

	5111 return (-1);

	5112

	5113 base = in->cur - in->base;

	5114 if (base < 0)

	5115 return (-1);

	5116

	5117 if (ctxt->checkIndex > base)

	5118 base = ctxt->checkIndex;

	5119

	5120 if (in->buf == NULL) {

	5121 buf = in->base;

	5122 len = in->length;

	5123 } else {

	5124 buf = in->buf->buffer->content;

	5125 len = in->buf->buffer->use;

	5126 }

	5127

	5128 for (; base < len; base++) {

	5129 if (!incomment && (base + 4 < len)) {

	5130 if ((buf[base] == '<') && (buf[base + 1] == '!') &&

	5131 (buf[base + 2] == '-') && (buf[base + 3] == '-')) {

	5132 incomment = 1;

	5133 /* do not increment past <! - some people use <!--> */

	5134 base += 2;

	5135 }

	5136 }

	5137 if (incomment) {

	5138 if (base + 3 > len)

	5139 return (-1);

	5140 if ((buf[base] == '-') && (buf[base + 1] == '-') &&

	5141 (buf[base + 2] == '>')) {

	5142 incomment = 0;

	5143 base += 2;

	5144 }

	5145 continue;

	5146 }

	5147 for (i = 0; i < stopLen; ++i) {

	5148 if (buf[base] == stop[i]) {

	5149 ctxt->checkIndex = 0;

	5150 return (base - (in->cur - in->base));

	5151 }

	5152 }

	5153 }

	5154 ctxt->checkIndex = base;

	5155 return (-1);

	5156 }

	5157

	5158 /**

4564 * htmlParseTryOrFinish:	5159 * htmlParseTryOrFinish:

4565 * @ctxt: an HTML parser context	5160 * @ctxt: an HTML parser context

4566 * @terminate: last chunk indicator	5161 * @terminate: last chunk indicator

4567 *	5162 *

4568 * Try to progress on parsing	5163 * Try to progress on parsing

4569 *	5164 *

4570 * Returns zero if no parsing was possible	5165 * Returns zero if no parsing was possible

4571 */	5166 */

4572 static int	5167 static int

4573 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {	5168 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

(...skipping 58 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4632 while (1) {	5227 while (1) {

4633	5228

4634 in = ctxt->input;	5229 in = ctxt->input;

4635 if (in == NULL) break;	5230 if (in == NULL) break;

4636 if (in->buf == NULL)	5231 if (in->buf == NULL)

4637 avail = in->length - (in->cur - in->base);	5232 avail = in->length - (in->cur - in->base);

4638 else	5233 else

4639 avail = in->buf->buffer->use - (in->cur - in->base);	5234 avail = in->buf->buffer->use - (in->cur - in->base);

4640 if ((avail == 0) && (terminate)) {	5235 if ((avail == 0) && (terminate)) {

4641 htmlAutoCloseOnEnd(ctxt);	5236 htmlAutoCloseOnEnd(ctxt);

4642 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {	5237 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {

4643 /*	5238 /*

4644 * SAX: end of the document processing.	5239 * SAX: end of the document processing.

4645 */	5240 */

4646 ctxt->instate = XML_PARSER_EOF;	5241 ctxt->instate = XML_PARSER_EOF;

4647 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))	5242 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))

4648 ctxt->sax->endDocument(ctxt->userData);	5243 ctxt->sax->endDocument(ctxt->userData);

4649 }	5244 }

4650 }	5245 }

4651 if (avail < 1)	5246 if (avail < 1)

4652 goto done;	5247 goto done;

(...skipping 29 matching lines...) Expand all Loading...
4682 ctxt->sax->startDocument(ctxt->userData);	5277 ctxt->sax->startDocument(ctxt->userData);

4683	5278

4684 cur = in->cur[0];	5279 cur = in->cur[0];

4685 next = in->cur[1];	5280 next = in->cur[1];

4686 if ((cur == '<') && (next == '!') &&	5281 if ((cur == '<') && (next == '!') &&

4687 (UPP(2) == 'D') && (UPP(3) == 'O') &&	5282 (UPP(2) == 'D') && (UPP(3) == 'O') &&

4688 (UPP(4) == 'C') && (UPP(5) == 'T') &&	5283 (UPP(4) == 'C') && (UPP(5) == 'T') &&

4689 (UPP(6) == 'Y') && (UPP(7) == 'P') &&	5284 (UPP(6) == 'Y') && (UPP(7) == 'P') &&

4690 (UPP(8) == 'E')) {	5285 (UPP(8) == 'E')) {

4691 if ((!terminate) &&	5286 if ((!terminate) &&

4692 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))	5287 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))

4693 goto done;	5288 goto done;

4694 #ifdef DEBUG_PUSH	5289 #ifdef DEBUG_PUSH

4695 xmlGenericError(xmlGenericErrorContext,	5290 xmlGenericError(xmlGenericErrorContext,

4696 "HPP: Parsing internal subset\n");	5291 "HPP: Parsing internal subset\n");

4697 #endif	5292 #endif

4698 htmlParseDocTypeDecl(ctxt);	5293 htmlParseDocTypeDecl(ctxt);

4699 ctxt->instate = XML_PARSER_PROLOG;	5294 ctxt->instate = XML_PARSER_PROLOG;

4700 #ifdef DEBUG_PUSH	5295 #ifdef DEBUG_PUSH

4701 xmlGenericError(xmlGenericErrorContext,	5296 xmlGenericError(xmlGenericErrorContext,

4702 "HPP: entering PROLOG\n");	5297 "HPP: entering PROLOG\n");

(...skipping 12 matching lines...) Expand all Loading...
4715 avail = in->length - (in->cur - in->base);	5310 avail = in->length - (in->cur - in->base);

4716 else	5311 else

4717 avail = in->buf->buffer->use - (in->cur - in->base);	5312 avail = in->buf->buffer->use - (in->cur - in->base);

4718 if (avail < 2)	5313 if (avail < 2)

4719 goto done;	5314 goto done;

4720 cur = in->cur[0];	5315 cur = in->cur[0];

4721 next = in->cur[1];	5316 next = in->cur[1];

4722 if ((cur == '<') && (next == '!') &&	5317 if ((cur == '<') && (next == '!') &&

4723 (in->cur[2] == '-') && (in->cur[3] == '-')) {	5318 (in->cur[2] == '-') && (in->cur[3] == '-')) {

4724 if ((!terminate) &&	5319 if ((!terminate) &&

4725 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))	5320 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0) )

4726 goto done;	5321 goto done;

4727 #ifdef DEBUG_PUSH	5322 #ifdef DEBUG_PUSH

4728 xmlGenericError(xmlGenericErrorContext,	5323 xmlGenericError(xmlGenericErrorContext,

4729 "HPP: Parsing Comment\n");	5324 "HPP: Parsing Comment\n");

4730 #endif	5325 #endif

4731 htmlParseComment(ctxt);	5326 htmlParseComment(ctxt);

4732 ctxt->instate = XML_PARSER_MISC;	5327 ctxt->instate = XML_PARSER_MISC;

4733 } else if ((cur == '<') && (next == '?')) {	5328 } else if ((cur == '<') && (next == '?')) {

4734 if ((!terminate) &&	5329 if ((!terminate) &&

4735 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))	5330 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))

4736 goto done;	5331 goto done;

4737 #ifdef DEBUG_PUSH	5332 #ifdef DEBUG_PUSH

4738 xmlGenericError(xmlGenericErrorContext,	5333 xmlGenericError(xmlGenericErrorContext,

4739 "HPP: Parsing PI\n");	5334 "HPP: Parsing PI\n");

4740 #endif	5335 #endif

4741 htmlParsePI(ctxt);	5336 htmlParsePI(ctxt);

4742 ctxt->instate = XML_PARSER_MISC;	5337 ctxt->instate = XML_PARSER_MISC;

4743 } else if ((cur == '<') && (next == '!') &&	5338 } else if ((cur == '<') && (next == '!') &&

4744 (UPP(2) == 'D') && (UPP(3) == 'O') &&	5339 (UPP(2) == 'D') && (UPP(3) == 'O') &&

4745 (UPP(4) == 'C') && (UPP(5) == 'T') &&	5340 (UPP(4) == 'C') && (UPP(5) == 'T') &&

4746 (UPP(6) == 'Y') && (UPP(7) == 'P') &&	5341 (UPP(6) == 'Y') && (UPP(7) == 'P') &&

4747 (UPP(8) == 'E')) {	5342 (UPP(8) == 'E')) {

4748 if ((!terminate) &&	5343 if ((!terminate) &&

4749 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))	5344 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))

4750 goto done;	5345 goto done;

4751 #ifdef DEBUG_PUSH	5346 #ifdef DEBUG_PUSH

4752 xmlGenericError(xmlGenericErrorContext,	5347 xmlGenericError(xmlGenericErrorContext,

4753 "HPP: Parsing internal subset\n");	5348 "HPP: Parsing internal subset\n");

4754 #endif	5349 #endif

4755 htmlParseDocTypeDecl(ctxt);	5350 htmlParseDocTypeDecl(ctxt);

4756 ctxt->instate = XML_PARSER_PROLOG;	5351 ctxt->instate = XML_PARSER_PROLOG;

4757 #ifdef DEBUG_PUSH	5352 #ifdef DEBUG_PUSH

4758 xmlGenericError(xmlGenericErrorContext,	5353 xmlGenericError(xmlGenericErrorContext,

4759 "HPP: entering PROLOG\n");	5354 "HPP: entering PROLOG\n");

4760 #endif	5355 #endif

4761 } else if ((cur == '<') && (next == '!') &&	5356 } else if ((cur == '<') && (next == '!') &&

4762 (avail < 9)) {	5357 (avail < 9)) {

4763 goto done;	5358 goto done;

4764 } else {	5359 } else {

4765 ctxt->instate = XML_PARSER_START_TAG;	5360 ctxt->instate = XML_PARSER_START_TAG;

4766 #ifdef DEBUG_PUSH	5361 #ifdef DEBUG_PUSH

4767 xmlGenericError(xmlGenericErrorContext,	5362 xmlGenericError(xmlGenericErrorContext,

4768 "HPP: entering START_TAG\n");	5363 "HPP: entering START_TAG\n");

4769 #endif	5364 #endif

4770 }	5365 }

4771 break;	5366 break;

4772 case XML_PARSER_PROLOG:	5367 case XML_PARSER_PROLOG:

4773 SKIP_BLANKS;	5368 SKIP_BLANKS;

4774 if (in->buf == NULL)	5369 if (in->buf == NULL)

4775 avail = in->length - (in->cur - in->base);	5370 avail = in->length - (in->cur - in->base);

4776 else	5371 else

4777 avail = in->buf->buffer->use - (in->cur - in->base);	5372 avail = in->buf->buffer->use - (in->cur - in->base);

4778 » » if (avail < 2)	5373 » » if (avail < 2)

4779 goto done;	5374 goto done;

4780 cur = in->cur[0];	5375 cur = in->cur[0];

4781 next = in->cur[1];	5376 next = in->cur[1];

4782 if ((cur == '<') && (next == '!') &&	5377 if ((cur == '<') && (next == '!') &&

4783 (in->cur[2] == '-') && (in->cur[3] == '-')) {	5378 (in->cur[2] == '-') && (in->cur[3] == '-')) {

4784 if ((!terminate) &&	5379 if ((!terminate) &&

4785 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))	5380 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0) )

4786 goto done;	5381 goto done;

4787 #ifdef DEBUG_PUSH	5382 #ifdef DEBUG_PUSH

4788 xmlGenericError(xmlGenericErrorContext,	5383 xmlGenericError(xmlGenericErrorContext,

4789 "HPP: Parsing Comment\n");	5384 "HPP: Parsing Comment\n");

4790 #endif	5385 #endif

4791 htmlParseComment(ctxt);	5386 htmlParseComment(ctxt);

4792 ctxt->instate = XML_PARSER_PROLOG;	5387 ctxt->instate = XML_PARSER_PROLOG;

4793 } else if ((cur == '<') && (next == '?')) {	5388 } else if ((cur == '<') && (next == '?')) {

4794 if ((!terminate) &&	5389 if ((!terminate) &&

4795 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))	5390 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))

4796 goto done;	5391 goto done;

4797 #ifdef DEBUG_PUSH	5392 #ifdef DEBUG_PUSH

4798 xmlGenericError(xmlGenericErrorContext,	5393 xmlGenericError(xmlGenericErrorContext,

4799 "HPP: Parsing PI\n");	5394 "HPP: Parsing PI\n");

4800 #endif	5395 #endif

4801 htmlParsePI(ctxt);	5396 htmlParsePI(ctxt);

4802 ctxt->instate = XML_PARSER_PROLOG;	5397 ctxt->instate = XML_PARSER_PROLOG;

4803 } else if ((cur == '<') && (next == '!') &&	5398 } else if ((cur == '<') && (next == '!') &&

4804 (avail < 4)) {	5399 (avail < 4)) {

4805 goto done;	5400 goto done;

(...skipping 16 matching lines...) Expand all Loading...
4822 if (IS_BLANK_CH(cur)) {	5417 if (IS_BLANK_CH(cur)) {

4823 htmlParseCharData(ctxt);	5418 htmlParseCharData(ctxt);

4824 goto done;	5419 goto done;

4825 }	5420 }

4826 if (avail < 2)	5421 if (avail < 2)

4827 goto done;	5422 goto done;

4828 next = in->cur[1];	5423 next = in->cur[1];

4829 if ((cur == '<') && (next == '!') &&	5424 if ((cur == '<') && (next == '!') &&

4830 (in->cur[2] == '-') && (in->cur[3] == '-')) {	5425 (in->cur[2] == '-') && (in->cur[3] == '-')) {

4831 if ((!terminate) &&	5426 if ((!terminate) &&

4832 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))	5427 » » (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0) )

4833 goto done;	5428 goto done;

4834 #ifdef DEBUG_PUSH	5429 #ifdef DEBUG_PUSH

4835 xmlGenericError(xmlGenericErrorContext,	5430 xmlGenericError(xmlGenericErrorContext,

4836 "HPP: Parsing Comment\n");	5431 "HPP: Parsing Comment\n");

4837 #endif	5432 #endif

4838 htmlParseComment(ctxt);	5433 htmlParseComment(ctxt);

4839 ctxt->instate = XML_PARSER_EPILOG;	5434 ctxt->instate = XML_PARSER_EPILOG;

4840 } else if ((cur == '<') && (next == '?')) {	5435 } else if ((cur == '<') && (next == '?')) {

4841 if ((!terminate) &&	5436 if ((!terminate) &&

4842 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))	5437 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))

4843 goto done;	5438 goto done;

4844 #ifdef DEBUG_PUSH	5439 #ifdef DEBUG_PUSH

4845 xmlGenericError(xmlGenericErrorContext,	5440 xmlGenericError(xmlGenericErrorContext,

4846 "HPP: Parsing PI\n");	5441 "HPP: Parsing PI\n");

4847 #endif	5442 #endif

4848 htmlParsePI(ctxt);	5443 htmlParsePI(ctxt);

4849 ctxt->instate = XML_PARSER_EPILOG;	5444 ctxt->instate = XML_PARSER_EPILOG;

4850 } else if ((cur == '<') && (next == '!') &&	5445 } else if ((cur == '<') && (next == '!') &&

4851 (avail < 4)) {	5446 (avail < 4)) {

4852 goto done;	5447 goto done;

(...skipping 29 matching lines...) Expand all Loading...
4882 if (in->cur[1] == '/') {	5477 if (in->cur[1] == '/') {

4883 ctxt->instate = XML_PARSER_END_TAG;	5478 ctxt->instate = XML_PARSER_END_TAG;

4884 ctxt->checkIndex = 0;	5479 ctxt->checkIndex = 0;

4885 #ifdef DEBUG_PUSH	5480 #ifdef DEBUG_PUSH

4886 xmlGenericError(xmlGenericErrorContext,	5481 xmlGenericError(xmlGenericErrorContext,

4887 "HPP: entering END_TAG\n");	5482 "HPP: entering END_TAG\n");

4888 #endif	5483 #endif

4889 break;	5484 break;

4890 }	5485 }

4891 if ((!terminate) &&	5486 if ((!terminate) &&

4892 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))	5487 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))

4893 goto done;	5488 goto done;

4894	5489

4895 failed = htmlParseStartTag(ctxt);	5490 failed = htmlParseStartTag(ctxt);

4896 name = ctxt->name;	5491 name = ctxt->name;

4897 if ((failed == -1) \|\|	5492 if ((failed == -1) \|\|

4898 (name == NULL)) {	5493 (name == NULL)) {

4899 if (CUR == '>')	5494 if (CUR == '>')

4900 NEXT;	5495 NEXT;

4901 break;	5496 break;

4902 }	5497 }

(...skipping 26 matching lines...) Expand all Loading...
4929 if (CUR == '>') {	5524 if (CUR == '>') {

4930 NEXT;	5525 NEXT;

4931 } else {	5526 } else {

4932 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,	5527 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,

4933 "Couldn't find end of Start Tag %s\n",	5528 "Couldn't find end of Start Tag %s\n",

4934 name, NULL);	5529 name, NULL);

4935	5530

4936 /*	5531 /*

4937 * end of parsing of this node.	5532 * end of parsing of this node.

4938 */	5533 */

4939 » » if (xmlStrEqual(name, ctxt->name)) {	5534 » » if (xmlStrEqual(name, ctxt->name)) {

4940 nodePop(ctxt);	5535 nodePop(ctxt);

4941 htmlnamePop(ctxt);	5536 htmlnamePop(ctxt);

4942 » » }	5537 » » }

4943	5538

4944 ctxt->instate = XML_PARSER_CONTENT;	5539 ctxt->instate = XML_PARSER_CONTENT;

4945 #ifdef DEBUG_PUSH	5540 #ifdef DEBUG_PUSH

4946 xmlGenericError(xmlGenericErrorContext,	5541 xmlGenericError(xmlGenericErrorContext,

4947 "HPP: entering CONTENT\n");	5542 "HPP: entering CONTENT\n");

4948 #endif	5543 #endif

4949 break;	5544 break;

4950 }	5545 }

4951	5546

4952 /*	5547 /*

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5007 cons = ctxt->nbChars;	5602 cons = ctxt->nbChars;

5008 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) \|\|	5603 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) \|\|

5009 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {	5604 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {

5010 /*	5605 /*

5011 * Handle SCRIPT/STYLE separately	5606 * Handle SCRIPT/STYLE separately

5012 */	5607 */

5013 if (!terminate) {	5608 if (!terminate) {

5014 int idx;	5609 int idx;

5015 xmlChar val;	5610 xmlChar val;

5016	5611

5017 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0);	5612 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1);

5018 if (idx < 0)	5613 if (idx < 0)

5019 goto done;	5614 goto done;

5020 val = in->cur[idx + 2];	5615 val = in->cur[idx + 2];

5021 if (val == 0) /* bad cut of input */	5616 if (val == 0) /* bad cut of input */

5022 goto done;	5617 goto done;

5023 }	5618 }

5024 htmlParseScript(ctxt);	5619 htmlParseScript(ctxt);

5025 if ((cur == '<') && (next == '/')) {	5620 if ((cur == '<') && (next == '/')) {

5026 ctxt->instate = XML_PARSER_END_TAG;	5621 ctxt->instate = XML_PARSER_END_TAG;

5027 ctxt->checkIndex = 0;	5622 ctxt->checkIndex = 0;

5028 #ifdef DEBUG_PUSH	5623 #ifdef DEBUG_PUSH

5029 xmlGenericError(xmlGenericErrorContext,	5624 xmlGenericError(xmlGenericErrorContext,

5030 "HPP: entering END_TAG\n");	5625 "HPP: entering END_TAG\n");

5031 #endif	5626 #endif

5032 break;	5627 break;

5033 }	5628 }

5034 } else {	5629 } else {

5035 /*	5630 /*

5036 * Sometimes DOCTYPE arrives in the middle of the document	5631 * Sometimes DOCTYPE arrives in the middle of the document

5037 */	5632 */

5038 if ((cur == '<') && (next == '!') &&	5633 if ((cur == '<') && (next == '!') &&

5039 (UPP(2) == 'D') && (UPP(3) == 'O') &&	5634 (UPP(2) == 'D') && (UPP(3) == 'O') &&

5040 (UPP(4) == 'C') && (UPP(5) == 'T') &&	5635 (UPP(4) == 'C') && (UPP(5) == 'T') &&

5041 (UPP(6) == 'Y') && (UPP(7) == 'P') &&	5636 (UPP(6) == 'Y') && (UPP(7) == 'P') &&

5042 (UPP(8) == 'E')) {	5637 (UPP(8) == 'E')) {

5043 if ((!terminate) &&	5638 if ((!terminate) &&

5044 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))	5639 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0) )

5045 goto done;	5640 goto done;

5046 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,	5641 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,

5047 "Misplaced DOCTYPE declaration\n",	5642 "Misplaced DOCTYPE declaration\n",

5048 BAD_CAST "DOCTYPE" , NULL);	5643 BAD_CAST "DOCTYPE" , NULL);

5049 htmlParseDocTypeDecl(ctxt);	5644 htmlParseDocTypeDecl(ctxt);

5050 } else if ((cur == '<') && (next == '!') &&	5645 } else if ((cur == '<') && (next == '!') &&

5051 (in->cur[2] == '-') && (in->cur[3] == '-')) {	5646 (in->cur[2] == '-') && (in->cur[3] == '-')) {

5052 if ((!terminate) &&	5647 if ((!terminate) &&

5053 (htmlParseLookupSequence(	5648 (htmlParseLookupSequence(

5054 » » » » » ctxt, '-', '-', '>', 1) < 0))	5649 » » » » ctxt, '-', '-', '>', 1, 1) < 0))

5055 goto done;	5650 goto done;

5056 #ifdef DEBUG_PUSH	5651 #ifdef DEBUG_PUSH

5057 xmlGenericError(xmlGenericErrorContext,	5652 xmlGenericError(xmlGenericErrorContext,

5058 "HPP: Parsing Comment\n");	5653 "HPP: Parsing Comment\n");

5059 #endif	5654 #endif

5060 htmlParseComment(ctxt);	5655 htmlParseComment(ctxt);

5061 ctxt->instate = XML_PARSER_CONTENT;	5656 ctxt->instate = XML_PARSER_CONTENT;

5062 } else if ((cur == '<') && (next == '?')) {	5657 } else if ((cur == '<') && (next == '?')) {

5063 if ((!terminate) &&	5658 if ((!terminate) &&

5064 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))	5659 » » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0) )

5065 goto done;	5660 goto done;

5066 #ifdef DEBUG_PUSH	5661 #ifdef DEBUG_PUSH

5067 xmlGenericError(xmlGenericErrorContext,	5662 xmlGenericError(xmlGenericErrorContext,

5068 "HPP: Parsing PI\n");	5663 "HPP: Parsing PI\n");

5069 #endif	5664 #endif

5070 htmlParsePI(ctxt);	5665 htmlParsePI(ctxt);

5071 ctxt->instate = XML_PARSER_CONTENT;	5666 ctxt->instate = XML_PARSER_CONTENT;

5072 } else if ((cur == '<') && (next == '!') && (avail < 4)) {	5667 } else if ((cur == '<') && (next == '!') && (avail < 4)) {

5073 goto done;	5668 goto done;

5074 } else if ((cur == '<') && (next == '/')) {	5669 } else if ((cur == '<') && (next == '/')) {

5075 ctxt->instate = XML_PARSER_END_TAG;	5670 ctxt->instate = XML_PARSER_END_TAG;

5076 ctxt->checkIndex = 0;	5671 ctxt->checkIndex = 0;

5077 #ifdef DEBUG_PUSH	5672 #ifdef DEBUG_PUSH

5078 xmlGenericError(xmlGenericErrorContext,	5673 xmlGenericError(xmlGenericErrorContext,

5079 "HPP: entering END_TAG\n");	5674 "HPP: entering END_TAG\n");

5080 #endif	5675 #endif

5081 break;	5676 break;

5082 } else if (cur == '<') {	5677 } else if (cur == '<') {

5083 ctxt->instate = XML_PARSER_START_TAG;	5678 ctxt->instate = XML_PARSER_START_TAG;

5084 ctxt->checkIndex = 0;	5679 ctxt->checkIndex = 0;

5085 #ifdef DEBUG_PUSH	5680 #ifdef DEBUG_PUSH

5086 xmlGenericError(xmlGenericErrorContext,	5681 xmlGenericError(xmlGenericErrorContext,

5087 "HPP: entering START_TAG\n");	5682 "HPP: entering START_TAG\n");

5088 #endif	5683 #endif

5089 break;	5684 break;

5090 } else if (cur == '&') {	5685 } else if (cur == '&') {

5091 if ((!terminate) &&	5686 if ((!terminate) &&

5092 » » » (htmlParseLookupSequence(ctxt, ';', 0, 0, 0) < 0))	5687 » » » (htmlParseLookupChars(ctxt,

	5688 BAD_CAST "; >/", 4) < 0))

5093 goto done;	5689 goto done;

5094 #ifdef DEBUG_PUSH	5690 #ifdef DEBUG_PUSH

5095 xmlGenericError(xmlGenericErrorContext,	5691 xmlGenericError(xmlGenericErrorContext,

5096 "HPP: Parsing Reference\n");	5692 "HPP: Parsing Reference\n");

5097 #endif	5693 #endif

5098 /* TODO: check generation of subtrees if noent !!! */	5694 /* TODO: check generation of subtrees if noent !!! */

5099 htmlParseReference(ctxt);	5695 htmlParseReference(ctxt);

5100 } else {	5696 } else {

5101 /*	5697 /*

5102 * check that the text sequence is complete	5698 * check that the text sequence is complete

5103 * before handing out the data to the parser	5699 * before handing out the data to the parser

5104 * to avoid problems with erroneous end of	5700 * to avoid problems with erroneous end of

5105 * data detection.	5701 * data detection.

5106 */	5702 */

5107 if ((!terminate) &&	5703 if ((!terminate) &&

5108 » » » (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))	5704 (htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0))

5109 goto done;	5705 goto done;

5110 ctxt->checkIndex = 0;	5706 ctxt->checkIndex = 0;

5111 #ifdef DEBUG_PUSH	5707 #ifdef DEBUG_PUSH

5112 xmlGenericError(xmlGenericErrorContext,	5708 xmlGenericError(xmlGenericErrorContext,

5113 "HPP: Parsing char data\n");	5709 "HPP: Parsing char data\n");

5114 #endif	5710 #endif

5115 htmlParseCharData(ctxt);	5711 htmlParseCharData(ctxt);

5116 }	5712 }

5117 }	5713 }

5118 if (cons == ctxt->nbChars) {	5714 if (cons == ctxt->nbChars) {

5119 if (ctxt->node != NULL) {	5715 if (ctxt->node != NULL) {

5120 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,	5716 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,

5121 "detected an error in element content\n",	5717 "detected an error in element content\n",

5122 NULL, NULL);	5718 NULL, NULL);

5123 }	5719 }

5124 NEXT;	5720 NEXT;

5125 break;	5721 break;

5126 }	5722 }

5127	5723

5128 break;	5724 break;

5129 }	5725 }

5130 case XML_PARSER_END_TAG:	5726 case XML_PARSER_END_TAG:

5131 if (avail < 2)	5727 if (avail < 2)

5132 goto done;	5728 goto done;

5133 if ((!terminate) &&	5729 if ((!terminate) &&

5134 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))	5730 » » (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))

5135 goto done;	5731 goto done;

5136 htmlParseEndTag(ctxt);	5732 htmlParseEndTag(ctxt);

5137 if (ctxt->nameNr == 0) {	5733 if (ctxt->nameNr == 0) {

5138 ctxt->instate = XML_PARSER_EPILOG;	5734 ctxt->instate = XML_PARSER_EPILOG;

5139 } else {	5735 } else {

5140 ctxt->instate = XML_PARSER_CONTENT;	5736 ctxt->instate = XML_PARSER_CONTENT;

5141 }	5737 }

5142 ctxt->checkIndex = 0;	5738 ctxt->checkIndex = 0;

5143 #ifdef DEBUG_PUSH	5739 #ifdef DEBUG_PUSH

5144 xmlGenericError(xmlGenericErrorContext,	5740 xmlGenericError(xmlGenericErrorContext,

(...skipping 106 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5251 ctxt->instate = XML_PARSER_CONTENT;	5847 ctxt->instate = XML_PARSER_CONTENT;

5252 ctxt->checkIndex = 0;	5848 ctxt->checkIndex = 0;

5253 #ifdef DEBUG_PUSH	5849 #ifdef DEBUG_PUSH

5254 xmlGenericError(xmlGenericErrorContext,	5850 xmlGenericError(xmlGenericErrorContext,

5255 "HPP: entering CONTENT\n");	5851 "HPP: entering CONTENT\n");

5256 #endif	5852 #endif

5257 break;	5853 break;

5258	5854

5259 }	5855 }

5260 }	5856 }

5261 done:	5857 done:

5262 if ((avail == 0) && (terminate)) {	5858 if ((avail == 0) && (terminate)) {

5263 htmlAutoCloseOnEnd(ctxt);	5859 htmlAutoCloseOnEnd(ctxt);

5264 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {	5860 » if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {

5265 /*	5861 /*

5266 * SAX: end of the document processing.	5862 * SAX: end of the document processing.

5267 */	5863 */

5268 ctxt->instate = XML_PARSER_EOF;	5864 ctxt->instate = XML_PARSER_EOF;

5269 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))	5865 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))

5270 ctxt->sax->endDocument(ctxt->userData);	5866 ctxt->sax->endDocument(ctxt->userData);

5271 }	5867 }

5272 }	5868 }

5273 if ((ctxt->myDoc != NULL) &&	5869 if ((ctxt->myDoc != NULL) &&

5274 ((terminate) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|	5870 ((terminate) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|

5275 (ctxt->instate == XML_PARSER_EPILOG))) {	5871 (ctxt->instate == XML_PARSER_EPILOG))) {

5276 xmlDtdPtr dtd;	5872 xmlDtdPtr dtd;

5277 dtd = xmlGetIntSubset(ctxt->myDoc);	5873 dtd = xmlGetIntSubset(ctxt->myDoc);

5278 if (dtd == NULL)	5874 if (dtd == NULL)

5279 » ctxt->myDoc->intSubset =	5875 » ctxt->myDoc->intSubset =

5280 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",	5876 » » xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",

5281 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",	5877 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",

5282 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");	5878 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");

5283 }	5879 }

5284 #ifdef DEBUG_PUSH	5880 #ifdef DEBUG_PUSH

5285 xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);	5881 xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);

5286 #endif	5882 #endif

5287 return(ret);	5883 return(ret);

5288 }	5884 }

5289	5885

5290 /**	5886 /**

(...skipping 13 matching lines...) Expand all Loading...
5304 if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {	5900 if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {

5305 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,	5901 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,

5306 "htmlParseChunk: context error\n", NULL, NULL);	5902 "htmlParseChunk: context error\n", NULL, NULL);

5307 return(XML_ERR_INTERNAL_ERROR);	5903 return(XML_ERR_INTERNAL_ERROR);

5308 }	5904 }

5309 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&	5905 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&

5310 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {	5906 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {

5311 int base = ctxt->input->base - ctxt->input->buf->buffer->content;	5907 int base = ctxt->input->base - ctxt->input->buf->buffer->content;

5312 int cur = ctxt->input->cur - ctxt->input->base;	5908 int cur = ctxt->input->cur - ctxt->input->base;

5313 int res;	5909 int res;

5314 »	5910

5315 » res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);»	5911 » res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);

5316 if (res < 0) {	5912 if (res < 0) {

5317 ctxt->errNo = XML_PARSER_EOF;	5913 ctxt->errNo = XML_PARSER_EOF;

5318 ctxt->disableSAX = 1;	5914 ctxt->disableSAX = 1;

5319 return (XML_PARSER_EOF);	5915 return (XML_PARSER_EOF);

5320 }	5916 }

5321 ctxt->input->base = ctxt->input->buf->buffer->content + base;	5917 ctxt->input->base = ctxt->input->buf->buffer->content + base;

5322 ctxt->input->cur = ctxt->input->base + cur;	5918 ctxt->input->cur = ctxt->input->base + cur;

5323 ctxt->input->end =	5919 ctxt->input->end =

5324 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];	5920 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];

5325 #ifdef DEBUG_PUSH	5921 #ifdef DEBUG_PUSH

5326 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);	5922 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);

5327 #endif	5923 #endif

5328	5924

5329 #if 0	5925 #if 0

5330 if ((terminate) \|\| (ctxt->input->buf->buffer->use > 80))	5926 if ((terminate) \|\| (ctxt->input->buf->buffer->use > 80))

5331 htmlParseTryOrFinish(ctxt, terminate);	5927 htmlParseTryOrFinish(ctxt, terminate);

5332 #endif	5928 #endif

5333 } else if (ctxt->instate != XML_PARSER_EOF) {	5929 } else if (ctxt->instate != XML_PARSER_EOF) {

5334 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {	5930 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {

5335 xmlParserInputBufferPtr in = ctxt->input->buf;	5931 xmlParserInputBufferPtr in = ctxt->input->buf;

5336 if ((in->encoder != NULL) && (in->buffer != NULL) &&	5932 if ((in->encoder != NULL) && (in->buffer != NULL) &&

5337 (in->raw != NULL)) {	5933 (in->raw != NULL)) {

5338 int nbchars;	5934 int nbchars;

5339 » »	5935

5340 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);	5936 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);

5341 if (nbchars < 0) {	5937 if (nbchars < 0) {

5342 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,	5938 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,

5343 "encoder error\n", NULL, NULL);	5939 "encoder error\n", NULL, NULL);

5344 return(XML_ERR_INVALID_ENCODING);	5940 return(XML_ERR_INVALID_ENCODING);

5345 }	5941 }

5346 }	5942 }

5347 }	5943 }

5348 }	5944 }

5349 htmlParseTryOrFinish(ctxt, terminate);	5945 htmlParseTryOrFinish(ctxt, terminate);

5350 if (terminate) {	5946 if (terminate) {

5351 if ((ctxt->instate != XML_PARSER_EOF) &&	5947 if ((ctxt->instate != XML_PARSER_EOF) &&

5352 (ctxt->instate != XML_PARSER_EPILOG) &&	5948 (ctxt->instate != XML_PARSER_EPILOG) &&

5353 (ctxt->instate != XML_PARSER_MISC)) {	5949 (ctxt->instate != XML_PARSER_MISC)) {

5354 ctxt->errNo = XML_ERR_DOCUMENT_END;	5950 ctxt->errNo = XML_ERR_DOCUMENT_END;

5355 ctxt->wellFormed = 0;	5951 ctxt->wellFormed = 0;

5356 » }	5952 » }

5357 if (ctxt->instate != XML_PARSER_EOF) {	5953 if (ctxt->instate != XML_PARSER_EOF) {

5358 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))	5954 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))

5359 ctxt->sax->endDocument(ctxt->userData);	5955 ctxt->sax->endDocument(ctxt->userData);

5360 }	5956 }

5361 ctxt->instate = XML_PARSER_EOF;	5957 ctxt->instate = XML_PARSER_EOF;

5362 }	5958 }

5363 return((xmlParserErrors) ctxt->errNo);»	5959 return((xmlParserErrors) ctxt->errNo);

5364 }	5960 }

5365	5961

5366 /************************************************************************	5962 /************************************************************************

5367 * *	5963 * *

5368 * User entry points *	5964 * User entry points *

5369 * *	5965 * *

5370 ************************************************************************/	5966 ************************************************************************/

5371	5967

5372 /**	5968 /**

5373 * htmlCreatePushParserCtxt:	5969 * htmlCreatePushParserCtxt:

5374 * @sax: a SAX handler	5970 * @sax: a SAX handler

5375 * @user_data: The user data returned on SAX callbacks	5971 * @user_data: The user data returned on SAX callbacks

5376 * @chunk: a pointer to an array of chars	5972 * @chunk: a pointer to an array of chars

5377 * @size: number of chars in the array	5973 * @size: number of chars in the array

5378 * @filename: an optional file name or URI	5974 * @filename: an optional file name or URI

5379 * @enc: an optional encoding	5975 * @enc: an optional encoding

5380 *	5976 *

5381 * Create a parser context for using the HTML parser in push mode	5977 * Create a parser context for using the HTML parser in push mode

5382 * The value of @filename is used for fetching external entities	5978 * The value of @filename is used for fetching external entities

5383 * and error/warning reports.	5979 * and error/warning reports.

5384 *	5980 *

5385 * Returns the new parser context or NULL	5981 * Returns the new parser context or NULL

5386 */	5982 */

5387 htmlParserCtxtPtr	5983 htmlParserCtxtPtr

5388 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,	5984 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,

5389 const char chunk, int size, const char filename,	5985 const char chunk, int size, const char filename,

5390 xmlCharEncoding enc) {	5986 xmlCharEncoding enc) {

5391 htmlParserCtxtPtr ctxt;	5987 htmlParserCtxtPtr ctxt;

5392 htmlParserInputPtr inputStream;	5988 htmlParserInputPtr inputStream;

5393 xmlParserInputBufferPtr buf;	5989 xmlParserInputBufferPtr buf;

5394	5990

5395 xmlInitParser();	5991 xmlInitParser();

5396	5992

5397 buf = xmlAllocParserInputBuffer(enc);	5993 buf = xmlAllocParserInputBuffer(enc);

5398 if (buf == NULL) return(NULL);	5994 if (buf == NULL) return(NULL);

(...skipping 10 matching lines...) Expand all Loading...
5409 xmlFree(ctxt->sax);	6005 xmlFree(ctxt->sax);

5410 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));	6006 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));

5411 if (ctxt->sax == NULL) {	6007 if (ctxt->sax == NULL) {

5412 xmlFree(buf);	6008 xmlFree(buf);

5413 xmlFree(ctxt);	6009 xmlFree(ctxt);

5414 return(NULL);	6010 return(NULL);

5415 }	6011 }

5416 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));	6012 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));

5417 if (user_data != NULL)	6013 if (user_data != NULL)

5418 ctxt->userData = user_data;	6014 ctxt->userData = user_data;

5419 }»	6015 }

5420 if (filename == NULL) {	6016 if (filename == NULL) {

5421 ctxt->directory = NULL;	6017 ctxt->directory = NULL;

5422 } else {	6018 } else {

5423 ctxt->directory = xmlParserGetDirectory(filename);	6019 ctxt->directory = xmlParserGetDirectory(filename);

5424 }	6020 }

5425	6021

5426 inputStream = htmlNewInputStream(ctxt);	6022 inputStream = htmlNewInputStream(ctxt);

5427 if (inputStream == NULL) {	6023 if (inputStream == NULL) {

5428 xmlFreeParserCtxt(ctxt);	6024 xmlFreeParserCtxt(ctxt);

5429 xmlFree(buf);	6025 xmlFree(buf);

5430 return(NULL);	6026 return(NULL);

5431 }	6027 }

5432	6028

5433 if (filename == NULL)	6029 if (filename == NULL)

5434 inputStream->filename = NULL;	6030 inputStream->filename = NULL;

5435 else	6031 else

5436 inputStream->filename = (char *)	6032 inputStream->filename = (char *)

5437 xmlCanonicPath((const xmlChar *) filename);	6033 xmlCanonicPath((const xmlChar *) filename);

5438 inputStream->buf = buf;	6034 inputStream->buf = buf;

5439 inputStream->base = inputStream->buf->buffer->content;	6035 inputStream->base = inputStream->buf->buffer->content;

5440 inputStream->cur = inputStream->buf->buffer->content;	6036 inputStream->cur = inputStream->buf->buffer->content;

5441 inputStream->end =	6037 inputStream->end =

5442 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];	6038 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];

5443	6039

5444 inputPush(ctxt, inputStream);	6040 inputPush(ctxt, inputStream);

5445	6041

5446 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&	6042 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&

5447 (ctxt->input->buf != NULL)) {»	6043 (ctxt->input->buf != NULL)) {

5448 int base = ctxt->input->base - ctxt->input->buf->buffer->content;	6044 int base = ctxt->input->base - ctxt->input->buf->buffer->content;

5449 int cur = ctxt->input->cur - ctxt->input->base;	6045 int cur = ctxt->input->cur - ctxt->input->base;

5450	6046

5451 » xmlParserInputBufferPush(ctxt->input->buf, size, chunk);»	6047 » xmlParserInputBufferPush(ctxt->input->buf, size, chunk);

5452	6048

5453 ctxt->input->base = ctxt->input->buf->buffer->content + base;	6049 ctxt->input->base = ctxt->input->buf->buffer->content + base;

5454 ctxt->input->cur = ctxt->input->base + cur;	6050 ctxt->input->cur = ctxt->input->base + cur;

5455 ctxt->input->end =	6051 ctxt->input->end =

5456 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];	6052 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];

5457 #ifdef DEBUG_PUSH	6053 #ifdef DEBUG_PUSH

5458 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);	6054 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);

5459 #endif	6055 #endif

5460 }	6056 }

5461 ctxt->progressive = 1;	6057 ctxt->progressive = 1;

5462	6058

5463 return(ctxt);	6059 return(ctxt);

5464 }	6060 }

5465 #endif /* LIBXML_PUSH_ENABLED */	6061 #endif /* LIBXML_PUSH_ENABLED */

5466	6062

5467 /**	6063 /**

5468 * htmlSAXParseDoc:	6064 * htmlSAXParseDoc:

5469 * @cur: a pointer to an array of xmlChar	6065 * @cur: a pointer to an array of xmlChar

5470 * @encoding: a free form C string describing the HTML document encoding, or NULL	6066 * @encoding: a free form C string describing the HTML document encoding, or NULL

5471 * @sax: the SAX handler block	6067 * @sax: the SAX handler block

5472 * @userData: if using SAX, this pointer will be provided on callbacks.	6068 * @userData: if using SAX, this pointer will be provided on callbacks.

5473 *	6069 *

5474 * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks	6070 * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks

5475 * to handle parse events. If sax is NULL, fallback to the default DOM	6071 * to handle parse events. If sax is NULL, fallback to the default DOM

5476 * behavior and return a tree.	6072 * behavior and return a tree.

5477 *	6073 *

5478 * Returns the resulting document tree unless SAX is NULL or the document is	6074 * Returns the resulting document tree unless SAX is NULL or the document is

5479 * not well formed.	6075 * not well formed.

5480 */	6076 */

5481	6077

5482 htmlDocPtr	6078 htmlDocPtr

5483 htmlSAXParseDoc(xmlChar cur, const char encoding, htmlSAXHandlerPtr sax, void *userData) {	6079 htmlSAXParseDoc(xmlChar cur, const char encoding, htmlSAXHandlerPtr sax, void *userData) {

5484 htmlDocPtr ret;	6080 htmlDocPtr ret;

5485 htmlParserCtxtPtr ctxt;	6081 htmlParserCtxtPtr ctxt;

5486	6082

5487 xmlInitParser();	6083 xmlInitParser();

5488	6084

5489 if (cur == NULL) return(NULL);	6085 if (cur == NULL) return(NULL);

5490	6086

5491	6087

5492 ctxt = htmlCreateDocParserCtxt(cur, encoding);	6088 ctxt = htmlCreateDocParserCtxt(cur, encoding);

5493 if (ctxt == NULL) return(NULL);	6089 if (ctxt == NULL) return(NULL);

5494 if (sax != NULL) {	6090 if (sax != NULL) {

5495 if (ctxt->sax != NULL) xmlFree (ctxt->sax);	6091 if (ctxt->sax != NULL) xmlFree (ctxt->sax);

5496 ctxt->sax = sax;	6092 ctxt->sax = sax;

5497 ctxt->userData = userData;	6093 ctxt->userData = userData;

5498 }	6094 }

5499	6095

5500 htmlParseDocument(ctxt);	6096 htmlParseDocument(ctxt);

5501 ret = ctxt->myDoc;	6097 ret = ctxt->myDoc;

5502 if (sax != NULL) {	6098 if (sax != NULL) {

5503 ctxt->sax = NULL;	6099 ctxt->sax = NULL;

5504 ctxt->userData = NULL;	6100 ctxt->userData = NULL;

5505 }	6101 }

5506 htmlFreeParserCtxt(ctxt);	6102 htmlFreeParserCtxt(ctxt);

5507	6103

5508 return(ret);	6104 return(ret);

5509 }	6105 }

5510	6106

5511 /**	6107 /**

5512 * htmlParseDoc:	6108 * htmlParseDoc:

5513 * @cur: a pointer to an array of xmlChar	6109 * @cur: a pointer to an array of xmlChar

5514 * @encoding: a free form C string describing the HTML document encoding, or NULL	6110 * @encoding: a free form C string describing the HTML document encoding, or NULL

5515 *	6111 *

5516 * parse an HTML in-memory document and build a tree.	6112 * parse an HTML in-memory document and build a tree.

5517 *	6113 *

5518 * Returns the resulting document tree	6114 * Returns the resulting document tree

5519 */	6115 */

5520	6116

5521 htmlDocPtr	6117 htmlDocPtr

5522 htmlParseDoc(xmlChar cur, const char encoding) {	6118 htmlParseDoc(xmlChar cur, const char encoding) {

5523 return(htmlSAXParseDoc(cur, encoding, NULL, NULL));	6119 return(htmlSAXParseDoc(cur, encoding, NULL, NULL));

5524 }	6120 }

5525	6121

5526	6122

5527 /**	6123 /**

5528 * htmlCreateFileParserCtxt:	6124 * htmlCreateFileParserCtxt:

5529 * @filename: the filename	6125 * @filename: the filename

5530 * @encoding: a free form C string describing the HTML document encoding, or NULL	6126 * @encoding: a free form C string describing the HTML document encoding, or NULL

5531 *	6127 *

5532 * Create a parser context for a file content.	6128 * Create a parser context for a file content.

5533 * Automatic support for ZLIB/Compress compressed document is provided	6129 * Automatic support for ZLIB/Compress compressed document is provided

5534 * by default if found at compile-time.	6130 * by default if found at compile-time.

5535 *	6131 *

5536 * Returns the new parser context or NULL	6132 * Returns the new parser context or NULL

5537 */	6133 */

5538 htmlParserCtxtPtr	6134 htmlParserCtxtPtr

5539 htmlCreateFileParserCtxt(const char filename, const char encoding)	6135 htmlCreateFileParserCtxt(const char filename, const char encoding)

5540 {	6136 {

5541 htmlParserCtxtPtr ctxt;	6137 htmlParserCtxtPtr ctxt;

5542 htmlParserInputPtr inputStream;	6138 htmlParserInputPtr inputStream;

(...skipping 11 matching lines...) Expand all Loading...
5554 canonicFilename = (char ) xmlCanonicPath((const xmlChar ) filename);	6150 canonicFilename = (char ) xmlCanonicPath((const xmlChar ) filename);

5555 if (canonicFilename == NULL) {	6151 if (canonicFilename == NULL) {

5556 #ifdef LIBXML_SAX1_ENABLED	6152 #ifdef LIBXML_SAX1_ENABLED

5557 if (xmlDefaultSAXHandler.error != NULL) {	6153 if (xmlDefaultSAXHandler.error != NULL) {

5558 xmlDefaultSAXHandler.error(NULL, "out of memory\n");	6154 xmlDefaultSAXHandler.error(NULL, "out of memory\n");

5559 }	6155 }

5560 #endif	6156 #endif

5561 xmlFreeParserCtxt(ctxt);	6157 xmlFreeParserCtxt(ctxt);

5562 return(NULL);	6158 return(NULL);

5563 }	6159 }

5564	6160

5565 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);	6161 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);

5566 xmlFree(canonicFilename);	6162 xmlFree(canonicFilename);

5567 if (inputStream == NULL) {	6163 if (inputStream == NULL) {

5568 xmlFreeParserCtxt(ctxt);	6164 xmlFreeParserCtxt(ctxt);

5569 return(NULL);	6165 return(NULL);

5570 }	6166 }

5571	6167

5572 inputPush(ctxt, inputStream);	6168 inputPush(ctxt, inputStream);

5573	6169

5574 /* set encoding */	6170 /* set encoding */

5575 if (encoding) {	6171 if (encoding) {

5576 content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1);	6172 content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1);

5577 » if (content) {	6173 » if (content) {

5578 strcpy ((char )content, (char )content_line);	6174 strcpy ((char )content, (char )content_line);

5579 strcat ((char )content, (char )encoding);	6175 strcat ((char )content, (char )encoding);

5580 htmlCheckEncoding (ctxt, content);	6176 htmlCheckEncoding (ctxt, content);

5581 xmlFree (content);	6177 xmlFree (content);

5582 }	6178 }

5583 }	6179 }

5584	6180

5585 return(ctxt);	6181 return(ctxt);

5586 }	6182 }

5587	6183

5588 /**	6184 /**

5589 * htmlSAXParseFile:	6185 * htmlSAXParseFile:

5590 * @filename: the filename	6186 * @filename: the filename

5591 * @encoding: a free form C string describing the HTML document encoding, or NULL	6187 * @encoding: a free form C string describing the HTML document encoding, or NULL

5592 * @sax: the SAX handler block	6188 * @sax: the SAX handler block

5593 * @userData: if using SAX, this pointer will be provided on callbacks.	6189 * @userData: if using SAX, this pointer will be provided on callbacks.

5594 *	6190 *

5595 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress	6191 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress

5596 * compressed document is provided by default if found at compile-time.	6192 * compressed document is provided by default if found at compile-time.

5597 * It uses the given SAX function block to handle the parsing callbacks.	6193 * It uses the given SAX function block to handle the parsing callbacks.

5598 * If sax is NULL, fallback to the default DOM tree building routines.	6194 * If sax is NULL, fallback to the default DOM tree building routines.

5599 *	6195 *

5600 * Returns the resulting document tree unless SAX is NULL or the document is	6196 * Returns the resulting document tree unless SAX is NULL or the document is

5601 * not well formed.	6197 * not well formed.

5602 */	6198 */

5603	6199

5604 htmlDocPtr	6200 htmlDocPtr

5605 htmlSAXParseFile(const char filename, const char encoding, htmlSAXHandlerPtr s ax,	6201 htmlSAXParseFile(const char filename, const char encoding, htmlSAXHandlerPtr s ax,

5606 void *userData) {	6202 void *userData) {

5607 htmlDocPtr ret;	6203 htmlDocPtr ret;

5608 htmlParserCtxtPtr ctxt;	6204 htmlParserCtxtPtr ctxt;

5609 htmlSAXHandlerPtr oldsax = NULL;	6205 htmlSAXHandlerPtr oldsax = NULL;

5610	6206

5611 xmlInitParser();	6207 xmlInitParser();

5612	6208

5613 ctxt = htmlCreateFileParserCtxt(filename, encoding);	6209 ctxt = htmlCreateFileParserCtxt(filename, encoding);

5614 if (ctxt == NULL) return(NULL);	6210 if (ctxt == NULL) return(NULL);

5615 if (sax != NULL) {	6211 if (sax != NULL) {

5616 oldsax = ctxt->sax;	6212 oldsax = ctxt->sax;

5617 ctxt->sax = sax;	6213 ctxt->sax = sax;

5618 ctxt->userData = userData;	6214 ctxt->userData = userData;

5619 }	6215 }

5620	6216

5621 htmlParseDocument(ctxt);	6217 htmlParseDocument(ctxt);

5622	6218

5623 ret = ctxt->myDoc;	6219 ret = ctxt->myDoc;

5624 if (sax != NULL) {	6220 if (sax != NULL) {

5625 ctxt->sax = oldsax;	6221 ctxt->sax = oldsax;

5626 ctxt->userData = NULL;	6222 ctxt->userData = NULL;

5627 }	6223 }

5628 htmlFreeParserCtxt(ctxt);	6224 htmlFreeParserCtxt(ctxt);

5629	6225

5630 return(ret);	6226 return(ret);

5631 }	6227 }

5632	6228

5633 /**	6229 /**

5634 * htmlParseFile:	6230 * htmlParseFile:

5635 * @filename: the filename	6231 * @filename: the filename

5636 * @encoding: a free form C string describing the HTML document encoding, or NULL	6232 * @encoding: a free form C string describing the HTML document encoding, or NULL

5637 *	6233 *

5638 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress	6234 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress

5639 * compressed document is provided by default if found at compile-time.	6235 * compressed document is provided by default if found at compile-time.

5640 *	6236 *

5641 * Returns the resulting document tree	6237 * Returns the resulting document tree

5642 */	6238 */

5643	6239

5644 htmlDocPtr	6240 htmlDocPtr

5645 htmlParseFile(const char filename, const char encoding) {	6241 htmlParseFile(const char filename, const char encoding) {

5646 return(htmlSAXParseFile(filename, encoding, NULL, NULL));	6242 return(htmlSAXParseFile(filename, encoding, NULL, NULL));

5647 }	6243 }

5648	6244

5649 /**	6245 /**

5650 * htmlHandleOmittedElem:	6246 * htmlHandleOmittedElem:

5651 * @val: int 0 or 1	6247 * @val: int 0 or 1

5652 *	6248 *

5653 * Set and return the previous value for handling HTML omitted tags.	6249 * Set and return the previous value for handling HTML omitted tags.

5654 *	6250 *

5655 * Returns the previous value: 0 for no handling, 1 for auto insertion.	6251 * Returns the previous value: 0 for no handling, 1 for auto insertion.

5656 */	6252 */

5657	6253

5658 int	6254 int

5659 htmlHandleOmittedElem(int val) {	6255 htmlHandleOmittedElem(int val) {

5660 int old = htmlOmittedDefaultValue;	6256 int old = htmlOmittedDefaultValue;

5661	6257

(...skipping 119 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5781 * *	6377 * *

5782 ************************************************************************/	6378 ************************************************************************/

5783 /**	6379 /**

5784 * DICT_FREE:	6380 * DICT_FREE:

5785 * @str: a string	6381 * @str: a string

5786 *	6382 *

5787 * Free a string if it is not owned by the "dict" dictionary in the	6383 * Free a string if it is not owned by the "dict" dictionary in the

5788 * current scope	6384 * current scope

5789 */	6385 */

5790 #define DICT_FREE(str) \	6386 #define DICT_FREE(str) \

5791 » if ((str) && ((!dict) \|\| » » » » \	6387 » if ((str) && ((!dict) \|\|» » » » \

5792 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \	6388 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \

5793 xmlFree((char *)(str));	6389 xmlFree((char *)(str));

5794	6390

5795 /**	6391 /**

5796 * htmlCtxtReset:	6392 * htmlCtxtReset:

5797 * @ctxt: an HTML parser context	6393 * @ctxt: an HTML parser context

5798 *	6394 *

5799 * Reset a parser context	6395 * Reset a parser context

5800 */	6396 */

5801 void	6397 void

5802 htmlCtxtReset(htmlParserCtxtPtr ctxt)	6398 htmlCtxtReset(htmlParserCtxtPtr ctxt)

5803 {	6399 {

5804 xmlParserInputPtr input;	6400 xmlParserInputPtr input;

5805 xmlDictPtr dict;	6401 xmlDictPtr dict;

5806	6402

5807 if (ctxt == NULL)	6403 if (ctxt == NULL)

5808 return;	6404 return;

5809	6405

5810 xmlInitParser();	6406 xmlInitParser();

5811 dict = ctxt->dict;	6407 dict = ctxt->dict;

5812	6408

5813 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */	6409 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */

5814 xmlFreeInputStream(input);	6410 xmlFreeInputStream(input);

5815 }	6411 }

5816 ctxt->inputNr = 0;	6412 ctxt->inputNr = 0;

(...skipping 106 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5923 ctxt->keepBlanks = 1;	6519 ctxt->keepBlanks = 1;

5924 if (options & HTML_PARSE_RECOVER) {	6520 if (options & HTML_PARSE_RECOVER) {

5925 ctxt->recovery = 1;	6521 ctxt->recovery = 1;

5926 options -= HTML_PARSE_RECOVER;	6522 options -= HTML_PARSE_RECOVER;

5927 } else	6523 } else

5928 ctxt->recovery = 0;	6524 ctxt->recovery = 0;

5929 if (options & HTML_PARSE_COMPACT) {	6525 if (options & HTML_PARSE_COMPACT) {

5930 ctxt->options \|= HTML_PARSE_COMPACT;	6526 ctxt->options \|= HTML_PARSE_COMPACT;

5931 options -= HTML_PARSE_COMPACT;	6527 options -= HTML_PARSE_COMPACT;

5932 }	6528 }

	6529 if (options & XML_PARSE_HUGE) {

	6530 ctxt->options \|= XML_PARSE_HUGE;

	6531 options -= XML_PARSE_HUGE;

	6532 }

5933 ctxt->dictNames = 0;	6533 ctxt->dictNames = 0;

5934 return (options);	6534 return (options);

5935 }	6535 }

5936	6536

5937 /**	6537 /**

5938 * htmlDoRead:	6538 * htmlDoRead:

5939 * @ctxt: an HTML parser context	6539 * @ctxt: an HTML parser context

5940 * @URL: the base URL to use for the document	6540 * @URL: the base URL to use for the document

5941 * @encoding: the document encoding, or NULL	6541 * @encoding: the document encoding, or NULL

5942 * @options: a combination of htmlParserOption(s)	6542 * @options: a combination of htmlParserOption(s)

5943 * @reuse: keep the context for reuse	6543 * @reuse: keep the context for reuse

5944 *	6544 *

5945 * Common front-end for the htmlRead functions	6545 * Common front-end for the htmlRead functions

5946 *	6546 *

5947 * Returns the resulting document tree or NULL	6547 * Returns the resulting document tree or NULL

5948 */	6548 */

5949 static htmlDocPtr	6549 static htmlDocPtr

5950 htmlDoRead(htmlParserCtxtPtr ctxt, const char URL, const char encoding,	6550 htmlDoRead(htmlParserCtxtPtr ctxt, const char URL, const char encoding,

5951 int options, int reuse)	6551 int options, int reuse)

5952 {	6552 {

5953 htmlDocPtr ret;	6553 htmlDocPtr ret;

5954	6554

5955 htmlCtxtUseOptions(ctxt, options);	6555 htmlCtxtUseOptions(ctxt, options);

5956 ctxt->html = 1;	6556 ctxt->html = 1;

5957 if (encoding != NULL) {	6557 if (encoding != NULL) {

5958 xmlCharEncodingHandlerPtr hdlr;	6558 xmlCharEncodingHandlerPtr hdlr;

5959	6559

5960 hdlr = xmlFindCharEncodingHandler(encoding);	6560 hdlr = xmlFindCharEncodingHandler(encoding);

5961 » if (hdlr != NULL)	6561 » if (hdlr != NULL) {

5962 xmlSwitchToEncoding(ctxt, hdlr);	6562 xmlSwitchToEncoding(ctxt, hdlr);

	6563 if (ctxt->input->encoding != NULL)

	6564 xmlFree((xmlChar *) ctxt->input->encoding);

	6565 ctxt->input->encoding = xmlStrdup((xmlChar *)encoding);

	6566 }

5963 }	6567 }

5964 if ((URL != NULL) && (ctxt->input != NULL) &&	6568 if ((URL != NULL) && (ctxt->input != NULL) &&

5965 (ctxt->input->filename == NULL))	6569 (ctxt->input->filename == NULL))

5966 ctxt->input->filename = (char ) xmlStrdup((const xmlChar ) URL);	6570 ctxt->input->filename = (char ) xmlStrdup((const xmlChar ) URL);

5967 htmlParseDocument(ctxt);	6571 htmlParseDocument(ctxt);

5968 ret = ctxt->myDoc;	6572 ret = ctxt->myDoc;

5969 ctxt->myDoc = NULL;	6573 ctxt->myDoc = NULL;

5970 if (!reuse) {	6574 if (!reuse) {

5971 if ((ctxt->dictNames) &&	6575 if ((ctxt->dictNames) &&

5972 (ret != NULL) &&	6576 (ret != NULL) &&

5973 (ret->dict == ctxt->dict))	6577 (ret->dict == ctxt->dict))

5974 ctxt->dict = NULL;	6578 ctxt->dict = NULL;

5975 xmlFreeParserCtxt(ctxt);	6579 xmlFreeParserCtxt(ctxt);

5976 }	6580 }

5977 return (ret);	6581 return (ret);

5978 }	6582 }

5979	6583

5980 /**	6584 /**

5981 * htmlReadDoc:	6585 * htmlReadDoc:

5982 * @cur: a pointer to a zero terminated string	6586 * @cur: a pointer to a zero terminated string

5983 * @URL: the base URL to use for the document	6587 * @URL: the base URL to use for the document

5984 * @encoding: the document encoding, or NULL	6588 * @encoding: the document encoding, or NULL

5985 * @options: a combination of htmlParserOption(s)	6589 * @options: a combination of htmlParserOption(s)

5986 *	6590 *

5987 * parse an HTML in-memory document and build a tree.	6591 * parse an HTML in-memory document and build a tree.

5988 *	6592 *

5989 * Returns the resulting document tree	6593 * Returns the resulting document tree

5990 */	6594 */

5991 htmlDocPtr	6595 htmlDocPtr

5992 htmlReadDoc(const xmlChar * cur, const char URL, const char encoding, int opti ons)	6596 htmlReadDoc(const xmlChar * cur, const char URL, const char encoding, int opti ons)

5993 {	6597 {

5994 htmlParserCtxtPtr ctxt;	6598 htmlParserCtxtPtr ctxt;

5995	6599

5996 if (cur == NULL)	6600 if (cur == NULL)

5997 return (NULL);	6601 return (NULL);

5998	6602

5999 xmlInitParser();	6603 xmlInitParser();

6000 ctxt = htmlCreateDocParserCtxt(cur, NULL);	6604 ctxt = htmlCreateDocParserCtxt(cur, NULL);

6001 if (ctxt == NULL)	6605 if (ctxt == NULL)

6002 return (NULL);	6606 return (NULL);

6003 return (htmlDoRead(ctxt, URL, encoding, options, 0));	6607 return (htmlDoRead(ctxt, URL, encoding, options, 0));

6004 }	6608 }

6005	6609

6006 /**	6610 /**

6007 * htmlReadFile:	6611 * htmlReadFile:

6008 * @filename: a file or URL	6612 * @filename: a file or URL

6009 * @encoding: the document encoding, or NULL	6613 * @encoding: the document encoding, or NULL

6010 * @options: a combination of htmlParserOption(s)	6614 * @options: a combination of htmlParserOption(s)

6011 *	6615 *

6012 * parse an HTML file from the filesystem or the network.	6616 * parse an HTML file from the filesystem or the network.

6013 *	6617 *

6014 * Returns the resulting document tree	6618 * Returns the resulting document tree

6015 */	6619 */

6016 htmlDocPtr	6620 htmlDocPtr

6017 htmlReadFile(const char filename, const char encoding, int options)	6621 htmlReadFile(const char filename, const char encoding, int options)

6018 {	6622 {

6019 htmlParserCtxtPtr ctxt;	6623 htmlParserCtxtPtr ctxt;

6020	6624

6021 xmlInitParser();	6625 xmlInitParser();

6022 ctxt = htmlCreateFileParserCtxt(filename, encoding);	6626 ctxt = htmlCreateFileParserCtxt(filename, encoding);

6023 if (ctxt == NULL)	6627 if (ctxt == NULL)

6024 return (NULL);	6628 return (NULL);

6025 return (htmlDoRead(ctxt, NULL, NULL, options, 0));	6629 return (htmlDoRead(ctxt, NULL, NULL, options, 0));

6026 }	6630 }

6027	6631

6028 /**	6632 /**

6029 * htmlReadMemory:	6633 * htmlReadMemory:

6030 * @buffer: a pointer to a char array	6634 * @buffer: a pointer to a char array

6031 * @size: the size of the array	6635 * @size: the size of the array

6032 * @URL: the base URL to use for the document	6636 * @URL: the base URL to use for the document

6033 * @encoding: the document encoding, or NULL	6637 * @encoding: the document encoding, or NULL

6034 * @options: a combination of htmlParserOption(s)	6638 * @options: a combination of htmlParserOption(s)

6035 *	6639 *

6036 * parse an HTML in-memory document and build a tree.	6640 * parse an HTML in-memory document and build a tree.

6037 *	6641 *

6038 * Returns the resulting document tree	6642 * Returns the resulting document tree

6039 */	6643 */

6040 htmlDocPtr	6644 htmlDocPtr

6041 htmlReadMemory(const char buffer, int size, const char URL, const char *encodi ng, int options)	6645 htmlReadMemory(const char buffer, int size, const char URL, const char *encodi ng, int options)

6042 {	6646 {

6043 htmlParserCtxtPtr ctxt;	6647 htmlParserCtxtPtr ctxt;

6044	6648

6045 xmlInitParser();	6649 xmlInitParser();

6046 ctxt = xmlCreateMemoryParserCtxt(buffer, size);	6650 ctxt = xmlCreateMemoryParserCtxt(buffer, size);

6047 if (ctxt == NULL)	6651 if (ctxt == NULL)

6048 return (NULL);	6652 return (NULL);

6049 htmlDefaultSAXHandlerInit();	6653 htmlDefaultSAXHandlerInit();

6050 if (ctxt->sax != NULL)	6654 if (ctxt->sax != NULL)

6051 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));	6655 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));

6052 return (htmlDoRead(ctxt, URL, encoding, options, 0));	6656 return (htmlDoRead(ctxt, URL, encoding, options, 0));

6053 }	6657 }

6054	6658

6055 /**	6659 /**

6056 * htmlReadFd:	6660 * htmlReadFd:

6057 * @fd: an open file descriptor	6661 * @fd: an open file descriptor

6058 * @URL: the base URL to use for the document	6662 * @URL: the base URL to use for the document

6059 * @encoding: the document encoding, or NULL	6663 * @encoding: the document encoding, or NULL

6060 * @options: a combination of htmlParserOption(s)	6664 * @options: a combination of htmlParserOption(s)

6061 *	6665 *

6062 * parse an XML from a file descriptor and build a tree.	6666 * parse an XML from a file descriptor and build a tree.

6063 *	6667 *

6064 * Returns the resulting document tree	6668 * Returns the resulting document tree

6065 */	6669 */

6066 htmlDocPtr	6670 htmlDocPtr

6067 htmlReadFd(int fd, const char *URL, const char *encoding, int options)	6671 htmlReadFd(int fd, const char *URL, const char *encoding, int options)

6068 {	6672 {

6069 htmlParserCtxtPtr ctxt;	6673 htmlParserCtxtPtr ctxt;

6070 xmlParserInputBufferPtr input;	6674 xmlParserInputBufferPtr input;

6071 xmlParserInputPtr stream;	6675 xmlParserInputPtr stream;

6072	6676

6073 if (fd < 0)	6677 if (fd < 0)

(...skipping 21 matching lines...) Expand all Loading...
6095 /**	6699 /**

6096 * htmlReadIO:	6700 * htmlReadIO:

6097 * @ioread: an I/O read function	6701 * @ioread: an I/O read function

6098 * @ioclose: an I/O close function	6702 * @ioclose: an I/O close function

6099 * @ioctx: an I/O handler	6703 * @ioctx: an I/O handler

6100 * @URL: the base URL to use for the document	6704 * @URL: the base URL to use for the document

6101 * @encoding: the document encoding, or NULL	6705 * @encoding: the document encoding, or NULL

6102 * @options: a combination of htmlParserOption(s)	6706 * @options: a combination of htmlParserOption(s)

6103 *	6707 *

6104 * parse an HTML document from I/O functions and source and build a tree.	6708 * parse an HTML document from I/O functions and source and build a tree.

6105 *	6709 *

6106 * Returns the resulting document tree	6710 * Returns the resulting document tree

6107 */	6711 */

6108 htmlDocPtr	6712 htmlDocPtr

6109 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,	6713 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,

6110 void *ioctx, const char *URL, const char *encoding, int options)	6714 void *ioctx, const char *URL, const char *encoding, int options)

6111 {	6715 {

6112 htmlParserCtxtPtr ctxt;	6716 htmlParserCtxtPtr ctxt;

6113 xmlParserInputBufferPtr input;	6717 xmlParserInputBufferPtr input;

6114 xmlParserInputPtr stream;	6718 xmlParserInputPtr stream;

6115	6719

(...skipping 23 matching lines...) Expand all Loading...
6139 /**	6743 /**

6140 * htmlCtxtReadDoc:	6744 * htmlCtxtReadDoc:

6141 * @ctxt: an HTML parser context	6745 * @ctxt: an HTML parser context

6142 * @cur: a pointer to a zero terminated string	6746 * @cur: a pointer to a zero terminated string

6143 * @URL: the base URL to use for the document	6747 * @URL: the base URL to use for the document

6144 * @encoding: the document encoding, or NULL	6748 * @encoding: the document encoding, or NULL

6145 * @options: a combination of htmlParserOption(s)	6749 * @options: a combination of htmlParserOption(s)

6146 *	6750 *

6147 * parse an XML in-memory document and build a tree.	6751 * parse an XML in-memory document and build a tree.

6148 * This reuses the existing @ctxt parser context	6752 * This reuses the existing @ctxt parser context

6149 *	6753 *

6150 * Returns the resulting document tree	6754 * Returns the resulting document tree

6151 */	6755 */

6152 htmlDocPtr	6756 htmlDocPtr

6153 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,	6757 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,

6154 const char *URL, const char *encoding, int options)	6758 const char *URL, const char *encoding, int options)

6155 {	6759 {

6156 xmlParserInputPtr stream;	6760 xmlParserInputPtr stream;

6157	6761

6158 if (cur == NULL)	6762 if (cur == NULL)

6159 return (NULL);	6763 return (NULL);

(...skipping 12 matching lines...) Expand all Loading...
6172	6776

6173 /**	6777 /**

6174 * htmlCtxtReadFile:	6778 * htmlCtxtReadFile:

6175 * @ctxt: an HTML parser context	6779 * @ctxt: an HTML parser context

6176 * @filename: a file or URL	6780 * @filename: a file or URL

6177 * @encoding: the document encoding, or NULL	6781 * @encoding: the document encoding, or NULL

6178 * @options: a combination of htmlParserOption(s)	6782 * @options: a combination of htmlParserOption(s)

6179 *	6783 *

6180 * parse an XML file from the filesystem or the network.	6784 * parse an XML file from the filesystem or the network.

6181 * This reuses the existing @ctxt parser context	6785 * This reuses the existing @ctxt parser context

6182 *	6786 *

6183 * Returns the resulting document tree	6787 * Returns the resulting document tree

6184 */	6788 */

6185 htmlDocPtr	6789 htmlDocPtr

6186 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,	6790 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,

6187 const char *encoding, int options)	6791 const char *encoding, int options)

6188 {	6792 {

6189 xmlParserInputPtr stream;	6793 xmlParserInputPtr stream;

6190	6794

6191 if (filename == NULL)	6795 if (filename == NULL)

6192 return (NULL);	6796 return (NULL);

(...skipping 14 matching lines...) Expand all Loading...
6207 * htmlCtxtReadMemory:	6811 * htmlCtxtReadMemory:

6208 * @ctxt: an HTML parser context	6812 * @ctxt: an HTML parser context

6209 * @buffer: a pointer to a char array	6813 * @buffer: a pointer to a char array

6210 * @size: the size of the array	6814 * @size: the size of the array

6211 * @URL: the base URL to use for the document	6815 * @URL: the base URL to use for the document

6212 * @encoding: the document encoding, or NULL	6816 * @encoding: the document encoding, or NULL

6213 * @options: a combination of htmlParserOption(s)	6817 * @options: a combination of htmlParserOption(s)

6214 *	6818 *

6215 * parse an XML in-memory document and build a tree.	6819 * parse an XML in-memory document and build a tree.

6216 * This reuses the existing @ctxt parser context	6820 * This reuses the existing @ctxt parser context

6217 *	6821 *

6218 * Returns the resulting document tree	6822 * Returns the resulting document tree

6219 */	6823 */

6220 htmlDocPtr	6824 htmlDocPtr

6221 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,	6825 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,

6222 const char *URL, const char *encoding, int options)	6826 const char *URL, const char *encoding, int options)

6223 {	6827 {

6224 xmlParserInputBufferPtr input;	6828 xmlParserInputBufferPtr input;

6225 xmlParserInputPtr stream;	6829 xmlParserInputPtr stream;

6226	6830

6227 if (ctxt == NULL)	6831 if (ctxt == NULL)

(...skipping 21 matching lines...) Expand all Loading...
6249 /**	6853 /**

6250 * htmlCtxtReadFd:	6854 * htmlCtxtReadFd:

6251 * @ctxt: an HTML parser context	6855 * @ctxt: an HTML parser context

6252 * @fd: an open file descriptor	6856 * @fd: an open file descriptor

6253 * @URL: the base URL to use for the document	6857 * @URL: the base URL to use for the document

6254 * @encoding: the document encoding, or NULL	6858 * @encoding: the document encoding, or NULL

6255 * @options: a combination of htmlParserOption(s)	6859 * @options: a combination of htmlParserOption(s)

6256 *	6860 *

6257 * parse an XML from a file descriptor and build a tree.	6861 * parse an XML from a file descriptor and build a tree.

6258 * This reuses the existing @ctxt parser context	6862 * This reuses the existing @ctxt parser context

6259 *	6863 *

6260 * Returns the resulting document tree	6864 * Returns the resulting document tree

6261 */	6865 */

6262 htmlDocPtr	6866 htmlDocPtr

6263 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,	6867 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,

6264 const char *URL, const char *encoding, int options)	6868 const char *URL, const char *encoding, int options)

6265 {	6869 {

6266 xmlParserInputBufferPtr input;	6870 xmlParserInputBufferPtr input;

6267 xmlParserInputPtr stream;	6871 xmlParserInputPtr stream;

6268	6872

6269 if (fd < 0)	6873 if (fd < 0)

(...skipping 21 matching lines...) Expand all Loading...
6291 * @ctxt: an HTML parser context	6895 * @ctxt: an HTML parser context

6292 * @ioread: an I/O read function	6896 * @ioread: an I/O read function

6293 * @ioclose: an I/O close function	6897 * @ioclose: an I/O close function

6294 * @ioctx: an I/O handler	6898 * @ioctx: an I/O handler

6295 * @URL: the base URL to use for the document	6899 * @URL: the base URL to use for the document

6296 * @encoding: the document encoding, or NULL	6900 * @encoding: the document encoding, or NULL

6297 * @options: a combination of htmlParserOption(s)	6901 * @options: a combination of htmlParserOption(s)

6298 *	6902 *

6299 * parse an HTML document from I/O functions and source and build a tree.	6903 * parse an HTML document from I/O functions and source and build a tree.

6300 * This reuses the existing @ctxt parser context	6904 * This reuses the existing @ctxt parser context

6301 *	6905 *

6302 * Returns the resulting document tree	6906 * Returns the resulting document tree

6303 */	6907 */

6304 htmlDocPtr	6908 htmlDocPtr

6305 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,	6909 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,

6306 xmlInputCloseCallback ioclose, void *ioctx,	6910 xmlInputCloseCallback ioclose, void *ioctx,

6307 const char *URL,	6911 const char *URL,

6308 const char *encoding, int options)	6912 const char *encoding, int options)

6309 {	6913 {

6310 xmlParserInputBufferPtr input;	6914 xmlParserInputBufferPtr input;

6311 xmlParserInputPtr stream;	6915 xmlParserInputPtr stream;

(...skipping 14 matching lines...) Expand all Loading...
6326 xmlFreeParserInputBuffer(input);	6930 xmlFreeParserInputBuffer(input);

6327 return (NULL);	6931 return (NULL);

6328 }	6932 }

6329 inputPush(ctxt, stream);	6933 inputPush(ctxt, stream);

6330 return (htmlDoRead(ctxt, URL, encoding, options, 1));	6934 return (htmlDoRead(ctxt, URL, encoding, options, 1));

6331 }	6935 }

6332	6936

6333 #define bottom_HTMLparser	6937 #define bottom_HTMLparser

6334 #include "elfgcchack.h"	6938 #include "elfgcchack.h"

6335 #endif /* LIBXML_HTML_ENABLED */	6939 #endif /* LIBXML_HTML_ENABLED */

OLD	NEW

« no previous file with comments | « third_party/libxml/ChangeLog ('k') | third_party/libxml/HTMLtree.c » ('j') | third_party/libxml/patches/icu » ('J')