Reactos
at master 2435 lines 70 kB view raw
1/* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel@veillard.com 8 */ 9 10#define IN_LIBXML 11#include "libxml.h" 12 13#if defined(_WIN32) 14#define XML_DIR_SEP '\\' 15#else 16#define XML_DIR_SEP '/' 17#endif 18 19#include <string.h> 20#include <ctype.h> 21#include <stdlib.h> 22 23#include <libxml/xmlmemory.h> 24#include <libxml/tree.h> 25#include <libxml/parser.h> 26#include <libxml/parserInternals.h> 27#include <libxml/entities.h> 28#include <libxml/xmlerror.h> 29#include <libxml/encoding.h> 30#include <libxml/xmlIO.h> 31#include <libxml/uri.h> 32#include <libxml/dict.h> 33#include <libxml/xmlsave.h> 34#ifdef LIBXML_CATALOG_ENABLED 35#include <libxml/catalog.h> 36#endif 37#include <libxml/chvalid.h> 38 39#define CUR(ctxt) ctxt->input->cur 40#define END(ctxt) ctxt->input->end 41 42#include "private/buf.h" 43#include "private/enc.h" 44#include "private/error.h" 45#include "private/io.h" 46#include "private/parser.h" 47 48/* 49 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification 50 * factor of serialized output after entity expansion. 51 */ 52#define XML_MAX_AMPLIFICATION_DEFAULT 5 53 54/* 55 * Various global defaults for parsing 56 */ 57 58/** 59 * xmlCheckVersion: 60 * @version: the include version number 61 * 62 * check the compiled lib version against the include one. 63 * This can warn or immediately kill the application 64 */ 65void 66xmlCheckVersion(int version) { 67 int myversion = LIBXML_VERSION; 68 69 xmlInitParser(); 70 71 if ((myversion / 10000) != (version / 10000)) { 72 xmlGenericError(xmlGenericErrorContext, 73 "Fatal: program compiled against libxml %d using libxml %d\n", 74 (version / 10000), (myversion / 10000)); 75 fprintf(stderr, 76 "Fatal: program compiled against libxml %d using libxml %d\n", 77 (version / 10000), (myversion / 10000)); 78 } 79 if ((myversion / 100) < (version / 100)) { 80 xmlGenericError(xmlGenericErrorContext, 81 "Warning: program compiled against libxml %d using older %d\n", 82 (version / 100), (myversion / 100)); 83 } 84} 85 86 87/************************************************************************ 88 * * 89 * Some factorized error routines * 90 * * 91 ************************************************************************/ 92 93 94/** 95 * xmlErrMemory: 96 * @ctxt: an XML parser context 97 * @extra: extra information 98 * 99 * Handle a redefinition of attribute error 100 */ 101void 102xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 103{ 104 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 105 (ctxt->instate == XML_PARSER_EOF)) 106 return; 107 if (ctxt != NULL) { 108 ctxt->errNo = XML_ERR_NO_MEMORY; 109 ctxt->instate = XML_PARSER_EOF; 110 ctxt->disableSAX = 1; 111 } 112 if (extra) 113 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 114 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 115 NULL, NULL, 0, 0, 116 "Memory allocation failed : %s\n", extra); 117 else 118 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 119 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 120 NULL, NULL, 0, 0, "Memory allocation failed\n"); 121} 122 123/** 124 * __xmlErrEncoding: 125 * @ctxt: an XML parser context 126 * @xmlerr: the error number 127 * @msg: the error message 128 * @str1: an string info 129 * @str2: an string info 130 * 131 * Handle an encoding error 132 */ 133void 134__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 135 const char *msg, const xmlChar * str1, const xmlChar * str2) 136{ 137 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 138 (ctxt->instate == XML_PARSER_EOF)) 139 return; 140 if (ctxt != NULL) 141 ctxt->errNo = xmlerr; 142 __xmlRaiseError(NULL, NULL, NULL, 143 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 144 NULL, 0, (const char *) str1, (const char *) str2, 145 NULL, 0, 0, msg, str1, str2); 146 if (ctxt != NULL) { 147 ctxt->wellFormed = 0; 148 if (ctxt->recovery == 0) 149 ctxt->disableSAX = 1; 150 } 151} 152 153/** 154 * xmlErrInternal: 155 * @ctxt: an XML parser context 156 * @msg: the error message 157 * @str: error information 158 * 159 * Handle an internal error 160 */ 161static void LIBXML_ATTR_FORMAT(2,0) 162xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 163{ 164 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 165 (ctxt->instate == XML_PARSER_EOF)) 166 return; 167 if (ctxt != NULL) 168 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 169 __xmlRaiseError(NULL, NULL, NULL, 170 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 171 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 172 0, 0, msg, str); 173 if (ctxt != NULL) { 174 ctxt->wellFormed = 0; 175 if (ctxt->recovery == 0) 176 ctxt->disableSAX = 1; 177 } 178} 179 180/** 181 * xmlFatalErr: 182 * @ctxt: an XML parser context 183 * @error: the error number 184 * @info: extra information string 185 * 186 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 187 */ 188void 189xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 190{ 191 const char *errmsg; 192 193 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 194 (ctxt->instate == XML_PARSER_EOF)) 195 return; 196 switch (error) { 197 case XML_ERR_INVALID_HEX_CHARREF: 198 errmsg = "CharRef: invalid hexadecimal value"; 199 break; 200 case XML_ERR_INVALID_DEC_CHARREF: 201 errmsg = "CharRef: invalid decimal value"; 202 break; 203 case XML_ERR_INVALID_CHARREF: 204 errmsg = "CharRef: invalid value"; 205 break; 206 case XML_ERR_INTERNAL_ERROR: 207 errmsg = "internal error"; 208 break; 209 case XML_ERR_PEREF_AT_EOF: 210 errmsg = "PEReference at end of document"; 211 break; 212 case XML_ERR_PEREF_IN_PROLOG: 213 errmsg = "PEReference in prolog"; 214 break; 215 case XML_ERR_PEREF_IN_EPILOG: 216 errmsg = "PEReference in epilog"; 217 break; 218 case XML_ERR_PEREF_NO_NAME: 219 errmsg = "PEReference: no name"; 220 break; 221 case XML_ERR_PEREF_SEMICOL_MISSING: 222 errmsg = "PEReference: expecting ';'"; 223 break; 224 case XML_ERR_ENTITY_LOOP: 225 errmsg = "Detected an entity reference loop"; 226 break; 227 case XML_ERR_ENTITY_NOT_STARTED: 228 errmsg = "EntityValue: \" or ' expected"; 229 break; 230 case XML_ERR_ENTITY_PE_INTERNAL: 231 errmsg = "PEReferences forbidden in internal subset"; 232 break; 233 case XML_ERR_ENTITY_NOT_FINISHED: 234 errmsg = "EntityValue: \" or ' expected"; 235 break; 236 case XML_ERR_ATTRIBUTE_NOT_STARTED: 237 errmsg = "AttValue: \" or ' expected"; 238 break; 239 case XML_ERR_LT_IN_ATTRIBUTE: 240 errmsg = "Unescaped '<' not allowed in attributes values"; 241 break; 242 case XML_ERR_LITERAL_NOT_STARTED: 243 errmsg = "SystemLiteral \" or ' expected"; 244 break; 245 case XML_ERR_LITERAL_NOT_FINISHED: 246 errmsg = "Unfinished System or Public ID \" or ' expected"; 247 break; 248 case XML_ERR_MISPLACED_CDATA_END: 249 errmsg = "Sequence ']]>' not allowed in content"; 250 break; 251 case XML_ERR_URI_REQUIRED: 252 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 253 break; 254 case XML_ERR_PUBID_REQUIRED: 255 errmsg = "PUBLIC, the Public Identifier is missing"; 256 break; 257 case XML_ERR_HYPHEN_IN_COMMENT: 258 errmsg = "Comment must not contain '--' (double-hyphen)"; 259 break; 260 case XML_ERR_PI_NOT_STARTED: 261 errmsg = "xmlParsePI : no target name"; 262 break; 263 case XML_ERR_RESERVED_XML_NAME: 264 errmsg = "Invalid PI name"; 265 break; 266 case XML_ERR_NOTATION_NOT_STARTED: 267 errmsg = "NOTATION: Name expected here"; 268 break; 269 case XML_ERR_NOTATION_NOT_FINISHED: 270 errmsg = "'>' required to close NOTATION declaration"; 271 break; 272 case XML_ERR_VALUE_REQUIRED: 273 errmsg = "Entity value required"; 274 break; 275 case XML_ERR_URI_FRAGMENT: 276 errmsg = "Fragment not allowed"; 277 break; 278 case XML_ERR_ATTLIST_NOT_STARTED: 279 errmsg = "'(' required to start ATTLIST enumeration"; 280 break; 281 case XML_ERR_NMTOKEN_REQUIRED: 282 errmsg = "NmToken expected in ATTLIST enumeration"; 283 break; 284 case XML_ERR_ATTLIST_NOT_FINISHED: 285 errmsg = "')' required to finish ATTLIST enumeration"; 286 break; 287 case XML_ERR_MIXED_NOT_STARTED: 288 errmsg = "MixedContentDecl : '|' or ')*' expected"; 289 break; 290 case XML_ERR_PCDATA_REQUIRED: 291 errmsg = "MixedContentDecl : '#PCDATA' expected"; 292 break; 293 case XML_ERR_ELEMCONTENT_NOT_STARTED: 294 errmsg = "ContentDecl : Name or '(' expected"; 295 break; 296 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 297 errmsg = "ContentDecl : ',' '|' or ')' expected"; 298 break; 299 case XML_ERR_PEREF_IN_INT_SUBSET: 300 errmsg = 301 "PEReference: forbidden within markup decl in internal subset"; 302 break; 303 case XML_ERR_GT_REQUIRED: 304 errmsg = "expected '>'"; 305 break; 306 case XML_ERR_CONDSEC_INVALID: 307 errmsg = "XML conditional section '[' expected"; 308 break; 309 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 310 errmsg = "Content error in the external subset"; 311 break; 312 case XML_ERR_CONDSEC_INVALID_KEYWORD: 313 errmsg = 314 "conditional section INCLUDE or IGNORE keyword expected"; 315 break; 316 case XML_ERR_CONDSEC_NOT_FINISHED: 317 errmsg = "XML conditional section not closed"; 318 break; 319 case XML_ERR_XMLDECL_NOT_STARTED: 320 errmsg = "Text declaration '<?xml' required"; 321 break; 322 case XML_ERR_XMLDECL_NOT_FINISHED: 323 errmsg = "parsing XML declaration: '?>' expected"; 324 break; 325 case XML_ERR_EXT_ENTITY_STANDALONE: 326 errmsg = "external parsed entities cannot be standalone"; 327 break; 328 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 329 errmsg = "EntityRef: expecting ';'"; 330 break; 331 case XML_ERR_DOCTYPE_NOT_FINISHED: 332 errmsg = "DOCTYPE improperly terminated"; 333 break; 334 case XML_ERR_LTSLASH_REQUIRED: 335 errmsg = "EndTag: '</' not found"; 336 break; 337 case XML_ERR_EQUAL_REQUIRED: 338 errmsg = "expected '='"; 339 break; 340 case XML_ERR_STRING_NOT_CLOSED: 341 errmsg = "String not closed expecting \" or '"; 342 break; 343 case XML_ERR_STRING_NOT_STARTED: 344 errmsg = "String not started expecting ' or \""; 345 break; 346 case XML_ERR_ENCODING_NAME: 347 errmsg = "Invalid XML encoding name"; 348 break; 349 case XML_ERR_STANDALONE_VALUE: 350 errmsg = "standalone accepts only 'yes' or 'no'"; 351 break; 352 case XML_ERR_DOCUMENT_EMPTY: 353 errmsg = "Document is empty"; 354 break; 355 case XML_ERR_DOCUMENT_END: 356 errmsg = "Extra content at the end of the document"; 357 break; 358 case XML_ERR_NOT_WELL_BALANCED: 359 errmsg = "chunk is not well balanced"; 360 break; 361 case XML_ERR_EXTRA_CONTENT: 362 errmsg = "extra content at the end of well balanced chunk"; 363 break; 364 case XML_ERR_VERSION_MISSING: 365 errmsg = "Malformed declaration expecting version"; 366 break; 367 case XML_ERR_NAME_TOO_LONG: 368 errmsg = "Name too long"; 369 break; 370 case XML_ERR_INVALID_ENCODING: 371 errmsg = "Invalid bytes in character encoding"; 372 break; 373 case XML_IO_UNKNOWN: 374 errmsg = "I/O error"; 375 break; 376#if 0 377 case: 378 errmsg = ""; 379 break; 380#endif 381 default: 382 errmsg = "Unregistered error message"; 383 } 384 if (ctxt != NULL) 385 ctxt->errNo = error; 386 if (info == NULL) { 387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 388 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", 389 errmsg); 390 } else { 391 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 392 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", 393 errmsg, info); 394 } 395 if (ctxt != NULL) { 396 ctxt->wellFormed = 0; 397 if (ctxt->recovery == 0) 398 ctxt->disableSAX = 1; 399 } 400} 401 402/** 403 * xmlErrEncodingInt: 404 * @ctxt: an XML parser context 405 * @error: the error number 406 * @msg: the error message 407 * @val: an integer value 408 * 409 * n encoding error 410 */ 411static void LIBXML_ATTR_FORMAT(3,0) 412xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 413 const char *msg, int val) 414{ 415 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 416 (ctxt->instate == XML_PARSER_EOF)) 417 return; 418 if (ctxt != NULL) 419 ctxt->errNo = error; 420 __xmlRaiseError(NULL, NULL, NULL, 421 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 422 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 423 if (ctxt != NULL) { 424 ctxt->wellFormed = 0; 425 if (ctxt->recovery == 0) 426 ctxt->disableSAX = 1; 427 } 428} 429 430/** 431 * xmlIsLetter: 432 * @c: an unicode character (int) 433 * 434 * Check whether the character is allowed by the production 435 * [84] Letter ::= BaseChar | Ideographic 436 * 437 * Returns 0 if not, non-zero otherwise 438 */ 439int 440xmlIsLetter(int c) { 441 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 442} 443 444/************************************************************************ 445 * * 446 * Input handling functions for progressive parsing * 447 * * 448 ************************************************************************/ 449 450/* we need to keep enough input to show errors in context */ 451#define LINE_LEN 80 452 453/** 454 * xmlHaltParser: 455 * @ctxt: an XML parser context 456 * 457 * Blocks further parser processing don't override error 458 * for internal use 459 */ 460void 461xmlHaltParser(xmlParserCtxtPtr ctxt) { 462 if (ctxt == NULL) 463 return; 464 ctxt->instate = XML_PARSER_EOF; 465 ctxt->disableSAX = 1; 466 while (ctxt->inputNr > 1) 467 xmlFreeInputStream(inputPop(ctxt)); 468 if (ctxt->input != NULL) { 469 /* 470 * in case there was a specific allocation deallocate before 471 * overriding base 472 */ 473 if (ctxt->input->free != NULL) { 474 ctxt->input->free((xmlChar *) ctxt->input->base); 475 ctxt->input->free = NULL; 476 } 477 if (ctxt->input->buf != NULL) { 478 xmlFreeParserInputBuffer(ctxt->input->buf); 479 ctxt->input->buf = NULL; 480 } 481 ctxt->input->cur = BAD_CAST""; 482 ctxt->input->length = 0; 483 ctxt->input->base = ctxt->input->cur; 484 ctxt->input->end = ctxt->input->cur; 485 } 486} 487 488/** 489 * xmlParserInputRead: 490 * @in: an XML parser input 491 * @len: an indicative size for the lookahead 492 * 493 * DEPRECATED: This function was internal and is deprecated. 494 * 495 * Returns -1 as this is an error to use it. 496 */ 497int 498xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { 499 return(-1); 500} 501 502/** 503 * xmlParserGrow: 504 * @ctxt: an XML parser context 505 * 506 * Grow the input buffer. 507 * 508 * Returns the number of bytes read or -1 in case of error. 509 */ 510int 511xmlParserGrow(xmlParserCtxtPtr ctxt) { 512 xmlParserInputPtr in = ctxt->input; 513 xmlParserInputBufferPtr buf = in->buf; 514 ptrdiff_t curEnd = in->end - in->cur; 515 ptrdiff_t curBase = in->cur - in->base; 516 int ret; 517 518 if (buf == NULL) 519 return(0); 520 /* Don't grow push parser buffer. */ 521 if ((ctxt->progressive) && (ctxt->inputNr <= 1)) 522 return(0); 523 /* Don't grow memory buffers. */ 524 if ((buf->encoder == NULL) && (buf->readcallback == NULL)) 525 return(0); 526 if (buf->error != 0) 527 return(-1); 528 529 if (((curEnd > XML_MAX_LOOKUP_LIMIT) || 530 (curBase > XML_MAX_LOOKUP_LIMIT)) && 531 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 532 xmlErrMemory(ctxt, "Huge input lookup"); 533 xmlHaltParser(ctxt); 534 return(-1); 535 } 536 537 if (curEnd >= INPUT_CHUNK) 538 return(0); 539 540 ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK); 541 xmlBufUpdateInput(buf->buffer, in, curBase); 542 543 if (ret < 0) { 544 xmlFatalErr(ctxt, buf->error, NULL); 545 /* Buffer contents may be lost in case of memory errors. */ 546 if (buf->error == XML_ERR_NO_MEMORY) 547 xmlHaltParser(ctxt); 548 } 549 550 return(ret); 551} 552 553/** 554 * xmlParserInputGrow: 555 * @in: an XML parser input 556 * @len: an indicative size for the lookahead 557 * 558 * DEPRECATED: Don't use. 559 * 560 * This function increase the input for the parser. It tries to 561 * preserve pointers to the input buffer, and keep already read data 562 * 563 * Returns the amount of char read, or -1 in case of error, 0 indicate the 564 * end of this entity 565 */ 566int 567xmlParserInputGrow(xmlParserInputPtr in, int len) { 568 int ret; 569 size_t indx; 570 571 if ((in == NULL) || (len < 0)) return(-1); 572 if (in->buf == NULL) return(-1); 573 if (in->base == NULL) return(-1); 574 if (in->cur == NULL) return(-1); 575 if (in->buf->buffer == NULL) return(-1); 576 577 /* Don't grow memory buffers. */ 578 if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL)) 579 return(0); 580 581 indx = in->cur - in->base; 582 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { 583 return(0); 584 } 585 ret = xmlParserInputBufferGrow(in->buf, len); 586 587 in->base = xmlBufContent(in->buf->buffer); 588 if (in->base == NULL) { 589 in->base = BAD_CAST ""; 590 in->cur = in->base; 591 in->end = in->base; 592 return(-1); 593 } 594 in->cur = in->base + indx; 595 in->end = xmlBufEnd(in->buf->buffer); 596 597 return(ret); 598} 599 600/** 601 * xmlParserShrink: 602 * @ctxt: an XML parser context 603 * 604 * Shrink the input buffer. 605 */ 606void 607xmlParserShrink(xmlParserCtxtPtr ctxt) { 608 xmlParserInputPtr in = ctxt->input; 609 xmlParserInputBufferPtr buf = in->buf; 610 size_t used; 611 612 if (buf == NULL) 613 return; 614 /* Don't shrink pull parser memory buffers. */ 615 if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && 616 (buf->encoder == NULL) && 617 (buf->readcallback == NULL)) 618 return; 619 620 used = in->cur - in->base; 621 /* 622 * Do not shrink on large buffers whose only a tiny fraction 623 * was consumed 624 */ 625 if (used > INPUT_CHUNK) { 626 size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN); 627 628 if (res > 0) { 629 used -= res; 630 if ((res > ULONG_MAX) || 631 (in->consumed > ULONG_MAX - (unsigned long)res)) 632 in->consumed = ULONG_MAX; 633 else 634 in->consumed += res; 635 } 636 } 637 638 xmlBufUpdateInput(buf->buffer, in, used); 639} 640 641/** 642 * xmlParserInputShrink: 643 * @in: an XML parser input 644 * 645 * DEPRECATED: Don't use. 646 * 647 * This function removes used input for the parser. 648 */ 649void 650xmlParserInputShrink(xmlParserInputPtr in) { 651 size_t used; 652 size_t ret; 653 654 if (in == NULL) return; 655 if (in->buf == NULL) return; 656 if (in->base == NULL) return; 657 if (in->cur == NULL) return; 658 if (in->buf->buffer == NULL) return; 659 660 used = in->cur - in->base; 661 /* 662 * Do not shrink on large buffers whose only a tiny fraction 663 * was consumed 664 */ 665 if (used > INPUT_CHUNK) { 666 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); 667 if (ret > 0) { 668 used -= ret; 669 if ((ret > ULONG_MAX) || 670 (in->consumed > ULONG_MAX - (unsigned long)ret)) 671 in->consumed = ULONG_MAX; 672 else 673 in->consumed += ret; 674 } 675 } 676 677 if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) { 678 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 679 } 680 681 in->base = xmlBufContent(in->buf->buffer); 682 if (in->base == NULL) { 683 /* TODO: raise error */ 684 in->base = BAD_CAST ""; 685 in->cur = in->base; 686 in->end = in->base; 687 return; 688 } 689 in->cur = in->base + used; 690 in->end = xmlBufEnd(in->buf->buffer); 691} 692 693/************************************************************************ 694 * * 695 * UTF8 character input and related functions * 696 * * 697 ************************************************************************/ 698 699/** 700 * xmlNextChar: 701 * @ctxt: the XML parser context 702 * 703 * DEPRECATED: Internal function, do not use. 704 * 705 * Skip to the next char input char. 706 */ 707 708void 709xmlNextChar(xmlParserCtxtPtr ctxt) 710{ 711 const unsigned char *cur; 712 size_t avail; 713 int c; 714 715 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 716 (ctxt->input == NULL)) 717 return; 718 719 avail = ctxt->input->end - ctxt->input->cur; 720 721 if (avail < INPUT_CHUNK) { 722 xmlParserGrow(ctxt); 723 if ((ctxt->instate == XML_PARSER_EOF) || 724 (ctxt->input->cur >= ctxt->input->end)) 725 return; 726 avail = ctxt->input->end - ctxt->input->cur; 727 } 728 729 cur = ctxt->input->cur; 730 c = *cur; 731 732 if (c < 0x80) { 733 if (c == '\n') { 734 ctxt->input->cur++; 735 ctxt->input->line++; 736 ctxt->input->col = 1; 737 } else if (c == '\r') { 738 /* 739 * 2.11 End-of-Line Handling 740 * the literal two-character sequence "#xD#xA" or a standalone 741 * literal #xD, an XML processor must pass to the application 742 * the single character #xA. 743 */ 744 ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1); 745 ctxt->input->line++; 746 ctxt->input->col = 1; 747 return; 748 } else { 749 ctxt->input->cur++; 750 ctxt->input->col++; 751 } 752 } else { 753 ctxt->input->col++; 754 755 if ((avail < 2) || (cur[1] & 0xc0) != 0x80) 756 goto encoding_error; 757 758 if (c < 0xe0) { 759 /* 2-byte code */ 760 if (c < 0xc2) 761 goto encoding_error; 762 ctxt->input->cur += 2; 763 } else { 764 unsigned int val = (c << 8) | cur[1]; 765 766 if ((avail < 3) || (cur[2] & 0xc0) != 0x80) 767 goto encoding_error; 768 769 if (c < 0xf0) { 770 /* 3-byte code */ 771 if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00))) 772 goto encoding_error; 773 ctxt->input->cur += 3; 774 } else { 775 if ((avail < 4) || ((cur[3] & 0xc0) != 0x80)) 776 goto encoding_error; 777 778 /* 4-byte code */ 779 if ((val < 0xf090) || (val >= 0xf490)) 780 goto encoding_error; 781 ctxt->input->cur += 4; 782 } 783 } 784 } 785 786 return; 787 788encoding_error: 789 /* Only report the first error */ 790 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) { 791 if ((ctxt == NULL) || (ctxt->input == NULL) || 792 (ctxt->input->end - ctxt->input->cur < 4)) { 793 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 794 "Input is not proper UTF-8, indicate encoding !\n", 795 NULL, NULL); 796 } else { 797 char buffer[150]; 798 799 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 800 ctxt->input->cur[0], ctxt->input->cur[1], 801 ctxt->input->cur[2], ctxt->input->cur[3]); 802 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 803 "Input is not proper UTF-8, indicate encoding !\n%s", 804 BAD_CAST buffer, NULL); 805 } 806 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR; 807 } 808 ctxt->input->cur++; 809 return; 810} 811 812/** 813 * xmlCurrentChar: 814 * @ctxt: the XML parser context 815 * @len: pointer to the length of the char read 816 * 817 * DEPRECATED: Internal function, do not use. 818 * 819 * The current char value, if using UTF-8 this may actually span multiple 820 * bytes in the input buffer. Implement the end of line normalization: 821 * 2.11 End-of-Line Handling 822 * Wherever an external parsed entity or the literal entity value 823 * of an internal parsed entity contains either the literal two-character 824 * sequence "#xD#xA" or a standalone literal #xD, an XML processor 825 * must pass to the application the single character #xA. 826 * This behavior can conveniently be produced by normalizing all 827 * line breaks to #xA on input, before parsing.) 828 * 829 * Returns the current char value and its length 830 */ 831 832int 833xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 834 const unsigned char *cur; 835 size_t avail; 836 int c; 837 838 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 839 if (ctxt->instate == XML_PARSER_EOF) 840 return(0); 841 842 avail = ctxt->input->end - ctxt->input->cur; 843 844 if (avail < INPUT_CHUNK) { 845 xmlParserGrow(ctxt); 846 if (ctxt->instate == XML_PARSER_EOF) 847 return(0); 848 avail = ctxt->input->end - ctxt->input->cur; 849 } 850 851 cur = ctxt->input->cur; 852 c = *cur; 853 854 if (c < 0x80) { 855 /* 1-byte code */ 856 if (c < 0x20) { 857 /* 858 * 2.11 End-of-Line Handling 859 * the literal two-character sequence "#xD#xA" or a standalone 860 * literal #xD, an XML processor must pass to the application 861 * the single character #xA. 862 */ 863 if (c == '\r') { 864 /* 865 * TODO: This function shouldn't change the 'cur' pointer 866 * as side effect, but the NEXTL macro in parser.c relies 867 * on this behavior when incrementing line numbers. 868 */ 869 if (cur[1] == '\n') 870 ctxt->input->cur++; 871 *len = 1; 872 c = '\n'; 873 } else if (c == 0) { 874 if (ctxt->input->cur >= ctxt->input->end) { 875 *len = 0; 876 } else { 877 *len = 1; 878 /* 879 * TODO: Null bytes should be handled by callers, 880 * but this can be tricky. 881 */ 882 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 883 "Char 0x0 out of allowed range\n", c); 884 } 885 } else { 886 *len = 1; 887 } 888 } else { 889 *len = 1; 890 } 891 892 return(c); 893 } else { 894 int val; 895 896 if (avail < 2) 897 goto incomplete_sequence; 898 if ((cur[1] & 0xc0) != 0x80) 899 goto encoding_error; 900 901 if (c < 0xe0) { 902 /* 2-byte code */ 903 if (c < 0xc2) 904 goto encoding_error; 905 val = (c & 0x1f) << 6; 906 val |= cur[1] & 0x3f; 907 *len = 2; 908 } else { 909 if (avail < 3) 910 goto incomplete_sequence; 911 if ((cur[2] & 0xc0) != 0x80) 912 goto encoding_error; 913 914 if (c < 0xf0) { 915 /* 3-byte code */ 916 val = (c & 0xf) << 12; 917 val |= (cur[1] & 0x3f) << 6; 918 val |= cur[2] & 0x3f; 919 if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000))) 920 goto encoding_error; 921 *len = 3; 922 } else { 923 if (avail < 4) 924 goto incomplete_sequence; 925 if ((cur[3] & 0xc0) != 0x80) 926 goto encoding_error; 927 928 /* 4-byte code */ 929 val = (c & 0x0f) << 18; 930 val |= (cur[1] & 0x3f) << 12; 931 val |= (cur[2] & 0x3f) << 6; 932 val |= cur[3] & 0x3f; 933 if ((val < 0x10000) || (val >= 0x110000)) 934 goto encoding_error; 935 *len = 4; 936 } 937 } 938 939 return(val); 940 } 941 942encoding_error: 943 /* Only report the first error */ 944 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) { 945 if (ctxt->input->end - ctxt->input->cur < 4) { 946 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 947 "Input is not proper UTF-8, indicate encoding !\n", 948 NULL, NULL); 949 } else { 950 char buffer[150]; 951 952 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 953 ctxt->input->cur[0], ctxt->input->cur[1], 954 ctxt->input->cur[2], ctxt->input->cur[3]); 955 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 956 "Input is not proper UTF-8, indicate encoding !\n%s", 957 BAD_CAST buffer, NULL); 958 } 959 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR; 960 } 961 *len = 1; 962 return(0xFFFD); /* U+FFFD Replacement Character */ 963 964incomplete_sequence: 965 /* 966 * An encoding problem may arise from a truncated input buffer 967 * splitting a character in the middle. In that case do not raise 968 * an error but return 0. This should only happen when push parsing 969 * char data. 970 */ 971 *len = 0; 972 return(0); 973} 974 975/** 976 * xmlStringCurrentChar: 977 * @ctxt: the XML parser context 978 * @cur: pointer to the beginning of the char 979 * @len: pointer to the length of the char read 980 * 981 * DEPRECATED: Internal function, do not use. 982 * 983 * The current char value, if using UTF-8 this may actually span multiple 984 * bytes in the input buffer. 985 * 986 * Returns the current char value and its length 987 */ 988 989int 990xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, 991 const xmlChar *cur, int *len) { 992 int c; 993 994 if ((cur == NULL) || (len == NULL)) 995 return(0); 996 997 /* cur is zero-terminated, so we can lie about its length. */ 998 *len = 4; 999 c = xmlGetUTF8Char(cur, len); 1000 1001 return((c < 0) ? 0 : c); 1002} 1003 1004/** 1005 * xmlCopyCharMultiByte: 1006 * @out: pointer to an array of xmlChar 1007 * @val: the char value 1008 * 1009 * append the char value in the array 1010 * 1011 * Returns the number of xmlChar written 1012 */ 1013int 1014xmlCopyCharMultiByte(xmlChar *out, int val) { 1015 if ((out == NULL) || (val < 0)) return(0); 1016 /* 1017 * We are supposed to handle UTF8, check it's valid 1018 * From rfc2044: encoding of the Unicode values on UTF-8: 1019 * 1020 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 1021 * 0000 0000-0000 007F 0xxxxxxx 1022 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 1023 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 1024 */ 1025 if (val >= 0x80) { 1026 xmlChar *savedout = out; 1027 int bits; 1028 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 1029 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 1030 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 1031 else { 1032 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 1033 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 1034 val); 1035 return(0); 1036 } 1037 for ( ; bits >= 0; bits-= 6) 1038 *out++= ((val >> bits) & 0x3F) | 0x80 ; 1039 return (out - savedout); 1040 } 1041 *out = val; 1042 return 1; 1043} 1044 1045/** 1046 * xmlCopyChar: 1047 * @len: Ignored, compatibility 1048 * @out: pointer to an array of xmlChar 1049 * @val: the char value 1050 * 1051 * append the char value in the array 1052 * 1053 * Returns the number of xmlChar written 1054 */ 1055 1056int 1057xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 1058 if ((out == NULL) || (val < 0)) return(0); 1059 /* the len parameter is ignored */ 1060 if (val >= 0x80) { 1061 return(xmlCopyCharMultiByte (out, val)); 1062 } 1063 *out = val; 1064 return 1; 1065} 1066 1067/************************************************************************ 1068 * * 1069 * Commodity functions to switch encodings * 1070 * * 1071 ************************************************************************/ 1072 1073static xmlCharEncodingHandlerPtr 1074xmlDetectEBCDIC(xmlParserInputPtr input) { 1075 xmlChar out[200]; 1076 xmlCharEncodingHandlerPtr handler; 1077 int inlen, outlen, res, i; 1078 1079 /* 1080 * To detect the EBCDIC code page, we convert the first 200 bytes 1081 * to EBCDIC-US and try to find the encoding declaration. 1082 */ 1083 handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC); 1084 if (handler == NULL) 1085 return(NULL); 1086 outlen = sizeof(out) - 1; 1087 inlen = input->end - input->cur; 1088 res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen); 1089 if (res < 0) 1090 return(handler); 1091 out[outlen] = 0; 1092 1093 for (i = 0; i < outlen; i++) { 1094 if (out[i] == '>') 1095 break; 1096 if ((out[i] == 'e') && 1097 (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) { 1098 int start, cur, quote; 1099 1100 i += 8; 1101 while (IS_BLANK_CH(out[i])) 1102 i += 1; 1103 if (out[i++] != '=') 1104 break; 1105 while (IS_BLANK_CH(out[i])) 1106 i += 1; 1107 quote = out[i++]; 1108 if ((quote != '\'') && (quote != '"')) 1109 break; 1110 start = i; 1111 cur = out[i]; 1112 while (((cur >= 'a') && (cur <= 'z')) || 1113 ((cur >= 'A') && (cur <= 'Z')) || 1114 ((cur >= '0') && (cur <= '9')) || 1115 (cur == '.') || (cur == '_') || 1116 (cur == '-')) 1117 cur = out[++i]; 1118 if (cur != quote) 1119 break; 1120 out[i] = 0; 1121 xmlCharEncCloseFunc(handler); 1122 return(xmlFindCharEncodingHandler((char *) out + start)); 1123 } 1124 } 1125 1126 /* 1127 * ICU handlers are stateful, so we have to recreate them. 1128 */ 1129 xmlCharEncCloseFunc(handler); 1130 return(xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC)); 1131} 1132 1133/** 1134 * xmlSwitchEncoding: 1135 * @ctxt: the parser context 1136 * @enc: the encoding value (number) 1137 * 1138 * Use encoding specified by enum to decode input data. 1139 * 1140 * This function can be used to enforce the encoding of chunks passed 1141 * to xmlParseChunk. 1142 * 1143 * Returns 0 in case of success, -1 otherwise 1144 */ 1145int 1146xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 1147{ 1148 xmlCharEncodingHandlerPtr handler = NULL; 1149 int check = 1; 1150 int ret; 1151 1152 if ((ctxt == NULL) || (ctxt->input == NULL)) 1153 return(-1); 1154 1155 switch (enc) { 1156 case XML_CHAR_ENCODING_NONE: 1157 case XML_CHAR_ENCODING_UTF8: 1158 case XML_CHAR_ENCODING_ASCII: 1159 check = 0; 1160 break; 1161 case XML_CHAR_ENCODING_EBCDIC: 1162 handler = xmlDetectEBCDIC(ctxt->input); 1163 break; 1164 default: 1165 handler = xmlGetCharEncodingHandler(enc); 1166 break; 1167 } 1168 1169 if ((check) && (handler == NULL)) { 1170 const char *name = xmlGetCharEncodingName(enc); 1171 1172 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1173 "encoding not supported: %s\n", 1174 BAD_CAST (name ? name : "<null>"), NULL); 1175 /* 1176 * TODO: We could recover from errors in external entities 1177 * if we didn't stop the parser. But most callers of this 1178 * function don't check the return value. 1179 */ 1180 xmlStopParser(ctxt); 1181 return(-1); 1182 } 1183 1184 ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler); 1185 1186 if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) { 1187 ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING; 1188 } 1189 1190 return(ret); 1191} 1192 1193/** 1194 * xmlSwitchInputEncoding: 1195 * @ctxt: the parser context 1196 * @input: the input stream 1197 * @handler: the encoding handler 1198 * 1199 * DEPRECATED: Internal function, don't use. 1200 * 1201 * Use encoding handler to decode input data. 1202 * 1203 * Returns 0 in case of success, -1 otherwise 1204 */ 1205int 1206xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1207 xmlCharEncodingHandlerPtr handler) 1208{ 1209 int nbchars; 1210 xmlParserInputBufferPtr in; 1211 1212 if ((input == NULL) || (input->buf == NULL)) { 1213 xmlCharEncCloseFunc(handler); 1214 return (-1); 1215 } 1216 in = input->buf; 1217 1218 input->flags |= XML_INPUT_HAS_ENCODING; 1219 1220 /* 1221 * UTF-8 requires no encoding handler. 1222 */ 1223 if ((handler != NULL) && 1224 (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) { 1225 xmlCharEncCloseFunc(handler); 1226 handler = NULL; 1227 } 1228 1229 if (in->encoder == handler) 1230 return (0); 1231 1232 if (in->encoder != NULL) { 1233 /* 1234 * Switching encodings during parsing is a really bad idea, 1235 * but Chromium can switch between ISO-8859-1 and UTF-16 before 1236 * separate calls to xmlParseChunk. 1237 * 1238 * TODO: We should check whether the "raw" input buffer is empty and 1239 * convert the old content using the old encoder. 1240 */ 1241 1242 xmlCharEncCloseFunc(in->encoder); 1243 in->encoder = handler; 1244 return (0); 1245 } 1246 1247 in->encoder = handler; 1248 1249 /* 1250 * Is there already some content down the pipe to convert ? 1251 */ 1252 if (xmlBufIsEmpty(in->buffer) == 0) { 1253 size_t processed; 1254 1255 /* 1256 * Shrink the current input buffer. 1257 * Move it as the raw buffer and create a new input buffer 1258 */ 1259 processed = input->cur - input->base; 1260 xmlBufShrink(in->buffer, processed); 1261 input->consumed += processed; 1262 in->raw = in->buffer; 1263 in->buffer = xmlBufCreate(); 1264 in->rawconsumed = processed; 1265 1266 nbchars = xmlCharEncInput(in); 1267 xmlBufResetInput(in->buffer, input); 1268 if (nbchars < 0) { 1269 /* TODO: This could be an out of memory or an encoding error. */ 1270 xmlErrInternal(ctxt, 1271 "switching encoding: encoder error\n", 1272 NULL); 1273 xmlHaltParser(ctxt); 1274 return (-1); 1275 } 1276 } 1277 return (0); 1278} 1279 1280/** 1281 * xmlSwitchToEncoding: 1282 * @ctxt: the parser context 1283 * @handler: the encoding handler 1284 * 1285 * Use encoding handler to decode input data. 1286 * 1287 * This function can be used to enforce the encoding of chunks passed 1288 * to xmlParseChunk. 1289 * 1290 * Returns 0 in case of success, -1 otherwise 1291 */ 1292int 1293xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1294{ 1295 if (ctxt == NULL) 1296 return(-1); 1297 return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler)); 1298} 1299 1300/** 1301 * xmlDetectEncoding: 1302 * @ctxt: the parser context 1303 * 1304 * Handle optional BOM, detect and switch to encoding. 1305 * 1306 * Assumes that there are at least four bytes in the input buffer. 1307 */ 1308void 1309xmlDetectEncoding(xmlParserCtxtPtr ctxt) { 1310 const xmlChar *in; 1311 xmlCharEncoding enc; 1312 int bomSize; 1313 int autoFlag = 0; 1314 1315 if (xmlParserGrow(ctxt) < 0) 1316 return; 1317 in = ctxt->input->cur; 1318 if (ctxt->input->end - in < 4) 1319 return; 1320 1321 if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) { 1322 /* 1323 * If the encoding was already set, only skip the BOM which was 1324 * possibly decoded to UTF-8. 1325 */ 1326 if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) { 1327 ctxt->input->cur += 3; 1328 } 1329 1330 return; 1331 } 1332 1333 enc = XML_CHAR_ENCODING_NONE; 1334 bomSize = 0; 1335 1336 switch (in[0]) { 1337 case 0x00: 1338 if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) { 1339 enc = XML_CHAR_ENCODING_UCS4BE; 1340 autoFlag = XML_INPUT_AUTO_OTHER; 1341 } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) { 1342 enc = XML_CHAR_ENCODING_UTF16BE; 1343 autoFlag = XML_INPUT_AUTO_UTF16BE; 1344 } 1345 break; 1346 1347 case 0x3C: 1348 if (in[1] == 0x00) { 1349 if ((in[2] == 0x00) && (in[3] == 0x00)) { 1350 enc = XML_CHAR_ENCODING_UCS4LE; 1351 autoFlag = XML_INPUT_AUTO_OTHER; 1352 } else if ((in[2] == 0x3F) && (in[3] == 0x00)) { 1353 enc = XML_CHAR_ENCODING_UTF16LE; 1354 autoFlag = XML_INPUT_AUTO_UTF16LE; 1355 } 1356 } 1357 break; 1358 1359 case 0x4C: 1360 if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) { 1361 enc = XML_CHAR_ENCODING_EBCDIC; 1362 autoFlag = XML_INPUT_AUTO_OTHER; 1363 } 1364 break; 1365 1366 case 0xEF: 1367 if ((in[1] == 0xBB) && (in[2] == 0xBF)) { 1368 enc = XML_CHAR_ENCODING_UTF8; 1369 autoFlag = XML_INPUT_AUTO_UTF8; 1370 bomSize = 3; 1371 } 1372 break; 1373 1374 case 0xFE: 1375 if (in[1] == 0xFF) { 1376 enc = XML_CHAR_ENCODING_UTF16BE; 1377 autoFlag = XML_INPUT_AUTO_UTF16BE; 1378 bomSize = 2; 1379 } 1380 break; 1381 1382 case 0xFF: 1383 if (in[1] == 0xFE) { 1384 enc = XML_CHAR_ENCODING_UTF16LE; 1385 autoFlag = XML_INPUT_AUTO_UTF16LE; 1386 bomSize = 2; 1387 } 1388 break; 1389 } 1390 1391 if (bomSize > 0) { 1392 ctxt->input->cur += bomSize; 1393 } 1394 1395 if (enc != XML_CHAR_ENCODING_NONE) { 1396 ctxt->input->flags |= autoFlag; 1397 xmlSwitchEncoding(ctxt, enc); 1398 } 1399} 1400 1401/** 1402 * xmlSetDeclaredEncoding: 1403 * @ctxt: the parser context 1404 * @encoding: declared encoding 1405 * 1406 * Set the encoding from a declaration in the document. 1407 * 1408 * If no encoding was set yet, switch the encoding. Otherwise, only warn 1409 * about encoding mismatches. 1410 * 1411 * Takes ownership of 'encoding'. 1412 */ 1413void 1414xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) { 1415 if (ctxt->encoding != NULL) 1416 xmlFree((xmlChar *) ctxt->encoding); 1417 ctxt->encoding = encoding; 1418 1419 if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) && 1420 ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) { 1421 xmlCharEncodingHandlerPtr handler; 1422 1423 handler = xmlFindCharEncodingHandler((const char *) encoding); 1424 if (handler == NULL) { 1425 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1426 "Unsupported encoding: %s\n", 1427 encoding, NULL); 1428 return; 1429 } 1430 1431 xmlSwitchToEncoding(ctxt, handler); 1432 ctxt->input->flags |= XML_INPUT_USES_ENC_DECL; 1433 } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) { 1434 static const char *allowedUTF8[] = { 1435 "UTF-8", "UTF8", NULL 1436 }; 1437 static const char *allowedUTF16LE[] = { 1438 "UTF-16", "UTF-16LE", "UTF16", NULL 1439 }; 1440 static const char *allowedUTF16BE[] = { 1441 "UTF-16", "UTF-16BE", "UTF16", NULL 1442 }; 1443 const char **allowed = NULL; 1444 const char *autoEnc = NULL; 1445 1446 switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) { 1447 case XML_INPUT_AUTO_UTF8: 1448 allowed = allowedUTF8; 1449 autoEnc = "UTF-8"; 1450 break; 1451 case XML_INPUT_AUTO_UTF16LE: 1452 allowed = allowedUTF16LE; 1453 autoEnc = "UTF-16LE"; 1454 break; 1455 case XML_INPUT_AUTO_UTF16BE: 1456 allowed = allowedUTF16BE; 1457 autoEnc = "UTF-16BE"; 1458 break; 1459 } 1460 1461 if (allowed != NULL) { 1462 const char **p; 1463 int match = 0; 1464 1465 for (p = allowed; *p != NULL; p++) { 1466 if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) { 1467 match = 1; 1468 break; 1469 } 1470 } 1471 1472 if (match == 0) { 1473 xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH, 1474 "Encoding '%s' doesn't match " 1475 "auto-detected '%s'\n", 1476 encoding, BAD_CAST autoEnc); 1477 } 1478 } 1479 } 1480} 1481 1482/** 1483 * xmlGetActualEncoding: 1484 * @ctxt: the parser context 1485 * 1486 * Returns the actual used to parse the document. This can differ from 1487 * the declared encoding. 1488 */ 1489const xmlChar * 1490xmlGetActualEncoding(xmlParserCtxtPtr ctxt) { 1491 const xmlChar *encoding = NULL; 1492 1493 if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) || 1494 (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) { 1495 /* Preserve encoding exactly */ 1496 encoding = ctxt->encoding; 1497 } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) { 1498 encoding = BAD_CAST ctxt->input->buf->encoder->name; 1499 } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) { 1500 encoding = BAD_CAST "UTF-8"; 1501 } 1502 1503 return(encoding); 1504} 1505 1506/************************************************************************ 1507 * * 1508 * Commodity functions to handle entities processing * 1509 * * 1510 ************************************************************************/ 1511 1512/** 1513 * xmlFreeInputStream: 1514 * @input: an xmlParserInputPtr 1515 * 1516 * Free up an input stream. 1517 */ 1518void 1519xmlFreeInputStream(xmlParserInputPtr input) { 1520 if (input == NULL) return; 1521 1522 if (input->filename != NULL) xmlFree((char *) input->filename); 1523 if (input->directory != NULL) xmlFree((char *) input->directory); 1524 if (input->version != NULL) xmlFree((char *) input->version); 1525 if ((input->free != NULL) && (input->base != NULL)) 1526 input->free((xmlChar *) input->base); 1527 if (input->buf != NULL) 1528 xmlFreeParserInputBuffer(input->buf); 1529 xmlFree(input); 1530} 1531 1532/** 1533 * xmlNewInputStream: 1534 * @ctxt: an XML parser context 1535 * 1536 * Create a new input stream structure. 1537 * 1538 * Returns the new input stream or NULL 1539 */ 1540xmlParserInputPtr 1541xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1542 xmlParserInputPtr input; 1543 1544 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1545 if (input == NULL) { 1546 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1547 return(NULL); 1548 } 1549 memset(input, 0, sizeof(xmlParserInput)); 1550 input->line = 1; 1551 input->col = 1; 1552 1553 /* 1554 * If the context is NULL the id cannot be initialized, but that 1555 * should not happen while parsing which is the situation where 1556 * the id is actually needed. 1557 */ 1558 if (ctxt != NULL) { 1559 if (input->id >= INT_MAX) { 1560 xmlErrMemory(ctxt, "Input ID overflow\n"); 1561 return(NULL); 1562 } 1563 input->id = ctxt->input_id++; 1564 } 1565 1566 return(input); 1567} 1568 1569/** 1570 * xmlNewIOInputStream: 1571 * @ctxt: an XML parser context 1572 * @input: an I/O Input 1573 * @enc: the charset encoding if known 1574 * 1575 * Create a new input stream structure encapsulating the @input into 1576 * a stream suitable for the parser. 1577 * 1578 * Returns the new input stream or NULL 1579 */ 1580xmlParserInputPtr 1581xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1582 xmlCharEncoding enc) { 1583 xmlParserInputPtr inputStream; 1584 1585 if (input == NULL) return(NULL); 1586 if (xmlParserDebugEntities) 1587 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1588 inputStream = xmlNewInputStream(ctxt); 1589 if (inputStream == NULL) { 1590 return(NULL); 1591 } 1592 inputStream->filename = NULL; 1593 inputStream->buf = input; 1594 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1595 1596 if (enc != XML_CHAR_ENCODING_NONE) { 1597 xmlSwitchEncoding(ctxt, enc); 1598 } 1599 1600 return(inputStream); 1601} 1602 1603/** 1604 * xmlNewEntityInputStream: 1605 * @ctxt: an XML parser context 1606 * @entity: an Entity pointer 1607 * 1608 * DEPRECATED: Internal function, do not use. 1609 * 1610 * Create a new input stream based on an xmlEntityPtr 1611 * 1612 * Returns the new input stream or NULL 1613 */ 1614xmlParserInputPtr 1615xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1616 xmlParserInputPtr input; 1617 1618 if (entity == NULL) { 1619 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1620 NULL); 1621 return(NULL); 1622 } 1623 if (xmlParserDebugEntities) 1624 xmlGenericError(xmlGenericErrorContext, 1625 "new input from entity: %s\n", entity->name); 1626 if (entity->content == NULL) { 1627 switch (entity->etype) { 1628 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1629 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1630 entity->name); 1631 break; 1632 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1633 case XML_EXTERNAL_PARAMETER_ENTITY: 1634 input = xmlLoadExternalEntity((char *) entity->URI, 1635 (char *) entity->ExternalID, ctxt); 1636 if (input != NULL) 1637 input->entity = entity; 1638 return(input); 1639 case XML_INTERNAL_GENERAL_ENTITY: 1640 xmlErrInternal(ctxt, 1641 "Internal entity %s without content !\n", 1642 entity->name); 1643 break; 1644 case XML_INTERNAL_PARAMETER_ENTITY: 1645 xmlErrInternal(ctxt, 1646 "Internal parameter entity %s without content !\n", 1647 entity->name); 1648 break; 1649 case XML_INTERNAL_PREDEFINED_ENTITY: 1650 xmlErrInternal(ctxt, 1651 "Predefined entity %s without content !\n", 1652 entity->name); 1653 break; 1654 } 1655 return(NULL); 1656 } 1657 input = xmlNewInputStream(ctxt); 1658 if (input == NULL) { 1659 return(NULL); 1660 } 1661 if (entity->URI != NULL) 1662 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1663 input->base = entity->content; 1664 if (entity->length == 0) 1665 entity->length = xmlStrlen(entity->content); 1666 input->cur = entity->content; 1667 input->length = entity->length; 1668 input->end = &entity->content[input->length]; 1669 input->entity = entity; 1670 return(input); 1671} 1672 1673/** 1674 * xmlNewStringInputStream: 1675 * @ctxt: an XML parser context 1676 * @buffer: an memory buffer 1677 * 1678 * Create a new input stream based on a memory buffer. 1679 * Returns the new input stream 1680 */ 1681xmlParserInputPtr 1682xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1683 xmlParserInputPtr input; 1684 xmlParserInputBufferPtr buf; 1685 1686 if (buffer == NULL) { 1687 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1688 NULL); 1689 return(NULL); 1690 } 1691 if (xmlParserDebugEntities) 1692 xmlGenericError(xmlGenericErrorContext, 1693 "new fixed input: %.30s\n", buffer); 1694 buf = xmlParserInputBufferCreateString(buffer); 1695 if (buf == NULL) { 1696 xmlErrMemory(ctxt, NULL); 1697 return(NULL); 1698 } 1699 input = xmlNewInputStream(ctxt); 1700 if (input == NULL) { 1701 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1702 xmlFreeParserInputBuffer(buf); 1703 return(NULL); 1704 } 1705 input->buf = buf; 1706 xmlBufResetInput(input->buf->buffer, input); 1707 return(input); 1708} 1709 1710/** 1711 * xmlNewInputFromFile: 1712 * @ctxt: an XML parser context 1713 * @filename: the filename to use as entity 1714 * 1715 * Create a new input stream based on a file or an URL. 1716 * 1717 * Returns the new input stream or NULL in case of error 1718 */ 1719xmlParserInputPtr 1720xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1721 xmlParserInputBufferPtr buf; 1722 xmlParserInputPtr inputStream; 1723 char *directory = NULL; 1724 xmlChar *URI = NULL; 1725 1726 if (xmlParserDebugEntities) 1727 xmlGenericError(xmlGenericErrorContext, 1728 "new input from file: %s\n", filename); 1729 if (ctxt == NULL) return(NULL); 1730 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1731 if (buf == NULL) { 1732 if (filename == NULL) 1733 __xmlLoaderErr(ctxt, 1734 "failed to load external entity: NULL filename \n", 1735 NULL); 1736 else 1737 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1738 (const char *) filename); 1739 return(NULL); 1740 } 1741 1742 inputStream = xmlNewInputStream(ctxt); 1743 if (inputStream == NULL) { 1744 xmlFreeParserInputBuffer(buf); 1745 return(NULL); 1746 } 1747 1748 inputStream->buf = buf; 1749 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1750 if (inputStream == NULL) 1751 return(NULL); 1752 1753 if (inputStream->filename == NULL) 1754 URI = xmlStrdup((xmlChar *) filename); 1755 else 1756 URI = xmlStrdup((xmlChar *) inputStream->filename); 1757 directory = xmlParserGetDirectory((const char *) URI); 1758 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1759 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1760 if (URI != NULL) xmlFree((char *) URI); 1761 inputStream->directory = directory; 1762 1763 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1764 if ((ctxt->directory == NULL) && (directory != NULL)) 1765 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1766 return(inputStream); 1767} 1768 1769/************************************************************************ 1770 * * 1771 * Commodity functions to handle parser contexts * 1772 * * 1773 ************************************************************************/ 1774 1775/** 1776 * xmlInitSAXParserCtxt: 1777 * @ctxt: XML parser context 1778 * @sax: SAX handlert 1779 * @userData: user data 1780 * 1781 * Initialize a SAX parser context 1782 * 1783 * Returns 0 in case of success and -1 in case of error 1784 */ 1785 1786static int 1787xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax, 1788 void *userData) 1789{ 1790 xmlParserInputPtr input; 1791 1792 if(ctxt==NULL) { 1793 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1794 return(-1); 1795 } 1796 1797 xmlInitParser(); 1798 1799 if (ctxt->dict == NULL) 1800 ctxt->dict = xmlDictCreate(); 1801 if (ctxt->dict == NULL) { 1802 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1803 return(-1); 1804 } 1805 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); 1806 1807 if (ctxt->sax == NULL) 1808 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1809 if (ctxt->sax == NULL) { 1810 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1811 return(-1); 1812 } 1813 if (sax == NULL) { 1814 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 1815 xmlSAXVersion(ctxt->sax, 2); 1816 ctxt->userData = ctxt; 1817 } else { 1818 if (sax->initialized == XML_SAX2_MAGIC) { 1819 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 1820 } else { 1821 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 1822 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 1823 } 1824 ctxt->userData = userData ? userData : ctxt; 1825 } 1826 1827 ctxt->maxatts = 0; 1828 ctxt->atts = NULL; 1829 /* Allocate the Input stack */ 1830 if (ctxt->inputTab == NULL) { 1831 ctxt->inputTab = (xmlParserInputPtr *) 1832 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1833 ctxt->inputMax = 5; 1834 } 1835 if (ctxt->inputTab == NULL) { 1836 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1837 ctxt->inputNr = 0; 1838 ctxt->inputMax = 0; 1839 ctxt->input = NULL; 1840 return(-1); 1841 } 1842 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1843 xmlFreeInputStream(input); 1844 } 1845 ctxt->inputNr = 0; 1846 ctxt->input = NULL; 1847 1848 ctxt->version = NULL; 1849 ctxt->encoding = NULL; 1850 ctxt->standalone = -1; 1851 ctxt->hasExternalSubset = 0; 1852 ctxt->hasPErefs = 0; 1853 ctxt->html = 0; 1854 ctxt->external = 0; 1855 ctxt->instate = XML_PARSER_START; 1856 ctxt->token = 0; 1857 ctxt->directory = NULL; 1858 1859 /* Allocate the Node stack */ 1860 if (ctxt->nodeTab == NULL) { 1861 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1862 ctxt->nodeMax = 10; 1863 } 1864 if (ctxt->nodeTab == NULL) { 1865 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1866 ctxt->nodeNr = 0; 1867 ctxt->nodeMax = 0; 1868 ctxt->node = NULL; 1869 ctxt->inputNr = 0; 1870 ctxt->inputMax = 0; 1871 ctxt->input = NULL; 1872 return(-1); 1873 } 1874 ctxt->nodeNr = 0; 1875 ctxt->node = NULL; 1876 1877 /* Allocate the Name stack */ 1878 if (ctxt->nameTab == NULL) { 1879 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1880 ctxt->nameMax = 10; 1881 } 1882 if (ctxt->nameTab == NULL) { 1883 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1884 ctxt->nodeNr = 0; 1885 ctxt->nodeMax = 0; 1886 ctxt->node = NULL; 1887 ctxt->inputNr = 0; 1888 ctxt->inputMax = 0; 1889 ctxt->input = NULL; 1890 ctxt->nameNr = 0; 1891 ctxt->nameMax = 0; 1892 ctxt->name = NULL; 1893 return(-1); 1894 } 1895 ctxt->nameNr = 0; 1896 ctxt->name = NULL; 1897 1898 /* Allocate the space stack */ 1899 if (ctxt->spaceTab == NULL) { 1900 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1901 ctxt->spaceMax = 10; 1902 } 1903 if (ctxt->spaceTab == NULL) { 1904 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1905 ctxt->nodeNr = 0; 1906 ctxt->nodeMax = 0; 1907 ctxt->node = NULL; 1908 ctxt->inputNr = 0; 1909 ctxt->inputMax = 0; 1910 ctxt->input = NULL; 1911 ctxt->nameNr = 0; 1912 ctxt->nameMax = 0; 1913 ctxt->name = NULL; 1914 ctxt->spaceNr = 0; 1915 ctxt->spaceMax = 0; 1916 ctxt->space = NULL; 1917 return(-1); 1918 } 1919 ctxt->spaceNr = 1; 1920 ctxt->spaceMax = 10; 1921 ctxt->spaceTab[0] = -1; 1922 ctxt->space = &ctxt->spaceTab[0]; 1923 ctxt->myDoc = NULL; 1924 ctxt->wellFormed = 1; 1925 ctxt->nsWellFormed = 1; 1926 ctxt->valid = 1; 1927 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1928 if (ctxt->loadsubset) { 1929 ctxt->options |= XML_PARSE_DTDLOAD; 1930 } 1931 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1932 ctxt->pedantic = xmlPedanticParserDefaultValue; 1933 if (ctxt->pedantic) { 1934 ctxt->options |= XML_PARSE_PEDANTIC; 1935 } 1936 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1937 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1938 if (ctxt->keepBlanks == 0) { 1939 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1940 ctxt->options |= XML_PARSE_NOBLANKS; 1941 } 1942 1943 ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT; 1944 ctxt->vctxt.userData = ctxt; 1945 ctxt->vctxt.error = xmlParserValidityError; 1946 ctxt->vctxt.warning = xmlParserValidityWarning; 1947 if (ctxt->validate) { 1948 if (xmlGetWarningsDefaultValue == 0) 1949 ctxt->vctxt.warning = NULL; 1950 else 1951 ctxt->vctxt.warning = xmlParserValidityWarning; 1952 ctxt->vctxt.nodeMax = 0; 1953 ctxt->options |= XML_PARSE_DTDVALID; 1954 } 1955 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1956 if (ctxt->replaceEntities) { 1957 ctxt->options |= XML_PARSE_NOENT; 1958 } 1959 ctxt->record_info = 0; 1960 ctxt->checkIndex = 0; 1961 ctxt->inSubset = 0; 1962 ctxt->errNo = XML_ERR_OK; 1963 ctxt->depth = 0; 1964 ctxt->catalogs = NULL; 1965 ctxt->sizeentities = 0; 1966 ctxt->sizeentcopy = 0; 1967 ctxt->input_id = 1; 1968 ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT; 1969 xmlInitNodeInfoSeq(&ctxt->node_seq); 1970 1971 if (ctxt->nsdb == NULL) { 1972 ctxt->nsdb = xmlParserNsCreate(); 1973 if (ctxt->nsdb == NULL) { 1974 xmlErrMemory(ctxt, NULL); 1975 return(-1); 1976 } 1977 } 1978 1979 return(0); 1980} 1981 1982/** 1983 * xmlInitParserCtxt: 1984 * @ctxt: an XML parser context 1985 * 1986 * DEPRECATED: Internal function which will be made private in a future 1987 * version. 1988 * 1989 * Initialize a parser context 1990 * 1991 * Returns 0 in case of success and -1 in case of error 1992 */ 1993 1994int 1995xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1996{ 1997 return(xmlInitSAXParserCtxt(ctxt, NULL, NULL)); 1998} 1999 2000/** 2001 * xmlFreeParserCtxt: 2002 * @ctxt: an XML parser context 2003 * 2004 * Free all the memory used by a parser context. However the parsed 2005 * document in ctxt->myDoc is not freed. 2006 */ 2007 2008void 2009xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 2010{ 2011 xmlParserInputPtr input; 2012 2013 if (ctxt == NULL) return; 2014 2015 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 2016 xmlFreeInputStream(input); 2017 } 2018 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 2019 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 2020 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 2021 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 2022 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 2023 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 2024 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 2025 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 2026 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 2027#ifdef LIBXML_SAX1_ENABLED 2028 if ((ctxt->sax != NULL) && 2029 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 2030#else 2031 if (ctxt->sax != NULL) 2032#endif /* LIBXML_SAX1_ENABLED */ 2033 xmlFree(ctxt->sax); 2034 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 2035 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 2036 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 2037 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 2038 if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab); 2039 if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb); 2040 if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash); 2041 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 2042 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 2043 if (ctxt->attsDefault != NULL) 2044 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator); 2045 if (ctxt->attsSpecial != NULL) 2046 xmlHashFree(ctxt->attsSpecial, NULL); 2047 if (ctxt->freeElems != NULL) { 2048 xmlNodePtr cur, next; 2049 2050 cur = ctxt->freeElems; 2051 while (cur != NULL) { 2052 next = cur->next; 2053 xmlFree(cur); 2054 cur = next; 2055 } 2056 } 2057 if (ctxt->freeAttrs != NULL) { 2058 xmlAttrPtr cur, next; 2059 2060 cur = ctxt->freeAttrs; 2061 while (cur != NULL) { 2062 next = cur->next; 2063 xmlFree(cur); 2064 cur = next; 2065 } 2066 } 2067 /* 2068 * cleanup the error strings 2069 */ 2070 if (ctxt->lastError.message != NULL) 2071 xmlFree(ctxt->lastError.message); 2072 if (ctxt->lastError.file != NULL) 2073 xmlFree(ctxt->lastError.file); 2074 if (ctxt->lastError.str1 != NULL) 2075 xmlFree(ctxt->lastError.str1); 2076 if (ctxt->lastError.str2 != NULL) 2077 xmlFree(ctxt->lastError.str2); 2078 if (ctxt->lastError.str3 != NULL) 2079 xmlFree(ctxt->lastError.str3); 2080 2081#ifdef LIBXML_CATALOG_ENABLED 2082 if (ctxt->catalogs != NULL) 2083 xmlCatalogFreeLocal(ctxt->catalogs); 2084#endif 2085 xmlFree(ctxt); 2086} 2087 2088/** 2089 * xmlNewParserCtxt: 2090 * 2091 * Allocate and initialize a new parser context. 2092 * 2093 * Returns the xmlParserCtxtPtr or NULL 2094 */ 2095 2096xmlParserCtxtPtr 2097xmlNewParserCtxt(void) 2098{ 2099 return(xmlNewSAXParserCtxt(NULL, NULL)); 2100} 2101 2102/** 2103 * xmlNewSAXParserCtxt: 2104 * @sax: SAX handler 2105 * @userData: user data 2106 * 2107 * Allocate and initialize a new SAX parser context. If userData is NULL, 2108 * the parser context will be passed as user data. 2109 * 2110 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed. 2111 */ 2112 2113xmlParserCtxtPtr 2114xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData) 2115{ 2116 xmlParserCtxtPtr ctxt; 2117 2118 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 2119 if (ctxt == NULL) { 2120 xmlErrMemory(NULL, "cannot allocate parser context\n"); 2121 return(NULL); 2122 } 2123 memset(ctxt, 0, sizeof(xmlParserCtxt)); 2124 if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) { 2125 xmlFreeParserCtxt(ctxt); 2126 return(NULL); 2127 } 2128 return(ctxt); 2129} 2130 2131/************************************************************************ 2132 * * 2133 * Handling of node information * 2134 * * 2135 ************************************************************************/ 2136 2137/** 2138 * xmlClearParserCtxt: 2139 * @ctxt: an XML parser context 2140 * 2141 * Clear (release owned resources) and reinitialize a parser context 2142 */ 2143 2144void 2145xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 2146{ 2147 if (ctxt==NULL) 2148 return; 2149 xmlClearNodeInfoSeq(&ctxt->node_seq); 2150 xmlCtxtReset(ctxt); 2151} 2152 2153 2154/** 2155 * xmlParserFindNodeInfo: 2156 * @ctx: an XML parser context 2157 * @node: an XML node within the tree 2158 * 2159 * DEPRECATED: Don't use. 2160 * 2161 * Find the parser node info struct for a given node 2162 * 2163 * Returns an xmlParserNodeInfo block pointer or NULL 2164 */ 2165const xmlParserNodeInfo * 2166xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 2167{ 2168 unsigned long pos; 2169 2170 if ((ctx == NULL) || (node == NULL)) 2171 return (NULL); 2172 /* Find position where node should be at */ 2173 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 2174 if (pos < ctx->node_seq.length 2175 && ctx->node_seq.buffer[pos].node == node) 2176 return &ctx->node_seq.buffer[pos]; 2177 else 2178 return NULL; 2179} 2180 2181 2182/** 2183 * xmlInitNodeInfoSeq: 2184 * @seq: a node info sequence pointer 2185 * 2186 * DEPRECATED: Don't use. 2187 * 2188 * -- Initialize (set to initial state) node info sequence 2189 */ 2190void 2191xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 2192{ 2193 if (seq == NULL) 2194 return; 2195 seq->length = 0; 2196 seq->maximum = 0; 2197 seq->buffer = NULL; 2198} 2199 2200/** 2201 * xmlClearNodeInfoSeq: 2202 * @seq: a node info sequence pointer 2203 * 2204 * DEPRECATED: Don't use. 2205 * 2206 * -- Clear (release memory and reinitialize) node 2207 * info sequence 2208 */ 2209void 2210xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 2211{ 2212 if (seq == NULL) 2213 return; 2214 if (seq->buffer != NULL) 2215 xmlFree(seq->buffer); 2216 xmlInitNodeInfoSeq(seq); 2217} 2218 2219/** 2220 * xmlParserFindNodeInfoIndex: 2221 * @seq: a node info sequence pointer 2222 * @node: an XML node pointer 2223 * 2224 * DEPRECATED: Don't use. 2225 * 2226 * xmlParserFindNodeInfoIndex : Find the index that the info record for 2227 * the given node is or should be at in a sorted sequence 2228 * 2229 * Returns a long indicating the position of the record 2230 */ 2231unsigned long 2232xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 2233 const xmlNodePtr node) 2234{ 2235 unsigned long upper, lower, middle; 2236 int found = 0; 2237 2238 if ((seq == NULL) || (node == NULL)) 2239 return ((unsigned long) -1); 2240 2241 /* Do a binary search for the key */ 2242 lower = 1; 2243 upper = seq->length; 2244 middle = 0; 2245 while (lower <= upper && !found) { 2246 middle = lower + (upper - lower) / 2; 2247 if (node == seq->buffer[middle - 1].node) 2248 found = 1; 2249 else if (node < seq->buffer[middle - 1].node) 2250 upper = middle - 1; 2251 else 2252 lower = middle + 1; 2253 } 2254 2255 /* Return position */ 2256 if (middle == 0 || seq->buffer[middle - 1].node < node) 2257 return middle; 2258 else 2259 return middle - 1; 2260} 2261 2262 2263/** 2264 * xmlParserAddNodeInfo: 2265 * @ctxt: an XML parser context 2266 * @info: a node info sequence pointer 2267 * 2268 * DEPRECATED: Don't use. 2269 * 2270 * Insert node info record into the sorted sequence 2271 */ 2272void 2273xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 2274 const xmlParserNodeInfoPtr info) 2275{ 2276 unsigned long pos; 2277 2278 if ((ctxt == NULL) || (info == NULL)) return; 2279 2280 /* Find pos and check to see if node is already in the sequence */ 2281 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 2282 info->node); 2283 2284 if ((pos < ctxt->node_seq.length) && 2285 (ctxt->node_seq.buffer != NULL) && 2286 (ctxt->node_seq.buffer[pos].node == info->node)) { 2287 ctxt->node_seq.buffer[pos] = *info; 2288 } 2289 2290 /* Otherwise, we need to add new node to buffer */ 2291 else { 2292 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) || 2293 (ctxt->node_seq.buffer == NULL)) { 2294 xmlParserNodeInfo *tmp_buffer; 2295 unsigned int byte_size; 2296 2297 if (ctxt->node_seq.maximum == 0) 2298 ctxt->node_seq.maximum = 2; 2299 byte_size = (sizeof(*ctxt->node_seq.buffer) * 2300 (2 * ctxt->node_seq.maximum)); 2301 2302 if (ctxt->node_seq.buffer == NULL) 2303 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2304 else 2305 tmp_buffer = 2306 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2307 byte_size); 2308 2309 if (tmp_buffer == NULL) { 2310 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2311 return; 2312 } 2313 ctxt->node_seq.buffer = tmp_buffer; 2314 ctxt->node_seq.maximum *= 2; 2315 } 2316 2317 /* If position is not at end, move elements out of the way */ 2318 if (pos != ctxt->node_seq.length) { 2319 unsigned long i; 2320 2321 for (i = ctxt->node_seq.length; i > pos; i--) 2322 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2323 } 2324 2325 /* Copy element and increase length */ 2326 ctxt->node_seq.buffer[pos] = *info; 2327 ctxt->node_seq.length++; 2328 } 2329} 2330 2331/************************************************************************ 2332 * * 2333 * Defaults settings * 2334 * * 2335 ************************************************************************/ 2336/** 2337 * xmlPedanticParserDefault: 2338 * @val: int 0 or 1 2339 * 2340 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC. 2341 * 2342 * Set and return the previous value for enabling pedantic warnings. 2343 * 2344 * Returns the last value for 0 for no substitution, 1 for substitution. 2345 */ 2346 2347int 2348xmlPedanticParserDefault(int val) { 2349 int old = xmlPedanticParserDefaultValue; 2350 2351 xmlPedanticParserDefaultValue = val; 2352 return(old); 2353} 2354 2355/** 2356 * xmlLineNumbersDefault: 2357 * @val: int 0 or 1 2358 * 2359 * DEPRECATED: The modern options API always enables line numbers. 2360 * 2361 * Set and return the previous value for enabling line numbers in elements 2362 * contents. This may break on old application and is turned off by default. 2363 * 2364 * Returns the last value for 0 for no substitution, 1 for substitution. 2365 */ 2366 2367int 2368xmlLineNumbersDefault(int val) { 2369 int old = xmlLineNumbersDefaultValue; 2370 2371 xmlLineNumbersDefaultValue = val; 2372 return(old); 2373} 2374 2375/** 2376 * xmlSubstituteEntitiesDefault: 2377 * @val: int 0 or 1 2378 * 2379 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT. 2380 * 2381 * Set and return the previous value for default entity support. 2382 * Initially the parser always keep entity references instead of substituting 2383 * entity values in the output. This function has to be used to change the 2384 * default parser behavior 2385 * SAX::substituteEntities() has to be used for changing that on a file by 2386 * file basis. 2387 * 2388 * Returns the last value for 0 for no substitution, 1 for substitution. 2389 */ 2390 2391int 2392xmlSubstituteEntitiesDefault(int val) { 2393 int old = xmlSubstituteEntitiesDefaultValue; 2394 2395 xmlSubstituteEntitiesDefaultValue = val; 2396 return(old); 2397} 2398 2399/** 2400 * xmlKeepBlanksDefault: 2401 * @val: int 0 or 1 2402 * 2403 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS. 2404 * 2405 * Set and return the previous value for default blanks text nodes support. 2406 * The 1.x version of the parser used an heuristic to try to detect 2407 * ignorable white spaces. As a result the SAX callback was generating 2408 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2409 * using the DOM output text nodes containing those blanks were not generated. 2410 * The 2.x and later version will switch to the XML standard way and 2411 * ignorableWhitespace() are only generated when running the parser in 2412 * validating mode and when the current element doesn't allow CDATA or 2413 * mixed content. 2414 * This function is provided as a way to force the standard behavior 2415 * on 1.X libs and to switch back to the old mode for compatibility when 2416 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2417 * by using xmlIsBlankNode() commodity function to detect the "empty" 2418 * nodes generated. 2419 * This value also affect autogeneration of indentation when saving code 2420 * if blanks sections are kept, indentation is not generated. 2421 * 2422 * Returns the last value for 0 for no substitution, 1 for substitution. 2423 */ 2424 2425int 2426xmlKeepBlanksDefault(int val) { 2427 int old = xmlKeepBlanksDefaultValue; 2428 2429 xmlKeepBlanksDefaultValue = val; 2430#ifdef LIBXML_OUTPUT_ENABLED 2431 if (!val) 2432 xmlIndentTreeOutput = 1; 2433#endif 2434 return(old); 2435}