Reactos
at master 15148 lines 431 kB view raw
1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33/* To avoid EBCDIC trouble when parsing on zOS */ 34#if defined(__MVS__) 35#pragma convert("ISO8859-1") 36#endif 37 38#define IN_LIBXML 39#include "libxml.h" 40 41#if defined(_WIN32) 42#define XML_DIR_SEP '\\' 43#else 44#define XML_DIR_SEP '/' 45#endif 46 47#include <stdlib.h> 48#include <limits.h> 49#include <string.h> 50#include <stdarg.h> 51#include <stddef.h> 52#include <ctype.h> 53#include <stdlib.h> 54#include <libxml/parser.h> 55#include <libxml/xmlmemory.h> 56#include <libxml/tree.h> 57#include <libxml/parserInternals.h> 58#include <libxml/valid.h> 59#include <libxml/entities.h> 60#include <libxml/xmlerror.h> 61#include <libxml/encoding.h> 62#include <libxml/xmlIO.h> 63#include <libxml/uri.h> 64#include <libxml/SAX2.h> 65#ifdef LIBXML_CATALOG_ENABLED 66#include <libxml/catalog.h> 67#endif 68 69#include "private/buf.h" 70#include "private/dict.h" 71#include "private/entities.h" 72#include "private/error.h" 73#include "private/html.h" 74#include "private/io.h" 75#include "private/parser.h" 76 77#define NS_INDEX_EMPTY INT_MAX 78#define NS_INDEX_XML (INT_MAX - 1) 79#define URI_HASH_EMPTY 0xD943A04E 80#define URI_HASH_XML 0xF0451F02 81 82struct _xmlStartTag { 83 const xmlChar *prefix; 84 const xmlChar *URI; 85 int line; 86 int nsNr; 87}; 88 89typedef struct { 90 void *saxData; 91 unsigned prefixHashValue; 92 unsigned uriHashValue; 93 unsigned elementId; 94 int oldIndex; 95} xmlParserNsExtra; 96 97typedef struct { 98 unsigned hashValue; 99 int index; 100} xmlParserNsBucket; 101 102struct _xmlParserNsData { 103 xmlParserNsExtra *extra; 104 105 unsigned hashSize; 106 unsigned hashElems; 107 xmlParserNsBucket *hash; 108 109 unsigned elementId; 110 int defaultNsIndex; 111}; 112 113struct _xmlAttrHashBucket { 114 int index; 115}; 116 117static xmlParserCtxtPtr 118xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData, 119 const xmlChar *URL, const xmlChar *ID, const xmlChar *base, 120 xmlParserCtxtPtr pctx); 121 
122static int 123xmlParseElementStart(xmlParserCtxtPtr ctxt); 124 125static void 126xmlParseElementEnd(xmlParserCtxtPtr ctxt); 127 128/************************************************************************ 129 * * 130 * Arbitrary limits set in the parser. See XML_PARSE_HUGE * 131 * * 132 ************************************************************************/ 133 134#define XML_PARSER_BIG_ENTITY 1000 135#define XML_PARSER_LOT_ENTITY 5000 136 137/* 138 * Constants for protection against abusive entity expansion 139 * ("billion laughs"). 140 */ 141 142/* 143 * A certain amount of entity expansion which is always allowed. 144 */ 145#define XML_PARSER_ALLOWED_EXPANSION 1000000 146 147/* 148 * Fixed cost for each entity reference. This crudely models processing time 149 * as well to protect, for example, against exponential expansion of empty 150 * or very short entities. 151 */ 152#define XML_ENT_FIXED_COST 20 153 154/** 155 * xmlParserMaxDepth: 156 * 157 * arbitrary depth limit for the XML documents that we allow to 158 * process. This is not a limitation of the parser but a safety 159 * boundary feature. It can be disabled with the XML_PARSE_HUGE 160 * parser option. 161 */ 162unsigned int xmlParserMaxDepth = 256; 163 164 165 166#define XML_PARSER_BIG_BUFFER_SIZE 300 167#define XML_PARSER_BUFFER_SIZE 100 168#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 169 170/** 171 * XML_PARSER_CHUNK_SIZE 172 * 173 * When calling GROW that's the minimal amount of data 174 * the parser expected to have received. 
It is not a hard 175 * limit but an optimization when reading strings like Names 176 * It is not strictly needed as long as inputs available characters 177 * are followed by 0, which should be provided by the I/O level 178 */ 179#define XML_PARSER_CHUNK_SIZE 100 180 181/** 182 * xmlParserVersion: 183 * 184 * Constant string describing the internal version of the library 185 */ 186const char *const 187xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA; 188 189/* 190 * List of XML prefixed PI allowed by W3C specs 191 */ 192 193static const char* const xmlW3CPIs[] = { 194 "xml-stylesheet", 195 "xml-model", 196 NULL 197}; 198 199 200/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 201static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 202 const xmlChar **str); 203 204static xmlParserErrors 205xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 206 xmlSAXHandlerPtr sax, 207 void *user_data, int depth, const xmlChar *URL, 208 const xmlChar *ID, xmlNodePtr *list); 209 210static int 211xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options); 212#ifdef LIBXML_LEGACY_ENABLED 213static void 214xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 215 xmlNodePtr lastNode); 216#endif /* LIBXML_LEGACY_ENABLED */ 217 218static xmlParserErrors 219xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 220 const xmlChar *string, void *user_data, xmlNodePtr *lst); 221 222static int 223xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 224 225/************************************************************************ 226 * * 227 * Some factorized error routines * 228 * * 229 ************************************************************************/ 230 231/** 232 * xmlErrAttributeDup: 233 * @ctxt: an XML parser context 234 * @prefix: the attribute prefix 235 * @localname: the attribute localname 236 * 237 * Handle a redefinition of attribute error 238 */ 239static void 
240xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 241 const xmlChar * localname) 242{ 243 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 244 (ctxt->instate == XML_PARSER_EOF)) 245 return; 246 if (ctxt != NULL) 247 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 248 249 if (prefix == NULL) 250 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 251 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 252 (const char *) localname, NULL, NULL, 0, 0, 253 "Attribute %s redefined\n", localname); 254 else 255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 257 (const char *) prefix, (const char *) localname, 258 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 259 localname); 260 if (ctxt != NULL) { 261 ctxt->wellFormed = 0; 262 if (ctxt->recovery == 0) 263 ctxt->disableSAX = 1; 264 } 265} 266 267/** 268 * xmlFatalErrMsg: 269 * @ctxt: an XML parser context 270 * @error: the error number 271 * @msg: the error message 272 * 273 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 274 */ 275static void LIBXML_ATTR_FORMAT(3,0) 276xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 277 const char *msg) 278{ 279 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 280 (ctxt->instate == XML_PARSER_EOF)) 281 return; 282 if (ctxt != NULL) 283 ctxt->errNo = error; 284 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 285 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 286 if (ctxt != NULL) { 287 ctxt->wellFormed = 0; 288 if (ctxt->recovery == 0) 289 ctxt->disableSAX = 1; 290 } 291} 292 293/** 294 * xmlWarningMsg: 295 * @ctxt: an XML parser context 296 * @error: the error number 297 * @msg: the error message 298 * @str1: extra data 299 * @str2: extra data 300 * 301 * Handle a warning. 
302 */ 303void LIBXML_ATTR_FORMAT(3,0) 304xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 305 const char *msg, const xmlChar *str1, const xmlChar *str2) 306{ 307 xmlStructuredErrorFunc schannel = NULL; 308 309 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 310 (ctxt->instate == XML_PARSER_EOF)) 311 return; 312 if ((ctxt != NULL) && (ctxt->sax != NULL) && 313 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 314 schannel = ctxt->sax->serror; 315 if (ctxt != NULL) { 316 __xmlRaiseError(schannel, 317 (ctxt->sax) ? ctxt->sax->warning : NULL, 318 ctxt->userData, 319 ctxt, NULL, XML_FROM_PARSER, error, 320 XML_ERR_WARNING, NULL, 0, 321 (const char *) str1, (const char *) str2, NULL, 0, 0, 322 msg, (const char *) str1, (const char *) str2); 323 } else { 324 __xmlRaiseError(schannel, NULL, NULL, 325 ctxt, NULL, XML_FROM_PARSER, error, 326 XML_ERR_WARNING, NULL, 0, 327 (const char *) str1, (const char *) str2, NULL, 0, 0, 328 msg, (const char *) str1, (const char *) str2); 329 } 330} 331 332/** 333 * xmlValidityError: 334 * @ctxt: an XML parser context 335 * @error: the error number 336 * @msg: the error message 337 * @str1: extra data 338 * 339 * Handle a validity error. 
340 */ 341static void LIBXML_ATTR_FORMAT(3,0) 342xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 343 const char *msg, const xmlChar *str1, const xmlChar *str2) 344{ 345 xmlStructuredErrorFunc schannel = NULL; 346 347 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 348 (ctxt->instate == XML_PARSER_EOF)) 349 return; 350 if (ctxt != NULL) { 351 ctxt->errNo = error; 352 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 353 schannel = ctxt->sax->serror; 354 } 355 if (ctxt != NULL) { 356 __xmlRaiseError(schannel, 357 ctxt->vctxt.error, ctxt->vctxt.userData, 358 ctxt, NULL, XML_FROM_DTD, error, 359 XML_ERR_ERROR, NULL, 0, (const char *) str1, 360 (const char *) str2, NULL, 0, 0, 361 msg, (const char *) str1, (const char *) str2); 362 ctxt->valid = 0; 363 } else { 364 __xmlRaiseError(schannel, NULL, NULL, 365 ctxt, NULL, XML_FROM_DTD, error, 366 XML_ERR_ERROR, NULL, 0, (const char *) str1, 367 (const char *) str2, NULL, 0, 0, 368 msg, (const char *) str1, (const char *) str2); 369 } 370} 371 372/** 373 * xmlFatalErrMsgInt: 374 * @ctxt: an XML parser context 375 * @error: the error number 376 * @msg: the error message 377 * @val: an integer value 378 * 379 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 380 */ 381static void LIBXML_ATTR_FORMAT(3,0) 382xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 383 const char *msg, int val) 384{ 385 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 386 (ctxt->instate == XML_PARSER_EOF)) 387 return; 388 if (ctxt != NULL) 389 ctxt->errNo = error; 390 __xmlRaiseError(NULL, NULL, NULL, 391 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 392 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 393 if (ctxt != NULL) { 394 ctxt->wellFormed = 0; 395 if (ctxt->recovery == 0) 396 ctxt->disableSAX = 1; 397 } 398} 399 400/** 401 * xmlFatalErrMsgStrIntStr: 402 * @ctxt: an XML parser context 403 * @error: the error number 404 * @msg: the error message 405 * @str1: an string info 406 * @val: an integer value 407 * @str2: an string info 408 * 409 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 410 */ 411static void LIBXML_ATTR_FORMAT(3,0) 412xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 413 const char *msg, const xmlChar *str1, int val, 414 const xmlChar *str2) 415{ 416 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 417 (ctxt->instate == XML_PARSER_EOF)) 418 return; 419 if (ctxt != NULL) 420 ctxt->errNo = error; 421 __xmlRaiseError(NULL, NULL, NULL, 422 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 423 NULL, 0, (const char *) str1, (const char *) str2, 424 NULL, val, 0, msg, str1, val, str2); 425 if (ctxt != NULL) { 426 ctxt->wellFormed = 0; 427 if (ctxt->recovery == 0) 428 ctxt->disableSAX = 1; 429 } 430} 431 432/** 433 * xmlFatalErrMsgStr: 434 * @ctxt: an XML parser context 435 * @error: the error number 436 * @msg: the error message 437 * @val: a string value 438 * 439 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 440 */ 441static void LIBXML_ATTR_FORMAT(3,0) 442xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 443 const char *msg, const xmlChar * val) 444{ 445 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 446 (ctxt->instate == XML_PARSER_EOF)) 447 return; 448 if (ctxt != NULL) 449 ctxt->errNo = error; 450 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 451 XML_FROM_PARSER, error, XML_ERR_FATAL, 452 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 453 val); 454 if (ctxt != NULL) { 455 ctxt->wellFormed = 0; 456 if (ctxt->recovery == 0) 457 ctxt->disableSAX = 1; 458 } 459} 460 461/** 462 * xmlErrMsgStr: 463 * @ctxt: an XML parser context 464 * @error: the error number 465 * @msg: the error message 466 * @val: a string value 467 * 468 * Handle a non fatal parser error 469 */ 470static void LIBXML_ATTR_FORMAT(3,0) 471xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 472 const char *msg, const xmlChar * val) 473{ 474 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 475 (ctxt->instate == XML_PARSER_EOF)) 476 return; 477 if (ctxt != NULL) 478 ctxt->errNo = error; 479 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 480 XML_FROM_PARSER, error, XML_ERR_ERROR, 481 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 482 val); 483} 484 485/** 486 * xmlNsErr: 487 * @ctxt: an XML parser context 488 * @error: the error number 489 * @msg: the message 490 * @info1: extra information string 491 * @info2: extra information string 492 * 493 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 494 */ 495static void LIBXML_ATTR_FORMAT(3,0) 496xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 497 const char *msg, 498 const xmlChar * info1, const xmlChar * info2, 499 const xmlChar * info3) 500{ 501 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 502 (ctxt->instate == XML_PARSER_EOF)) 503 return; 504 if (ctxt != NULL) 505 ctxt->errNo = error; 506 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 507 XML_ERR_ERROR, NULL, 0, (const char *) info1, 508 (const char *) info2, (const char *) info3, 0, 0, msg, 509 info1, info2, info3); 510 if (ctxt != NULL) 511 ctxt->nsWellFormed = 0; 512} 513 514/** 515 * xmlNsWarn 516 * @ctxt: an XML parser context 517 * @error: the error number 518 * @msg: the message 519 * @info1: extra information string 520 * @info2: extra information string 521 * 522 * Handle a namespace warning error 523 */ 524static void LIBXML_ATTR_FORMAT(3,0) 525xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 526 const char *msg, 527 const xmlChar * info1, const xmlChar * info2, 528 const xmlChar * info3) 529{ 530 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 531 (ctxt->instate == XML_PARSER_EOF)) 532 return; 533 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 534 XML_ERR_WARNING, NULL, 0, (const char *) info1, 535 (const char *) info2, (const char *) info3, 0, 0, msg, 536 info1, info2, info3); 537} 538 539static void 540xmlSaturatedAdd(unsigned long *dst, unsigned long val) { 541 if (val > ULONG_MAX - *dst) 542 *dst = ULONG_MAX; 543 else 544 *dst += val; 545} 546 547static void 548xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) { 549 if (val > ULONG_MAX - *dst) 550 *dst = ULONG_MAX; 551 else 552 *dst += val; 553} 554 555/** 556 * xmlParserEntityCheck: 557 * @ctxt: parser context 558 * @extra: sum of unexpanded entity sizes 559 * 560 * Check for non-linear entity expansion behaviour. 
561 * 562 * In some cases like xmlStringDecodeEntities, this function is called 563 * for each, possibly nested entity and its unexpanded content length. 564 * 565 * In other cases like xmlParseReference, it's only called for each 566 * top-level entity with its unexpanded content length plus the sum of 567 * the unexpanded content lengths (plus fixed cost) of all nested 568 * entities. 569 * 570 * Summing the unexpanded lengths also adds the length of the reference. 571 * This is by design. Taking the length of the entity name into account 572 * discourages attacks that try to waste CPU time with abusively long 573 * entity names. See test/recurse/lol6.xml for example. Each call also 574 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with 575 * short entities. 576 * 577 * Returns 1 on error, 0 on success. 578 */ 579static int 580xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra) 581{ 582 unsigned long consumed; 583 xmlParserInputPtr input = ctxt->input; 584 xmlEntityPtr entity = input->entity; 585 586 /* 587 * Compute total consumed bytes so far, including input streams of 588 * external entities. 589 */ 590 consumed = input->parentConsumed; 591 if ((entity == NULL) || 592 ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 593 ((entity->flags & XML_ENT_PARSED) == 0))) { 594 xmlSaturatedAdd(&consumed, input->consumed); 595 xmlSaturatedAddSizeT(&consumed, input->cur - input->base); 596 } 597 xmlSaturatedAdd(&consumed, ctxt->sizeentities); 598 599 /* 600 * Add extra cost and some fixed cost. 601 */ 602 xmlSaturatedAdd(&ctxt->sizeentcopy, extra); 603 xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST); 604 605 /* 606 * It's important to always use saturation arithmetic when tracking 607 * entity sizes to make the size checks reliable. If "sizeentcopy" 608 * overflows, we have to abort. 
609 */ 610 if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) && 611 ((ctxt->sizeentcopy >= ULONG_MAX) || 612 (ctxt->sizeentcopy / ctxt->maxAmpl > consumed))) { 613 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP, 614 "Maximum entity amplification factor exceeded, see " 615 "xmlCtxtSetMaxAmplification.\n"); 616 xmlHaltParser(ctxt); 617 return(1); 618 } 619 620 return(0); 621} 622 623/************************************************************************ 624 * * 625 * Library wide options * 626 * * 627 ************************************************************************/ 628 629/** 630 * xmlHasFeature: 631 * @feature: the feature to be examined 632 * 633 * Examines if the library has been compiled with a given feature. 634 * 635 * Returns a non-zero value if the feature exist, otherwise zero. 636 * Returns zero (0) if the feature does not exist or an unknown 637 * unknown feature is requested, non-zero otherwise. 638 */ 639int 640xmlHasFeature(xmlFeature feature) 641{ 642 switch (feature) { 643 case XML_WITH_THREAD: 644#ifdef LIBXML_THREAD_ENABLED 645 return(1); 646#else 647 return(0); 648#endif 649 case XML_WITH_TREE: 650#ifdef LIBXML_TREE_ENABLED 651 return(1); 652#else 653 return(0); 654#endif 655 case XML_WITH_OUTPUT: 656#ifdef LIBXML_OUTPUT_ENABLED 657 return(1); 658#else 659 return(0); 660#endif 661 case XML_WITH_PUSH: 662#ifdef LIBXML_PUSH_ENABLED 663 return(1); 664#else 665 return(0); 666#endif 667 case XML_WITH_READER: 668#ifdef LIBXML_READER_ENABLED 669 return(1); 670#else 671 return(0); 672#endif 673 case XML_WITH_PATTERN: 674#ifdef LIBXML_PATTERN_ENABLED 675 return(1); 676#else 677 return(0); 678#endif 679 case XML_WITH_WRITER: 680#ifdef LIBXML_WRITER_ENABLED 681 return(1); 682#else 683 return(0); 684#endif 685 case XML_WITH_SAX1: 686#ifdef LIBXML_SAX1_ENABLED 687 return(1); 688#else 689 return(0); 690#endif 691 case XML_WITH_FTP: 692#ifdef LIBXML_FTP_ENABLED 693 return(1); 694#else 695 return(0); 696#endif 697 case XML_WITH_HTTP: 698#ifdef 
LIBXML_HTTP_ENABLED 699 return(1); 700#else 701 return(0); 702#endif 703 case XML_WITH_VALID: 704#ifdef LIBXML_VALID_ENABLED 705 return(1); 706#else 707 return(0); 708#endif 709 case XML_WITH_HTML: 710#ifdef LIBXML_HTML_ENABLED 711 return(1); 712#else 713 return(0); 714#endif 715 case XML_WITH_LEGACY: 716#ifdef LIBXML_LEGACY_ENABLED 717 return(1); 718#else 719 return(0); 720#endif 721 case XML_WITH_C14N: 722#ifdef LIBXML_C14N_ENABLED 723 return(1); 724#else 725 return(0); 726#endif 727 case XML_WITH_CATALOG: 728#ifdef LIBXML_CATALOG_ENABLED 729 return(1); 730#else 731 return(0); 732#endif 733 case XML_WITH_XPATH: 734#ifdef LIBXML_XPATH_ENABLED 735 return(1); 736#else 737 return(0); 738#endif 739 case XML_WITH_XPTR: 740#ifdef LIBXML_XPTR_ENABLED 741 return(1); 742#else 743 return(0); 744#endif 745 case XML_WITH_XINCLUDE: 746#ifdef LIBXML_XINCLUDE_ENABLED 747 return(1); 748#else 749 return(0); 750#endif 751 case XML_WITH_ICONV: 752#ifdef LIBXML_ICONV_ENABLED 753 return(1); 754#else 755 return(0); 756#endif 757 case XML_WITH_ISO8859X: 758#ifdef LIBXML_ISO8859X_ENABLED 759 return(1); 760#else 761 return(0); 762#endif 763 case XML_WITH_UNICODE: 764#ifdef LIBXML_UNICODE_ENABLED 765 return(1); 766#else 767 return(0); 768#endif 769 case XML_WITH_REGEXP: 770#ifdef LIBXML_REGEXP_ENABLED 771 return(1); 772#else 773 return(0); 774#endif 775 case XML_WITH_AUTOMATA: 776#ifdef LIBXML_AUTOMATA_ENABLED 777 return(1); 778#else 779 return(0); 780#endif 781 case XML_WITH_EXPR: 782#ifdef LIBXML_EXPR_ENABLED 783 return(1); 784#else 785 return(0); 786#endif 787 case XML_WITH_SCHEMAS: 788#ifdef LIBXML_SCHEMAS_ENABLED 789 return(1); 790#else 791 return(0); 792#endif 793 case XML_WITH_SCHEMATRON: 794#ifdef LIBXML_SCHEMATRON_ENABLED 795 return(1); 796#else 797 return(0); 798#endif 799 case XML_WITH_MODULES: 800#ifdef LIBXML_MODULES_ENABLED 801 return(1); 802#else 803 return(0); 804#endif 805 case XML_WITH_DEBUG: 806#ifdef LIBXML_DEBUG_ENABLED 807 return(1); 808#else 809 return(0); 810#endif 
811 case XML_WITH_DEBUG_MEM: 812#ifdef DEBUG_MEMORY_LOCATION 813 return(1); 814#else 815 return(0); 816#endif 817 case XML_WITH_DEBUG_RUN: 818 return(0); 819 case XML_WITH_ZLIB: 820#ifdef LIBXML_ZLIB_ENABLED 821 return(1); 822#else 823 return(0); 824#endif 825 case XML_WITH_LZMA: 826#ifdef LIBXML_LZMA_ENABLED 827 return(1); 828#else 829 return(0); 830#endif 831 case XML_WITH_ICU: 832#ifdef LIBXML_ICU_ENABLED 833 return(1); 834#else 835 return(0); 836#endif 837 default: 838 break; 839 } 840 return(0); 841} 842 843/************************************************************************ 844 * * 845 * SAX2 defaulted attributes handling * 846 * * 847 ************************************************************************/ 848 849/** 850 * xmlDetectSAX2: 851 * @ctxt: an XML parser context 852 * 853 * Do the SAX2 detection and specific initialization 854 */ 855static void 856xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 857 xmlSAXHandlerPtr sax; 858 859 /* Avoid unused variable warning if features are disabled. */ 860 (void) sax; 861 862 if (ctxt == NULL) return; 863 sax = ctxt->sax; 864#ifdef LIBXML_SAX1_ENABLED 865 /* 866 * Only enable SAX2 if there SAX2 element handlers, except when there 867 * are no element handlers at all. 
868 */ 869 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) && 870 ((sax->startElementNs != NULL) || 871 (sax->endElementNs != NULL) || 872 ((sax->startElement == NULL) && (sax->endElement == NULL)))) 873 ctxt->sax2 = 1; 874#else 875 ctxt->sax2 = 1; 876#endif /* LIBXML_SAX1_ENABLED */ 877 878 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 879 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 880 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 881 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 882 (ctxt->str_xml_ns == NULL)) { 883 xmlErrMemory(ctxt, NULL); 884 } 885} 886 887typedef struct { 888 xmlHashedString prefix; 889 xmlHashedString name; 890 xmlHashedString value; 891 const xmlChar *valueEnd; 892 int external; 893 int expandedSize; 894} xmlDefAttr; 895 896typedef struct _xmlDefAttrs xmlDefAttrs; 897typedef xmlDefAttrs *xmlDefAttrsPtr; 898struct _xmlDefAttrs { 899 int nbAttrs; /* number of defaulted attributes on that element */ 900 int maxAttrs; /* the size of the array */ 901#if __STDC_VERSION__ >= 199901L 902 /* Using a C99 flexible array member avoids UBSan errors. */ 903 xmlDefAttr attrs[]; /* array of localname/prefix/values/external */ 904#else 905 xmlDefAttr attrs[1]; 906#endif 907}; 908 909/** 910 * xmlAttrNormalizeSpace: 911 * @src: the source string 912 * @dst: the target string 913 * 914 * Normalize the space in non CDATA attribute values: 915 * If the attribute type is not CDATA, then the XML processor MUST further 916 * process the normalized attribute value by discarding any leading and 917 * trailing space (#x20) characters, and by replacing sequences of space 918 * (#x20) characters by a single space (#x20) character. 919 * Note that the size of dst need to be at least src, and if one doesn't need 920 * to preserve dst (and it doesn't come from a dictionary or read-only) then 921 * passing src as dst is just fine. 
922 * 923 * Returns a pointer to the normalized value (dst) or NULL if no conversion 924 * is needed. 925 */ 926static xmlChar * 927xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 928{ 929 if ((src == NULL) || (dst == NULL)) 930 return(NULL); 931 932 while (*src == 0x20) src++; 933 while (*src != 0) { 934 if (*src == 0x20) { 935 while (*src == 0x20) src++; 936 if (*src != 0) 937 *dst++ = 0x20; 938 } else { 939 *dst++ = *src++; 940 } 941 } 942 *dst = 0; 943 if (dst == src) 944 return(NULL); 945 return(dst); 946} 947 948/** 949 * xmlAttrNormalizeSpace2: 950 * @src: the source string 951 * 952 * Normalize the space in non CDATA attribute values, a slightly more complex 953 * front end to avoid allocation problems when running on attribute values 954 * coming from the input. 955 * 956 * Returns a pointer to the normalized value (dst) or NULL if no conversion 957 * is needed. 958 */ 959static const xmlChar * 960xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 961{ 962 int i; 963 int remove_head = 0; 964 int need_realloc = 0; 965 const xmlChar *cur; 966 967 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 968 return(NULL); 969 i = *len; 970 if (i <= 0) 971 return(NULL); 972 973 cur = src; 974 while (*cur == 0x20) { 975 cur++; 976 remove_head++; 977 } 978 while (*cur != 0) { 979 if (*cur == 0x20) { 980 cur++; 981 if ((*cur == 0x20) || (*cur == 0)) { 982 need_realloc = 1; 983 break; 984 } 985 } else 986 cur++; 987 } 988 if (need_realloc) { 989 xmlChar *ret; 990 991 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 992 if (ret == NULL) { 993 xmlErrMemory(ctxt, NULL); 994 return(NULL); 995 } 996 xmlAttrNormalizeSpace(ret, ret); 997 *len = strlen((const char *)ret); 998 return(ret); 999 } else if (remove_head) { 1000 *len -= remove_head; 1001 memmove(src, src + remove_head, 1 + *len); 1002 return(src); 1003 } 1004 return(NULL); 1005} 1006 1007/** 1008 * xmlAddDefAttrs: 1009 * @ctxt: an XML parser context 1010 * @fullname: the element 
fullname 1011 * @fullattr: the attribute fullname 1012 * @value: the attribute value 1013 * 1014 * Add a defaulted attribute for an element 1015 */ 1016static void 1017xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1018 const xmlChar *fullname, 1019 const xmlChar *fullattr, 1020 const xmlChar *value) { 1021 xmlDefAttrsPtr defaults; 1022 xmlDefAttr *attr; 1023 int len, expandedSize; 1024 xmlHashedString name; 1025 xmlHashedString prefix; 1026 xmlHashedString hvalue; 1027 const xmlChar *localname; 1028 1029 /* 1030 * Allows to detect attribute redefinitions 1031 */ 1032 if (ctxt->attsSpecial != NULL) { 1033 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1034 return; 1035 } 1036 1037 if (ctxt->attsDefault == NULL) { 1038 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1039 if (ctxt->attsDefault == NULL) 1040 goto mem_error; 1041 } 1042 1043 /* 1044 * split the element name into prefix:localname , the string found 1045 * are within the DTD and then not associated to namespace names. 1046 */ 1047 localname = xmlSplitQName3(fullname, &len); 1048 if (localname == NULL) { 1049 name = xmlDictLookupHashed(ctxt->dict, fullname, -1); 1050 prefix.name = NULL; 1051 } else { 1052 name = xmlDictLookupHashed(ctxt->dict, localname, -1); 1053 prefix = xmlDictLookupHashed(ctxt->dict, fullname, len); 1054 if (prefix.name == NULL) 1055 goto mem_error; 1056 } 1057 if (name.name == NULL) 1058 goto mem_error; 1059 1060 /* 1061 * make sure there is some storage 1062 */ 1063 defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name); 1064 if ((defaults == NULL) || 1065 (defaults->nbAttrs >= defaults->maxAttrs)) { 1066 xmlDefAttrsPtr temp; 1067 int newSize; 1068 1069 newSize = (defaults != NULL) ? 
2 * defaults->maxAttrs : 4; 1070 temp = xmlRealloc(defaults, 1071 sizeof(*defaults) + newSize * sizeof(xmlDefAttr)); 1072 if (temp == NULL) 1073 goto mem_error; 1074 if (defaults == NULL) 1075 temp->nbAttrs = 0; 1076 temp->maxAttrs = newSize; 1077 defaults = temp; 1078 if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name, 1079 defaults, NULL) < 0) { 1080 xmlFree(defaults); 1081 goto mem_error; 1082 } 1083 } 1084 1085 /* 1086 * Split the attribute name into prefix:localname , the string found 1087 * are within the DTD and hen not associated to namespace names. 1088 */ 1089 localname = xmlSplitQName3(fullattr, &len); 1090 if (localname == NULL) { 1091 name = xmlDictLookupHashed(ctxt->dict, fullattr, -1); 1092 prefix.name = NULL; 1093 } else { 1094 name = xmlDictLookupHashed(ctxt->dict, localname, -1); 1095 prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len); 1096 if (prefix.name == NULL) 1097 goto mem_error; 1098 } 1099 if (name.name == NULL) 1100 goto mem_error; 1101 1102 /* intern the string and precompute the end */ 1103 len = strlen((const char *) value); 1104 hvalue = xmlDictLookupHashed(ctxt->dict, value, len); 1105 if (hvalue.name == NULL) 1106 goto mem_error; 1107 1108 expandedSize = strlen((const char *) name.name); 1109 if (prefix.name != NULL) 1110 expandedSize += strlen((const char *) prefix.name); 1111 expandedSize += len; 1112 1113 attr = &defaults->attrs[defaults->nbAttrs++]; 1114 attr->name = name; 1115 attr->prefix = prefix; 1116 attr->value = hvalue; 1117 attr->valueEnd = hvalue.name + len; 1118 attr->external = ctxt->external; 1119 attr->expandedSize = expandedSize; 1120 1121 return; 1122 1123mem_error: 1124 xmlErrMemory(ctxt, NULL); 1125 return; 1126} 1127 1128/** 1129 * xmlAddSpecialAttr: 1130 * @ctxt: an XML parser context 1131 * @fullname: the element fullname 1132 * @fullattr: the attribute fullname 1133 * @type: the attribute type 1134 * 1135 * Register this attribute type 1136 */ 1137static void 
1138xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1139 const xmlChar *fullname, 1140 const xmlChar *fullattr, 1141 int type) 1142{ 1143 if (ctxt->attsSpecial == NULL) { 1144 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1145 if (ctxt->attsSpecial == NULL) 1146 goto mem_error; 1147 } 1148 1149 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1150 return; 1151 1152 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1153 (void *) (ptrdiff_t) type); 1154 return; 1155 1156mem_error: 1157 xmlErrMemory(ctxt, NULL); 1158 return; 1159} 1160 1161/** 1162 * xmlCleanSpecialAttrCallback: 1163 * 1164 * Removes CDATA attributes from the special attribute table 1165 */ 1166static void 1167xmlCleanSpecialAttrCallback(void *payload, void *data, 1168 const xmlChar *fullname, const xmlChar *fullattr, 1169 const xmlChar *unused ATTRIBUTE_UNUSED) { 1170 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1171 1172 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) { 1173 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1174 } 1175} 1176 1177/** 1178 * xmlCleanSpecialAttr: 1179 * @ctxt: an XML parser context 1180 * 1181 * Trim the list of attributes defined to remove all those of type 1182 * CDATA as they are not special. This call should be done when finishing 1183 * to parse the DTD and before starting to parse the document root. 1184 */ 1185static void 1186xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1187{ 1188 if (ctxt->attsSpecial == NULL) 1189 return; 1190 1191 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1192 1193 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1194 xmlHashFree(ctxt->attsSpecial, NULL); 1195 ctxt->attsSpecial = NULL; 1196 } 1197 return; 1198} 1199 1200/** 1201 * xmlCheckLanguageID: 1202 * @lang: pointer to the string value 1203 * 1204 * DEPRECATED: Internal function, do not use. 
1205 * 1206 * Checks that the value conforms to the LanguageID production: 1207 * 1208 * NOTE: this is somewhat deprecated, those productions were removed from 1209 * the XML Second edition. 1210 * 1211 * [33] LanguageID ::= Langcode ('-' Subcode)* 1212 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1213 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1214 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1215 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1216 * [38] Subcode ::= ([a-z] | [A-Z])+ 1217 * 1218 * The current REC reference the successors of RFC 1766, currently 5646 1219 * 1220 * http://www.rfc-editor.org/rfc/rfc5646.txt 1221 * langtag = language 1222 * ["-" script] 1223 * ["-" region] 1224 * *("-" variant) 1225 * *("-" extension) 1226 * ["-" privateuse] 1227 * language = 2*3ALPHA ; shortest ISO 639 code 1228 * ["-" extlang] ; sometimes followed by 1229 * ; extended language subtags 1230 * / 4ALPHA ; or reserved for future use 1231 * / 5*8ALPHA ; or registered language subtag 1232 * 1233 * extlang = 3ALPHA ; selected ISO 639 codes 1234 * *2("-" 3ALPHA) ; permanently reserved 1235 * 1236 * script = 4ALPHA ; ISO 15924 code 1237 * 1238 * region = 2ALPHA ; ISO 3166-1 code 1239 * / 3DIGIT ; UN M.49 code 1240 * 1241 * variant = 5*8alphanum ; registered variants 1242 * / (DIGIT 3alphanum) 1243 * 1244 * extension = singleton 1*("-" (2*8alphanum)) 1245 * 1246 * ; Single alphanumerics 1247 * ; "x" reserved for private use 1248 * singleton = DIGIT ; 0 - 9 1249 * / %x41-57 ; A - W 1250 * / %x59-5A ; Y - Z 1251 * / %x61-77 ; a - w 1252 * / %x79-7A ; y - z 1253 * 1254 * it sounds right to still allow Irregular i-xxx IANA and user codes too 1255 * The parser below doesn't try to cope with extension or privateuse 1256 * that could be added but that's not interoperable anyway 1257 * 1258 * Returns 1 if correct 0 otherwise 1259 **/ 1260int 1261xmlCheckLanguageID(const xmlChar * lang) 1262{ 1263 const xmlChar *cur = lang, *nxt; 1264 1265 if (cur == 
NULL) 1266 return (0); 1267 if (((cur[0] == 'i') && (cur[1] == '-')) || 1268 ((cur[0] == 'I') && (cur[1] == '-')) || 1269 ((cur[0] == 'x') && (cur[1] == '-')) || 1270 ((cur[0] == 'X') && (cur[1] == '-'))) { 1271 /* 1272 * Still allow IANA code and user code which were coming 1273 * from the previous version of the XML-1.0 specification 1274 * it's deprecated but we should not fail 1275 */ 1276 cur += 2; 1277 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1278 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1279 cur++; 1280 return(cur[0] == 0); 1281 } 1282 nxt = cur; 1283 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1284 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1285 nxt++; 1286 if (nxt - cur >= 4) { 1287 /* 1288 * Reserved 1289 */ 1290 if ((nxt - cur > 8) || (nxt[0] != 0)) 1291 return(0); 1292 return(1); 1293 } 1294 if (nxt - cur < 2) 1295 return(0); 1296 /* we got an ISO 639 code */ 1297 if (nxt[0] == 0) 1298 return(1); 1299 if (nxt[0] != '-') 1300 return(0); 1301 1302 nxt++; 1303 cur = nxt; 1304 /* now we can have extlang or script or region or variant */ 1305 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1306 goto region_m49; 1307 1308 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1309 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1310 nxt++; 1311 if (nxt - cur == 4) 1312 goto script; 1313 if (nxt - cur == 2) 1314 goto region; 1315 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1316 goto variant; 1317 if (nxt - cur != 3) 1318 return(0); 1319 /* we parsed an extlang */ 1320 if (nxt[0] == 0) 1321 return(1); 1322 if (nxt[0] != '-') 1323 return(0); 1324 1325 nxt++; 1326 cur = nxt; 1327 /* now we can have script or region or variant */ 1328 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1329 goto region_m49; 1330 1331 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1332 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1333 nxt++; 1334 if (nxt - cur == 2) 1335 goto region; 1336 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1337 goto variant; 1338 if (nxt - cur != 4) 1339 return(0); 1340 /* we parsed a script */ 
1341script: 1342 if (nxt[0] == 0) 1343 return(1); 1344 if (nxt[0] != '-') 1345 return(0); 1346 1347 nxt++; 1348 cur = nxt; 1349 /* now we can have region or variant */ 1350 if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1351 goto region_m49; 1352 1353 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1354 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1355 nxt++; 1356 1357 if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1358 goto variant; 1359 if (nxt - cur != 2) 1360 return(0); 1361 /* we parsed a region */ 1362region: 1363 if (nxt[0] == 0) 1364 return(1); 1365 if (nxt[0] != '-') 1366 return(0); 1367 1368 nxt++; 1369 cur = nxt; 1370 /* now we can just have a variant */ 1371 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1372 ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1373 nxt++; 1374 1375 if ((nxt - cur < 5) || (nxt - cur > 8)) 1376 return(0); 1377 1378 /* we parsed a variant */ 1379variant: 1380 if (nxt[0] == 0) 1381 return(1); 1382 if (nxt[0] != '-') 1383 return(0); 1384 /* extensions and private use subtags not checked */ 1385 return (1); 1386 1387region_m49: 1388 if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1389 ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1390 nxt += 3; 1391 goto region; 1392 } 1393 return(0); 1394} 1395 1396/************************************************************************ 1397 * * 1398 * Parser stacks related functions and macros * 1399 * * 1400 ************************************************************************/ 1401 1402static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1403 const xmlChar ** str); 1404 1405/** 1406 * xmlParserNsCreate: 1407 * 1408 * Create a new namespace database. 1409 * 1410 * Returns the new obejct. 
1411 */ 1412xmlParserNsData * 1413xmlParserNsCreate(void) { 1414 xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb)); 1415 1416 if (nsdb == NULL) 1417 return(NULL); 1418 memset(nsdb, 0, sizeof(*nsdb)); 1419 nsdb->defaultNsIndex = INT_MAX; 1420 1421 return(nsdb); 1422} 1423 1424/** 1425 * xmlParserNsFree: 1426 * @nsdb: namespace database 1427 * 1428 * Free a namespace database. 1429 */ 1430void 1431xmlParserNsFree(xmlParserNsData *nsdb) { 1432 if (nsdb == NULL) 1433 return; 1434 1435 xmlFree(nsdb->extra); 1436 xmlFree(nsdb->hash); 1437 xmlFree(nsdb); 1438} 1439 1440/** 1441 * xmlParserNsReset: 1442 * @nsdb: namespace database 1443 * 1444 * Reset a namespace database. 1445 */ 1446static void 1447xmlParserNsReset(xmlParserNsData *nsdb) { 1448 if (nsdb == NULL) 1449 return; 1450 1451 nsdb->hashElems = 0; 1452 nsdb->elementId = 0; 1453 nsdb->defaultNsIndex = INT_MAX; 1454 1455 if (nsdb->hash) 1456 memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0])); 1457} 1458 1459/** 1460 * xmlParserStartElement: 1461 * @nsdb: namespace database 1462 * 1463 * Signal that a new element has started. 1464 * 1465 * Returns 0 on success, -1 if the element counter overflowed. 1466 */ 1467static int 1468xmlParserNsStartElement(xmlParserNsData *nsdb) { 1469 if (nsdb->elementId == UINT_MAX) 1470 return(-1); 1471 nsdb->elementId++; 1472 1473 return(0); 1474} 1475 1476/** 1477 * xmlParserNsLookup: 1478 * @ctxt: parser context 1479 * @prefix: namespace prefix 1480 * @bucketPtr: optional bucket (return value) 1481 * 1482 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will 1483 * be set to the matching bucket, or the first empty bucket if no match 1484 * was found. 1485 * 1486 * Returns the namespace index on success, INT_MAX if no namespace was 1487 * found. 
1488 */ 1489static int 1490xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix, 1491 xmlParserNsBucket **bucketPtr) { 1492 xmlParserNsBucket *bucket, *tombstone; 1493 unsigned index, hashValue; 1494 1495 if (prefix->name == NULL) 1496 return(ctxt->nsdb->defaultNsIndex); 1497 1498 if (ctxt->nsdb->hashSize == 0) 1499 return(INT_MAX); 1500 1501 hashValue = prefix->hashValue; 1502 index = hashValue & (ctxt->nsdb->hashSize - 1); 1503 bucket = &ctxt->nsdb->hash[index]; 1504 tombstone = NULL; 1505 1506 while (bucket->hashValue) { 1507 if (bucket->index == INT_MAX) { 1508 if (tombstone == NULL) 1509 tombstone = bucket; 1510 } else if (bucket->hashValue == hashValue) { 1511 if (ctxt->nsTab[bucket->index * 2] == prefix->name) { 1512 if (bucketPtr != NULL) 1513 *bucketPtr = bucket; 1514 return(bucket->index); 1515 } 1516 } 1517 1518 index++; 1519 bucket++; 1520 if (index == ctxt->nsdb->hashSize) { 1521 index = 0; 1522 bucket = ctxt->nsdb->hash; 1523 } 1524 } 1525 1526 if (bucketPtr != NULL) 1527 *bucketPtr = tombstone ? tombstone : bucket; 1528 return(INT_MAX); 1529} 1530 1531/** 1532 * xmlParserNsLookupUri: 1533 * @ctxt: parser context 1534 * @prefix: namespace prefix 1535 * 1536 * Lookup namespace URI with given prefix. 1537 * 1538 * Returns the namespace URI on success, NULL if no namespace was found. 1539 */ 1540static const xmlChar * 1541xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) { 1542 const xmlChar *ret; 1543 int nsIndex; 1544 1545 if (prefix->name == ctxt->str_xml) 1546 return(ctxt->str_xml_ns); 1547 1548 nsIndex = xmlParserNsLookup(ctxt, prefix, NULL); 1549 if (nsIndex == INT_MAX) 1550 return(NULL); 1551 1552 ret = ctxt->nsTab[nsIndex * 2 + 1]; 1553 if (ret[0] == 0) 1554 ret = NULL; 1555 return(ret); 1556} 1557 1558/** 1559 * xmlParserNsLookupSax: 1560 * @ctxt: parser context 1561 * @prefix: namespace prefix 1562 * 1563 * Lookup extra data for the given prefix. 
This returns data stored 1564 * with xmlParserNsUdpateSax. 1565 * 1566 * Returns the data on success, NULL if no namespace was found. 1567 */ 1568void * 1569xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 1570 xmlHashedString hprefix; 1571 int nsIndex; 1572 1573 if (prefix == ctxt->str_xml) 1574 return(NULL); 1575 1576 hprefix.name = prefix; 1577 if (prefix != NULL) 1578 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix); 1579 else 1580 hprefix.hashValue = 0; 1581 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL); 1582 if (nsIndex == INT_MAX) 1583 return(NULL); 1584 1585 return(ctxt->nsdb->extra[nsIndex].saxData); 1586} 1587 1588/** 1589 * xmlParserNsUpdateSax: 1590 * @ctxt: parser context 1591 * @prefix: namespace prefix 1592 * @saxData: extra data for SAX handler 1593 * 1594 * Sets or updates extra data for the given prefix. This value will be 1595 * returned by xmlParserNsLookupSax as long as the namespace with the 1596 * given prefix is in scope. 1597 * 1598 * Returns the data on success, NULL if no namespace was found. 1599 */ 1600int 1601xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 1602 void *saxData) { 1603 xmlHashedString hprefix; 1604 int nsIndex; 1605 1606 if (prefix == ctxt->str_xml) 1607 return(-1); 1608 1609 hprefix.name = prefix; 1610 if (prefix != NULL) 1611 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix); 1612 else 1613 hprefix.hashValue = 0; 1614 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL); 1615 if (nsIndex == INT_MAX) 1616 return(-1); 1617 1618 ctxt->nsdb->extra[nsIndex].saxData = saxData; 1619 return(0); 1620} 1621 1622/** 1623 * xmlParserNsGrow: 1624 * @ctxt: parser context 1625 * 1626 * Grows the namespace tables. 1627 * 1628 * Returns 0 on success, -1 if a memory allocation failed. 
1629 */ 1630static int 1631xmlParserNsGrow(xmlParserCtxtPtr ctxt) { 1632 const xmlChar **table; 1633 xmlParserNsExtra *extra; 1634 int newSize; 1635 1636 if (ctxt->nsMax > INT_MAX / 2) 1637 goto error; 1638 newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16; 1639 1640 table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0])); 1641 if (table == NULL) 1642 goto error; 1643 ctxt->nsTab = table; 1644 1645 extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0])); 1646 if (extra == NULL) 1647 goto error; 1648 ctxt->nsdb->extra = extra; 1649 1650 ctxt->nsMax = newSize; 1651 return(0); 1652 1653error: 1654 xmlErrMemory(ctxt, NULL); 1655 return(-1); 1656} 1657 1658/** 1659 * xmlParserNsPush: 1660 * @ctxt: parser context 1661 * @prefix: prefix with hash value 1662 * @uri: uri with hash value 1663 * @saxData: extra data for SAX handler 1664 * @defAttr: whether the namespace comes from a default attribute 1665 * 1666 * Push a new namespace on the table. 1667 * 1668 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored, 1669 * -1 if a memory allocation failed. 
1670 */ 1671static int 1672xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix, 1673 const xmlHashedString *uri, void *saxData, int defAttr) { 1674 xmlParserNsBucket *bucket = NULL; 1675 xmlParserNsExtra *extra; 1676 const xmlChar **ns; 1677 unsigned hashValue, nsIndex, oldIndex; 1678 1679 if ((prefix != NULL) && (prefix->name == ctxt->str_xml)) 1680 return(0); 1681 1682 if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) { 1683 xmlErrMemory(ctxt, NULL); 1684 return(-1); 1685 } 1686 1687 /* 1688 * Default namespace and 'xml' namespace 1689 */ 1690 if ((prefix == NULL) || (prefix->name == NULL)) { 1691 oldIndex = ctxt->nsdb->defaultNsIndex; 1692 1693 if (oldIndex != INT_MAX) { 1694 extra = &ctxt->nsdb->extra[oldIndex]; 1695 1696 if (extra->elementId == ctxt->nsdb->elementId) { 1697 if (defAttr == 0) 1698 xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns"); 1699 return(0); 1700 } 1701 1702 if ((ctxt->options & XML_PARSE_NSCLEAN) && 1703 (uri->name == ctxt->nsTab[oldIndex * 2 + 1])) 1704 return(0); 1705 } 1706 1707 ctxt->nsdb->defaultNsIndex = ctxt->nsNr; 1708 goto populate_entry; 1709 } 1710 1711 /* 1712 * Hash table lookup 1713 */ 1714 oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket); 1715 if (oldIndex != INT_MAX) { 1716 extra = &ctxt->nsdb->extra[oldIndex]; 1717 1718 /* 1719 * Check for duplicate definitions on the same element. 
1720 */ 1721 if (extra->elementId == ctxt->nsdb->elementId) { 1722 if (defAttr == 0) 1723 xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name); 1724 return(0); 1725 } 1726 1727 if ((ctxt->options & XML_PARSE_NSCLEAN) && 1728 (uri->name == ctxt->nsTab[bucket->index * 2 + 1])) 1729 return(0); 1730 1731 bucket->index = ctxt->nsNr; 1732 goto populate_entry; 1733 } 1734 1735 /* 1736 * Insert new bucket 1737 */ 1738 1739 hashValue = prefix->hashValue; 1740 1741 /* 1742 * Grow hash table, 50% fill factor 1743 */ 1744 if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) { 1745 xmlParserNsBucket *newHash; 1746 unsigned newSize, i, index; 1747 1748 if (ctxt->nsdb->hashSize > UINT_MAX / 2) { 1749 xmlErrMemory(ctxt, NULL); 1750 return(-1); 1751 } 1752 newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16; 1753 newHash = xmlMalloc(newSize * sizeof(newHash[0])); 1754 if (newHash == NULL) { 1755 xmlErrMemory(ctxt, NULL); 1756 return(-1); 1757 } 1758 memset(newHash, 0, newSize * sizeof(newHash[0])); 1759 1760 for (i = 0; i < ctxt->nsdb->hashSize; i++) { 1761 unsigned hv = ctxt->nsdb->hash[i].hashValue; 1762 unsigned newIndex; 1763 1764 if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX)) 1765 continue; 1766 newIndex = hv & (newSize - 1); 1767 1768 while (newHash[newIndex].hashValue != 0) { 1769 newIndex++; 1770 if (newIndex == newSize) 1771 newIndex = 0; 1772 } 1773 1774 newHash[newIndex] = ctxt->nsdb->hash[i]; 1775 } 1776 1777 xmlFree(ctxt->nsdb->hash); 1778 ctxt->nsdb->hash = newHash; 1779 ctxt->nsdb->hashSize = newSize; 1780 1781 /* 1782 * Relookup 1783 */ 1784 index = hashValue & (newSize - 1); 1785 1786 while (newHash[index].hashValue != 0) { 1787 index++; 1788 if (index == newSize) 1789 index = 0; 1790 } 1791 1792 bucket = &newHash[index]; 1793 } 1794 1795 bucket->hashValue = hashValue; 1796 bucket->index = ctxt->nsNr; 1797 ctxt->nsdb->hashElems++; 1798 oldIndex = INT_MAX; 1799 1800populate_entry: 1801 nsIndex = ctxt->nsNr; 1802 1803 ns = 
&ctxt->nsTab[nsIndex * 2]; 1804 ns[0] = prefix ? prefix->name : NULL; 1805 ns[1] = uri->name; 1806 1807 extra = &ctxt->nsdb->extra[nsIndex]; 1808 extra->saxData = saxData; 1809 extra->prefixHashValue = prefix ? prefix->hashValue : 0; 1810 extra->uriHashValue = uri->hashValue; 1811 extra->elementId = ctxt->nsdb->elementId; 1812 extra->oldIndex = oldIndex; 1813 1814 ctxt->nsNr++; 1815 1816 return(1); 1817} 1818 1819/** 1820 * xmlParserNsPop: 1821 * @ctxt: an XML parser context 1822 * @nr: the number to pop 1823 * 1824 * Pops the top @nr namespaces and restores the hash table. 1825 * 1826 * Returns the number of namespaces popped. 1827 */ 1828static int 1829xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr) 1830{ 1831 int i; 1832 1833 /* assert(nr <= ctxt->nsNr); */ 1834 1835 for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) { 1836 const xmlChar *prefix = ctxt->nsTab[i * 2]; 1837 xmlParserNsExtra *extra = &ctxt->nsdb->extra[i]; 1838 1839 if (prefix == NULL) { 1840 ctxt->nsdb->defaultNsIndex = extra->oldIndex; 1841 } else { 1842 xmlHashedString hprefix; 1843 xmlParserNsBucket *bucket = NULL; 1844 1845 hprefix.name = prefix; 1846 hprefix.hashValue = extra->prefixHashValue; 1847 xmlParserNsLookup(ctxt, &hprefix, &bucket); 1848 /* assert(bucket && bucket->hashValue); */ 1849 bucket->index = extra->oldIndex; 1850 } 1851 } 1852 1853 ctxt->nsNr -= nr; 1854 return(nr); 1855} 1856 1857static int 1858xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1859 const xmlChar **atts; 1860 unsigned *attallocs; 1861 int maxatts; 1862 1863 if (nr + 5 > ctxt->maxatts) { 1864 maxatts = ctxt->maxatts == 0 ? 
55 : (nr + 5) * 2; 1865 atts = (const xmlChar **) xmlMalloc( 1866 maxatts * sizeof(const xmlChar *)); 1867 if (atts == NULL) goto mem_error; 1868 attallocs = xmlRealloc(ctxt->attallocs, 1869 (maxatts / 5) * sizeof(attallocs[0])); 1870 if (attallocs == NULL) { 1871 xmlFree(atts); 1872 goto mem_error; 1873 } 1874 if (ctxt->maxatts > 0) 1875 memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *)); 1876 xmlFree(ctxt->atts); 1877 ctxt->atts = atts; 1878 ctxt->attallocs = attallocs; 1879 ctxt->maxatts = maxatts; 1880 } 1881 return(ctxt->maxatts); 1882mem_error: 1883 xmlErrMemory(ctxt, NULL); 1884 return(-1); 1885} 1886 1887/** 1888 * inputPush: 1889 * @ctxt: an XML parser context 1890 * @value: the parser input 1891 * 1892 * Pushes a new parser input on top of the input stack 1893 * 1894 * Returns -1 in case of error, the index in the stack otherwise 1895 */ 1896int 1897inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1898{ 1899 if ((ctxt == NULL) || (value == NULL)) 1900 return(-1); 1901 if (ctxt->inputNr >= ctxt->inputMax) { 1902 size_t newSize = ctxt->inputMax * 2; 1903 xmlParserInputPtr *tmp; 1904 1905 tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1906 newSize * sizeof(*tmp)); 1907 if (tmp == NULL) { 1908 xmlErrMemory(ctxt, NULL); 1909 return (-1); 1910 } 1911 ctxt->inputTab = tmp; 1912 ctxt->inputMax = newSize; 1913 } 1914 ctxt->inputTab[ctxt->inputNr] = value; 1915 ctxt->input = value; 1916 return (ctxt->inputNr++); 1917} 1918/** 1919 * inputPop: 1920 * @ctxt: an XML parser context 1921 * 1922 * Pops the top parser input from the input stack 1923 * 1924 * Returns the input just removed 1925 */ 1926xmlParserInputPtr 1927inputPop(xmlParserCtxtPtr ctxt) 1928{ 1929 xmlParserInputPtr ret; 1930 1931 if (ctxt == NULL) 1932 return(NULL); 1933 if (ctxt->inputNr <= 0) 1934 return (NULL); 1935 ctxt->inputNr--; 1936 if (ctxt->inputNr > 0) 1937 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1938 else 1939 ctxt->input = NULL; 1940 ret = 
ctxt->inputTab[ctxt->inputNr]; 1941 ctxt->inputTab[ctxt->inputNr] = NULL; 1942 return (ret); 1943} 1944/** 1945 * nodePush: 1946 * @ctxt: an XML parser context 1947 * @value: the element node 1948 * 1949 * DEPRECATED: Internal function, do not use. 1950 * 1951 * Pushes a new element node on top of the node stack 1952 * 1953 * Returns -1 in case of error, the index in the stack otherwise 1954 */ 1955int 1956nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1957{ 1958 if (ctxt == NULL) return(0); 1959 if (ctxt->nodeNr >= ctxt->nodeMax) { 1960 xmlNodePtr *tmp; 1961 1962 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1963 ctxt->nodeMax * 2 * 1964 sizeof(ctxt->nodeTab[0])); 1965 if (tmp == NULL) { 1966 xmlErrMemory(ctxt, NULL); 1967 return (-1); 1968 } 1969 ctxt->nodeTab = tmp; 1970 ctxt->nodeMax *= 2; 1971 } 1972 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1973 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1974 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1975 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1976 xmlParserMaxDepth); 1977 xmlHaltParser(ctxt); 1978 return(-1); 1979 } 1980 ctxt->nodeTab[ctxt->nodeNr] = value; 1981 ctxt->node = value; 1982 return (ctxt->nodeNr++); 1983} 1984 1985/** 1986 * nodePop: 1987 * @ctxt: an XML parser context 1988 * 1989 * DEPRECATED: Internal function, do not use. 
1990 * 1991 * Pops the top element node from the node stack 1992 * 1993 * Returns the node just removed 1994 */ 1995xmlNodePtr 1996nodePop(xmlParserCtxtPtr ctxt) 1997{ 1998 xmlNodePtr ret; 1999 2000 if (ctxt == NULL) return(NULL); 2001 if (ctxt->nodeNr <= 0) 2002 return (NULL); 2003 ctxt->nodeNr--; 2004 if (ctxt->nodeNr > 0) 2005 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 2006 else 2007 ctxt->node = NULL; 2008 ret = ctxt->nodeTab[ctxt->nodeNr]; 2009 ctxt->nodeTab[ctxt->nodeNr] = NULL; 2010 return (ret); 2011} 2012 2013/** 2014 * nameNsPush: 2015 * @ctxt: an XML parser context 2016 * @value: the element name 2017 * @prefix: the element prefix 2018 * @URI: the element namespace name 2019 * @line: the current line number for error messages 2020 * @nsNr: the number of namespaces pushed on the namespace table 2021 * 2022 * Pushes a new element name/prefix/URL on top of the name stack 2023 * 2024 * Returns -1 in case of error, the index in the stack otherwise 2025 */ 2026static int 2027nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 2028 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr) 2029{ 2030 xmlStartTag *tag; 2031 2032 if (ctxt->nameNr >= ctxt->nameMax) { 2033 const xmlChar * *tmp; 2034 xmlStartTag *tmp2; 2035 ctxt->nameMax *= 2; 2036 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 2037 ctxt->nameMax * 2038 sizeof(ctxt->nameTab[0])); 2039 if (tmp == NULL) { 2040 ctxt->nameMax /= 2; 2041 goto mem_error; 2042 } 2043 ctxt->nameTab = tmp; 2044 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab, 2045 ctxt->nameMax * 2046 sizeof(ctxt->pushTab[0])); 2047 if (tmp2 == NULL) { 2048 ctxt->nameMax /= 2; 2049 goto mem_error; 2050 } 2051 ctxt->pushTab = tmp2; 2052 } else if (ctxt->pushTab == NULL) { 2053 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax * 2054 sizeof(ctxt->pushTab[0])); 2055 if (ctxt->pushTab == NULL) 2056 goto mem_error; 2057 } 2058 ctxt->nameTab[ctxt->nameNr] = value; 2059 ctxt->name = value; 2060 tag = 
&ctxt->pushTab[ctxt->nameNr]; 2061 tag->prefix = prefix; 2062 tag->URI = URI; 2063 tag->line = line; 2064 tag->nsNr = nsNr; 2065 return (ctxt->nameNr++); 2066mem_error: 2067 xmlErrMemory(ctxt, NULL); 2068 return (-1); 2069} 2070#ifdef LIBXML_PUSH_ENABLED 2071/** 2072 * nameNsPop: 2073 * @ctxt: an XML parser context 2074 * 2075 * Pops the top element/prefix/URI name from the name stack 2076 * 2077 * Returns the name just removed 2078 */ 2079static const xmlChar * 2080nameNsPop(xmlParserCtxtPtr ctxt) 2081{ 2082 const xmlChar *ret; 2083 2084 if (ctxt->nameNr <= 0) 2085 return (NULL); 2086 ctxt->nameNr--; 2087 if (ctxt->nameNr > 0) 2088 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 2089 else 2090 ctxt->name = NULL; 2091 ret = ctxt->nameTab[ctxt->nameNr]; 2092 ctxt->nameTab[ctxt->nameNr] = NULL; 2093 return (ret); 2094} 2095#endif /* LIBXML_PUSH_ENABLED */ 2096 2097/** 2098 * namePush: 2099 * @ctxt: an XML parser context 2100 * @value: the element name 2101 * 2102 * DEPRECATED: Internal function, do not use. 2103 * 2104 * Pushes a new element name on top of the name stack 2105 * 2106 * Returns -1 in case of error, the index in the stack otherwise 2107 */ 2108int 2109namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 2110{ 2111 if (ctxt == NULL) return (-1); 2112 2113 if (ctxt->nameNr >= ctxt->nameMax) { 2114 const xmlChar * *tmp; 2115 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 2116 ctxt->nameMax * 2 * 2117 sizeof(ctxt->nameTab[0])); 2118 if (tmp == NULL) { 2119 goto mem_error; 2120 } 2121 ctxt->nameTab = tmp; 2122 ctxt->nameMax *= 2; 2123 } 2124 ctxt->nameTab[ctxt->nameNr] = value; 2125 ctxt->name = value; 2126 return (ctxt->nameNr++); 2127mem_error: 2128 xmlErrMemory(ctxt, NULL); 2129 return (-1); 2130} 2131 2132/** 2133 * namePop: 2134 * @ctxt: an XML parser context 2135 * 2136 * DEPRECATED: Internal function, do not use. 
2137 * 2138 * Pops the top element name from the name stack 2139 * 2140 * Returns the name just removed 2141 */ 2142const xmlChar * 2143namePop(xmlParserCtxtPtr ctxt) 2144{ 2145 const xmlChar *ret; 2146 2147 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 2148 return (NULL); 2149 ctxt->nameNr--; 2150 if (ctxt->nameNr > 0) 2151 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 2152 else 2153 ctxt->name = NULL; 2154 ret = ctxt->nameTab[ctxt->nameNr]; 2155 ctxt->nameTab[ctxt->nameNr] = NULL; 2156 return (ret); 2157} 2158 2159static int spacePush(xmlParserCtxtPtr ctxt, int val) { 2160 if (ctxt->spaceNr >= ctxt->spaceMax) { 2161 int *tmp; 2162 2163 ctxt->spaceMax *= 2; 2164 tmp = (int *) xmlRealloc(ctxt->spaceTab, 2165 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 2166 if (tmp == NULL) { 2167 xmlErrMemory(ctxt, NULL); 2168 ctxt->spaceMax /=2; 2169 return(-1); 2170 } 2171 ctxt->spaceTab = tmp; 2172 } 2173 ctxt->spaceTab[ctxt->spaceNr] = val; 2174 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 2175 return(ctxt->spaceNr++); 2176} 2177 2178static int spacePop(xmlParserCtxtPtr ctxt) { 2179 int ret; 2180 if (ctxt->spaceNr <= 0) return(0); 2181 ctxt->spaceNr--; 2182 if (ctxt->spaceNr > 0) 2183 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 2184 else 2185 ctxt->space = &ctxt->spaceTab[0]; 2186 ret = ctxt->spaceTab[ctxt->spaceNr]; 2187 ctxt->spaceTab[ctxt->spaceNr] = -1; 2188 return(ret); 2189} 2190 2191/* 2192 * Macros for accessing the content. Those should be used only by the parser, 2193 * and not exported. 2194 * 2195 * Dirty macros, i.e. one often need to make assumption on the context to 2196 * use them 2197 * 2198 * CUR_PTR return the current pointer to the xmlChar to be parsed. 2199 * To be used with extreme caution since operations consuming 2200 * characters may move the input buffer to a different location ! 2201 * CUR returns the current xmlChar value, i.e. 
 *           an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
2220 * CUR_SCHAR same but operate on a string instead of the context 2221 * COPY_BUF copy the current unicode char to the target buffer, increment 2222 * the index 2223 * GROW, SHRINK handling of input buffers 2224 */ 2225 2226#define RAW (*ctxt->input->cur) 2227#define CUR (*ctxt->input->cur) 2228#define NXT(val) ctxt->input->cur[(val)] 2229#define CUR_PTR ctxt->input->cur 2230#define BASE_PTR ctxt->input->base 2231 2232#define CMP4( s, c1, c2, c3, c4 ) \ 2233 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 2234 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 2235#define CMP5( s, c1, c2, c3, c4, c5 ) \ 2236 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 2237#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 2238 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 2239#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 2240 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 2241#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 2242 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 2243#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 2244 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 2245 ((unsigned char *) s)[ 8 ] == c9 ) 2246#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 2247 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2248 ((unsigned char *) s)[ 9 ] == c10 ) 2249 2250#define SKIP(val) do { \ 2251 ctxt->input->cur += (val),ctxt->input->col+=(val); \ 2252 if (*ctxt->input->cur == 0) \ 2253 xmlParserGrow(ctxt); \ 2254 } while (0) 2255 2256#define SKIPL(val) do { \ 2257 int skipl; \ 2258 for(skipl=0; skipl<val; skipl++) { \ 2259 if (*(ctxt->input->cur) == '\n') { \ 2260 ctxt->input->line++; ctxt->input->col = 1; \ 2261 } else ctxt->input->col++; \ 2262 ctxt->input->cur++; \ 2263 } \ 2264 if (*ctxt->input->cur == 0) \ 2265 xmlParserGrow(ctxt); \ 2266 } while (0) 2267 2268/* Don't shrink push parser buffer. 
*/
/*
 * SHRINK and GROW expand to a complete if statement including its
 * trailing semicolon, so they must not be used as the body of an
 * unbraced if/else.
 */
#define SHRINK \
    if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserShrink(ctxt);

/* Ask for more input when less than INPUT_CHUNK bytes are left. */
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)     \
        xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Expands to a brace block, not to a do/while statement. */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserGrow(ctxt);                                        \
    }

/* Skip a character of l bytes; does not grow the input by itself. */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += l;                                              \
  } while (0)

/* l must be an lvalue, its address is taken. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Expands to a bare if/else without terminating semicolon; the
 * arguments are not parenthesized and i and v are evaluated more than
 * once.
 */
#define COPY_BUF(b, i, v)                                               \
    if (v < 0x80) b[i++] = v;                                           \
    else i += xmlCopyCharMultiByte(&b[i],v)

/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * DEPRECATED: Internal function, do not use.
 *
 * Skip all blank characters found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 * Outside of the fast path, parameter entity references are expanded
 * when the context is external or more than one input is stacked.
 *
 * Returns the number of space chars skipped, saturating at INT_MAX. On
 * the slow path, entering a parameter entity reference and popping a
 * finished input each count as one.
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
        (ctxt->instate == XML_PARSER_START)) {
        const xmlChar *cur;
        /*
         * if we are in the document content, go really fast
         */
        cur = ctxt->input->cur;
        while (IS_BLANK_CH(*cur)) {
            if (*cur == '\n') {
                ctxt->input->line++; ctxt->input->col = 1;
            } else {
                ctxt->input->col++;
            }
            cur++;
            if (res < INT_MAX)
                res++;
            if (*cur == 0) {
                /*
                 * Publish the position before growing and reload it
                 * afterwards, the parser may have updated it.
                 */
                ctxt->input->cur = cur;
                xmlParserGrow(ctxt);
                cur = ctxt->input->cur;
            }
        }
        ctxt->input->cur = cur;
    } else {
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));

        while (ctxt->instate != XML_PARSER_EOF) {
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
                NEXT;
            } else if (CUR == '%') {
                /*
                 * Need to handle support of entities branching here
                 */
                if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
                    break;
                xmlParsePEReference(ctxt);
            } else if (CUR == 0) {
                unsigned long consumed;
                xmlEntityPtr ent;

                /* the bottom input is never popped */
                if (ctxt->inputNr <= 1)
                    break;

                /* bytes read from the input about to be popped */
                consumed = ctxt->input->consumed;
                xmlSaturatedAddSizeT(&consumed,
                                     ctxt->input->cur - ctxt->input->base);

                /*
                 * Add to sizeentities when parsing an external entity
                 * for the first time.
                 */
                /* NOTE(review): ent is dereferenced unchecked, this
                 * presumably relies on every stacked input having an
                 * entity - confirm. */
                ent = ctxt->input->entity;
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
                    ent->flags |= XML_ENT_PARSED;

                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
                }

                xmlParserEntityCheck(ctxt, consumed);

                xmlPopInput(ctxt);
            } else {
                break;
            }

            /*
             * Also increase the counter when entering or exiting a PERef.
             * The spec says: "When a parameter-entity reference is recognized
             * in the DTD and included, its replacement text MUST be enlarged
             * by the attachment of one leading and one following space (#x20)
             * character."
             */
            if (res < INT_MAX)
                res++;
        }
    }
    return(res);
}

/************************************************************************
 *                                                                      *
 *              Commodity functions to handle entities                  *
 *                                                                      *
 ************************************************************************/

/**
 * xmlPopInput:
 * @ctxt:  an XML parser context
 *
 * xmlPopInput: the current input pointed by ctxt->input came to an end
 *          pop it and return the next char.
 *
 * The popped input is freed and the XML_ENT_EXPANDING flag of its entity,
 * if any, is cleared. The bottom input of the stack is never popped.
 *
 * Returns the current xmlChar in the parser context, 0 if @ctxt is NULL
 * or if there is at most one input on the stack
 */
xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr input;

    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                "Popping input %d\n", ctxt->inputNr);
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
        (ctxt->instate != XML_PARSER_EOF))
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                    "Unfinished entity outside the DTD");
    /* inputNr is greater than 1 here, so there is an input to pop */
    input = inputPop(ctxt);
    if (input->entity != NULL)
        input->entity->flags &= ~XML_ENT_EXPANDING;
    xmlFreeInputStream(input);
    if (*ctxt->input->cur == 0)
        xmlParserGrow(ctxt);
    return(CUR);
}

/**
 * xmlPushInput:
 * @ctxt:  an XML parser context
 * @input:  an XML parser input fragment (entity, XML fragment ...).
 *
 * xmlPushInput: switch to a new input stream which is stacked on top
 *               of the previous one(s).
2446 * Returns -1 in case of error or the index in the input stack 2447 */ 2448int 2449xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2450 int ret; 2451 if (input == NULL) return(-1); 2452 2453 if (xmlParserDebugEntities) { 2454 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2455 xmlGenericError(xmlGenericErrorContext, 2456 "%s(%d): ", ctxt->input->filename, 2457 ctxt->input->line); 2458 xmlGenericError(xmlGenericErrorContext, 2459 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2460 } 2461 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2462 (ctxt->inputNr > 100)) { 2463 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2464 while (ctxt->inputNr > 1) 2465 xmlFreeInputStream(inputPop(ctxt)); 2466 return(-1); 2467 } 2468 ret = inputPush(ctxt, input); 2469 if (ctxt->instate == XML_PARSER_EOF) 2470 return(-1); 2471 GROW; 2472 return(ret); 2473} 2474 2475/** 2476 * xmlParseCharRef: 2477 * @ctxt: an XML parser context 2478 * 2479 * DEPRECATED: Internal function, don't use. 2480 * 2481 * Parse a numeric character reference. Always consumes '&'. 2482 * 2483 * [66] CharRef ::= '&#' [0-9]+ ';' | 2484 * '&#x' [0-9a-fA-F]+ ';' 2485 * 2486 * [ WFC: Legal Character ] 2487 * Characters referred to using character references must match the 2488 * production for Char. 
2489 * 2490 * Returns the value parsed (as an int), 0 in case of error 2491 */ 2492int 2493xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2494 int val = 0; 2495 int count = 0; 2496 2497 /* 2498 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2499 */ 2500 if ((RAW == '&') && (NXT(1) == '#') && 2501 (NXT(2) == 'x')) { 2502 SKIP(3); 2503 GROW; 2504 while (RAW != ';') { /* loop blocked by count */ 2505 if (count++ > 20) { 2506 count = 0; 2507 GROW; 2508 if (ctxt->instate == XML_PARSER_EOF) 2509 return(0); 2510 } 2511 if ((RAW >= '0') && (RAW <= '9')) 2512 val = val * 16 + (CUR - '0'); 2513 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2514 val = val * 16 + (CUR - 'a') + 10; 2515 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2516 val = val * 16 + (CUR - 'A') + 10; 2517 else { 2518 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2519 val = 0; 2520 break; 2521 } 2522 if (val > 0x110000) 2523 val = 0x110000; 2524 2525 NEXT; 2526 count++; 2527 } 2528 if (RAW == ';') { 2529 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2530 ctxt->input->col++; 2531 ctxt->input->cur++; 2532 } 2533 } else if ((RAW == '&') && (NXT(1) == '#')) { 2534 SKIP(2); 2535 GROW; 2536 while (RAW != ';') { /* loop blocked by count */ 2537 if (count++ > 20) { 2538 count = 0; 2539 GROW; 2540 if (ctxt->instate == XML_PARSER_EOF) 2541 return(0); 2542 } 2543 if ((RAW >= '0') && (RAW <= '9')) 2544 val = val * 10 + (CUR - '0'); 2545 else { 2546 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2547 val = 0; 2548 break; 2549 } 2550 if (val > 0x110000) 2551 val = 0x110000; 2552 2553 NEXT; 2554 count++; 2555 } 2556 if (RAW == ';') { 2557 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2558 ctxt->input->col++; 2559 ctxt->input->cur++; 2560 } 2561 } else { 2562 if (RAW == '&') 2563 SKIP(1); 2564 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2565 } 2566 2567 /* 2568 * [ WFC: Legal Character ] 2569 * Characters referred to using 
character references must match the 2570 * production for Char. 2571 */ 2572 if (val >= 0x110000) { 2573 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2574 "xmlParseCharRef: character reference out of bounds\n", 2575 val); 2576 } else if (IS_CHAR(val)) { 2577 return(val); 2578 } else { 2579 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2580 "xmlParseCharRef: invalid xmlChar value %d\n", 2581 val); 2582 } 2583 return(0); 2584} 2585 2586/** 2587 * xmlParseStringCharRef: 2588 * @ctxt: an XML parser context 2589 * @str: a pointer to an index in the string 2590 * 2591 * parse Reference declarations, variant parsing from a string rather 2592 * than an an input flow. 2593 * 2594 * [66] CharRef ::= '&#' [0-9]+ ';' | 2595 * '&#x' [0-9a-fA-F]+ ';' 2596 * 2597 * [ WFC: Legal Character ] 2598 * Characters referred to using character references must match the 2599 * production for Char. 2600 * 2601 * Returns the value parsed (as an int), 0 in case of error, str will be 2602 * updated to the current value of the index 2603 */ 2604static int 2605xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2606 const xmlChar *ptr; 2607 xmlChar cur; 2608 int val = 0; 2609 2610 if ((str == NULL) || (*str == NULL)) return(0); 2611 ptr = *str; 2612 cur = *ptr; 2613 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2614 ptr += 3; 2615 cur = *ptr; 2616 while (cur != ';') { /* Non input consuming loop */ 2617 if ((cur >= '0') && (cur <= '9')) 2618 val = val * 16 + (cur - '0'); 2619 else if ((cur >= 'a') && (cur <= 'f')) 2620 val = val * 16 + (cur - 'a') + 10; 2621 else if ((cur >= 'A') && (cur <= 'F')) 2622 val = val * 16 + (cur - 'A') + 10; 2623 else { 2624 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2625 val = 0; 2626 break; 2627 } 2628 if (val > 0x110000) 2629 val = 0x110000; 2630 2631 ptr++; 2632 cur = *ptr; 2633 } 2634 if (cur == ';') 2635 ptr++; 2636 } else if ((cur == '&') && (ptr[1] == '#')){ 2637 ptr += 2; 2638 cur = *ptr; 2639 while (cur != ';') { /* 
Non input consuming loops */ 2640 if ((cur >= '0') && (cur <= '9')) 2641 val = val * 10 + (cur - '0'); 2642 else { 2643 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2644 val = 0; 2645 break; 2646 } 2647 if (val > 0x110000) 2648 val = 0x110000; 2649 2650 ptr++; 2651 cur = *ptr; 2652 } 2653 if (cur == ';') 2654 ptr++; 2655 } else { 2656 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2657 return(0); 2658 } 2659 *str = ptr; 2660 2661 /* 2662 * [ WFC: Legal Character ] 2663 * Characters referred to using character references must match the 2664 * production for Char. 2665 */ 2666 if (val >= 0x110000) { 2667 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2668 "xmlParseStringCharRef: character reference out of bounds\n", 2669 val); 2670 } else if (IS_CHAR(val)) { 2671 return(val); 2672 } else { 2673 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2674 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2675 val); 2676 } 2677 return(0); 2678} 2679 2680/** 2681 * xmlParserHandlePEReference: 2682 * @ctxt: the parser context 2683 * 2684 * DEPRECATED: Internal function, do not use. 2685 * 2686 * [69] PEReference ::= '%' Name ';' 2687 * 2688 * [ WFC: No Recursion ] 2689 * A parsed entity must not contain a recursive 2690 * reference to itself, either directly or indirectly. 2691 * 2692 * [ WFC: Entity Declared ] 2693 * In a document without any DTD, a document with only an internal DTD 2694 * subset which contains no parameter entity references, or a document 2695 * with "standalone='yes'", ... ... The declaration of a parameter 2696 * entity must precede any reference to it... 2697 * 2698 * [ VC: Entity Declared ] 2699 * In a document with an external subset or external parameter entities 2700 * with "standalone='no'", ... ... The declaration of a parameter entity 2701 * must precede any reference to it... 2702 * 2703 * [ WFC: In DTD ] 2704 * Parameter-entity references may only appear in the DTD. 2705 * NOTE: misleading but this is handled. 
 *
 * A PEReference may have been detected in the current input stream
 * the handling is done accordingly to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 *
 * Only the XML_PARSER_DTD state can reach xmlParsePEReference; every
 * other state either returns silently or raises an error first.
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    switch(ctxt->instate) {
        case XML_PARSER_CDATA_SECTION:
            return;
        case XML_PARSER_COMMENT:
            return;
        case XML_PARSER_START_TAG:
            return;
        case XML_PARSER_END_TAG:
            return;
        case XML_PARSER_EOF:
            xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
            return;
        case XML_PARSER_PROLOG:
        case XML_PARSER_START:
        case XML_PARSER_XML_DECL:
        case XML_PARSER_MISC:
            xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
            return;
        case XML_PARSER_ENTITY_DECL:
        case XML_PARSER_CONTENT:
        case XML_PARSER_ATTRIBUTE_VALUE:
        case XML_PARSER_PI:
        case XML_PARSER_SYSTEM_LITERAL:
        case XML_PARSER_PUBLIC_LITERAL:
            /* we just ignore it there */
            return;
        case XML_PARSER_EPILOG:
            xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
            return;
        case XML_PARSER_ENTITY_VALUE:
            /*
             * NOTE: in the case of entity values, we don't do the
             *       substitution here since we need the literal
             *       entity value to be able to save the internal
             *       subset of the document.
             *       This will be handled by xmlStringDecodeEntities
             */
            return;
        case XML_PARSER_DTD:
            /*
             * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
             * In the internal DTD subset, parameter-entity references
             * can occur only where markup declarations can occur, not
             * within markup declarations.
             * In that case this is handled in xmlParseMarkupDecl
             */
            if ((ctxt->external == 0) && (ctxt->inputNr == 1))
                return;
            /* a '%' followed by a blank or end of input is not a PERef */
            if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
                return;
            break;
        case XML_PARSER_IGNORE:
            return;
    }

    xmlParsePEReference(ctxt);
}

/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the old one plus n; a wrapped size_t is treated
 * as an allocation failure. On failure the old buffer is left untouched
 * for mem_error to release.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + n;                            \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}

/**
 * xmlStringDecodeEntitiesInt:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 * @check:  whether to perform entity checks
 *
 * Copies @str into a fresh buffer, replacing character references and,
 * depending on @what, general and parameter entity references. Entity
 * content is expanded by a recursive call with ctxt->depth raised by
 * one; the recursion is refused beyond depth 40, or 100 when
 * XML_PARSE_HUGE is set.
 *
 * Returns a newly allocated, zero terminated string, or NULL when @str
 * is NULL, on allocation failure or on any decoding error.
 */
static xmlChar *
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                           int what, xmlChar end, xmlChar  end2, xmlChar end3,
                           int check) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if (str == NULL)
        return(NULL);
    last = str + len;

    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 100)) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
                       "Maximum entity nesting depth exceeded");
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     *
     * After every append the buffer is regrown as soon as fewer than
     * XML_PARSER_BUFFER_SIZE bytes are left, which is what lets the
     * short unchecked writes below stay in bounds.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3) &&
           (ctxt->instate != XML_PARSER_EOF)) {

        /* redundant with the loop condition above */
        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* character reference: str is advanced by the callee */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val == 0)
                goto int_error;
            COPY_BUF(buffer, nbchars, val);
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* predefined entity: only its first content byte is copied */
                if (ent->content != NULL) {
                    COPY_BUF(buffer, nbchars, ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                    goto int_error;
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
                    goto int_error;

                /* the entity is already being expanded further up: loop */
                if (ent->flags & XML_ENT_EXPANDING) {
                    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                    xmlHaltParser(ctxt);
                    ent->content[0] = 0;
                    goto int_error;
                }

                ent->flags |= XML_ENT_EXPANDING;
                ctxt->depth++;
                rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
                        ent->length, what, 0, 0, 0, check);
                ctxt->depth--;
                ent->flags &= ~XML_ENT_EXPANDING;

                if (rep == NULL) {
                    /* the entity content is emptied after a failed expansion */
                    ent->content[0] = 0;
                    goto int_error;
                }

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            } else if (ent != NULL) {
                /* entity without content: emit the reference unchanged */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            if (ent != NULL) {
                if (ent->content == NULL) {
                    /*
                     * Note: external parsed entities will not be loaded,
                     * it is not required for a non-validating parser to
                     * complete external PEReferences coming from the
                     * internal subset
                     */
                    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
                        ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
                        (ctxt->validate != 0)) {
                        xmlLoadEntityContent(ctxt, ent);
                    } else {
                        xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
                    "not validating will not read content for PE entity %s\n",
                                      ent->name, NULL);
                    }
                }

                if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
                    goto int_error;

                if (ent->flags & XML_ENT_EXPANDING) {
                    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                    xmlHaltParser(ctxt);
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }

                /*
                 * ent->content may still be NULL here; the recursive
                 * call then returns NULL and the whole decoding fails.
                 */
                ent->flags |= XML_ENT_EXPANDING;
                ctxt->depth++;
                rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
                        ent->length, what, 0, 0, 0, check);
                ctxt->depth--;
                ent->flags &= ~XML_ENT_EXPANDING;

                if (rep == NULL) {
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            }
        } else {
            /* ordinary character: copy it and step over its l bytes */
            COPY_BUF(buffer, nbchars, c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

mem_error:
    xmlErrMemory(ctxt, NULL);
    /* falls through to the common cleanup */
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}

/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Takes a entity string content and process to do the adequate substitutions.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it !
3018 */ 3019xmlChar * 3020xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 3021 int what, xmlChar end, xmlChar end2, 3022 xmlChar end3) { 3023 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 3024 return(NULL); 3025 return(xmlStringDecodeEntitiesInt(ctxt, str, len, what, 3026 end, end2, end3, 0)); 3027} 3028 3029/** 3030 * xmlStringDecodeEntities: 3031 * @ctxt: the parser context 3032 * @str: the input string 3033 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 3034 * @end: an end marker xmlChar, 0 if none 3035 * @end2: an end marker xmlChar, 0 if none 3036 * @end3: an end marker xmlChar, 0 if none 3037 * 3038 * DEPRECATED: Internal function, don't use. 3039 * 3040 * Takes a entity string content and process to do the adequate substitutions. 3041 * 3042 * [67] Reference ::= EntityRef | CharRef 3043 * 3044 * [69] PEReference ::= '%' Name ';' 3045 * 3046 * Returns A newly allocated string with the substitution done. The caller 3047 * must deallocate it ! 3048 */ 3049xmlChar * 3050xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 3051 xmlChar end, xmlChar end2, xmlChar end3) { 3052 if ((ctxt == NULL) || (str == NULL)) return(NULL); 3053 return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what, 3054 end, end2, end3, 0)); 3055} 3056 3057/************************************************************************ 3058 * * 3059 * Commodity functions, cleanup needed ? * 3060 * * 3061 ************************************************************************/ 3062 3063/** 3064 * areBlanks: 3065 * @ctxt: an XML parser context 3066 * @str: a xmlChar * 3067 * @len: the size of @str 3068 * @blank_chars: we know the chars are blanks 3069 * 3070 * Is this a sequence of blank chars that one can ignore ? 3071 * 3072 * Returns 1 if ignorable 0 otherwise. 
3073 */ 3074 3075static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 3076 int blank_chars) { 3077 int i, ret; 3078 xmlNodePtr lastChild; 3079 3080 /* 3081 * Don't spend time trying to differentiate them, the same callback is 3082 * used ! 3083 */ 3084 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 3085 return(0); 3086 3087 /* 3088 * Check for xml:space value. 3089 */ 3090 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 3091 (*(ctxt->space) == -2)) 3092 return(0); 3093 3094 /* 3095 * Check that the string is made of blanks 3096 */ 3097 if (blank_chars == 0) { 3098 for (i = 0;i < len;i++) 3099 if (!(IS_BLANK_CH(str[i]))) return(0); 3100 } 3101 3102 /* 3103 * Look if the element is mixed content in the DTD if available 3104 */ 3105 if (ctxt->node == NULL) return(0); 3106 if (ctxt->myDoc != NULL) { 3107 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 3108 if (ret == 0) return(1); 3109 if (ret == 1) return(0); 3110 } 3111 3112 /* 3113 * Otherwise, heuristic :-\ 3114 */ 3115 if ((RAW != '<') && (RAW != 0xD)) return(0); 3116 if ((ctxt->node->children == NULL) && 3117 (RAW == '<') && (NXT(1) == '/')) return(0); 3118 3119 lastChild = xmlGetLastChild(ctxt->node); 3120 if (lastChild == NULL) { 3121 if ((ctxt->node->type != XML_ELEMENT_NODE) && 3122 (ctxt->node->content != NULL)) return(0); 3123 } else if (xmlNodeIsText(lastChild)) 3124 return(0); 3125 else if ((ctxt->node->children != NULL) && 3126 (xmlNodeIsText(ctxt->node->children))) 3127 return(0); 3128 return(1); 3129} 3130 3131/************************************************************************ 3132 * * 3133 * Extra stuff for namespace support * 3134 * Relates to http://www.w3.org/TR/WD-xml-names * 3135 * * 3136 ************************************************************************/ 3137 3138/** 3139 * xmlSplitQName: 3140 * @ctxt: an XML parser context 3141 * @name: an XML parser context 3142 * @prefix: a xmlChar ** 3143 * 3144 * parse an UTF8 encoded XML qualified 
 * name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * A name starting with ':' or ending with its first ':' is returned
 * whole, with no prefix. The same holds for names starting with "xml:"
 * unless XML_XML_NAMESPACE is defined.
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. Both strings are newly allocated. NULL is
 *   returned when @prefix or @name is NULL or on allocation failure; if
 *   the failure happens while copying the local part, *prefix already
 *   holds the allocated prefix.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];
    xmlChar *buffer = NULL;
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
        return(xmlStrdup(name));
#endif

    /* nasty but well-formed */
    if (cur[0] == ':')
        return(xmlStrdup(name));

    /* first segment: up to ':' or end of string, on the stack if short */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
        buf[len++] = c;
        c = *cur++;
    }
    if (len >= max) {
        /*
         * Okay someone managed to make a huge name, so he's ready to pay
         * for the processing speed.
         */
        max = len * 2;

        buffer = (xmlChar *) xmlMallocAtomic(max);
        if (buffer == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        memcpy(buffer, buf, len);
        while ((c != 0) && (c != ':')) { /* tested bigname.xml */
            /* keep slack so the final terminator always fits */
            if (len + 10 > max) {
                xmlChar *tmp;

                max *= 2;
                tmp = (xmlChar *) xmlRealloc(buffer, max);
                if (tmp == NULL) {
                    xmlFree(buffer);
                    xmlErrMemory(ctxt, NULL);
                    return(NULL);
                }
                buffer = tmp;
            }
            buffer[len++] = c;
            c = *cur++;
        }
        buffer[len] = 0;
    }

    /* "prefix:" with nothing after the colon: not split */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
            xmlFree(buffer);
        *prefix = NULL;
        return(xmlStrdup(name));
    }

    if (buffer == NULL)
        ret = xmlStrndup(buf, len);
    else {
        ret = buffer;
        buffer = NULL;
        max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
        /* what was read so far is the prefix; now read the local part */
        c = *cur;
        *prefix = ret;
        /*
         * Unreachable: an empty local part already returned above, so
         * *cur is not 0 here.
         */
        if (c == 0) {
            return(xmlStrndup(BAD_CAST "", 0));
        }
        len = 0;

        /*
         * Check that the first character is proper to start
         * a new name
         */
        if (!(((c >= 0x61) && (c <= 0x7A)) ||
              ((c >= 0x41) && (c <= 0x5A)) ||
              (c == '_') || (c == ':'))) {
            int l;
            int first = CUR_SCHAR(cur, l);

            /* reported, but the split still goes ahead */
            if (!IS_LETTER(first) && (first != '_')) {
                xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
                            "Name %s is not XML Namespace compliant\n",
                                  name);
            }
        }
        cur++;

        while ((c != 0) && (len < max)) { /* tested bigname2.xml */
            buf[len++] = c;
            c = *cur++;
        }
        if (len >= max) {
            /*
             * Okay someone managed to make a huge name, so he's ready to pay
             * for the processing speed.
             */
            max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max);
            if (buffer == NULL) {
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (c != 0) { /* tested bigname2.xml */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer, max);
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                buffer[len++] = c;
                c = *cur++;
            }
            buffer[len] = 0;
        }

        if (buffer == NULL)
            ret = xmlStrndup(buf, len);
        else {
            ret = buffer;
        }
    }

    return(ret);
}

/************************************************************************
 *                                                                      *
 *                      The parser itself                               *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/

/************************************************************************
 *                                                                      *
 *      Routines to parse Name, NCName and NmToken                      *
 *                                                                      *
 ************************************************************************/

/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in the Revision 5 of XML-1.0
 * They correspond to the modified production [4] and the new production [4a]
 * changes in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
 * We still keep compatibility to pre-revision5 parsing semantic if the
 * new XML_PARSE_OLD10 option is given to the parser.
 */
/*
 * xmlIsNameStartChar: returns 1 if code point c may start a Name, else 0.
 * The explicit ranges are used unless XML_PARSE_OLD10 is set, in which
 * case the test falls back to IS_LETTER plus '_' and ':'.
 */
static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
         * Use the new checks of production [4] [4a] and [5] of the
         * Update 5 of XML-1.0
         */
        if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
            (((c >= 'a') && (c <= 'z')) ||
             ((c >= 'A') && (c <= 'Z')) ||
             (c == '_') || (c == ':') ||
             ((c >= 0xC0) && (c <= 0xD6)) ||
             ((c >= 0xD8) && (c <= 0xF6)) ||
             ((c >= 0xF8) && (c <= 0x2FF)) ||
             ((c >= 0x370) && (c <= 0x37D)) ||
             ((c >= 0x37F) && (c <= 0x1FFF)) ||
             ((c >= 0x200C) && (c <= 0x200D)) ||
             ((c >= 0x2070) && (c <= 0x218F)) ||
             ((c >= 0x2C00) && (c <= 0x2FEF)) ||
             ((c >= 0x3001) && (c <= 0xD7FF)) ||
             ((c >= 0xF900) && (c <= 0xFDCF)) ||
             ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
             ((c >= 0x10000) && (c <= 0xEFFFF))))
            return(1);
    } else {
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
            return(1);
    }
    return(0);
}

/*
 * xmlIsNameChar: returns 1 if code point c may appear inside a Name,
 * else 0. Same option switch as xmlIsNameStartChar; the entries marked
 * "!start" are the ones allowed here but not as a first character.
 */
static int
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
         * Use the new checks of production [4] [4a] and [5] of the
         * Update 5 of XML-1.0
         */
        if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
            (((c >= 'a') && (c <= 'z')) ||
             ((c >= 'A') && (c <= 'Z')) ||
             ((c >= '0') && (c <= '9')) || /* !start */
             (c == '_') || (c == ':') ||
             (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
             ((c >= 0xC0) && (c <= 0xD6)) ||
             ((c >= 0xD8) && (c <= 0xF6)) ||
             ((c >= 0xF8) && (c <= 0x2FF)) ||
             ((c >= 0x300) && (c <= 0x36F)) || /* !start */
             ((c >= 0x370) && (c <= 0x37D)) ||
             ((c >= 0x37F) && (c <= 0x1FFF)) ||
             ((c >= 0x200C) && (c <= 0x200D)) ||
             ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
             ((c >= 0x2070) && (c <= 0x218F)) ||
             ((c >= 0x2C00) && (c <= 0x2FEF)) ||
             ((c >= 0x3001) && (c <= 0xD7FF)) ||
             ((c >= 0xF900) && (c <= 0xFDCF)) ||
             ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
             ((c >= 0x10000) && (c <= 0xEFFFF))))
             return(1);
    } else {
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
            (c == '.') || (c == '-') ||
            (c == '_') || (c == ':') ||
            (IS_COMBINING(c)) ||
            (IS_EXTENDER(c)))
            return(1);
    }
    return(0);
}

static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);

/*
 * xmlParseNameComplex: slow path of xmlParseName, reading the name one
 * decoded character at a time. The name is consumed from the input and
 * then looked up in the dictionary from the bytes just behind the cursor.
 *
 * Returns the interned name, or NULL when the input does not start with
 * a name start character, when the parser reached EOF state, when the
 * name is longer than the configured maximum, or when fewer than len
 * bytes lie behind the cursor.
 */
static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;
    int c;
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                    XML_MAX_TEXT_LENGTH :
                    XML_MAX_NAME_LENGTH;

    /*
     * Handler for more complex cases
     */
    c = CUR_CHAR(l);
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
         * Use the new checks of production [4] [4a] and [5] of the
         * Update 5 of XML-1.0
         */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!(((c >= 'a') && (c <= 'z')) ||
               ((c >= 'A') && (c <= 'Z')) ||
               (c == '_') || (c == ':') ||
               ((c >= 0xC0) && (c <= 0xD6)) ||
               ((c >= 0xD8) && (c <= 0xF6)) ||
               ((c >= 0xF8) && (c <= 0x2FF)) ||
               ((c >= 0x370) && (c <= 0x37D)) ||
               ((c >= 0x37F) && (c <= 0x1FFF)) ||
               ((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               ((c >= 0x3001) && (c <= 0xD7FF)) ||
               ((c >= 0xF900) && (c <= 0xFDCF)) ||
               ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
               ((c >= 0x10000) && (c <= 0xEFFFF))))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
        while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
               (((c >= 'a') && (c <= 'z')) ||
                ((c >= 'A') && (c <= 'Z')) ||
                ((c >= '0') && (c <= '9')) || /* !start */
                (c == '_') || (c == ':') ||
                (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
                ((c >= 0xC0) && (c <= 0xD6)) ||
                ((c >= 0xD8) && (c <= 0xF6)) ||
                ((c >= 0xF8) && (c <= 0x2FF)) ||
                ((c >= 0x300) && (c <= 0x36F)) || /* !start */
                ((c >= 0x370) && (c <= 0x37D)) ||
                ((c >= 0x37F) && (c <= 0x1FFF)) ||
                ((c >= 0x200C) && (c <= 0x200D)) ||
                ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
                ((c >= 0x2070) && (c <= 0x218F)) ||
                ((c >= 0x2C00) && (c <= 0x2FEF)) ||
                ((c >= 0x3001) && (c <= 0xD7FF)) ||
                ((c >= 0xF900) && (c <= 0xFDCF)) ||
                ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
                ((c >= 0x10000) && (c <= 0xEFFFF))
                )) {
            /* saturate instead of overflowing the byte count */
            if (len <= INT_MAX - l)
                len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    } else {
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!IS_LETTER(c) && (c != '_') &&
             (c != ':'))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);

        while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
               ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
                (c == '.') || (c == '-') ||
                (c == '_') || (c == ':') ||
                (IS_COMBINING(c)) ||
                (IS_EXTENDER(c)))) {
            if (len <= INT_MAX - l)
                len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    }
    if (ctxt->instate == XML_PARSER_EOF)
        return(NULL);
    if (len > maxLength) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
        return(NULL);
    }
    if (ctxt->input->cur - ctxt->input->base < len) {
        /*
         * There were a couple of bugs where PERefs lead to a change
         * of the buffer. Check the buffer size to avoid passing an invalid
         * pointer to xmlDictLookup.
         */
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                    "unexpected change of input buffer");
        return (NULL);
    }
    /*
     * When the cursor sits on the '\n' of a "\r\n" pair, the name is
     * taken from one byte further back so the '\r' is left out.
     */
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}

/**
 * xmlParseName:
 * @ctxt:  an XML parser context
 *
 * DEPRECATED: Internal function, don't use.
 *
 * parse an XML name.
 *
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
 *                  CombiningChar | Extender
 *
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
 *
 * [6] Names ::= Name (#x20 Name)*
 *
 * Names made of ASCII characters only and followed by a non-zero ASCII
 * byte are handled inline; everything else goes to xmlParseNameComplex.
 *
 * Returns the Name parsed or NULL
 */

const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt) {
    const xmlChar *in;
    const xmlChar *ret;
    size_t count = 0;
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                       XML_MAX_TEXT_LENGTH :
                       XML_MAX_NAME_LENGTH;

    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        return(NULL);

    /*
     * Accelerator for simple ASCII names
     */
    in = ctxt->input->cur;
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
        ((*in >= 0x41) && (*in <= 0x5A)) ||
        (*in == '_') || (*in == ':')) {
        in++;
        while (((*in >= 0x61) && (*in <= 0x7A)) ||
               ((*in >= 0x41) && (*in <= 0x5A)) ||
               ((*in >= 0x30) && (*in <= 0x39)) ||
               (*in == '_') || (*in == '-') ||
               (*in == ':') || (*in == '.'))
            in++;
        /*
         * Only accept the fast result when an ASCII byte ends the name;
         * a NUL or a byte >= 0x80 is left to the complex parser.
         */
        if ((*in > 0) && (*in < 0x80)) {
            count = in - ctxt->input->cur;
            if (count > maxLength) {
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
                return(NULL);
            }
            ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
            ctxt->input->cur = in;
            ctxt->input->col += count;
            if (ret == NULL)
                xmlErrMemory(ctxt, NULL);
            return(ret);
        }
    }
    /* accelerator for special cases */
    return(xmlParseNameComplex(ctxt));
}

/*
 * xmlParseNCNameComplex: slow path for NCName parsing. Consumes name
 * characters other than ':' and looks the result up in the dictionary,
 * starting from the offset recorded before parsing.
 *
 * Returns the hashed string, with a NULL name when no NCName starts at
 * the cursor, when the parser reached EOF state or when the name is
 * too long.
 */
static xmlHashedString
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
    xmlHashedString ret;
    int len = 0, l;
    int c;
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                    XML_MAX_TEXT_LENGTH :
                    XML_MAX_NAME_LENGTH;
    size_t startPosition = 0;

    ret.name = NULL;
    ret.hashValue = 0;

    /*
     * Handler for more complex cases
     */
    /* remembered as an offset rather than as a pointer */
    startPosition = CUR_PTR - BASE_PTR;
    c = CUR_CHAR(l);
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
        (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
        return(ret);
    }

    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
           (xmlIsNameChar(ctxt, c) && (c != ':'))) {
        if (len <= INT_MAX - l)
            len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        return(ret);
    if (len > maxLength) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
        return(ret);
    }
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
    return(ret);
}

/**
 * xmlParseNCName:
 * @ctxt:  an XML parser context
 * @len:  length of the string parsed
 *
 * parse an XML name.
 *
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
 *                      CombiningChar | Extender
 *
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
 *
 * Returns the Name parsed or NULL
 */

static xmlHashedString
xmlParseNCName(xmlParserCtxtPtr ctxt) {
    const xmlChar *in, *e;
    xmlHashedString ret;
    size_t count = 0;
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3634 XML_MAX_TEXT_LENGTH : 3635 XML_MAX_NAME_LENGTH; 3636 3637 ret.name = NULL; 3638 3639 /* 3640 * Accelerator for simple ASCII names 3641 */ 3642 in = ctxt->input->cur; 3643 e = ctxt->input->end; 3644 if ((((*in >= 0x61) && (*in <= 0x7A)) || 3645 ((*in >= 0x41) && (*in <= 0x5A)) || 3646 (*in == '_')) && (in < e)) { 3647 in++; 3648 while ((((*in >= 0x61) && (*in <= 0x7A)) || 3649 ((*in >= 0x41) && (*in <= 0x5A)) || 3650 ((*in >= 0x30) && (*in <= 0x39)) || 3651 (*in == '_') || (*in == '-') || 3652 (*in == '.')) && (in < e)) 3653 in++; 3654 if (in >= e) 3655 goto complex; 3656 if ((*in > 0) && (*in < 0x80)) { 3657 count = in - ctxt->input->cur; 3658 if (count > maxLength) { 3659 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3660 return(ret); 3661 } 3662 ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count); 3663 ctxt->input->cur = in; 3664 ctxt->input->col += count; 3665 if (ret.name == NULL) { 3666 xmlErrMemory(ctxt, NULL); 3667 } 3668 return(ret); 3669 } 3670 } 3671complex: 3672 return(xmlParseNCNameComplex(ctxt)); 3673} 3674 3675/** 3676 * xmlParseNameAndCompare: 3677 * @ctxt: an XML parser context 3678 * 3679 * parse an XML name and compares for match 3680 * (specialized for endtag parsing) 3681 * 3682 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3683 * and the name for mismatch 3684 */ 3685 3686static const xmlChar * 3687xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3688 register const xmlChar *cmp = other; 3689 register const xmlChar *in; 3690 const xmlChar *ret; 3691 3692 GROW; 3693 if (ctxt->instate == XML_PARSER_EOF) 3694 return(NULL); 3695 3696 in = ctxt->input->cur; 3697 while (*in != 0 && *in == *cmp) { 3698 ++in; 3699 ++cmp; 3700 } 3701 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3702 /* success */ 3703 ctxt->input->col += in - ctxt->input->cur; 3704 ctxt->input->cur = in; 3705 return (const xmlChar*) 1; 3706 } 3707 /* failure (or end of input buffer), check with full function */ 3708 ret 
= xmlParseName (ctxt); 3709 /* strings coming from the dictionary direct compare possible */ 3710 if (ret == other) { 3711 return (const xmlChar*) 1; 3712 } 3713 return ret; 3714} 3715 3716/** 3717 * xmlParseStringName: 3718 * @ctxt: an XML parser context 3719 * @str: a pointer to the string pointer (IN/OUT) 3720 * 3721 * parse an XML name. 3722 * 3723 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3724 * CombiningChar | Extender 3725 * 3726 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3727 * 3728 * [6] Names ::= Name (#x20 Name)* 3729 * 3730 * Returns the Name parsed or NULL. The @str pointer 3731 * is updated to the current location in the string. 3732 */ 3733 3734static xmlChar * 3735xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3736 xmlChar buf[XML_MAX_NAMELEN + 5]; 3737 const xmlChar *cur = *str; 3738 int len = 0, l; 3739 int c; 3740 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 3741 XML_MAX_TEXT_LENGTH : 3742 XML_MAX_NAME_LENGTH; 3743 3744 c = CUR_SCHAR(cur, l); 3745 if (!xmlIsNameStartChar(ctxt, c)) { 3746 return(NULL); 3747 } 3748 3749 COPY_BUF(buf, len, c); 3750 cur += l; 3751 c = CUR_SCHAR(cur, l); 3752 while (xmlIsNameChar(ctxt, c)) { 3753 COPY_BUF(buf, len, c); 3754 cur += l; 3755 c = CUR_SCHAR(cur, l); 3756 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3757 /* 3758 * Okay someone managed to make a huge name, so he's ready to pay 3759 * for the processing speed. 
3760 */ 3761 xmlChar *buffer; 3762 int max = len * 2; 3763 3764 buffer = (xmlChar *) xmlMallocAtomic(max); 3765 if (buffer == NULL) { 3766 xmlErrMemory(ctxt, NULL); 3767 return(NULL); 3768 } 3769 memcpy(buffer, buf, len); 3770 while (xmlIsNameChar(ctxt, c)) { 3771 if (len + 10 > max) { 3772 xmlChar *tmp; 3773 3774 max *= 2; 3775 tmp = (xmlChar *) xmlRealloc(buffer, max); 3776 if (tmp == NULL) { 3777 xmlErrMemory(ctxt, NULL); 3778 xmlFree(buffer); 3779 return(NULL); 3780 } 3781 buffer = tmp; 3782 } 3783 COPY_BUF(buffer, len, c); 3784 cur += l; 3785 c = CUR_SCHAR(cur, l); 3786 if (len > maxLength) { 3787 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3788 xmlFree(buffer); 3789 return(NULL); 3790 } 3791 } 3792 buffer[len] = 0; 3793 *str = cur; 3794 return(buffer); 3795 } 3796 } 3797 if (len > maxLength) { 3798 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3799 return(NULL); 3800 } 3801 *str = cur; 3802 return(xmlStrndup(buf, len)); 3803} 3804 3805/** 3806 * xmlParseNmtoken: 3807 * @ctxt: an XML parser context 3808 * 3809 * DEPRECATED: Internal function, don't use. 3810 * 3811 * parse an XML Nmtoken. 3812 * 3813 * [7] Nmtoken ::= (NameChar)+ 3814 * 3815 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3816 * 3817 * Returns the Nmtoken parsed or NULL 3818 */ 3819 3820xmlChar * 3821xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3822 xmlChar buf[XML_MAX_NAMELEN + 5]; 3823 int len = 0, l; 3824 int c; 3825 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 3826 XML_MAX_TEXT_LENGTH : 3827 XML_MAX_NAME_LENGTH; 3828 3829 c = CUR_CHAR(l); 3830 3831 while (xmlIsNameChar(ctxt, c)) { 3832 COPY_BUF(buf, len, c); 3833 NEXTL(l); 3834 c = CUR_CHAR(l); 3835 if (len >= XML_MAX_NAMELEN) { 3836 /* 3837 * Okay someone managed to make a huge token, so he's ready to pay 3838 * for the processing speed. 
3839 */ 3840 xmlChar *buffer; 3841 int max = len * 2; 3842 3843 buffer = (xmlChar *) xmlMallocAtomic(max); 3844 if (buffer == NULL) { 3845 xmlErrMemory(ctxt, NULL); 3846 return(NULL); 3847 } 3848 memcpy(buffer, buf, len); 3849 while (xmlIsNameChar(ctxt, c)) { 3850 if (len + 10 > max) { 3851 xmlChar *tmp; 3852 3853 max *= 2; 3854 tmp = (xmlChar *) xmlRealloc(buffer, max); 3855 if (tmp == NULL) { 3856 xmlErrMemory(ctxt, NULL); 3857 xmlFree(buffer); 3858 return(NULL); 3859 } 3860 buffer = tmp; 3861 } 3862 COPY_BUF(buffer, len, c); 3863 if (len > maxLength) { 3864 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3865 xmlFree(buffer); 3866 return(NULL); 3867 } 3868 NEXTL(l); 3869 c = CUR_CHAR(l); 3870 } 3871 buffer[len] = 0; 3872 if (ctxt->instate == XML_PARSER_EOF) { 3873 xmlFree(buffer); 3874 return(NULL); 3875 } 3876 return(buffer); 3877 } 3878 } 3879 if (ctxt->instate == XML_PARSER_EOF) 3880 return(NULL); 3881 if (len == 0) 3882 return(NULL); 3883 if (len > maxLength) { 3884 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3885 return(NULL); 3886 } 3887 return(xmlStrndup(buf, len)); 3888} 3889 3890/** 3891 * xmlParseEntityValue: 3892 * @ctxt: an XML parser context 3893 * @orig: if non-NULL store a copy of the original entity value 3894 * 3895 * DEPRECATED: Internal function, don't use. 3896 * 3897 * parse a value for ENTITY declarations 3898 * 3899 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3900 * "'" ([^%&'] | PEReference | Reference)* "'" 3901 * 3902 * Returns the EntityValue parsed with reference substituted or NULL 3903 */ 3904 3905xmlChar * 3906xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3907 xmlChar *buf = NULL; 3908 int len = 0; 3909 int size = XML_PARSER_BUFFER_SIZE; 3910 int c, l; 3911 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 
3912 XML_MAX_HUGE_LENGTH : 3913 XML_MAX_TEXT_LENGTH; 3914 xmlChar stop; 3915 xmlChar *ret = NULL; 3916 const xmlChar *cur = NULL; 3917 xmlParserInputPtr input; 3918 3919 if (RAW == '"') stop = '"'; 3920 else if (RAW == '\'') stop = '\''; 3921 else { 3922 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3923 return(NULL); 3924 } 3925 buf = (xmlChar *) xmlMallocAtomic(size); 3926 if (buf == NULL) { 3927 xmlErrMemory(ctxt, NULL); 3928 return(NULL); 3929 } 3930 3931 /* 3932 * The content of the entity definition is copied in a buffer. 3933 */ 3934 3935 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3936 input = ctxt->input; 3937 GROW; 3938 if (ctxt->instate == XML_PARSER_EOF) 3939 goto error; 3940 NEXT; 3941 c = CUR_CHAR(l); 3942 /* 3943 * NOTE: 4.4.5 Included in Literal 3944 * When a parameter entity reference appears in a literal entity 3945 * value, ... a single or double quote character in the replacement 3946 * text is always treated as a normal data character and will not 3947 * terminate the literal. 
3948 * In practice it means we stop the loop only when back at parsing 3949 * the initial entity and the quote is found 3950 */ 3951 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3952 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3953 if (len + 5 >= size) { 3954 xmlChar *tmp; 3955 3956 size *= 2; 3957 tmp = (xmlChar *) xmlRealloc(buf, size); 3958 if (tmp == NULL) { 3959 xmlErrMemory(ctxt, NULL); 3960 goto error; 3961 } 3962 buf = tmp; 3963 } 3964 COPY_BUF(buf, len, c); 3965 NEXTL(l); 3966 3967 GROW; 3968 c = CUR_CHAR(l); 3969 if (c == 0) { 3970 GROW; 3971 c = CUR_CHAR(l); 3972 } 3973 3974 if (len > maxLength) { 3975 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 3976 "entity value too long\n"); 3977 goto error; 3978 } 3979 } 3980 buf[len] = 0; 3981 if (ctxt->instate == XML_PARSER_EOF) 3982 goto error; 3983 if (c != stop) { 3984 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3985 goto error; 3986 } 3987 NEXT; 3988 3989 /* 3990 * Raise problem w.r.t. '&' and '%' being used in non-entities 3991 * reference constructs. Note Charref will be handled in 3992 * xmlStringDecodeEntities() 3993 */ 3994 cur = buf; 3995 while (*cur != 0) { /* non input consuming */ 3996 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3997 xmlChar *name; 3998 xmlChar tmp = *cur; 3999 int nameOk = 0; 4000 4001 cur++; 4002 name = xmlParseStringName(ctxt, &cur); 4003 if (name != NULL) { 4004 nameOk = 1; 4005 xmlFree(name); 4006 } 4007 if ((nameOk == 0) || (*cur != ';')) { 4008 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 4009 "EntityValue: '%c' forbidden except for entities references\n", 4010 tmp); 4011 goto error; 4012 } 4013 if ((tmp == '%') && (ctxt->inSubset == 1) && 4014 (ctxt->inputNr == 1)) { 4015 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 4016 goto error; 4017 } 4018 if (*cur == 0) 4019 break; 4020 } 4021 cur++; 4022 } 4023 4024 /* 4025 * Then PEReference entities are substituted. 
4026 * 4027 * NOTE: 4.4.7 Bypassed 4028 * When a general entity reference appears in the EntityValue in 4029 * an entity declaration, it is bypassed and left as is. 4030 * so XML_SUBSTITUTE_REF is not set here. 4031 */ 4032 ++ctxt->depth; 4033 ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF, 4034 0, 0, 0, /* check */ 1); 4035 --ctxt->depth; 4036 4037 if (orig != NULL) { 4038 *orig = buf; 4039 buf = NULL; 4040 } 4041 4042error: 4043 if (buf != NULL) 4044 xmlFree(buf); 4045 return(ret); 4046} 4047 4048/** 4049 * xmlParseAttValueComplex: 4050 * @ctxt: an XML parser context 4051 * @len: the resulting attribute len 4052 * @normalize: whether to apply the inner normalization 4053 * 4054 * parse a value for an attribute, this is the fallback function 4055 * of xmlParseAttValue() when the attribute parsing requires handling 4056 * of non-ASCII characters, or normalization compaction. 4057 * 4058 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4059 */ 4060static xmlChar * 4061xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 4062 xmlChar limit = 0; 4063 xmlChar *buf = NULL; 4064 xmlChar *rep = NULL; 4065 size_t len = 0; 4066 size_t buf_size = 0; 4067 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? 4068 XML_MAX_HUGE_LENGTH : 4069 XML_MAX_TEXT_LENGTH; 4070 int c, l, in_space = 0; 4071 xmlChar *current = NULL; 4072 xmlEntityPtr ent; 4073 4074 if (NXT(0) == '"') { 4075 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 4076 limit = '"'; 4077 NEXT; 4078 } else if (NXT(0) == '\'') { 4079 limit = '\''; 4080 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 4081 NEXT; 4082 } else { 4083 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 4084 return(NULL); 4085 } 4086 4087 /* 4088 * allocate a translation buffer. 
4089 */ 4090 buf_size = XML_PARSER_BUFFER_SIZE; 4091 buf = (xmlChar *) xmlMallocAtomic(buf_size); 4092 if (buf == NULL) goto mem_error; 4093 4094 /* 4095 * OK loop until we reach one of the ending char or a size limit. 4096 */ 4097 c = CUR_CHAR(l); 4098 while (((NXT(0) != limit) && /* checked */ 4099 (IS_CHAR(c)) && (c != '<')) && 4100 (ctxt->instate != XML_PARSER_EOF)) { 4101 if (c == '&') { 4102 in_space = 0; 4103 if (NXT(1) == '#') { 4104 int val = xmlParseCharRef(ctxt); 4105 4106 if (val == '&') { 4107 if (ctxt->replaceEntities) { 4108 if (len + 10 > buf_size) { 4109 growBuffer(buf, 10); 4110 } 4111 buf[len++] = '&'; 4112 } else { 4113 /* 4114 * The reparsing will be done in xmlStringGetNodeList() 4115 * called by the attribute() function in SAX.c 4116 */ 4117 if (len + 10 > buf_size) { 4118 growBuffer(buf, 10); 4119 } 4120 buf[len++] = '&'; 4121 buf[len++] = '#'; 4122 buf[len++] = '3'; 4123 buf[len++] = '8'; 4124 buf[len++] = ';'; 4125 } 4126 } else if (val != 0) { 4127 if (len + 10 > buf_size) { 4128 growBuffer(buf, 10); 4129 } 4130 len += xmlCopyChar(0, &buf[len], val); 4131 } 4132 } else { 4133 ent = xmlParseEntityRef(ctxt); 4134 if ((ent != NULL) && 4135 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 4136 if (len + 10 > buf_size) { 4137 growBuffer(buf, 10); 4138 } 4139 if ((ctxt->replaceEntities == 0) && 4140 (ent->content[0] == '&')) { 4141 buf[len++] = '&'; 4142 buf[len++] = '#'; 4143 buf[len++] = '3'; 4144 buf[len++] = '8'; 4145 buf[len++] = ';'; 4146 } else { 4147 buf[len++] = ent->content[0]; 4148 } 4149 } else if ((ent != NULL) && 4150 (ctxt->replaceEntities != 0)) { 4151 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 4152 if (xmlParserEntityCheck(ctxt, ent->length)) 4153 goto error; 4154 4155 ++ctxt->depth; 4156 rep = xmlStringDecodeEntitiesInt(ctxt, ent->content, 4157 ent->length, XML_SUBSTITUTE_REF, 0, 0, 0, 4158 /* check */ 1); 4159 --ctxt->depth; 4160 if (rep != NULL) { 4161 current = rep; 4162 while (*current != 0) { /* non input 
consuming */ 4163 if ((*current == 0xD) || (*current == 0xA) || 4164 (*current == 0x9)) { 4165 buf[len++] = 0x20; 4166 current++; 4167 } else 4168 buf[len++] = *current++; 4169 if (len + 10 > buf_size) { 4170 growBuffer(buf, 10); 4171 } 4172 } 4173 xmlFree(rep); 4174 rep = NULL; 4175 } 4176 } else { 4177 if (len + 10 > buf_size) { 4178 growBuffer(buf, 10); 4179 } 4180 if (ent->content != NULL) 4181 buf[len++] = ent->content[0]; 4182 } 4183 } else if (ent != NULL) { 4184 int i = xmlStrlen(ent->name); 4185 const xmlChar *cur = ent->name; 4186 4187 /* 4188 * We also check for recursion and amplification 4189 * when entities are not substituted. They're 4190 * often expanded later. 4191 */ 4192 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 4193 (ent->content != NULL)) { 4194 if ((ent->flags & XML_ENT_CHECKED) == 0) { 4195 unsigned long oldCopy = ctxt->sizeentcopy; 4196 4197 ctxt->sizeentcopy = ent->length; 4198 4199 ++ctxt->depth; 4200 rep = xmlStringDecodeEntitiesInt(ctxt, 4201 ent->content, ent->length, 4202 XML_SUBSTITUTE_REF, 0, 0, 0, 4203 /* check */ 1); 4204 --ctxt->depth; 4205 4206 /* 4207 * If we're parsing DTD content, the entity 4208 * might reference other entities which 4209 * weren't defined yet, so the check isn't 4210 * reliable. 
4211 */ 4212 if (ctxt->inSubset == 0) { 4213 ent->flags |= XML_ENT_CHECKED; 4214 ent->expandedSize = ctxt->sizeentcopy; 4215 } 4216 4217 if (rep != NULL) { 4218 xmlFree(rep); 4219 rep = NULL; 4220 } else { 4221 ent->content[0] = 0; 4222 } 4223 4224 if (xmlParserEntityCheck(ctxt, oldCopy)) 4225 goto error; 4226 } else { 4227 if (xmlParserEntityCheck(ctxt, ent->expandedSize)) 4228 goto error; 4229 } 4230 } 4231 4232 /* 4233 * Just output the reference 4234 */ 4235 buf[len++] = '&'; 4236 while (len + i + 10 > buf_size) { 4237 growBuffer(buf, i + 10); 4238 } 4239 for (;i > 0;i--) 4240 buf[len++] = *cur++; 4241 buf[len++] = ';'; 4242 } 4243 } 4244 } else { 4245 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4246 if ((len != 0) || (!normalize)) { 4247 if ((!normalize) || (!in_space)) { 4248 COPY_BUF(buf, len, 0x20); 4249 while (len + 10 > buf_size) { 4250 growBuffer(buf, 10); 4251 } 4252 } 4253 in_space = 1; 4254 } 4255 } else { 4256 in_space = 0; 4257 COPY_BUF(buf, len, c); 4258 if (len + 10 > buf_size) { 4259 growBuffer(buf, 10); 4260 } 4261 } 4262 NEXTL(l); 4263 } 4264 GROW; 4265 c = CUR_CHAR(l); 4266 if (len > maxLength) { 4267 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4268 "AttValue length too long\n"); 4269 goto mem_error; 4270 } 4271 } 4272 if (ctxt->instate == XML_PARSER_EOF) 4273 goto error; 4274 4275 if ((in_space) && (normalize)) { 4276 while ((len > 0) && (buf[len - 1] == 0x20)) len--; 4277 } 4278 buf[len] = 0; 4279 if (RAW == '<') { 4280 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4281 } else if (RAW != limit) { 4282 if ((c != 0) && (!IS_CHAR(c))) { 4283 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4284 "invalid character in attribute value\n"); 4285 } else { 4286 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4287 "AttValue: ' expected\n"); 4288 } 4289 } else 4290 NEXT; 4291 4292 if (attlen != NULL) *attlen = len; 4293 return(buf); 4294 4295mem_error: 4296 xmlErrMemory(ctxt, NULL); 4297error: 4298 if (buf != NULL) 4299 
xmlFree(buf); 4300 if (rep != NULL) 4301 xmlFree(rep); 4302 return(NULL); 4303} 4304 4305/** 4306 * xmlParseAttValue: 4307 * @ctxt: an XML parser context 4308 * 4309 * DEPRECATED: Internal function, don't use. 4310 * 4311 * parse a value for an attribute 4312 * Note: the parser won't do substitution of entities here, this 4313 * will be handled later in xmlStringGetNodeList 4314 * 4315 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4316 * "'" ([^<&'] | Reference)* "'" 4317 * 4318 * 3.3.3 Attribute-Value Normalization: 4319 * Before the value of an attribute is passed to the application or 4320 * checked for validity, the XML processor must normalize it as follows: 4321 * - a character reference is processed by appending the referenced 4322 * character to the attribute value 4323 * - an entity reference is processed by recursively processing the 4324 * replacement text of the entity 4325 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4326 * appending #x20 to the normalized value, except that only a single 4327 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4328 * parsed entity or the literal entity value of an internal parsed entity 4329 * - other characters are processed by appending them to the normalized value 4330 * If the declared value is not CDATA, then the XML processor must further 4331 * process the normalized attribute value by discarding any leading and 4332 * trailing space (#x20) characters, and by replacing sequences of space 4333 * (#x20) characters by a single space (#x20) character. 4334 * All attributes for which no declaration has been read should be treated 4335 * by a non-validating parser as if declared CDATA. 4336 * 4337 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
4338 */ 4339 4340 4341xmlChar * 4342xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4343 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4344 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4345} 4346 4347/** 4348 * xmlParseSystemLiteral: 4349 * @ctxt: an XML parser context 4350 * 4351 * DEPRECATED: Internal function, don't use. 4352 * 4353 * parse an XML Literal 4354 * 4355 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4356 * 4357 * Returns the SystemLiteral parsed or NULL 4358 */ 4359 4360xmlChar * 4361xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4362 xmlChar *buf = NULL; 4363 int len = 0; 4364 int size = XML_PARSER_BUFFER_SIZE; 4365 int cur, l; 4366 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 4367 XML_MAX_TEXT_LENGTH : 4368 XML_MAX_NAME_LENGTH; 4369 xmlChar stop; 4370 int state = ctxt->instate; 4371 4372 if (RAW == '"') { 4373 NEXT; 4374 stop = '"'; 4375 } else if (RAW == '\'') { 4376 NEXT; 4377 stop = '\''; 4378 } else { 4379 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4380 return(NULL); 4381 } 4382 4383 buf = (xmlChar *) xmlMallocAtomic(size); 4384 if (buf == NULL) { 4385 xmlErrMemory(ctxt, NULL); 4386 return(NULL); 4387 } 4388 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4389 cur = CUR_CHAR(l); 4390 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4391 if (len + 5 >= size) { 4392 xmlChar *tmp; 4393 4394 size *= 2; 4395 tmp = (xmlChar *) xmlRealloc(buf, size); 4396 if (tmp == NULL) { 4397 xmlFree(buf); 4398 xmlErrMemory(ctxt, NULL); 4399 ctxt->instate = (xmlParserInputState) state; 4400 return(NULL); 4401 } 4402 buf = tmp; 4403 } 4404 COPY_BUF(buf, len, cur); 4405 if (len > maxLength) { 4406 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4407 xmlFree(buf); 4408 ctxt->instate = (xmlParserInputState) state; 4409 return(NULL); 4410 } 4411 NEXTL(l); 4412 cur = CUR_CHAR(l); 4413 } 4414 buf[len] = 0; 4415 if (ctxt->instate == XML_PARSER_EOF) { 4416 xmlFree(buf); 4417 return(NULL); 4418 } 4419 ctxt->instate 
= (xmlParserInputState) state; 4420 if (!IS_CHAR(cur)) { 4421 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4422 } else { 4423 NEXT; 4424 } 4425 return(buf); 4426} 4427 4428/** 4429 * xmlParsePubidLiteral: 4430 * @ctxt: an XML parser context 4431 * 4432 * DEPRECATED: Internal function, don't use. 4433 * 4434 * parse an XML public literal 4435 * 4436 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4437 * 4438 * Returns the PubidLiteral parsed or NULL. 4439 */ 4440 4441xmlChar * 4442xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4443 xmlChar *buf = NULL; 4444 int len = 0; 4445 int size = XML_PARSER_BUFFER_SIZE; 4446 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 4447 XML_MAX_TEXT_LENGTH : 4448 XML_MAX_NAME_LENGTH; 4449 xmlChar cur; 4450 xmlChar stop; 4451 xmlParserInputState oldstate = ctxt->instate; 4452 4453 if (RAW == '"') { 4454 NEXT; 4455 stop = '"'; 4456 } else if (RAW == '\'') { 4457 NEXT; 4458 stop = '\''; 4459 } else { 4460 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4461 return(NULL); 4462 } 4463 buf = (xmlChar *) xmlMallocAtomic(size); 4464 if (buf == NULL) { 4465 xmlErrMemory(ctxt, NULL); 4466 return(NULL); 4467 } 4468 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4469 cur = CUR; 4470 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4471 if (len + 1 >= size) { 4472 xmlChar *tmp; 4473 4474 size *= 2; 4475 tmp = (xmlChar *) xmlRealloc(buf, size); 4476 if (tmp == NULL) { 4477 xmlErrMemory(ctxt, NULL); 4478 xmlFree(buf); 4479 return(NULL); 4480 } 4481 buf = tmp; 4482 } 4483 buf[len++] = cur; 4484 if (len > maxLength) { 4485 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4486 xmlFree(buf); 4487 return(NULL); 4488 } 4489 NEXT; 4490 cur = CUR; 4491 } 4492 buf[len] = 0; 4493 if (ctxt->instate == XML_PARSER_EOF) { 4494 xmlFree(buf); 4495 return(NULL); 4496 } 4497 if (cur != stop) { 4498 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4499 } else { 4500 NEXTL(1); 4501 } 4502 ctxt->instate = 
oldstate; 4503 return(buf); 4504} 4505 4506static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial); 4507 4508/* 4509 * used for the test in the inner loop of the char data testing 4510 */ 4511static const unsigned char test_char_data[256] = { 4512 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4513 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4514 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4515 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4516 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4517 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4518 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4519 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4520 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4521 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4522 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4523 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4524 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4525 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4526 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4527 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4528 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4529 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4530 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4531 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4532 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4533 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4534 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4535 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4536 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4537 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4538 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4539 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4540 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4541 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4542 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4543 0x00, 0x00, 0x00, 0x00, 
/**
 * xmlParseCharDataInternal:
 * @ctxt:  an XML parser context
 * @partial:  buffer may contain partial UTF-8 sequences
 *
 * Parse character data. Always makes progress if the first char isn't
 * '<' or '&'.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * This function is the fast path: it scans bytes directly in the input
 * buffer and hands them to the SAX callbacks without copying. As soon as
 * the loop condition sees a byte outside 0x20..0x7F, 0x09 and 0x0A it
 * restores the last saved line/column and defers to
 * xmlParseCharDataComplex().
 */
static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
    const xmlChar *in;
    int nbchar = 0;
    /* position after the last delivered chunk, restored before fallback */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
get_more_space:
        /* Step 1: skip a leading run of spaces and line feeds. */
        while (*in == 0x20) { in++; ctxt->input->col++; }
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more_space;
        }
        if (*in == '<') {
            /*
             * Only blanks before the next markup: deliver them, letting
             * areBlanks() choose between ignorableWhitespace and
             * characters when the two callbacks differ.
             */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                const xmlChar *tmp = ctxt->input->cur;
                ctxt->input->cur = in;

                if ((ctxt->sax != NULL) &&
                    (ctxt->disableSAX == 0) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters)) {
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                   tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                } else if ((ctxt->sax != NULL) &&
                           (ctxt->disableSAX == 0) &&
                           (ctxt->sax->characters != NULL)) {
                    ctxt->sax->characters(ctxt->userData,
                                          tmp, nbchar);
                }
            }
            return;
        }

get_more:
        /*
         * Step 2: advance over every byte whose test_char_data entry is
         * non-zero, counting columns in a local and storing once.
         */
        ccol = ctxt->input->col;
        while (test_char_data[*in]) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        if (*in == ']') {
            /* "]]>" is an error here; a lone ']' is ordinary data */
            if ((in[1] == ']') && (in[2] == '>')) {
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                if (ctxt->instate != XML_PARSER_EOF)
                    ctxt->input->cur = in + 1;
                return;
            }
            in++;
            ctxt->input->col++;
            goto get_more;
        }
        /* Step 3: deliver what was scanned since input->cur. */
        nbchar = in - ctxt->input->cur;
        if (nbchar > 0) {
            if ((ctxt->sax != NULL) &&
                (ctxt->disableSAX == 0) &&
                (ctxt->sax->ignorableWhitespace !=
                 ctxt->sax->characters) &&
                (IS_BLANK_CH(*ctxt->input->cur))) {
                const xmlChar *tmp = ctxt->input->cur;
                ctxt->input->cur = in;

                if (areBlanks(ctxt, tmp, nbchar, 0)) {
                    if (ctxt->sax->ignorableWhitespace != NULL)
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    if (*ctxt->space == -1)
                        *ctxt->space = -2;
                }
                line = ctxt->input->line;
                col = ctxt->input->col;
            } else if ((ctxt->sax != NULL) &&
                       (ctxt->disableSAX == 0)) {
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt->userData,
                                          ctxt->input->cur, nbchar);
                line = ctxt->input->line;
                col = ctxt->input->col;
            }
            /* the parser state is re-checked after the callbacks */
            if (ctxt->instate == XML_PARSER_EOF)
                return;
        }
        ctxt->input->cur = in;
        if (*in == 0xD) {
            /*
             * For "\r\n" the cursor is moved onto the '\n', so the '\r'
             * is not part of the next chunk; a lone '\r' is left in place.
             */
            in++;
            if (*in == 0xA) {
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            in--;
        }
        if (*in == '<') {
            return;
        }
        if (*in == '&') {
            return;
        }
        SHRINK;
        GROW;
        if (ctxt->instate == XML_PARSER_EOF)
            return;
        /* re-read the cursor after SHRINK/GROW */
        in = ctxt->input->cur;
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
             (*in == 0x09) || (*in == 0x0a));
    /* fall back to the character-by-character parser */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, partial);
}
return; 4691 } 4692 SHRINK; 4693 GROW; 4694 if (ctxt->instate == XML_PARSER_EOF) 4695 return; 4696 in = ctxt->input->cur; 4697 } while (((*in >= 0x20) && (*in <= 0x7F)) || 4698 (*in == 0x09) || (*in == 0x0a)); 4699 ctxt->input->line = line; 4700 ctxt->input->col = col; 4701 xmlParseCharDataComplex(ctxt, partial); 4702} 4703 4704/** 4705 * xmlParseCharDataComplex: 4706 * @ctxt: an XML parser context 4707 * @cdata: int indicating whether we are within a CDATA section 4708 * 4709 * Always makes progress if the first char isn't '<' or '&'. 4710 * 4711 * parse a CharData section.this is the fallback function 4712 * of xmlParseCharData() when the parsing requires handling 4713 * of non-ASCII characters. 4714 */ 4715static void 4716xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) { 4717 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4718 int nbchar = 0; 4719 int cur, l; 4720 4721 cur = CUR_CHAR(l); 4722 while ((cur != '<') && /* checked */ 4723 (cur != '&') && 4724 (IS_CHAR(cur))) { 4725 if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 4726 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4727 } 4728 COPY_BUF(buf, nbchar, cur); 4729 /* move current position before possible calling of ctxt->sax->characters */ 4730 NEXTL(l); 4731 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4732 buf[nbchar] = 0; 4733 4734 /* 4735 * OK the segment is to be consumed as chars. 
4736 */ 4737 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4738 if (areBlanks(ctxt, buf, nbchar, 0)) { 4739 if (ctxt->sax->ignorableWhitespace != NULL) 4740 ctxt->sax->ignorableWhitespace(ctxt->userData, 4741 buf, nbchar); 4742 } else { 4743 if (ctxt->sax->characters != NULL) 4744 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4745 if ((ctxt->sax->characters != 4746 ctxt->sax->ignorableWhitespace) && 4747 (*ctxt->space == -1)) 4748 *ctxt->space = -2; 4749 } 4750 } 4751 nbchar = 0; 4752 /* something really bad happened in the SAX callback */ 4753 if (ctxt->instate != XML_PARSER_CONTENT) 4754 return; 4755 SHRINK; 4756 } 4757 cur = CUR_CHAR(l); 4758 } 4759 if (ctxt->instate == XML_PARSER_EOF) 4760 return; 4761 if (nbchar != 0) { 4762 buf[nbchar] = 0; 4763 /* 4764 * OK the segment is to be consumed as chars. 4765 */ 4766 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4767 if (areBlanks(ctxt, buf, nbchar, 0)) { 4768 if (ctxt->sax->ignorableWhitespace != NULL) 4769 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4770 } else { 4771 if (ctxt->sax->characters != NULL) 4772 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4773 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4774 (*ctxt->space == -1)) 4775 *ctxt->space = -2; 4776 } 4777 } 4778 } 4779 /* 4780 * cur == 0 can mean 4781 * 4782 * - XML_PARSER_EOF or memory error. This is checked above. 4783 * - An actual 0 character. 4784 * - End of buffer. 4785 * - An incomplete UTF-8 sequence. This is allowed if partial is set. 
4786 */ 4787 if (ctxt->input->cur < ctxt->input->end) { 4788 if ((cur == 0) && (CUR != 0)) { 4789 if (partial == 0) { 4790 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4791 "Incomplete UTF-8 sequence starting with %02X\n", CUR); 4792 NEXTL(1); 4793 } 4794 } else if ((cur != '<') && (cur != '&')) { 4795 /* Generate the error and skip the offending character */ 4796 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4797 "PCDATA invalid Char value %d\n", cur); 4798 NEXTL(l); 4799 } 4800 } 4801} 4802 4803/** 4804 * xmlParseCharData: 4805 * @ctxt: an XML parser context 4806 * @cdata: unused 4807 * 4808 * DEPRECATED: Internal function, don't use. 4809 */ 4810void 4811xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) { 4812 xmlParseCharDataInternal(ctxt, 0); 4813} 4814 4815/** 4816 * xmlParseExternalID: 4817 * @ctxt: an XML parser context 4818 * @publicID: a xmlChar** receiving PubidLiteral 4819 * @strict: indicate whether we should restrict parsing to only 4820 * production [75], see NOTE below 4821 * 4822 * DEPRECATED: Internal function, don't use. 4823 * 4824 * Parse an External ID or a Public ID 4825 * 4826 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4827 * 'PUBLIC' S PubidLiteral S SystemLiteral 4828 * 4829 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4830 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4831 * 4832 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4833 * 4834 * Returns the function returns SystemLiteral and in the second 4835 * case publicID receives PubidLiteral, is strict is off 4836 * it is possible to return NULL and have publicID set. 
4837 */ 4838 4839xmlChar * 4840xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4841 xmlChar *URI = NULL; 4842 4843 *publicID = NULL; 4844 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4845 SKIP(6); 4846 if (SKIP_BLANKS == 0) { 4847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4848 "Space required after 'SYSTEM'\n"); 4849 } 4850 URI = xmlParseSystemLiteral(ctxt); 4851 if (URI == NULL) { 4852 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4853 } 4854 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4855 SKIP(6); 4856 if (SKIP_BLANKS == 0) { 4857 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4858 "Space required after 'PUBLIC'\n"); 4859 } 4860 *publicID = xmlParsePubidLiteral(ctxt); 4861 if (*publicID == NULL) { 4862 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4863 } 4864 if (strict) { 4865 /* 4866 * We don't handle [83] so "S SystemLiteral" is required. 4867 */ 4868 if (SKIP_BLANKS == 0) { 4869 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4870 "Space required after the Public Identifier\n"); 4871 } 4872 } else { 4873 /* 4874 * We handle [83] so we return immediately, if 4875 * "S SystemLiteral" is not detected. We skip blanks if no 4876 * system literal was found, but this is harmless since we must 4877 * be at the end of a NotationDecl. 4878 */ 4879 if (SKIP_BLANKS == 0) return(NULL); 4880 if ((CUR != '\'') && (CUR != '"')) return(NULL); 4881 } 4882 URI = xmlParseSystemLiteral(ctxt); 4883 if (URI == NULL) { 4884 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4885 } 4886 } 4887 return(URI); 4888} 4889 4890/** 4891 * xmlParseCommentComplex: 4892 * @ctxt: an XML parser context 4893 * @buf: the already parsed part of the buffer 4894 * @len: number of bytes in the buffer 4895 * @size: allocated size of the buffer 4896 * 4897 * Skip an XML (SGML) comment <!-- .... --> 4898 * The spec says that "For compatibility, the string "--" (double-hyphen) 4899 * must not occur within comments. 
" 4900 * This is the slow routine in case the accelerator for ascii didn't work 4901 * 4902 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4903 */ 4904static void 4905xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4906 size_t len, size_t size) { 4907 int q, ql; 4908 int r, rl; 4909 int cur, l; 4910 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? 4911 XML_MAX_HUGE_LENGTH : 4912 XML_MAX_TEXT_LENGTH; 4913 int inputid; 4914 4915 inputid = ctxt->input->id; 4916 4917 if (buf == NULL) { 4918 len = 0; 4919 size = XML_PARSER_BUFFER_SIZE; 4920 buf = (xmlChar *) xmlMallocAtomic(size); 4921 if (buf == NULL) { 4922 xmlErrMemory(ctxt, NULL); 4923 return; 4924 } 4925 } 4926 q = CUR_CHAR(ql); 4927 if (q == 0) 4928 goto not_terminated; 4929 if (!IS_CHAR(q)) { 4930 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4931 "xmlParseComment: invalid xmlChar value %d\n", 4932 q); 4933 xmlFree (buf); 4934 return; 4935 } 4936 NEXTL(ql); 4937 r = CUR_CHAR(rl); 4938 if (r == 0) 4939 goto not_terminated; 4940 if (!IS_CHAR(r)) { 4941 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4942 "xmlParseComment: invalid xmlChar value %d\n", 4943 r); 4944 xmlFree (buf); 4945 return; 4946 } 4947 NEXTL(rl); 4948 cur = CUR_CHAR(l); 4949 if (cur == 0) 4950 goto not_terminated; 4951 while (IS_CHAR(cur) && /* checked */ 4952 ((cur != '>') || 4953 (r != '-') || (q != '-'))) { 4954 if ((r == '-') && (q == '-')) { 4955 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4956 } 4957 if (len + 5 >= size) { 4958 xmlChar *new_buf; 4959 size_t new_size; 4960 4961 new_size = size * 2; 4962 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4963 if (new_buf == NULL) { 4964 xmlFree (buf); 4965 xmlErrMemory(ctxt, NULL); 4966 return; 4967 } 4968 buf = new_buf; 4969 size = new_size; 4970 } 4971 COPY_BUF(buf, len, q); 4972 if (len > maxLength) { 4973 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4974 "Comment too big found", NULL); 4975 xmlFree (buf); 4976 return; 4977 } 4978 4979 q = 
r; 4980 ql = rl; 4981 r = cur; 4982 rl = l; 4983 4984 NEXTL(l); 4985 cur = CUR_CHAR(l); 4986 4987 } 4988 buf[len] = 0; 4989 if (ctxt->instate == XML_PARSER_EOF) { 4990 xmlFree(buf); 4991 return; 4992 } 4993 if (cur == 0) { 4994 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4995 "Comment not terminated \n<!--%.50s\n", buf); 4996 } else if (!IS_CHAR(cur)) { 4997 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4998 "xmlParseComment: invalid xmlChar value %d\n", 4999 cur); 5000 } else { 5001 if (inputid != ctxt->input->id) { 5002 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5003 "Comment doesn't start and stop in the same" 5004 " entity\n"); 5005 } 5006 NEXT; 5007 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5008 (!ctxt->disableSAX)) 5009 ctxt->sax->comment(ctxt->userData, buf); 5010 } 5011 xmlFree(buf); 5012 return; 5013not_terminated: 5014 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 5015 "Comment not terminated\n", NULL); 5016 xmlFree(buf); 5017 return; 5018} 5019 5020/** 5021 * xmlParseComment: 5022 * @ctxt: an XML parser context 5023 * 5024 * DEPRECATED: Internal function, don't use. 5025 * 5026 * Parse an XML (SGML) comment. Always consumes '<!'. 5027 * 5028 * The spec says that "For compatibility, the string "--" (double-hyphen) 5029 * must not occur within comments. " 5030 * 5031 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 5032 */ 5033void 5034xmlParseComment(xmlParserCtxtPtr ctxt) { 5035 xmlChar *buf = NULL; 5036 size_t size = XML_PARSER_BUFFER_SIZE; 5037 size_t len = 0; 5038 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? 5039 XML_MAX_HUGE_LENGTH : 5040 XML_MAX_TEXT_LENGTH; 5041 xmlParserInputState state; 5042 const xmlChar *in; 5043 size_t nbchar = 0; 5044 int ccol; 5045 int inputid; 5046 5047 /* 5048 * Check that there is a comment right here. 
5049 */ 5050 if ((RAW != '<') || (NXT(1) != '!')) 5051 return; 5052 SKIP(2); 5053 if ((RAW != '-') || (NXT(1) != '-')) 5054 return; 5055 state = ctxt->instate; 5056 ctxt->instate = XML_PARSER_COMMENT; 5057 inputid = ctxt->input->id; 5058 SKIP(2); 5059 GROW; 5060 5061 /* 5062 * Accelerated common case where input don't need to be 5063 * modified before passing it to the handler. 5064 */ 5065 in = ctxt->input->cur; 5066 do { 5067 if (*in == 0xA) { 5068 do { 5069 ctxt->input->line++; ctxt->input->col = 1; 5070 in++; 5071 } while (*in == 0xA); 5072 } 5073get_more: 5074 ccol = ctxt->input->col; 5075 while (((*in > '-') && (*in <= 0x7F)) || 5076 ((*in >= 0x20) && (*in < '-')) || 5077 (*in == 0x09)) { 5078 in++; 5079 ccol++; 5080 } 5081 ctxt->input->col = ccol; 5082 if (*in == 0xA) { 5083 do { 5084 ctxt->input->line++; ctxt->input->col = 1; 5085 in++; 5086 } while (*in == 0xA); 5087 goto get_more; 5088 } 5089 nbchar = in - ctxt->input->cur; 5090 /* 5091 * save current set of data 5092 */ 5093 if (nbchar > 0) { 5094 if (buf == NULL) { 5095 if ((*in == '-') && (in[1] == '-')) 5096 size = nbchar + 1; 5097 else 5098 size = XML_PARSER_BUFFER_SIZE + nbchar; 5099 buf = (xmlChar *) xmlMallocAtomic(size); 5100 if (buf == NULL) { 5101 xmlErrMemory(ctxt, NULL); 5102 ctxt->instate = state; 5103 return; 5104 } 5105 len = 0; 5106 } else if (len + nbchar + 1 >= size) { 5107 xmlChar *new_buf; 5108 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 5109 new_buf = (xmlChar *) xmlRealloc(buf, size); 5110 if (new_buf == NULL) { 5111 xmlFree (buf); 5112 xmlErrMemory(ctxt, NULL); 5113 ctxt->instate = state; 5114 return; 5115 } 5116 buf = new_buf; 5117 } 5118 memcpy(&buf[len], ctxt->input->cur, nbchar); 5119 len += nbchar; 5120 buf[len] = 0; 5121 } 5122 if (len > maxLength) { 5123 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 5124 "Comment too big found", NULL); 5125 xmlFree (buf); 5126 return; 5127 } 5128 ctxt->input->cur = in; 5129 if (*in == 0xA) { 5130 in++; 5131 ctxt->input->line++; 
ctxt->input->col = 1; 5132 } 5133 if (*in == 0xD) { 5134 in++; 5135 if (*in == 0xA) { 5136 ctxt->input->cur = in; 5137 in++; 5138 ctxt->input->line++; ctxt->input->col = 1; 5139 goto get_more; 5140 } 5141 in--; 5142 } 5143 SHRINK; 5144 GROW; 5145 if (ctxt->instate == XML_PARSER_EOF) { 5146 xmlFree(buf); 5147 return; 5148 } 5149 in = ctxt->input->cur; 5150 if (*in == '-') { 5151 if (in[1] == '-') { 5152 if (in[2] == '>') { 5153 if (ctxt->input->id != inputid) { 5154 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5155 "comment doesn't start and stop in the" 5156 " same entity\n"); 5157 } 5158 SKIP(3); 5159 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5160 (!ctxt->disableSAX)) { 5161 if (buf != NULL) 5162 ctxt->sax->comment(ctxt->userData, buf); 5163 else 5164 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 5165 } 5166 if (buf != NULL) 5167 xmlFree(buf); 5168 if (ctxt->instate != XML_PARSER_EOF) 5169 ctxt->instate = state; 5170 return; 5171 } 5172 if (buf != NULL) { 5173 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5174 "Double hyphen within comment: " 5175 "<!--%.50s\n", 5176 buf); 5177 } else 5178 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5179 "Double hyphen within comment\n", NULL); 5180 if (ctxt->instate == XML_PARSER_EOF) { 5181 xmlFree(buf); 5182 return; 5183 } 5184 in++; 5185 ctxt->input->col++; 5186 } 5187 in++; 5188 ctxt->input->col++; 5189 goto get_more; 5190 } 5191 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a)); 5192 xmlParseCommentComplex(ctxt, buf, len, size); 5193 ctxt->instate = state; 5194 return; 5195} 5196 5197 5198/** 5199 * xmlParsePITarget: 5200 * @ctxt: an XML parser context 5201 * 5202 * DEPRECATED: Internal function, don't use. 
5203 * 5204 * parse the name of a PI 5205 * 5206 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 5207 * 5208 * Returns the PITarget name or NULL 5209 */ 5210 5211const xmlChar * 5212xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5213 const xmlChar *name; 5214 5215 name = xmlParseName(ctxt); 5216 if ((name != NULL) && 5217 ((name[0] == 'x') || (name[0] == 'X')) && 5218 ((name[1] == 'm') || (name[1] == 'M')) && 5219 ((name[2] == 'l') || (name[2] == 'L'))) { 5220 int i; 5221 if ((name[0] == 'x') && (name[1] == 'm') && 5222 (name[2] == 'l') && (name[3] == 0)) { 5223 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5224 "XML declaration allowed only at the start of the document\n"); 5225 return(name); 5226 } else if (name[3] == 0) { 5227 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5228 return(name); 5229 } 5230 for (i = 0;;i++) { 5231 if (xmlW3CPIs[i] == NULL) break; 5232 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5233 return(name); 5234 } 5235 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5236 "xmlParsePITarget: invalid name prefix 'xml'\n", 5237 NULL, NULL); 5238 } 5239 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5240 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5241 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5242 } 5243 return(name); 5244} 5245 5246#ifdef LIBXML_CATALOG_ENABLED 5247/** 5248 * xmlParseCatalogPI: 5249 * @ctxt: an XML parser context 5250 * @catalog: the PI value string 5251 * 5252 * parse an XML Catalog Processing Instruction. 
5253 * 5254 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5255 * 5256 * Occurs only if allowed by the user and if happening in the Misc 5257 * part of the document before any doctype information 5258 * This will add the given catalog to the parsing context in order 5259 * to be used if there is a resolution need further down in the document 5260 */ 5261 5262static void 5263xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5264 xmlChar *URL = NULL; 5265 const xmlChar *tmp, *base; 5266 xmlChar marker; 5267 5268 tmp = catalog; 5269 while (IS_BLANK_CH(*tmp)) tmp++; 5270 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5271 goto error; 5272 tmp += 7; 5273 while (IS_BLANK_CH(*tmp)) tmp++; 5274 if (*tmp != '=') { 5275 return; 5276 } 5277 tmp++; 5278 while (IS_BLANK_CH(*tmp)) tmp++; 5279 marker = *tmp; 5280 if ((marker != '\'') && (marker != '"')) 5281 goto error; 5282 tmp++; 5283 base = tmp; 5284 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5285 if (*tmp == 0) 5286 goto error; 5287 URL = xmlStrndup(base, tmp - base); 5288 tmp++; 5289 while (IS_BLANK_CH(*tmp)) tmp++; 5290 if (*tmp != 0) 5291 goto error; 5292 5293 if (URL != NULL) { 5294 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5295 xmlFree(URL); 5296 } 5297 return; 5298 5299error: 5300 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5301 "Catalog PI syntax error: %s\n", 5302 catalog, NULL); 5303 if (URL != NULL) 5304 xmlFree(URL); 5305} 5306#endif 5307 5308/** 5309 * xmlParsePI: 5310 * @ctxt: an XML parser context 5311 * 5312 * DEPRECATED: Internal function, don't use. 5313 * 5314 * parse an XML Processing Instruction. 5315 * 5316 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5317 * 5318 * The processing is transferred to SAX once parsed. 5319 */ 5320 5321void 5322xmlParsePI(xmlParserCtxtPtr ctxt) { 5323 xmlChar *buf = NULL; 5324 size_t len = 0; 5325 size_t size = XML_PARSER_BUFFER_SIZE; 5326 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? 
5327 XML_MAX_HUGE_LENGTH : 5328 XML_MAX_TEXT_LENGTH; 5329 int cur, l; 5330 const xmlChar *target; 5331 xmlParserInputState state; 5332 5333 if ((RAW == '<') && (NXT(1) == '?')) { 5334 int inputid = ctxt->input->id; 5335 state = ctxt->instate; 5336 ctxt->instate = XML_PARSER_PI; 5337 /* 5338 * this is a Processing Instruction. 5339 */ 5340 SKIP(2); 5341 5342 /* 5343 * Parse the target name and check for special support like 5344 * namespace. 5345 */ 5346 target = xmlParsePITarget(ctxt); 5347 if (target != NULL) { 5348 if ((RAW == '?') && (NXT(1) == '>')) { 5349 if (inputid != ctxt->input->id) { 5350 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5351 "PI declaration doesn't start and stop in" 5352 " the same entity\n"); 5353 } 5354 SKIP(2); 5355 5356 /* 5357 * SAX: PI detected. 5358 */ 5359 if ((ctxt->sax) && (!ctxt->disableSAX) && 5360 (ctxt->sax->processingInstruction != NULL)) 5361 ctxt->sax->processingInstruction(ctxt->userData, 5362 target, NULL); 5363 if (ctxt->instate != XML_PARSER_EOF) 5364 ctxt->instate = state; 5365 return; 5366 } 5367 buf = (xmlChar *) xmlMallocAtomic(size); 5368 if (buf == NULL) { 5369 xmlErrMemory(ctxt, NULL); 5370 ctxt->instate = state; 5371 return; 5372 } 5373 if (SKIP_BLANKS == 0) { 5374 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5375 "ParsePI: PI %s space expected\n", target); 5376 } 5377 cur = CUR_CHAR(l); 5378 while (IS_CHAR(cur) && /* checked */ 5379 ((cur != '?') || (NXT(1) != '>'))) { 5380 if (len + 5 >= size) { 5381 xmlChar *tmp; 5382 size_t new_size = size * 2; 5383 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5384 if (tmp == NULL) { 5385 xmlErrMemory(ctxt, NULL); 5386 xmlFree(buf); 5387 ctxt->instate = state; 5388 return; 5389 } 5390 buf = tmp; 5391 size = new_size; 5392 } 5393 COPY_BUF(buf, len, cur); 5394 if (len > maxLength) { 5395 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5396 "PI %s too big found", target); 5397 xmlFree(buf); 5398 ctxt->instate = state; 5399 return; 5400 } 5401 NEXTL(l); 5402 cur = 
CUR_CHAR(l); 5403 } 5404 buf[len] = 0; 5405 if (ctxt->instate == XML_PARSER_EOF) { 5406 xmlFree(buf); 5407 return; 5408 } 5409 if (cur != '?') { 5410 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5411 "ParsePI: PI %s never end ...\n", target); 5412 } else { 5413 if (inputid != ctxt->input->id) { 5414 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5415 "PI declaration doesn't start and stop in" 5416 " the same entity\n"); 5417 } 5418 SKIP(2); 5419 5420#ifdef LIBXML_CATALOG_ENABLED 5421 if (((state == XML_PARSER_MISC) || 5422 (state == XML_PARSER_START)) && 5423 (xmlStrEqual(target, XML_CATALOG_PI))) { 5424 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5425 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5426 (allow == XML_CATA_ALLOW_ALL)) 5427 xmlParseCatalogPI(ctxt, buf); 5428 } 5429#endif 5430 5431 5432 /* 5433 * SAX: PI detected. 5434 */ 5435 if ((ctxt->sax) && (!ctxt->disableSAX) && 5436 (ctxt->sax->processingInstruction != NULL)) 5437 ctxt->sax->processingInstruction(ctxt->userData, 5438 target, buf); 5439 } 5440 xmlFree(buf); 5441 } else { 5442 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5443 } 5444 if (ctxt->instate != XML_PARSER_EOF) 5445 ctxt->instate = state; 5446 } 5447} 5448 5449/** 5450 * xmlParseNotationDecl: 5451 * @ctxt: an XML parser context 5452 * 5453 * DEPRECATED: Internal function, don't use. 5454 * 5455 * Parse a notation declaration. Always consumes '<!'. 5456 * 5457 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5458 * 5459 * Hence there is actually 3 choices: 5460 * 'PUBLIC' S PubidLiteral 5461 * 'PUBLIC' S PubidLiteral S SystemLiteral 5462 * and 'SYSTEM' S SystemLiteral 5463 * 5464 * See the NOTE on xmlParseExternalID(). 
5465 */ 5466 5467void 5468xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5469 const xmlChar *name; 5470 xmlChar *Pubid; 5471 xmlChar *Systemid; 5472 5473 if ((CUR != '<') || (NXT(1) != '!')) 5474 return; 5475 SKIP(2); 5476 5477 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5478 int inputid = ctxt->input->id; 5479 SKIP(8); 5480 if (SKIP_BLANKS == 0) { 5481 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5482 "Space required after '<!NOTATION'\n"); 5483 return; 5484 } 5485 5486 name = xmlParseName(ctxt); 5487 if (name == NULL) { 5488 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5489 return; 5490 } 5491 if (xmlStrchr(name, ':') != NULL) { 5492 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5493 "colons are forbidden from notation names '%s'\n", 5494 name, NULL, NULL); 5495 } 5496 if (SKIP_BLANKS == 0) { 5497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5498 "Space required after the NOTATION name'\n"); 5499 return; 5500 } 5501 5502 /* 5503 * Parse the IDs. 5504 */ 5505 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5506 SKIP_BLANKS; 5507 5508 if (RAW == '>') { 5509 if (inputid != ctxt->input->id) { 5510 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5511 "Notation declaration doesn't start and stop" 5512 " in the same entity\n"); 5513 } 5514 NEXT; 5515 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5516 (ctxt->sax->notationDecl != NULL)) 5517 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5518 } else { 5519 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5520 } 5521 if (Systemid != NULL) xmlFree(Systemid); 5522 if (Pubid != NULL) xmlFree(Pubid); 5523 } 5524} 5525 5526/** 5527 * xmlParseEntityDecl: 5528 * @ctxt: an XML parser context 5529 * 5530 * DEPRECATED: Internal function, don't use. 5531 * 5532 * Parse an entity declaration. Always consumes '<!'. 5533 * 5534 * [70] EntityDecl ::= GEDecl | PEDecl 5535 * 5536 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5537 * 5538 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? 
'>' 5539 * 5540 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5541 * 5542 * [74] PEDef ::= EntityValue | ExternalID 5543 * 5544 * [76] NDataDecl ::= S 'NDATA' S Name 5545 * 5546 * [ VC: Notation Declared ] 5547 * The Name must match the declared name of a notation. 5548 */ 5549 5550void 5551xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5552 const xmlChar *name = NULL; 5553 xmlChar *value = NULL; 5554 xmlChar *URI = NULL, *literal = NULL; 5555 const xmlChar *ndata = NULL; 5556 int isParameter = 0; 5557 xmlChar *orig = NULL; 5558 5559 if ((CUR != '<') || (NXT(1) != '!')) 5560 return; 5561 SKIP(2); 5562 5563 /* GROW; done in the caller */ 5564 if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5565 int inputid = ctxt->input->id; 5566 SKIP(6); 5567 if (SKIP_BLANKS == 0) { 5568 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5569 "Space required after '<!ENTITY'\n"); 5570 } 5571 5572 if (RAW == '%') { 5573 NEXT; 5574 if (SKIP_BLANKS == 0) { 5575 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5576 "Space required after '%%'\n"); 5577 } 5578 isParameter = 1; 5579 } 5580 5581 name = xmlParseName(ctxt); 5582 if (name == NULL) { 5583 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5584 "xmlParseEntityDecl: no name\n"); 5585 return; 5586 } 5587 if (xmlStrchr(name, ':') != NULL) { 5588 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5589 "colons are forbidden from entities names '%s'\n", 5590 name, NULL, NULL); 5591 } 5592 if (SKIP_BLANKS == 0) { 5593 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5594 "Space required after the entity name\n"); 5595 } 5596 5597 ctxt->instate = XML_PARSER_ENTITY_DECL; 5598 /* 5599 * handle the various case of definitions... 
5600 */ 5601 if (isParameter) { 5602 if ((RAW == '"') || (RAW == '\'')) { 5603 value = xmlParseEntityValue(ctxt, &orig); 5604 if (value) { 5605 if ((ctxt->sax != NULL) && 5606 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5607 ctxt->sax->entityDecl(ctxt->userData, name, 5608 XML_INTERNAL_PARAMETER_ENTITY, 5609 NULL, NULL, value); 5610 } 5611 } else { 5612 URI = xmlParseExternalID(ctxt, &literal, 1); 5613 if ((URI == NULL) && (literal == NULL)) { 5614 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5615 } 5616 if (URI) { 5617 xmlURIPtr uri; 5618 5619 uri = xmlParseURI((const char *) URI); 5620 if (uri == NULL) { 5621 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5622 "Invalid URI: %s\n", URI); 5623 /* 5624 * This really ought to be a well formedness error 5625 * but the XML Core WG decided otherwise c.f. issue 5626 * E26 of the XML erratas. 5627 */ 5628 } else { 5629 if (uri->fragment != NULL) { 5630 /* 5631 * Okay this is foolish to block those but not 5632 * invalid URIs. 5633 */ 5634 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5635 } else { 5636 if ((ctxt->sax != NULL) && 5637 (!ctxt->disableSAX) && 5638 (ctxt->sax->entityDecl != NULL)) 5639 ctxt->sax->entityDecl(ctxt->userData, name, 5640 XML_EXTERNAL_PARAMETER_ENTITY, 5641 literal, URI, NULL); 5642 } 5643 xmlFreeURI(uri); 5644 } 5645 } 5646 } 5647 } else { 5648 if ((RAW == '"') || (RAW == '\'')) { 5649 value = xmlParseEntityValue(ctxt, &orig); 5650 if ((ctxt->sax != NULL) && 5651 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5652 ctxt->sax->entityDecl(ctxt->userData, name, 5653 XML_INTERNAL_GENERAL_ENTITY, 5654 NULL, NULL, value); 5655 /* 5656 * For expat compatibility in SAX mode. 
5657 */ 5658 if ((ctxt->myDoc == NULL) || 5659 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5660 if (ctxt->myDoc == NULL) { 5661 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5662 if (ctxt->myDoc == NULL) { 5663 xmlErrMemory(ctxt, "New Doc failed"); 5664 goto done; 5665 } 5666 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5667 } 5668 if (ctxt->myDoc->intSubset == NULL) 5669 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5670 BAD_CAST "fake", NULL, NULL); 5671 5672 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5673 NULL, NULL, value); 5674 } 5675 } else { 5676 URI = xmlParseExternalID(ctxt, &literal, 1); 5677 if ((URI == NULL) && (literal == NULL)) { 5678 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5679 } 5680 if (URI) { 5681 xmlURIPtr uri; 5682 5683 uri = xmlParseURI((const char *)URI); 5684 if (uri == NULL) { 5685 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5686 "Invalid URI: %s\n", URI); 5687 /* 5688 * This really ought to be a well formedness error 5689 * but the XML Core WG decided otherwise c.f. issue 5690 * E26 of the XML erratas. 5691 */ 5692 } else { 5693 if (uri->fragment != NULL) { 5694 /* 5695 * Okay this is foolish to block those but not 5696 * invalid URIs. 
5697 */ 5698 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5699 } 5700 xmlFreeURI(uri); 5701 } 5702 } 5703 if ((RAW != '>') && (SKIP_BLANKS == 0)) { 5704 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5705 "Space required before 'NDATA'\n"); 5706 } 5707 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5708 SKIP(5); 5709 if (SKIP_BLANKS == 0) { 5710 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5711 "Space required after 'NDATA'\n"); 5712 } 5713 ndata = xmlParseName(ctxt); 5714 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5715 (ctxt->sax->unparsedEntityDecl != NULL)) 5716 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5717 literal, URI, ndata); 5718 } else { 5719 if ((ctxt->sax != NULL) && 5720 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5721 ctxt->sax->entityDecl(ctxt->userData, name, 5722 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5723 literal, URI, NULL); 5724 /* 5725 * For expat compatibility in SAX mode. 5726 * assuming the entity replacement was asked for 5727 */ 5728 if ((ctxt->replaceEntities != 0) && 5729 ((ctxt->myDoc == NULL) || 5730 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5731 if (ctxt->myDoc == NULL) { 5732 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5733 if (ctxt->myDoc == NULL) { 5734 xmlErrMemory(ctxt, "New Doc failed"); 5735 goto done; 5736 } 5737 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5738 } 5739 5740 if (ctxt->myDoc->intSubset == NULL) 5741 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5742 BAD_CAST "fake", NULL, NULL); 5743 xmlSAX2EntityDecl(ctxt, name, 5744 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5745 literal, URI, NULL); 5746 } 5747 } 5748 } 5749 } 5750 if (ctxt->instate == XML_PARSER_EOF) 5751 goto done; 5752 SKIP_BLANKS; 5753 if (RAW != '>') { 5754 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5755 "xmlParseEntityDecl: entity %s not terminated\n", name); 5756 xmlHaltParser(ctxt); 5757 } else { 5758 if (inputid != ctxt->input->id) { 5759 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5760 "Entity declaration 
doesn't start and stop in" 5761 " the same entity\n"); 5762 } 5763 NEXT; 5764 } 5765 if (orig != NULL) { 5766 /* 5767 * Ugly mechanism to save the raw entity value. 5768 */ 5769 xmlEntityPtr cur = NULL; 5770 5771 if (isParameter) { 5772 if ((ctxt->sax != NULL) && 5773 (ctxt->sax->getParameterEntity != NULL)) 5774 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5775 } else { 5776 if ((ctxt->sax != NULL) && 5777 (ctxt->sax->getEntity != NULL)) 5778 cur = ctxt->sax->getEntity(ctxt->userData, name); 5779 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5780 cur = xmlSAX2GetEntity(ctxt, name); 5781 } 5782 } 5783 if ((cur != NULL) && (cur->orig == NULL)) { 5784 cur->orig = orig; 5785 orig = NULL; 5786 } 5787 } 5788 5789done: 5790 if (value != NULL) xmlFree(value); 5791 if (URI != NULL) xmlFree(URI); 5792 if (literal != NULL) xmlFree(literal); 5793 if (orig != NULL) xmlFree(orig); 5794 } 5795} 5796 5797/** 5798 * xmlParseDefaultDecl: 5799 * @ctxt: an XML parser context 5800 * @value: Receive a possible fixed default value for the attribute 5801 * 5802 * DEPRECATED: Internal function, don't use. 5803 * 5804 * Parse an attribute default declaration 5805 * 5806 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5807 * 5808 * [ VC: Required Attribute ] 5809 * if the default declaration is the keyword #REQUIRED, then the 5810 * attribute must be specified for all elements of the type in the 5811 * attribute-list declaration. 5812 * 5813 * [ VC: Attribute Default Legal ] 5814 * The declared default value must meet the lexical constraints of 5815 * the declared attribute type c.f. xmlValidateAttributeDecl() 5816 * 5817 * [ VC: Fixed Attribute Default ] 5818 * if an attribute has a default value declared with the #FIXED 5819 * keyword, instances of that attribute must match the default value. 
5820 * 5821 * [ WFC: No < in Attribute Values ] 5822 * handled in xmlParseAttValue() 5823 * 5824 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5825 * or XML_ATTRIBUTE_FIXED. 5826 */ 5827 5828int 5829xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5830 int val; 5831 xmlChar *ret; 5832 5833 *value = NULL; 5834 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5835 SKIP(9); 5836 return(XML_ATTRIBUTE_REQUIRED); 5837 } 5838 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5839 SKIP(8); 5840 return(XML_ATTRIBUTE_IMPLIED); 5841 } 5842 val = XML_ATTRIBUTE_NONE; 5843 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5844 SKIP(6); 5845 val = XML_ATTRIBUTE_FIXED; 5846 if (SKIP_BLANKS == 0) { 5847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5848 "Space required after '#FIXED'\n"); 5849 } 5850 } 5851 ret = xmlParseAttValue(ctxt); 5852 ctxt->instate = XML_PARSER_DTD; 5853 if (ret == NULL) { 5854 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5855 "Attribute default value declaration error\n"); 5856 } else 5857 *value = ret; 5858 return(val); 5859} 5860 5861/** 5862 * xmlParseNotationType: 5863 * @ctxt: an XML parser context 5864 * 5865 * DEPRECATED: Internal function, don't use. 5866 * 5867 * parse an Notation attribute type. 5868 * 5869 * Note: the leading 'NOTATION' S part has already being parsed... 5870 * 5871 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5872 * 5873 * [ VC: Notation Attributes ] 5874 * Values of this type must match one of the notation names included 5875 * in the declaration; all notation names in the declaration must be declared. 
5876 * 5877 * Returns: the notation attribute tree built while parsing 5878 */ 5879 5880xmlEnumerationPtr 5881xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5882 const xmlChar *name; 5883 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5884 5885 if (RAW != '(') { 5886 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5887 return(NULL); 5888 } 5889 do { 5890 NEXT; 5891 SKIP_BLANKS; 5892 name = xmlParseName(ctxt); 5893 if (name == NULL) { 5894 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5895 "Name expected in NOTATION declaration\n"); 5896 xmlFreeEnumeration(ret); 5897 return(NULL); 5898 } 5899 tmp = ret; 5900 while (tmp != NULL) { 5901 if (xmlStrEqual(name, tmp->name)) { 5902 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5903 "standalone: attribute notation value token %s duplicated\n", 5904 name, NULL); 5905 if (!xmlDictOwns(ctxt->dict, name)) 5906 xmlFree((xmlChar *) name); 5907 break; 5908 } 5909 tmp = tmp->next; 5910 } 5911 if (tmp == NULL) { 5912 cur = xmlCreateEnumeration(name); 5913 if (cur == NULL) { 5914 xmlFreeEnumeration(ret); 5915 return(NULL); 5916 } 5917 if (last == NULL) ret = last = cur; 5918 else { 5919 last->next = cur; 5920 last = cur; 5921 } 5922 } 5923 SKIP_BLANKS; 5924 } while (RAW == '|'); 5925 if (RAW != ')') { 5926 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5927 xmlFreeEnumeration(ret); 5928 return(NULL); 5929 } 5930 NEXT; 5931 return(ret); 5932} 5933 5934/** 5935 * xmlParseEnumerationType: 5936 * @ctxt: an XML parser context 5937 * 5938 * DEPRECATED: Internal function, don't use. 5939 * 5940 * parse an Enumeration attribute type. 5941 * 5942 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5943 * 5944 * [ VC: Enumeration ] 5945 * Values of this type must match one of the Nmtoken tokens in 5946 * the declaration 5947 * 5948 * Returns: the enumeration attribute tree built while parsing 5949 */ 5950 5951xmlEnumerationPtr 5952xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5953 xmlChar *name; 5954 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5955 5956 if (RAW != '(') { 5957 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5958 return(NULL); 5959 } 5960 do { 5961 NEXT; 5962 SKIP_BLANKS; 5963 name = xmlParseNmtoken(ctxt); 5964 if (name == NULL) { 5965 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5966 return(ret); 5967 } 5968 tmp = ret; 5969 while (tmp != NULL) { 5970 if (xmlStrEqual(name, tmp->name)) { 5971 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5972 "standalone: attribute enumeration value token %s duplicated\n", 5973 name, NULL); 5974 if (!xmlDictOwns(ctxt->dict, name)) 5975 xmlFree(name); 5976 break; 5977 } 5978 tmp = tmp->next; 5979 } 5980 if (tmp == NULL) { 5981 cur = xmlCreateEnumeration(name); 5982 if (!xmlDictOwns(ctxt->dict, name)) 5983 xmlFree(name); 5984 if (cur == NULL) { 5985 xmlFreeEnumeration(ret); 5986 return(NULL); 5987 } 5988 if (last == NULL) ret = last = cur; 5989 else { 5990 last->next = cur; 5991 last = cur; 5992 } 5993 } 5994 SKIP_BLANKS; 5995 } while (RAW == '|'); 5996 if (RAW != ')') { 5997 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5998 return(ret); 5999 } 6000 NEXT; 6001 return(ret); 6002} 6003 6004/** 6005 * xmlParseEnumeratedType: 6006 * @ctxt: an XML parser context 6007 * @tree: the enumeration tree built while parsing 6008 * 6009 * DEPRECATED: Internal function, don't use. 6010 * 6011 * parse an Enumerated attribute type. 6012 * 6013 * [57] EnumeratedType ::= NotationType | Enumeration 6014 * 6015 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 6016 * 6017 * 6018 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 6019 */ 6020 6021int 6022xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 6023 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 6024 SKIP(8); 6025 if (SKIP_BLANKS == 0) { 6026 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6027 "Space required after 'NOTATION'\n"); 6028 return(0); 6029 } 6030 *tree = xmlParseNotationType(ctxt); 6031 if (*tree == NULL) return(0); 6032 return(XML_ATTRIBUTE_NOTATION); 6033 } 6034 *tree = xmlParseEnumerationType(ctxt); 6035 if (*tree == NULL) return(0); 6036 return(XML_ATTRIBUTE_ENUMERATION); 6037} 6038 6039/** 6040 * xmlParseAttributeType: 6041 * @ctxt: an XML parser context 6042 * @tree: the enumeration tree built while parsing 6043 * 6044 * DEPRECATED: Internal function, don't use. 6045 * 6046 * parse the Attribute list def for an element 6047 * 6048 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 6049 * 6050 * [55] StringType ::= 'CDATA' 6051 * 6052 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 6053 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 6054 * 6055 * Validity constraints for attribute values syntax are checked in 6056 * xmlValidateAttributeValue() 6057 * 6058 * [ VC: ID ] 6059 * Values of type ID must match the Name production. A name must not 6060 * appear more than once in an XML document as a value of this type; 6061 * i.e., ID values must uniquely identify the elements which bear them. 6062 * 6063 * [ VC: One ID per Element Type ] 6064 * No element type may have more than one ID attribute specified. 6065 * 6066 * [ VC: ID Attribute Default ] 6067 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 6068 * 6069 * [ VC: IDREF ] 6070 * Values of type IDREF must match the Name production, and values 6071 * of type IDREFS must match Names; each IDREF Name must match the value 6072 * of an ID attribute on some element in the XML document; i.e. 
IDREF 6073 * values must match the value of some ID attribute. 6074 * 6075 * [ VC: Entity Name ] 6076 * Values of type ENTITY must match the Name production, values 6077 * of type ENTITIES must match Names; each Entity Name must match the 6078 * name of an unparsed entity declared in the DTD. 6079 * 6080 * [ VC: Name Token ] 6081 * Values of type NMTOKEN must match the Nmtoken production; values 6082 * of type NMTOKENS must match Nmtokens. 6083 * 6084 * Returns the attribute type 6085 */ 6086int 6087xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 6088 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 6089 SKIP(5); 6090 return(XML_ATTRIBUTE_CDATA); 6091 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 6092 SKIP(6); 6093 return(XML_ATTRIBUTE_IDREFS); 6094 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 6095 SKIP(5); 6096 return(XML_ATTRIBUTE_IDREF); 6097 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 6098 SKIP(2); 6099 return(XML_ATTRIBUTE_ID); 6100 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 6101 SKIP(6); 6102 return(XML_ATTRIBUTE_ENTITY); 6103 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 6104 SKIP(8); 6105 return(XML_ATTRIBUTE_ENTITIES); 6106 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 6107 SKIP(8); 6108 return(XML_ATTRIBUTE_NMTOKENS); 6109 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 6110 SKIP(7); 6111 return(XML_ATTRIBUTE_NMTOKEN); 6112 } 6113 return(xmlParseEnumeratedType(ctxt, tree)); 6114} 6115 6116/** 6117 * xmlParseAttributeListDecl: 6118 * @ctxt: an XML parser context 6119 * 6120 * DEPRECATED: Internal function, don't use. 6121 * 6122 * Parse an attribute list declaration for an element. Always consumes '<!'. 6123 * 6124 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 6125 * 6126 * [53] AttDef ::= S Name S AttType S DefaultDecl 6127 * 6128 */ 6129void 6130xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 6131 const xmlChar *elemName; 6132 const xmlChar *attrName; 6133 xmlEnumerationPtr tree; 6134 6135 if ((CUR != '<') || (NXT(1) != '!')) 6136 return; 6137 SKIP(2); 6138 6139 if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 6140 int inputid = ctxt->input->id; 6141 6142 SKIP(7); 6143 if (SKIP_BLANKS == 0) { 6144 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6145 "Space required after '<!ATTLIST'\n"); 6146 } 6147 elemName = xmlParseName(ctxt); 6148 if (elemName == NULL) { 6149 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6150 "ATTLIST: no name for Element\n"); 6151 return; 6152 } 6153 SKIP_BLANKS; 6154 GROW; 6155 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 6156 int type; 6157 int def; 6158 xmlChar *defaultValue = NULL; 6159 6160 GROW; 6161 tree = NULL; 6162 attrName = xmlParseName(ctxt); 6163 if (attrName == NULL) { 6164 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6165 "ATTLIST: no name for Attribute\n"); 6166 break; 6167 } 6168 GROW; 6169 if (SKIP_BLANKS == 0) { 6170 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6171 "Space required after the attribute name\n"); 6172 break; 6173 } 6174 6175 type = xmlParseAttributeType(ctxt, &tree); 6176 if (type <= 0) { 6177 break; 6178 } 6179 6180 GROW; 6181 if (SKIP_BLANKS == 0) { 6182 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6183 "Space required after the attribute type\n"); 6184 if (tree != NULL) 6185 xmlFreeEnumeration(tree); 6186 break; 6187 } 6188 6189 def = xmlParseDefaultDecl(ctxt, &defaultValue); 6190 if (def <= 0) { 6191 if (defaultValue != NULL) 6192 xmlFree(defaultValue); 6193 if (tree != NULL) 6194 xmlFreeEnumeration(tree); 6195 break; 6196 } 6197 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 6198 xmlAttrNormalizeSpace(defaultValue, defaultValue); 6199 6200 GROW; 6201 if (RAW != '>') { 6202 if (SKIP_BLANKS == 0) { 6203 xmlFatalErrMsg(ctxt, 
XML_ERR_SPACE_REQUIRED, 6204 "Space required after the attribute default value\n"); 6205 if (defaultValue != NULL) 6206 xmlFree(defaultValue); 6207 if (tree != NULL) 6208 xmlFreeEnumeration(tree); 6209 break; 6210 } 6211 } 6212 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6213 (ctxt->sax->attributeDecl != NULL)) 6214 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6215 type, def, defaultValue, tree); 6216 else if (tree != NULL) 6217 xmlFreeEnumeration(tree); 6218 6219 if ((ctxt->sax2) && (defaultValue != NULL) && 6220 (def != XML_ATTRIBUTE_IMPLIED) && 6221 (def != XML_ATTRIBUTE_REQUIRED)) { 6222 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6223 } 6224 if (ctxt->sax2) { 6225 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6226 } 6227 if (defaultValue != NULL) 6228 xmlFree(defaultValue); 6229 GROW; 6230 } 6231 if (RAW == '>') { 6232 if (inputid != ctxt->input->id) { 6233 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6234 "Attribute list declaration doesn't start and" 6235 " stop in the same entity\n"); 6236 } 6237 NEXT; 6238 } 6239 } 6240} 6241 6242/** 6243 * xmlParseElementMixedContentDecl: 6244 * @ctxt: an XML parser context 6245 * @inputchk: the input used for the current entity, needed for boundary checks 6246 * 6247 * DEPRECATED: Internal function, don't use. 6248 * 6249 * parse the declaration for a Mixed Element content 6250 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6251 * 6252 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6253 * '(' S? '#PCDATA' S? ')' 6254 * 6255 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6256 * 6257 * [ VC: No Duplicate Types ] 6258 * The same name must not appear more than once in a single 6259 * mixed-content declaration. 
6260 * 6261 * returns: the list of the xmlElementContentPtr describing the element choices 6262 */ 6263xmlElementContentPtr 6264xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6265 xmlElementContentPtr ret = NULL, cur = NULL, n; 6266 const xmlChar *elem = NULL; 6267 6268 GROW; 6269 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6270 SKIP(7); 6271 SKIP_BLANKS; 6272 if (RAW == ')') { 6273 if (ctxt->input->id != inputchk) { 6274 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6275 "Element content declaration doesn't start and" 6276 " stop in the same entity\n"); 6277 } 6278 NEXT; 6279 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6280 if (ret == NULL) 6281 return(NULL); 6282 if (RAW == '*') { 6283 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6284 NEXT; 6285 } 6286 return(ret); 6287 } 6288 if ((RAW == '(') || (RAW == '|')) { 6289 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6290 if (ret == NULL) return(NULL); 6291 } 6292 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6293 NEXT; 6294 if (elem == NULL) { 6295 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6296 if (ret == NULL) { 6297 xmlFreeDocElementContent(ctxt->myDoc, cur); 6298 return(NULL); 6299 } 6300 ret->c1 = cur; 6301 if (cur != NULL) 6302 cur->parent = ret; 6303 cur = ret; 6304 } else { 6305 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6306 if (n == NULL) { 6307 xmlFreeDocElementContent(ctxt->myDoc, ret); 6308 return(NULL); 6309 } 6310 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6311 if (n->c1 != NULL) 6312 n->c1->parent = n; 6313 cur->c2 = n; 6314 if (n != NULL) 6315 n->parent = cur; 6316 cur = n; 6317 } 6318 SKIP_BLANKS; 6319 elem = xmlParseName(ctxt); 6320 if (elem == NULL) { 6321 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6322 "xmlParseElementMixedContentDecl : Name expected\n"); 6323 
xmlFreeDocElementContent(ctxt->myDoc, ret); 6324 return(NULL); 6325 } 6326 SKIP_BLANKS; 6327 GROW; 6328 } 6329 if ((RAW == ')') && (NXT(1) == '*')) { 6330 if (elem != NULL) { 6331 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6332 XML_ELEMENT_CONTENT_ELEMENT); 6333 if (cur->c2 != NULL) 6334 cur->c2->parent = cur; 6335 } 6336 if (ret != NULL) 6337 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6338 if (ctxt->input->id != inputchk) { 6339 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6340 "Element content declaration doesn't start and" 6341 " stop in the same entity\n"); 6342 } 6343 SKIP(2); 6344 } else { 6345 xmlFreeDocElementContent(ctxt->myDoc, ret); 6346 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6347 return(NULL); 6348 } 6349 6350 } else { 6351 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6352 } 6353 return(ret); 6354} 6355 6356/** 6357 * xmlParseElementChildrenContentDeclPriv: 6358 * @ctxt: an XML parser context 6359 * @inputchk: the input used for the current entity, needed for boundary checks 6360 * @depth: the level of recursion 6361 * 6362 * parse the declaration for a Mixed Element content 6363 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6364 * 6365 * 6366 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6367 * 6368 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6369 * 6370 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6371 * 6372 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6373 * 6374 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6375 * TODO Parameter-entity replacement text must be properly nested 6376 * with parenthesized groups. That is to say, if either of the 6377 * opening or closing parentheses in a choice, seq, or Mixed 6378 * construct is contained in the replacement text for a parameter 6379 * entity, both must be contained in the same replacement text. 
For 6380 * interoperability, if a parameter-entity reference appears in a 6381 * choice, seq, or Mixed construct, its replacement text should not 6382 * be empty, and neither the first nor last non-blank character of 6383 * the replacement text should be a connector (| or ,). 6384 * 6385 * Returns the tree of xmlElementContentPtr describing the element 6386 * hierarchy. 6387 */ 6388static xmlElementContentPtr 6389xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6390 int depth) { 6391 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6392 const xmlChar *elem; 6393 xmlChar type = 0; 6394 6395 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6396 (depth > 2048)) { 6397 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6398"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6399 depth); 6400 return(NULL); 6401 } 6402 SKIP_BLANKS; 6403 GROW; 6404 if (RAW == '(') { 6405 int inputid = ctxt->input->id; 6406 6407 /* Recurse on first child */ 6408 NEXT; 6409 SKIP_BLANKS; 6410 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6411 depth + 1); 6412 if (cur == NULL) 6413 return(NULL); 6414 SKIP_BLANKS; 6415 GROW; 6416 } else { 6417 elem = xmlParseName(ctxt); 6418 if (elem == NULL) { 6419 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6420 return(NULL); 6421 } 6422 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6423 if (cur == NULL) { 6424 xmlErrMemory(ctxt, NULL); 6425 return(NULL); 6426 } 6427 GROW; 6428 if (RAW == '?') { 6429 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6430 NEXT; 6431 } else if (RAW == '*') { 6432 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6433 NEXT; 6434 } else if (RAW == '+') { 6435 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 6436 NEXT; 6437 } else { 6438 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6439 } 6440 GROW; 6441 } 6442 SKIP_BLANKS; 6443 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6444 /* 6445 * 
Each loop we parse one separator and one element. 6446 */ 6447 if (RAW == ',') { 6448 if (type == 0) type = CUR; 6449 6450 /* 6451 * Detect "Name | Name , Name" error 6452 */ 6453 else if (type != CUR) { 6454 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6455 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6456 type); 6457 if ((last != NULL) && (last != ret)) 6458 xmlFreeDocElementContent(ctxt->myDoc, last); 6459 if (ret != NULL) 6460 xmlFreeDocElementContent(ctxt->myDoc, ret); 6461 return(NULL); 6462 } 6463 NEXT; 6464 6465 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6466 if (op == NULL) { 6467 if ((last != NULL) && (last != ret)) 6468 xmlFreeDocElementContent(ctxt->myDoc, last); 6469 xmlFreeDocElementContent(ctxt->myDoc, ret); 6470 return(NULL); 6471 } 6472 if (last == NULL) { 6473 op->c1 = ret; 6474 if (ret != NULL) 6475 ret->parent = op; 6476 ret = cur = op; 6477 } else { 6478 cur->c2 = op; 6479 if (op != NULL) 6480 op->parent = cur; 6481 op->c1 = last; 6482 if (last != NULL) 6483 last->parent = op; 6484 cur =op; 6485 last = NULL; 6486 } 6487 } else if (RAW == '|') { 6488 if (type == 0) type = CUR; 6489 6490 /* 6491 * Detect "Name , Name | Name" error 6492 */ 6493 else if (type != CUR) { 6494 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6495 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6496 type); 6497 if ((last != NULL) && (last != ret)) 6498 xmlFreeDocElementContent(ctxt->myDoc, last); 6499 if (ret != NULL) 6500 xmlFreeDocElementContent(ctxt->myDoc, ret); 6501 return(NULL); 6502 } 6503 NEXT; 6504 6505 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6506 if (op == NULL) { 6507 if ((last != NULL) && (last != ret)) 6508 xmlFreeDocElementContent(ctxt->myDoc, last); 6509 if (ret != NULL) 6510 xmlFreeDocElementContent(ctxt->myDoc, ret); 6511 return(NULL); 6512 } 6513 if (last == NULL) { 6514 op->c1 = ret; 6515 if (ret != NULL) 6516 ret->parent = op; 6517 ret = cur = op; 6518 } 
else { 6519 cur->c2 = op; 6520 if (op != NULL) 6521 op->parent = cur; 6522 op->c1 = last; 6523 if (last != NULL) 6524 last->parent = op; 6525 cur =op; 6526 last = NULL; 6527 } 6528 } else { 6529 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6530 if ((last != NULL) && (last != ret)) 6531 xmlFreeDocElementContent(ctxt->myDoc, last); 6532 if (ret != NULL) 6533 xmlFreeDocElementContent(ctxt->myDoc, ret); 6534 return(NULL); 6535 } 6536 GROW; 6537 SKIP_BLANKS; 6538 GROW; 6539 if (RAW == '(') { 6540 int inputid = ctxt->input->id; 6541 /* Recurse on second child */ 6542 NEXT; 6543 SKIP_BLANKS; 6544 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6545 depth + 1); 6546 if (last == NULL) { 6547 if (ret != NULL) 6548 xmlFreeDocElementContent(ctxt->myDoc, ret); 6549 return(NULL); 6550 } 6551 SKIP_BLANKS; 6552 } else { 6553 elem = xmlParseName(ctxt); 6554 if (elem == NULL) { 6555 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6556 if (ret != NULL) 6557 xmlFreeDocElementContent(ctxt->myDoc, ret); 6558 return(NULL); 6559 } 6560 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6561 if (last == NULL) { 6562 if (ret != NULL) 6563 xmlFreeDocElementContent(ctxt->myDoc, ret); 6564 return(NULL); 6565 } 6566 if (RAW == '?') { 6567 last->ocur = XML_ELEMENT_CONTENT_OPT; 6568 NEXT; 6569 } else if (RAW == '*') { 6570 last->ocur = XML_ELEMENT_CONTENT_MULT; 6571 NEXT; 6572 } else if (RAW == '+') { 6573 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6574 NEXT; 6575 } else { 6576 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6577 } 6578 } 6579 SKIP_BLANKS; 6580 GROW; 6581 } 6582 if ((cur != NULL) && (last != NULL)) { 6583 cur->c2 = last; 6584 if (last != NULL) 6585 last->parent = cur; 6586 } 6587 if (ctxt->input->id != inputchk) { 6588 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6589 "Element content declaration doesn't start and stop in" 6590 " the same entity\n"); 6591 } 6592 NEXT; 6593 if (RAW == '?') { 6594 if (ret != NULL) { 6595 if 
((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6596 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6597 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6598 else 6599 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6600 } 6601 NEXT; 6602 } else if (RAW == '*') { 6603 if (ret != NULL) { 6604 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6605 cur = ret; 6606 /* 6607 * Some normalization: 6608 * (a | b* | c?)* == (a | b | c)* 6609 */ 6610 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6611 if ((cur->c1 != NULL) && 6612 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6613 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6614 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6615 if ((cur->c2 != NULL) && 6616 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6617 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6618 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6619 cur = cur->c2; 6620 } 6621 } 6622 NEXT; 6623 } else if (RAW == '+') { 6624 if (ret != NULL) { 6625 int found = 0; 6626 6627 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6628 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6629 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6630 else 6631 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6632 /* 6633 * Some normalization: 6634 * (a | b*)+ == (a | b)* 6635 * (a | b?)+ == (a | b)* 6636 */ 6637 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6638 if ((cur->c1 != NULL) && 6639 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6640 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6641 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6642 found = 1; 6643 } 6644 if ((cur->c2 != NULL) && 6645 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6646 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6647 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6648 found = 1; 6649 } 6650 cur = cur->c2; 6651 } 6652 if (found) 6653 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6654 } 6655 NEXT; 6656 } 6657 return(ret); 6658} 6659 6660/** 6661 * xmlParseElementChildrenContentDecl: 6662 * @ctxt: an XML parser context 6663 * @inputchk: the input used for the 
current entity, needed for boundary checks 6664 * 6665 * DEPRECATED: Internal function, don't use. 6666 * 6667 * parse the declaration for a Mixed Element content 6668 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6669 * 6670 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6671 * 6672 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6673 * 6674 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6675 * 6676 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6677 * 6678 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6679 * TODO Parameter-entity replacement text must be properly nested 6680 * with parenthesized groups. That is to say, if either of the 6681 * opening or closing parentheses in a choice, seq, or Mixed 6682 * construct is contained in the replacement text for a parameter 6683 * entity, both must be contained in the same replacement text. For 6684 * interoperability, if a parameter-entity reference appears in a 6685 * choice, seq, or Mixed construct, its replacement text should not 6686 * be empty, and neither the first nor last non-blank character of 6687 * the replacement text should be a connector (| or ,). 6688 * 6689 * Returns the tree of xmlElementContentPtr describing the element 6690 * hierarchy. 6691 */ 6692xmlElementContentPtr 6693xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6694 /* stub left for API/ABI compat */ 6695 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6696} 6697 6698/** 6699 * xmlParseElementContentDecl: 6700 * @ctxt: an XML parser context 6701 * @name: the name of the element being defined. 6702 * @result: the Element Content pointer will be stored here if any 6703 * 6704 * DEPRECATED: Internal function, don't use. 
6705 * 6706 * parse the declaration for an Element content either Mixed or Children, 6707 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6708 * 6709 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6710 * 6711 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6712 */ 6713 6714int 6715xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6716 xmlElementContentPtr *result) { 6717 6718 xmlElementContentPtr tree = NULL; 6719 int inputid = ctxt->input->id; 6720 int res; 6721 6722 *result = NULL; 6723 6724 if (RAW != '(') { 6725 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6726 "xmlParseElementContentDecl : %s '(' expected\n", name); 6727 return(-1); 6728 } 6729 NEXT; 6730 GROW; 6731 if (ctxt->instate == XML_PARSER_EOF) 6732 return(-1); 6733 SKIP_BLANKS; 6734 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6735 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6736 res = XML_ELEMENT_TYPE_MIXED; 6737 } else { 6738 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6739 res = XML_ELEMENT_TYPE_ELEMENT; 6740 } 6741 SKIP_BLANKS; 6742 *result = tree; 6743 return(res); 6744} 6745 6746/** 6747 * xmlParseElementDecl: 6748 * @ctxt: an XML parser context 6749 * 6750 * DEPRECATED: Internal function, don't use. 6751 * 6752 * Parse an element declaration. Always consumes '<!'. 6753 * 6754 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 6755 * 6756 * [ VC: Unique Element Type Declaration ] 6757 * No element type may be declared more than once 6758 * 6759 * Returns the type of the element, or -1 in case of error 6760 */ 6761int 6762xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 6763 const xmlChar *name; 6764 int ret = -1; 6765 xmlElementContentPtr content = NULL; 6766 6767 if ((CUR != '<') || (NXT(1) != '!')) 6768 return(ret); 6769 SKIP(2); 6770 6771 /* GROW; done in the caller */ 6772 if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 6773 int inputid = ctxt->input->id; 6774 6775 SKIP(7); 6776 if (SKIP_BLANKS == 0) { 6777 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6778 "Space required after 'ELEMENT'\n"); 6779 return(-1); 6780 } 6781 name = xmlParseName(ctxt); 6782 if (name == NULL) { 6783 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6784 "xmlParseElementDecl: no name for Element\n"); 6785 return(-1); 6786 } 6787 if (SKIP_BLANKS == 0) { 6788 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6789 "Space required after the element name\n"); 6790 } 6791 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 6792 SKIP(5); 6793 /* 6794 * Element must always be empty. 6795 */ 6796 ret = XML_ELEMENT_TYPE_EMPTY; 6797 } else if ((RAW == 'A') && (NXT(1) == 'N') && 6798 (NXT(2) == 'Y')) { 6799 SKIP(3); 6800 /* 6801 * Element is a generic container. 6802 */ 6803 ret = XML_ELEMENT_TYPE_ANY; 6804 } else if (RAW == '(') { 6805 ret = xmlParseElementContentDecl(ctxt, name, &content); 6806 } else { 6807 /* 6808 * [ WFC: PEs in Internal Subset ] error handling. 
6809 */ 6810 if ((RAW == '%') && (ctxt->external == 0) && 6811 (ctxt->inputNr == 1)) { 6812 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 6813 "PEReference: forbidden within markup decl in internal subset\n"); 6814 } else { 6815 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6816 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 6817 } 6818 return(-1); 6819 } 6820 6821 SKIP_BLANKS; 6822 6823 if (RAW != '>') { 6824 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6825 if (content != NULL) { 6826 xmlFreeDocElementContent(ctxt->myDoc, content); 6827 } 6828 } else { 6829 if (inputid != ctxt->input->id) { 6830 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6831 "Element declaration doesn't start and stop in" 6832 " the same entity\n"); 6833 } 6834 6835 NEXT; 6836 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6837 (ctxt->sax->elementDecl != NULL)) { 6838 if (content != NULL) 6839 content->parent = NULL; 6840 ctxt->sax->elementDecl(ctxt->userData, name, ret, 6841 content); 6842 if ((content != NULL) && (content->parent == NULL)) { 6843 /* 6844 * this is a trick: if xmlAddElementDecl is called, 6845 * instead of copying the full tree it is plugged directly 6846 * if called from the parser. Avoid duplicating the 6847 * interfaces or change the API/ABI 6848 */ 6849 xmlFreeDocElementContent(ctxt->myDoc, content); 6850 } 6851 } else if (content != NULL) { 6852 xmlFreeDocElementContent(ctxt->myDoc, content); 6853 } 6854 } 6855 } 6856 return(ret); 6857} 6858 6859/** 6860 * xmlParseConditionalSections 6861 * @ctxt: an XML parser context 6862 * 6863 * Parse a conditional section. Always consumes '<!['. 6864 * 6865 * [61] conditionalSect ::= includeSect | ignoreSect 6866 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 6867 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 6868 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6869 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6870 */ 6871 6872static void 6873xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6874 int *inputIds = NULL; 6875 size_t inputIdsSize = 0; 6876 size_t depth = 0; 6877 6878 while (ctxt->instate != XML_PARSER_EOF) { 6879 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6880 int id = ctxt->input->id; 6881 6882 SKIP(3); 6883 SKIP_BLANKS; 6884 6885 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6886 SKIP(7); 6887 SKIP_BLANKS; 6888 if (RAW != '[') { 6889 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6890 xmlHaltParser(ctxt); 6891 goto error; 6892 } 6893 if (ctxt->input->id != id) { 6894 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6895 "All markup of the conditional section is" 6896 " not in the same entity\n"); 6897 } 6898 NEXT; 6899 6900 if (inputIdsSize <= depth) { 6901 int *tmp; 6902 6903 inputIdsSize = (inputIdsSize == 0 ? 
4 : inputIdsSize * 2); 6904 tmp = (int *) xmlRealloc(inputIds, 6905 inputIdsSize * sizeof(int)); 6906 if (tmp == NULL) { 6907 xmlErrMemory(ctxt, NULL); 6908 goto error; 6909 } 6910 inputIds = tmp; 6911 } 6912 inputIds[depth] = id; 6913 depth++; 6914 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6915 size_t ignoreDepth = 0; 6916 6917 SKIP(6); 6918 SKIP_BLANKS; 6919 if (RAW != '[') { 6920 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6921 xmlHaltParser(ctxt); 6922 goto error; 6923 } 6924 if (ctxt->input->id != id) { 6925 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6926 "All markup of the conditional section is" 6927 " not in the same entity\n"); 6928 } 6929 NEXT; 6930 6931 while (RAW != 0) { 6932 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6933 SKIP(3); 6934 ignoreDepth++; 6935 /* Check for integer overflow */ 6936 if (ignoreDepth == 0) { 6937 xmlErrMemory(ctxt, NULL); 6938 goto error; 6939 } 6940 } else if ((RAW == ']') && (NXT(1) == ']') && 6941 (NXT(2) == '>')) { 6942 if (ignoreDepth == 0) 6943 break; 6944 SKIP(3); 6945 ignoreDepth--; 6946 } else { 6947 NEXT; 6948 } 6949 } 6950 6951 if (RAW == 0) { 6952 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6953 goto error; 6954 } 6955 if (ctxt->input->id != id) { 6956 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6957 "All markup of the conditional section is" 6958 " not in the same entity\n"); 6959 } 6960 SKIP(3); 6961 } else { 6962 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6963 xmlHaltParser(ctxt); 6964 goto error; 6965 } 6966 } else if ((depth > 0) && 6967 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6968 depth--; 6969 if (ctxt->input->id != inputIds[depth]) { 6970 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6971 "All markup of the conditional section is not" 6972 " in the same entity\n"); 6973 } 6974 SKIP(3); 6975 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) { 6976 xmlParseMarkupDecl(ctxt); 6977 } else { 6978 xmlFatalErr(ctxt, 
XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6979 xmlHaltParser(ctxt); 6980 goto error; 6981 } 6982 6983 if (depth == 0) 6984 break; 6985 6986 SKIP_BLANKS; 6987 SHRINK; 6988 GROW; 6989 } 6990 6991error: 6992 xmlFree(inputIds); 6993} 6994 6995/** 6996 * xmlParseMarkupDecl: 6997 * @ctxt: an XML parser context 6998 * 6999 * DEPRECATED: Internal function, don't use. 7000 * 7001 * Parse markup declarations. Always consumes '<!' or '<?'. 7002 * 7003 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 7004 * NotationDecl | PI | Comment 7005 * 7006 * [ VC: Proper Declaration/PE Nesting ] 7007 * Parameter-entity replacement text must be properly nested with 7008 * markup declarations. That is to say, if either the first character 7009 * or the last character of a markup declaration (markupdecl above) is 7010 * contained in the replacement text for a parameter-entity reference, 7011 * both must be contained in the same replacement text. 7012 * 7013 * [ WFC: PEs in Internal Subset ] 7014 * In the internal DTD subset, parameter-entity references can occur 7015 * only where markup declarations can occur, not within markup declarations. 7016 * (This does not apply to references that occur in external parameter 7017 * entities or to the external subset.) 
7018 */ 7019void 7020xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 7021 GROW; 7022 if (CUR == '<') { 7023 if (NXT(1) == '!') { 7024 switch (NXT(2)) { 7025 case 'E': 7026 if (NXT(3) == 'L') 7027 xmlParseElementDecl(ctxt); 7028 else if (NXT(3) == 'N') 7029 xmlParseEntityDecl(ctxt); 7030 else 7031 SKIP(2); 7032 break; 7033 case 'A': 7034 xmlParseAttributeListDecl(ctxt); 7035 break; 7036 case 'N': 7037 xmlParseNotationDecl(ctxt); 7038 break; 7039 case '-': 7040 xmlParseComment(ctxt); 7041 break; 7042 default: 7043 /* there is an error but it will be detected later */ 7044 SKIP(2); 7045 break; 7046 } 7047 } else if (NXT(1) == '?') { 7048 xmlParsePI(ctxt); 7049 } 7050 } 7051 7052 /* 7053 * detect requirement to exit there and act accordingly 7054 * and avoid having instate overridden later on 7055 */ 7056 if (ctxt->instate == XML_PARSER_EOF) 7057 return; 7058 7059 ctxt->instate = XML_PARSER_DTD; 7060} 7061 7062/** 7063 * xmlParseTextDecl: 7064 * @ctxt: an XML parser context 7065 * 7066 * DEPRECATED: Internal function, don't use. 7067 * 7068 * parse an XML declaration header for external entities 7069 * 7070 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 7071 */ 7072 7073void 7074xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 7075 xmlChar *version; 7076 int oldstate; 7077 7078 /* 7079 * We know that '<?xml' is here. 7080 */ 7081 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 7082 SKIP(5); 7083 } else { 7084 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 7085 return; 7086 } 7087 7088 /* Avoid expansion of parameter entities when skipping blanks. */ 7089 oldstate = ctxt->instate; 7090 ctxt->instate = XML_PARSER_START; 7091 7092 if (SKIP_BLANKS == 0) { 7093 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7094 "Space needed after '<?xml'\n"); 7095 } 7096 7097 /* 7098 * We may have the VersionInfo here. 
7099 */ 7100 version = xmlParseVersionInfo(ctxt); 7101 if (version == NULL) 7102 version = xmlCharStrdup(XML_DEFAULT_VERSION); 7103 else { 7104 if (SKIP_BLANKS == 0) { 7105 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7106 "Space needed here\n"); 7107 } 7108 } 7109 ctxt->input->version = version; 7110 7111 /* 7112 * We must have the encoding declaration 7113 */ 7114 xmlParseEncodingDecl(ctxt); 7115 if (ctxt->instate == XML_PARSER_EOF) 7116 return; 7117 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7118 /* 7119 * The XML REC instructs us to stop parsing right here 7120 */ 7121 ctxt->instate = oldstate; 7122 return; 7123 } 7124 7125 SKIP_BLANKS; 7126 if ((RAW == '?') && (NXT(1) == '>')) { 7127 SKIP(2); 7128 } else if (RAW == '>') { 7129 /* Deprecated old WD ... */ 7130 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7131 NEXT; 7132 } else { 7133 int c; 7134 7135 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7136 while ((c = CUR) != 0) { 7137 NEXT; 7138 if (c == '>') 7139 break; 7140 } 7141 } 7142 7143 if (ctxt->instate != XML_PARSER_EOF) 7144 ctxt->instate = oldstate; 7145} 7146 7147/** 7148 * xmlParseExternalSubset: 7149 * @ctxt: an XML parser context 7150 * @ExternalID: the external identifier 7151 * @SystemID: the system identifier (or URL) 7152 * 7153 * parse Markup declarations from an external subset 7154 * 7155 * [30] extSubset ::= textDecl? 
extSubsetDecl 7156 * 7157 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 7158 */ 7159void 7160xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 7161 const xmlChar *SystemID) { 7162 xmlDetectSAX2(ctxt); 7163 7164 xmlDetectEncoding(ctxt); 7165 7166 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 7167 xmlParseTextDecl(ctxt); 7168 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7169 /* 7170 * The XML REC instructs us to stop parsing right here 7171 */ 7172 xmlHaltParser(ctxt); 7173 return; 7174 } 7175 } 7176 if (ctxt->myDoc == NULL) { 7177 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 7178 if (ctxt->myDoc == NULL) { 7179 xmlErrMemory(ctxt, "New Doc failed"); 7180 return; 7181 } 7182 ctxt->myDoc->properties = XML_DOC_INTERNAL; 7183 } 7184 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 7185 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 7186 7187 ctxt->instate = XML_PARSER_DTD; 7188 ctxt->external = 1; 7189 SKIP_BLANKS; 7190 while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) { 7191 GROW; 7192 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7193 xmlParseConditionalSections(ctxt); 7194 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) { 7195 xmlParseMarkupDecl(ctxt); 7196 } else { 7197 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7198 xmlHaltParser(ctxt); 7199 return; 7200 } 7201 SKIP_BLANKS; 7202 SHRINK; 7203 } 7204 7205 if (RAW != 0) { 7206 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7207 } 7208 7209} 7210 7211/** 7212 * xmlParseReference: 7213 * @ctxt: an XML parser context 7214 * 7215 * DEPRECATED: Internal function, don't use. 7216 * 7217 * parse and handle entity references in content, depending on the SAX 7218 * interface, this may end-up in a call to character() if this is a 7219 * CharRef, a predefined entity, if there is no reference() callback. 7220 * or if the parser was asked to switch to that mode. 
7221 * 7222 * Always consumes '&'. 7223 * 7224 * [67] Reference ::= EntityRef | CharRef 7225 */ 7226void 7227xmlParseReference(xmlParserCtxtPtr ctxt) { 7228 xmlEntityPtr ent; 7229 xmlChar *val; 7230 int was_checked; 7231 xmlNodePtr list = NULL; 7232 xmlParserErrors ret = XML_ERR_OK; 7233 7234 7235 if (RAW != '&') 7236 return; 7237 7238 /* 7239 * Simple case of a CharRef 7240 */ 7241 if (NXT(1) == '#') { 7242 int i = 0; 7243 xmlChar out[16]; 7244 int value = xmlParseCharRef(ctxt); 7245 7246 if (value == 0) 7247 return; 7248 7249 /* 7250 * Just encode the value in UTF-8 7251 */ 7252 COPY_BUF(out, i, value); 7253 out[i] = 0; 7254 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7255 (!ctxt->disableSAX)) 7256 ctxt->sax->characters(ctxt->userData, out, i); 7257 return; 7258 } 7259 7260 /* 7261 * We are seeing an entity reference 7262 */ 7263 ent = xmlParseEntityRef(ctxt); 7264 if (ent == NULL) return; 7265 if (!ctxt->wellFormed) 7266 return; 7267 was_checked = ent->flags & XML_ENT_PARSED; 7268 7269 /* special case of predefined entities */ 7270 if ((ent->name == NULL) || 7271 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 7272 val = ent->content; 7273 if (val == NULL) return; 7274 /* 7275 * inline the entity. 7276 */ 7277 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7278 (!ctxt->disableSAX)) 7279 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 7280 return; 7281 } 7282 7283 /* 7284 * The first reference to the entity trigger a parsing phase 7285 * where the ent->children is filled with the result from 7286 * the parsing. 7287 * Note: external parsed entities will not be loaded, it is not 7288 * required for a non-validating parser, unless the parsing option 7289 * of validating, or substituting entities were given. Doing so is 7290 * far more secure as the parser will only process data coming from 7291 * the document entity by default. 
7292 * 7293 * FIXME: This doesn't work correctly since entities can be 7294 * expanded with different namespace declarations in scope. 7295 * For example: 7296 * 7297 * <!DOCTYPE doc [ 7298 * <!ENTITY ent "<ns:elem/>"> 7299 * ]> 7300 * <doc> 7301 * <decl1 xmlns:ns="urn:ns1"> 7302 * &ent; 7303 * </decl1> 7304 * <decl2 xmlns:ns="urn:ns2"> 7305 * &ent; 7306 * </decl2> 7307 * </doc> 7308 * 7309 * Proposed fix: 7310 * 7311 * - Remove the ent->owner optimization which tries to avoid the 7312 * initial copy of the entity. Always make entities own the 7313 * subtree. 7314 * - Ignore current namespace declarations when parsing the 7315 * entity. If a prefix can't be resolved, don't report an error 7316 * but mark it as unresolved. 7317 * - Try to resolve these prefixes when expanding the entity. 7318 * This will require a specialized version of xmlStaticCopyNode 7319 * which can also make use of the namespace hash table to avoid 7320 * quadratic behavior. 7321 * 7322 * Alternatively, we could simply reparse the entity on each 7323 * expansion like we already do with custom SAX callbacks. 7324 * External entity content should be cached in this case. 7325 */ 7326 if (((ent->flags & XML_ENT_PARSED) == 0) && 7327 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || 7328 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { 7329 unsigned long oldsizeentcopy = ctxt->sizeentcopy; 7330 7331 /* 7332 * This is a bit hackish but this seems the best 7333 * way to make sure both SAX and DOM entity support 7334 * behaves okay. 
7335 */ 7336 void *user_data; 7337 if (ctxt->userData == ctxt) 7338 user_data = NULL; 7339 else 7340 user_data = ctxt->userData; 7341 7342 /* Avoid overflow as much as possible */ 7343 ctxt->sizeentcopy = 0; 7344 7345 if (ent->flags & XML_ENT_EXPANDING) { 7346 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7347 xmlHaltParser(ctxt); 7348 return; 7349 } 7350 7351 ent->flags |= XML_ENT_EXPANDING; 7352 7353 /* 7354 * Check that this entity is well formed 7355 * 4.3.2: An internal general parsed entity is well-formed 7356 * if its replacement text matches the production labeled 7357 * content. 7358 */ 7359 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7360 ctxt->depth++; 7361 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 7362 user_data, &list); 7363 ctxt->depth--; 7364 7365 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7366 ctxt->depth++; 7367 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 7368 user_data, ctxt->depth, ent->URI, 7369 ent->ExternalID, &list); 7370 ctxt->depth--; 7371 } else { 7372 ret = XML_ERR_ENTITY_PE_INTERNAL; 7373 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7374 "invalid entity type found\n", NULL); 7375 } 7376 7377 ent->flags &= ~XML_ENT_EXPANDING; 7378 ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED; 7379 ent->expandedSize = ctxt->sizeentcopy; 7380 if (ret == XML_ERR_ENTITY_LOOP) { 7381 xmlHaltParser(ctxt); 7382 xmlFreeNodeList(list); 7383 return; 7384 } 7385 if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) { 7386 xmlFreeNodeList(list); 7387 return; 7388 } 7389 7390 if ((ret == XML_ERR_OK) && (list != NULL)) { 7391 ent->children = list; 7392 /* 7393 * Prune it directly in the generated document 7394 * except for single text nodes. 
7395 */ 7396 if ((ctxt->replaceEntities == 0) || 7397 (ctxt->parseMode == XML_PARSE_READER) || 7398 ((list->type == XML_TEXT_NODE) && 7399 (list->next == NULL))) { 7400 ent->owner = 1; 7401 while (list != NULL) { 7402 list->parent = (xmlNodePtr) ent; 7403 if (list->doc != ent->doc) 7404 xmlSetTreeDoc(list, ent->doc); 7405 if (list->next == NULL) 7406 ent->last = list; 7407 list = list->next; 7408 } 7409 list = NULL; 7410 } else { 7411 ent->owner = 0; 7412 while (list != NULL) { 7413 list->parent = (xmlNodePtr) ctxt->node; 7414 list->doc = ctxt->myDoc; 7415 if (list->next == NULL) 7416 ent->last = list; 7417 list = list->next; 7418 } 7419 list = ent->children; 7420#ifdef LIBXML_LEGACY_ENABLED 7421 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7422 xmlAddEntityReference(ent, list, NULL); 7423#endif /* LIBXML_LEGACY_ENABLED */ 7424 } 7425 } else if ((ret != XML_ERR_OK) && 7426 (ret != XML_WAR_UNDECLARED_ENTITY)) { 7427 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7428 "Entity '%s' failed to parse\n", ent->name); 7429 if (ent->content != NULL) 7430 ent->content[0] = 0; 7431 } else if (list != NULL) { 7432 xmlFreeNodeList(list); 7433 list = NULL; 7434 } 7435 7436 /* Prevent entity from being parsed and expanded twice (Bug 760367). */ 7437 was_checked = 0; 7438 } 7439 7440 /* 7441 * Now that the entity content has been gathered 7442 * provide it to the application, this can take different forms based 7443 * on the parsing modes. 7444 */ 7445 if (ent->children == NULL) { 7446 /* 7447 * Probably running in SAX mode and the callbacks don't 7448 * build the entity content. So unless we already went 7449 * though parsing for first checking go though the entity 7450 * content to generate callbacks associated to the entity 7451 */ 7452 if (was_checked != 0) { 7453 void *user_data; 7454 /* 7455 * This is a bit hackish but this seems the best 7456 * way to make sure both SAX and DOM entity support 7457 * behaves okay. 
7458 */ 7459 if (ctxt->userData == ctxt) 7460 user_data = NULL; 7461 else 7462 user_data = ctxt->userData; 7463 7464 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7465 ctxt->depth++; 7466 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7467 ent->content, user_data, NULL); 7468 ctxt->depth--; 7469 } else if (ent->etype == 7470 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7471 unsigned long oldsizeentities = ctxt->sizeentities; 7472 7473 ctxt->depth++; 7474 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7475 ctxt->sax, user_data, ctxt->depth, 7476 ent->URI, ent->ExternalID, NULL); 7477 ctxt->depth--; 7478 7479 /* Undo the change to sizeentities */ 7480 ctxt->sizeentities = oldsizeentities; 7481 } else { 7482 ret = XML_ERR_ENTITY_PE_INTERNAL; 7483 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7484 "invalid entity type found\n", NULL); 7485 } 7486 if (ret == XML_ERR_ENTITY_LOOP) { 7487 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7488 return; 7489 } 7490 if (xmlParserEntityCheck(ctxt, 0)) 7491 return; 7492 } 7493 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7494 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7495 /* 7496 * Entity reference callback comes second, it's somewhat 7497 * superfluous but a compatibility to historical behaviour 7498 */ 7499 ctxt->sax->reference(ctxt->userData, ent->name); 7500 } 7501 return; 7502 } 7503 7504 /* 7505 * We also check for amplification if entities aren't substituted. 7506 * They might be expanded later. 7507 */ 7508 if ((was_checked != 0) && 7509 (xmlParserEntityCheck(ctxt, ent->expandedSize))) 7510 return; 7511 7512 /* 7513 * If we didn't get any children for the entity being built 7514 */ 7515 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7516 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7517 /* 7518 * Create a node. 
7519 */ 7520 ctxt->sax->reference(ctxt->userData, ent->name); 7521 return; 7522 } 7523 7524 if (ctxt->replaceEntities) { 7525 /* 7526 * There is a problem on the handling of _private for entities 7527 * (bug 155816): Should we copy the content of the field from 7528 * the entity (possibly overwriting some value set by the user 7529 * when a copy is created), should we leave it alone, or should 7530 * we try to take care of different situations? The problem 7531 * is exacerbated by the usage of this field by the xmlReader. 7532 * To fix this bug, we look at _private on the created node 7533 * and, if it's NULL, we copy in whatever was in the entity. 7534 * If it's not NULL we leave it alone. This is somewhat of a 7535 * hack - maybe we should have further tests to determine 7536 * what to do. 7537 */ 7538 if (ctxt->node != NULL) { 7539 /* 7540 * Seems we are generating the DOM content, do 7541 * a simple tree copy for all references except the first 7542 * In the first occurrence list contains the replacement. 7543 */ 7544 if (((list == NULL) && (ent->owner == 0)) || 7545 (ctxt->parseMode == XML_PARSE_READER)) { 7546 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7547 7548 /* 7549 * when operating on a reader, the entities definitions 7550 * are always owning the entities subtree. 
7551 if (ctxt->parseMode == XML_PARSE_READER) 7552 ent->owner = 1; 7553 */ 7554 7555 cur = ent->children; 7556 while (cur != NULL) { 7557 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7558 if (nw != NULL) { 7559 if (nw->_private == NULL) 7560 nw->_private = cur->_private; 7561 if (firstChild == NULL){ 7562 firstChild = nw; 7563 } 7564 nw = xmlAddChild(ctxt->node, nw); 7565 } 7566 if (cur == ent->last) { 7567 /* 7568 * needed to detect some strange empty 7569 * node cases in the reader tests 7570 */ 7571 if ((ctxt->parseMode == XML_PARSE_READER) && 7572 (nw != NULL) && 7573 (nw->type == XML_ELEMENT_NODE) && 7574 (nw->children == NULL)) 7575 nw->extra = 1; 7576 7577 break; 7578 } 7579 cur = cur->next; 7580 } 7581#ifdef LIBXML_LEGACY_ENABLED 7582 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7583 xmlAddEntityReference(ent, firstChild, nw); 7584#endif /* LIBXML_LEGACY_ENABLED */ 7585 } else if ((list == NULL) || (ctxt->inputNr > 0)) { 7586 xmlNodePtr nw = NULL, cur, next, last, 7587 firstChild = NULL; 7588 7589 /* 7590 * Copy the entity child list and make it the new 7591 * entity child list. The goal is to make sure any 7592 * ID or REF referenced will be the one from the 7593 * document content and not the entity copy. 
7594 */ 7595 cur = ent->children; 7596 ent->children = NULL; 7597 last = ent->last; 7598 ent->last = NULL; 7599 while (cur != NULL) { 7600 next = cur->next; 7601 cur->next = NULL; 7602 cur->parent = NULL; 7603 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7604 if (nw != NULL) { 7605 if (nw->_private == NULL) 7606 nw->_private = cur->_private; 7607 if (firstChild == NULL){ 7608 firstChild = cur; 7609 } 7610 xmlAddChild((xmlNodePtr) ent, nw); 7611 } 7612 xmlAddChild(ctxt->node, cur); 7613 if (cur == last) 7614 break; 7615 cur = next; 7616 } 7617 if (ent->owner == 0) 7618 ent->owner = 1; 7619#ifdef LIBXML_LEGACY_ENABLED 7620 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7621 xmlAddEntityReference(ent, firstChild, nw); 7622#endif /* LIBXML_LEGACY_ENABLED */ 7623 } else { 7624 const xmlChar *nbktext; 7625 7626 /* 7627 * the name change is to avoid coalescing of the 7628 * node with a possible previous text one which 7629 * would make ent->children a dangling pointer 7630 */ 7631 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7632 -1); 7633 if (ent->children->type == XML_TEXT_NODE) 7634 ent->children->name = nbktext; 7635 if ((ent->last != ent->children) && 7636 (ent->last->type == XML_TEXT_NODE)) 7637 ent->last->name = nbktext; 7638 xmlAddChildList(ctxt->node, ent->children); 7639 } 7640 7641 /* 7642 * This is to avoid a nasty side effect, see 7643 * characters() in SAX.c 7644 */ 7645 ctxt->nodemem = 0; 7646 ctxt->nodelen = 0; 7647 return; 7648 } 7649 } 7650} 7651 7652/** 7653 * xmlParseEntityRef: 7654 * @ctxt: an XML parser context 7655 * 7656 * DEPRECATED: Internal function, don't use. 7657 * 7658 * Parse an entitiy reference. Always consumes '&'. 
7659 * 7660 * [68] EntityRef ::= '&' Name ';' 7661 * 7662 * [ WFC: Entity Declared ] 7663 * In a document without any DTD, a document with only an internal DTD 7664 * subset which contains no parameter entity references, or a document 7665 * with "standalone='yes'", the Name given in the entity reference 7666 * must match that in an entity declaration, except that well-formed 7667 * documents need not declare any of the following entities: amp, lt, 7668 * gt, apos, quot. The declaration of a parameter entity must precede 7669 * any reference to it. Similarly, the declaration of a general entity 7670 * must precede any reference to it which appears in a default value in an 7671 * attribute-list declaration. Note that if entities are declared in the 7672 * external subset or in external parameter entities, a non-validating 7673 * processor is not obligated to read and process their declarations; 7674 * for such documents, the rule that an entity must be declared is a 7675 * well-formedness constraint only if standalone='yes'. 7676 * 7677 * [ WFC: Parsed Entity ] 7678 * An entity reference must not contain the name of an unparsed entity 7679 * 7680 * Returns the xmlEntityPtr if found, or NULL otherwise. 
7681 */ 7682xmlEntityPtr 7683xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7684 const xmlChar *name; 7685 xmlEntityPtr ent = NULL; 7686 7687 GROW; 7688 if (ctxt->instate == XML_PARSER_EOF) 7689 return(NULL); 7690 7691 if (RAW != '&') 7692 return(NULL); 7693 NEXT; 7694 name = xmlParseName(ctxt); 7695 if (name == NULL) { 7696 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7697 "xmlParseEntityRef: no name\n"); 7698 return(NULL); 7699 } 7700 if (RAW != ';') { 7701 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7702 return(NULL); 7703 } 7704 NEXT; 7705 7706 /* 7707 * Predefined entities override any extra definition 7708 */ 7709 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7710 ent = xmlGetPredefinedEntity(name); 7711 if (ent != NULL) 7712 return(ent); 7713 } 7714 7715 /* 7716 * Ask first SAX for entity resolution, otherwise try the 7717 * entities which may have stored in the parser context. 7718 */ 7719 if (ctxt->sax != NULL) { 7720 if (ctxt->sax->getEntity != NULL) 7721 ent = ctxt->sax->getEntity(ctxt->userData, name); 7722 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7723 (ctxt->options & XML_PARSE_OLDSAX)) 7724 ent = xmlGetPredefinedEntity(name); 7725 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7726 (ctxt->userData==ctxt)) { 7727 ent = xmlSAX2GetEntity(ctxt, name); 7728 } 7729 } 7730 if (ctxt->instate == XML_PARSER_EOF) 7731 return(NULL); 7732 /* 7733 * [ WFC: Entity Declared ] 7734 * In a document without any DTD, a document with only an 7735 * internal DTD subset which contains no parameter entity 7736 * references, or a document with "standalone='yes'", the 7737 * Name given in the entity reference must match that in an 7738 * entity declaration, except that well-formed documents 7739 * need not declare any of the following entities: amp, lt, 7740 * gt, apos, quot. 7741 * The declaration of a parameter entity must precede any 7742 * reference to it. 
7743 * Similarly, the declaration of a general entity must 7744 * precede any reference to it which appears in a default 7745 * value in an attribute-list declaration. Note that if 7746 * entities are declared in the external subset or in 7747 * external parameter entities, a non-validating processor 7748 * is not obligated to read and process their declarations; 7749 * for such documents, the rule that an entity must be 7750 * declared is a well-formedness constraint only if 7751 * standalone='yes'. 7752 */ 7753 if (ent == NULL) { 7754 if ((ctxt->standalone == 1) || 7755 ((ctxt->hasExternalSubset == 0) && 7756 (ctxt->hasPErefs == 0))) { 7757 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7758 "Entity '%s' not defined\n", name); 7759 } else { 7760 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7761 "Entity '%s' not defined\n", name); 7762 if ((ctxt->inSubset == 0) && 7763 (ctxt->sax != NULL) && 7764 (ctxt->disableSAX == 0) && 7765 (ctxt->sax->reference != NULL)) { 7766 ctxt->sax->reference(ctxt->userData, name); 7767 } 7768 } 7769 ctxt->valid = 0; 7770 } 7771 7772 /* 7773 * [ WFC: Parsed Entity ] 7774 * An entity reference must not contain the name of an 7775 * unparsed entity 7776 */ 7777 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7778 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7779 "Entity reference to unparsed entity %s\n", name); 7780 } 7781 7782 /* 7783 * [ WFC: No External Entity References ] 7784 * Attribute values cannot contain direct or indirect 7785 * entity references to external entities. 
7786 */ 7787 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7788 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7789 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7790 "Attribute references external entity '%s'\n", name); 7791 } 7792 /* 7793 * [ WFC: No < in Attribute Values ] 7794 * The replacement text of any entity referred to directly or 7795 * indirectly in an attribute value (other than "&lt;") must 7796 * not contain a <. 7797 */ 7798 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7799 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7800 if ((ent->flags & XML_ENT_CHECKED_LT) == 0) { 7801 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) 7802 ent->flags |= XML_ENT_CONTAINS_LT; 7803 ent->flags |= XML_ENT_CHECKED_LT; 7804 } 7805 if (ent->flags & XML_ENT_CONTAINS_LT) 7806 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7807 "'<' in entity '%s' is not allowed in attributes " 7808 "values\n", name); 7809 } 7810 7811 /* 7812 * Internal check, no parameter entities here ... 7813 */ 7814 else { 7815 switch (ent->etype) { 7816 case XML_INTERNAL_PARAMETER_ENTITY: 7817 case XML_EXTERNAL_PARAMETER_ENTITY: 7818 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7819 "Attempt to reference the parameter entity '%s'\n", 7820 name); 7821 break; 7822 default: 7823 break; 7824 } 7825 } 7826 7827 /* 7828 * [ WFC: No Recursion ] 7829 * A parsed entity must not contain a recursive reference 7830 * to itself, either directly or indirectly. 7831 * Done somewhere else 7832 */ 7833 return(ent); 7834} 7835 7836/** 7837 * xmlParseStringEntityRef: 7838 * @ctxt: an XML parser context 7839 * @str: a pointer to an index in the string 7840 * 7841 * parse ENTITY references declarations, but this version parses it from 7842 * a string value. 
7843 * 7844 * [68] EntityRef ::= '&' Name ';' 7845 * 7846 * [ WFC: Entity Declared ] 7847 * In a document without any DTD, a document with only an internal DTD 7848 * subset which contains no parameter entity references, or a document 7849 * with "standalone='yes'", the Name given in the entity reference 7850 * must match that in an entity declaration, except that well-formed 7851 * documents need not declare any of the following entities: amp, lt, 7852 * gt, apos, quot. The declaration of a parameter entity must precede 7853 * any reference to it. Similarly, the declaration of a general entity 7854 * must precede any reference to it which appears in a default value in an 7855 * attribute-list declaration. Note that if entities are declared in the 7856 * external subset or in external parameter entities, a non-validating 7857 * processor is not obligated to read and process their declarations; 7858 * for such documents, the rule that an entity must be declared is a 7859 * well-formedness constraint only if standalone='yes'. 7860 * 7861 * [ WFC: Parsed Entity ] 7862 * An entity reference must not contain the name of an unparsed entity 7863 * 7864 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 7865 * is updated to the current location in the string. 
7866 */ 7867static xmlEntityPtr 7868xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7869 xmlChar *name; 7870 const xmlChar *ptr; 7871 xmlChar cur; 7872 xmlEntityPtr ent = NULL; 7873 7874 if ((str == NULL) || (*str == NULL)) 7875 return(NULL); 7876 ptr = *str; 7877 cur = *ptr; 7878 if (cur != '&') 7879 return(NULL); 7880 7881 ptr++; 7882 name = xmlParseStringName(ctxt, &ptr); 7883 if (name == NULL) { 7884 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7885 "xmlParseStringEntityRef: no name\n"); 7886 *str = ptr; 7887 return(NULL); 7888 } 7889 if (*ptr != ';') { 7890 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7891 xmlFree(name); 7892 *str = ptr; 7893 return(NULL); 7894 } 7895 ptr++; 7896 7897 7898 /* 7899 * Predefined entities override any extra definition 7900 */ 7901 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7902 ent = xmlGetPredefinedEntity(name); 7903 if (ent != NULL) { 7904 xmlFree(name); 7905 *str = ptr; 7906 return(ent); 7907 } 7908 } 7909 7910 /* 7911 * Ask first SAX for entity resolution, otherwise try the 7912 * entities which may have stored in the parser context. 
7913 */ 7914 if (ctxt->sax != NULL) { 7915 if (ctxt->sax->getEntity != NULL) 7916 ent = ctxt->sax->getEntity(ctxt->userData, name); 7917 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7918 ent = xmlGetPredefinedEntity(name); 7919 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7920 ent = xmlSAX2GetEntity(ctxt, name); 7921 } 7922 } 7923 if (ctxt->instate == XML_PARSER_EOF) { 7924 xmlFree(name); 7925 return(NULL); 7926 } 7927 7928 /* 7929 * [ WFC: Entity Declared ] 7930 * In a document without any DTD, a document with only an 7931 * internal DTD subset which contains no parameter entity 7932 * references, or a document with "standalone='yes'", the 7933 * Name given in the entity reference must match that in an 7934 * entity declaration, except that well-formed documents 7935 * need not declare any of the following entities: amp, lt, 7936 * gt, apos, quot. 7937 * The declaration of a parameter entity must precede any 7938 * reference to it. 7939 * Similarly, the declaration of a general entity must 7940 * precede any reference to it which appears in a default 7941 * value in an attribute-list declaration. Note that if 7942 * entities are declared in the external subset or in 7943 * external parameter entities, a non-validating processor 7944 * is not obligated to read and process their declarations; 7945 * for such documents, the rule that an entity must be 7946 * declared is a well-formedness constraint only if 7947 * standalone='yes'. 7948 */ 7949 if (ent == NULL) { 7950 if ((ctxt->standalone == 1) || 7951 ((ctxt->hasExternalSubset == 0) && 7952 (ctxt->hasPErefs == 0))) { 7953 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7954 "Entity '%s' not defined\n", name); 7955 } else { 7956 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7957 "Entity '%s' not defined\n", 7958 name); 7959 } 7960 /* TODO ? 
check regressions ctxt->valid = 0; */ 7961 } 7962 7963 /* 7964 * [ WFC: Parsed Entity ] 7965 * An entity reference must not contain the name of an 7966 * unparsed entity 7967 */ 7968 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7969 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7970 "Entity reference to unparsed entity %s\n", name); 7971 } 7972 7973 /* 7974 * [ WFC: No External Entity References ] 7975 * Attribute values cannot contain direct or indirect 7976 * entity references to external entities. 7977 */ 7978 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7979 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7980 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7981 "Attribute references external entity '%s'\n", name); 7982 } 7983 /* 7984 * [ WFC: No < in Attribute Values ] 7985 * The replacement text of any entity referred to directly or 7986 * indirectly in an attribute value (other than "&lt;") must 7987 * not contain a <. 7988 */ 7989 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7990 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7991 if ((ent->flags & XML_ENT_CHECKED_LT) == 0) { 7992 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) 7993 ent->flags |= XML_ENT_CONTAINS_LT; 7994 ent->flags |= XML_ENT_CHECKED_LT; 7995 } 7996 if (ent->flags & XML_ENT_CONTAINS_LT) 7997 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7998 "'<' in entity '%s' is not allowed in attributes " 7999 "values\n", name); 8000 } 8001 8002 /* 8003 * Internal check, no parameter entities here ... 
8004 */ 8005 else { 8006 switch (ent->etype) { 8007 case XML_INTERNAL_PARAMETER_ENTITY: 8008 case XML_EXTERNAL_PARAMETER_ENTITY: 8009 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 8010 "Attempt to reference the parameter entity '%s'\n", 8011 name); 8012 break; 8013 default: 8014 break; 8015 } 8016 } 8017 8018 /* 8019 * [ WFC: No Recursion ] 8020 * A parsed entity must not contain a recursive reference 8021 * to itself, either directly or indirectly. 8022 * Done somewhere else 8023 */ 8024 8025 xmlFree(name); 8026 *str = ptr; 8027 return(ent); 8028} 8029 8030/** 8031 * xmlParsePEReference: 8032 * @ctxt: an XML parser context 8033 * 8034 * DEPRECATED: Internal function, don't use. 8035 * 8036 * Parse a parameter entity reference. Always consumes '%'. 8037 * 8038 * The entity content is handled directly by pushing it's content as 8039 * a new input stream. 8040 * 8041 * [69] PEReference ::= '%' Name ';' 8042 * 8043 * [ WFC: No Recursion ] 8044 * A parsed entity must not contain a recursive 8045 * reference to itself, either directly or indirectly. 8046 * 8047 * [ WFC: Entity Declared ] 8048 * In a document without any DTD, a document with only an internal DTD 8049 * subset which contains no parameter entity references, or a document 8050 * with "standalone='yes'", ... ... The declaration of a parameter 8051 * entity must precede any reference to it... 8052 * 8053 * [ VC: Entity Declared ] 8054 * In a document with an external subset or external parameter entities 8055 * with "standalone='no'", ... ... The declaration of a parameter entity 8056 * must precede any reference to it... 8057 * 8058 * [ WFC: In DTD ] 8059 * Parameter-entity references may only appear in the DTD. 8060 * NOTE: misleading but this is handled. 
8061 */ 8062void 8063xmlParsePEReference(xmlParserCtxtPtr ctxt) 8064{ 8065 const xmlChar *name; 8066 xmlEntityPtr entity = NULL; 8067 xmlParserInputPtr input; 8068 8069 if (RAW != '%') 8070 return; 8071 NEXT; 8072 name = xmlParseName(ctxt); 8073 if (name == NULL) { 8074 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n"); 8075 return; 8076 } 8077 if (xmlParserDebugEntities) 8078 xmlGenericError(xmlGenericErrorContext, 8079 "PEReference: %s\n", name); 8080 if (RAW != ';') { 8081 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 8082 return; 8083 } 8084 8085 NEXT; 8086 8087 /* 8088 * Request the entity from SAX 8089 */ 8090 if ((ctxt->sax != NULL) && 8091 (ctxt->sax->getParameterEntity != NULL)) 8092 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8093 if (ctxt->instate == XML_PARSER_EOF) 8094 return; 8095 if (entity == NULL) { 8096 /* 8097 * [ WFC: Entity Declared ] 8098 * In a document without any DTD, a document with only an 8099 * internal DTD subset which contains no parameter entity 8100 * references, or a document with "standalone='yes'", ... 8101 * ... The declaration of a parameter entity must precede 8102 * any reference to it... 8103 */ 8104 if ((ctxt->standalone == 1) || 8105 ((ctxt->hasExternalSubset == 0) && 8106 (ctxt->hasPErefs == 0))) { 8107 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8108 "PEReference: %%%s; not found\n", 8109 name); 8110 } else { 8111 /* 8112 * [ VC: Entity Declared ] 8113 * In a document with an external subset or external 8114 * parameter entities with "standalone='no'", ... 8115 * ... The declaration of a parameter entity must 8116 * precede any reference to it... 
8117 */ 8118 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 8119 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 8120 "PEReference: %%%s; not found\n", 8121 name, NULL); 8122 } else 8123 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8124 "PEReference: %%%s; not found\n", 8125 name, NULL); 8126 ctxt->valid = 0; 8127 } 8128 } else { 8129 /* 8130 * Internal checking in case the entity quest barfed 8131 */ 8132 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8133 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8134 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8135 "Internal: %%%s; is not a parameter entity\n", 8136 name, NULL); 8137 } else { 8138 unsigned long parentConsumed; 8139 xmlEntityPtr oldEnt; 8140 8141 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8142 ((ctxt->options & XML_PARSE_NOENT) == 0) && 8143 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 8144 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 8145 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 8146 (ctxt->replaceEntities == 0) && 8147 (ctxt->validate == 0)) 8148 return; 8149 8150 if (entity->flags & XML_ENT_EXPANDING) { 8151 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 8152 xmlHaltParser(ctxt); 8153 return; 8154 } 8155 8156 /* Must be computed from old input before pushing new input. 
*/ 8157 parentConsumed = ctxt->input->parentConsumed; 8158 oldEnt = ctxt->input->entity; 8159 if ((oldEnt == NULL) || 8160 ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8161 ((oldEnt->flags & XML_ENT_PARSED) == 0))) { 8162 xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed); 8163 xmlSaturatedAddSizeT(&parentConsumed, 8164 ctxt->input->cur - ctxt->input->base); 8165 } 8166 8167 input = xmlNewEntityInputStream(ctxt, entity); 8168 if (xmlPushInput(ctxt, input) < 0) { 8169 xmlFreeInputStream(input); 8170 return; 8171 } 8172 8173 entity->flags |= XML_ENT_EXPANDING; 8174 8175 input->parentConsumed = parentConsumed; 8176 8177 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) { 8178 xmlDetectEncoding(ctxt); 8179 8180 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8181 (IS_BLANK_CH(NXT(5)))) { 8182 xmlParseTextDecl(ctxt); 8183 } 8184 } 8185 } 8186 } 8187 ctxt->hasPErefs = 1; 8188} 8189 8190/** 8191 * xmlLoadEntityContent: 8192 * @ctxt: an XML parser context 8193 * @entity: an unloaded system entity 8194 * 8195 * Load the original content of the given system entity from the 8196 * ExternalID/SystemID given. 
This is to be used for Included in Literal 8197 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8198 * 8199 * Returns 0 in case of success and -1 in case of failure 8200 */ 8201static int 8202xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8203 xmlParserInputPtr oldinput, input = NULL; 8204 xmlParserInputPtr *oldinputTab; 8205 const xmlChar *oldencoding; 8206 xmlChar *content = NULL; 8207 size_t length, i; 8208 int oldinputNr, oldinputMax, oldprogressive; 8209 int ret = -1; 8210 int res; 8211 8212 if ((ctxt == NULL) || (entity == NULL) || 8213 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8214 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8215 (entity->content != NULL)) { 8216 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8217 "xmlLoadEntityContent parameter error"); 8218 return(-1); 8219 } 8220 8221 if (xmlParserDebugEntities) 8222 xmlGenericError(xmlGenericErrorContext, 8223 "Reading %s entity content input\n", entity->name); 8224 8225 input = xmlLoadExternalEntity((char *) entity->URI, 8226 (char *) entity->ExternalID, ctxt); 8227 if (input == NULL) { 8228 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8229 "xmlLoadEntityContent input error"); 8230 return(-1); 8231 } 8232 8233 oldinput = ctxt->input; 8234 oldinputNr = ctxt->inputNr; 8235 oldinputMax = ctxt->inputMax; 8236 oldinputTab = ctxt->inputTab; 8237 oldencoding = ctxt->encoding; 8238 oldprogressive = ctxt->progressive; 8239 8240 ctxt->input = NULL; 8241 ctxt->inputNr = 0; 8242 ctxt->inputMax = 1; 8243 ctxt->encoding = NULL; 8244 ctxt->progressive = 0; 8245 ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr)); 8246 if (ctxt->inputTab == NULL) { 8247 xmlErrMemory(ctxt, NULL); 8248 xmlFreeInputStream(input); 8249 goto error; 8250 } 8251 8252 xmlBufResetInput(input->buf->buffer, input); 8253 8254 inputPush(ctxt, input); 8255 8256 xmlDetectEncoding(ctxt); 8257 8258 /* 8259 * Parse a possible text declaration first 8260 */ 8261 if ((CMP5(CUR_PTR, '<', 
'?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 8262 xmlParseTextDecl(ctxt); 8263 /* 8264 * An XML-1.0 document can't reference an entity not XML-1.0 8265 */ 8266 if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) && 8267 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 8268 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 8269 "Version mismatch between document and entity\n"); 8270 } 8271 } 8272 8273 if (ctxt->instate == XML_PARSER_EOF) 8274 goto error; 8275 8276 length = input->cur - input->base; 8277 xmlBufShrink(input->buf->buffer, length); 8278 xmlSaturatedAdd(&ctxt->sizeentities, length); 8279 8280 while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0) 8281 ; 8282 8283 xmlBufResetInput(input->buf->buffer, input); 8284 8285 if (res < 0) { 8286 xmlFatalErr(ctxt, input->buf->error, NULL); 8287 goto error; 8288 } 8289 8290 length = xmlBufUse(input->buf->buffer); 8291 content = xmlBufDetach(input->buf->buffer); 8292 8293 if (length > INT_MAX) { 8294 xmlErrMemory(ctxt, NULL); 8295 goto error; 8296 } 8297 8298 for (i = 0; i < length; ) { 8299 int clen = length - i; 8300 int c = xmlGetUTF8Char(content + i, &clen); 8301 8302 if ((c < 0) || (!IS_CHAR(c))) { 8303 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8304 "xmlLoadEntityContent: invalid char value %d\n", 8305 content[i]); 8306 goto error; 8307 } 8308 i += clen; 8309 } 8310 8311 xmlSaturatedAdd(&ctxt->sizeentities, length); 8312 entity->content = content; 8313 entity->length = length; 8314 content = NULL; 8315 ret = 0; 8316 8317error: 8318 while (ctxt->inputNr > 0) 8319 xmlFreeInputStream(inputPop(ctxt)); 8320 xmlFree(ctxt->inputTab); 8321 xmlFree((xmlChar *) ctxt->encoding); 8322 8323 ctxt->input = oldinput; 8324 ctxt->inputNr = oldinputNr; 8325 ctxt->inputMax = oldinputMax; 8326 ctxt->inputTab = oldinputTab; 8327 ctxt->encoding = oldencoding; 8328 ctxt->progressive = oldprogressive; 8329 8330 xmlFree(content); 8331 8332 return(ret); 8333} 8334 8335/** 8336 * xmlParseStringPEReference: 8337 * @ctxt: 
an XML parser context 8338 * @str: a pointer to an index in the string 8339 * 8340 * parse PEReference declarations 8341 * 8342 * [69] PEReference ::= '%' Name ';' 8343 * 8344 * [ WFC: No Recursion ] 8345 * A parsed entity must not contain a recursive 8346 * reference to itself, either directly or indirectly. 8347 * 8348 * [ WFC: Entity Declared ] 8349 * In a document without any DTD, a document with only an internal DTD 8350 * subset which contains no parameter entity references, or a document 8351 * with "standalone='yes'", ... ... The declaration of a parameter 8352 * entity must precede any reference to it... 8353 * 8354 * [ VC: Entity Declared ] 8355 * In a document with an external subset or external parameter entities 8356 * with "standalone='no'", ... ... The declaration of a parameter entity 8357 * must precede any reference to it... 8358 * 8359 * [ WFC: In DTD ] 8360 * Parameter-entity references may only appear in the DTD. 8361 * NOTE: misleading but this is handled. 8362 * 8363 * Returns the string of the entity content. 
8364 * str is updated to the current value of the index 8365 */ 8366static xmlEntityPtr 8367xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8368 const xmlChar *ptr; 8369 xmlChar cur; 8370 xmlChar *name; 8371 xmlEntityPtr entity = NULL; 8372 8373 if ((str == NULL) || (*str == NULL)) return(NULL); 8374 ptr = *str; 8375 cur = *ptr; 8376 if (cur != '%') 8377 return(NULL); 8378 ptr++; 8379 name = xmlParseStringName(ctxt, &ptr); 8380 if (name == NULL) { 8381 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8382 "xmlParseStringPEReference: no name\n"); 8383 *str = ptr; 8384 return(NULL); 8385 } 8386 cur = *ptr; 8387 if (cur != ';') { 8388 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8389 xmlFree(name); 8390 *str = ptr; 8391 return(NULL); 8392 } 8393 ptr++; 8394 8395 /* 8396 * Request the entity from SAX 8397 */ 8398 if ((ctxt->sax != NULL) && 8399 (ctxt->sax->getParameterEntity != NULL)) 8400 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8401 if (ctxt->instate == XML_PARSER_EOF) { 8402 xmlFree(name); 8403 *str = ptr; 8404 return(NULL); 8405 } 8406 if (entity == NULL) { 8407 /* 8408 * [ WFC: Entity Declared ] 8409 * In a document without any DTD, a document with only an 8410 * internal DTD subset which contains no parameter entity 8411 * references, or a document with "standalone='yes'", ... 8412 * ... The declaration of a parameter entity must precede 8413 * any reference to it... 8414 */ 8415 if ((ctxt->standalone == 1) || 8416 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8417 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8418 "PEReference: %%%s; not found\n", name); 8419 } else { 8420 /* 8421 * [ VC: Entity Declared ] 8422 * In a document with an external subset or external 8423 * parameter entities with "standalone='no'", ... 8424 * ... The declaration of a parameter entity must 8425 * precede any reference to it... 
8426 */ 8427 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8428 "PEReference: %%%s; not found\n", 8429 name, NULL); 8430 ctxt->valid = 0; 8431 } 8432 } else { 8433 /* 8434 * Internal checking in case the entity quest barfed 8435 */ 8436 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8437 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8438 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8439 "%%%s; is not a parameter entity\n", 8440 name, NULL); 8441 } 8442 } 8443 ctxt->hasPErefs = 1; 8444 xmlFree(name); 8445 *str = ptr; 8446 return(entity); 8447} 8448 8449/** 8450 * xmlParseDocTypeDecl: 8451 * @ctxt: an XML parser context 8452 * 8453 * DEPRECATED: Internal function, don't use. 8454 * 8455 * parse a DOCTYPE declaration 8456 * 8457 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8458 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8459 * 8460 * [ VC: Root Element Type ] 8461 * The Name in the document type declaration must match the element 8462 * type of the root element. 8463 */ 8464 8465void 8466xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8467 const xmlChar *name = NULL; 8468 xmlChar *ExternalID = NULL; 8469 xmlChar *URI = NULL; 8470 8471 /* 8472 * We know that '<!DOCTYPE' has been detected. 8473 */ 8474 SKIP(9); 8475 8476 SKIP_BLANKS; 8477 8478 /* 8479 * Parse the DOCTYPE name. 8480 */ 8481 name = xmlParseName(ctxt); 8482 if (name == NULL) { 8483 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8484 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8485 } 8486 ctxt->intSubName = name; 8487 8488 SKIP_BLANKS; 8489 8490 /* 8491 * Check for SystemID and ExternalID 8492 */ 8493 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8494 8495 if ((URI != NULL) || (ExternalID != NULL)) { 8496 ctxt->hasExternalSubset = 1; 8497 } 8498 ctxt->extSubURI = URI; 8499 ctxt->extSubSystem = ExternalID; 8500 8501 SKIP_BLANKS; 8502 8503 /* 8504 * Create and update the internal subset. 
8505 */ 8506 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8507 (!ctxt->disableSAX)) 8508 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8509 if (ctxt->instate == XML_PARSER_EOF) 8510 return; 8511 8512 /* 8513 * Is there any internal subset declarations ? 8514 * they are handled separately in xmlParseInternalSubset() 8515 */ 8516 if (RAW == '[') 8517 return; 8518 8519 /* 8520 * We should be at the end of the DOCTYPE declaration. 8521 */ 8522 if (RAW != '>') { 8523 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8524 } 8525 NEXT; 8526} 8527 8528/** 8529 * xmlParseInternalSubset: 8530 * @ctxt: an XML parser context 8531 * 8532 * parse the internal subset declaration 8533 * 8534 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8535 */ 8536 8537static void 8538xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8539 /* 8540 * Is there any DTD definition ? 8541 */ 8542 if (RAW == '[') { 8543 int baseInputNr = ctxt->inputNr; 8544 ctxt->instate = XML_PARSER_DTD; 8545 NEXT; 8546 /* 8547 * Parse the succession of Markup declarations and 8548 * PEReferences. 8549 * Subsequence (markupdecl | PEReference | S)* 8550 */ 8551 SKIP_BLANKS; 8552 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) && 8553 (ctxt->instate != XML_PARSER_EOF)) { 8554 8555 /* 8556 * Conditional sections are allowed from external entities included 8557 * by PE References in the internal subset. 
8558 */ 8559 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) && 8560 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 8561 xmlParseConditionalSections(ctxt); 8562 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) { 8563 xmlParseMarkupDecl(ctxt); 8564 } else if (RAW == '%') { 8565 xmlParsePEReference(ctxt); 8566 } else { 8567 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8568 "xmlParseInternalSubset: error detected in" 8569 " Markup declaration\n"); 8570 xmlHaltParser(ctxt); 8571 return; 8572 } 8573 SKIP_BLANKS; 8574 SHRINK; 8575 GROW; 8576 } 8577 if (RAW == ']') { 8578 NEXT; 8579 SKIP_BLANKS; 8580 } 8581 } 8582 8583 /* 8584 * We should be at the end of the DOCTYPE declaration. 8585 */ 8586 if (RAW != '>') { 8587 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8588 return; 8589 } 8590 NEXT; 8591} 8592 8593#ifdef LIBXML_SAX1_ENABLED 8594/** 8595 * xmlParseAttribute: 8596 * @ctxt: an XML parser context 8597 * @value: a xmlChar ** used to store the value of the attribute 8598 * 8599 * DEPRECATED: Internal function, don't use. 8600 * 8601 * parse an attribute 8602 * 8603 * [41] Attribute ::= Name Eq AttValue 8604 * 8605 * [ WFC: No External Entity References ] 8606 * Attribute values cannot contain direct or indirect entity references 8607 * to external entities. 8608 * 8609 * [ WFC: No < in Attribute Values ] 8610 * The replacement text of any entity referred to directly or indirectly in 8611 * an attribute value (other than "&lt;") must not contain a <. 8612 * 8613 * [ VC: Attribute Value Type ] 8614 * The attribute must have been declared; the value must be of the type 8615 * declared for it. 8616 * 8617 * [25] Eq ::= S? '=' S? 8618 * 8619 * With namespace: 8620 * 8621 * [NS 11] Attribute ::= QName Eq AttValue 8622 * 8623 * Also the case QName == xmlns:??? is handled independently as a namespace 8624 * definition. 8625 * 8626 * Returns the attribute name, and the value in *value. 
8627 */ 8628 8629const xmlChar * 8630xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8631 const xmlChar *name; 8632 xmlChar *val; 8633 8634 *value = NULL; 8635 GROW; 8636 name = xmlParseName(ctxt); 8637 if (name == NULL) { 8638 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8639 "error parsing attribute name\n"); 8640 return(NULL); 8641 } 8642 8643 /* 8644 * read the value 8645 */ 8646 SKIP_BLANKS; 8647 if (RAW == '=') { 8648 NEXT; 8649 SKIP_BLANKS; 8650 val = xmlParseAttValue(ctxt); 8651 ctxt->instate = XML_PARSER_CONTENT; 8652 } else { 8653 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8654 "Specification mandates value for attribute %s\n", name); 8655 return(name); 8656 } 8657 8658 /* 8659 * Check that xml:lang conforms to the specification 8660 * No more registered as an error, just generate a warning now 8661 * since this was deprecated in XML second edition 8662 */ 8663 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8664 if (!xmlCheckLanguageID(val)) { 8665 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8666 "Malformed value for xml:lang : %s\n", 8667 val, NULL); 8668 } 8669 } 8670 8671 /* 8672 * Check that xml:space conforms to the specification 8673 */ 8674 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8675 if (xmlStrEqual(val, BAD_CAST "default")) 8676 *(ctxt->space) = 0; 8677 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8678 *(ctxt->space) = 1; 8679 else { 8680 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8681"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8682 val, NULL); 8683 } 8684 } 8685 8686 *value = val; 8687 return(name); 8688} 8689 8690/** 8691 * xmlParseStartTag: 8692 * @ctxt: an XML parser context 8693 * 8694 * DEPRECATED: Internal function, don't use. 8695 * 8696 * Parse a start tag. Always consumes '<'. 8697 * 8698 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8699 * 8700 * [ WFC: Unique Att Spec ] 8701 * No attribute name may appear more than once in the same start-tag or 8702 * empty-element tag. 8703 * 8704 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8705 * 8706 * [ WFC: Unique Att Spec ] 8707 * No attribute name may appear more than once in the same start-tag or 8708 * empty-element tag. 8709 * 8710 * With namespace: 8711 * 8712 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8713 * 8714 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8715 * 8716 * Returns the element name parsed 8717 */ 8718 8719const xmlChar * 8720xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8721 const xmlChar *name; 8722 const xmlChar *attname; 8723 xmlChar *attvalue; 8724 const xmlChar **atts = ctxt->atts; 8725 int nbatts = 0; 8726 int maxatts = ctxt->maxatts; 8727 int i; 8728 8729 if (RAW != '<') return(NULL); 8730 NEXT1; 8731 8732 name = xmlParseName(ctxt); 8733 if (name == NULL) { 8734 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8735 "xmlParseStartTag: invalid element name\n"); 8736 return(NULL); 8737 } 8738 8739 /* 8740 * Now parse the attributes, it ends up with the ending 8741 * 8742 * (S Attribute)* S? 8743 */ 8744 SKIP_BLANKS; 8745 GROW; 8746 8747 while (((RAW != '>') && 8748 ((RAW != '/') || (NXT(1) != '>')) && 8749 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8750 attname = xmlParseAttribute(ctxt, &attvalue); 8751 if (attname == NULL) { 8752 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8753 "xmlParseStartTag: problem parsing attributes\n"); 8754 break; 8755 } 8756 if (attvalue != NULL) { 8757 /* 8758 * [ WFC: Unique Att Spec ] 8759 * No attribute name may appear more than once in the same 8760 * start-tag or empty-element tag. 
8761 */ 8762 for (i = 0; i < nbatts;i += 2) { 8763 if (xmlStrEqual(atts[i], attname)) { 8764 xmlErrAttributeDup(ctxt, NULL, attname); 8765 xmlFree(attvalue); 8766 goto failed; 8767 } 8768 } 8769 /* 8770 * Add the pair to atts 8771 */ 8772 if (atts == NULL) { 8773 maxatts = 22; /* allow for 10 attrs by default */ 8774 atts = (const xmlChar **) 8775 xmlMalloc(maxatts * sizeof(xmlChar *)); 8776 if (atts == NULL) { 8777 xmlErrMemory(ctxt, NULL); 8778 if (attvalue != NULL) 8779 xmlFree(attvalue); 8780 goto failed; 8781 } 8782 ctxt->atts = atts; 8783 ctxt->maxatts = maxatts; 8784 } else if (nbatts + 4 > maxatts) { 8785 const xmlChar **n; 8786 8787 maxatts *= 2; 8788 n = (const xmlChar **) xmlRealloc((void *) atts, 8789 maxatts * sizeof(const xmlChar *)); 8790 if (n == NULL) { 8791 xmlErrMemory(ctxt, NULL); 8792 if (attvalue != NULL) 8793 xmlFree(attvalue); 8794 goto failed; 8795 } 8796 atts = n; 8797 ctxt->atts = atts; 8798 ctxt->maxatts = maxatts; 8799 } 8800 atts[nbatts++] = attname; 8801 atts[nbatts++] = attvalue; 8802 atts[nbatts] = NULL; 8803 atts[nbatts + 1] = NULL; 8804 } else { 8805 if (attvalue != NULL) 8806 xmlFree(attvalue); 8807 } 8808 8809failed: 8810 8811 GROW 8812 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8813 break; 8814 if (SKIP_BLANKS == 0) { 8815 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8816 "attributes construct error\n"); 8817 } 8818 SHRINK; 8819 GROW; 8820 } 8821 8822 /* 8823 * SAX: Start of Element ! 
8824 */ 8825 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8826 (!ctxt->disableSAX)) { 8827 if (nbatts > 0) 8828 ctxt->sax->startElement(ctxt->userData, name, atts); 8829 else 8830 ctxt->sax->startElement(ctxt->userData, name, NULL); 8831 } 8832 8833 if (atts != NULL) { 8834 /* Free only the content strings */ 8835 for (i = 1;i < nbatts;i+=2) 8836 if (atts[i] != NULL) 8837 xmlFree((xmlChar *) atts[i]); 8838 } 8839 return(name); 8840} 8841 8842/** 8843 * xmlParseEndTag1: 8844 * @ctxt: an XML parser context 8845 * @line: line of the start tag 8846 * @nsNr: number of namespaces on the start tag 8847 * 8848 * Parse an end tag. Always consumes '</'. 8849 * 8850 * [42] ETag ::= '</' Name S? '>' 8851 * 8852 * With namespace 8853 * 8854 * [NS 9] ETag ::= '</' QName S? '>' 8855 */ 8856 8857static void 8858xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8859 const xmlChar *name; 8860 8861 GROW; 8862 if ((RAW != '<') || (NXT(1) != '/')) { 8863 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8864 "xmlParseEndTag: '</' not found\n"); 8865 return; 8866 } 8867 SKIP(2); 8868 8869 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8870 8871 /* 8872 * We should definitely be at the ending "S? '>'" part 8873 */ 8874 GROW; 8875 SKIP_BLANKS; 8876 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8877 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8878 } else 8879 NEXT1; 8880 8881 /* 8882 * [ WFC: Element Type Match ] 8883 * The Name in an element's end-tag must match the element type in the 8884 * start-tag. 
8885 * 8886 */ 8887 if (name != (xmlChar*)1) { 8888 if (name == NULL) name = BAD_CAST "unparsable"; 8889 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8890 "Opening and ending tag mismatch: %s line %d and %s\n", 8891 ctxt->name, line, name); 8892 } 8893 8894 /* 8895 * SAX: End of Tag 8896 */ 8897 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8898 (!ctxt->disableSAX)) 8899 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8900 8901 namePop(ctxt); 8902 spacePop(ctxt); 8903 return; 8904} 8905 8906/** 8907 * xmlParseEndTag: 8908 * @ctxt: an XML parser context 8909 * 8910 * DEPRECATED: Internal function, don't use. 8911 * 8912 * parse an end of tag 8913 * 8914 * [42] ETag ::= '</' Name S? '>' 8915 * 8916 * With namespace 8917 * 8918 * [NS 9] ETag ::= '</' QName S? '>' 8919 */ 8920 8921void 8922xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8923 xmlParseEndTag1(ctxt, 0); 8924} 8925#endif /* LIBXML_SAX1_ENABLED */ 8926 8927/************************************************************************ 8928 * * 8929 * SAX 2 specific operations * 8930 * * 8931 ************************************************************************/ 8932 8933/** 8934 * xmlParseQNameHashed: 8935 * @ctxt: an XML parser context 8936 * @prefix: pointer to store the prefix part 8937 * 8938 * parse an XML Namespace QName 8939 * 8940 * [6] QName ::= (Prefix ':')? 
LocalPart 8941 * [7] Prefix ::= NCName 8942 * [8] LocalPart ::= NCName 8943 * 8944 * Returns the Name parsed or NULL 8945 */ 8946 8947static xmlHashedString 8948xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) { 8949 xmlHashedString l, p; 8950 int start, isNCName = 0; 8951 8952 l.name = NULL; 8953 p.name = NULL; 8954 8955 GROW; 8956 if (ctxt->instate == XML_PARSER_EOF) 8957 return(l); 8958 start = CUR_PTR - BASE_PTR; 8959 8960 l = xmlParseNCName(ctxt); 8961 if (l.name != NULL) { 8962 isNCName = 1; 8963 if (CUR == ':') { 8964 NEXT; 8965 p = l; 8966 l = xmlParseNCName(ctxt); 8967 } 8968 } 8969 if ((l.name == NULL) || (CUR == ':')) { 8970 xmlChar *tmp; 8971 8972 l.name = NULL; 8973 p.name = NULL; 8974 if (ctxt->instate == XML_PARSER_EOF) 8975 return(l); 8976 if ((isNCName == 0) && (CUR != ':')) 8977 return(l); 8978 tmp = xmlParseNmtoken(ctxt); 8979 if (tmp != NULL) 8980 xmlFree(tmp); 8981 if (ctxt->instate == XML_PARSER_EOF) 8982 return(l); 8983 l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start, 8984 CUR_PTR - (BASE_PTR + start)); 8985 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8986 "Failed to parse QName '%s'\n", l.name, NULL, NULL); 8987 } 8988 8989 *prefix = p; 8990 return(l); 8991} 8992 8993/** 8994 * xmlParseQName: 8995 * @ctxt: an XML parser context 8996 * @prefix: pointer to store the prefix part 8997 * 8998 * parse an XML Namespace QName 8999 * 9000 * [6] QName ::= (Prefix ':')? LocalPart 9001 * [7] Prefix ::= NCName 9002 * [8] LocalPart ::= NCName 9003 * 9004 * Returns the Name parsed or NULL 9005 */ 9006 9007static const xmlChar * 9008xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 9009 xmlHashedString n, p; 9010 9011 n = xmlParseQNameHashed(ctxt, &p); 9012 if (n.name == NULL) 9013 return(NULL); 9014 *prefix = p.name; 9015 return(n.name); 9016} 9017 9018/** 9019 * xmlParseQNameAndCompare: 9020 * @ctxt: an XML parser context 9021 * @name: the localname 9022 * @prefix: the prefix, if any. 
9023 * 9024 * parse an XML name and compares for match 9025 * (specialized for endtag parsing) 9026 * 9027 * Returns NULL for an illegal name, (xmlChar*) 1 for success 9028 * and the name for mismatch 9029 */ 9030 9031static const xmlChar * 9032xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 9033 xmlChar const *prefix) { 9034 const xmlChar *cmp; 9035 const xmlChar *in; 9036 const xmlChar *ret; 9037 const xmlChar *prefix2; 9038 9039 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 9040 9041 GROW; 9042 in = ctxt->input->cur; 9043 9044 cmp = prefix; 9045 while (*in != 0 && *in == *cmp) { 9046 ++in; 9047 ++cmp; 9048 } 9049 if ((*cmp == 0) && (*in == ':')) { 9050 in++; 9051 cmp = name; 9052 while (*in != 0 && *in == *cmp) { 9053 ++in; 9054 ++cmp; 9055 } 9056 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 9057 /* success */ 9058 ctxt->input->col += in - ctxt->input->cur; 9059 ctxt->input->cur = in; 9060 return((const xmlChar*) 1); 9061 } 9062 } 9063 /* 9064 * all strings coms from the dictionary, equality can be done directly 9065 */ 9066 ret = xmlParseQName (ctxt, &prefix2); 9067 if (ret == NULL) 9068 return(NULL); 9069 if ((ret == name) && (prefix == prefix2)) 9070 return((const xmlChar*) 1); 9071 return ret; 9072} 9073 9074/** 9075 * xmlParseAttValueInternal: 9076 * @ctxt: an XML parser context 9077 * @len: attribute len result 9078 * @alloc: whether the attribute was reallocated as a new string 9079 * @normalize: if 1 then further non-CDATA normalization must be done 9080 * 9081 * parse a value for an attribute. 9082 * NOTE: if no normalization is needed, the routine will return pointers 9083 * directly from the data buffer. 
9084 * 9085 * 3.3.3 Attribute-Value Normalization: 9086 * Before the value of an attribute is passed to the application or 9087 * checked for validity, the XML processor must normalize it as follows: 9088 * - a character reference is processed by appending the referenced 9089 * character to the attribute value 9090 * - an entity reference is processed by recursively processing the 9091 * replacement text of the entity 9092 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 9093 * appending #x20 to the normalized value, except that only a single 9094 * #x20 is appended for a "#xD#xA" sequence that is part of an external 9095 * parsed entity or the literal entity value of an internal parsed entity 9096 * - other characters are processed by appending them to the normalized value 9097 * If the declared value is not CDATA, then the XML processor must further 9098 * process the normalized attribute value by discarding any leading and 9099 * trailing space (#x20) characters, and by replacing sequences of space 9100 * (#x20) characters by a single space (#x20) character. 9101 * All attributes for which no declaration has been read should be treated 9102 * by a non-validating parser as if declared CDATA. 9103 * 9104 * Returns the AttValue parsed or NULL. The value has to be freed by the 9105 * caller if it was copied, this can be detected by val[*len] == 0. 
9106 */ 9107 9108#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \ 9109 const xmlChar *oldbase = ctxt->input->base;\ 9110 GROW;\ 9111 if (ctxt->instate == XML_PARSER_EOF)\ 9112 return(NULL);\ 9113 if (oldbase != ctxt->input->base) {\ 9114 ptrdiff_t delta = ctxt->input->base - oldbase;\ 9115 start = start + delta;\ 9116 in = in + delta;\ 9117 }\ 9118 end = ctxt->input->end; 9119 9120static xmlChar * 9121xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 9122 int normalize) 9123{ 9124 xmlChar limit = 0; 9125 const xmlChar *in = NULL, *start, *end, *last; 9126 xmlChar *ret = NULL; 9127 int line, col; 9128 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 9129 XML_MAX_HUGE_LENGTH : 9130 XML_MAX_TEXT_LENGTH; 9131 9132 GROW; 9133 in = (xmlChar *) CUR_PTR; 9134 line = ctxt->input->line; 9135 col = ctxt->input->col; 9136 if (*in != '"' && *in != '\'') { 9137 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 9138 return (NULL); 9139 } 9140 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 9141 9142 /* 9143 * try to handle in this routine the most common case where no 9144 * allocation of a new string is required and where content is 9145 * pure ASCII. 
9146 */ 9147 limit = *in++; 9148 col++; 9149 end = ctxt->input->end; 9150 start = in; 9151 if (in >= end) { 9152 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9153 } 9154 if (normalize) { 9155 /* 9156 * Skip any leading spaces 9157 */ 9158 while ((in < end) && (*in != limit) && 9159 ((*in == 0x20) || (*in == 0x9) || 9160 (*in == 0xA) || (*in == 0xD))) { 9161 if (*in == 0xA) { 9162 line++; col = 1; 9163 } else { 9164 col++; 9165 } 9166 in++; 9167 start = in; 9168 if (in >= end) { 9169 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9170 if ((in - start) > maxLength) { 9171 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9172 "AttValue length too long\n"); 9173 return(NULL); 9174 } 9175 } 9176 } 9177 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9178 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9179 col++; 9180 if ((*in++ == 0x20) && (*in == 0x20)) break; 9181 if (in >= end) { 9182 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9183 if ((in - start) > maxLength) { 9184 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9185 "AttValue length too long\n"); 9186 return(NULL); 9187 } 9188 } 9189 } 9190 last = in; 9191 /* 9192 * skip the trailing blanks 9193 */ 9194 while ((last[-1] == 0x20) && (last > start)) last--; 9195 while ((in < end) && (*in != limit) && 9196 ((*in == 0x20) || (*in == 0x9) || 9197 (*in == 0xA) || (*in == 0xD))) { 9198 if (*in == 0xA) { 9199 line++, col = 1; 9200 } else { 9201 col++; 9202 } 9203 in++; 9204 if (in >= end) { 9205 const xmlChar *oldbase = ctxt->input->base; 9206 GROW; 9207 if (ctxt->instate == XML_PARSER_EOF) 9208 return(NULL); 9209 if (oldbase != ctxt->input->base) { 9210 ptrdiff_t delta = ctxt->input->base - oldbase; 9211 start = start + delta; 9212 in = in + delta; 9213 last = last + delta; 9214 } 9215 end = ctxt->input->end; 9216 if ((in - start) > maxLength) { 9217 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9218 "AttValue length too long\n"); 9219 return(NULL); 9220 } 9221 } 9222 } 
9223 if ((in - start) > maxLength) { 9224 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9225 "AttValue length too long\n"); 9226 return(NULL); 9227 } 9228 if (*in != limit) goto need_complex; 9229 } else { 9230 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9231 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9232 in++; 9233 col++; 9234 if (in >= end) { 9235 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) 9236 if ((in - start) > maxLength) { 9237 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9238 "AttValue length too long\n"); 9239 return(NULL); 9240 } 9241 } 9242 } 9243 last = in; 9244 if ((in - start) > maxLength) { 9245 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9246 "AttValue length too long\n"); 9247 return(NULL); 9248 } 9249 if (*in != limit) goto need_complex; 9250 } 9251 in++; 9252 col++; 9253 if (len != NULL) { 9254 if (alloc) *alloc = 0; 9255 *len = last - start; 9256 ret = (xmlChar *) start; 9257 } else { 9258 if (alloc) *alloc = 1; 9259 ret = xmlStrndup(start, last - start); 9260 } 9261 CUR_PTR = in; 9262 ctxt->input->line = line; 9263 ctxt->input->col = col; 9264 return ret; 9265need_complex: 9266 if (alloc) *alloc = 1; 9267 return xmlParseAttValueComplex(ctxt, len, normalize); 9268} 9269 9270/** 9271 * xmlParseAttribute2: 9272 * @ctxt: an XML parser context 9273 * @pref: the element prefix 9274 * @elem: the element name 9275 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9276 * @value: a xmlChar ** used to store the value of the attribute 9277 * @len: an int * to save the length of the attribute 9278 * @alloc: an int * to indicate if the attribute was allocated 9279 * 9280 * parse an attribute in the new SAX2 framework. 9281 * 9282 * Returns the attribute name, and the value in *value, . 
9283 */ 9284 9285static xmlHashedString 9286xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9287 const xmlChar * pref, const xmlChar * elem, 9288 xmlHashedString * hprefix, xmlChar ** value, 9289 int *len, int *alloc) 9290{ 9291 xmlHashedString hname; 9292 const xmlChar *prefix, *name; 9293 xmlChar *val, *internal_val = NULL; 9294 int normalize = 0; 9295 9296 *value = NULL; 9297 GROW; 9298 hname = xmlParseQNameHashed(ctxt, hprefix); 9299 if (hname.name == NULL) { 9300 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9301 "error parsing attribute name\n"); 9302 return(hname); 9303 } 9304 name = hname.name; 9305 if (hprefix->name != NULL) 9306 prefix = hprefix->name; 9307 else 9308 prefix = NULL; 9309 9310 /* 9311 * get the type if needed 9312 */ 9313 if (ctxt->attsSpecial != NULL) { 9314 int type; 9315 9316 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial, 9317 pref, elem, 9318 prefix, name); 9319 if (type != 0) 9320 normalize = 1; 9321 } 9322 9323 /* 9324 * read the value 9325 */ 9326 SKIP_BLANKS; 9327 if (RAW == '=') { 9328 NEXT; 9329 SKIP_BLANKS; 9330 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9331 if (val == NULL) { 9332 hname.name = NULL; 9333 return(hname); 9334 } 9335 if (normalize) { 9336 /* 9337 * Sometimes a second normalisation pass for spaces is needed 9338 * but that only happens if charrefs or entities references 9339 * have been used in the attribute value, i.e. the attribute 9340 * value have been extracted in an allocated string already. 
9341 */ 9342 if (*alloc) { 9343 const xmlChar *val2; 9344 9345 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9346 if ((val2 != NULL) && (val2 != val)) { 9347 xmlFree(val); 9348 val = (xmlChar *) val2; 9349 } 9350 } 9351 } 9352 ctxt->instate = XML_PARSER_CONTENT; 9353 } else { 9354 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9355 "Specification mandates value for attribute %s\n", 9356 name); 9357 return(hname); 9358 } 9359 9360 if (prefix == ctxt->str_xml) { 9361 /* 9362 * Check that xml:lang conforms to the specification 9363 * No more registered as an error, just generate a warning now 9364 * since this was deprecated in XML second edition 9365 */ 9366 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9367 internal_val = xmlStrndup(val, *len); 9368 if (!xmlCheckLanguageID(internal_val)) { 9369 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9370 "Malformed value for xml:lang : %s\n", 9371 internal_val, NULL); 9372 } 9373 } 9374 9375 /* 9376 * Check that xml:space conforms to the specification 9377 */ 9378 if (xmlStrEqual(name, BAD_CAST "space")) { 9379 internal_val = xmlStrndup(val, *len); 9380 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9381 *(ctxt->space) = 0; 9382 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9383 *(ctxt->space) = 1; 9384 else { 9385 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9386 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9387 internal_val, NULL); 9388 } 9389 } 9390 if (internal_val) { 9391 xmlFree(internal_val); 9392 } 9393 } 9394 9395 *value = val; 9396 return (hname); 9397} 9398 9399/** 9400 * xmlAttrHashInsert: 9401 * @ctxt: parser context 9402 * @size: size of the hash table 9403 * @name: attribute name 9404 * @uri: namespace uri 9405 * @hashValue: combined hash value of name and uri 9406 * @aindex: attribute index (this is a multiple of 5) 9407 * 9408 * Inserts a new attribute into the hash table. 
9409 * 9410 * Returns INT_MAX if no existing attribute was found, the attribute 9411 * index if an attribute was found, -1 if a memory allocation failed. 9412 */ 9413static int 9414xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name, 9415 const xmlChar *uri, unsigned hashValue, int aindex) { 9416 xmlAttrHashBucket *table = ctxt->attrHash; 9417 xmlAttrHashBucket *bucket; 9418 unsigned hindex; 9419 9420 hindex = hashValue & (size - 1); 9421 bucket = &table[hindex]; 9422 9423 while (bucket->index >= 0) { 9424 const xmlChar **atts = &ctxt->atts[bucket->index]; 9425 9426 if (name == atts[0]) { 9427 int nsIndex = (int) (ptrdiff_t) atts[2]; 9428 9429 if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) : 9430 (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) : 9431 (uri == ctxt->nsTab[nsIndex * 2 + 1])) 9432 return(bucket->index); 9433 } 9434 9435 hindex++; 9436 bucket++; 9437 if (hindex >= size) { 9438 hindex = 0; 9439 bucket = table; 9440 } 9441 } 9442 9443 bucket->index = aindex; 9444 9445 return(INT_MAX); 9446} 9447 9448/** 9449 * xmlParseStartTag2: 9450 * @ctxt: an XML parser context 9451 * 9452 * Parse a start tag. Always consumes '<'. 9453 * 9454 * This routine is called when running SAX2 parsing 9455 * 9456 * [40] STag ::= '<' Name (S Attribute)* S? '>' 9457 * 9458 * [ WFC: Unique Att Spec ] 9459 * No attribute name may appear more than once in the same start-tag or 9460 * empty-element tag. 9461 * 9462 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9463 * 9464 * [ WFC: Unique Att Spec ] 9465 * No attribute name may appear more than once in the same start-tag or 9466 * empty-element tag. 9467 * 9468 * With namespace: 9469 * 9470 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9471 * 9472 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? 
'/>' 9473 * 9474 * Returns the element name parsed 9475 */ 9476 9477static const xmlChar * 9478xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9479 const xmlChar **URI, int *nbNsPtr) { 9480 xmlHashedString hlocalname; 9481 xmlHashedString hprefix; 9482 xmlHashedString hattname; 9483 xmlHashedString haprefix; 9484 const xmlChar *localname; 9485 const xmlChar *prefix; 9486 const xmlChar *attname; 9487 const xmlChar *aprefix; 9488 const xmlChar *uri; 9489 xmlChar *attvalue = NULL; 9490 const xmlChar **atts = ctxt->atts; 9491 unsigned attrHashSize = 0; 9492 int maxatts = ctxt->maxatts; 9493 int nratts, nbatts, nbdef, inputid; 9494 int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts; 9495 int alloc = 0; 9496 9497 if (RAW != '<') return(NULL); 9498 NEXT1; 9499 9500 inputid = ctxt->input->id; 9501 nbatts = 0; 9502 nratts = 0; 9503 nbdef = 0; 9504 nbNs = 0; 9505 nbTotalDef = 0; 9506 attval = 0; 9507 9508 if (xmlParserNsStartElement(ctxt->nsdb) < 0) { 9509 xmlErrMemory(ctxt, NULL); 9510 return(NULL); 9511 } 9512 9513 hlocalname = xmlParseQNameHashed(ctxt, &hprefix); 9514 if (hlocalname.name == NULL) { 9515 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9516 "StartTag: invalid element name\n"); 9517 return(NULL); 9518 } 9519 localname = hlocalname.name; 9520 prefix = hprefix.name; 9521 9522 /* 9523 * Now parse the attributes, it ends up with the ending 9524 * 9525 * (S Attribute)* S? 9526 */ 9527 SKIP_BLANKS; 9528 GROW; 9529 9530 /* 9531 * The ctxt->atts array will be ultimately passed to the SAX callback 9532 * containing five xmlChar pointers for each attribute: 9533 * 9534 * [0] attribute name 9535 * [1] attribute prefix 9536 * [2] namespace URI 9537 * [3] attribute value 9538 * [4] end of attribute value 9539 * 9540 * To save memory, we reuse this array temporarily and store integers 9541 * in these pointer variables. 
9542 * 9543 * [0] attribute name 9544 * [1] attribute prefix 9545 * [2] hash value of attribute prefix, and later namespace index 9546 * [3] for non-allocated values: ptrdiff_t offset into input buffer 9547 * [4] for non-allocated values: ptrdiff_t offset into input buffer 9548 * 9549 * The ctxt->attallocs array contains an additional unsigned int for 9550 * each attribute, containing the hash value of the attribute name 9551 * and the alloc flag in bit 31. 9552 */ 9553 9554 while (((RAW != '>') && 9555 ((RAW != '/') || (NXT(1) != '>')) && 9556 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9557 int len = -1; 9558 9559 hattname = xmlParseAttribute2(ctxt, prefix, localname, 9560 &haprefix, &attvalue, &len, 9561 &alloc); 9562 if (hattname.name == NULL) { 9563 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9564 "xmlParseStartTag: problem parsing attributes\n"); 9565 break; 9566 } 9567 if (attvalue == NULL) 9568 goto next_attr; 9569 attname = hattname.name; 9570 aprefix = haprefix.name; 9571 if (len < 0) len = xmlStrlen(attvalue); 9572 9573 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9574 xmlHashedString huri; 9575 xmlURIPtr parsedUri; 9576 9577 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len); 9578 uri = huri.name; 9579 if (uri == NULL) { 9580 xmlErrMemory(ctxt, NULL); 9581 goto next_attr; 9582 } 9583 if (*uri != 0) { 9584 parsedUri = xmlParseURI((const char *) uri); 9585 if (parsedUri == NULL) { 9586 xmlNsErr(ctxt, XML_WAR_NS_URI, 9587 "xmlns: '%s' is not a valid URI\n", 9588 uri, NULL, NULL); 9589 } else { 9590 if (parsedUri->scheme == NULL) { 9591 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9592 "xmlns: URI %s is not absolute\n", 9593 uri, NULL, NULL); 9594 } 9595 xmlFreeURI(parsedUri); 9596 } 9597 if (uri == ctxt->str_xml_ns) { 9598 if (attname != ctxt->str_xml) { 9599 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9600 "xml namespace URI cannot be the default namespace\n", 9601 NULL, NULL, NULL); 9602 } 9603 goto next_attr; 9604 } 9605 if ((len 
== 29) && 9606 (xmlStrEqual(uri, 9607 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9608 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9609 "reuse of the xmlns namespace name is forbidden\n", 9610 NULL, NULL, NULL); 9611 goto next_attr; 9612 } 9613 } 9614 9615 if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0) 9616 nbNs++; 9617 } else if (aprefix == ctxt->str_xmlns) { 9618 xmlHashedString huri; 9619 xmlURIPtr parsedUri; 9620 9621 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len); 9622 uri = huri.name; 9623 if (uri == NULL) { 9624 xmlErrMemory(ctxt, NULL); 9625 goto next_attr; 9626 } 9627 9628 if (attname == ctxt->str_xml) { 9629 if (uri != ctxt->str_xml_ns) { 9630 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9631 "xml namespace prefix mapped to wrong URI\n", 9632 NULL, NULL, NULL); 9633 } 9634 /* 9635 * Do not keep a namespace definition node 9636 */ 9637 goto next_attr; 9638 } 9639 if (uri == ctxt->str_xml_ns) { 9640 if (attname != ctxt->str_xml) { 9641 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9642 "xml namespace URI mapped to wrong prefix\n", 9643 NULL, NULL, NULL); 9644 } 9645 goto next_attr; 9646 } 9647 if (attname == ctxt->str_xmlns) { 9648 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9649 "redefinition of the xmlns prefix is forbidden\n", 9650 NULL, NULL, NULL); 9651 goto next_attr; 9652 } 9653 if ((len == 29) && 9654 (xmlStrEqual(uri, 9655 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9656 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9657 "reuse of the xmlns namespace name is forbidden\n", 9658 NULL, NULL, NULL); 9659 goto next_attr; 9660 } 9661 if ((uri == NULL) || (uri[0] == 0)) { 9662 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9663 "xmlns:%s: Empty XML namespace is not allowed\n", 9664 attname, NULL, NULL); 9665 goto next_attr; 9666 } else { 9667 parsedUri = xmlParseURI((const char *) uri); 9668 if (parsedUri == NULL) { 9669 xmlNsErr(ctxt, XML_WAR_NS_URI, 9670 "xmlns:%s: '%s' is not a valid URI\n", 9671 attname, uri, NULL); 9672 } else { 9673 if ((ctxt->pedantic) && 
(parsedUri->scheme == NULL)) { 9674 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9675 "xmlns:%s: URI %s is not absolute\n", 9676 attname, uri, NULL); 9677 } 9678 xmlFreeURI(parsedUri); 9679 } 9680 } 9681 9682 if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0) 9683 nbNs++; 9684 } else { 9685 /* 9686 * Populate attributes array, see above for repurposing 9687 * of xmlChar pointers. 9688 */ 9689 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9690 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9691 goto next_attr; 9692 } 9693 maxatts = ctxt->maxatts; 9694 atts = ctxt->atts; 9695 } 9696 ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) | 9697 ((unsigned) alloc << 31); 9698 atts[nbatts++] = attname; 9699 atts[nbatts++] = aprefix; 9700 atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue; 9701 if (alloc) { 9702 atts[nbatts++] = attvalue; 9703 attvalue += len; 9704 atts[nbatts++] = attvalue; 9705 } else { 9706 /* 9707 * attvalue points into the input buffer which can be 9708 * reallocated. Store differences to input->base instead. 9709 * The pointers will be reconstructed later. 
9710 */ 9711 atts[nbatts++] = (void *) (attvalue - BASE_PTR); 9712 attvalue += len; 9713 atts[nbatts++] = (void *) (attvalue - BASE_PTR); 9714 } 9715 /* 9716 * tag if some deallocation is needed 9717 */ 9718 if (alloc != 0) attval = 1; 9719 attvalue = NULL; /* moved into atts */ 9720 } 9721 9722next_attr: 9723 if ((attvalue != NULL) && (alloc != 0)) { 9724 xmlFree(attvalue); 9725 attvalue = NULL; 9726 } 9727 9728 GROW 9729 if (ctxt->instate == XML_PARSER_EOF) 9730 break; 9731 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9732 break; 9733 if (SKIP_BLANKS == 0) { 9734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9735 "attributes construct error\n"); 9736 break; 9737 } 9738 GROW; 9739 } 9740 9741 if (ctxt->input->id != inputid) { 9742 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9743 "Unexpected change of input\n"); 9744 localname = NULL; 9745 goto done; 9746 } 9747 9748 /* 9749 * Namespaces from default attributes 9750 */ 9751 if (ctxt->attsDefault != NULL) { 9752 xmlDefAttrsPtr defaults; 9753 9754 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9755 if (defaults != NULL) { 9756 for (i = 0; i < defaults->nbAttrs; i++) { 9757 xmlDefAttr *attr = &defaults->attrs[i]; 9758 9759 attname = attr->name.name; 9760 aprefix = attr->prefix.name; 9761 9762 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9763 xmlParserEntityCheck(ctxt, attr->expandedSize); 9764 9765 if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0) 9766 nbNs++; 9767 } else if (aprefix == ctxt->str_xmlns) { 9768 xmlParserEntityCheck(ctxt, attr->expandedSize); 9769 9770 if (xmlParserNsPush(ctxt, &attr->name, &attr->value, 9771 NULL, 1) > 0) 9772 nbNs++; 9773 } else { 9774 nbTotalDef += 1; 9775 } 9776 } 9777 } 9778 } 9779 9780 /* 9781 * Resolve attribute namespaces 9782 */ 9783 for (i = 0; i < nbatts; i += 5) { 9784 attname = atts[i]; 9785 aprefix = atts[i+1]; 9786 9787 /* 9788 * The default namespace does not apply to attribute names. 
9789 */ 9790 if (aprefix == NULL) { 9791 nsIndex = NS_INDEX_EMPTY; 9792 } else if (aprefix == ctxt->str_xml) { 9793 nsIndex = NS_INDEX_XML; 9794 } else { 9795 haprefix.name = aprefix; 9796 haprefix.hashValue = (size_t) atts[i+2]; 9797 nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL); 9798 if (nsIndex == INT_MAX) { 9799 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9800 "Namespace prefix %s for %s on %s is not defined\n", 9801 aprefix, attname, localname); 9802 nsIndex = NS_INDEX_EMPTY; 9803 } 9804 } 9805 9806 atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex; 9807 } 9808 9809 /* 9810 * Maximum number of attributes including default attributes. 9811 */ 9812 maxAtts = nratts + nbTotalDef; 9813 9814 /* 9815 * Verify that attribute names are unique. 9816 */ 9817 if (maxAtts > 1) { 9818 attrHashSize = 4; 9819 while (attrHashSize / 2 < (unsigned) maxAtts) 9820 attrHashSize *= 2; 9821 9822 if (attrHashSize > ctxt->attrHashMax) { 9823 xmlAttrHashBucket *tmp; 9824 9825 tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0])); 9826 if (tmp == NULL) { 9827 xmlErrMemory(ctxt, NULL); 9828 goto done; 9829 } 9830 9831 ctxt->attrHash = tmp; 9832 ctxt->attrHashMax = attrHashSize; 9833 } 9834 9835 memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0])); 9836 9837 for (i = 0, j = 0; j < nratts; i += 5, j++) { 9838 const xmlChar *nsuri; 9839 unsigned hashValue, nameHashValue, uriHashValue; 9840 int res; 9841 9842 attname = atts[i]; 9843 aprefix = atts[i+1]; 9844 nsIndex = (ptrdiff_t) atts[i+2]; 9845 /* Hash values always have bit 31 set, see dict.c */ 9846 nameHashValue = ctxt->attallocs[j] | 0x80000000; 9847 9848 if (nsIndex == NS_INDEX_EMPTY) { 9849 nsuri = NULL; 9850 uriHashValue = URI_HASH_EMPTY; 9851 } else if (nsIndex == NS_INDEX_XML) { 9852 nsuri = ctxt->str_xml_ns; 9853 uriHashValue = URI_HASH_XML; 9854 } else { 9855 nsuri = ctxt->nsTab[nsIndex * 2 + 1]; 9856 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue; 9857 } 9858 9859 hashValue = 
xmlDictCombineHash(nameHashValue, uriHashValue); 9860 res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri, 9861 hashValue, i); 9862 if (res < 0) 9863 continue; 9864 9865 /* 9866 * [ WFC: Unique Att Spec ] 9867 * No attribute name may appear more than once in the same 9868 * start-tag or empty-element tag. 9869 * As extended by the Namespace in XML REC. 9870 */ 9871 if (res < INT_MAX) { 9872 if (aprefix == atts[res+1]) { 9873 xmlErrAttributeDup(ctxt, aprefix, attname); 9874 } else { 9875 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9876 "Namespaced Attribute %s in '%s' redefined\n", 9877 attname, nsuri, NULL); 9878 } 9879 } 9880 } 9881 } 9882 9883 /* 9884 * Default attributes 9885 */ 9886 if (ctxt->attsDefault != NULL) { 9887 xmlDefAttrsPtr defaults; 9888 9889 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9890 if (defaults != NULL) { 9891 for (i = 0; i < defaults->nbAttrs; i++) { 9892 xmlDefAttr *attr = &defaults->attrs[i]; 9893 const xmlChar *nsuri; 9894 unsigned hashValue, uriHashValue; 9895 int res; 9896 9897 attname = attr->name.name; 9898 aprefix = attr->prefix.name; 9899 9900 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) 9901 continue; 9902 if (aprefix == ctxt->str_xmlns) 9903 continue; 9904 9905 if (aprefix == NULL) { 9906 nsIndex = NS_INDEX_EMPTY; 9907 nsuri = NULL; 9908 uriHashValue = URI_HASH_EMPTY; 9909 } if (aprefix == ctxt->str_xml) { 9910 nsIndex = NS_INDEX_XML; 9911 nsuri = ctxt->str_xml_ns; 9912 uriHashValue = URI_HASH_XML; 9913 } else if (aprefix != NULL) { 9914 nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL); 9915 if (nsIndex == INT_MAX) { 9916 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9917 "Namespace prefix %s for %s on %s is not " 9918 "defined\n", 9919 aprefix, attname, localname); 9920 nsIndex = NS_INDEX_EMPTY; 9921 nsuri = NULL; 9922 uriHashValue = URI_HASH_EMPTY; 9923 } else { 9924 nsuri = ctxt->nsTab[nsIndex * 2 + 1]; 9925 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue; 9926 } 9927 } 
9928 9929 /* 9930 * Check whether the attribute exists 9931 */ 9932 if (maxAtts > 1) { 9933 hashValue = xmlDictCombineHash(attr->name.hashValue, 9934 uriHashValue); 9935 res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri, 9936 hashValue, nbatts); 9937 if (res < 0) 9938 continue; 9939 if (res < INT_MAX) { 9940 if (aprefix == atts[res+1]) 9941 continue; 9942 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9943 "Namespaced Attribute %s in '%s' redefined\n", 9944 attname, nsuri, NULL); 9945 } 9946 } 9947 9948 xmlParserEntityCheck(ctxt, attr->expandedSize); 9949 9950 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9951 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9952 localname = NULL; 9953 goto done; 9954 } 9955 maxatts = ctxt->maxatts; 9956 atts = ctxt->atts; 9957 } 9958 9959 atts[nbatts++] = attname; 9960 atts[nbatts++] = aprefix; 9961 atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex; 9962 atts[nbatts++] = attr->value.name; 9963 atts[nbatts++] = attr->valueEnd; 9964 if ((ctxt->standalone == 1) && (attr->external != 0)) { 9965 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9966 "standalone: attribute %s on %s defaulted " 9967 "from external subset\n", 9968 attname, localname); 9969 } 9970 nbdef++; 9971 } 9972 } 9973 } 9974 9975 /* 9976 * Reconstruct attribute pointers 9977 */ 9978 for (i = 0, j = 0; i < nbatts; i += 5, j++) { 9979 /* namespace URI */ 9980 nsIndex = (ptrdiff_t) atts[i+2]; 9981 if (nsIndex == INT_MAX) 9982 atts[i+2] = NULL; 9983 else if (nsIndex == INT_MAX - 1) 9984 atts[i+2] = ctxt->str_xml_ns; 9985 else 9986 atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1]; 9987 9988 if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) { 9989 atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3]; /* value */ 9990 atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4]; /* valuend */ 9991 } 9992 } 9993 9994 uri = xmlParserNsLookupUri(ctxt, &hprefix); 9995 if ((prefix != NULL) && (uri == NULL)) { 9996 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9997 "Namespace prefix 
%s on %s is not defined\n", 9998 prefix, localname, NULL); 9999 } 10000 *pref = prefix; 10001 *URI = uri; 10002 10003 /* 10004 * SAX callback 10005 */ 10006 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 10007 (!ctxt->disableSAX)) { 10008 if (nbNs > 0) 10009 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri, 10010 nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs), 10011 nbatts / 5, nbdef, atts); 10012 else 10013 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri, 10014 0, NULL, nbatts / 5, nbdef, atts); 10015 } 10016 10017done: 10018 /* 10019 * Free allocated attribute values 10020 */ 10021 if (attval != 0) { 10022 for (i = 0, j = 0; j < nratts; i += 5, j++) 10023 if (ctxt->attallocs[j] & 0x80000000) 10024 xmlFree((xmlChar *) atts[i+3]); 10025 } 10026 10027 *nbNsPtr = nbNs; 10028 return(localname); 10029} 10030 10031/** 10032 * xmlParseEndTag2: 10033 * @ctxt: an XML parser context 10034 * @line: line of the start tag 10035 * @nsNr: number of namespaces on the start tag 10036 * 10037 * Parse an end tag. Always consumes '</'. 10038 * 10039 * [42] ETag ::= '</' Name S? '>' 10040 * 10041 * With namespace 10042 * 10043 * [NS 9] ETag ::= '</' QName S? '>' 10044 */ 10045 10046static void 10047xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) { 10048 const xmlChar *name; 10049 10050 GROW; 10051 if ((RAW != '<') || (NXT(1) != '/')) { 10052 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 10053 return; 10054 } 10055 SKIP(2); 10056 10057 if (tag->prefix == NULL) 10058 name = xmlParseNameAndCompare(ctxt, ctxt->name); 10059 else 10060 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix); 10061 10062 /* 10063 * We should definitely be at the ending "S? 
'>'" part 10064 */ 10065 GROW; 10066 if (ctxt->instate == XML_PARSER_EOF) 10067 return; 10068 SKIP_BLANKS; 10069 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 10070 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 10071 } else 10072 NEXT1; 10073 10074 /* 10075 * [ WFC: Element Type Match ] 10076 * The Name in an element's end-tag must match the element type in the 10077 * start-tag. 10078 * 10079 */ 10080 if (name != (xmlChar*)1) { 10081 if (name == NULL) name = BAD_CAST "unparsable"; 10082 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 10083 "Opening and ending tag mismatch: %s line %d and %s\n", 10084 ctxt->name, tag->line, name); 10085 } 10086 10087 /* 10088 * SAX: End of Tag 10089 */ 10090 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10091 (!ctxt->disableSAX)) 10092 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix, 10093 tag->URI); 10094 10095 spacePop(ctxt); 10096 if (tag->nsNr != 0) 10097 xmlParserNsPop(ctxt, tag->nsNr); 10098} 10099 10100/** 10101 * xmlParseCDSect: 10102 * @ctxt: an XML parser context 10103 * 10104 * DEPRECATED: Internal function, don't use. 10105 * 10106 * Parse escaped pure raw content. Always consumes '<!['. 10107 * 10108 * [18] CDSect ::= CDStart CData CDEnd 10109 * 10110 * [19] CDStart ::= '<![CDATA[' 10111 * 10112 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 10113 * 10114 * [21] CDEnd ::= ']]>' 10115 */ 10116void 10117xmlParseCDSect(xmlParserCtxtPtr ctxt) { 10118 xmlChar *buf = NULL; 10119 int len = 0; 10120 int size = XML_PARSER_BUFFER_SIZE; 10121 int r, rl; 10122 int s, sl; 10123 int cur, l; 10124 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 
10125 XML_MAX_HUGE_LENGTH : 10126 XML_MAX_TEXT_LENGTH; 10127 10128 if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '[')) 10129 return; 10130 SKIP(3); 10131 10132 if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '[')) 10133 return; 10134 SKIP(6); 10135 10136 ctxt->instate = XML_PARSER_CDATA_SECTION; 10137 r = CUR_CHAR(rl); 10138 if (!IS_CHAR(r)) { 10139 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 10140 goto out; 10141 } 10142 NEXTL(rl); 10143 s = CUR_CHAR(sl); 10144 if (!IS_CHAR(s)) { 10145 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 10146 goto out; 10147 } 10148 NEXTL(sl); 10149 cur = CUR_CHAR(l); 10150 buf = (xmlChar *) xmlMallocAtomic(size); 10151 if (buf == NULL) { 10152 xmlErrMemory(ctxt, NULL); 10153 goto out; 10154 } 10155 while (IS_CHAR(cur) && 10156 ((r != ']') || (s != ']') || (cur != '>'))) { 10157 if (len + 5 >= size) { 10158 xmlChar *tmp; 10159 10160 tmp = (xmlChar *) xmlRealloc(buf, size * 2); 10161 if (tmp == NULL) { 10162 xmlErrMemory(ctxt, NULL); 10163 goto out; 10164 } 10165 buf = tmp; 10166 size *= 2; 10167 } 10168 COPY_BUF(buf, len, r); 10169 if (len > maxLength) { 10170 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED, 10171 "CData section too big found\n"); 10172 goto out; 10173 } 10174 r = s; 10175 rl = sl; 10176 s = cur; 10177 sl = l; 10178 NEXTL(l); 10179 cur = CUR_CHAR(l); 10180 } 10181 buf[len] = 0; 10182 if (ctxt->instate == XML_PARSER_EOF) { 10183 xmlFree(buf); 10184 return; 10185 } 10186 if (cur != '>') { 10187 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 10188 "CData section not finished\n%.50s\n", buf); 10189 goto out; 10190 } 10191 NEXTL(l); 10192 10193 /* 10194 * OK the buffer is to be consumed as cdata. 
10195 */ 10196 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 10197 if (ctxt->sax->cdataBlock != NULL) 10198 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 10199 else if (ctxt->sax->characters != NULL) 10200 ctxt->sax->characters(ctxt->userData, buf, len); 10201 } 10202 10203out: 10204 if (ctxt->instate != XML_PARSER_EOF) 10205 ctxt->instate = XML_PARSER_CONTENT; 10206 xmlFree(buf); 10207} 10208 10209/** 10210 * xmlParseContentInternal: 10211 * @ctxt: an XML parser context 10212 * 10213 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of 10214 * unexpected EOF to the caller. 10215 */ 10216 10217static void 10218xmlParseContentInternal(xmlParserCtxtPtr ctxt) { 10219 int nameNr = ctxt->nameNr; 10220 10221 GROW; 10222 while ((ctxt->input->cur < ctxt->input->end) && 10223 (ctxt->instate != XML_PARSER_EOF)) { 10224 const xmlChar *cur = ctxt->input->cur; 10225 10226 /* 10227 * First case : a Processing Instruction. 10228 */ 10229 if ((*cur == '<') && (cur[1] == '?')) { 10230 xmlParsePI(ctxt); 10231 } 10232 10233 /* 10234 * Second case : a CDSection 10235 */ 10236 /* 2.6.0 test was *cur not RAW */ 10237 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 10238 xmlParseCDSect(ctxt); 10239 } 10240 10241 /* 10242 * Third case : a comment 10243 */ 10244 else if ((*cur == '<') && (NXT(1) == '!') && 10245 (NXT(2) == '-') && (NXT(3) == '-')) { 10246 xmlParseComment(ctxt); 10247 ctxt->instate = XML_PARSER_CONTENT; 10248 } 10249 10250 /* 10251 * Fourth case : a sub-element. 10252 */ 10253 else if (*cur == '<') { 10254 if (NXT(1) == '/') { 10255 if (ctxt->nameNr <= nameNr) 10256 break; 10257 xmlParseElementEnd(ctxt); 10258 } else { 10259 xmlParseElementStart(ctxt); 10260 } 10261 } 10262 10263 /* 10264 * Fifth case : a reference. If if has not been resolved, 10265 * parsing returns it's Name, create the node 10266 */ 10267 10268 else if (*cur == '&') { 10269 xmlParseReference(ctxt); 10270 } 10271 10272 /* 10273 * Last case, text. 
Note that References are handled directly. 10274 */ 10275 else { 10276 xmlParseCharDataInternal(ctxt, 0); 10277 } 10278 10279 SHRINK; 10280 GROW; 10281 } 10282} 10283 10284/** 10285 * xmlParseContent: 10286 * @ctxt: an XML parser context 10287 * 10288 * Parse a content sequence. Stops at EOF or '</'. 10289 * 10290 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10291 */ 10292 10293void 10294xmlParseContent(xmlParserCtxtPtr ctxt) { 10295 int nameNr = ctxt->nameNr; 10296 10297 xmlParseContentInternal(ctxt); 10298 10299 if ((ctxt->instate != XML_PARSER_EOF) && 10300 (ctxt->errNo == XML_ERR_OK) && 10301 (ctxt->nameNr > nameNr)) { 10302 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1]; 10303 int line = ctxt->pushTab[ctxt->nameNr - 1].line; 10304 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 10305 "Premature end of data in tag %s line %d\n", 10306 name, line, NULL); 10307 } 10308} 10309 10310/** 10311 * xmlParseElement: 10312 * @ctxt: an XML parser context 10313 * 10314 * DEPRECATED: Internal function, don't use. 10315 * 10316 * parse an XML element 10317 * 10318 * [39] element ::= EmptyElemTag | STag content ETag 10319 * 10320 * [ WFC: Element Type Match ] 10321 * The Name in an element's end-tag must match the element type in the 10322 * start-tag. 
10323 * 10324 */ 10325 10326void 10327xmlParseElement(xmlParserCtxtPtr ctxt) { 10328 if (xmlParseElementStart(ctxt) != 0) 10329 return; 10330 10331 xmlParseContentInternal(ctxt); 10332 if (ctxt->instate == XML_PARSER_EOF) 10333 return; 10334 10335 if (ctxt->input->cur >= ctxt->input->end) { 10336 if (ctxt->errNo == XML_ERR_OK) { 10337 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1]; 10338 int line = ctxt->pushTab[ctxt->nameNr - 1].line; 10339 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 10340 "Premature end of data in tag %s line %d\n", 10341 name, line, NULL); 10342 } 10343 return; 10344 } 10345 10346 xmlParseElementEnd(ctxt); 10347} 10348 10349/** 10350 * xmlParseElementStart: 10351 * @ctxt: an XML parser context 10352 * 10353 * Parse the start of an XML element. Returns -1 in case of error, 0 if an 10354 * opening tag was parsed, 1 if an empty element was parsed. 10355 * 10356 * Always consumes '<'. 10357 */ 10358static int 10359xmlParseElementStart(xmlParserCtxtPtr ctxt) { 10360 const xmlChar *name; 10361 const xmlChar *prefix = NULL; 10362 const xmlChar *URI = NULL; 10363 xmlParserNodeInfo node_info; 10364 int line; 10365 xmlNodePtr cur; 10366 int nbNs = 0; 10367 10368 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 10369 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 10370 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 10371 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 10372 xmlParserMaxDepth); 10373 xmlHaltParser(ctxt); 10374 return(-1); 10375 } 10376 10377 /* Capture start position */ 10378 if (ctxt->record_info) { 10379 node_info.begin_pos = ctxt->input->consumed + 10380 (CUR_PTR - ctxt->input->base); 10381 node_info.begin_line = ctxt->input->line; 10382 } 10383 10384 if (ctxt->spaceNr == 0) 10385 spacePush(ctxt, -1); 10386 else if (*ctxt->space == -2) 10387 spacePush(ctxt, -1); 10388 else 10389 spacePush(ctxt, *ctxt->space); 10390 10391 line = ctxt->input->line; 10392#ifdef LIBXML_SAX1_ENABLED 10393 if (ctxt->sax2) 
10394#endif /* LIBXML_SAX1_ENABLED */ 10395 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs); 10396#ifdef LIBXML_SAX1_ENABLED 10397 else 10398 name = xmlParseStartTag(ctxt); 10399#endif /* LIBXML_SAX1_ENABLED */ 10400 if (ctxt->instate == XML_PARSER_EOF) 10401 return(-1); 10402 if (name == NULL) { 10403 spacePop(ctxt); 10404 return(-1); 10405 } 10406 nameNsPush(ctxt, name, prefix, URI, line, nbNs); 10407 cur = ctxt->node; 10408 10409#ifdef LIBXML_VALID_ENABLED 10410 /* 10411 * [ VC: Root Element Type ] 10412 * The Name in the document type declaration must match the element 10413 * type of the root element. 10414 */ 10415 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10416 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10417 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10418#endif /* LIBXML_VALID_ENABLED */ 10419 10420 /* 10421 * Check for an Empty Element. 10422 */ 10423 if ((RAW == '/') && (NXT(1) == '>')) { 10424 SKIP(2); 10425 if (ctxt->sax2) { 10426 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10427 (!ctxt->disableSAX)) 10428 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10429#ifdef LIBXML_SAX1_ENABLED 10430 } else { 10431 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10432 (!ctxt->disableSAX)) 10433 ctxt->sax->endElement(ctxt->userData, name); 10434#endif /* LIBXML_SAX1_ENABLED */ 10435 } 10436 namePop(ctxt); 10437 spacePop(ctxt); 10438 if (nbNs > 0) 10439 xmlParserNsPop(ctxt, nbNs); 10440 if (cur != NULL && ctxt->record_info) { 10441 node_info.node = cur; 10442 node_info.end_pos = ctxt->input->consumed + 10443 (CUR_PTR - ctxt->input->base); 10444 node_info.end_line = ctxt->input->line; 10445 xmlParserAddNodeInfo(ctxt, &node_info); 10446 } 10447 return(1); 10448 } 10449 if (RAW == '>') { 10450 NEXT1; 10451 if (cur != NULL && ctxt->record_info) { 10452 node_info.node = cur; 10453 node_info.end_pos = 0; 10454 node_info.end_line = 0; 10455 xmlParserAddNodeInfo(ctxt, &node_info); 
10456 } 10457 } else { 10458 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10459 "Couldn't find end of Start Tag %s line %d\n", 10460 name, line, NULL); 10461 10462 /* 10463 * end of parsing of this node. 10464 */ 10465 nodePop(ctxt); 10466 namePop(ctxt); 10467 spacePop(ctxt); 10468 if (nbNs > 0) 10469 xmlParserNsPop(ctxt, nbNs); 10470 return(-1); 10471 } 10472 10473 return(0); 10474} 10475 10476/** 10477 * xmlParseElementEnd: 10478 * @ctxt: an XML parser context 10479 * 10480 * Parse the end of an XML element. Always consumes '</'. 10481 */ 10482static void 10483xmlParseElementEnd(xmlParserCtxtPtr ctxt) { 10484 xmlNodePtr cur = ctxt->node; 10485 10486 if (ctxt->nameNr <= 0) { 10487 if ((RAW == '<') && (NXT(1) == '/')) 10488 SKIP(2); 10489 return; 10490 } 10491 10492 /* 10493 * parse the end of tag: '</' should be here. 10494 */ 10495 if (ctxt->sax2) { 10496 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]); 10497 namePop(ctxt); 10498 } 10499#ifdef LIBXML_SAX1_ENABLED 10500 else 10501 xmlParseEndTag1(ctxt, 0); 10502#endif /* LIBXML_SAX1_ENABLED */ 10503 10504 /* 10505 * Capture end position 10506 */ 10507 if (cur != NULL && ctxt->record_info) { 10508 xmlParserNodeInfoPtr node_info; 10509 10510 node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur); 10511 if (node_info != NULL) { 10512 node_info->end_pos = ctxt->input->consumed + 10513 (CUR_PTR - ctxt->input->base); 10514 node_info->end_line = ctxt->input->line; 10515 } 10516 } 10517} 10518 10519/** 10520 * xmlParseVersionNum: 10521 * @ctxt: an XML parser context 10522 * 10523 * DEPRECATED: Internal function, don't use. 10524 * 10525 * parse the XML version value. 10526 * 10527 * [26] VersionNum ::= '1.' 
[0-9]+ 10528 * 10529 * In practice allow [0-9].[0-9]+ at that level 10530 * 10531 * Returns the string giving the XML version number, or NULL 10532 */ 10533xmlChar * 10534xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10535 xmlChar *buf = NULL; 10536 int len = 0; 10537 int size = 10; 10538 xmlChar cur; 10539 10540 buf = (xmlChar *) xmlMallocAtomic(size); 10541 if (buf == NULL) { 10542 xmlErrMemory(ctxt, NULL); 10543 return(NULL); 10544 } 10545 cur = CUR; 10546 if (!((cur >= '0') && (cur <= '9'))) { 10547 xmlFree(buf); 10548 return(NULL); 10549 } 10550 buf[len++] = cur; 10551 NEXT; 10552 cur=CUR; 10553 if (cur != '.') { 10554 xmlFree(buf); 10555 return(NULL); 10556 } 10557 buf[len++] = cur; 10558 NEXT; 10559 cur=CUR; 10560 while ((cur >= '0') && (cur <= '9')) { 10561 if (len + 1 >= size) { 10562 xmlChar *tmp; 10563 10564 size *= 2; 10565 tmp = (xmlChar *) xmlRealloc(buf, size); 10566 if (tmp == NULL) { 10567 xmlFree(buf); 10568 xmlErrMemory(ctxt, NULL); 10569 return(NULL); 10570 } 10571 buf = tmp; 10572 } 10573 buf[len++] = cur; 10574 NEXT; 10575 cur=CUR; 10576 } 10577 buf[len] = 0; 10578 return(buf); 10579} 10580 10581/** 10582 * xmlParseVersionInfo: 10583 * @ctxt: an XML parser context 10584 * 10585 * DEPRECATED: Internal function, don't use. 10586 * 10587 * parse the XML version. 10588 * 10589 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10590 * 10591 * [25] Eq ::= S? '=' S? 10592 * 10593 * Returns the version string, e.g. 
"1.0" 10594 */ 10595 10596xmlChar * 10597xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10598 xmlChar *version = NULL; 10599 10600 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10601 SKIP(7); 10602 SKIP_BLANKS; 10603 if (RAW != '=') { 10604 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10605 return(NULL); 10606 } 10607 NEXT; 10608 SKIP_BLANKS; 10609 if (RAW == '"') { 10610 NEXT; 10611 version = xmlParseVersionNum(ctxt); 10612 if (RAW != '"') { 10613 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10614 } else 10615 NEXT; 10616 } else if (RAW == '\''){ 10617 NEXT; 10618 version = xmlParseVersionNum(ctxt); 10619 if (RAW != '\'') { 10620 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10621 } else 10622 NEXT; 10623 } else { 10624 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10625 } 10626 } 10627 return(version); 10628} 10629 10630/** 10631 * xmlParseEncName: 10632 * @ctxt: an XML parser context 10633 * 10634 * DEPRECATED: Internal function, don't use. 10635 * 10636 * parse the XML encoding name 10637 * 10638 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10639 * 10640 * Returns the encoding name value or NULL 10641 */ 10642xmlChar * 10643xmlParseEncName(xmlParserCtxtPtr ctxt) { 10644 xmlChar *buf = NULL; 10645 int len = 0; 10646 int size = 10; 10647 int maxLength = (ctxt->options & XML_PARSE_HUGE) ? 
10648 XML_MAX_TEXT_LENGTH : 10649 XML_MAX_NAME_LENGTH; 10650 xmlChar cur; 10651 10652 cur = CUR; 10653 if (((cur >= 'a') && (cur <= 'z')) || 10654 ((cur >= 'A') && (cur <= 'Z'))) { 10655 buf = (xmlChar *) xmlMallocAtomic(size); 10656 if (buf == NULL) { 10657 xmlErrMemory(ctxt, NULL); 10658 return(NULL); 10659 } 10660 10661 buf[len++] = cur; 10662 NEXT; 10663 cur = CUR; 10664 while (((cur >= 'a') && (cur <= 'z')) || 10665 ((cur >= 'A') && (cur <= 'Z')) || 10666 ((cur >= '0') && (cur <= '9')) || 10667 (cur == '.') || (cur == '_') || 10668 (cur == '-')) { 10669 if (len + 1 >= size) { 10670 xmlChar *tmp; 10671 10672 size *= 2; 10673 tmp = (xmlChar *) xmlRealloc(buf, size); 10674 if (tmp == NULL) { 10675 xmlErrMemory(ctxt, NULL); 10676 xmlFree(buf); 10677 return(NULL); 10678 } 10679 buf = tmp; 10680 } 10681 buf[len++] = cur; 10682 if (len > maxLength) { 10683 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName"); 10684 xmlFree(buf); 10685 return(NULL); 10686 } 10687 NEXT; 10688 cur = CUR; 10689 } 10690 buf[len] = 0; 10691 } else { 10692 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10693 } 10694 return(buf); 10695} 10696 10697/** 10698 * xmlParseEncodingDecl: 10699 * @ctxt: an XML parser context 10700 * 10701 * DEPRECATED: Internal function, don't use. 10702 * 10703 * parse the XML encoding declaration 10704 * 10705 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10706 * 10707 * this setups the conversion filters. 
10708 * 10709 * Returns the encoding value or NULL 10710 */ 10711 10712const xmlChar * 10713xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10714 xmlChar *encoding = NULL; 10715 10716 SKIP_BLANKS; 10717 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0) 10718 return(NULL); 10719 10720 SKIP(8); 10721 SKIP_BLANKS; 10722 if (RAW != '=') { 10723 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10724 return(NULL); 10725 } 10726 NEXT; 10727 SKIP_BLANKS; 10728 if (RAW == '"') { 10729 NEXT; 10730 encoding = xmlParseEncName(ctxt); 10731 if (RAW != '"') { 10732 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10733 xmlFree((xmlChar *) encoding); 10734 return(NULL); 10735 } else 10736 NEXT; 10737 } else if (RAW == '\''){ 10738 NEXT; 10739 encoding = xmlParseEncName(ctxt); 10740 if (RAW != '\'') { 10741 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10742 xmlFree((xmlChar *) encoding); 10743 return(NULL); 10744 } else 10745 NEXT; 10746 } else { 10747 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10748 } 10749 10750 if (encoding == NULL) 10751 return(NULL); 10752 10753 xmlSetDeclaredEncoding(ctxt, encoding); 10754 10755 return(ctxt->encoding); 10756} 10757 10758/** 10759 * xmlParseSDDecl: 10760 * @ctxt: an XML parser context 10761 * 10762 * DEPRECATED: Internal function, don't use. 
 *
 * parse the XML standalone declaration
 *
 * [32] SDDecl ::= S 'standalone' Eq
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
 *
 * [ VC: Standalone Document Declaration ]
 * TODO The standalone document declaration must have the value "no"
 * if any external markup declarations contain declarations of:
 *  - attributes with default values, if elements to which these
 *    attributes apply appear in the document without specifications
 *    of values for these attributes, or
 *  - entities (other than amp, lt, gt, apos, quot), if references
 *    to those entities appear in the document, or
 *  - attributes with values subject to normalization, where the
 *    attribute appears in the document with a value which will change
 *    as a result of normalization, or
 *  - element types with element content, if white space occurs directly
 *    within any instance of those types.
 *
 * Returns:
 *   1 if standalone="yes"
 *   0 if standalone="no"
 *  -2 if standalone attribute is missing or invalid
 *     (A standalone value of -2 means that the XML declaration was found,
 *      but no value was specified for the standalone attribute).
10789 */ 10790 10791int 10792xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10793 int standalone = -2; 10794 10795 SKIP_BLANKS; 10796 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10797 SKIP(10); 10798 SKIP_BLANKS; 10799 if (RAW != '=') { 10800 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10801 return(standalone); 10802 } 10803 NEXT; 10804 SKIP_BLANKS; 10805 if (RAW == '\''){ 10806 NEXT; 10807 if ((RAW == 'n') && (NXT(1) == 'o')) { 10808 standalone = 0; 10809 SKIP(2); 10810 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10811 (NXT(2) == 's')) { 10812 standalone = 1; 10813 SKIP(3); 10814 } else { 10815 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10816 } 10817 if (RAW != '\'') { 10818 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10819 } else 10820 NEXT; 10821 } else if (RAW == '"'){ 10822 NEXT; 10823 if ((RAW == 'n') && (NXT(1) == 'o')) { 10824 standalone = 0; 10825 SKIP(2); 10826 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10827 (NXT(2) == 's')) { 10828 standalone = 1; 10829 SKIP(3); 10830 } else { 10831 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10832 } 10833 if (RAW != '"') { 10834 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10835 } else 10836 NEXT; 10837 } else { 10838 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10839 } 10840 } 10841 return(standalone); 10842} 10843 10844/** 10845 * xmlParseXMLDecl: 10846 * @ctxt: an XML parser context 10847 * 10848 * DEPRECATED: Internal function, don't use. 10849 * 10850 * parse an XML declaration header 10851 * 10852 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10853 */ 10854 10855void 10856xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10857 xmlChar *version; 10858 10859 /* 10860 * This value for standalone indicates that the document has an 10861 * XML declaration but it does not have a standalone attribute. 10862 * It will be overwritten later if a standalone attribute is found. 
10863 */ 10864 10865 ctxt->standalone = -2; 10866 10867 /* 10868 * We know that '<?xml' is here. 10869 */ 10870 SKIP(5); 10871 10872 if (!IS_BLANK_CH(RAW)) { 10873 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10874 "Blank needed after '<?xml'\n"); 10875 } 10876 SKIP_BLANKS; 10877 10878 /* 10879 * We must have the VersionInfo here. 10880 */ 10881 version = xmlParseVersionInfo(ctxt); 10882 if (version == NULL) { 10883 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10884 } else { 10885 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10886 /* 10887 * Changed here for XML-1.0 5th edition 10888 */ 10889 if (ctxt->options & XML_PARSE_OLD10) { 10890 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10891 "Unsupported version '%s'\n", 10892 version); 10893 } else { 10894 if ((version[0] == '1') && ((version[1] == '.'))) { 10895 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10896 "Unsupported version '%s'\n", 10897 version, NULL); 10898 } else { 10899 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10900 "Unsupported version '%s'\n", 10901 version); 10902 } 10903 } 10904 } 10905 if (ctxt->version != NULL) 10906 xmlFree((void *) ctxt->version); 10907 ctxt->version = version; 10908 } 10909 10910 /* 10911 * We may have the encoding declaration 10912 */ 10913 if (!IS_BLANK_CH(RAW)) { 10914 if ((RAW == '?') && (NXT(1) == '>')) { 10915 SKIP(2); 10916 return; 10917 } 10918 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10919 } 10920 xmlParseEncodingDecl(ctxt); 10921 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10922 (ctxt->instate == XML_PARSER_EOF)) { 10923 /* 10924 * The XML REC instructs us to stop parsing right here 10925 */ 10926 return; 10927 } 10928 10929 /* 10930 * We may have the standalone status. 
10931 */ 10932 if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10933 if ((RAW == '?') && (NXT(1) == '>')) { 10934 SKIP(2); 10935 return; 10936 } 10937 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10938 } 10939 10940 /* 10941 * We can grow the input buffer freely at that point 10942 */ 10943 GROW; 10944 10945 SKIP_BLANKS; 10946 ctxt->standalone = xmlParseSDDecl(ctxt); 10947 10948 SKIP_BLANKS; 10949 if ((RAW == '?') && (NXT(1) == '>')) { 10950 SKIP(2); 10951 } else if (RAW == '>') { 10952 /* Deprecated old WD ... */ 10953 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10954 NEXT; 10955 } else { 10956 int c; 10957 10958 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10959 while ((c = CUR) != 0) { 10960 NEXT; 10961 if (c == '>') 10962 break; 10963 } 10964 } 10965} 10966 10967/** 10968 * xmlParseMisc: 10969 * @ctxt: an XML parser context 10970 * 10971 * DEPRECATED: Internal function, don't use. 10972 * 10973 * parse an XML Misc* optional field. 10974 * 10975 * [27] Misc ::= Comment | PI | S 10976 */ 10977 10978void 10979xmlParseMisc(xmlParserCtxtPtr ctxt) { 10980 while (ctxt->instate != XML_PARSER_EOF) { 10981 SKIP_BLANKS; 10982 GROW; 10983 if ((RAW == '<') && (NXT(1) == '?')) { 10984 xmlParsePI(ctxt); 10985 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) { 10986 xmlParseComment(ctxt); 10987 } else { 10988 break; 10989 } 10990 } 10991} 10992 10993/** 10994 * xmlParseDocument: 10995 * @ctxt: an XML parser context 10996 * 10997 * parse an XML document (and build a tree if using the standard SAX 10998 * interface). 10999 * 11000 * [1] document ::= prolog element Misc* 11001 * 11002 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 11003 * 11004 * Returns 0, -1 in case of error. the parser context is augmented 11005 * as a result of the parsing. 
11006 */ 11007 11008int 11009xmlParseDocument(xmlParserCtxtPtr ctxt) { 11010 xmlInitParser(); 11011 11012 if ((ctxt == NULL) || (ctxt->input == NULL)) 11013 return(-1); 11014 11015 GROW; 11016 11017 /* 11018 * SAX: detecting the level. 11019 */ 11020 xmlDetectSAX2(ctxt); 11021 11022 /* 11023 * SAX: beginning of the document processing. 11024 */ 11025 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11026 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 11027 if (ctxt->instate == XML_PARSER_EOF) 11028 return(-1); 11029 11030 xmlDetectEncoding(ctxt); 11031 11032 if (CUR == 0) { 11033 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11034 return(-1); 11035 } 11036 11037 GROW; 11038 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11039 11040 /* 11041 * Note that we will switch encoding on the fly. 11042 */ 11043 xmlParseXMLDecl(ctxt); 11044 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 11045 (ctxt->instate == XML_PARSER_EOF)) { 11046 /* 11047 * The XML REC instructs us to stop parsing right here 11048 */ 11049 return(-1); 11050 } 11051 SKIP_BLANKS; 11052 } else { 11053 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11054 } 11055 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 11056 ctxt->sax->startDocument(ctxt->userData); 11057 if (ctxt->instate == XML_PARSER_EOF) 11058 return(-1); 11059 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 11060 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 11061 ctxt->myDoc->compression = ctxt->input->buf->compressed; 11062 } 11063 11064 /* 11065 * The Misc part of the Prolog 11066 */ 11067 xmlParseMisc(ctxt); 11068 11069 /* 11070 * Then possibly doc type declaration(s) and more Misc 11071 * (doctypedecl Misc*)? 
11072 */ 11073 GROW; 11074 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 11075 11076 ctxt->inSubset = 1; 11077 xmlParseDocTypeDecl(ctxt); 11078 if (RAW == '[') { 11079 ctxt->instate = XML_PARSER_DTD; 11080 xmlParseInternalSubset(ctxt); 11081 if (ctxt->instate == XML_PARSER_EOF) 11082 return(-1); 11083 } 11084 11085 /* 11086 * Create and update the external subset. 11087 */ 11088 ctxt->inSubset = 2; 11089 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 11090 (!ctxt->disableSAX)) 11091 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11092 ctxt->extSubSystem, ctxt->extSubURI); 11093 if (ctxt->instate == XML_PARSER_EOF) 11094 return(-1); 11095 ctxt->inSubset = 0; 11096 11097 xmlCleanSpecialAttr(ctxt); 11098 11099 ctxt->instate = XML_PARSER_PROLOG; 11100 xmlParseMisc(ctxt); 11101 } 11102 11103 /* 11104 * Time to start parsing the tree itself 11105 */ 11106 GROW; 11107 if (RAW != '<') { 11108 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 11109 "Start tag expected, '<' not found\n"); 11110 } else { 11111 ctxt->instate = XML_PARSER_CONTENT; 11112 xmlParseElement(ctxt); 11113 ctxt->instate = XML_PARSER_EPILOG; 11114 11115 11116 /* 11117 * The Misc part at the end 11118 */ 11119 xmlParseMisc(ctxt); 11120 11121 if (ctxt->input->cur < ctxt->input->end) { 11122 if (ctxt->errNo == XML_ERR_OK) 11123 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11124 } else if ((ctxt->input->buf != NULL) && 11125 (ctxt->input->buf->encoder != NULL) && 11126 (!xmlBufIsEmpty(ctxt->input->buf->raw))) { 11127 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 11128 "Truncated multi-byte sequence at EOF\n"); 11129 } 11130 ctxt->instate = XML_PARSER_EOF; 11131 } 11132 11133 /* 11134 * SAX: end of the document processing. 
11135 */ 11136 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11137 ctxt->sax->endDocument(ctxt->userData); 11138 11139 /* 11140 * Remove locally kept entity definitions if the tree was not built 11141 */ 11142 if ((ctxt->myDoc != NULL) && 11143 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 11144 xmlFreeDoc(ctxt->myDoc); 11145 ctxt->myDoc = NULL; 11146 } 11147 11148 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 11149 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 11150 if (ctxt->valid) 11151 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 11152 if (ctxt->nsWellFormed) 11153 ctxt->myDoc->properties |= XML_DOC_NSVALID; 11154 if (ctxt->options & XML_PARSE_OLD10) 11155 ctxt->myDoc->properties |= XML_DOC_OLD10; 11156 } 11157 if (! ctxt->wellFormed) { 11158 ctxt->valid = 0; 11159 return(-1); 11160 } 11161 return(0); 11162} 11163 11164/** 11165 * xmlParseExtParsedEnt: 11166 * @ctxt: an XML parser context 11167 * 11168 * parse a general parsed entity 11169 * An external general parsed entity is well-formed if it matches the 11170 * production labeled extParsedEnt. 11171 * 11172 * [78] extParsedEnt ::= TextDecl? content 11173 * 11174 * Returns 0, -1 in case of error. the parser context is augmented 11175 * as a result of the parsing. 11176 */ 11177 11178int 11179xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 11180 if ((ctxt == NULL) || (ctxt->input == NULL)) 11181 return(-1); 11182 11183 xmlDetectSAX2(ctxt); 11184 11185 /* 11186 * SAX: beginning of the document processing. 11187 */ 11188 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11189 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 11190 11191 xmlDetectEncoding(ctxt); 11192 11193 if (CUR == 0) { 11194 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11195 } 11196 11197 /* 11198 * Check for the XMLDecl in the Prolog. 
11199 */ 11200 GROW; 11201 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11202 11203 /* 11204 * Note that we will switch encoding on the fly. 11205 */ 11206 xmlParseXMLDecl(ctxt); 11207 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11208 /* 11209 * The XML REC instructs us to stop parsing right here 11210 */ 11211 return(-1); 11212 } 11213 SKIP_BLANKS; 11214 } else { 11215 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11216 } 11217 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 11218 ctxt->sax->startDocument(ctxt->userData); 11219 if (ctxt->instate == XML_PARSER_EOF) 11220 return(-1); 11221 11222 /* 11223 * Doing validity checking on chunk doesn't make sense 11224 */ 11225 ctxt->instate = XML_PARSER_CONTENT; 11226 ctxt->validate = 0; 11227 ctxt->loadsubset = 0; 11228 ctxt->depth = 0; 11229 11230 xmlParseContent(ctxt); 11231 if (ctxt->instate == XML_PARSER_EOF) 11232 return(-1); 11233 11234 if ((RAW == '<') && (NXT(1) == '/')) { 11235 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11236 } else if (RAW != 0) { 11237 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11238 } 11239 11240 /* 11241 * SAX: end of the document processing. 11242 */ 11243 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11244 ctxt->sax->endDocument(ctxt->userData); 11245 11246 if (! ctxt->wellFormed) return(-1); 11247 return(0); 11248} 11249 11250#ifdef LIBXML_PUSH_ENABLED 11251/************************************************************************ 11252 * * 11253 * Progressive parsing interfaces * 11254 * * 11255 ************************************************************************/ 11256 11257/** 11258 * xmlParseLookupChar: 11259 * @ctxt: an XML parser context 11260 * @c: character 11261 * 11262 * Check whether the input buffer contains a character. 
11263 */ 11264static int 11265xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) { 11266 const xmlChar *cur; 11267 11268 if (ctxt->checkIndex == 0) { 11269 cur = ctxt->input->cur + 1; 11270 } else { 11271 cur = ctxt->input->cur + ctxt->checkIndex; 11272 } 11273 11274 if (memchr(cur, c, ctxt->input->end - cur) == NULL) { 11275 size_t index = ctxt->input->end - ctxt->input->cur; 11276 11277 if (index > LONG_MAX) { 11278 ctxt->checkIndex = 0; 11279 return(1); 11280 } 11281 ctxt->checkIndex = index; 11282 return(0); 11283 } else { 11284 ctxt->checkIndex = 0; 11285 return(1); 11286 } 11287} 11288 11289/** 11290 * xmlParseLookupString: 11291 * @ctxt: an XML parser context 11292 * @startDelta: delta to apply at the start 11293 * @str: string 11294 * @strLen: length of string 11295 * 11296 * Check whether the input buffer contains a string. 11297 */ 11298static const xmlChar * 11299xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta, 11300 const char *str, size_t strLen) { 11301 const xmlChar *cur, *term; 11302 11303 if (ctxt->checkIndex == 0) { 11304 cur = ctxt->input->cur + startDelta; 11305 } else { 11306 cur = ctxt->input->cur + ctxt->checkIndex; 11307 } 11308 11309 term = BAD_CAST strstr((const char *) cur, str); 11310 if (term == NULL) { 11311 const xmlChar *end = ctxt->input->end; 11312 size_t index; 11313 11314 /* Rescan (strLen - 1) characters. */ 11315 if ((size_t) (end - cur) < strLen) 11316 end = cur; 11317 else 11318 end -= strLen - 1; 11319 index = end - ctxt->input->cur; 11320 if (index > LONG_MAX) { 11321 ctxt->checkIndex = 0; 11322 return(ctxt->input->end - strLen); 11323 } 11324 ctxt->checkIndex = index; 11325 } else { 11326 ctxt->checkIndex = 0; 11327 } 11328 11329 return(term); 11330} 11331 11332/** 11333 * xmlParseLookupCharData: 11334 * @ctxt: an XML parser context 11335 * 11336 * Check whether the input buffer contains terminated char data. 
11337 */ 11338static int 11339xmlParseLookupCharData(xmlParserCtxtPtr ctxt) { 11340 const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex; 11341 const xmlChar *end = ctxt->input->end; 11342 size_t index; 11343 11344 while (cur < end) { 11345 if ((*cur == '<') || (*cur == '&')) { 11346 ctxt->checkIndex = 0; 11347 return(1); 11348 } 11349 cur++; 11350 } 11351 11352 index = cur - ctxt->input->cur; 11353 if (index > LONG_MAX) { 11354 ctxt->checkIndex = 0; 11355 return(1); 11356 } 11357 ctxt->checkIndex = index; 11358 return(0); 11359} 11360 11361/** 11362 * xmlParseLookupGt: 11363 * @ctxt: an XML parser context 11364 * 11365 * Check whether there's enough data in the input buffer to finish parsing 11366 * a start tag. This has to take quotes into account. 11367 */ 11368static int 11369xmlParseLookupGt(xmlParserCtxtPtr ctxt) { 11370 const xmlChar *cur; 11371 const xmlChar *end = ctxt->input->end; 11372 int state = ctxt->endCheckState; 11373 size_t index; 11374 11375 if (ctxt->checkIndex == 0) 11376 cur = ctxt->input->cur + 1; 11377 else 11378 cur = ctxt->input->cur + ctxt->checkIndex; 11379 11380 while (cur < end) { 11381 if (state) { 11382 if (*cur == state) 11383 state = 0; 11384 } else if (*cur == '\'' || *cur == '"') { 11385 state = *cur; 11386 } else if (*cur == '>') { 11387 ctxt->checkIndex = 0; 11388 ctxt->endCheckState = 0; 11389 return(1); 11390 } 11391 cur++; 11392 } 11393 11394 index = cur - ctxt->input->cur; 11395 if (index > LONG_MAX) { 11396 ctxt->checkIndex = 0; 11397 ctxt->endCheckState = 0; 11398 return(1); 11399 } 11400 ctxt->checkIndex = index; 11401 ctxt->endCheckState = state; 11402 return(0); 11403} 11404 11405/** 11406 * xmlParseLookupInternalSubset: 11407 * @ctxt: an XML parser context 11408 * 11409 * Check whether there's enough data in the input buffer to finish parsing 11410 * the internal subset. 
11411 */ 11412static int 11413xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) { 11414 /* 11415 * Sorry, but progressive parsing of the internal subset is not 11416 * supported. We first check that the full content of the internal 11417 * subset is available and parsing is launched only at that point. 11418 * Internal subset ends with "']' S? '>'" in an unescaped section and 11419 * not in a ']]>' sequence which are conditional sections. 11420 */ 11421 const xmlChar *cur, *start; 11422 const xmlChar *end = ctxt->input->end; 11423 int state = ctxt->endCheckState; 11424 size_t index; 11425 11426 if (ctxt->checkIndex == 0) { 11427 cur = ctxt->input->cur + 1; 11428 } else { 11429 cur = ctxt->input->cur + ctxt->checkIndex; 11430 } 11431 start = cur; 11432 11433 while (cur < end) { 11434 if (state == '-') { 11435 if ((*cur == '-') && 11436 (cur[1] == '-') && 11437 (cur[2] == '>')) { 11438 state = 0; 11439 cur += 3; 11440 start = cur; 11441 continue; 11442 } 11443 } 11444 else if (state == ']') { 11445 if (*cur == '>') { 11446 ctxt->checkIndex = 0; 11447 ctxt->endCheckState = 0; 11448 return(1); 11449 } 11450 if (IS_BLANK_CH(*cur)) { 11451 state = ' '; 11452 } else if (*cur != ']') { 11453 state = 0; 11454 start = cur; 11455 continue; 11456 } 11457 } 11458 else if (state == ' ') { 11459 if (*cur == '>') { 11460 ctxt->checkIndex = 0; 11461 ctxt->endCheckState = 0; 11462 return(1); 11463 } 11464 if (!IS_BLANK_CH(*cur)) { 11465 state = 0; 11466 start = cur; 11467 continue; 11468 } 11469 } 11470 else if (state != 0) { 11471 if (*cur == state) { 11472 state = 0; 11473 start = cur + 1; 11474 } 11475 } 11476 else if (*cur == '<') { 11477 if ((cur[1] == '!') && 11478 (cur[2] == '-') && 11479 (cur[3] == '-')) { 11480 state = '-'; 11481 cur += 4; 11482 /* Don't treat <!--> as comment */ 11483 start = cur; 11484 continue; 11485 } 11486 } 11487 else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) { 11488 state = *cur; 11489 } 11490 11491 cur++; 11492 } 11493 11494 /* 11495 * 
Rescan the three last characters to detect "<!--" and "-->" 11496 * split across chunks. 11497 */ 11498 if ((state == 0) || (state == '-')) { 11499 if (cur - start < 3) 11500 cur = start; 11501 else 11502 cur -= 3; 11503 } 11504 index = cur - ctxt->input->cur; 11505 if (index > LONG_MAX) { 11506 ctxt->checkIndex = 0; 11507 ctxt->endCheckState = 0; 11508 return(1); 11509 } 11510 ctxt->checkIndex = index; 11511 ctxt->endCheckState = state; 11512 return(0); 11513} 11514 11515/** 11516 * xmlCheckCdataPush: 11517 * @cur: pointer to the block of characters 11518 * @len: length of the block in bytes 11519 * @complete: 1 if complete CDATA block is passed in, 0 if partial block 11520 * 11521 * Check that the block of characters is okay as SCdata content [20] 11522 * 11523 * Returns the number of bytes to pass if okay, a negative index where an 11524 * UTF-8 error occurred otherwise 11525 */ 11526static int 11527xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { 11528 int ix; 11529 unsigned char c; 11530 int codepoint; 11531 11532 if ((utf == NULL) || (len <= 0)) 11533 return(0); 11534 11535 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11536 c = utf[ix]; 11537 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11538 if (c >= 0x20) 11539 ix++; 11540 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11541 ix++; 11542 else 11543 return(-ix); 11544 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11545 if (ix + 2 > len) return(complete ? -ix : ix); 11546 if ((utf[ix+1] & 0xc0 ) != 0x80) 11547 return(-ix); 11548 codepoint = (utf[ix] & 0x1f) << 6; 11549 codepoint |= utf[ix+1] & 0x3f; 11550 if (!xmlIsCharQ(codepoint)) 11551 return(-ix); 11552 ix += 2; 11553 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11554 if (ix + 3 > len) return(complete ? 
-ix : ix); 11555 if (((utf[ix+1] & 0xc0) != 0x80) || 11556 ((utf[ix+2] & 0xc0) != 0x80)) 11557 return(-ix); 11558 codepoint = (utf[ix] & 0xf) << 12; 11559 codepoint |= (utf[ix+1] & 0x3f) << 6; 11560 codepoint |= utf[ix+2] & 0x3f; 11561 if (!xmlIsCharQ(codepoint)) 11562 return(-ix); 11563 ix += 3; 11564 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11565 if (ix + 4 > len) return(complete ? -ix : ix); 11566 if (((utf[ix+1] & 0xc0) != 0x80) || 11567 ((utf[ix+2] & 0xc0) != 0x80) || 11568 ((utf[ix+3] & 0xc0) != 0x80)) 11569 return(-ix); 11570 codepoint = (utf[ix] & 0x7) << 18; 11571 codepoint |= (utf[ix+1] & 0x3f) << 12; 11572 codepoint |= (utf[ix+2] & 0x3f) << 6; 11573 codepoint |= utf[ix+3] & 0x3f; 11574 if (!xmlIsCharQ(codepoint)) 11575 return(-ix); 11576 ix += 4; 11577 } else /* unknown encoding */ 11578 return(-ix); 11579 } 11580 return(ix); 11581} 11582 11583/** 11584 * xmlParseTryOrFinish: 11585 * @ctxt: an XML parser context 11586 * @terminate: last chunk indicator 11587 * 11588 * Try to progress on parsing 11589 * 11590 * Returns zero if no parsing was possible 11591 */ 11592static int 11593xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11594 int ret = 0; 11595 size_t avail; 11596 xmlChar cur, next; 11597 11598 if (ctxt->input == NULL) 11599 return(0); 11600 11601 if ((ctxt->input != NULL) && 11602 (ctxt->input->cur - ctxt->input->base > 4096)) { 11603 xmlParserShrink(ctxt); 11604 } 11605 11606 while (ctxt->instate != XML_PARSER_EOF) { 11607 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11608 return(0); 11609 11610 avail = ctxt->input->end - ctxt->input->cur; 11611 if (avail < 1) 11612 goto done; 11613 switch (ctxt->instate) { 11614 case XML_PARSER_EOF: 11615 /* 11616 * Document parsing is done ! 11617 */ 11618 goto done; 11619 case XML_PARSER_START: 11620 /* 11621 * Very first chars read from the document flow. 
11622 */ 11623 if ((!terminate) && (avail < 4)) 11624 goto done; 11625 11626 /* 11627 * We need more bytes to detect EBCDIC code pages. 11628 * See xmlDetectEBCDIC. 11629 */ 11630 if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) && 11631 (!terminate) && (avail < 200)) 11632 goto done; 11633 11634 xmlDetectEncoding(ctxt); 11635 if (ctxt->instate == XML_PARSER_EOF) 11636 goto done; 11637 ctxt->instate = XML_PARSER_XML_DECL; 11638 break; 11639 11640 case XML_PARSER_XML_DECL: 11641 if ((!terminate) && (avail < 2)) 11642 goto done; 11643 cur = ctxt->input->cur[0]; 11644 next = ctxt->input->cur[1]; 11645 if ((cur == '<') && (next == '?')) { 11646 /* PI or XML decl */ 11647 if ((!terminate) && 11648 (!xmlParseLookupString(ctxt, 2, "?>", 2))) 11649 goto done; 11650 if ((ctxt->input->cur[2] == 'x') && 11651 (ctxt->input->cur[3] == 'm') && 11652 (ctxt->input->cur[4] == 'l') && 11653 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11654 ret += 5; 11655 xmlParseXMLDecl(ctxt); 11656 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11657 /* 11658 * The XML REC instructs us to stop parsing right 11659 * here 11660 */ 11661 xmlHaltParser(ctxt); 11662 return(0); 11663 } 11664 } else { 11665 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11666 } 11667 } else { 11668 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11669 if (ctxt->version == NULL) { 11670 xmlErrMemory(ctxt, NULL); 11671 break; 11672 } 11673 } 11674 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11675 ctxt->sax->setDocumentLocator(ctxt->userData, 11676 &xmlDefaultSAXLocator); 11677 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11678 (!ctxt->disableSAX)) 11679 ctxt->sax->startDocument(ctxt->userData); 11680 if (ctxt->instate == XML_PARSER_EOF) 11681 goto done; 11682 ctxt->instate = XML_PARSER_MISC; 11683 break; 11684 case XML_PARSER_START_TAG: { 11685 const xmlChar *name; 11686 const xmlChar *prefix = NULL; 11687 const xmlChar *URI = NULL; 11688 int line = ctxt->input->line; 11689 int nbNs = 0; 11690 11691 if 
((!terminate) && (avail < 2)) 11692 goto done; 11693 cur = ctxt->input->cur[0]; 11694 if (cur != '<') { 11695 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 11696 "Start tag expected, '<' not found"); 11697 xmlHaltParser(ctxt); 11698 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11699 ctxt->sax->endDocument(ctxt->userData); 11700 goto done; 11701 } 11702 if ((!terminate) && (!xmlParseLookupGt(ctxt))) 11703 goto done; 11704 if (ctxt->spaceNr == 0) 11705 spacePush(ctxt, -1); 11706 else if (*ctxt->space == -2) 11707 spacePush(ctxt, -1); 11708 else 11709 spacePush(ctxt, *ctxt->space); 11710#ifdef LIBXML_SAX1_ENABLED 11711 if (ctxt->sax2) 11712#endif /* LIBXML_SAX1_ENABLED */ 11713 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs); 11714#ifdef LIBXML_SAX1_ENABLED 11715 else 11716 name = xmlParseStartTag(ctxt); 11717#endif /* LIBXML_SAX1_ENABLED */ 11718 if (ctxt->instate == XML_PARSER_EOF) 11719 goto done; 11720 if (name == NULL) { 11721 spacePop(ctxt); 11722 xmlHaltParser(ctxt); 11723 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11724 ctxt->sax->endDocument(ctxt->userData); 11725 goto done; 11726 } 11727#ifdef LIBXML_VALID_ENABLED 11728 /* 11729 * [ VC: Root Element Type ] 11730 * The Name in the document type declaration must match 11731 * the element type of the root element. 11732 */ 11733 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11734 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11735 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11736#endif /* LIBXML_VALID_ENABLED */ 11737 11738 /* 11739 * Check for an Empty Element. 
11740 */ 11741 if ((RAW == '/') && (NXT(1) == '>')) { 11742 SKIP(2); 11743 11744 if (ctxt->sax2) { 11745 if ((ctxt->sax != NULL) && 11746 (ctxt->sax->endElementNs != NULL) && 11747 (!ctxt->disableSAX)) 11748 ctxt->sax->endElementNs(ctxt->userData, name, 11749 prefix, URI); 11750 if (nbNs > 0) 11751 xmlParserNsPop(ctxt, nbNs); 11752#ifdef LIBXML_SAX1_ENABLED 11753 } else { 11754 if ((ctxt->sax != NULL) && 11755 (ctxt->sax->endElement != NULL) && 11756 (!ctxt->disableSAX)) 11757 ctxt->sax->endElement(ctxt->userData, name); 11758#endif /* LIBXML_SAX1_ENABLED */ 11759 } 11760 spacePop(ctxt); 11761 } else if (RAW == '>') { 11762 NEXT; 11763 nameNsPush(ctxt, name, prefix, URI, line, nbNs); 11764 } else { 11765 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11766 "Couldn't find end of Start Tag %s\n", 11767 name); 11768 nodePop(ctxt); 11769 spacePop(ctxt); 11770 if (nbNs > 0) 11771 xmlParserNsPop(ctxt, nbNs); 11772 } 11773 11774 if (ctxt->instate == XML_PARSER_EOF) 11775 goto done; 11776 if (ctxt->nameNr == 0) 11777 ctxt->instate = XML_PARSER_EPILOG; 11778 else 11779 ctxt->instate = XML_PARSER_CONTENT; 11780 break; 11781 } 11782 case XML_PARSER_CONTENT: { 11783 cur = ctxt->input->cur[0]; 11784 11785 if (cur == '<') { 11786 if ((!terminate) && (avail < 2)) 11787 goto done; 11788 next = ctxt->input->cur[1]; 11789 11790 if (next == '/') { 11791 ctxt->instate = XML_PARSER_END_TAG; 11792 break; 11793 } else if (next == '?') { 11794 if ((!terminate) && 11795 (!xmlParseLookupString(ctxt, 2, "?>", 2))) 11796 goto done; 11797 xmlParsePI(ctxt); 11798 if (ctxt->instate == XML_PARSER_EOF) 11799 goto done; 11800 ctxt->instate = XML_PARSER_CONTENT; 11801 break; 11802 } else if (next == '!') { 11803 if ((!terminate) && (avail < 3)) 11804 goto done; 11805 next = ctxt->input->cur[2]; 11806 11807 if (next == '-') { 11808 if ((!terminate) && (avail < 4)) 11809 goto done; 11810 if (ctxt->input->cur[3] == '-') { 11811 if ((!terminate) && 11812 (!xmlParseLookupString(ctxt, 4, "-->", 3))) 11813 
goto done; 11814 xmlParseComment(ctxt); 11815 if (ctxt->instate == XML_PARSER_EOF) 11816 goto done; 11817 ctxt->instate = XML_PARSER_CONTENT; 11818 break; 11819 } 11820 } else if (next == '[') { 11821 if ((!terminate) && (avail < 9)) 11822 goto done; 11823 if ((ctxt->input->cur[2] == '[') && 11824 (ctxt->input->cur[3] == 'C') && 11825 (ctxt->input->cur[4] == 'D') && 11826 (ctxt->input->cur[5] == 'A') && 11827 (ctxt->input->cur[6] == 'T') && 11828 (ctxt->input->cur[7] == 'A') && 11829 (ctxt->input->cur[8] == '[')) { 11830 SKIP(9); 11831 ctxt->instate = XML_PARSER_CDATA_SECTION; 11832 break; 11833 } 11834 } 11835 } 11836 } else if (cur == '&') { 11837 if ((!terminate) && (!xmlParseLookupChar(ctxt, ';'))) 11838 goto done; 11839 xmlParseReference(ctxt); 11840 break; 11841 } else { 11842 /* TODO Avoid the extra copy, handle directly !!! */ 11843 /* 11844 * Goal of the following test is: 11845 * - minimize calls to the SAX 'character' callback 11846 * when they are mergeable 11847 * - handle an problem for isBlank when we only parse 11848 * a sequence of blank chars and the next one is 11849 * not available to check against '<' presence. 11850 * - tries to homogenize the differences in SAX 11851 * callbacks between the push and pull versions 11852 * of the parser. 
11853 */ 11854 if (avail < XML_PARSER_BIG_BUFFER_SIZE) { 11855 if ((!terminate) && (!xmlParseLookupCharData(ctxt))) 11856 goto done; 11857 } 11858 ctxt->checkIndex = 0; 11859 xmlParseCharDataInternal(ctxt, !terminate); 11860 break; 11861 } 11862 11863 ctxt->instate = XML_PARSER_START_TAG; 11864 break; 11865 } 11866 case XML_PARSER_END_TAG: 11867 if ((!terminate) && (!xmlParseLookupChar(ctxt, '>'))) 11868 goto done; 11869 if (ctxt->sax2) { 11870 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]); 11871 nameNsPop(ctxt); 11872 } 11873#ifdef LIBXML_SAX1_ENABLED 11874 else 11875 xmlParseEndTag1(ctxt, 0); 11876#endif /* LIBXML_SAX1_ENABLED */ 11877 if (ctxt->instate == XML_PARSER_EOF) 11878 goto done; 11879 if (ctxt->nameNr == 0) { 11880 ctxt->instate = XML_PARSER_EPILOG; 11881 } else { 11882 ctxt->instate = XML_PARSER_CONTENT; 11883 } 11884 break; 11885 case XML_PARSER_CDATA_SECTION: { 11886 /* 11887 * The Push mode need to have the SAX callback for 11888 * cdataBlock merge back contiguous callbacks. 11889 */ 11890 const xmlChar *term; 11891 11892 if (terminate) { 11893 /* 11894 * Don't call xmlParseLookupString. If 'terminate' 11895 * is set, checkIndex is invalid. 11896 */ 11897 term = BAD_CAST strstr((const char *) ctxt->input->cur, 11898 "]]>"); 11899 } else { 11900 term = xmlParseLookupString(ctxt, 0, "]]>", 3); 11901 } 11902 11903 if (term == NULL) { 11904 int tmp, size; 11905 11906 if (terminate) { 11907 /* Unfinished CDATA section */ 11908 size = ctxt->input->end - ctxt->input->cur; 11909 } else { 11910 if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2) 11911 goto done; 11912 ctxt->checkIndex = 0; 11913 /* XXX: Why don't we pass the full buffer? 
*/ 11914 size = XML_PARSER_BIG_BUFFER_SIZE; 11915 } 11916 tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0); 11917 if (tmp <= 0) { 11918 tmp = -tmp; 11919 ctxt->input->cur += tmp; 11920 goto encoding_error; 11921 } 11922 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11923 if (ctxt->sax->cdataBlock != NULL) 11924 ctxt->sax->cdataBlock(ctxt->userData, 11925 ctxt->input->cur, tmp); 11926 else if (ctxt->sax->characters != NULL) 11927 ctxt->sax->characters(ctxt->userData, 11928 ctxt->input->cur, tmp); 11929 } 11930 if (ctxt->instate == XML_PARSER_EOF) 11931 goto done; 11932 SKIPL(tmp); 11933 } else { 11934 int base = term - CUR_PTR; 11935 int tmp; 11936 11937 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); 11938 if ((tmp < 0) || (tmp != base)) { 11939 tmp = -tmp; 11940 ctxt->input->cur += tmp; 11941 goto encoding_error; 11942 } 11943 if ((ctxt->sax != NULL) && (base == 0) && 11944 (ctxt->sax->cdataBlock != NULL) && 11945 (!ctxt->disableSAX)) { 11946 /* 11947 * Special case to provide identical behaviour 11948 * between pull and push parsers on enpty CDATA 11949 * sections 11950 */ 11951 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11952 (!strncmp((const char *)&ctxt->input->cur[-9], 11953 "<![CDATA[", 9))) 11954 ctxt->sax->cdataBlock(ctxt->userData, 11955 BAD_CAST "", 0); 11956 } else if ((ctxt->sax != NULL) && (base > 0) && 11957 (!ctxt->disableSAX)) { 11958 if (ctxt->sax->cdataBlock != NULL) 11959 ctxt->sax->cdataBlock(ctxt->userData, 11960 ctxt->input->cur, base); 11961 else if (ctxt->sax->characters != NULL) 11962 ctxt->sax->characters(ctxt->userData, 11963 ctxt->input->cur, base); 11964 } 11965 if (ctxt->instate == XML_PARSER_EOF) 11966 goto done; 11967 SKIPL(base + 3); 11968 ctxt->instate = XML_PARSER_CONTENT; 11969 } 11970 break; 11971 } 11972 case XML_PARSER_MISC: 11973 case XML_PARSER_PROLOG: 11974 case XML_PARSER_EPILOG: 11975 SKIP_BLANKS; 11976 avail = ctxt->input->end - ctxt->input->cur; 11977 if (avail < 1) 11978 goto done; 11979 if 
(ctxt->input->cur[0] == '<') { 11980 if ((!terminate) && (avail < 2)) 11981 goto done; 11982 next = ctxt->input->cur[1]; 11983 if (next == '?') { 11984 if ((!terminate) && 11985 (!xmlParseLookupString(ctxt, 2, "?>", 2))) 11986 goto done; 11987 xmlParsePI(ctxt); 11988 if (ctxt->instate == XML_PARSER_EOF) 11989 goto done; 11990 break; 11991 } else if (next == '!') { 11992 if ((!terminate) && (avail < 3)) 11993 goto done; 11994 11995 if (ctxt->input->cur[2] == '-') { 11996 if ((!terminate) && (avail < 4)) 11997 goto done; 11998 if (ctxt->input->cur[3] == '-') { 11999 if ((!terminate) && 12000 (!xmlParseLookupString(ctxt, 4, "-->", 3))) 12001 goto done; 12002 xmlParseComment(ctxt); 12003 if (ctxt->instate == XML_PARSER_EOF) 12004 goto done; 12005 break; 12006 } 12007 } else if (ctxt->instate == XML_PARSER_MISC) { 12008 if ((!terminate) && (avail < 9)) 12009 goto done; 12010 if ((ctxt->input->cur[2] == 'D') && 12011 (ctxt->input->cur[3] == 'O') && 12012 (ctxt->input->cur[4] == 'C') && 12013 (ctxt->input->cur[5] == 'T') && 12014 (ctxt->input->cur[6] == 'Y') && 12015 (ctxt->input->cur[7] == 'P') && 12016 (ctxt->input->cur[8] == 'E')) { 12017 if ((!terminate) && (!xmlParseLookupGt(ctxt))) 12018 goto done; 12019 ctxt->inSubset = 1; 12020 xmlParseDocTypeDecl(ctxt); 12021 if (ctxt->instate == XML_PARSER_EOF) 12022 goto done; 12023 if (RAW == '[') { 12024 ctxt->instate = XML_PARSER_DTD; 12025 } else { 12026 /* 12027 * Create and update the external subset. 
12028 */ 12029 ctxt->inSubset = 2; 12030 if ((ctxt->sax != NULL) && 12031 (!ctxt->disableSAX) && 12032 (ctxt->sax->externalSubset != NULL)) 12033 ctxt->sax->externalSubset( 12034 ctxt->userData, 12035 ctxt->intSubName, 12036 ctxt->extSubSystem, 12037 ctxt->extSubURI); 12038 ctxt->inSubset = 0; 12039 xmlCleanSpecialAttr(ctxt); 12040 if (ctxt->instate == XML_PARSER_EOF) 12041 goto done; 12042 ctxt->instate = XML_PARSER_PROLOG; 12043 } 12044 break; 12045 } 12046 } 12047 } 12048 } 12049 12050 if (ctxt->instate == XML_PARSER_EPILOG) { 12051 if (ctxt->errNo == XML_ERR_OK) 12052 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12053 ctxt->instate = XML_PARSER_EOF; 12054 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12055 ctxt->sax->endDocument(ctxt->userData); 12056 } else { 12057 ctxt->instate = XML_PARSER_START_TAG; 12058 } 12059 break; 12060 case XML_PARSER_DTD: { 12061 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt))) 12062 goto done; 12063 xmlParseInternalSubset(ctxt); 12064 if (ctxt->instate == XML_PARSER_EOF) 12065 goto done; 12066 ctxt->inSubset = 2; 12067 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 12068 (ctxt->sax->externalSubset != NULL)) 12069 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 12070 ctxt->extSubSystem, ctxt->extSubURI); 12071 ctxt->inSubset = 0; 12072 xmlCleanSpecialAttr(ctxt); 12073 if (ctxt->instate == XML_PARSER_EOF) 12074 goto done; 12075 ctxt->instate = XML_PARSER_PROLOG; 12076 break; 12077 } 12078 default: 12079 xmlGenericError(xmlGenericErrorContext, 12080 "PP: internal error\n"); 12081 ctxt->instate = XML_PARSER_EOF; 12082 break; 12083 } 12084 } 12085done: 12086 return(ret); 12087encoding_error: 12088 if (ctxt->input->end - ctxt->input->cur < 4) { 12089 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12090 "Input is not proper UTF-8, indicate encoding !\n", 12091 NULL, NULL); 12092 } else { 12093 char buffer[150]; 12094 12095 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12096 
ctxt->input->cur[0], ctxt->input->cur[1], 12097 ctxt->input->cur[2], ctxt->input->cur[3]); 12098 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12099 "Input is not proper UTF-8, indicate encoding !\n%s", 12100 BAD_CAST buffer, NULL); 12101 } 12102 return(0); 12103} 12104 12105/** 12106 * xmlParseChunk: 12107 * @ctxt: an XML parser context 12108 * @chunk: an char array 12109 * @size: the size in byte of the chunk 12110 * @terminate: last chunk indicator 12111 * 12112 * Parse a Chunk of memory 12113 * 12114 * Returns zero if no error, the xmlParserErrors otherwise. 12115 */ 12116int 12117xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12118 int terminate) { 12119 int end_in_lf = 0; 12120 12121 if (ctxt == NULL) 12122 return(XML_ERR_INTERNAL_ERROR); 12123 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12124 return(ctxt->errNo); 12125 if (ctxt->instate == XML_PARSER_EOF) 12126 return(-1); 12127 if (ctxt->input == NULL) 12128 return(-1); 12129 12130 ctxt->progressive = 1; 12131 if (ctxt->instate == XML_PARSER_START) 12132 xmlDetectSAX2(ctxt); 12133 if ((size > 0) && (chunk != NULL) && (!terminate) && 12134 (chunk[size - 1] == '\r')) { 12135 end_in_lf = 1; 12136 size--; 12137 } 12138 12139 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12140 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12141 size_t pos = ctxt->input->cur - ctxt->input->base; 12142 int res; 12143 12144 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12145 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos); 12146 if (res < 0) { 12147 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL); 12148 xmlHaltParser(ctxt); 12149 return(ctxt->errNo); 12150 } 12151 } 12152 12153 xmlParseTryOrFinish(ctxt, terminate); 12154 if (ctxt->instate == XML_PARSER_EOF) 12155 return(ctxt->errNo); 12156 12157 if ((ctxt->input != NULL) && 12158 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12159 ((ctxt->input->cur - 
ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12160 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12161 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12162 xmlHaltParser(ctxt); 12163 } 12164 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12165 return(ctxt->errNo); 12166 12167 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12168 (ctxt->input->buf != NULL)) { 12169 size_t pos = ctxt->input->cur - ctxt->input->base; 12170 int res; 12171 12172 res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12173 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos); 12174 if (res < 0) { 12175 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL); 12176 xmlHaltParser(ctxt); 12177 return(ctxt->errNo); 12178 } 12179 } 12180 if (terminate) { 12181 /* 12182 * Check for termination 12183 */ 12184 if ((ctxt->instate != XML_PARSER_EOF) && 12185 (ctxt->instate != XML_PARSER_EPILOG)) { 12186 if (ctxt->nameNr > 0) { 12187 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1]; 12188 int line = ctxt->pushTab[ctxt->nameNr - 1].line; 12189 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 12190 "Premature end of data in tag %s line %d\n", 12191 name, line, NULL); 12192 } else if (ctxt->instate == XML_PARSER_START) { 12193 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 12194 } else { 12195 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 12196 "Start tag expected, '<' not found\n"); 12197 } 12198 } else if ((ctxt->input->buf != NULL) && 12199 (ctxt->input->buf->encoder != NULL) && 12200 (!xmlBufIsEmpty(ctxt->input->buf->raw))) { 12201 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 12202 "Truncated multi-byte sequence at EOF\n"); 12203 } 12204 if (ctxt->instate != XML_PARSER_EOF) { 12205 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12206 ctxt->sax->endDocument(ctxt->userData); 12207 } 12208 ctxt->instate = XML_PARSER_EOF; 12209 } 12210 if (ctxt->wellFormed == 0) 12211 return((xmlParserErrors) ctxt->errNo); 12212 else 12213 return(0); 12214} 12215 
12216/************************************************************************ 12217 * * 12218 * I/O front end functions to the parser * 12219 * * 12220 ************************************************************************/ 12221 12222/** 12223 * xmlCreatePushParserCtxt: 12224 * @sax: a SAX handler 12225 * @user_data: The user data returned on SAX callbacks 12226 * @chunk: a pointer to an array of chars 12227 * @size: number of chars in the array 12228 * @filename: an optional file name or URI 12229 * 12230 * Create a parser context for using the XML parser in push mode. 12231 * If @buffer and @size are non-NULL, the data is used to detect 12232 * the encoding. The remaining characters will be parsed so they 12233 * don't need to be fed in again through xmlParseChunk. 12234 * To allow content encoding detection, @size should be >= 4 12235 * The value of @filename is used for fetching external entities 12236 * and error/warning reports. 12237 * 12238 * Returns the new parser context or NULL 12239 */ 12240 12241xmlParserCtxtPtr 12242xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12243 const char *chunk, int size, const char *filename) { 12244 xmlParserCtxtPtr ctxt; 12245 xmlParserInputPtr inputStream; 12246 xmlParserInputBufferPtr buf; 12247 12248 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); 12249 if (buf == NULL) return(NULL); 12250 12251 ctxt = xmlNewSAXParserCtxt(sax, user_data); 12252 if (ctxt == NULL) { 12253 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12254 xmlFreeParserInputBuffer(buf); 12255 return(NULL); 12256 } 12257 ctxt->dictNames = 1; 12258 if (filename == NULL) { 12259 ctxt->directory = NULL; 12260 } else { 12261 ctxt->directory = xmlParserGetDirectory(filename); 12262 } 12263 12264 inputStream = xmlNewInputStream(ctxt); 12265 if (inputStream == NULL) { 12266 xmlFreeParserCtxt(ctxt); 12267 xmlFreeParserInputBuffer(buf); 12268 return(NULL); 12269 } 12270 12271 if (filename == NULL) 12272 inputStream->filename = 
NULL; 12273 else { 12274 inputStream->filename = (char *) 12275 xmlCanonicPath((const xmlChar *) filename); 12276 if (inputStream->filename == NULL) { 12277 xmlFreeInputStream(inputStream); 12278 xmlFreeParserCtxt(ctxt); 12279 xmlFreeParserInputBuffer(buf); 12280 return(NULL); 12281 } 12282 } 12283 inputStream->buf = buf; 12284 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12285 inputPush(ctxt, inputStream); 12286 12287 if ((size != 0) && (chunk != NULL) && 12288 (ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12289 size_t pos = ctxt->input->cur - ctxt->input->base; 12290 int res; 12291 12292 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12293 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos); 12294 if (res < 0) { 12295 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL); 12296 xmlHaltParser(ctxt); 12297 } 12298 } 12299 12300 return(ctxt); 12301} 12302#endif /* LIBXML_PUSH_ENABLED */ 12303 12304/** 12305 * xmlStopParser: 12306 * @ctxt: an XML parser context 12307 * 12308 * Blocks further parser processing 12309 */ 12310void 12311xmlStopParser(xmlParserCtxtPtr ctxt) { 12312 if (ctxt == NULL) 12313 return; 12314 xmlHaltParser(ctxt); 12315 ctxt->errNo = XML_ERR_USER_STOP; 12316} 12317 12318/** 12319 * xmlCreateIOParserCtxt: 12320 * @sax: a SAX handler 12321 * @user_data: The user data returned on SAX callbacks 12322 * @ioread: an I/O read function 12323 * @ioclose: an I/O close function 12324 * @ioctx: an I/O handler 12325 * @enc: the charset encoding if known 12326 * 12327 * Create a parser context for using the XML parser with an existing 12328 * I/O stream 12329 * 12330 * Returns the new parser context or NULL 12331 */ 12332xmlParserCtxtPtr 12333xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12334 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12335 void *ioctx, xmlCharEncoding enc) { 12336 xmlParserCtxtPtr ctxt; 12337 xmlParserInputPtr inputStream; 12338 xmlParserInputBufferPtr buf; 12339 12340 
if (ioread == NULL) return(NULL); 12341 12342 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12343 if (buf == NULL) { 12344 if (ioclose != NULL) 12345 ioclose(ioctx); 12346 return (NULL); 12347 } 12348 12349 ctxt = xmlNewSAXParserCtxt(sax, user_data); 12350 if (ctxt == NULL) { 12351 xmlFreeParserInputBuffer(buf); 12352 return(NULL); 12353 } 12354 12355 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12356 if (inputStream == NULL) { 12357 xmlFreeParserCtxt(ctxt); 12358 return(NULL); 12359 } 12360 inputPush(ctxt, inputStream); 12361 12362 return(ctxt); 12363} 12364 12365#ifdef LIBXML_VALID_ENABLED 12366/************************************************************************ 12367 * * 12368 * Front ends when parsing a DTD * 12369 * * 12370 ************************************************************************/ 12371 12372/** 12373 * xmlIOParseDTD: 12374 * @sax: the SAX handler block or NULL 12375 * @input: an Input Buffer 12376 * @enc: the charset encoding if known 12377 * 12378 * Load and parse a DTD 12379 * 12380 * Returns the resulting xmlDtdPtr or NULL in case of error. 12381 * @input will be freed by the function in any case. 
12382 */ 12383 12384xmlDtdPtr 12385xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12386 xmlCharEncoding enc) { 12387 xmlDtdPtr ret = NULL; 12388 xmlParserCtxtPtr ctxt; 12389 xmlParserInputPtr pinput = NULL; 12390 12391 if (input == NULL) 12392 return(NULL); 12393 12394 ctxt = xmlNewSAXParserCtxt(sax, NULL); 12395 if (ctxt == NULL) { 12396 xmlFreeParserInputBuffer(input); 12397 return(NULL); 12398 } 12399 12400 /* We are loading a DTD */ 12401 ctxt->options |= XML_PARSE_DTDLOAD; 12402 12403 xmlDetectSAX2(ctxt); 12404 12405 /* 12406 * generate a parser input from the I/O handler 12407 */ 12408 12409 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12410 if (pinput == NULL) { 12411 xmlFreeParserInputBuffer(input); 12412 xmlFreeParserCtxt(ctxt); 12413 return(NULL); 12414 } 12415 12416 /* 12417 * plug some encoding conversion routines here. 12418 */ 12419 if (xmlPushInput(ctxt, pinput) < 0) { 12420 xmlFreeParserCtxt(ctxt); 12421 return(NULL); 12422 } 12423 if (enc != XML_CHAR_ENCODING_NONE) { 12424 xmlSwitchEncoding(ctxt, enc); 12425 } 12426 12427 /* 12428 * let's parse that entity knowing it's an external subset. 
12429 */ 12430 ctxt->inSubset = 2; 12431 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12432 if (ctxt->myDoc == NULL) { 12433 xmlErrMemory(ctxt, "New Doc failed"); 12434 return(NULL); 12435 } 12436 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12437 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12438 BAD_CAST "none", BAD_CAST "none"); 12439 12440 xmlDetectEncoding(ctxt); 12441 12442 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12443 12444 if (ctxt->myDoc != NULL) { 12445 if (ctxt->wellFormed) { 12446 ret = ctxt->myDoc->extSubset; 12447 ctxt->myDoc->extSubset = NULL; 12448 if (ret != NULL) { 12449 xmlNodePtr tmp; 12450 12451 ret->doc = NULL; 12452 tmp = ret->children; 12453 while (tmp != NULL) { 12454 tmp->doc = NULL; 12455 tmp = tmp->next; 12456 } 12457 } 12458 } else { 12459 ret = NULL; 12460 } 12461 xmlFreeDoc(ctxt->myDoc); 12462 ctxt->myDoc = NULL; 12463 } 12464 xmlFreeParserCtxt(ctxt); 12465 12466 return(ret); 12467} 12468 12469/** 12470 * xmlSAXParseDTD: 12471 * @sax: the SAX handler block 12472 * @ExternalID: a NAME* containing the External ID of the DTD 12473 * @SystemID: a NAME* containing the URL to the DTD 12474 * 12475 * DEPRECATED: Don't use. 12476 * 12477 * Load and parse an external subset. 12478 * 12479 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12480 */ 12481 12482xmlDtdPtr 12483xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12484 const xmlChar *SystemID) { 12485 xmlDtdPtr ret = NULL; 12486 xmlParserCtxtPtr ctxt; 12487 xmlParserInputPtr input = NULL; 12488 xmlChar* systemIdCanonic; 12489 12490 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12491 12492 ctxt = xmlNewSAXParserCtxt(sax, NULL); 12493 if (ctxt == NULL) { 12494 return(NULL); 12495 } 12496 12497 /* We are loading a DTD */ 12498 ctxt->options |= XML_PARSE_DTDLOAD; 12499 12500 /* 12501 * Canonicalise the system ID 12502 */ 12503 systemIdCanonic = xmlCanonicPath(SystemID); 12504 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12505 xmlFreeParserCtxt(ctxt); 12506 return(NULL); 12507 } 12508 12509 /* 12510 * Ask the Entity resolver to load the damn thing 12511 */ 12512 12513 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12514 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12515 systemIdCanonic); 12516 if (input == NULL) { 12517 xmlFreeParserCtxt(ctxt); 12518 if (systemIdCanonic != NULL) 12519 xmlFree(systemIdCanonic); 12520 return(NULL); 12521 } 12522 12523 /* 12524 * plug some encoding conversion routines here. 12525 */ 12526 if (xmlPushInput(ctxt, input) < 0) { 12527 xmlFreeParserCtxt(ctxt); 12528 if (systemIdCanonic != NULL) 12529 xmlFree(systemIdCanonic); 12530 return(NULL); 12531 } 12532 12533 xmlDetectEncoding(ctxt); 12534 12535 if (input->filename == NULL) 12536 input->filename = (char *) systemIdCanonic; 12537 else 12538 xmlFree(systemIdCanonic); 12539 12540 /* 12541 * let's parse that entity knowing it's an external subset. 
12542 */ 12543 ctxt->inSubset = 2; 12544 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12545 if (ctxt->myDoc == NULL) { 12546 xmlErrMemory(ctxt, "New Doc failed"); 12547 xmlFreeParserCtxt(ctxt); 12548 return(NULL); 12549 } 12550 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12551 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12552 ExternalID, SystemID); 12553 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12554 12555 if (ctxt->myDoc != NULL) { 12556 if (ctxt->wellFormed) { 12557 ret = ctxt->myDoc->extSubset; 12558 ctxt->myDoc->extSubset = NULL; 12559 if (ret != NULL) { 12560 xmlNodePtr tmp; 12561 12562 ret->doc = NULL; 12563 tmp = ret->children; 12564 while (tmp != NULL) { 12565 tmp->doc = NULL; 12566 tmp = tmp->next; 12567 } 12568 } 12569 } else { 12570 ret = NULL; 12571 } 12572 xmlFreeDoc(ctxt->myDoc); 12573 ctxt->myDoc = NULL; 12574 } 12575 xmlFreeParserCtxt(ctxt); 12576 12577 return(ret); 12578} 12579 12580 12581/** 12582 * xmlParseDTD: 12583 * @ExternalID: a NAME* containing the External ID of the DTD 12584 * @SystemID: a NAME* containing the URL to the DTD 12585 * 12586 * Load and parse an external subset. 12587 * 12588 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12589 */ 12590 12591xmlDtdPtr 12592xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12593 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12594} 12595#endif /* LIBXML_VALID_ENABLED */ 12596 12597/************************************************************************ 12598 * * 12599 * Front ends when parsing an Entity * 12600 * * 12601 ************************************************************************/ 12602 12603/** 12604 * xmlParseCtxtExternalEntity: 12605 * @ctx: the existing parsing context 12606 * @URL: the URL for the entity to load 12607 * @ID: the System ID for the entity to load 12608 * @lst: the return value for the set of parsed nodes 12609 * 12610 * Parse an external general entity within an existing parsing context 12611 * An external general parsed entity is well-formed if it matches the 12612 * production labeled extParsedEnt. 12613 * 12614 * [78] extParsedEnt ::= TextDecl? content 12615 * 12616 * Returns 0 if the entity is well formed, -1 in case of args problem and 12617 * the parser error code otherwise 12618 */ 12619 12620int 12621xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12622 const xmlChar *ID, xmlNodePtr *lst) { 12623 void *userData; 12624 12625 if (ctx == NULL) return(-1); 12626 /* 12627 * If the user provided their own SAX callbacks, then reuse the 12628 * userData callback field, otherwise the expected setup in a 12629 * DOM builder is to have userData == ctxt 12630 */ 12631 if (ctx->userData == ctx) 12632 userData = NULL; 12633 else 12634 userData = ctx->userData; 12635 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax, 12636 userData, ctx->depth + 1, 12637 URL, ID, lst); 12638} 12639 12640/** 12641 * xmlParseExternalEntityPrivate: 12642 * @doc: the document the chunk pertains to 12643 * @oldctxt: the previous parser context if available 12644 * @sax: the SAX handler block (possibly NULL) 12645 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12646 * 
@depth: Used for loop detection, use 0 12647 * @URL: the URL for the entity to load 12648 * @ID: the System ID for the entity to load 12649 * @list: the return value for the set of parsed nodes 12650 * 12651 * Private version of xmlParseExternalEntity() 12652 * 12653 * Returns 0 if the entity is well formed, -1 in case of args problem and 12654 * the parser error code otherwise 12655 */ 12656 12657static xmlParserErrors 12658xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 12659 xmlSAXHandlerPtr sax, 12660 void *user_data, int depth, const xmlChar *URL, 12661 const xmlChar *ID, xmlNodePtr *list) { 12662 xmlParserCtxtPtr ctxt; 12663 xmlDocPtr newDoc; 12664 xmlNodePtr newRoot; 12665 xmlParserErrors ret = XML_ERR_OK; 12666 12667 if (((depth > 40) && 12668 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 12669 (depth > 100)) { 12670 xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP, 12671 "Maximum entity nesting depth exceeded"); 12672 return(XML_ERR_ENTITY_LOOP); 12673 } 12674 12675 if (list != NULL) 12676 *list = NULL; 12677 if ((URL == NULL) && (ID == NULL)) 12678 return(XML_ERR_INTERNAL_ERROR); 12679 if (doc == NULL) 12680 return(XML_ERR_INTERNAL_ERROR); 12681 12682 ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL, 12683 oldctxt); 12684 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12685 if (oldctxt != NULL) { 12686 ctxt->nbErrors = oldctxt->nbErrors; 12687 ctxt->nbWarnings = oldctxt->nbWarnings; 12688 } 12689 xmlDetectSAX2(ctxt); 12690 12691 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12692 if (newDoc == NULL) { 12693 xmlFreeParserCtxt(ctxt); 12694 return(XML_ERR_INTERNAL_ERROR); 12695 } 12696 newDoc->properties = XML_DOC_INTERNAL; 12697 if (doc) { 12698 newDoc->intSubset = doc->intSubset; 12699 newDoc->extSubset = doc->extSubset; 12700 if (doc->dict) { 12701 newDoc->dict = doc->dict; 12702 xmlDictReference(newDoc->dict); 12703 } 12704 if (doc->URL != NULL) { 12705 newDoc->URL = xmlStrdup(doc->URL); 12706 } 
12707 } 12708 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12709 if (newRoot == NULL) { 12710 if (sax != NULL) 12711 xmlFreeParserCtxt(ctxt); 12712 newDoc->intSubset = NULL; 12713 newDoc->extSubset = NULL; 12714 xmlFreeDoc(newDoc); 12715 return(XML_ERR_INTERNAL_ERROR); 12716 } 12717 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12718 nodePush(ctxt, newDoc->children); 12719 if (doc == NULL) { 12720 ctxt->myDoc = newDoc; 12721 } else { 12722 ctxt->myDoc = doc; 12723 newRoot->doc = doc; 12724 } 12725 12726 xmlDetectEncoding(ctxt); 12727 12728 /* 12729 * Parse a possible text declaration first 12730 */ 12731 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12732 xmlParseTextDecl(ctxt); 12733 /* 12734 * An XML-1.0 document can't reference an entity not XML-1.0 12735 */ 12736 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) && 12737 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12738 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12739 "Version mismatch between document and entity\n"); 12740 } 12741 } 12742 12743 ctxt->instate = XML_PARSER_CONTENT; 12744 ctxt->depth = depth; 12745 if (oldctxt != NULL) { 12746 ctxt->_private = oldctxt->_private; 12747 ctxt->loadsubset = oldctxt->loadsubset; 12748 ctxt->validate = oldctxt->validate; 12749 ctxt->valid = oldctxt->valid; 12750 ctxt->replaceEntities = oldctxt->replaceEntities; 12751 if (oldctxt->validate) { 12752 ctxt->vctxt.error = oldctxt->vctxt.error; 12753 ctxt->vctxt.warning = oldctxt->vctxt.warning; 12754 ctxt->vctxt.userData = oldctxt->vctxt.userData; 12755 ctxt->vctxt.flags = oldctxt->vctxt.flags; 12756 } 12757 ctxt->external = oldctxt->external; 12758 if (ctxt->dict) xmlDictFree(ctxt->dict); 12759 ctxt->dict = oldctxt->dict; 12760 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12761 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12762 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12763 ctxt->dictNames = 
oldctxt->dictNames; 12764 ctxt->attsDefault = oldctxt->attsDefault; 12765 ctxt->attsSpecial = oldctxt->attsSpecial; 12766 ctxt->linenumbers = oldctxt->linenumbers; 12767 ctxt->record_info = oldctxt->record_info; 12768 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 12769 ctxt->node_seq.length = oldctxt->node_seq.length; 12770 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 12771 } else { 12772 /* 12773 * Doing validity checking on chunk without context 12774 * doesn't make sense 12775 */ 12776 ctxt->_private = NULL; 12777 ctxt->validate = 0; 12778 ctxt->external = 2; 12779 ctxt->loadsubset = 0; 12780 } 12781 12782 xmlParseContent(ctxt); 12783 12784 if ((RAW == '<') && (NXT(1) == '/')) { 12785 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12786 } else if (RAW != 0) { 12787 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12788 } 12789 if (ctxt->node != newDoc->children) { 12790 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12791 } 12792 12793 if (!ctxt->wellFormed) { 12794 ret = (xmlParserErrors)ctxt->errNo; 12795 if (oldctxt != NULL) { 12796 oldctxt->errNo = ctxt->errNo; 12797 oldctxt->wellFormed = 0; 12798 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12799 } 12800 } else { 12801 if (list != NULL) { 12802 xmlNodePtr cur; 12803 12804 /* 12805 * Return the newly created nodeset after unlinking it from 12806 * they pseudo parent. 
12807 */ 12808 cur = newDoc->children->children; 12809 *list = cur; 12810 while (cur != NULL) { 12811 cur->parent = NULL; 12812 cur = cur->next; 12813 } 12814 newDoc->children->children = NULL; 12815 } 12816 ret = XML_ERR_OK; 12817 } 12818 12819 /* 12820 * Also record the size of the entity parsed 12821 */ 12822 if (ctxt->input != NULL && oldctxt != NULL) { 12823 unsigned long consumed = ctxt->input->consumed; 12824 12825 xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base); 12826 12827 xmlSaturatedAdd(&oldctxt->sizeentities, consumed); 12828 xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities); 12829 12830 xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed); 12831 xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy); 12832 } 12833 12834 if (oldctxt != NULL) { 12835 ctxt->dict = NULL; 12836 ctxt->attsDefault = NULL; 12837 ctxt->attsSpecial = NULL; 12838 oldctxt->nbErrors = ctxt->nbErrors; 12839 oldctxt->nbWarnings = ctxt->nbWarnings; 12840 oldctxt->validate = ctxt->validate; 12841 oldctxt->valid = ctxt->valid; 12842 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 12843 oldctxt->node_seq.length = ctxt->node_seq.length; 12844 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 12845 } 12846 ctxt->node_seq.maximum = 0; 12847 ctxt->node_seq.length = 0; 12848 ctxt->node_seq.buffer = NULL; 12849 xmlFreeParserCtxt(ctxt); 12850 newDoc->intSubset = NULL; 12851 newDoc->extSubset = NULL; 12852 xmlFreeDoc(newDoc); 12853 12854 return(ret); 12855} 12856 12857#ifdef LIBXML_SAX1_ENABLED 12858/** 12859 * xmlParseExternalEntity: 12860 * @doc: the document the chunk pertains to 12861 * @sax: the SAX handler block (possibly NULL) 12862 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12863 * @depth: Used for loop detection, use 0 12864 * @URL: the URL for the entity to load 12865 * @ID: the System ID for the entity to load 12866 * @lst: the return value for the set of parsed nodes 12867 * 12868 * Parse an external general entity 12869 * 
An external general parsed entity is well-formed if it matches the 12870 * production labeled extParsedEnt. 12871 * 12872 * [78] extParsedEnt ::= TextDecl? content 12873 * 12874 * Returns 0 if the entity is well formed, -1 in case of args problem and 12875 * the parser error code otherwise 12876 */ 12877 12878int 12879xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 12880 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 12881 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 12882 ID, lst)); 12883} 12884 12885/** 12886 * xmlParseBalancedChunkMemory: 12887 * @doc: the document the chunk pertains to (must not be NULL) 12888 * @sax: the SAX handler block (possibly NULL) 12889 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12890 * @depth: Used for loop detection, use 0 12891 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12892 * @lst: the return value for the set of parsed nodes 12893 * 12894 * Parse a well-balanced chunk of an XML document 12895 * called by the parser 12896 * The allowed sequence for the Well Balanced Chunk is the one defined by 12897 * the content production in the XML grammar: 12898 * 12899 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12900 * 12901 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 12902 * the parser error code otherwise 12903 */ 12904 12905int 12906xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 12907 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 12908 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 12909 depth, string, lst, 0 ); 12910} 12911#endif /* LIBXML_SAX1_ENABLED */ 12912 12913/** 12914 * xmlParseBalancedChunkMemoryInternal: 12915 * @oldctxt: the existing parsing context 12916 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12917 * @user_data: the user data field for the parser 
context 12918 * @lst: the return value for the set of parsed nodes 12919 * 12920 * 12921 * Parse a well-balanced chunk of an XML document 12922 * called by the parser 12923 * The allowed sequence for the Well Balanced Chunk is the one defined by 12924 * the content production in the XML grammar: 12925 * 12926 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12927 * 12928 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12929 * error code otherwise 12930 * 12931 * In case recover is set to 1, the nodelist will not be empty even if 12932 * the parsed chunk is not well balanced. 12933 */ 12934static xmlParserErrors 12935xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 12936 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 12937 xmlParserCtxtPtr ctxt; 12938 xmlDocPtr newDoc = NULL; 12939 xmlNodePtr newRoot; 12940 xmlSAXHandlerPtr oldsax = NULL; 12941 xmlNodePtr content = NULL; 12942 xmlNodePtr last = NULL; 12943 xmlParserErrors ret = XML_ERR_OK; 12944 xmlHashedString hprefix, huri; 12945 unsigned i; 12946 12947 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 12948 (oldctxt->depth > 100)) { 12949 xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP, 12950 "Maximum entity nesting depth exceeded"); 12951 return(XML_ERR_ENTITY_LOOP); 12952 } 12953 12954 12955 if (lst != NULL) 12956 *lst = NULL; 12957 if (string == NULL) 12958 return(XML_ERR_INTERNAL_ERROR); 12959 12960 ctxt = xmlCreateDocParserCtxt(string); 12961 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12962 ctxt->nbErrors = oldctxt->nbErrors; 12963 ctxt->nbWarnings = oldctxt->nbWarnings; 12964 if (user_data != NULL) 12965 ctxt->userData = user_data; 12966 else 12967 ctxt->userData = ctxt; 12968 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12969 ctxt->dict = oldctxt->dict; 12970 ctxt->input_id = oldctxt->input_id; 12971 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12972 ctxt->str_xmlns = 
xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12973 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12974 12975 /* 12976 * Propagate namespaces down the entity 12977 * 12978 * Making entities and namespaces work correctly requires additional 12979 * changes, see xmlParseReference. 12980 */ 12981 12982 /* Default namespace */ 12983 hprefix.name = NULL; 12984 hprefix.hashValue = 0; 12985 huri.name = xmlParserNsLookupUri(oldctxt, &hprefix); 12986 huri.hashValue = 0; 12987 if (huri.name != NULL) 12988 xmlParserNsPush(ctxt, NULL, &huri, NULL, 0); 12989 12990 for (i = 0; i < oldctxt->nsdb->hashSize; i++) { 12991 xmlParserNsBucket *bucket = &oldctxt->nsdb->hash[i]; 12992 const xmlChar **ns; 12993 xmlParserNsExtra *extra; 12994 unsigned nsIndex; 12995 12996 if ((bucket->hashValue != 0) && 12997 (bucket->index != INT_MAX)) { 12998 nsIndex = bucket->index; 12999 ns = &oldctxt->nsTab[nsIndex * 2]; 13000 extra = &oldctxt->nsdb->extra[nsIndex]; 13001 13002 hprefix.name = ns[0]; 13003 hprefix.hashValue = bucket->hashValue; 13004 huri.name = ns[1]; 13005 huri.hashValue = extra->uriHashValue; 13006 /* 13007 * Don't copy SAX data to avoid a use-after-free with XML reader. 13008 * This matches the pre-2.12 behavior. 
13009 */ 13010 xmlParserNsPush(ctxt, &hprefix, &huri, NULL, 0); 13011 } 13012 } 13013 13014 oldsax = ctxt->sax; 13015 ctxt->sax = oldctxt->sax; 13016 xmlDetectSAX2(ctxt); 13017 ctxt->replaceEntities = oldctxt->replaceEntities; 13018 ctxt->options = oldctxt->options; 13019 13020 ctxt->_private = oldctxt->_private; 13021 if (oldctxt->myDoc == NULL) { 13022 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13023 if (newDoc == NULL) { 13024 ret = XML_ERR_INTERNAL_ERROR; 13025 goto error; 13026 } 13027 newDoc->properties = XML_DOC_INTERNAL; 13028 newDoc->dict = ctxt->dict; 13029 xmlDictReference(newDoc->dict); 13030 ctxt->myDoc = newDoc; 13031 } else { 13032 ctxt->myDoc = oldctxt->myDoc; 13033 content = ctxt->myDoc->children; 13034 last = ctxt->myDoc->last; 13035 } 13036 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13037 if (newRoot == NULL) { 13038 ret = XML_ERR_INTERNAL_ERROR; 13039 goto error; 13040 } 13041 ctxt->myDoc->children = NULL; 13042 ctxt->myDoc->last = NULL; 13043 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13044 nodePush(ctxt, ctxt->myDoc->children); 13045 ctxt->instate = XML_PARSER_CONTENT; 13046 ctxt->depth = oldctxt->depth; 13047 13048 ctxt->validate = 0; 13049 ctxt->loadsubset = oldctxt->loadsubset; 13050 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13051 /* 13052 * ID/IDREF registration will be done in xmlValidateElement below 13053 */ 13054 ctxt->loadsubset |= XML_SKIP_IDS; 13055 } 13056 ctxt->dictNames = oldctxt->dictNames; 13057 ctxt->attsDefault = oldctxt->attsDefault; 13058 ctxt->attsSpecial = oldctxt->attsSpecial; 13059 13060 xmlParseContent(ctxt); 13061 if ((RAW == '<') && (NXT(1) == '/')) { 13062 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13063 } else if (RAW != 0) { 13064 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13065 } 13066 if (ctxt->node != ctxt->myDoc->children) { 13067 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13068 } 13069 13070 if (!ctxt->wellFormed) { 13071 ret = 
(xmlParserErrors)ctxt->errNo; 13072 oldctxt->errNo = ctxt->errNo; 13073 oldctxt->wellFormed = 0; 13074 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13075 } else { 13076 ret = XML_ERR_OK; 13077 } 13078 13079 if ((lst != NULL) && (ret == XML_ERR_OK)) { 13080 xmlNodePtr cur; 13081 13082 /* 13083 * Return the newly created nodeset after unlinking it from 13084 * they pseudo parent. 13085 */ 13086 cur = ctxt->myDoc->children->children; 13087 *lst = cur; 13088 while (cur != NULL) { 13089#ifdef LIBXML_VALID_ENABLED 13090 if ((oldctxt->validate) && (oldctxt->wellFormed) && 13091 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13092 (cur->type == XML_ELEMENT_NODE)) { 13093 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13094 oldctxt->myDoc, cur); 13095 } 13096#endif /* LIBXML_VALID_ENABLED */ 13097 cur->parent = NULL; 13098 cur = cur->next; 13099 } 13100 ctxt->myDoc->children->children = NULL; 13101 } 13102 if (ctxt->myDoc != NULL) { 13103 xmlFreeNode(ctxt->myDoc->children); 13104 ctxt->myDoc->children = content; 13105 ctxt->myDoc->last = last; 13106 } 13107 13108 /* 13109 * Also record the size of the entity parsed 13110 */ 13111 if (ctxt->input != NULL && oldctxt != NULL) { 13112 unsigned long consumed = ctxt->input->consumed; 13113 13114 xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base); 13115 13116 xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed); 13117 xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy); 13118 } 13119 13120 oldctxt->nbErrors = ctxt->nbErrors; 13121 oldctxt->nbWarnings = ctxt->nbWarnings; 13122 13123error: 13124 ctxt->sax = oldsax; 13125 ctxt->dict = NULL; 13126 ctxt->attsDefault = NULL; 13127 ctxt->attsSpecial = NULL; 13128 xmlFreeParserCtxt(ctxt); 13129 if (newDoc != NULL) { 13130 xmlFreeDoc(newDoc); 13131 } 13132 13133 return(ret); 13134} 13135 13136/** 13137 * xmlParseInNodeContext: 13138 * @node: the context node 13139 * @data: the input string 13140 * @datalen: the input string length in bytes 13141 * 
@options: a combination of xmlParserOption 13142 * @lst: the return value for the set of parsed nodes 13143 * 13144 * Parse a well-balanced chunk of an XML document 13145 * within the context (DTD, namespaces, etc ...) of the given node. 13146 * 13147 * The allowed sequence for the data is a Well Balanced Chunk defined by 13148 * the content production in the XML grammar: 13149 * 13150 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13151 * 13152 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13153 * error code otherwise 13154 */ 13155xmlParserErrors 13156xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13157 int options, xmlNodePtr *lst) { 13158 xmlParserCtxtPtr ctxt; 13159 xmlDocPtr doc = NULL; 13160 xmlNodePtr fake, cur; 13161 int nsnr = 0; 13162 13163 xmlParserErrors ret = XML_ERR_OK; 13164 13165 /* 13166 * check all input parameters, grab the document 13167 */ 13168 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13169 return(XML_ERR_INTERNAL_ERROR); 13170 switch (node->type) { 13171 case XML_ELEMENT_NODE: 13172 case XML_ATTRIBUTE_NODE: 13173 case XML_TEXT_NODE: 13174 case XML_CDATA_SECTION_NODE: 13175 case XML_ENTITY_REF_NODE: 13176 case XML_PI_NODE: 13177 case XML_COMMENT_NODE: 13178 case XML_DOCUMENT_NODE: 13179 case XML_HTML_DOCUMENT_NODE: 13180 break; 13181 default: 13182 return(XML_ERR_INTERNAL_ERROR); 13183 13184 } 13185 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13186 (node->type != XML_DOCUMENT_NODE) && 13187 (node->type != XML_HTML_DOCUMENT_NODE)) 13188 node = node->parent; 13189 if (node == NULL) 13190 return(XML_ERR_INTERNAL_ERROR); 13191 if (node->type == XML_ELEMENT_NODE) 13192 doc = node->doc; 13193 else 13194 doc = (xmlDocPtr) node; 13195 if (doc == NULL) 13196 return(XML_ERR_INTERNAL_ERROR); 13197 13198 /* 13199 * allocate a context and set-up everything not related to the 13200 * node position in the tree 13201 */ 13202 if 
(doc->type == XML_DOCUMENT_NODE) 13203 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13204#ifdef LIBXML_HTML_ENABLED 13205 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13206 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13207 /* 13208 * When parsing in context, it makes no sense to add implied 13209 * elements like html/body/etc... 13210 */ 13211 options |= HTML_PARSE_NOIMPLIED; 13212 } 13213#endif 13214 else 13215 return(XML_ERR_INTERNAL_ERROR); 13216 13217 if (ctxt == NULL) 13218 return(XML_ERR_NO_MEMORY); 13219 13220 /* 13221 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13222 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13223 * we must wait until the last moment to free the original one. 13224 */ 13225 if (doc->dict != NULL) { 13226 if (ctxt->dict != NULL) 13227 xmlDictFree(ctxt->dict); 13228 ctxt->dict = doc->dict; 13229 } else 13230 options |= XML_PARSE_NODICT; 13231 13232 if (doc->encoding != NULL) { 13233 xmlCharEncodingHandlerPtr hdlr; 13234 13235 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13236 if (hdlr != NULL) { 13237 xmlSwitchToEncoding(ctxt, hdlr); 13238 } else { 13239 return(XML_ERR_UNSUPPORTED_ENCODING); 13240 } 13241 } 13242 13243 xmlCtxtUseOptionsInternal(ctxt, options); 13244 xmlDetectSAX2(ctxt); 13245 ctxt->myDoc = doc; 13246 /* parsing in context, i.e. 
as within existing content */ 13247 ctxt->input_id = 2; 13248 ctxt->instate = XML_PARSER_CONTENT; 13249 13250 fake = xmlNewDocComment(node->doc, NULL); 13251 if (fake == NULL) { 13252 xmlFreeParserCtxt(ctxt); 13253 return(XML_ERR_NO_MEMORY); 13254 } 13255 xmlAddChild(node, fake); 13256 13257 if (node->type == XML_ELEMENT_NODE) 13258 nodePush(ctxt, node); 13259 13260 if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) { 13261 /* 13262 * initialize the SAX2 namespaces stack 13263 */ 13264 cur = node; 13265 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13266 xmlNsPtr ns = cur->nsDef; 13267 xmlHashedString hprefix, huri; 13268 13269 while (ns != NULL) { 13270 hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1); 13271 huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1); 13272 if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0) 13273 nsnr++; 13274 ns = ns->next; 13275 } 13276 cur = cur->parent; 13277 } 13278 } 13279 13280 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13281 /* 13282 * ID/IDREF registration will be done in xmlValidateElement below 13283 */ 13284 ctxt->loadsubset |= XML_SKIP_IDS; 13285 } 13286 13287#ifdef LIBXML_HTML_ENABLED 13288 if (doc->type == XML_HTML_DOCUMENT_NODE) 13289 __htmlParseContent(ctxt); 13290 else 13291#endif 13292 xmlParseContent(ctxt); 13293 13294 xmlParserNsPop(ctxt, nsnr); 13295 if ((RAW == '<') && (NXT(1) == '/')) { 13296 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13297 } else if (RAW != 0) { 13298 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13299 } 13300 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13301 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13302 ctxt->wellFormed = 0; 13303 } 13304 13305 if (!ctxt->wellFormed) { 13306 if (ctxt->errNo == 0) 13307 ret = XML_ERR_INTERNAL_ERROR; 13308 else 13309 ret = (xmlParserErrors)ctxt->errNo; 13310 } else { 13311 ret = XML_ERR_OK; 13312 } 13313 13314 /* 13315 * Return the newly created nodeset after unlinking it from 13316 * 
the pseudo sibling. 13317 */ 13318 13319 cur = fake->next; 13320 fake->next = NULL; 13321 node->last = fake; 13322 13323 if (cur != NULL) { 13324 cur->prev = NULL; 13325 } 13326 13327 *lst = cur; 13328 13329 while (cur != NULL) { 13330 cur->parent = NULL; 13331 cur = cur->next; 13332 } 13333 13334 xmlUnlinkNode(fake); 13335 xmlFreeNode(fake); 13336 13337 13338 if (ret != XML_ERR_OK) { 13339 xmlFreeNodeList(*lst); 13340 *lst = NULL; 13341 } 13342 13343 if (doc->dict != NULL) 13344 ctxt->dict = NULL; 13345 xmlFreeParserCtxt(ctxt); 13346 13347 return(ret); 13348} 13349 13350#ifdef LIBXML_SAX1_ENABLED 13351/** 13352 * xmlParseBalancedChunkMemoryRecover: 13353 * @doc: the document the chunk pertains to (must not be NULL) 13354 * @sax: the SAX handler block (possibly NULL) 13355 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13356 * @depth: Used for loop detection, use 0 13357 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13358 * @lst: the return value for the set of parsed nodes 13359 * @recover: return nodes even if the data is broken (use 0) 13360 * 13361 * 13362 * Parse a well-balanced chunk of an XML document 13363 * called by the parser 13364 * The allowed sequence for the Well Balanced Chunk is the one defined by 13365 * the content production in the XML grammar: 13366 * 13367 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13368 * 13369 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13370 * the parser error code otherwise 13371 * 13372 * In case recover is set to 1, the nodelist will not be empty even if 13373 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13374 * some extent. 
13375 */ 13376int 13377xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13378 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13379 int recover) { 13380 xmlParserCtxtPtr ctxt; 13381 xmlDocPtr newDoc; 13382 xmlSAXHandlerPtr oldsax = NULL; 13383 xmlNodePtr content, newRoot; 13384 int ret = 0; 13385 13386 if (depth > 40) { 13387 return(XML_ERR_ENTITY_LOOP); 13388 } 13389 13390 13391 if (lst != NULL) 13392 *lst = NULL; 13393 if (string == NULL) 13394 return(-1); 13395 13396 ctxt = xmlCreateDocParserCtxt(string); 13397 if (ctxt == NULL) return(-1); 13398 ctxt->userData = ctxt; 13399 if (sax != NULL) { 13400 oldsax = ctxt->sax; 13401 ctxt->sax = sax; 13402 if (user_data != NULL) 13403 ctxt->userData = user_data; 13404 } 13405 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13406 if (newDoc == NULL) { 13407 xmlFreeParserCtxt(ctxt); 13408 return(-1); 13409 } 13410 newDoc->properties = XML_DOC_INTERNAL; 13411 if ((doc != NULL) && (doc->dict != NULL)) { 13412 xmlDictFree(ctxt->dict); 13413 ctxt->dict = doc->dict; 13414 xmlDictReference(ctxt->dict); 13415 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13416 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13417 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13418 ctxt->dictNames = 1; 13419 newDoc->dict = ctxt->dict; 13420 xmlDictReference(newDoc->dict); 13421 } else { 13422 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT); 13423 } 13424 /* doc == NULL is only supported for historic reasons */ 13425 if (doc != NULL) { 13426 newDoc->intSubset = doc->intSubset; 13427 newDoc->extSubset = doc->extSubset; 13428 } 13429 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13430 if (newRoot == NULL) { 13431 if (sax != NULL) 13432 ctxt->sax = oldsax; 13433 xmlFreeParserCtxt(ctxt); 13434 newDoc->intSubset = NULL; 13435 newDoc->extSubset = NULL; 13436 xmlFreeDoc(newDoc); 13437 return(-1); 13438 } 13439 xmlAddChild((xmlNodePtr) newDoc, 
newRoot); 13440 nodePush(ctxt, newRoot); 13441 /* doc == NULL is only supported for historic reasons */ 13442 if (doc == NULL) { 13443 ctxt->myDoc = newDoc; 13444 } else { 13445 ctxt->myDoc = newDoc; 13446 /* Ensure that doc has XML spec namespace */ 13447 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13448 newDoc->oldNs = doc->oldNs; 13449 } 13450 ctxt->instate = XML_PARSER_CONTENT; 13451 ctxt->input_id = 2; 13452 ctxt->depth = depth; 13453 13454 /* 13455 * Doing validity checking on chunk doesn't make sense 13456 */ 13457 ctxt->validate = 0; 13458 ctxt->loadsubset = 0; 13459 xmlDetectSAX2(ctxt); 13460 13461 if ( doc != NULL ){ 13462 content = doc->children; 13463 doc->children = NULL; 13464 xmlParseContent(ctxt); 13465 doc->children = content; 13466 } 13467 else { 13468 xmlParseContent(ctxt); 13469 } 13470 if ((RAW == '<') && (NXT(1) == '/')) { 13471 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13472 } else if (RAW != 0) { 13473 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13474 } 13475 if (ctxt->node != newDoc->children) { 13476 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13477 } 13478 13479 if (!ctxt->wellFormed) { 13480 if (ctxt->errNo == 0) 13481 ret = 1; 13482 else 13483 ret = ctxt->errNo; 13484 } else { 13485 ret = 0; 13486 } 13487 13488 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 13489 xmlNodePtr cur; 13490 13491 /* 13492 * Return the newly created nodeset after unlinking it from 13493 * they pseudo parent. 
13494 */ 13495 cur = newDoc->children->children; 13496 *lst = cur; 13497 while (cur != NULL) { 13498 xmlSetTreeDoc(cur, doc); 13499 cur->parent = NULL; 13500 cur = cur->next; 13501 } 13502 newDoc->children->children = NULL; 13503 } 13504 13505 if (sax != NULL) 13506 ctxt->sax = oldsax; 13507 xmlFreeParserCtxt(ctxt); 13508 newDoc->intSubset = NULL; 13509 newDoc->extSubset = NULL; 13510 /* This leaks the namespace list if doc == NULL */ 13511 newDoc->oldNs = NULL; 13512 xmlFreeDoc(newDoc); 13513 13514 return(ret); 13515} 13516 13517/** 13518 * xmlSAXParseEntity: 13519 * @sax: the SAX handler block 13520 * @filename: the filename 13521 * 13522 * DEPRECATED: Don't use. 13523 * 13524 * parse an XML external entity out of context and build a tree. 13525 * It use the given SAX function block to handle the parsing callback. 13526 * If sax is NULL, fallback to the default DOM tree building routines. 13527 * 13528 * [78] extParsedEnt ::= TextDecl? content 13529 * 13530 * This correspond to a "Well Balanced" chunk 13531 * 13532 * Returns the resulting document tree 13533 */ 13534 13535xmlDocPtr 13536xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13537 xmlDocPtr ret; 13538 xmlParserCtxtPtr ctxt; 13539 13540 ctxt = xmlCreateFileParserCtxt(filename); 13541 if (ctxt == NULL) { 13542 return(NULL); 13543 } 13544 if (sax != NULL) { 13545 if (ctxt->sax != NULL) 13546 xmlFree(ctxt->sax); 13547 ctxt->sax = sax; 13548 ctxt->userData = NULL; 13549 } 13550 13551 xmlParseExtParsedEnt(ctxt); 13552 13553 if (ctxt->wellFormed) 13554 ret = ctxt->myDoc; 13555 else { 13556 ret = NULL; 13557 xmlFreeDoc(ctxt->myDoc); 13558 ctxt->myDoc = NULL; 13559 } 13560 if (sax != NULL) 13561 ctxt->sax = NULL; 13562 xmlFreeParserCtxt(ctxt); 13563 13564 return(ret); 13565} 13566 13567/** 13568 * xmlParseEntity: 13569 * @filename: the filename 13570 * 13571 * parse an XML external entity out of context and build a tree. 13572 * 13573 * [78] extParsedEnt ::= TextDecl? 
content 13574 * 13575 * This correspond to a "Well Balanced" chunk 13576 * 13577 * Returns the resulting document tree 13578 */ 13579 13580xmlDocPtr 13581xmlParseEntity(const char *filename) { 13582 return(xmlSAXParseEntity(NULL, filename)); 13583} 13584#endif /* LIBXML_SAX1_ENABLED */ 13585 13586/** 13587 * xmlCreateEntityParserCtxtInternal: 13588 * @URL: the entity URL 13589 * @ID: the entity PUBLIC ID 13590 * @base: a possible base for the target URI 13591 * @pctx: parser context used to set options on new context 13592 * 13593 * Create a parser context for an external entity 13594 * Automatic support for ZLIB/Compress compressed document is provided 13595 * by default if found at compile-time. 13596 * 13597 * Returns the new parser context or NULL 13598 */ 13599static xmlParserCtxtPtr 13600xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData, 13601 const xmlChar *URL, const xmlChar *ID, const xmlChar *base, 13602 xmlParserCtxtPtr pctx) { 13603 xmlParserCtxtPtr ctxt; 13604 xmlParserInputPtr inputStream; 13605 char *directory = NULL; 13606 xmlChar *uri; 13607 13608 ctxt = xmlNewSAXParserCtxt(sax, userData); 13609 if (ctxt == NULL) { 13610 return(NULL); 13611 } 13612 13613 if (pctx != NULL) { 13614 ctxt->options = pctx->options; 13615 ctxt->_private = pctx->_private; 13616 ctxt->input_id = pctx->input_id; 13617 } 13618 13619 /* Don't read from stdin. 
*/ 13620 if (xmlStrcmp(URL, BAD_CAST "-") == 0) 13621 URL = BAD_CAST "./-"; 13622 13623 uri = xmlBuildURI(URL, base); 13624 13625 if (uri == NULL) { 13626 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13627 if (inputStream == NULL) { 13628 xmlFreeParserCtxt(ctxt); 13629 return(NULL); 13630 } 13631 13632 inputPush(ctxt, inputStream); 13633 13634 if ((ctxt->directory == NULL) && (directory == NULL)) 13635 directory = xmlParserGetDirectory((char *)URL); 13636 if ((ctxt->directory == NULL) && (directory != NULL)) 13637 ctxt->directory = directory; 13638 } else { 13639 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 13640 if (inputStream == NULL) { 13641 xmlFree(uri); 13642 xmlFreeParserCtxt(ctxt); 13643 return(NULL); 13644 } 13645 13646 inputPush(ctxt, inputStream); 13647 13648 if ((ctxt->directory == NULL) && (directory == NULL)) 13649 directory = xmlParserGetDirectory((char *)uri); 13650 if ((ctxt->directory == NULL) && (directory != NULL)) 13651 ctxt->directory = directory; 13652 xmlFree(uri); 13653 } 13654 return(ctxt); 13655} 13656 13657/** 13658 * xmlCreateEntityParserCtxt: 13659 * @URL: the entity URL 13660 * @ID: the entity PUBLIC ID 13661 * @base: a possible base for the target URI 13662 * 13663 * Create a parser context for an external entity 13664 * Automatic support for ZLIB/Compress compressed document is provided 13665 * by default if found at compile-time. 
13666 * 13667 * Returns the new parser context or NULL 13668 */ 13669xmlParserCtxtPtr 13670xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 13671 const xmlChar *base) { 13672 return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL); 13673 13674} 13675 13676/************************************************************************ 13677 * * 13678 * Front ends when parsing from a file * 13679 * * 13680 ************************************************************************/ 13681 13682/** 13683 * xmlCreateURLParserCtxt: 13684 * @filename: the filename or URL 13685 * @options: a combination of xmlParserOption 13686 * 13687 * Create a parser context for a file or URL content. 13688 * Automatic support for ZLIB/Compress compressed document is provided 13689 * by default if found at compile-time and for file accesses 13690 * 13691 * Returns the new parser context or NULL 13692 */ 13693xmlParserCtxtPtr 13694xmlCreateURLParserCtxt(const char *filename, int options) 13695{ 13696 xmlParserCtxtPtr ctxt; 13697 xmlParserInputPtr inputStream; 13698 char *directory = NULL; 13699 13700 ctxt = xmlNewParserCtxt(); 13701 if (ctxt == NULL) { 13702 xmlErrMemory(NULL, "cannot allocate parser context"); 13703 return(NULL); 13704 } 13705 13706 if (options) 13707 xmlCtxtUseOptionsInternal(ctxt, options); 13708 ctxt->linenumbers = 1; 13709 13710 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 13711 if (inputStream == NULL) { 13712 xmlFreeParserCtxt(ctxt); 13713 return(NULL); 13714 } 13715 13716 inputPush(ctxt, inputStream); 13717 if ((ctxt->directory == NULL) && (directory == NULL)) 13718 directory = xmlParserGetDirectory(filename); 13719 if ((ctxt->directory == NULL) && (directory != NULL)) 13720 ctxt->directory = directory; 13721 13722 return(ctxt); 13723} 13724 13725/** 13726 * xmlCreateFileParserCtxt: 13727 * @filename: the filename 13728 * 13729 * Create a parser context for a file content. 
13730 * Automatic support for ZLIB/Compress compressed document is provided 13731 * by default if found at compile-time. 13732 * 13733 * Returns the new parser context or NULL 13734 */ 13735xmlParserCtxtPtr 13736xmlCreateFileParserCtxt(const char *filename) 13737{ 13738 return(xmlCreateURLParserCtxt(filename, 0)); 13739} 13740 13741#ifdef LIBXML_SAX1_ENABLED 13742/** 13743 * xmlSAXParseFileWithData: 13744 * @sax: the SAX handler block 13745 * @filename: the filename 13746 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13747 * documents 13748 * @data: the userdata 13749 * 13750 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile. 13751 * 13752 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13753 * compressed document is provided by default if found at compile-time. 13754 * It use the given SAX function block to handle the parsing callback. 13755 * If sax is NULL, fallback to the default DOM tree building routines. 13756 * 13757 * User data (void *) is stored within the parser context in the 13758 * context's _private member, so it is available nearly everywhere in libxml 13759 * 13760 * Returns the resulting document tree 13761 */ 13762 13763xmlDocPtr 13764xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 13765 int recovery, void *data) { 13766 xmlDocPtr ret; 13767 xmlParserCtxtPtr ctxt; 13768 13769 xmlInitParser(); 13770 13771 ctxt = xmlCreateFileParserCtxt(filename); 13772 if (ctxt == NULL) { 13773 return(NULL); 13774 } 13775 if (sax != NULL) { 13776 if (ctxt->sax != NULL) 13777 xmlFree(ctxt->sax); 13778 ctxt->sax = sax; 13779 } 13780 xmlDetectSAX2(ctxt); 13781 if (data!=NULL) { 13782 ctxt->_private = data; 13783 } 13784 13785 if (ctxt->directory == NULL) 13786 ctxt->directory = xmlParserGetDirectory(filename); 13787 13788 ctxt->recovery = recovery; 13789 13790 xmlParseDocument(ctxt); 13791 13792 if ((ctxt->wellFormed) || recovery) { 13793 ret = ctxt->myDoc; 13794 if ((ret != NULL) && 
(ctxt->input->buf != NULL)) { 13795 if (ctxt->input->buf->compressed > 0) 13796 ret->compression = 9; 13797 else 13798 ret->compression = ctxt->input->buf->compressed; 13799 } 13800 } 13801 else { 13802 ret = NULL; 13803 xmlFreeDoc(ctxt->myDoc); 13804 ctxt->myDoc = NULL; 13805 } 13806 if (sax != NULL) 13807 ctxt->sax = NULL; 13808 xmlFreeParserCtxt(ctxt); 13809 13810 return(ret); 13811} 13812 13813/** 13814 * xmlSAXParseFile: 13815 * @sax: the SAX handler block 13816 * @filename: the filename 13817 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13818 * documents 13819 * 13820 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile. 13821 * 13822 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13823 * compressed document is provided by default if found at compile-time. 13824 * It use the given SAX function block to handle the parsing callback. 13825 * If sax is NULL, fallback to the default DOM tree building routines. 13826 * 13827 * Returns the resulting document tree 13828 */ 13829 13830xmlDocPtr 13831xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 13832 int recovery) { 13833 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 13834} 13835 13836/** 13837 * xmlRecoverDoc: 13838 * @cur: a pointer to an array of xmlChar 13839 * 13840 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER. 13841 * 13842 * parse an XML in-memory document and build a tree. 13843 * In the case the document is not Well Formed, a attempt to build a 13844 * tree is tried anyway 13845 * 13846 * Returns the resulting document tree or NULL in case of failure 13847 */ 13848 13849xmlDocPtr 13850xmlRecoverDoc(const xmlChar *cur) { 13851 return(xmlSAXParseDoc(NULL, cur, 1)); 13852} 13853 13854/** 13855 * xmlParseFile: 13856 * @filename: the filename 13857 * 13858 * DEPRECATED: Use xmlReadFile. 13859 * 13860 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 13861 * compressed document is provided by default if found at compile-time. 13862 * 13863 * Returns the resulting document tree if the file was wellformed, 13864 * NULL otherwise. 13865 */ 13866 13867xmlDocPtr 13868xmlParseFile(const char *filename) { 13869 return(xmlSAXParseFile(NULL, filename, 0)); 13870} 13871 13872/** 13873 * xmlRecoverFile: 13874 * @filename: the filename 13875 * 13876 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER. 13877 * 13878 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13879 * compressed document is provided by default if found at compile-time. 13880 * In the case the document is not Well Formed, it attempts to build 13881 * a tree anyway 13882 * 13883 * Returns the resulting document tree or NULL in case of failure 13884 */ 13885 13886xmlDocPtr 13887xmlRecoverFile(const char *filename) { 13888 return(xmlSAXParseFile(NULL, filename, 1)); 13889} 13890 13891 13892/** 13893 * xmlSetupParserForBuffer: 13894 * @ctxt: an XML parser context 13895 * @buffer: a xmlChar * buffer 13896 * @filename: a file name 13897 * 13898 * DEPRECATED: Don't use. 13899 * 13900 * Setup the parser context to parse a new buffer; Clears any prior 13901 * contents from the parser context. 
The buffer parameter must not be 13902 * NULL, but the filename parameter can be 13903 */ 13904void 13905xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 13906 const char* filename) 13907{ 13908 xmlParserInputPtr input; 13909 13910 if ((ctxt == NULL) || (buffer == NULL)) 13911 return; 13912 13913 input = xmlNewInputStream(ctxt); 13914 if (input == NULL) { 13915 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 13916 xmlClearParserCtxt(ctxt); 13917 return; 13918 } 13919 13920 xmlClearParserCtxt(ctxt); 13921 if (filename != NULL) 13922 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 13923 input->base = buffer; 13924 input->cur = buffer; 13925 input->end = &buffer[xmlStrlen(buffer)]; 13926 inputPush(ctxt, input); 13927} 13928 13929/** 13930 * xmlSAXUserParseFile: 13931 * @sax: a SAX handler 13932 * @user_data: The user data returned on SAX callbacks 13933 * @filename: a file name 13934 * 13935 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile. 13936 * 13937 * parse an XML file and call the given SAX handler routines. 
13938 * Automatic support for ZLIB/Compress compressed document is provided 13939 * 13940 * Returns 0 in case of success or a error number otherwise 13941 */ 13942int 13943xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 13944 const char *filename) { 13945 int ret = 0; 13946 xmlParserCtxtPtr ctxt; 13947 13948 ctxt = xmlCreateFileParserCtxt(filename); 13949 if (ctxt == NULL) return -1; 13950 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13951 xmlFree(ctxt->sax); 13952 ctxt->sax = sax; 13953 xmlDetectSAX2(ctxt); 13954 13955 if (user_data != NULL) 13956 ctxt->userData = user_data; 13957 13958 xmlParseDocument(ctxt); 13959 13960 if (ctxt->wellFormed) 13961 ret = 0; 13962 else { 13963 if (ctxt->errNo != 0) 13964 ret = ctxt->errNo; 13965 else 13966 ret = -1; 13967 } 13968 if (sax != NULL) 13969 ctxt->sax = NULL; 13970 if (ctxt->myDoc != NULL) { 13971 xmlFreeDoc(ctxt->myDoc); 13972 ctxt->myDoc = NULL; 13973 } 13974 xmlFreeParserCtxt(ctxt); 13975 13976 return ret; 13977} 13978#endif /* LIBXML_SAX1_ENABLED */ 13979 13980/************************************************************************ 13981 * * 13982 * Front ends when parsing from memory * 13983 * * 13984 ************************************************************************/ 13985 13986/** 13987 * xmlCreateMemoryParserCtxt: 13988 * @buffer: a pointer to a char array 13989 * @size: the size of the array 13990 * 13991 * Create a parser context for an XML in-memory document. 
13992 * 13993 * Returns the new parser context or NULL 13994 */ 13995xmlParserCtxtPtr 13996xmlCreateMemoryParserCtxt(const char *buffer, int size) { 13997 xmlParserCtxtPtr ctxt; 13998 xmlParserInputPtr input; 13999 xmlParserInputBufferPtr buf; 14000 14001 if (buffer == NULL) 14002 return(NULL); 14003 if (size <= 0) 14004 return(NULL); 14005 14006 ctxt = xmlNewParserCtxt(); 14007 if (ctxt == NULL) 14008 return(NULL); 14009 14010 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14011 if (buf == NULL) { 14012 xmlFreeParserCtxt(ctxt); 14013 return(NULL); 14014 } 14015 14016 input = xmlNewInputStream(ctxt); 14017 if (input == NULL) { 14018 xmlFreeParserInputBuffer(buf); 14019 xmlFreeParserCtxt(ctxt); 14020 return(NULL); 14021 } 14022 14023 input->filename = NULL; 14024 input->buf = buf; 14025 xmlBufResetInput(input->buf->buffer, input); 14026 14027 inputPush(ctxt, input); 14028 return(ctxt); 14029} 14030 14031#ifdef LIBXML_SAX1_ENABLED 14032/** 14033 * xmlSAXParseMemoryWithData: 14034 * @sax: the SAX handler block 14035 * @buffer: an pointer to a char array 14036 * @size: the size of the array 14037 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14038 * documents 14039 * @data: the userdata 14040 * 14041 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory. 14042 * 14043 * parse an XML in-memory block and use the given SAX function block 14044 * to handle the parsing callback. If sax is NULL, fallback to the default 14045 * DOM tree building routines. 
14046 * 14047 * User data (void *) is stored within the parser context in the 14048 * context's _private member, so it is available nearly everywhere in libxml 14049 * 14050 * Returns the resulting document tree 14051 */ 14052 14053xmlDocPtr 14054xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14055 int size, int recovery, void *data) { 14056 xmlDocPtr ret; 14057 xmlParserCtxtPtr ctxt; 14058 14059 xmlInitParser(); 14060 14061 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14062 if (ctxt == NULL) return(NULL); 14063 if (sax != NULL) { 14064 if (ctxt->sax != NULL) 14065 xmlFree(ctxt->sax); 14066 ctxt->sax = sax; 14067 } 14068 xmlDetectSAX2(ctxt); 14069 if (data!=NULL) { 14070 ctxt->_private=data; 14071 } 14072 14073 ctxt->recovery = recovery; 14074 14075 xmlParseDocument(ctxt); 14076 14077 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14078 else { 14079 ret = NULL; 14080 xmlFreeDoc(ctxt->myDoc); 14081 ctxt->myDoc = NULL; 14082 } 14083 if (sax != NULL) 14084 ctxt->sax = NULL; 14085 xmlFreeParserCtxt(ctxt); 14086 14087 return(ret); 14088} 14089 14090/** 14091 * xmlSAXParseMemory: 14092 * @sax: the SAX handler block 14093 * @buffer: an pointer to a char array 14094 * @size: the size of the array 14095 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14096 * documents 14097 * 14098 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory. 14099 * 14100 * parse an XML in-memory block and use the given SAX function block 14101 * to handle the parsing callback. If sax is NULL, fallback to the default 14102 * DOM tree building routines. 
14103 * 14104 * Returns the resulting document tree 14105 */ 14106xmlDocPtr 14107xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14108 int size, int recovery) { 14109 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14110} 14111 14112/** 14113 * xmlParseMemory: 14114 * @buffer: an pointer to a char array 14115 * @size: the size of the array 14116 * 14117 * DEPRECATED: Use xmlReadMemory. 14118 * 14119 * parse an XML in-memory block and build a tree. 14120 * 14121 * Returns the resulting document tree 14122 */ 14123 14124xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14125 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14126} 14127 14128/** 14129 * xmlRecoverMemory: 14130 * @buffer: an pointer to a char array 14131 * @size: the size of the array 14132 * 14133 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER. 14134 * 14135 * parse an XML in-memory block and build a tree. 14136 * In the case the document is not Well Formed, an attempt to 14137 * build a tree is tried anyway 14138 * 14139 * Returns the resulting document tree or NULL in case of error 14140 */ 14141 14142xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14143 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14144} 14145 14146/** 14147 * xmlSAXUserParseMemory: 14148 * @sax: a SAX handler 14149 * @user_data: The user data returned on SAX callbacks 14150 * @buffer: an in-memory XML document input 14151 * @size: the length of the XML document in bytes 14152 * 14153 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory. 14154 * 14155 * parse an XML in-memory buffer and call the given SAX handler routines. 
14156 * 14157 * Returns 0 in case of success or a error number otherwise 14158 */ 14159int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14160 const char *buffer, int size) { 14161 int ret = 0; 14162 xmlParserCtxtPtr ctxt; 14163 14164 xmlInitParser(); 14165 14166 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14167 if (ctxt == NULL) return -1; 14168 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14169 xmlFree(ctxt->sax); 14170 ctxt->sax = sax; 14171 xmlDetectSAX2(ctxt); 14172 14173 if (user_data != NULL) 14174 ctxt->userData = user_data; 14175 14176 xmlParseDocument(ctxt); 14177 14178 if (ctxt->wellFormed) 14179 ret = 0; 14180 else { 14181 if (ctxt->errNo != 0) 14182 ret = ctxt->errNo; 14183 else 14184 ret = -1; 14185 } 14186 if (sax != NULL) 14187 ctxt->sax = NULL; 14188 if (ctxt->myDoc != NULL) { 14189 xmlFreeDoc(ctxt->myDoc); 14190 ctxt->myDoc = NULL; 14191 } 14192 xmlFreeParserCtxt(ctxt); 14193 14194 return ret; 14195} 14196#endif /* LIBXML_SAX1_ENABLED */ 14197 14198/** 14199 * xmlCreateDocParserCtxt: 14200 * @str: a pointer to an array of xmlChar 14201 * 14202 * Creates a parser context for an XML in-memory document. 
14203 * 14204 * Returns the new parser context or NULL 14205 */ 14206xmlParserCtxtPtr 14207xmlCreateDocParserCtxt(const xmlChar *str) { 14208 xmlParserCtxtPtr ctxt; 14209 xmlParserInputPtr input; 14210 xmlParserInputBufferPtr buf; 14211 14212 if (str == NULL) 14213 return(NULL); 14214 14215 ctxt = xmlNewParserCtxt(); 14216 if (ctxt == NULL) 14217 return(NULL); 14218 14219 buf = xmlParserInputBufferCreateString(str); 14220 if (buf == NULL) { 14221 xmlFreeParserCtxt(ctxt); 14222 return(NULL); 14223 } 14224 14225 input = xmlNewInputStream(ctxt); 14226 if (input == NULL) { 14227 xmlFreeParserInputBuffer(buf); 14228 xmlFreeParserCtxt(ctxt); 14229 return(NULL); 14230 } 14231 14232 input->filename = NULL; 14233 input->buf = buf; 14234 xmlBufResetInput(input->buf->buffer, input); 14235 14236 inputPush(ctxt, input); 14237 return(ctxt); 14238} 14239 14240#ifdef LIBXML_SAX1_ENABLED 14241/** 14242 * xmlSAXParseDoc: 14243 * @sax: the SAX handler block 14244 * @cur: a pointer to an array of xmlChar 14245 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14246 * documents 14247 * 14248 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc. 14249 * 14250 * parse an XML in-memory document and build a tree. 14251 * It use the given SAX function block to handle the parsing callback. 14252 * If sax is NULL, fallback to the default DOM tree building routines. 
14253 * 14254 * Returns the resulting document tree 14255 */ 14256 14257xmlDocPtr 14258xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14259 xmlDocPtr ret; 14260 xmlParserCtxtPtr ctxt; 14261 xmlSAXHandlerPtr oldsax = NULL; 14262 14263 if (cur == NULL) return(NULL); 14264 14265 14266 ctxt = xmlCreateDocParserCtxt(cur); 14267 if (ctxt == NULL) return(NULL); 14268 if (sax != NULL) { 14269 oldsax = ctxt->sax; 14270 ctxt->sax = sax; 14271 ctxt->userData = NULL; 14272 } 14273 xmlDetectSAX2(ctxt); 14274 14275 xmlParseDocument(ctxt); 14276 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14277 else { 14278 ret = NULL; 14279 xmlFreeDoc(ctxt->myDoc); 14280 ctxt->myDoc = NULL; 14281 } 14282 if (sax != NULL) 14283 ctxt->sax = oldsax; 14284 xmlFreeParserCtxt(ctxt); 14285 14286 return(ret); 14287} 14288 14289/** 14290 * xmlParseDoc: 14291 * @cur: a pointer to an array of xmlChar 14292 * 14293 * DEPRECATED: Use xmlReadDoc. 14294 * 14295 * parse an XML in-memory document and build a tree. 
14296 * 14297 * Returns the resulting document tree 14298 */ 14299 14300xmlDocPtr 14301xmlParseDoc(const xmlChar *cur) { 14302 return(xmlSAXParseDoc(NULL, cur, 0)); 14303} 14304#endif /* LIBXML_SAX1_ENABLED */ 14305 14306#ifdef LIBXML_LEGACY_ENABLED 14307/************************************************************************ 14308 * * 14309 * Specific function to keep track of entities references * 14310 * and used by the XSLT debugger * 14311 * * 14312 ************************************************************************/ 14313 14314static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14315 14316/** 14317 * xmlAddEntityReference: 14318 * @ent : A valid entity 14319 * @firstNode : A valid first node for children of entity 14320 * @lastNode : A valid last node of children entity 14321 * 14322 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14323 */ 14324static void 14325xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14326 xmlNodePtr lastNode) 14327{ 14328 if (xmlEntityRefFunc != NULL) { 14329 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14330 } 14331} 14332 14333 14334/** 14335 * xmlSetEntityReferenceFunc: 14336 * @func: A valid function 14337 * 14338 * Set the function to call call back when a xml reference has been made 14339 */ 14340void 14341xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14342{ 14343 xmlEntityRefFunc = func; 14344} 14345#endif /* LIBXML_LEGACY_ENABLED */ 14346 14347/************************************************************************ 14348 * * 14349 * New set (2.6.0) of simpler and more flexible APIs * 14350 * * 14351 ************************************************************************/ 14352 14353/** 14354 * DICT_FREE: 14355 * @str: a string 14356 * 14357 * Free a string if it is not owned by the "dict" dictionary in the 14358 * current scope 14359 */ 14360#define DICT_FREE(str) \ 14361 if ((str) && ((!dict) || \ 14362 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 
14363 xmlFree((char *)(str)); 14364 14365/** 14366 * xmlCtxtReset: 14367 * @ctxt: an XML parser context 14368 * 14369 * Reset a parser context 14370 */ 14371void 14372xmlCtxtReset(xmlParserCtxtPtr ctxt) 14373{ 14374 xmlParserInputPtr input; 14375 xmlDictPtr dict; 14376 14377 if (ctxt == NULL) 14378 return; 14379 14380 dict = ctxt->dict; 14381 14382 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14383 xmlFreeInputStream(input); 14384 } 14385 ctxt->inputNr = 0; 14386 ctxt->input = NULL; 14387 14388 ctxt->spaceNr = 0; 14389 if (ctxt->spaceTab != NULL) { 14390 ctxt->spaceTab[0] = -1; 14391 ctxt->space = &ctxt->spaceTab[0]; 14392 } else { 14393 ctxt->space = NULL; 14394 } 14395 14396 14397 ctxt->nodeNr = 0; 14398 ctxt->node = NULL; 14399 14400 ctxt->nameNr = 0; 14401 ctxt->name = NULL; 14402 14403 ctxt->nsNr = 0; 14404 xmlParserNsReset(ctxt->nsdb); 14405 14406 DICT_FREE(ctxt->version); 14407 ctxt->version = NULL; 14408 DICT_FREE(ctxt->encoding); 14409 ctxt->encoding = NULL; 14410 DICT_FREE(ctxt->directory); 14411 ctxt->directory = NULL; 14412 DICT_FREE(ctxt->extSubURI); 14413 ctxt->extSubURI = NULL; 14414 DICT_FREE(ctxt->extSubSystem); 14415 ctxt->extSubSystem = NULL; 14416 if (ctxt->myDoc != NULL) 14417 xmlFreeDoc(ctxt->myDoc); 14418 ctxt->myDoc = NULL; 14419 14420 ctxt->standalone = -1; 14421 ctxt->hasExternalSubset = 0; 14422 ctxt->hasPErefs = 0; 14423 ctxt->html = 0; 14424 ctxt->external = 0; 14425 ctxt->instate = XML_PARSER_START; 14426 ctxt->token = 0; 14427 14428 ctxt->wellFormed = 1; 14429 ctxt->nsWellFormed = 1; 14430 ctxt->disableSAX = 0; 14431 ctxt->valid = 1; 14432#if 0 14433 ctxt->vctxt.userData = ctxt; 14434 ctxt->vctxt.error = xmlParserValidityError; 14435 ctxt->vctxt.warning = xmlParserValidityWarning; 14436#endif 14437 ctxt->record_info = 0; 14438 ctxt->checkIndex = 0; 14439 ctxt->endCheckState = 0; 14440 ctxt->inSubset = 0; 14441 ctxt->errNo = XML_ERR_OK; 14442 ctxt->depth = 0; 14443 ctxt->catalogs = NULL; 14444 ctxt->sizeentities = 0; 
14445 ctxt->sizeentcopy = 0; 14446 xmlInitNodeInfoSeq(&ctxt->node_seq); 14447 14448 if (ctxt->attsDefault != NULL) { 14449 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator); 14450 ctxt->attsDefault = NULL; 14451 } 14452 if (ctxt->attsSpecial != NULL) { 14453 xmlHashFree(ctxt->attsSpecial, NULL); 14454 ctxt->attsSpecial = NULL; 14455 } 14456 14457#ifdef LIBXML_CATALOG_ENABLED 14458 if (ctxt->catalogs != NULL) 14459 xmlCatalogFreeLocal(ctxt->catalogs); 14460#endif 14461 ctxt->nbErrors = 0; 14462 ctxt->nbWarnings = 0; 14463 if (ctxt->lastError.code != XML_ERR_OK) 14464 xmlResetError(&ctxt->lastError); 14465} 14466 14467/** 14468 * xmlCtxtResetPush: 14469 * @ctxt: an XML parser context 14470 * @chunk: a pointer to an array of chars 14471 * @size: number of chars in the array 14472 * @filename: an optional file name or URI 14473 * @encoding: the document encoding, or NULL 14474 * 14475 * Reset a push parser context 14476 * 14477 * Returns 0 in case of success and 1 in case of error 14478 */ 14479int 14480xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14481 int size, const char *filename, const char *encoding) 14482{ 14483 xmlParserInputPtr inputStream; 14484 xmlParserInputBufferPtr buf; 14485 14486 if (ctxt == NULL) 14487 return(1); 14488 14489 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); 14490 if (buf == NULL) 14491 return(1); 14492 14493 if (ctxt == NULL) { 14494 xmlFreeParserInputBuffer(buf); 14495 return(1); 14496 } 14497 14498 xmlCtxtReset(ctxt); 14499 14500 if (filename == NULL) { 14501 ctxt->directory = NULL; 14502 } else { 14503 ctxt->directory = xmlParserGetDirectory(filename); 14504 } 14505 14506 inputStream = xmlNewInputStream(ctxt); 14507 if (inputStream == NULL) { 14508 xmlFreeParserInputBuffer(buf); 14509 return(1); 14510 } 14511 14512 if (filename == NULL) 14513 inputStream->filename = NULL; 14514 else 14515 inputStream->filename = (char *) 14516 xmlCanonicPath((const xmlChar *) filename); 14517 inputStream->buf = buf; 
14518 xmlBufResetInput(buf->buffer, inputStream); 14519 14520 inputPush(ctxt, inputStream); 14521 14522 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14523 (ctxt->input->buf != NULL)) { 14524 size_t pos = ctxt->input->cur - ctxt->input->base; 14525 int res; 14526 14527 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14528 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos); 14529 if (res < 0) { 14530 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL); 14531 xmlHaltParser(ctxt); 14532 return(1); 14533 } 14534 } 14535 14536 if (encoding != NULL) { 14537 xmlCharEncodingHandlerPtr hdlr; 14538 14539 hdlr = xmlFindCharEncodingHandler(encoding); 14540 if (hdlr != NULL) { 14541 xmlSwitchToEncoding(ctxt, hdlr); 14542 } else { 14543 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14544 "Unsupported encoding %s\n", BAD_CAST encoding); 14545 } 14546 } 14547 14548 return(0); 14549} 14550 14551 14552/** 14553 * xmlCtxtUseOptionsInternal: 14554 * @ctxt: an XML parser context 14555 * @options: a combination of xmlParserOption 14556 * @encoding: the user provided encoding to use 14557 * 14558 * Applies the options to the parser context 14559 * 14560 * Returns 0 in case of success, the set of unknown or unimplemented options 14561 * in case of error. 
14562 */ 14563static int 14564xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options) 14565{ 14566 if (ctxt == NULL) 14567 return(-1); 14568 if (options & XML_PARSE_RECOVER) { 14569 ctxt->recovery = 1; 14570 options -= XML_PARSE_RECOVER; 14571 ctxt->options |= XML_PARSE_RECOVER; 14572 } else 14573 ctxt->recovery = 0; 14574 if (options & XML_PARSE_DTDLOAD) { 14575 ctxt->loadsubset = XML_DETECT_IDS; 14576 options -= XML_PARSE_DTDLOAD; 14577 ctxt->options |= XML_PARSE_DTDLOAD; 14578 } else 14579 ctxt->loadsubset = 0; 14580 if (options & XML_PARSE_DTDATTR) { 14581 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 14582 options -= XML_PARSE_DTDATTR; 14583 ctxt->options |= XML_PARSE_DTDATTR; 14584 } 14585 if (options & XML_PARSE_NOENT) { 14586 ctxt->replaceEntities = 1; 14587 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 14588 options -= XML_PARSE_NOENT; 14589 ctxt->options |= XML_PARSE_NOENT; 14590 } else 14591 ctxt->replaceEntities = 0; 14592 if (options & XML_PARSE_PEDANTIC) { 14593 ctxt->pedantic = 1; 14594 options -= XML_PARSE_PEDANTIC; 14595 ctxt->options |= XML_PARSE_PEDANTIC; 14596 } else 14597 ctxt->pedantic = 0; 14598 if (options & XML_PARSE_NOBLANKS) { 14599 ctxt->keepBlanks = 0; 14600 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 14601 options -= XML_PARSE_NOBLANKS; 14602 ctxt->options |= XML_PARSE_NOBLANKS; 14603 } else 14604 ctxt->keepBlanks = 1; 14605 if (options & XML_PARSE_DTDVALID) { 14606 ctxt->validate = 1; 14607 if (options & XML_PARSE_NOWARNING) 14608 ctxt->vctxt.warning = NULL; 14609 if (options & XML_PARSE_NOERROR) 14610 ctxt->vctxt.error = NULL; 14611 options -= XML_PARSE_DTDVALID; 14612 ctxt->options |= XML_PARSE_DTDVALID; 14613 } else 14614 ctxt->validate = 0; 14615 if (options & XML_PARSE_NOWARNING) { 14616 ctxt->sax->warning = NULL; 14617 options -= XML_PARSE_NOWARNING; 14618 } 14619 if (options & XML_PARSE_NOERROR) { 14620 ctxt->sax->error = NULL; 14621 ctxt->sax->fatalError = NULL; 14622 options -= XML_PARSE_NOERROR; 14623 } 
14624#ifdef LIBXML_SAX1_ENABLED 14625 if (options & XML_PARSE_SAX1) { 14626 ctxt->sax->startElementNs = NULL; 14627 ctxt->sax->endElementNs = NULL; 14628 ctxt->sax->initialized = 1; 14629 options -= XML_PARSE_SAX1; 14630 ctxt->options |= XML_PARSE_SAX1; 14631 } 14632#endif /* LIBXML_SAX1_ENABLED */ 14633 if (options & XML_PARSE_NODICT) { 14634 ctxt->dictNames = 0; 14635 options -= XML_PARSE_NODICT; 14636 ctxt->options |= XML_PARSE_NODICT; 14637 } else { 14638 ctxt->dictNames = 1; 14639 } 14640 if (options & XML_PARSE_NOCDATA) { 14641 ctxt->sax->cdataBlock = NULL; 14642 options -= XML_PARSE_NOCDATA; 14643 ctxt->options |= XML_PARSE_NOCDATA; 14644 } 14645 if (options & XML_PARSE_NSCLEAN) { 14646 ctxt->options |= XML_PARSE_NSCLEAN; 14647 options -= XML_PARSE_NSCLEAN; 14648 } 14649 if (options & XML_PARSE_NONET) { 14650 ctxt->options |= XML_PARSE_NONET; 14651 options -= XML_PARSE_NONET; 14652 } 14653 if (options & XML_PARSE_COMPACT) { 14654 ctxt->options |= XML_PARSE_COMPACT; 14655 options -= XML_PARSE_COMPACT; 14656 } 14657 if (options & XML_PARSE_OLD10) { 14658 ctxt->options |= XML_PARSE_OLD10; 14659 options -= XML_PARSE_OLD10; 14660 } 14661 if (options & XML_PARSE_NOBASEFIX) { 14662 ctxt->options |= XML_PARSE_NOBASEFIX; 14663 options -= XML_PARSE_NOBASEFIX; 14664 } 14665 if (options & XML_PARSE_HUGE) { 14666 ctxt->options |= XML_PARSE_HUGE; 14667 options -= XML_PARSE_HUGE; 14668 if (ctxt->dict != NULL) 14669 xmlDictSetLimit(ctxt->dict, 0); 14670 } 14671 if (options & XML_PARSE_OLDSAX) { 14672 ctxt->options |= XML_PARSE_OLDSAX; 14673 options -= XML_PARSE_OLDSAX; 14674 } 14675 if (options & XML_PARSE_IGNORE_ENC) { 14676 ctxt->options |= XML_PARSE_IGNORE_ENC; 14677 options -= XML_PARSE_IGNORE_ENC; 14678 } 14679 if (options & XML_PARSE_BIG_LINES) { 14680 ctxt->options |= XML_PARSE_BIG_LINES; 14681 options -= XML_PARSE_BIG_LINES; 14682 } 14683 ctxt->linenumbers = 1; 14684 return (options); 14685} 14686 14687/** 14688 * xmlCtxtUseOptions: 14689 * @ctxt: an XML parser 
context 14690 * @options: a combination of xmlParserOption 14691 * 14692 * Applies the options to the parser context 14693 * 14694 * Returns 0 in case of success, the set of unknown or unimplemented options 14695 * in case of error. 14696 */ 14697int 14698xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 14699{ 14700 return(xmlCtxtUseOptionsInternal(ctxt, options)); 14701} 14702 14703/** 14704 * xmlCtxtSetMaxAmplification: 14705 * @ctxt: an XML parser context 14706 * @maxAmpl: maximum amplification factor 14707 * 14708 * To protect against exponential entity expansion ("billion laughs"), the 14709 * size of serialized output is (roughly) limited to the input size 14710 * multiplied by this factor. The default value is 5. 14711 * 14712 * When working with documents making heavy use of entity expansion, it can 14713 * be necessary to increase the value. For security reasons, this should only 14714 * be considered when processing trusted input. 14715 */ 14716void 14717xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl) 14718{ 14719 ctxt->maxAmpl = maxAmpl; 14720} 14721 14722/** 14723 * xmlDoRead: 14724 * @ctxt: an XML parser context 14725 * @URL: the base URL to use for the document 14726 * @encoding: the document encoding, or NULL 14727 * @options: a combination of xmlParserOption 14728 * @reuse: keep the context for reuse 14729 * 14730 * Common front-end for the xmlRead functions 14731 * 14732 * Returns the resulting document tree or NULL 14733 */ 14734static xmlDocPtr 14735xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 14736 int options, int reuse) 14737{ 14738 xmlDocPtr ret; 14739 14740 xmlCtxtUseOptionsInternal(ctxt, options); 14741 if (encoding != NULL) { 14742 xmlCharEncodingHandlerPtr hdlr; 14743 14744 /* 14745 * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the 14746 * caller provided an encoding. 
Otherwise, we might switch to 14747 * the encoding from the XML declaration which is likely to 14748 * break things. Also see xmlSwitchInputEncoding. 14749 */ 14750 hdlr = xmlFindCharEncodingHandler(encoding); 14751 if (hdlr != NULL) 14752 xmlSwitchToEncoding(ctxt, hdlr); 14753 } 14754 if ((URL != NULL) && (ctxt->input != NULL) && 14755 (ctxt->input->filename == NULL)) 14756 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 14757 xmlParseDocument(ctxt); 14758 if ((ctxt->wellFormed) || ctxt->recovery) 14759 ret = ctxt->myDoc; 14760 else { 14761 ret = NULL; 14762 if (ctxt->myDoc != NULL) { 14763 xmlFreeDoc(ctxt->myDoc); 14764 } 14765 } 14766 ctxt->myDoc = NULL; 14767 if (!reuse) { 14768 xmlFreeParserCtxt(ctxt); 14769 } 14770 14771 return (ret); 14772} 14773 14774/** 14775 * xmlReadDoc: 14776 * @cur: a pointer to a zero terminated string 14777 * @URL: the base URL to use for the document 14778 * @encoding: the document encoding, or NULL 14779 * @options: a combination of xmlParserOption 14780 * 14781 * parse an XML in-memory document and build a tree. 14782 * 14783 * Returns the resulting document tree 14784 */ 14785xmlDocPtr 14786xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 14787{ 14788 xmlParserCtxtPtr ctxt; 14789 14790 if (cur == NULL) 14791 return (NULL); 14792 xmlInitParser(); 14793 14794 ctxt = xmlCreateDocParserCtxt(cur); 14795 if (ctxt == NULL) 14796 return (NULL); 14797 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14798} 14799 14800/** 14801 * xmlReadFile: 14802 * @filename: a file or URL 14803 * @encoding: the document encoding, or NULL 14804 * @options: a combination of xmlParserOption 14805 * 14806 * parse an XML file from the filesystem or the network. 
14807 * 14808 * Returns the resulting document tree 14809 */ 14810xmlDocPtr 14811xmlReadFile(const char *filename, const char *encoding, int options) 14812{ 14813 xmlParserCtxtPtr ctxt; 14814 14815 xmlInitParser(); 14816 ctxt = xmlCreateURLParserCtxt(filename, options); 14817 if (ctxt == NULL) 14818 return (NULL); 14819 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 14820} 14821 14822/** 14823 * xmlReadMemory: 14824 * @buffer: a pointer to a char array 14825 * @size: the size of the array 14826 * @URL: the base URL to use for the document 14827 * @encoding: the document encoding, or NULL 14828 * @options: a combination of xmlParserOption 14829 * 14830 * parse an XML in-memory document and build a tree. 14831 * 14832 * Returns the resulting document tree 14833 */ 14834xmlDocPtr 14835xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 14836{ 14837 xmlParserCtxtPtr ctxt; 14838 14839 xmlInitParser(); 14840 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14841 if (ctxt == NULL) 14842 return (NULL); 14843 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14844} 14845 14846/** 14847 * xmlReadFd: 14848 * @fd: an open file descriptor 14849 * @URL: the base URL to use for the document 14850 * @encoding: the document encoding, or NULL 14851 * @options: a combination of xmlParserOption 14852 * 14853 * parse an XML from a file descriptor and build a tree. 14854 * NOTE that the file descriptor will not be closed when the 14855 * reader is closed or reset. 
14856 * 14857 * Returns the resulting document tree 14858 */ 14859xmlDocPtr 14860xmlReadFd(int fd, const char *URL, const char *encoding, int options) 14861{ 14862 xmlParserCtxtPtr ctxt; 14863 xmlParserInputBufferPtr input; 14864 xmlParserInputPtr stream; 14865 14866 if (fd < 0) 14867 return (NULL); 14868 xmlInitParser(); 14869 14870 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14871 if (input == NULL) 14872 return (NULL); 14873 input->closecallback = NULL; 14874 ctxt = xmlNewParserCtxt(); 14875 if (ctxt == NULL) { 14876 xmlFreeParserInputBuffer(input); 14877 return (NULL); 14878 } 14879 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14880 if (stream == NULL) { 14881 xmlFreeParserInputBuffer(input); 14882 xmlFreeParserCtxt(ctxt); 14883 return (NULL); 14884 } 14885 inputPush(ctxt, stream); 14886 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14887} 14888 14889/** 14890 * xmlReadIO: 14891 * @ioread: an I/O read function 14892 * @ioclose: an I/O close function 14893 * @ioctx: an I/O handler 14894 * @URL: the base URL to use for the document 14895 * @encoding: the document encoding, or NULL 14896 * @options: a combination of xmlParserOption 14897 * 14898 * parse an XML document from I/O functions and source and build a tree. 
14899 * 14900 * Returns the resulting document tree 14901 */ 14902xmlDocPtr 14903xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 14904 void *ioctx, const char *URL, const char *encoding, int options) 14905{ 14906 xmlParserCtxtPtr ctxt; 14907 xmlParserInputBufferPtr input; 14908 xmlParserInputPtr stream; 14909 14910 if (ioread == NULL) 14911 return (NULL); 14912 xmlInitParser(); 14913 14914 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 14915 XML_CHAR_ENCODING_NONE); 14916 if (input == NULL) { 14917 if (ioclose != NULL) 14918 ioclose(ioctx); 14919 return (NULL); 14920 } 14921 ctxt = xmlNewParserCtxt(); 14922 if (ctxt == NULL) { 14923 xmlFreeParserInputBuffer(input); 14924 return (NULL); 14925 } 14926 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14927 if (stream == NULL) { 14928 xmlFreeParserInputBuffer(input); 14929 xmlFreeParserCtxt(ctxt); 14930 return (NULL); 14931 } 14932 inputPush(ctxt, stream); 14933 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14934} 14935 14936/** 14937 * xmlCtxtReadDoc: 14938 * @ctxt: an XML parser context 14939 * @str: a pointer to a zero terminated string 14940 * @URL: the base URL to use for the document 14941 * @encoding: the document encoding, or NULL 14942 * @options: a combination of xmlParserOption 14943 * 14944 * parse an XML in-memory document and build a tree. 
14945 * This reuses the existing @ctxt parser context 14946 * 14947 * Returns the resulting document tree 14948 */ 14949xmlDocPtr 14950xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str, 14951 const char *URL, const char *encoding, int options) 14952{ 14953 xmlParserInputBufferPtr input; 14954 xmlParserInputPtr stream; 14955 14956 if (ctxt == NULL) 14957 return (NULL); 14958 if (str == NULL) 14959 return (NULL); 14960 xmlInitParser(); 14961 14962 xmlCtxtReset(ctxt); 14963 14964 input = xmlParserInputBufferCreateString(str); 14965 if (input == NULL) { 14966 return(NULL); 14967 } 14968 14969 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14970 if (stream == NULL) { 14971 xmlFreeParserInputBuffer(input); 14972 return(NULL); 14973 } 14974 14975 inputPush(ctxt, stream); 14976 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14977} 14978 14979/** 14980 * xmlCtxtReadFile: 14981 * @ctxt: an XML parser context 14982 * @filename: a file or URL 14983 * @encoding: the document encoding, or NULL 14984 * @options: a combination of xmlParserOption 14985 * 14986 * parse an XML file from the filesystem or the network. 
14987 * This reuses the existing @ctxt parser context 14988 * 14989 * Returns the resulting document tree 14990 */ 14991xmlDocPtr 14992xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 14993 const char *encoding, int options) 14994{ 14995 xmlParserInputPtr stream; 14996 14997 if (filename == NULL) 14998 return (NULL); 14999 if (ctxt == NULL) 15000 return (NULL); 15001 xmlInitParser(); 15002 15003 xmlCtxtReset(ctxt); 15004 15005 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15006 if (stream == NULL) { 15007 return (NULL); 15008 } 15009 inputPush(ctxt, stream); 15010 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15011} 15012 15013/** 15014 * xmlCtxtReadMemory: 15015 * @ctxt: an XML parser context 15016 * @buffer: a pointer to a char array 15017 * @size: the size of the array 15018 * @URL: the base URL to use for the document 15019 * @encoding: the document encoding, or NULL 15020 * @options: a combination of xmlParserOption 15021 * 15022 * parse an XML in-memory document and build a tree. 
15023 * This reuses the existing @ctxt parser context 15024 * 15025 * Returns the resulting document tree 15026 */ 15027xmlDocPtr 15028xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15029 const char *URL, const char *encoding, int options) 15030{ 15031 xmlParserInputBufferPtr input; 15032 xmlParserInputPtr stream; 15033 15034 if (ctxt == NULL) 15035 return (NULL); 15036 if (buffer == NULL) 15037 return (NULL); 15038 xmlInitParser(); 15039 15040 xmlCtxtReset(ctxt); 15041 15042 input = xmlParserInputBufferCreateStatic(buffer, size, 15043 XML_CHAR_ENCODING_NONE); 15044 if (input == NULL) { 15045 return(NULL); 15046 } 15047 15048 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15049 if (stream == NULL) { 15050 xmlFreeParserInputBuffer(input); 15051 return(NULL); 15052 } 15053 15054 inputPush(ctxt, stream); 15055 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15056} 15057 15058/** 15059 * xmlCtxtReadFd: 15060 * @ctxt: an XML parser context 15061 * @fd: an open file descriptor 15062 * @URL: the base URL to use for the document 15063 * @encoding: the document encoding, or NULL 15064 * @options: a combination of xmlParserOption 15065 * 15066 * parse an XML from a file descriptor and build a tree. 15067 * This reuses the existing @ctxt parser context 15068 * NOTE that the file descriptor will not be closed when the 15069 * reader is closed or reset. 
15070 * 15071 * Returns the resulting document tree 15072 */ 15073xmlDocPtr 15074xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15075 const char *URL, const char *encoding, int options) 15076{ 15077 xmlParserInputBufferPtr input; 15078 xmlParserInputPtr stream; 15079 15080 if (fd < 0) 15081 return (NULL); 15082 if (ctxt == NULL) 15083 return (NULL); 15084 xmlInitParser(); 15085 15086 xmlCtxtReset(ctxt); 15087 15088 15089 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15090 if (input == NULL) 15091 return (NULL); 15092 input->closecallback = NULL; 15093 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15094 if (stream == NULL) { 15095 xmlFreeParserInputBuffer(input); 15096 return (NULL); 15097 } 15098 inputPush(ctxt, stream); 15099 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15100} 15101 15102/** 15103 * xmlCtxtReadIO: 15104 * @ctxt: an XML parser context 15105 * @ioread: an I/O read function 15106 * @ioclose: an I/O close function 15107 * @ioctx: an I/O handler 15108 * @URL: the base URL to use for the document 15109 * @encoding: the document encoding, or NULL 15110 * @options: a combination of xmlParserOption 15111 * 15112 * parse an XML document from I/O functions and source and build a tree. 
15113 * This reuses the existing @ctxt parser context 15114 * 15115 * Returns the resulting document tree 15116 */ 15117xmlDocPtr 15118xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15119 xmlInputCloseCallback ioclose, void *ioctx, 15120 const char *URL, 15121 const char *encoding, int options) 15122{ 15123 xmlParserInputBufferPtr input; 15124 xmlParserInputPtr stream; 15125 15126 if (ioread == NULL) 15127 return (NULL); 15128 if (ctxt == NULL) 15129 return (NULL); 15130 xmlInitParser(); 15131 15132 xmlCtxtReset(ctxt); 15133 15134 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15135 XML_CHAR_ENCODING_NONE); 15136 if (input == NULL) { 15137 if (ioclose != NULL) 15138 ioclose(ioctx); 15139 return (NULL); 15140 } 15141 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15142 if (stream == NULL) { 15143 xmlFreeParserInputBuffer(input); 15144 return (NULL); 15145 } 15146 inputPush(ctxt, stream); 15147 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15148}