Reactos
at master 1148 lines 31 kB view raw
1/* 2 * entities.c : implementation for the XML entities handling 3 * 4 * See Copyright for the status of this software. 5 * 6 * daniel@veillard.com 7 */ 8 9/* To avoid EBCDIC trouble when parsing on zOS */ 10#if defined(__MVS__) 11#pragma convert("ISO8859-1") 12#endif 13 14#define IN_LIBXML 15#include "libxml.h" 16 17#include <string.h> 18#include <stdlib.h> 19 20#include <libxml/xmlmemory.h> 21#include <libxml/hash.h> 22#include <libxml/entities.h> 23#include <libxml/parser.h> 24#include <libxml/parserInternals.h> 25#include <libxml/xmlerror.h> 26#include <libxml/dict.h> 27 28#include "private/entities.h" 29#include "private/error.h" 30 31/* 32 * The XML predefined entities. 33 */ 34 35static xmlEntity xmlEntityLt = { 36 NULL, XML_ENTITY_DECL, BAD_CAST "lt", 37 NULL, NULL, NULL, NULL, NULL, NULL, 38 BAD_CAST "<", BAD_CAST "<", 1, 39 XML_INTERNAL_PREDEFINED_ENTITY, 40 NULL, NULL, NULL, NULL, 0, 0, 0 41}; 42static xmlEntity xmlEntityGt = { 43 NULL, XML_ENTITY_DECL, BAD_CAST "gt", 44 NULL, NULL, NULL, NULL, NULL, NULL, 45 BAD_CAST ">", BAD_CAST ">", 1, 46 XML_INTERNAL_PREDEFINED_ENTITY, 47 NULL, NULL, NULL, NULL, 0, 0, 0 48}; 49static xmlEntity xmlEntityAmp = { 50 NULL, XML_ENTITY_DECL, BAD_CAST "amp", 51 NULL, NULL, NULL, NULL, NULL, NULL, 52 BAD_CAST "&", BAD_CAST "&", 1, 53 XML_INTERNAL_PREDEFINED_ENTITY, 54 NULL, NULL, NULL, NULL, 0, 0, 0 55}; 56static xmlEntity xmlEntityQuot = { 57 NULL, XML_ENTITY_DECL, BAD_CAST "quot", 58 NULL, NULL, NULL, NULL, NULL, NULL, 59 BAD_CAST "\"", BAD_CAST "\"", 1, 60 XML_INTERNAL_PREDEFINED_ENTITY, 61 NULL, NULL, NULL, NULL, 0, 0, 0 62}; 63static xmlEntity xmlEntityApos = { 64 NULL, XML_ENTITY_DECL, BAD_CAST "apos", 65 NULL, NULL, NULL, NULL, NULL, NULL, 66 BAD_CAST "'", BAD_CAST "'", 1, 67 XML_INTERNAL_PREDEFINED_ENTITY, 68 NULL, NULL, NULL, NULL, 0, 0, 0 69}; 70 71/** 72 * xmlEntitiesErrMemory: 73 * @extra: extra information 74 * 75 * Handle an out of memory condition 76 */ 77static void 78xmlEntitiesErrMemory(const char *extra) 79{ 80 __xmlSimpleError(XML_FROM_TREE, XML_ERR_NO_MEMORY, NULL, NULL, extra); 81} 82 83/** 84 * xmlEntitiesErr: 85 * @code: the error code 86 * @msg: the message 87 * 88 * Raise an error. 89 */ 90static void LIBXML_ATTR_FORMAT(2,0) 91xmlEntitiesErr(xmlParserErrors code, const char *msg) 92{ 93 __xmlSimpleError(XML_FROM_TREE, code, NULL, msg, NULL); 94} 95 96/** 97 * xmlEntitiesWarn: 98 * @code: the error code 99 * @msg: the message 100 * 101 * Raise a warning. 102 */ 103static void LIBXML_ATTR_FORMAT(2,0) 104xmlEntitiesWarn(xmlParserErrors code, const char *msg, const xmlChar *str1) 105{ 106 __xmlRaiseError(NULL, NULL, NULL, 107 NULL, NULL, XML_FROM_TREE, code, 108 XML_ERR_WARNING, NULL, 0, 109 (const char *)str1, NULL, NULL, 0, 0, 110 msg, (const char *)str1, NULL); 111} 112 113/* 114 * xmlFreeEntity : clean-up an entity record. 115 */ 116void 117xmlFreeEntity(xmlEntityPtr entity) 118{ 119 xmlDictPtr dict = NULL; 120 121 if (entity == NULL) 122 return; 123 124 if (entity->doc != NULL) 125 dict = entity->doc->dict; 126 127 128 if ((entity->children) && (entity->owner == 1) && 129 (entity == (xmlEntityPtr) entity->children->parent)) 130 xmlFreeNodeList(entity->children); 131 if ((entity->name != NULL) && 132 ((dict == NULL) || (!xmlDictOwns(dict, entity->name)))) 133 xmlFree((char *) entity->name); 134 if (entity->ExternalID != NULL) 135 xmlFree((char *) entity->ExternalID); 136 if (entity->SystemID != NULL) 137 xmlFree((char *) entity->SystemID); 138 if (entity->URI != NULL) 139 xmlFree((char *) entity->URI); 140 if (entity->content != NULL) 141 xmlFree((char *) entity->content); 142 if (entity->orig != NULL) 143 xmlFree((char *) entity->orig); 144 xmlFree(entity); 145} 146 147/* 148 * xmlCreateEntity: 149 * 150 * internal routine doing the entity node structures allocations 151 */ 152static xmlEntityPtr 153xmlCreateEntity(xmlDictPtr dict, const xmlChar *name, int type, 154 const xmlChar *ExternalID, const xmlChar *SystemID, 155 const xmlChar *content) { 156 xmlEntityPtr ret; 157 158 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity)); 159 if (ret == NULL) { 160 xmlEntitiesErrMemory("xmlCreateEntity: malloc failed"); 161 return(NULL); 162 } 163 memset(ret, 0, sizeof(xmlEntity)); 164 ret->type = XML_ENTITY_DECL; 165 166 /* 167 * fill the structure. 168 */ 169 ret->etype = (xmlEntityType) type; 170 if (dict == NULL) { 171 ret->name = xmlStrdup(name); 172 if (ExternalID != NULL) 173 ret->ExternalID = xmlStrdup(ExternalID); 174 if (SystemID != NULL) 175 ret->SystemID = xmlStrdup(SystemID); 176 } else { 177 ret->name = xmlDictLookup(dict, name, -1); 178 ret->ExternalID = xmlStrdup(ExternalID); 179 ret->SystemID = xmlStrdup(SystemID); 180 } 181 if (content != NULL) { 182 ret->length = xmlStrlen(content); 183 ret->content = xmlStrndup(content, ret->length); 184 } else { 185 ret->length = 0; 186 ret->content = NULL; 187 } 188 ret->URI = NULL; /* to be computed by the layer knowing 189 the defining entity */ 190 ret->orig = NULL; 191 ret->owner = 0; 192 193 return(ret); 194} 195 196/* 197 * xmlAddEntity : register a new entity for an entities table. 198 */ 199static xmlEntityPtr 200xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type, 201 const xmlChar *ExternalID, const xmlChar *SystemID, 202 const xmlChar *content) { 203 xmlDictPtr dict = NULL; 204 xmlEntitiesTablePtr table = NULL; 205 xmlEntityPtr ret, predef; 206 207 if (name == NULL) 208 return(NULL); 209 if (dtd == NULL) 210 return(NULL); 211 if (dtd->doc != NULL) 212 dict = dtd->doc->dict; 213 214 switch (type) { 215 case XML_INTERNAL_GENERAL_ENTITY: 216 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 217 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 218 predef = xmlGetPredefinedEntity(name); 219 if (predef != NULL) { 220 int valid = 0; 221 222 /* 4.6 Predefined Entities */ 223 if ((type == XML_INTERNAL_GENERAL_ENTITY) && 224 (content != NULL)) { 225 int c = predef->content[0]; 226 227 if (((content[0] == c) && (content[1] == 0)) && 228 ((c == '>') || (c == '\'') || (c == '"'))) { 229 valid = 1; 230 } else if ((content[0] == '&') && (content[1] == '#')) { 231 if (content[2] == 'x') { 232 xmlChar *hex = BAD_CAST "0123456789ABCDEF"; 233 xmlChar ref[] = "00;"; 234 235 ref[0] = hex[c / 16 % 16]; 236 ref[1] = hex[c % 16]; 237 if (xmlStrcasecmp(&content[3], ref) == 0) 238 valid = 1; 239 } else { 240 xmlChar ref[] = "00;"; 241 242 ref[0] = '0' + c / 10 % 10; 243 ref[1] = '0' + c % 10; 244 if (xmlStrEqual(&content[2], ref)) 245 valid = 1; 246 } 247 } 248 } 249 if (!valid) { 250 xmlEntitiesWarn(XML_ERR_ENTITY_PROCESSING, 251 "xmlAddEntity: invalid redeclaration of predefined" 252 " entity '%s'", name); 253 return(NULL); 254 } 255 } 256 if (dtd->entities == NULL) 257 dtd->entities = xmlHashCreateDict(0, dict); 258 table = dtd->entities; 259 break; 260 case XML_INTERNAL_PARAMETER_ENTITY: 261 case XML_EXTERNAL_PARAMETER_ENTITY: 262 if (dtd->pentities == NULL) 263 dtd->pentities = xmlHashCreateDict(0, dict); 264 table = dtd->pentities; 265 break; 266 case XML_INTERNAL_PREDEFINED_ENTITY: 267 return(NULL); 268 } 269 if (table == NULL) 270 return(NULL); 271 ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content); 272 if (ret == NULL) 273 return(NULL); 274 ret->doc = dtd->doc; 275 276 if (xmlHashAddEntry(table, name, ret)) { 277 /* 278 * entity was already defined at another level. 279 */ 280 xmlFreeEntity(ret); 281 return(NULL); 282 } 283 return(ret); 284} 285 286/** 287 * xmlGetPredefinedEntity: 288 * @name: the entity name 289 * 290 * Check whether this name is an predefined entity. 291 * 292 * Returns NULL if not, otherwise the entity 293 */ 294xmlEntityPtr 295xmlGetPredefinedEntity(const xmlChar *name) { 296 if (name == NULL) return(NULL); 297 switch (name[0]) { 298 case 'l': 299 if (xmlStrEqual(name, BAD_CAST "lt")) 300 return(&xmlEntityLt); 301 break; 302 case 'g': 303 if (xmlStrEqual(name, BAD_CAST "gt")) 304 return(&xmlEntityGt); 305 break; 306 case 'a': 307 if (xmlStrEqual(name, BAD_CAST "amp")) 308 return(&xmlEntityAmp); 309 if (xmlStrEqual(name, BAD_CAST "apos")) 310 return(&xmlEntityApos); 311 break; 312 case 'q': 313 if (xmlStrEqual(name, BAD_CAST "quot")) 314 return(&xmlEntityQuot); 315 break; 316 default: 317 break; 318 } 319 return(NULL); 320} 321 322/** 323 * xmlAddDtdEntity: 324 * @doc: the document 325 * @name: the entity name 326 * @type: the entity type XML_xxx_yyy_ENTITY 327 * @ExternalID: the entity external ID if available 328 * @SystemID: the entity system ID if available 329 * @content: the entity content 330 * 331 * Register a new entity for this document DTD external subset. 332 * 333 * Returns a pointer to the entity or NULL in case of error 334 */ 335xmlEntityPtr 336xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type, 337 const xmlChar *ExternalID, const xmlChar *SystemID, 338 const xmlChar *content) { 339 xmlEntityPtr ret; 340 xmlDtdPtr dtd; 341 342 if (doc == NULL) { 343 xmlEntitiesErr(XML_DTD_NO_DOC, 344 "xmlAddDtdEntity: document is NULL"); 345 return(NULL); 346 } 347 if (doc->extSubset == NULL) { 348 xmlEntitiesErr(XML_DTD_NO_DTD, 349 "xmlAddDtdEntity: document without external subset"); 350 return(NULL); 351 } 352 dtd = doc->extSubset; 353 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content); 354 if (ret == NULL) return(NULL); 355 356 /* 357 * Link it to the DTD 358 */ 359 ret->parent = dtd; 360 ret->doc = dtd->doc; 361 if (dtd->last == NULL) { 362 dtd->children = dtd->last = (xmlNodePtr) ret; 363 } else { 364 dtd->last->next = (xmlNodePtr) ret; 365 ret->prev = dtd->last; 366 dtd->last = (xmlNodePtr) ret; 367 } 368 return(ret); 369} 370 371/** 372 * xmlAddDocEntity: 373 * @doc: the document 374 * @name: the entity name 375 * @type: the entity type XML_xxx_yyy_ENTITY 376 * @ExternalID: the entity external ID if available 377 * @SystemID: the entity system ID if available 378 * @content: the entity content 379 * 380 * Register a new entity for this document. 381 * 382 * Returns a pointer to the entity or NULL in case of error 383 */ 384xmlEntityPtr 385xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type, 386 const xmlChar *ExternalID, const xmlChar *SystemID, 387 const xmlChar *content) { 388 xmlEntityPtr ret; 389 xmlDtdPtr dtd; 390 391 if (doc == NULL) { 392 xmlEntitiesErr(XML_DTD_NO_DOC, 393 "xmlAddDocEntity: document is NULL"); 394 return(NULL); 395 } 396 if (doc->intSubset == NULL) { 397 xmlEntitiesErr(XML_DTD_NO_DTD, 398 "xmlAddDocEntity: document without internal subset"); 399 return(NULL); 400 } 401 dtd = doc->intSubset; 402 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content); 403 if (ret == NULL) return(NULL); 404 405 /* 406 * Link it to the DTD 407 */ 408 ret->parent = dtd; 409 ret->doc = dtd->doc; 410 if (dtd->last == NULL) { 411 dtd->children = dtd->last = (xmlNodePtr) ret; 412 } else { 413 dtd->last->next = (xmlNodePtr) ret; 414 ret->prev = dtd->last; 415 dtd->last = (xmlNodePtr) ret; 416 } 417 return(ret); 418} 419 420/** 421 * xmlNewEntity: 422 * @doc: the document 423 * @name: the entity name 424 * @type: the entity type XML_xxx_yyy_ENTITY 425 * @ExternalID: the entity external ID if available 426 * @SystemID: the entity system ID if available 427 * @content: the entity content 428 * 429 * Create a new entity, this differs from xmlAddDocEntity() that if 430 * the document is NULL or has no internal subset defined, then an 431 * unlinked entity structure will be returned, it is then the responsibility 432 * of the caller to link it to the document later or free it when not needed 433 * anymore. 434 * 435 * Returns a pointer to the entity or NULL in case of error 436 */ 437xmlEntityPtr 438xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type, 439 const xmlChar *ExternalID, const xmlChar *SystemID, 440 const xmlChar *content) { 441 xmlEntityPtr ret; 442 xmlDictPtr dict; 443 444 if ((doc != NULL) && (doc->intSubset != NULL)) { 445 return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content)); 446 } 447 if (doc != NULL) 448 dict = doc->dict; 449 else 450 dict = NULL; 451 ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content); 452 if (ret == NULL) 453 return(NULL); 454 ret->doc = doc; 455 return(ret); 456} 457 458/** 459 * xmlGetEntityFromTable: 460 * @table: an entity table 461 * @name: the entity name 462 * @parameter: look for parameter entities 463 * 464 * Do an entity lookup in the table. 465 * returns the corresponding parameter entity, if found. 466 * 467 * Returns A pointer to the entity structure or NULL if not found. 468 */ 469static xmlEntityPtr 470xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) { 471 return((xmlEntityPtr) xmlHashLookup(table, name)); 472} 473 474/** 475 * xmlGetParameterEntity: 476 * @doc: the document referencing the entity 477 * @name: the entity name 478 * 479 * Do an entity lookup in the internal and external subsets and 480 * returns the corresponding parameter entity, if found. 481 * 482 * Returns A pointer to the entity structure or NULL if not found. 483 */ 484xmlEntityPtr 485xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) { 486 xmlEntitiesTablePtr table; 487 xmlEntityPtr ret; 488 489 if (doc == NULL) 490 return(NULL); 491 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) { 492 table = (xmlEntitiesTablePtr) doc->intSubset->pentities; 493 ret = xmlGetEntityFromTable(table, name); 494 if (ret != NULL) 495 return(ret); 496 } 497 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) { 498 table = (xmlEntitiesTablePtr) doc->extSubset->pentities; 499 return(xmlGetEntityFromTable(table, name)); 500 } 501 return(NULL); 502} 503 504/** 505 * xmlGetDtdEntity: 506 * @doc: the document referencing the entity 507 * @name: the entity name 508 * 509 * Do an entity lookup in the DTD entity hash table and 510 * returns the corresponding entity, if found. 511 * Note: the first argument is the document node, not the DTD node. 512 * 513 * Returns A pointer to the entity structure or NULL if not found. 514 */ 515xmlEntityPtr 516xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) { 517 xmlEntitiesTablePtr table; 518 519 if (doc == NULL) 520 return(NULL); 521 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { 522 table = (xmlEntitiesTablePtr) doc->extSubset->entities; 523 return(xmlGetEntityFromTable(table, name)); 524 } 525 return(NULL); 526} 527 528/** 529 * xmlGetDocEntity: 530 * @doc: the document referencing the entity 531 * @name: the entity name 532 * 533 * Do an entity lookup in the document entity hash table and 534 * returns the corresponding entity, otherwise a lookup is done 535 * in the predefined entities too. 536 * 537 * Returns A pointer to the entity structure or NULL if not found. 538 */ 539xmlEntityPtr 540xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) { 541 xmlEntityPtr cur; 542 xmlEntitiesTablePtr table; 543 544 if (doc != NULL) { 545 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) { 546 table = (xmlEntitiesTablePtr) doc->intSubset->entities; 547 cur = xmlGetEntityFromTable(table, name); 548 if (cur != NULL) 549 return(cur); 550 } 551 if (doc->standalone != 1) { 552 if ((doc->extSubset != NULL) && 553 (doc->extSubset->entities != NULL)) { 554 table = (xmlEntitiesTablePtr) doc->extSubset->entities; 555 cur = xmlGetEntityFromTable(table, name); 556 if (cur != NULL) 557 return(cur); 558 } 559 } 560 } 561 return(xmlGetPredefinedEntity(name)); 562} 563 564/* 565 * Macro used to grow the current buffer. 566 */ 567#define growBufferReentrant() { \ 568 xmlChar *tmp; \ 569 size_t new_size = buffer_size * 2; \ 570 if (new_size < buffer_size) goto mem_error; \ 571 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ 572 if (tmp == NULL) goto mem_error; \ 573 buffer = tmp; \ 574 buffer_size = new_size; \ 575} 576 577/** 578 * xmlEncodeEntitiesInternal: 579 * @doc: the document containing the string 580 * @input: A string to convert to XML. 581 * @attr: are we handling an attribute value 582 * 583 * Do a global encoding of a string, replacing the predefined entities 584 * and non ASCII values with their entities and CharRef counterparts. 585 * Contrary to xmlEncodeEntities, this routine is reentrant, and result 586 * must be deallocated. 587 * 588 * Returns A newly allocated string with the substitution done. 589 */ 590static xmlChar * 591xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) { 592 const xmlChar *cur = input; 593 xmlChar *buffer = NULL; 594 xmlChar *out = NULL; 595 size_t buffer_size = 0; 596 int html = 0; 597 598 if (input == NULL) return(NULL); 599 if (doc != NULL) 600 html = (doc->type == XML_HTML_DOCUMENT_NODE); 601 602 /* 603 * allocate an translation buffer. 604 */ 605 buffer_size = 1000; 606 buffer = (xmlChar *) xmlMalloc(buffer_size); 607 if (buffer == NULL) { 608 xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed"); 609 return(NULL); 610 } 611 out = buffer; 612 613 while (*cur != '\0') { 614 size_t indx = out - buffer; 615 if (indx + 100 > buffer_size) { 616 617 growBufferReentrant(); 618 out = &buffer[indx]; 619 } 620 621 /* 622 * By default one have to encode at least '<', '>', '"' and '&' ! 623 */ 624 if (*cur == '<') { 625 const xmlChar *end; 626 627 /* 628 * Special handling of server side include in HTML attributes 629 */ 630 if (html && attr && 631 (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') && 632 ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) { 633 while (cur != end) { 634 *out++ = *cur++; 635 indx = out - buffer; 636 if (indx + 100 > buffer_size) { 637 growBufferReentrant(); 638 out = &buffer[indx]; 639 } 640 } 641 *out++ = *cur++; 642 *out++ = *cur++; 643 *out++ = *cur++; 644 continue; 645 } 646 *out++ = '&'; 647 *out++ = 'l'; 648 *out++ = 't'; 649 *out++ = ';'; 650 } else if (*cur == '>') { 651 *out++ = '&'; 652 *out++ = 'g'; 653 *out++ = 't'; 654 *out++ = ';'; 655 } else if (*cur == '&') { 656 /* 657 * Special handling of &{...} construct from HTML 4, see 658 * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1 659 */ 660 if (html && attr && (cur[1] == '{') && 661 (strchr((const char *) cur, '}'))) { 662 while (*cur != '}') { 663 *out++ = *cur++; 664 indx = out - buffer; 665 if (indx + 100 > buffer_size) { 666 growBufferReentrant(); 667 out = &buffer[indx]; 668 } 669 } 670 *out++ = *cur++; 671 continue; 672 } 673 *out++ = '&'; 674 *out++ = 'a'; 675 *out++ = 'm'; 676 *out++ = 'p'; 677 *out++ = ';'; 678 } else if (((*cur >= 0x20) && (*cur < 0x80)) || 679 (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) { 680 /* 681 * default case, just copy ! 682 */ 683 *out++ = *cur; 684 } else if (*cur >= 0x80) { 685 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) { 686 /* 687 * Bjørn Reese <br@sseusa.com> provided the patch 688 xmlChar xc; 689 xc = (*cur & 0x3F) << 6; 690 if (cur[1] != 0) { 691 xc += *(++cur) & 0x3F; 692 *out++ = xc; 693 } else 694 */ 695 *out++ = *cur; 696 } else { 697 /* 698 * We assume we have UTF-8 input. 699 * It must match either: 700 * 110xxxxx 10xxxxxx 701 * 1110xxxx 10xxxxxx 10xxxxxx 702 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 703 * That is: 704 * cur[0] is 11xxxxxx 705 * cur[1] is 10xxxxxx 706 * cur[2] is 10xxxxxx if cur[0] is 111xxxxx 707 * cur[3] is 10xxxxxx if cur[0] is 1111xxxx 708 * cur[0] is not 11111xxx 709 */ 710 char buf[11], *ptr; 711 int val = 0, l = 1; 712 713 if (((cur[0] & 0xC0) != 0xC0) || 714 ((cur[1] & 0xC0) != 0x80) || 715 (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) || 716 (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) || 717 (((cur[0] & 0xF8) == 0xF8))) { 718 xmlEntitiesErr(XML_CHECK_NOT_UTF8, 719 "xmlEncodeEntities: input not UTF-8"); 720 snprintf(buf, sizeof(buf), "&#%d;", *cur); 721 buf[sizeof(buf) - 1] = 0; 722 ptr = buf; 723 while (*ptr != 0) *out++ = *ptr++; 724 cur++; 725 continue; 726 } else if (*cur < 0xE0) { 727 val = (cur[0]) & 0x1F; 728 val <<= 6; 729 val |= (cur[1]) & 0x3F; 730 l = 2; 731 } else if (*cur < 0xF0) { 732 val = (cur[0]) & 0x0F; 733 val <<= 6; 734 val |= (cur[1]) & 0x3F; 735 val <<= 6; 736 val |= (cur[2]) & 0x3F; 737 l = 3; 738 } else if (*cur < 0xF8) { 739 val = (cur[0]) & 0x07; 740 val <<= 6; 741 val |= (cur[1]) & 0x3F; 742 val <<= 6; 743 val |= (cur[2]) & 0x3F; 744 val <<= 6; 745 val |= (cur[3]) & 0x3F; 746 l = 4; 747 } 748 if ((l == 1) || (!IS_CHAR(val))) { 749 xmlEntitiesErr(XML_ERR_INVALID_CHAR, 750 "xmlEncodeEntities: char out of range\n"); 751 snprintf(buf, sizeof(buf), "&#%d;", *cur); 752 buf[sizeof(buf) - 1] = 0; 753 ptr = buf; 754 while (*ptr != 0) *out++ = *ptr++; 755 cur++; 756 continue; 757 } 758 /* 759 * We could do multiple things here. Just save as a char ref 760 */ 761 snprintf(buf, sizeof(buf), "&#x%X;", val); 762 buf[sizeof(buf) - 1] = 0; 763 ptr = buf; 764 while (*ptr != 0) *out++ = *ptr++; 765 cur += l; 766 continue; 767 } 768 } else if (IS_BYTE_CHAR(*cur)) { 769 char buf[11], *ptr; 770 771 snprintf(buf, sizeof(buf), "&#%d;", *cur); 772 buf[sizeof(buf) - 1] = 0; 773 ptr = buf; 774 while (*ptr != 0) *out++ = *ptr++; 775 } 776 cur++; 777 } 778 *out = 0; 779 return(buffer); 780 781mem_error: 782 xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed"); 783 xmlFree(buffer); 784 return(NULL); 785} 786 787/** 788 * xmlEncodeAttributeEntities: 789 * @doc: the document containing the string 790 * @input: A string to convert to XML. 791 * 792 * Do a global encoding of a string, replacing the predefined entities 793 * and non ASCII values with their entities and CharRef counterparts for 794 * attribute values. 795 * 796 * Returns A newly allocated string with the substitution done. 797 */ 798xmlChar * 799xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) { 800 return xmlEncodeEntitiesInternal(doc, input, 1); 801} 802 803/** 804 * xmlEncodeEntitiesReentrant: 805 * @doc: the document containing the string 806 * @input: A string to convert to XML. 807 * 808 * Do a global encoding of a string, replacing the predefined entities 809 * and non ASCII values with their entities and CharRef counterparts. 810 * Contrary to xmlEncodeEntities, this routine is reentrant, and result 811 * must be deallocated. 812 * 813 * Returns A newly allocated string with the substitution done. 814 */ 815xmlChar * 816xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { 817 return xmlEncodeEntitiesInternal(doc, input, 0); 818} 819 820/** 821 * xmlEncodeSpecialChars: 822 * @doc: the document containing the string 823 * @input: A string to convert to XML. 824 * 825 * Do a global encoding of a string, replacing the predefined entities 826 * this routine is reentrant, and result must be deallocated. 827 * 828 * Returns A newly allocated string with the substitution done. 829 */ 830xmlChar * 831xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED, const xmlChar *input) { 832 const xmlChar *cur = input; 833 xmlChar *buffer = NULL; 834 xmlChar *out = NULL; 835 size_t buffer_size = 0; 836 if (input == NULL) return(NULL); 837 838 /* 839 * allocate an translation buffer. 840 */ 841 buffer_size = 1000; 842 buffer = (xmlChar *) xmlMalloc(buffer_size); 843 if (buffer == NULL) { 844 xmlEntitiesErrMemory("xmlEncodeSpecialChars: malloc failed"); 845 return(NULL); 846 } 847 out = buffer; 848 849 while (*cur != '\0') { 850 size_t indx = out - buffer; 851 if (indx + 10 > buffer_size) { 852 853 growBufferReentrant(); 854 out = &buffer[indx]; 855 } 856 857 /* 858 * By default one have to encode at least '<', '>', '"' and '&' ! 859 */ 860 if (*cur == '<') { 861 *out++ = '&'; 862 *out++ = 'l'; 863 *out++ = 't'; 864 *out++ = ';'; 865 } else if (*cur == '>') { 866 *out++ = '&'; 867 *out++ = 'g'; 868 *out++ = 't'; 869 *out++ = ';'; 870 } else if (*cur == '&') { 871 *out++ = '&'; 872 *out++ = 'a'; 873 *out++ = 'm'; 874 *out++ = 'p'; 875 *out++ = ';'; 876 } else if (*cur == '"') { 877 *out++ = '&'; 878 *out++ = 'q'; 879 *out++ = 'u'; 880 *out++ = 'o'; 881 *out++ = 't'; 882 *out++ = ';'; 883 } else if (*cur == '\r') { 884 *out++ = '&'; 885 *out++ = '#'; 886 *out++ = '1'; 887 *out++ = '3'; 888 *out++ = ';'; 889 } else { 890 /* 891 * Works because on UTF-8, all extended sequences cannot 892 * result in bytes in the ASCII range. 893 */ 894 *out++ = *cur; 895 } 896 cur++; 897 } 898 *out = 0; 899 return(buffer); 900 901mem_error: 902 xmlEntitiesErrMemory("xmlEncodeSpecialChars: realloc failed"); 903 xmlFree(buffer); 904 return(NULL); 905} 906 907/** 908 * xmlCreateEntitiesTable: 909 * 910 * create and initialize an empty entities hash table. 911 * This really doesn't make sense and should be deprecated 912 * 913 * Returns the xmlEntitiesTablePtr just created or NULL in case of error. 914 */ 915xmlEntitiesTablePtr 916xmlCreateEntitiesTable(void) { 917 return((xmlEntitiesTablePtr) xmlHashCreate(0)); 918} 919 920/** 921 * xmlFreeEntityWrapper: 922 * @entity: An entity 923 * @name: its name 924 * 925 * Deallocate the memory used by an entities in the hash table. 926 */ 927static void 928xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) { 929 if (entity != NULL) 930 xmlFreeEntity((xmlEntityPtr) entity); 931} 932 933/** 934 * xmlFreeEntitiesTable: 935 * @table: An entity table 936 * 937 * Deallocate the memory used by an entities hash table. 938 */ 939void 940xmlFreeEntitiesTable(xmlEntitiesTablePtr table) { 941 xmlHashFree(table, xmlFreeEntityWrapper); 942} 943 944#ifdef LIBXML_TREE_ENABLED 945/** 946 * xmlCopyEntity: 947 * @ent: An entity 948 * 949 * Build a copy of an entity 950 * 951 * Returns the new xmlEntitiesPtr or NULL in case of error. 952 */ 953static void * 954xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) { 955 xmlEntityPtr ent = (xmlEntityPtr) payload; 956 xmlEntityPtr cur; 957 958 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity)); 959 if (cur == NULL) { 960 xmlEntitiesErrMemory("xmlCopyEntity:: malloc failed"); 961 return(NULL); 962 } 963 memset(cur, 0, sizeof(xmlEntity)); 964 cur->type = XML_ENTITY_DECL; 965 966 cur->etype = ent->etype; 967 if (ent->name != NULL) 968 cur->name = xmlStrdup(ent->name); 969 if (ent->ExternalID != NULL) 970 cur->ExternalID = xmlStrdup(ent->ExternalID); 971 if (ent->SystemID != NULL) 972 cur->SystemID = xmlStrdup(ent->SystemID); 973 if (ent->content != NULL) 974 cur->content = xmlStrdup(ent->content); 975 if (ent->orig != NULL) 976 cur->orig = xmlStrdup(ent->orig); 977 if (ent->URI != NULL) 978 cur->URI = xmlStrdup(ent->URI); 979 return(cur); 980} 981 982/** 983 * xmlCopyEntitiesTable: 984 * @table: An entity table 985 * 986 * Build a copy of an entity table. 987 * 988 * Returns the new xmlEntitiesTablePtr or NULL in case of error. 989 */ 990xmlEntitiesTablePtr 991xmlCopyEntitiesTable(xmlEntitiesTablePtr table) { 992 return(xmlHashCopy(table, xmlCopyEntity)); 993} 994#endif /* LIBXML_TREE_ENABLED */ 995 996#ifdef LIBXML_OUTPUT_ENABLED 997 998/** 999 * xmlDumpEntityContent: 1000 * @buf: An XML buffer. 1001 * @content: The entity content. 1002 * 1003 * This will dump the quoted string value, taking care of the special 1004 * treatment required by % 1005 */ 1006static void 1007xmlDumpEntityContent(xmlBufferPtr buf, const xmlChar *content) { 1008 if (xmlStrchr(content, '%')) { 1009 const xmlChar * base, *cur; 1010 1011 xmlBufferCCat(buf, "\""); 1012 base = cur = content; 1013 while (*cur != 0) { 1014 if (*cur == '"') { 1015 if (base != cur) 1016 xmlBufferAdd(buf, base, cur - base); 1017 xmlBufferAdd(buf, BAD_CAST "&quot;", 6); 1018 cur++; 1019 base = cur; 1020 } else if (*cur == '%') { 1021 if (base != cur) 1022 xmlBufferAdd(buf, base, cur - base); 1023 xmlBufferAdd(buf, BAD_CAST "&#x25;", 6); 1024 cur++; 1025 base = cur; 1026 } else { 1027 cur++; 1028 } 1029 } 1030 if (base != cur) 1031 xmlBufferAdd(buf, base, cur - base); 1032 xmlBufferCCat(buf, "\""); 1033 } else { 1034 xmlBufferWriteQuotedString(buf, content); 1035 } 1036} 1037 1038/** 1039 * xmlDumpEntityDecl: 1040 * @buf: An XML buffer. 1041 * @ent: An entity table 1042 * 1043 * This will dump the content of the entity table as an XML DTD definition 1044 */ 1045void 1046xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) { 1047 if ((buf == NULL) || (ent == NULL)) return; 1048 switch (ent->etype) { 1049 case XML_INTERNAL_GENERAL_ENTITY: 1050 xmlBufferWriteChar(buf, "<!ENTITY "); 1051 xmlBufferWriteCHAR(buf, ent->name); 1052 xmlBufferWriteChar(buf, " "); 1053 if (ent->orig != NULL) 1054 xmlBufferWriteQuotedString(buf, ent->orig); 1055 else 1056 xmlDumpEntityContent(buf, ent->content); 1057 xmlBufferWriteChar(buf, ">\n"); 1058 break; 1059 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1060 xmlBufferWriteChar(buf, "<!ENTITY "); 1061 xmlBufferWriteCHAR(buf, ent->name); 1062 if (ent->ExternalID != NULL) { 1063 xmlBufferWriteChar(buf, " PUBLIC "); 1064 xmlBufferWriteQuotedString(buf, ent->ExternalID); 1065 xmlBufferWriteChar(buf, " "); 1066 xmlBufferWriteQuotedString(buf, ent->SystemID); 1067 } else { 1068 xmlBufferWriteChar(buf, " SYSTEM "); 1069 xmlBufferWriteQuotedString(buf, ent->SystemID); 1070 } 1071 xmlBufferWriteChar(buf, ">\n"); 1072 break; 1073 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1074 xmlBufferWriteChar(buf, "<!ENTITY "); 1075 xmlBufferWriteCHAR(buf, ent->name); 1076 if (ent->ExternalID != NULL) { 1077 xmlBufferWriteChar(buf, " PUBLIC "); 1078 xmlBufferWriteQuotedString(buf, ent->ExternalID); 1079 xmlBufferWriteChar(buf, " "); 1080 xmlBufferWriteQuotedString(buf, ent->SystemID); 1081 } else { 1082 xmlBufferWriteChar(buf, " SYSTEM "); 1083 xmlBufferWriteQuotedString(buf, ent->SystemID); 1084 } 1085 if (ent->content != NULL) { /* Should be true ! */ 1086 xmlBufferWriteChar(buf, " NDATA "); 1087 if (ent->orig != NULL) 1088 xmlBufferWriteCHAR(buf, ent->orig); 1089 else 1090 xmlBufferWriteCHAR(buf, ent->content); 1091 } 1092 xmlBufferWriteChar(buf, ">\n"); 1093 break; 1094 case XML_INTERNAL_PARAMETER_ENTITY: 1095 xmlBufferWriteChar(buf, "<!ENTITY % "); 1096 xmlBufferWriteCHAR(buf, ent->name); 1097 xmlBufferWriteChar(buf, " "); 1098 if (ent->orig == NULL) 1099 xmlDumpEntityContent(buf, ent->content); 1100 else 1101 xmlBufferWriteQuotedString(buf, ent->orig); 1102 xmlBufferWriteChar(buf, ">\n"); 1103 break; 1104 case XML_EXTERNAL_PARAMETER_ENTITY: 1105 xmlBufferWriteChar(buf, "<!ENTITY % "); 1106 xmlBufferWriteCHAR(buf, ent->name); 1107 if (ent->ExternalID != NULL) { 1108 xmlBufferWriteChar(buf, " PUBLIC "); 1109 xmlBufferWriteQuotedString(buf, ent->ExternalID); 1110 xmlBufferWriteChar(buf, " "); 1111 xmlBufferWriteQuotedString(buf, ent->SystemID); 1112 } else { 1113 xmlBufferWriteChar(buf, " SYSTEM "); 1114 xmlBufferWriteQuotedString(buf, ent->SystemID); 1115 } 1116 xmlBufferWriteChar(buf, ">\n"); 1117 break; 1118 default: 1119 xmlEntitiesErr(XML_DTD_UNKNOWN_ENTITY, 1120 "xmlDumpEntitiesDecl: internal: unknown type entity type"); 1121 } 1122} 1123 1124/** 1125 * xmlDumpEntityDeclScan: 1126 * @ent: An entity table 1127 * @buf: An XML buffer. 1128 * 1129 * When using the hash table scan function, arguments need to be reversed 1130 */ 1131static void 1132xmlDumpEntityDeclScan(void *ent, void *buf, 1133 const xmlChar *name ATTRIBUTE_UNUSED) { 1134 xmlDumpEntityDecl((xmlBufferPtr) buf, (xmlEntityPtr) ent); 1135} 1136 1137/** 1138 * xmlDumpEntitiesTable: 1139 * @buf: An XML buffer. 1140 * @table: An entity table 1141 * 1142 * This will dump the content of the entity table as an XML DTD definition 1143 */ 1144void 1145xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) { 1146 xmlHashScan(table, xmlDumpEntityDeclScan, buf); 1147} 1148#endif /* LIBXML_OUTPUT_ENABLED */