sdk/lib/3rdparty/libxml2/HTMLtree.c at master

huwcampbell.com / reactos
fork atom
Reactos
fork atom
reactos / sdk / lib / 3rdparty / libxml2 / HTMLtree.c
at master 1199 lines 34 kB view raw
wrap content
Timo Kreuzer [LIBXML2] Update to release 2.12.8 (from Wine-10.0) 4mo ago
8b7ade14
   1/*
   2 * HTMLtree.c : implementation of access function for an HTML tree.
   3 *
   4 * See Copyright for the status of this software.
   5 *
   6 * daniel@veillard.com
   7 */
   8
   9
  10#define IN_LIBXML
  11#include "libxml.h"
  12#ifdef LIBXML_HTML_ENABLED
  13
  14#include <string.h> /* for memset() only ! */
  15#include <ctype.h>
  16#include <stdlib.h>
  17
  18#include <libxml/xmlmemory.h>
  19#include <libxml/HTMLparser.h>
  20#include <libxml/HTMLtree.h>
  21#include <libxml/entities.h>
  22#include <libxml/xmlerror.h>
  23#include <libxml/parserInternals.h>
  24#include <libxml/uri.h>
  25
  26#include "private/buf.h"
  27#include "private/error.h"
  28#include "private/io.h"
  29#include "private/save.h"
  30
  31/************************************************************************
  32 *									*
  33 *		Getting/Setting encoding meta tags			*
  34 *									*
  35 ************************************************************************/
  36
  37/**
  38 * htmlGetMetaEncoding:
  39 * @doc:  the document
  40 *
  41 * Encoding definition lookup in the Meta tags
  42 *
  43 * Returns the current encoding as flagged in the HTML source
  44 */
  45const xmlChar *
  46htmlGetMetaEncoding(htmlDocPtr doc) {
  47    htmlNodePtr cur;
  48    const xmlChar *content;
  49    const xmlChar *encoding;
  50
  51    if (doc == NULL)
  52	return(NULL);
  53    cur = doc->children;
  54
  55    /*
  56     * Search the html
  57     */
  58    while (cur != NULL) {
  59	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  60	    if (xmlStrEqual(cur->name, BAD_CAST"html"))
  61		break;
  62	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
  63		goto found_head;
  64	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
  65		goto found_meta;
  66	}
  67	cur = cur->next;
  68    }
  69    if (cur == NULL)
  70	return(NULL);
  71    cur = cur->children;
  72
  73    /*
  74     * Search the head
  75     */
  76    while (cur != NULL) {
  77	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  78	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
  79		break;
  80	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
  81		goto found_meta;
  82	}
  83	cur = cur->next;
  84    }
  85    if (cur == NULL)
  86	return(NULL);
  87found_head:
  88    cur = cur->children;
  89
  90    /*
  91     * Search the meta elements
  92     */
  93found_meta:
  94    while (cur != NULL) {
  95	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  96	    if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
  97		xmlAttrPtr attr = cur->properties;
  98		int http;
  99		const xmlChar *value;
 100
 101		content = NULL;
 102		http = 0;
 103		while (attr != NULL) {
 104		    if ((attr->children != NULL) &&
 105		        (attr->children->type == XML_TEXT_NODE) &&
 106		        (attr->children->next == NULL)) {
 107			value = attr->children->content;
 108			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 109			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 110			    http = 1;
 111			else if ((value != NULL)
 112			 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 113			    content = value;
 114			if ((http != 0) && (content != NULL))
 115			    goto found_content;
 116		    }
 117		    attr = attr->next;
 118		}
 119	    }
 120	}
 121	cur = cur->next;
 122    }
 123    return(NULL);
 124
 125found_content:
 126    encoding = xmlStrstr(content, BAD_CAST"charset=");
 127    if (encoding == NULL)
 128	encoding = xmlStrstr(content, BAD_CAST"Charset=");
 129    if (encoding == NULL)
 130	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
 131    if (encoding != NULL) {
 132	encoding += 8;
 133    } else {
 134	encoding = xmlStrstr(content, BAD_CAST"charset =");
 135	if (encoding == NULL)
 136	    encoding = xmlStrstr(content, BAD_CAST"Charset =");
 137	if (encoding == NULL)
 138	    encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
 139	if (encoding != NULL)
 140	    encoding += 9;
 141    }
 142    if (encoding != NULL) {
 143	while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
 144    }
 145    return(encoding);
 146}
 147
 148/**
 149 * htmlSetMetaEncoding:
 150 * @doc:  the document
 151 * @encoding:  the encoding string
 152 *
 153 * Sets the current encoding in the Meta tags
 154 * NOTE: this will not change the document content encoding, just
 155 * the META flag associated.
 156 *
 157 * Returns 0 in case of success and -1 in case of error
 158 */
 159int
 160htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
 161    htmlNodePtr cur, meta = NULL, head = NULL;
 162    const xmlChar *content = NULL;
 163    char newcontent[100];
 164
 165    newcontent[0] = 0;
 166
 167    if (doc == NULL)
 168	return(-1);
 169
 170    /* html isn't a real encoding it's just libxml2 way to get entities */
 171    if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
 172        return(-1);
 173
 174    if (encoding != NULL) {
 175	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
 176                (char *)encoding);
 177	newcontent[sizeof(newcontent) - 1] = 0;
 178    }
 179
 180    cur = doc->children;
 181
 182    /*
 183     * Search the html
 184     */
 185    while (cur != NULL) {
 186	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 187	    if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
 188		break;
 189	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 190		goto found_head;
 191	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
 192		goto found_meta;
 193	}
 194	cur = cur->next;
 195    }
 196    if (cur == NULL)
 197	return(-1);
 198    cur = cur->children;
 199
 200    /*
 201     * Search the head
 202     */
 203    while (cur != NULL) {
 204	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 205	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 206		break;
 207	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
 208                head = cur->parent;
 209		goto found_meta;
 210            }
 211	}
 212	cur = cur->next;
 213    }
 214    if (cur == NULL)
 215	return(-1);
 216found_head:
 217    head = cur;
 218    if (cur->children == NULL)
 219        goto create;
 220    cur = cur->children;
 221
 222found_meta:
 223    /*
 224     * Search and update all the remaining the meta elements carrying
 225     * encoding information
 226     */
 227    while (cur != NULL) {
 228	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 229	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
 230		xmlAttrPtr attr = cur->properties;
 231		int http;
 232		const xmlChar *value;
 233
 234		content = NULL;
 235		http = 0;
 236		while (attr != NULL) {
 237		    if ((attr->children != NULL) &&
 238		        (attr->children->type == XML_TEXT_NODE) &&
 239		        (attr->children->next == NULL)) {
 240			value = attr->children->content;
 241			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 242			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 243			    http = 1;
 244			else
 245                        {
 246                           if ((value != NULL) &&
 247                               (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 248			       content = value;
 249                        }
 250		        if ((http != 0) && (content != NULL))
 251			    break;
 252		    }
 253		    attr = attr->next;
 254		}
 255		if ((http != 0) && (content != NULL)) {
 256		    meta = cur;
 257		    break;
 258		}
 259
 260	    }
 261	}
 262	cur = cur->next;
 263    }
 264create:
 265    if (meta == NULL) {
 266        if ((encoding != NULL) && (head != NULL)) {
 267            /*
 268             * Create a new Meta element with the right attributes
 269             */
 270
 271            meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
 272            if (head->children == NULL)
 273                xmlAddChild(head, meta);
 274            else
 275                xmlAddPrevSibling(head->children, meta);
 276            xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
 277            xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 278        }
 279    } else {
 280        /* remove the meta tag if NULL is passed */
 281        if (encoding == NULL) {
 282            xmlUnlinkNode(meta);
 283            xmlFreeNode(meta);
 284        }
 285        /* change the document only if there is a real encoding change */
 286        else if (xmlStrcasestr(content, encoding) == NULL) {
 287            xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 288        }
 289    }
 290
 291
 292    return(0);
 293}
 294
 295/**
 296 * booleanHTMLAttrs:
 297 *
 298 * These are the HTML attributes which will be output
 299 * in minimized form, i.e. <option selected="selected"> will be
 300 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 301 *
 302 */
 303static const char* const htmlBooleanAttrs[] = {
 304  "checked", "compact", "declare", "defer", "disabled", "ismap",
 305  "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
 306  "selected", NULL
 307};
 308
 309
 310/**
 311 * htmlIsBooleanAttr:
 312 * @name:  the name of the attribute to check
 313 *
 314 * Determine if a given attribute is a boolean attribute.
 315 *
 316 * returns: false if the attribute is not boolean, true otherwise.
 317 */
 318int
 319htmlIsBooleanAttr(const xmlChar *name)
 320{
 321    int i = 0;
 322
 323    while (htmlBooleanAttrs[i] != NULL) {
 324        if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
 325            return 1;
 326        i++;
 327    }
 328    return 0;
 329}
 330
 331#ifdef LIBXML_OUTPUT_ENABLED
 332/************************************************************************
 333 *									*
 334 *			Output error handlers				*
 335 *									*
 336 ************************************************************************/
 337/**
 338 * htmlSaveErrMemory:
 339 * @extra:  extra information
 340 *
 341 * Handle an out of memory condition
 342 */
 343static void
 344htmlSaveErrMemory(const char *extra)
 345{
 346    __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
 347}
 348
 349/**
 350 * htmlSaveErr:
 351 * @code:  the error number
 352 * @node:  the location of the error.
 353 * @extra:  extra information
 354 *
 355 * Handle an out of memory condition
 356 */
 357static void
 358htmlSaveErr(int code, xmlNodePtr node, const char *extra)
 359{
 360    const char *msg = NULL;
 361
 362    switch(code) {
 363        case XML_SAVE_NOT_UTF8:
 364	    msg = "string is not in UTF-8\n";
 365	    break;
 366	case XML_SAVE_CHAR_INVALID:
 367	    msg = "invalid character value\n";
 368	    break;
 369	case XML_SAVE_UNKNOWN_ENCODING:
 370	    msg = "unknown encoding %s\n";
 371	    break;
 372	case XML_SAVE_NO_DOCTYPE:
 373	    msg = "HTML has no DOCTYPE\n";
 374	    break;
 375	default:
 376	    msg = "unexpected error number\n";
 377    }
 378    __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
 379}
 380
 381/************************************************************************
 382 *									*
 383 *		Dumping HTML tree content to a simple buffer		*
 384 *									*
 385 ************************************************************************/
 386
 387/**
 388 * htmlBufNodeDumpFormat:
 389 * @buf:  the xmlBufPtr output
 390 * @doc:  the document
 391 * @cur:  the current node
 392 * @format:  should formatting spaces been added
 393 *
 394 * Dump an HTML node, recursive behaviour,children are printed too.
 395 *
 396 * Returns the number of byte written or -1 in case of error
 397 */
 398static size_t
 399htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 400	           int format) {
 401    size_t use;
 402    int ret;
 403    xmlOutputBufferPtr outbuf;
 404
 405    if (cur == NULL) {
 406	return (-1);
 407    }
 408    if (buf == NULL) {
 409	return (-1);
 410    }
 411    outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
 412    if (outbuf == NULL) {
 413        htmlSaveErrMemory("allocating HTML output buffer");
 414	return (-1);
 415    }
 416    memset(outbuf, 0, sizeof(xmlOutputBuffer));
 417    outbuf->buffer = buf;
 418    outbuf->encoder = NULL;
 419    outbuf->writecallback = NULL;
 420    outbuf->closecallback = NULL;
 421    outbuf->context = NULL;
 422    outbuf->written = 0;
 423
 424    use = xmlBufUse(buf);
 425    htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
 426    xmlFree(outbuf);
 427    ret = xmlBufUse(buf) - use;
 428    return (ret);
 429}
 430
 431/**
 432 * htmlNodeDump:
 433 * @buf:  the HTML buffer output
 434 * @doc:  the document
 435 * @cur:  the current node
 436 *
 437 * Dump an HTML node, recursive behaviour,children are printed too,
 438 * and formatting returns are added.
 439 *
 440 * Returns the number of byte written or -1 in case of error
 441 */
 442int
 443htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
 444    xmlBufPtr buffer;
 445    size_t ret;
 446
 447    if ((buf == NULL) || (cur == NULL))
 448        return(-1);
 449
 450    xmlInitParser();
 451    buffer = xmlBufFromBuffer(buf);
 452    if (buffer == NULL)
 453        return(-1);
 454
 455    ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
 456
 457    xmlBufBackToBuffer(buffer);
 458
 459    if (ret > INT_MAX)
 460        return(-1);
 461    return((int) ret);
 462}
 463
 464/**
 465 * htmlNodeDumpFileFormat:
 466 * @out:  the FILE pointer
 467 * @doc:  the document
 468 * @cur:  the current node
 469 * @encoding: the document encoding
 470 * @format:  should formatting spaces been added
 471 *
 472 * Dump an HTML node, recursive behaviour,children are printed too.
 473 *
 474 * TODO: if encoding == NULL try to save in the doc encoding
 475 *
 476 * returns: the number of byte written or -1 in case of failure.
 477 */
 478int
 479htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
 480	               xmlNodePtr cur, const char *encoding, int format) {
 481    xmlOutputBufferPtr buf;
 482    xmlCharEncodingHandlerPtr handler = NULL;
 483    int ret;
 484
 485    xmlInitParser();
 486
 487    if (encoding != NULL) {
 488	xmlCharEncoding enc;
 489
 490	enc = xmlParseCharEncoding(encoding);
 491	if (enc != XML_CHAR_ENCODING_UTF8) {
 492	    handler = xmlFindCharEncodingHandler(encoding);
 493	    if (handler == NULL)
 494		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
 495	}
 496    } else {
 497        /*
 498         * Fallback to HTML or ASCII when the encoding is unspecified
 499         */
 500        if (handler == NULL)
 501            handler = xmlFindCharEncodingHandler("HTML");
 502        if (handler == NULL)
 503            handler = xmlFindCharEncodingHandler("ascii");
 504    }
 505
 506    /*
 507     * save the content to a temp buffer.
 508     */
 509    buf = xmlOutputBufferCreateFile(out, handler);
 510    if (buf == NULL) return(0);
 511
 512    htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format);
 513
 514    ret = xmlOutputBufferClose(buf);
 515    return(ret);
 516}
 517
 518/**
 519 * htmlNodeDumpFile:
 520 * @out:  the FILE pointer
 521 * @doc:  the document
 522 * @cur:  the current node
 523 *
 524 * Dump an HTML node, recursive behaviour,children are printed too,
 525 * and formatting returns are added.
 526 */
 527void
 528htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
 529    htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
 530}
 531
 532/**
 533 * htmlDocDumpMemoryFormat:
 534 * @cur:  the document
 535 * @mem:  OUT: the memory pointer
 536 * @size:  OUT: the memory length
 537 * @format:  should formatting spaces been added
 538 *
 539 * Dump an HTML document in memory and return the xmlChar * and it's size.
 540 * It's up to the caller to free the memory.
 541 */
 542void
 543htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
 544    xmlOutputBufferPtr buf;
 545    xmlCharEncodingHandlerPtr handler = NULL;
 546    const char *encoding;
 547
 548    xmlInitParser();
 549
 550    if ((mem == NULL) || (size == NULL))
 551        return;
 552    if (cur == NULL) {
 553	*mem = NULL;
 554	*size = 0;
 555	return;
 556    }
 557
 558    encoding = (const char *) htmlGetMetaEncoding(cur);
 559
 560    if (encoding != NULL) {
 561	xmlCharEncoding enc;
 562
 563	enc = xmlParseCharEncoding(encoding);
 564	if (enc != XML_CHAR_ENCODING_UTF8) {
 565	    handler = xmlFindCharEncodingHandler(encoding);
 566	    if (handler == NULL)
 567                htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
 568
 569	}
 570    } else {
 571        /*
 572         * Fallback to HTML or ASCII when the encoding is unspecified
 573         */
 574        if (handler == NULL)
 575            handler = xmlFindCharEncodingHandler("HTML");
 576        if (handler == NULL)
 577            handler = xmlFindCharEncodingHandler("ascii");
 578    }
 579
 580    buf = xmlAllocOutputBufferInternal(handler);
 581    if (buf == NULL) {
 582	*mem = NULL;
 583	*size = 0;
 584	return;
 585    }
 586
 587    htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
 588
 589    xmlOutputBufferFlush(buf);
 590    if (buf->conv != NULL) {
 591	*size = xmlBufUse(buf->conv);
 592	*mem = xmlStrndup(xmlBufContent(buf->conv), *size);
 593    } else {
 594	*size = xmlBufUse(buf->buffer);
 595	*mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
 596    }
 597    (void)xmlOutputBufferClose(buf);
 598}
 599
 600/**
 601 * htmlDocDumpMemory:
 602 * @cur:  the document
 603 * @mem:  OUT: the memory pointer
 604 * @size:  OUT: the memory length
 605 *
 606 * Dump an HTML document in memory and return the xmlChar * and it's size.
 607 * It's up to the caller to free the memory.
 608 */
 609void
 610htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
 611	htmlDocDumpMemoryFormat(cur, mem, size, 1);
 612}
 613
 614
 615/************************************************************************
 616 *									*
 617 *		Dumping HTML tree content to an I/O output buffer	*
 618 *									*
 619 ************************************************************************/
 620
 621/**
 622 * htmlDtdDumpOutput:
 623 * @buf:  the HTML buffer output
 624 * @doc:  the document
 625 * @encoding:  the encoding string
 626 *
 627 * TODO: check whether encoding is needed
 628 *
 629 * Dump the HTML document DTD, if any.
 630 */
 631static void
 632htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 633	          const char *encoding ATTRIBUTE_UNUSED) {
 634    xmlDtdPtr cur = doc->intSubset;
 635
 636    if (cur == NULL) {
 637	htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
 638	return;
 639    }
 640    xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
 641    xmlOutputBufferWriteString(buf, (const char *)cur->name);
 642    if (cur->ExternalID != NULL) {
 643	xmlOutputBufferWriteString(buf, " PUBLIC ");
 644	xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
 645	if (cur->SystemID != NULL) {
 646	    xmlOutputBufferWriteString(buf, " ");
 647	    xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
 648	}
 649    } else if (cur->SystemID != NULL &&
 650	       xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
 651	xmlOutputBufferWriteString(buf, " SYSTEM ");
 652	xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
 653    }
 654    xmlOutputBufferWriteString(buf, ">\n");
 655}
 656
 657/**
 658 * htmlAttrDumpOutput:
 659 * @buf:  the HTML buffer output
 660 * @doc:  the document
 661 * @cur:  the attribute pointer
 662 *
 663 * Dump an HTML attribute
 664 */
 665static void
 666htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
 667    xmlChar *value;
 668
 669    /*
 670     * The html output method should not escape a & character
 671     * occurring in an attribute value immediately followed by
 672     * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
 673     * This is implemented in xmlEncodeEntitiesReentrant
 674     */
 675
 676    if (cur == NULL) {
 677	return;
 678    }
 679    xmlOutputBufferWriteString(buf, " ");
 680    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 681        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 682	xmlOutputBufferWriteString(buf, ":");
 683    }
 684    xmlOutputBufferWriteString(buf, (const char *)cur->name);
 685    if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
 686	value = xmlNodeListGetString(doc, cur->children, 0);
 687	if (value) {
 688	    xmlOutputBufferWriteString(buf, "=");
 689	    if ((cur->ns == NULL) && (cur->parent != NULL) &&
 690		(cur->parent->ns == NULL) &&
 691		((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
 692	         (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
 693		 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
 694		 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
 695		  (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
 696		xmlChar *escaped;
 697		xmlChar *tmp = value;
 698
 699		while (IS_BLANK_CH(*tmp)) tmp++;
 700
 701		/*
 702                 * Angle brackets are technically illegal in URIs, but they're
 703                 * used in server side includes, for example. Curly brackets
 704                 * are illegal as well and often used in templates.
 705                 * Don't escape non-whitespace, printable ASCII chars for
 706                 * improved interoperability. Only escape space, control
 707                 * and non-ASCII chars.
 708		 */
 709		escaped = xmlURIEscapeStr(tmp,
 710                        BAD_CAST "\"#$%&+,/:;<=>?@[\\]^`{|}");
 711		if (escaped != NULL) {
 712		    xmlBufWriteQuotedString(buf->buffer, escaped);
 713		    xmlFree(escaped);
 714		} else {
 715		    xmlBufWriteQuotedString(buf->buffer, value);
 716		}
 717	    } else {
 718		xmlBufWriteQuotedString(buf->buffer, value);
 719	    }
 720	    xmlFree(value);
 721	} else  {
 722	    xmlOutputBufferWriteString(buf, "=\"\"");
 723	}
 724    }
 725}
 726
 727/**
 728 * htmlNodeDumpFormatOutput:
 729 * @buf:  the HTML buffer output
 730 * @doc:  the document
 731 * @cur:  the current node
 732 * @encoding:  the encoding string (unused)
 733 * @format:  should formatting spaces been added
 734 *
 735 * Dump an HTML node, recursive behaviour,children are printed too.
 736 */
 737void
 738htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 739	                 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
 740                         int format) {
 741    xmlNodePtr root, parent;
 742    xmlAttrPtr attr;
 743    const htmlElemDesc * info;
 744
 745    xmlInitParser();
 746
 747    if ((cur == NULL) || (buf == NULL)) {
 748	return;
 749    }
 750
 751    root = cur;
 752    parent = cur->parent;
 753    while (1) {
 754        switch (cur->type) {
 755        case XML_HTML_DOCUMENT_NODE:
 756        case XML_DOCUMENT_NODE:
 757            if (((xmlDocPtr) cur)->intSubset != NULL) {
 758                htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
 759            }
 760            if (cur->children != NULL) {
 761                /* Always validate cur->parent when descending. */
 762                if (cur->parent == parent) {
 763                    parent = cur;
 764                    cur = cur->children;
 765                    continue;
 766                }
 767            } else {
 768                xmlOutputBufferWriteString(buf, "\n");
 769            }
 770            break;
 771
 772        case XML_ELEMENT_NODE:
 773            /*
 774             * Some users like lxml are known to pass nodes with a corrupted
 775             * tree structure. Fall back to a recursive call to handle this
 776             * case.
 777             */
 778            if ((cur->parent != parent) && (cur->children != NULL)) {
 779                htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 780                break;
 781            }
 782
 783            /*
 784             * Get specific HTML info for that node.
 785             */
 786            if (cur->ns == NULL)
 787                info = htmlTagLookup(cur->name);
 788            else
 789                info = NULL;
 790
 791            xmlOutputBufferWriteString(buf, "<");
 792            if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 793                xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 794                xmlOutputBufferWriteString(buf, ":");
 795            }
 796            xmlOutputBufferWriteString(buf, (const char *)cur->name);
 797            if (cur->nsDef)
 798                xmlNsListDumpOutput(buf, cur->nsDef);
 799            attr = cur->properties;
 800            while (attr != NULL) {
 801                htmlAttrDumpOutput(buf, doc, attr);
 802                attr = attr->next;
 803            }
 804
 805            if ((info != NULL) && (info->empty)) {
 806                xmlOutputBufferWriteString(buf, ">");
 807            } else if (cur->children == NULL) {
 808                if ((info != NULL) && (info->saveEndTag != 0) &&
 809                    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
 810                    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
 811                    xmlOutputBufferWriteString(buf, ">");
 812                } else {
 813                    xmlOutputBufferWriteString(buf, "></");
 814                    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 815                        xmlOutputBufferWriteString(buf,
 816                                (const char *)cur->ns->prefix);
 817                        xmlOutputBufferWriteString(buf, ":");
 818                    }
 819                    xmlOutputBufferWriteString(buf, (const char *)cur->name);
 820                    xmlOutputBufferWriteString(buf, ">");
 821                }
 822            } else {
 823                xmlOutputBufferWriteString(buf, ">");
 824                if ((format) && (info != NULL) && (!info->isinline) &&
 825                    (cur->children->type != HTML_TEXT_NODE) &&
 826                    (cur->children->type != HTML_ENTITY_REF_NODE) &&
 827                    (cur->children != cur->last) &&
 828                    (cur->name != NULL) &&
 829                    (cur->name[0] != 'p')) /* p, pre, param */
 830                    xmlOutputBufferWriteString(buf, "\n");
 831                parent = cur;
 832                cur = cur->children;
 833                continue;
 834            }
 835
 836            if ((format) && (cur->next != NULL) &&
 837                (info != NULL) && (!info->isinline)) {
 838                if ((cur->next->type != HTML_TEXT_NODE) &&
 839                    (cur->next->type != HTML_ENTITY_REF_NODE) &&
 840                    (parent != NULL) &&
 841                    (parent->name != NULL) &&
 842                    (parent->name[0] != 'p')) /* p, pre, param */
 843                    xmlOutputBufferWriteString(buf, "\n");
 844            }
 845
 846            break;
 847
 848        case XML_ATTRIBUTE_NODE:
 849            htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur);
 850            break;
 851
 852        case HTML_TEXT_NODE:
 853            if (cur->content == NULL)
 854                break;
 855            if (((cur->name == (const xmlChar *)xmlStringText) ||
 856                 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
 857                ((parent == NULL) ||
 858                 ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
 859                  (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
 860                xmlChar *buffer;
 861
 862                buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
 863                if (buffer != NULL) {
 864                    xmlOutputBufferWriteString(buf, (const char *)buffer);
 865                    xmlFree(buffer);
 866                }
 867            } else {
 868                xmlOutputBufferWriteString(buf, (const char *)cur->content);
 869            }
 870            break;
 871
 872        case HTML_COMMENT_NODE:
 873            if (cur->content != NULL) {
 874                xmlOutputBufferWriteString(buf, "<!--");
 875                xmlOutputBufferWriteString(buf, (const char *)cur->content);
 876                xmlOutputBufferWriteString(buf, "-->");
 877            }
 878            break;
 879
 880        case HTML_PI_NODE:
 881            if (cur->name != NULL) {
 882                xmlOutputBufferWriteString(buf, "<?");
 883                xmlOutputBufferWriteString(buf, (const char *)cur->name);
 884                if (cur->content != NULL) {
 885                    xmlOutputBufferWriteString(buf, " ");
 886                    xmlOutputBufferWriteString(buf,
 887                            (const char *)cur->content);
 888                }
 889                xmlOutputBufferWriteString(buf, ">");
 890            }
 891            break;
 892
 893        case HTML_ENTITY_REF_NODE:
 894            xmlOutputBufferWriteString(buf, "&");
 895            xmlOutputBufferWriteString(buf, (const char *)cur->name);
 896            xmlOutputBufferWriteString(buf, ";");
 897            break;
 898
 899        case HTML_PRESERVE_NODE:
 900            if (cur->content != NULL) {
 901                xmlOutputBufferWriteString(buf, (const char *)cur->content);
 902            }
 903            break;
 904
 905        default:
 906            break;
 907        }
 908
 909        while (1) {
 910            if (cur == root)
 911                return;
 912            if (cur->next != NULL) {
 913                cur = cur->next;
 914                break;
 915            }
 916
 917            cur = parent;
 918            /* cur->parent was validated when descending. */
 919            parent = cur->parent;
 920
 921            if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
 922                (cur->type == XML_DOCUMENT_NODE)) {
 923                xmlOutputBufferWriteString(buf, "\n");
 924            } else {
 925                if ((format) && (cur->ns == NULL))
 926                    info = htmlTagLookup(cur->name);
 927                else
 928                    info = NULL;
 929
 930                if ((format) && (info != NULL) && (!info->isinline) &&
 931                    (cur->last->type != HTML_TEXT_NODE) &&
 932                    (cur->last->type != HTML_ENTITY_REF_NODE) &&
 933                    (cur->children != cur->last) &&
 934                    (cur->name != NULL) &&
 935                    (cur->name[0] != 'p')) /* p, pre, param */
 936                    xmlOutputBufferWriteString(buf, "\n");
 937
 938                xmlOutputBufferWriteString(buf, "</");
 939                if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 940                    xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 941                    xmlOutputBufferWriteString(buf, ":");
 942                }
 943                xmlOutputBufferWriteString(buf, (const char *)cur->name);
 944                xmlOutputBufferWriteString(buf, ">");
 945
 946                if ((format) && (info != NULL) && (!info->isinline) &&
 947                    (cur->next != NULL)) {
 948                    if ((cur->next->type != HTML_TEXT_NODE) &&
 949                        (cur->next->type != HTML_ENTITY_REF_NODE) &&
 950                        (parent != NULL) &&
 951                        (parent->name != NULL) &&
 952                        (parent->name[0] != 'p')) /* p, pre, param */
 953                        xmlOutputBufferWriteString(buf, "\n");
 954                }
 955            }
 956        }
 957    }
 958}
 959
 960/**
 961 * htmlNodeDumpOutput:
 962 * @buf:  the HTML buffer output
 963 * @doc:  the document
 964 * @cur:  the current node
 965 * @encoding:  the encoding string (unused)
 966 *
 967 * Dump an HTML node, recursive behaviour,children are printed too,
 968 * and formatting returns/spaces are added.
 969 */
 970void
 971htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 972	           xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
 973    htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1);
 974}
 975
 976/**
 977 * htmlDocContentDumpFormatOutput:
 978 * @buf:  the HTML buffer output
 979 * @cur:  the document
 980 * @encoding:  the encoding string (unused)
 981 * @format:  should formatting spaces been added
 982 *
 983 * Dump an HTML document.
 984 */
 985void
 986htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
 987	                       const char *encoding ATTRIBUTE_UNUSED,
 988                               int format) {
 989    int type = 0;
 990    if (cur) {
 991        type = cur->type;
 992        cur->type = XML_HTML_DOCUMENT_NODE;
 993    }
 994    htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format);
 995    if (cur)
 996        cur->type = (xmlElementType) type;
 997}
 998
 999/**
1000 * htmlDocContentDumpOutput:
1001 * @buf:  the HTML buffer output
1002 * @cur:  the document
1003 * @encoding:  the encoding string (unused)
1004 *
1005 * Dump an HTML document. Formatting return/spaces are added.
1006 */
1007void
1008htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1009	                 const char *encoding ATTRIBUTE_UNUSED) {
1010    htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1);
1011}
1012
1013/************************************************************************
1014 *									*
1015 *		Saving functions front-ends				*
1016 *									*
1017 ************************************************************************/
1018
1019/**
1020 * htmlDocDump:
1021 * @f:  the FILE*
1022 * @cur:  the document
1023 *
1024 * Dump an HTML document to an open FILE.
1025 *
1026 * returns: the number of byte written or -1 in case of failure.
1027 */
1028int
1029htmlDocDump(FILE *f, xmlDocPtr cur) {
1030    xmlOutputBufferPtr buf;
1031    xmlCharEncodingHandlerPtr handler = NULL;
1032    const char *encoding;
1033    int ret;
1034
1035    xmlInitParser();
1036
1037    if ((cur == NULL) || (f == NULL)) {
1038	return(-1);
1039    }
1040
1041    encoding = (const char *) htmlGetMetaEncoding(cur);
1042
1043    if (encoding != NULL) {
1044	xmlCharEncoding enc;
1045
1046	enc = xmlParseCharEncoding(encoding);
1047	if (enc != XML_CHAR_ENCODING_UTF8) {
1048	    handler = xmlFindCharEncodingHandler(encoding);
1049	    if (handler == NULL)
1050		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1051	}
1052    } else {
1053        /*
1054         * Fallback to HTML or ASCII when the encoding is unspecified
1055         */
1056        if (handler == NULL)
1057            handler = xmlFindCharEncodingHandler("HTML");
1058        if (handler == NULL)
1059            handler = xmlFindCharEncodingHandler("ascii");
1060    }
1061
1062    buf = xmlOutputBufferCreateFile(f, handler);
1063    if (buf == NULL) return(-1);
1064    htmlDocContentDumpOutput(buf, cur, NULL);
1065
1066    ret = xmlOutputBufferClose(buf);
1067    return(ret);
1068}
1069
1070/**
1071 * htmlSaveFile:
1072 * @filename:  the filename (or URL)
1073 * @cur:  the document
1074 *
1075 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1076 * used.
1077 * returns: the number of byte written or -1 in case of failure.
1078 */
1079int
1080htmlSaveFile(const char *filename, xmlDocPtr cur) {
1081    xmlOutputBufferPtr buf;
1082    xmlCharEncodingHandlerPtr handler = NULL;
1083    const char *encoding;
1084    int ret;
1085
1086    if ((cur == NULL) || (filename == NULL))
1087        return(-1);
1088
1089    xmlInitParser();
1090
1091    encoding = (const char *) htmlGetMetaEncoding(cur);
1092
1093    if (encoding != NULL) {
1094	xmlCharEncoding enc;
1095
1096	enc = xmlParseCharEncoding(encoding);
1097	if (enc != XML_CHAR_ENCODING_UTF8) {
1098	    handler = xmlFindCharEncodingHandler(encoding);
1099	    if (handler == NULL)
1100		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1101	}
1102    } else {
1103        /*
1104         * Fallback to HTML or ASCII when the encoding is unspecified
1105         */
1106        if (handler == NULL)
1107            handler = xmlFindCharEncodingHandler("HTML");
1108        if (handler == NULL)
1109            handler = xmlFindCharEncodingHandler("ascii");
1110    }
1111
1112    /*
1113     * save the content to a temp buffer.
1114     */
1115    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1116    if (buf == NULL) return(0);
1117
1118    htmlDocContentDumpOutput(buf, cur, NULL);
1119
1120    ret = xmlOutputBufferClose(buf);
1121    return(ret);
1122}
1123
1124/**
1125 * htmlSaveFileFormat:
1126 * @filename:  the filename
1127 * @cur:  the document
1128 * @format:  should formatting spaces been added
1129 * @encoding: the document encoding
1130 *
1131 * Dump an HTML document to a file using a given encoding.
1132 *
1133 * returns: the number of byte written or -1 in case of failure.
1134 */
1135int
1136htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1137	           const char *encoding, int format) {
1138    xmlOutputBufferPtr buf;
1139    xmlCharEncodingHandlerPtr handler = NULL;
1140    int ret;
1141
1142    if ((cur == NULL) || (filename == NULL))
1143        return(-1);
1144
1145    xmlInitParser();
1146
1147    if (encoding != NULL) {
1148	xmlCharEncoding enc;
1149
1150	enc = xmlParseCharEncoding(encoding);
1151	if (enc != XML_CHAR_ENCODING_UTF8) {
1152	    handler = xmlFindCharEncodingHandler(encoding);
1153	    if (handler == NULL)
1154		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1155	}
1156        htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1157    } else {
1158	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1159
1160        /*
1161         * Fallback to HTML or ASCII when the encoding is unspecified
1162         */
1163        if (handler == NULL)
1164            handler = xmlFindCharEncodingHandler("HTML");
1165        if (handler == NULL)
1166            handler = xmlFindCharEncodingHandler("ascii");
1167    }
1168
1169    /*
1170     * save the content to a temp buffer.
1171     */
1172    buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1173    if (buf == NULL) return(0);
1174
1175    htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1176
1177    ret = xmlOutputBufferClose(buf);
1178    return(ret);
1179}
1180
1181/**
1182 * htmlSaveFileEnc:
1183 * @filename:  the filename
1184 * @cur:  the document
1185 * @encoding: the document encoding
1186 *
1187 * Dump an HTML document to a file using a given encoding
1188 * and formatting returns/spaces are added.
1189 *
1190 * returns: the number of byte written or -1 in case of failure.
1191 */
1192int
1193htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1194    return(htmlSaveFileFormat(filename, cur, encoding, 1));
1195}
1196
1197#endif /* LIBXML_OUTPUT_ENABLED */
1198
1199#endif /* LIBXML_HTML_ENABLED */