Error Buddy
Do you have an error message from your application? Then find the answer with Error Buddy. You can search over 40,000 source code files and troubleshooting documents using our beta Lucene/Nutch search interface or, if you prefer, search as usual with Google. With LXR technology you can drill right down to the line of source code the message came from, with full cross-referencing.
If searching does not give you the answer you need, or you are still unsure what the error means, you can post your question to the community forums by following the link included in the search results.
[1.6]001 /* 002 * HTMLtree.c : implementation of access function for an HTML tree. 003 * 004 * See Copyright for the status of this software. 005 * 006 * daniel@veillard.com 007 */ 008 009 010 #define IN_LIBXML 011 #include "libxml.h" 012 #ifdef LIBXML_HTML_ENABLED 013 014 #include <string.h> /* for memset() only ! */ 015 016 #ifdef HAVE_CTYPE_H 017 #include <ctype.h> 018 #endif 019 #ifdef HAVE_STDLIB_H 020 #include <stdlib.h> 021 #endif 022 023 #include <libxml/xmlmemory.h> 024 #include <libxml/HTMLparser.h> 025 #include <libxml/HTMLtree.h> 026 #include <libxml/entities.h> 027 #include <libxml/valid.h> 028 #include <libxml/xmlerror.h> 029 #include <libxml/parserInternals.h> 030 #include <libxml/globals.h> 031 #include <libxml/uri.h> 032 033 /************************************************************************ 034 * * 035 * Getting/Setting encoding meta tags * 036 * * 037 ************************************************************************/ 038 039 /** 040 * htmlGetMetaEncoding: 041 * @doc: the document 042 * 043 * Encoding definition lookup in the Meta tags 044 * 045 * Returns the current encoding as flagged in the HTML source 046 */ 047 const xmlChar * 048 htmlGetMetaEncoding(htmlDocPtr doc) { 049 htmlNodePtr cur; 050 const xmlChar *content; 051 const xmlChar *encoding; 052 053 if (doc == NULL) 054 return(NULL); 055 cur = doc->children; 056 057 /* 058 * Search the html 059 */ 060 while (cur != NULL) { 061 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 062 if (xmlStrEqual(cur->name, BAD_CAST"html")) 063 break; 064 if (xmlStrEqual(cur->name, BAD_CAST"head")) 065 goto found_head; 066 if (xmlStrEqual(cur->name, BAD_CAST"meta")) 067 goto found_meta; 068 } 069 cur = cur->next; 070 } 071 if (cur == NULL) 072 return(NULL); 073 cur = cur->children; 074 075 /* 076 * Search the head 077 */ 078 while (cur != NULL) { 079 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 080 if (xmlStrEqual(cur->name, BAD_CAST"head")) 081 break; 082 if 
(xmlStrEqual(cur->name, BAD_CAST"meta")) 083 goto found_meta; 084 } 085 cur = cur->next; 086 } 087 if (cur == NULL) 088 return(NULL); 089 found_head: 090 cur = cur->children; 091 092 /* 093 * Search the meta elements 094 */ 095 found_meta: 096 while (cur != NULL) { 097 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 098 if (xmlStrEqual(cur->name, BAD_CAST"meta")) { 099 xmlAttrPtr attr = cur->properties; 100 int http; 101 const xmlChar *value; 102 103 content = NULL; 104 http = 0; 105 while (attr != NULL) { 106 if ((attr->children != NULL) && 107 (attr->children->type == XML_TEXT_NODE) && 108 (attr->children->next == NULL)) { 109 value = attr->children->content; 110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) 111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) 112 http = 1; 113 else if ((value != NULL) 114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) 115 content = value; 116 if ((http != 0) && (content != NULL)) 117 goto found_content; 118 } 119 attr = attr->next; 120 } 121 } 122 } 123 cur = cur->next; 124 } 125 return(NULL); 126 127 found_content: 128 encoding = xmlStrstr(content, BAD_CAST"charset="); 129 if (encoding == NULL) 130 encoding = xmlStrstr(content, BAD_CAST"Charset="); 131 if (encoding == NULL) 132 encoding = xmlStrstr(content, BAD_CAST"CHARSET="); 133 if (encoding != NULL) { 134 encoding += 8; 135 } else { 136 encoding = xmlStrstr(content, BAD_CAST"charset ="); 137 if (encoding == NULL) 138 encoding = xmlStrstr(content, BAD_CAST"Charset ="); 139 if (encoding == NULL) 140 encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); 141 if (encoding != NULL) 142 encoding += 9; 143 } 144 if (encoding != NULL) { 145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; 146 } 147 return(encoding); 148 } 149 150 /** 151 * htmlSetMetaEncoding: 152 * @doc: the document 153 * @encoding: the encoding string 154 * 155 * Sets the current encoding in the Meta tags 156 * NOTE: this will not change the document content encoding, 
just 157 * the META flag associated. 158 * 159 * Returns 0 in case of success and -1 in case of error 160 */ 161 int 162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { 163 htmlNodePtr cur, meta; 164 const xmlChar *content; 165 char newcontent[100]; 166 167 168 if (doc == NULL) 169 return(-1); 170 171 if (encoding != NULL) { 172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s", 173 (char *)encoding); 174 newcontent[sizeof(newcontent) - 1] = 0; 175 } 176 177 cur = doc->children; 178 179 /* 180 * Search the html 181 */ 182 while (cur != NULL) { 183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0) 185 break; 186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) 187 goto found_head; 188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) 189 goto found_meta; 190 } 191 cur = cur->next; 192 } 193 if (cur == NULL) 194 return(-1); 195 cur = cur->children; 196 197 /* 198 * Search the head 199 */ 200 while (cur != NULL) { 201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) 203 break; 204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) 205 goto found_meta; 206 } 207 cur = cur->next; 208 } 209 if (cur == NULL) 210 return(-1); 211 found_head: 212 if (cur->children == NULL) { 213 if (encoding == NULL) 214 return(0); 215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); 216 xmlAddChild(cur, meta); 217 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); 218 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); 219 return(0); 220 } 221 cur = cur->children; 222 223 found_meta: 224 if (encoding != NULL) { 225 /* 226 * Create a new Meta element with the right attributes 227 */ 228 229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); 230 xmlAddPrevSibling(cur, meta); 231 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); 232 xmlNewProp(meta, BAD_CAST"content", BAD_CAST 
newcontent); 233 } 234 235 /* 236 * Search and destroy all the remaining the meta elements carrying 237 * encoding informations 238 */ 239 while (cur != NULL) { 240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { 242 xmlAttrPtr attr = cur->properties; 243 int http; 244 const xmlChar *value; 245 246 content = NULL; 247 http = 0; 248 while (attr != NULL) { 249 if ((attr->children != NULL) && 250 (attr->children->type == XML_TEXT_NODE) && 251 (attr->children->next == NULL)) { 252 value = attr->children->content; 253 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) 254 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) 255 http = 1; 256 else 257 { 258 if ((value != NULL) && 259 (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) 260 content = value; 261 } 262 if ((http != 0) && (content != NULL)) 263 break; 264 } 265 attr = attr->next; 266 } 267 if ((http != 0) && (content != NULL)) { 268 meta = cur; 269 cur = cur->next; 270 xmlUnlinkNode(meta); 271 xmlFreeNode(meta); 272 continue; 273 } 274 275 } 276 } 277 cur = cur->next; 278 } 279 return(0); 280 } 281 282 /** 283 * booleanHTMLAttrs: 284 * 285 * These are the HTML attributes which will be output 286 * in minimized form, i.e. <option selected="selected"> will be 287 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method" 288 * 289 */ 290 static const char* htmlBooleanAttrs[] = { 291 "checked", "compact", "declare", "defer", "disabled", "ismap", 292 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", 293 "selected", NULL 294 }; 295 296 297 /** 298 * htmlIsBooleanAttr: 299 * @name: the name of the attribute to check 300 * 301 * Determine if a given attribute is a boolean attribute. 302 * 303 * returns: false if the attribute is not boolean, true otherwise. 
304 */ 305 int 306 htmlIsBooleanAttr(const xmlChar *name) 307 { 308 int i = 0; 309 310 while (htmlBooleanAttrs[i] != NULL) { 311 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0) 312 return 1; 313 i++; 314 } 315 return 0; 316 } 317 318 #ifdef LIBXML_OUTPUT_ENABLED 319 /************************************************************************ 320 * * 321 * Output error handlers * 322 * * 323 ************************************************************************/ 324 /** 325 * htmlSaveErrMemory: 326 * @extra: extra informations 327 * 328 * Handle an out of memory condition 329 */ 330 static void 331 htmlSaveErrMemory(const char *extra) 332 { 333 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra); 334 } 335 336 /** 337 * htmlSaveErr: 338 * @code: the error number 339 * @node: the location of the error. 340 * @extra: extra informations 341 * 342 * Handle an out of memory condition 343 */ 344 static void 345 htmlSaveErr(int code, xmlNodePtr node, const char *extra) 346 { 347 const char *msg = NULL; 348 349 switch(code) { 350 case XML_SAVE_NOT_UTF8: 351 msg = "string is not in UTF-8"; 352 break; 353 case XML_SAVE_CHAR_INVALID: 354 msg = "invalid character value"; 355 break; 356 case XML_SAVE_UNKNOWN_ENCODING: 357 msg = "unknown encoding %s"; 358 break; 359 case XML_SAVE_NO_DOCTYPE: 360 msg = "HTML has no DOCTYPE"; 361 break; 362 default: 363 msg = "unexpected error number"; 364 } 365 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra); 366 } 367 368 /************************************************************************ 369 * * 370 * Dumping HTML tree content to a simple buffer * 371 * * 372 ************************************************************************/ 373 374 static int 375 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, 376 int format); 377 378 /** 379 * htmlNodeDumpFormat: 380 * @buf: the HTML buffer output 381 * @doc: the document 382 * @cur: the current node 383 * @format: should 
formatting spaces been added 384 * 385 * Dump an HTML node, recursive behaviour,children are printed too. 386 * 387 * Returns the number of byte written or -1 in case of error 388 */ 389 static int 390 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, 391 int format) { 392 unsigned int use; 393 int ret; 394 xmlOutputBufferPtr outbuf; 395 396 if (cur == NULL) { 397 return (-1); 398 } 399 if (buf == NULL) { 400 return (-1); 401 } 402 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); 403 if (outbuf == NULL) { 404 htmlSaveErrMemory("allocating HTML output buffer"); 405 return (-1); 406 } 407 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer)); 408 outbuf->buffer = buf; 409 outbuf->encoder = NULL; 410 outbuf->writecallback = NULL; 411 outbuf->closecallback = NULL; 412 outbuf->context = NULL; 413 outbuf->written = 0; 414 415 use = buf->use; 416 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); 417 xmlFree(outbuf); 418 ret = buf->use - use; 419 return (ret); 420 } 421 422 /** 423 * htmlNodeDump: 424 * @buf: the HTML buffer output 425 * @doc: the document 426 * @cur: the current node 427 * 428 * Dump an HTML node, recursive behaviour,children are printed too, 429 * and formatting returns are added. 430 * 431 * Returns the number of byte written or -1 in case of error 432 */ 433 int 434 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { 435 xmlInitParser(); 436 437 return(htmlNodeDumpFormat(buf, doc, cur, 1)); 438 } 439 440 /** 441 * htmlNodeDumpFileFormat: 442 * @out: the FILE pointer 443 * @doc: the document 444 * @cur: the current node 445 * @encoding: the document encoding 446 * @format: should formatting spaces been added 447 * 448 * Dump an HTML node, recursive behaviour,children are printed too. 449 * 450 * TODO: if encoding == NULL try to save in the doc encoding 451 * 452 * returns: the number of byte written or -1 in case of failure. 
453 */ 454 int 455 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, 456 xmlNodePtr cur, const char *encoding, int format) { 457 xmlOutputBufferPtr buf; 458 xmlCharEncodingHandlerPtr handler = NULL; 459 int ret; 460 461 xmlInitParser(); 462 463 if (encoding != NULL) { 464 xmlCharEncoding enc; 465 466 enc = xmlParseCharEncoding(encoding); 467 if (enc != XML_CHAR_ENCODING_UTF8) { 468 handler = xmlFindCharEncodingHandler(encoding); 469 if (handler == NULL) 470 return(-1); 471 } 472 } 473 474 /* 475 * Fallback to HTML or ASCII when the encoding is unspecified 476 */ 477 if (handler == NULL) 478 handler = xmlFindCharEncodingHandler("HTML"); 479 if (handler == NULL) 480 handler = xmlFindCharEncodingHandler("ascii"); 481 482 /* 483 * save the content to a temp buffer. 484 */ 485 buf = xmlOutputBufferCreateFile(out, handler); 486 if (buf == NULL) return(0); 487 488 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); 489 490 ret = xmlOutputBufferClose(buf); 491 return(ret); 492 } 493 494 /** 495 * htmlNodeDumpFile: 496 * @out: the FILE pointer 497 * @doc: the document 498 * @cur: the current node 499 * 500 * Dump an HTML node, recursive behaviour,children are printed too, 501 * and formatting returns are added. 502 */ 503 void 504 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { 505 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1); 506 } 507 508 /** 509 * htmlDocDumpMemory: 510 * @cur: the document 511 * @mem: OUT: the memory pointer 512 * @size: OUT: the memory length 513 * 514 * Dump an HTML document in memory and return the xmlChar * and it's size. 515 * It's up to the caller to free the memory. 
516 */ 517 void 518 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { 519 xmlOutputBufferPtr buf; 520 xmlCharEncodingHandlerPtr handler = NULL; 521 const char *encoding; 522 523 xmlInitParser(); 524 525 if ((mem == NULL) || (size == NULL)) 526 return; 527 if (cur == NULL) { 528 *mem = NULL; 529 *size = 0; 530 return; 531 } 532 533 encoding = (const char *) htmlGetMetaEncoding(cur); 534 535 if (encoding != NULL) { 536 xmlCharEncoding enc; 537 538 enc = xmlParseCharEncoding(encoding); 539 if (enc != cur->charset) { 540 if (cur->charset != XML_CHAR_ENCODING_UTF8) { 541 /* 542 * Not supported yet 543 */ 544 *mem = NULL; 545 *size = 0; 546 return; 547 } 548 549 handler = xmlFindCharEncodingHandler(encoding); 550 if (handler == NULL) { 551 *mem = NULL; 552 *size = 0; 553 return; 554 } 555 } 556 } 557 558 /* 559 * Fallback to HTML or ASCII when the encoding is unspecified 560 */ 561 if (handler == NULL) 562 handler = xmlFindCharEncodingHandler("HTML"); 563 if (handler == NULL) 564 handler = xmlFindCharEncodingHandler("ascii"); 565 566 buf = xmlAllocOutputBuffer(handler); 567 if (buf == NULL) { 568 *mem = NULL; 569 *size = 0; 570 return; 571 } 572 573 htmlDocContentDumpOutput(buf, cur, NULL); 574 xmlOutputBufferFlush(buf); 575 if (buf->conv != NULL) { 576 *size = buf->conv->use; 577 *mem = xmlStrndup(buf->conv->content, *size); 578 } else { 579 *size = buf->buffer->use; 580 *mem = xmlStrndup(buf->buffer->content, *size); 581 } 582 (void)xmlOutputBufferClose(buf); 583 } 584 585 586 /************************************************************************ 587 * * 588 * Dumping HTML tree content to an I/O output buffer * 589 * * 590 ************************************************************************/ 591 592 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); 593 594 /** 595 * htmlDtdDumpOutput: 596 * @buf: the HTML buffer output 597 * @doc: the document 598 * @encoding: the encoding string 599 * 600 * TODO: check whether encoding is needed 601 * 
602 * Dump the HTML document DTD, if any. 603 */ 604 static void 605 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, 606 const char *encoding ATTRIBUTE_UNUSED) { 607 xmlDtdPtr cur = doc->intSubset; 608 609 if (cur == NULL) { 610 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL); 611 return; 612 } 613 xmlOutputBufferWriteString(buf, "<!DOCTYPE "); 614 xmlOutputBufferWriteString(buf, (const char *)cur->name); 615 if (cur->ExternalID != NULL) { 616 xmlOutputBufferWriteString(buf, " PUBLIC "); 617 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID); 618 if (cur->SystemID != NULL) { 619 xmlOutputBufferWriteString(buf, " "); 620 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); 621 } 622 } else if (cur->SystemID != NULL) { 623 xmlOutputBufferWriteString(buf, " SYSTEM "); 624 xmlBufferWriteQuotedString(buf->buffer,