Error Buddy

Do you have an error message from your application? Then find the answer with Error Buddy. You can search over 40,000 source code files and troubleshooting documents using our beta Lucene/Nutch search interface or, if you prefer, search as normal using Google. With LXR technology you can drill right down to the line of source code an error came from, with full cross-referencing.

If after searching you didn't get your ideal answer, or you are still unclear what the error means, you can choose to post that question to the community forums following the link included in the search results.

corestack/ libxml2-2.6.19/ HTMLtree.c [1.6]
001 /*
002  * HTMLtree.c : implementation of access function for an HTML tree.
003  *
004  * See Copyright for the status of this software.
005  *
006  * daniel@veillard.com
007  */
008 
009 
010 #define IN_LIBXML
011 #include "libxml.h"
012 #ifdef LIBXML_HTML_ENABLED
013 
014 #include <string.h> /* for memset() only ! */
015 
016 #ifdef HAVE_CTYPE_H
017 #include <ctype.h>
018 #endif
019 #ifdef HAVE_STDLIB_H
020 #include <stdlib.h>
021 #endif
022 
023 #include <libxml/xmlmemory.h>
024 #include <libxml/HTMLparser.h>
025 #include <libxml/HTMLtree.h>
026 #include <libxml/entities.h>
027 #include <libxml/valid.h>
028 #include <libxml/xmlerror.h>
029 #include <libxml/parserInternals.h>
030 #include <libxml/globals.h>
031 #include <libxml/uri.h>
032 
033 /************************************************************************
034  *                                                                      *
035  *              Getting/Setting encoding meta tags                      *
036  *                                                                      *
037  ************************************************************************/
038 
039 /**
040  * htmlGetMetaEncoding:
041  * @doc:  the document
042  * 
043  * Encoding definition lookup in the Meta tags
044  *
045  * Returns the current encoding as flagged in the HTML source
046  */
047 const xmlChar *
048 htmlGetMetaEncoding(htmlDocPtr doc) {
049     htmlNodePtr cur;
050     const xmlChar *content;
051     const xmlChar *encoding;
052 
053     if (doc == NULL)
054         return(NULL);
055     cur = doc->children;
056 
057     /*
058      * Search the html
059      */
060     while (cur != NULL) {
061         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
062             if (xmlStrEqual(cur->name, BAD_CAST"html"))
063                 break;
064             if (xmlStrEqual(cur->name, BAD_CAST"head"))
065                 goto found_head;
066             if (xmlStrEqual(cur->name, BAD_CAST"meta"))
067                 goto found_meta;
068         }
069         cur = cur->next;
070     }
071     if (cur == NULL)
072         return(NULL);
073     cur = cur->children;
074 
075     /*
076      * Search the head
077      */
078     while (cur != NULL) {
079         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
080             if (xmlStrEqual(cur->name, BAD_CAST"head"))
081                 break;
082             if (xmlStrEqual(cur->name, BAD_CAST"meta"))
083                 goto found_meta;
084         }
085         cur = cur->next;
086     }
087     if (cur == NULL)
088         return(NULL);
089 found_head:
090     cur = cur->children;
091 
092     /*
093      * Search the meta elements
094      */
095 found_meta:
096     while (cur != NULL) {
097         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
098             if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
099                 xmlAttrPtr attr = cur->properties;
100                 int http;
101                 const xmlChar *value;
102 
103                 content = NULL;
104                 http = 0;
105                 while (attr != NULL) {
106                     if ((attr->children != NULL) &&
107                         (attr->children->type == XML_TEXT_NODE) &&
108                         (attr->children->next == NULL)) {
109                         value = attr->children->content;
110                         if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111                          && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112                             http = 1;
113                         else if ((value != NULL)
114                          && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115                             content = value;
116                         if ((http != 0) && (content != NULL))
117                             goto found_content;
118                     }
119                     attr = attr->next;
120                 }
121             }
122         }
123         cur = cur->next;
124     }
125     return(NULL);
126 
127 found_content:
128     encoding = xmlStrstr(content, BAD_CAST"charset=");
129     if (encoding == NULL) 
130         encoding = xmlStrstr(content, BAD_CAST"Charset=");
131     if (encoding == NULL) 
132         encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133     if (encoding != NULL) {
134         encoding += 8;
135     } else {
136         encoding = xmlStrstr(content, BAD_CAST"charset =");
137         if (encoding == NULL) 
138             encoding = xmlStrstr(content, BAD_CAST"Charset =");
139         if (encoding == NULL) 
140             encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141         if (encoding != NULL)
142             encoding += 9;
143     }
144     if (encoding != NULL) {
145         while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146     }
147     return(encoding);
148 }
149 
150 /**
151  * htmlSetMetaEncoding:
152  * @doc:  the document
153  * @encoding:  the encoding string
154  * 
155  * Sets the current encoding in the Meta tags
156  * NOTE: this will not change the document content encoding, just
157  * the META flag associated.
158  *
159  * Returns 0 in case of success and -1 in case of error
160  */
161 int
162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163     htmlNodePtr cur, meta;
164     const xmlChar *content;
165     char newcontent[100];
166 
167 
168     if (doc == NULL)
169         return(-1);
170 
171     if (encoding != NULL) {
172         snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
173                 (char *)encoding);
174         newcontent[sizeof(newcontent) - 1] = 0;
175     }
176 
177     cur = doc->children;
178 
179     /*
180      * Search the html
181      */
182     while (cur != NULL) {
183         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
184             if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
185                 break;
186             if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
187                 goto found_head;
188             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
189                 goto found_meta;
190         }
191         cur = cur->next;
192     }
193     if (cur == NULL)
194         return(-1);
195     cur = cur->children;
196 
197     /*
198      * Search the head
199      */
200     while (cur != NULL) {
201         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
202             if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
203                 break;
204             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
205                 goto found_meta;
206         }
207         cur = cur->next;
208     }
209     if (cur == NULL)
210         return(-1);
211 found_head:
212     if (cur->children == NULL) {
213         if (encoding == NULL)
214             return(0);
215         meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216         xmlAddChild(cur, meta);
217         xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
218         xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
219         return(0);
220     }
221     cur = cur->children;
222 
223 found_meta:
224     if (encoding != NULL) {
225         /*
226          * Create a new Meta element with the right attributes
227          */
228 
229         meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230         xmlAddPrevSibling(cur, meta);
231         xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
232         xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
233     }
234 
235     /*
236      * Search and destroy all the remaining the meta elements carrying
237      * encoding informations
238      */
239     while (cur != NULL) {
240         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
241             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
242                 xmlAttrPtr attr = cur->properties;
243                 int http;
244                 const xmlChar *value;
245 
246                 content = NULL;
247                 http = 0;
248                 while (attr != NULL) {
249                     if ((attr->children != NULL) &&
250                         (attr->children->type == XML_TEXT_NODE) &&
251                         (attr->children->next == NULL)) {
252                         value = attr->children->content;
253                         if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
254                          && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
255                             http = 1;
256                         else 
257                         {
258                            if ((value != NULL) && 
259                                 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
260                               content = value;
261                         }
262                         if ((http != 0) && (content != NULL))
263                             break;
264                     }
265                     attr = attr->next;
266                 }
267                 if ((http != 0) && (content != NULL)) {
268                     meta = cur;
269                     cur = cur->next;
270                     xmlUnlinkNode(meta);
271                     xmlFreeNode(meta);
272                     continue;
273                 }
274 
275             }
276         }
277         cur = cur->next;
278     }
279     return(0);
280 }
281 
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated table of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL    /* end of table marker */
};
295 
296 
297 /**
298  * htmlIsBooleanAttr:
299  * @name:  the name of the attribute to check
300  *
301  * Determine if a given attribute is a boolean attribute.
302  * 
303  * returns: false if the attribute is not boolean, true otherwise.
304  */
305 int
306 htmlIsBooleanAttr(const xmlChar *name)
307 {
308     int i = 0;
309 
310     while (htmlBooleanAttrs[i] != NULL) {
311         if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
312             return 1;
313         i++;
314     }
315     return 0;
316 }
317 
318 #ifdef LIBXML_OUTPUT_ENABLED
319 /************************************************************************
320  *                                                                      *
321  *                      Output error handlers                           *
322  *                                                                      *
323  ************************************************************************/
324 /**
325  * htmlSaveErrMemory:
326  * @extra:  extra informations
327  *
328  * Handle an out of memory condition
329  */
330 static void
331 htmlSaveErrMemory(const char *extra)
332 {
333     __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
334 }
335 
336 /**
337  * htmlSaveErr:
338  * @code:  the error number
339  * @node:  the location of the error.
340  * @extra:  extra informations
341  *
342  * Handle an out of memory condition
343  */
344 static void
345 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
346 {
347     const char *msg = NULL;
348 
349     switch(code) {
350         case XML_SAVE_NOT_UTF8:
351             msg = "string is not in UTF-8";
352             break;
353         case XML_SAVE_CHAR_INVALID:
354             msg = "invalid character value";
355             break;
356         case XML_SAVE_UNKNOWN_ENCODING:
357             msg = "unknown encoding %s";
358             break;
359         case XML_SAVE_NO_DOCTYPE:
360             msg = "HTML has no DOCTYPE";
361             break;
362         default:
363             msg = "unexpected error number";
364     }
365     __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
366 }
367 
368 /************************************************************************
369  *                                                                      *
370  *              Dumping HTML tree content to a simple buffer            *
371  *                                                                      *
372  ************************************************************************/
373 
374 static int
375 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
376                    int format);
377 
378 /**
379  * htmlNodeDumpFormat:
380  * @buf:  the HTML buffer output
381  * @doc:  the document
382  * @cur:  the current node
383  * @format:  should formatting spaces been added
384  *
385  * Dump an HTML node, recursive behaviour,children are printed too.
386  *
387  * Returns the number of byte written or -1 in case of error
388  */
389 static int
390 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
391                    int format) {
392     unsigned int use;
393     int ret;
394     xmlOutputBufferPtr outbuf;
395 
396     if (cur == NULL) {
397         return (-1);
398     }
399     if (buf == NULL) {
400         return (-1);
401     }
402     outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
403     if (outbuf == NULL) {
404         htmlSaveErrMemory("allocating HTML output buffer");
405         return (-1);
406     }
407     memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
408     outbuf->buffer = buf;
409     outbuf->encoder = NULL;
410     outbuf->writecallback = NULL;
411     outbuf->closecallback = NULL;
412     outbuf->context = NULL;
413     outbuf->written = 0;
414 
415     use = buf->use;
416     htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
417     xmlFree(outbuf);
418     ret = buf->use - use;
419     return (ret);
420 }
421 
422 /**
423  * htmlNodeDump:
424  * @buf:  the HTML buffer output
425  * @doc:  the document
426  * @cur:  the current node
427  *
428  * Dump an HTML node, recursive behaviour,children are printed too,
429  * and formatting returns are added.
430  *
431  * Returns the number of byte written or -1 in case of error
432  */
433 int
434 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
435     xmlInitParser();
436 
437     return(htmlNodeDumpFormat(buf, doc, cur, 1));
438 }
439 
440 /**
441  * htmlNodeDumpFileFormat:
442  * @out:  the FILE pointer
443  * @doc:  the document
444  * @cur:  the current node
445  * @encoding: the document encoding
446  * @format:  should formatting spaces been added
447  *
448  * Dump an HTML node, recursive behaviour,children are printed too.
449  *
450  * TODO: if encoding == NULL try to save in the doc encoding
451  *
452  * returns: the number of byte written or -1 in case of failure.
453  */
454 int
455 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
456                        xmlNodePtr cur, const char *encoding, int format) {
457     xmlOutputBufferPtr buf;
458     xmlCharEncodingHandlerPtr handler = NULL;
459     int ret;
460 
461     xmlInitParser();
462 
463     if (encoding != NULL) {
464         xmlCharEncoding enc;
465 
466         enc = xmlParseCharEncoding(encoding);
467         if (enc != XML_CHAR_ENCODING_UTF8) {
468             handler = xmlFindCharEncodingHandler(encoding);
469             if (handler == NULL)
470                 return(-1);
471         }
472     }
473 
474     /*
475      * Fallback to HTML or ASCII when the encoding is unspecified
476      */
477     if (handler == NULL)
478         handler = xmlFindCharEncodingHandler("HTML");
479     if (handler == NULL)
480         handler = xmlFindCharEncodingHandler("ascii");
481 
482     /* 
483      * save the content to a temp buffer.
484      */
485     buf = xmlOutputBufferCreateFile(out, handler);
486     if (buf == NULL) return(0);
487 
488     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
489 
490     ret = xmlOutputBufferClose(buf);
491     return(ret);
492 }
493 
494 /**
495  * htmlNodeDumpFile:
496  * @out:  the FILE pointer
497  * @doc:  the document
498  * @cur:  the current node
499  *
500  * Dump an HTML node, recursive behaviour,children are printed too,
501  * and formatting returns are added.
502  */
503 void
504 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
505     htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
506 }
507 
508 /**
509  * htmlDocDumpMemory:
510  * @cur:  the document
511  * @mem:  OUT: the memory pointer
512  * @size:  OUT: the memory length
513  *
514  * Dump an HTML document in memory and return the xmlChar * and it's size.
515  * It's up to the caller to free the memory.
516  */
517 void
518 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
519     xmlOutputBufferPtr buf;
520     xmlCharEncodingHandlerPtr handler = NULL;
521     const char *encoding;
522 
523     xmlInitParser();
524 
525     if ((mem == NULL) || (size == NULL))
526         return;
527     if (cur == NULL) {
528         *mem = NULL;
529         *size = 0;
530         return;
531     }
532 
533     encoding = (const char *) htmlGetMetaEncoding(cur);
534 
535     if (encoding != NULL) {
536         xmlCharEncoding enc;
537 
538         enc = xmlParseCharEncoding(encoding);
539         if (enc != cur->charset) {
540             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
541                 /*
542                  * Not supported yet
543                  */
544                 *mem = NULL;
545                 *size = 0;
546                 return;
547             }
548 
549             handler = xmlFindCharEncodingHandler(encoding);
550             if (handler == NULL) {
551                 *mem = NULL;
552                 *size = 0;
553                 return;
554             }
555         }
556     }
557 
558     /*
559      * Fallback to HTML or ASCII when the encoding is unspecified
560      */
561     if (handler == NULL)
562         handler = xmlFindCharEncodingHandler("HTML");
563     if (handler == NULL)
564         handler = xmlFindCharEncodingHandler("ascii");
565 
566     buf = xmlAllocOutputBuffer(handler);
567     if (buf == NULL) {
568         *mem = NULL;
569         *size = 0;
570         return;
571     }
572 
573     htmlDocContentDumpOutput(buf, cur, NULL);
574     xmlOutputBufferFlush(buf);
575     if (buf->conv != NULL) {
576         *size = buf->conv->use;
577         *mem = xmlStrndup(buf->conv->content, *size);
578     } else {
579         *size = buf->buffer->use;
580         *mem = xmlStrndup(buf->buffer->content, *size);
581     }
582     (void)xmlOutputBufferClose(buf);
583 }
584 
585 
586 /************************************************************************
587  *                                                                      *
588  *              Dumping HTML tree content to an I/O output buffer       *
589  *                                                                      *
590  ************************************************************************/
591 
592 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
593 
594 /**
595  * htmlDtdDumpOutput:
596  * @buf:  the HTML buffer output
597  * @doc:  the document
598  * @encoding:  the encoding string
599  * 
600  * TODO: check whether encoding is needed
601  *
602  * Dump the HTML document DTD, if any.
603  */
604 static void
605 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
606                   const char *encoding ATTRIBUTE_UNUSED) {
607     xmlDtdPtr cur = doc->intSubset;
608 
609     if (cur == NULL) {
610         htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
611         return;
612     }
613     xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
614     xmlOutputBufferWriteString(buf, (const char *)cur->name);
615     if (cur->ExternalID != NULL) {
616         xmlOutputBufferWriteString(buf, " PUBLIC ");
617         xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
618         if (cur->SystemID != NULL) {
619             xmlOutputBufferWriteString(buf, " ");
620             xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
621         } 
622     }  else if (cur->SystemID != NULL) {
623         xmlOutputBufferWriteString(buf, " SYSTEM ");
624         xmlBufferWriteQuotedString(buf->buffer,