From: Tenihin Andrey (algorithm@peterlink.ru)
Date: Mon Dec 18 2000 - 07:21:04 EST
Here is the patch for handling `encoding' in
htmlParseDoc and htmlSAXParseDoc.
The encoding passed to htmlParseDoc takes
priority even when the HTML document has its own "Content-Type".
HTML browsers also ignore the encoding in the HTML if
there is an encoding in the "Content-Type" returned by the
HTTP server (and passed to htmlParseDoc).
patch against libxml-2.2.10
Andrey.
--- HTMLparser.c.orig Mon Dec 18 13:44:20 2000
+++ HTMLparser.c Mon Dec 18 14:20:43 2000
@@ -2787,6 +2787,10 @@
if ((ctxt == NULL) || (attvalue == NULL))
return;
+ /* do not change encoding */
+ if (ctxt->input->encoding != NULL)
+ return;
+
encoding = xmlStrcasestr(attvalue, BAD_CAST"charset=");
if (encoding != NULL) {
encoding += 8;
@@ -4822,6 +4826,7 @@
htmlParserInputPtr inputStream;
xmlParserInputBufferPtr buf;
/* htmlCharEncoding enc; */
+ xmlChar *content, *content_line = "charset=";
buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
if (buf == NULL) return(NULL);
@@ -4852,6 +4857,22 @@
inputStream->free = NULL;
inputPush(ctxt, inputStream);
+
+ /* set encoding */
+ if (encoding)
+ {
+ content = xmlMalloc (strlen (content_line) + strlen (encoding) + 1);
+
+ if (content)
+ {
+ strcpy (content, content_line);
+ strcat (content, encoding);
+ htmlCheckEncoding (ctxt, content);
+ xmlFree (content);
+ }
+ }
+
+
return(ctxt);
}
---- Message from the list xml@rpmfind.net Archived at : http://xmlsoft.org/messages/ to unsubscribe: echo "unsubscribe xml" | mail majordomo@rpmfind.net
This archive was generated by hypermail 2b29 : Mon Dec 18 2000 - 09:44:04 EST