Re: [xml] A truncation bug and some testHTML.c enhancements

Date view Thread view Subject view Author view

From: Wayne Davison (wayned@blorf.net)
Date: Sun Aug 13 2000 - 13:04:17 EDT


On Sun, 13 Aug 2000, Daniel Veillard wrote:
> Quick question: before or after the 2.2.2 release (i.e. yesterday
> afternoon) ?

I had thought that I was using 2.2.2 because I did a brand-new, fresh
pull of the gnome-xml library yesterday afternoon (and the on-line
version of the cvs library showed that your patch was there).
However, there were some changes that I just pulled from CVS this
morning, so it looks like I didn't get the full 2.2.2 release somehow.

> Because I think I fixed this bug

Yes, it seems to be gone now. Fabulous!

> Ok maybe bigpush should be the default and adding --smallpush
> is the right option.

I've appended a patch that does this, and also fixes a minor oversight
in your push-SAX code where "doc" could get used uninitialized if the
initial file read failed, and it includes my recommended improvements to
the option-parsing code.

..wayne..

---8<------8<------8<------8<---cut here--->8------>8------>8------>8---
Index: testHTML.c
--- testHTML.c 2000/08/12 21:12:02 1.13
+++ testHTML.c 2000/08/13 16:47:55
@@ -49,6 +49,7 @@
 static int repeat = 0;
 static int noout = 0;
 static int push = 0;
+static int smallpush = 0;
 static char *encoding = NULL;
 
 xmlSAXHandler emptySAXHandlerStruct = {
@@ -575,7 +576,7 @@
  ************************************************************************/
 
 void parseSAXFile(char *filename) {
- htmlDocPtr doc;
+ htmlDocPtr doc = NULL;
     /*
      * Empty callbacks for checking
      */
@@ -584,12 +585,12 @@
 
         f = fopen(filename, "r");
         if (f != NULL) {
- int res, size = 3;
             char chars[4096];
+ int res, size = sizeof chars;
             htmlParserCtxtPtr ctxt;
 
- /* if (repeat) */
- size = 4096;
+ if (smallpush)
+ size = 3;
             res = fread(chars, 1, 4, f);
             if (res > 0) {
                 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
@@ -610,12 +611,12 @@
         if (!noout) {
             f = fopen(filename, "r");
             if (f != NULL) {
- int res, size = 3;
                 char chars[4096];
+ int res, size = sizeof chars;
                 htmlParserCtxtPtr ctxt;
 
- /* if (repeat) */
- size = 4096;
+ if (smallpush)
+ size = 3;
                 res = fread(chars, 1, 4, f);
                 if (res > 0) {
                     ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
@@ -665,12 +666,12 @@
 
         f = fopen(filename, "r");
         if (f != NULL) {
- int res, size = 3;
             char chars[4096];
+ int res, size = sizeof chars;
             htmlParserCtxtPtr ctxt;
 
- /* if (repeat) */
- size = 4096;
+ if (smallpush)
+ size = 3;
             res = fread(chars, 1, 4, f);
             if (res > 0) {
                 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
@@ -731,34 +732,32 @@
     int files = 0;
 
     for (i = 1; i < argc ; i++) {
+ if (argv[i][0] == '-') {
+ char *opt = argv[i] + 1;
+ if (*opt == '-')
+ opt++;
 #ifdef LIBXML_DEBUG_ENABLED
- if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
- debug++;
- else
+ if (!strcmp(opt, "debug"))
+ debug++;
+ else
 #endif
- if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
- copy++;
- else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
- push++;
- else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
- sax++;
- else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
- noout++;
- else if ((!strcmp(argv[i], "-repeat")) ||
- (!strcmp(argv[i], "--repeat")))
- repeat++;
- else if ((!strcmp(argv[i], "-encode")) ||
- (!strcmp(argv[i], "--encode"))) {
- i++;
- encoding = argv[i];
- }
+ if (!strcmp(opt, "copy"))
+ copy++;
+ else if (!strcmp(opt, "push"))
+ push++;
+ else if (!strcmp(opt, "smallpush"))
+ push++, smallpush++;
+ else if (!strcmp(opt, "sax"))
+ sax++;
+ else if (!strcmp(opt, "noout"))
+ noout++;
+ else if (!strcmp(opt, "repeat"))
+ repeat++;
+ else if (!strcmp(opt, "encode"))
+ encoding = argv[++i];
+ }
     }
     for (i = 1; i < argc ; i++) {
- if ((!strcmp(argv[i], "-encode")) ||
- (!strcmp(argv[i], "--encode"))) {
- i++;
- continue;
- }
         if (argv[i][0] != '-') {
             if (repeat) {
                 for (count = 0;count < 100 * repeat;count++) {
@@ -774,10 +773,16 @@
                     parseAndPrintFile(argv[i]);
             }
             files ++;
+ } else {
+ char *opt = argv[i] + 1;
+ if (*opt == '-')
+ opt++;
+ if (!strcmp(opt, "encode"))
+ i++;
         }
     }
     if (files == 0) {
- printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
+ printf("Usage : %s [--debug] [--copy] [--...] HTMLfiles ...\n",
                argv[0]);
         printf("\tParse the HTML files and output the result of the parsing\n");
 #ifdef LIBXML_DEBUG_ENABLED
@@ -788,6 +793,7 @@
         printf("\t--repeat : parse the file 100 times, for timing\n");
         printf("\t--noout : do not print the result\n");
         printf("\t--push : use the push mode parser\n");
+ printf("\t--smallpush : like --push, but use a 3-byte buffer\n");
         printf("\t--encode encoding : output in the given encoding\n");
     }
     xmlCleanupParser();
---8<------8<------8<------8<---cut here--->8------>8------>8------>8---

----
Message from the list xml@xmlsoft.org
Archived at : http://xmlsoft.org/messages/
to unsubscribe: echo "unsubscribe xml" | mail  majordomo@xmlsoft.org


Date view Thread view Subject view Author view

This archive was generated by hypermail 2b29 : Sun Aug 13 2000 - 10:43:11 EDT