[xml] Another xinclude vs. DTD patch

Date view Thread view Subject view Author view

From: Paul D. Smith (pausmith@nortelnetworks.com)
Date: Tue Nov 28 2000 - 23:20:53 EST


This fixes my third problem with DTD validation in the presence of
xinclude operations. Only one left...

In playing with this I realized that the two functions
xmlValidateElementTypeElement() and xmlValidateElementTypeExpr() shared
three instances of exactly the same code between them, and that this
code was causing my problem by not properly ignoring the
XML_XINCLUDE_START and XML_XINCLUDE_END nodes.

So, I refactored this out into its own internal function
xmlValidateFindNextElement(), and changed the other two to invoke it. I
modified this section of code to use a switch statement instead of a
series of if statements, which simplified it a good bit I think.

Also, there was one other place which seemed like it should be testing
for these ignorable nodes, although my sample didn't cause a problem
there. I added them.

I think this is maybe not the best way forward in general, esp. if you
envision more of these "ignorable node types" in the future.

Here's an idea: you create a bit which, if set in the node type value,
means "ignorable". Then for the ignorable flags you or (|) that bit
into the value of the type.

Now in the code when you want to skip ignorable nodes, you can walk
through them testing to see if that bit is set; if it is, the node is
ignorable. If not, it isn't.

The advantage here isn't so much speed (although it will be faster to
test this bit than testing equality with 5 or more values), but rather
ease of adding new ignorable nodes... rather than go through the entire
codebase determining which loops skip ignorable nodes and adding tests
for the new ones, you just set the bit on the type in tree.h and you're
done.

That might look something like this (in tree.h):

  #define XML_IGNORABLE_BIT 0x80000000

  typedef enum {
      XML_ELEMENT_NODE= 1,
      XML_ATTRIBUTE_NODE= 2,
      XML_TEXT_NODE= 3,
      XML_CDATA_SECTION_NODE= 4,
      XML_ENTITY_REF_NODE= 5,
      XML_ENTITY_NODE= 6,
      XML_PI_NODE= (7 | XML_IGNORABLE_BIT),
      XML_COMMENT_NODE= (8 | XML_IGNORABLE_BIT),
      XML_DOCUMENT_NODE= 9,
      XML_DOCUMENT_TYPE_NODE= 10,
      XML_DOCUMENT_FRAG_NODE= 11,
      XML_NOTATION_NODE= 12,
      XML_HTML_DOCUMENT_NODE= 13,
      XML_DTD_NODE= 14,
      XML_ELEMENT_DECL= 15,
      XML_ATTRIBUTE_DECL= 16,
      XML_ENTITY_DECL= 17,
      XML_NAMESPACE_DECL= 18,
      XML_XINCLUDE_START= (19 | XML_IGNORABLE_BIT),
      XML_XINCLUDE_END= (20 | XML_IGNORABLE_BIT)
  #ifdef LIBXML_SGML_ENABLED
     ,XML_SGML_DOCUMENT_NODE= 21
  #endif
  } xmlElementType;

  /* extern int xmlIsIgnorable(xmlElementType type); */
  #define xmlIsIgnorable(_t) ((_t) & XML_IGNORABLE_BIT)

Now use "if (xmlIsIgnorable(node->type))" as the test.

Alternatively, if you didn't want to change the values of the
xmlElementType enum like that, you could define the xmlIsIgnorable()
macro in tree.h to check to see if "type" was ignorable by testing all
the ignorable values, like this:

  /* NOTE! The argument has macro side-effects! */
  #define xmlIsIgnorable(_t) ((_t)==XML_PI_NODE \
                              || (_t)==XML_COMMENT_NODE \
                              || (_t)==XML_XINCLUDE_START \
                              || (_t)==XML_XINCLUDE_END)

This still pulls the "ignorability" into a single point of change, but
isn't as efficient. Well, anyway, here's my patch.

--- valid.c-dist Tue Nov 7 08:19:11 2000
+++ valid.c Tue Nov 28 22:59:08 2000
@@ -2900,6 +2900,54 @@
     return(ret);
 }
 
+/* Find the next XML_ELEMENT_NODE, subject to the content constraints.
+ * Return -1 if we found something unexpected, or 1 otherwise.
+ */
+
+static int
+xmlValidateFindNextElement(xmlValidCtxtPtr ctxt, xmlNodePtr *child,
+ xmlElementContentPtr cont)
+{
+ while (*child && (*child)->type != XML_ELEMENT_NODE) {
+ switch ((*child)->type) {
+ /*
+ * If there is an entity declared and it's not empty
+ * Push the current node on the stack and process with the
+ * entity content.
+ */
+ case XML_ENTITY_REF_NODE:
+ if (((*child)->children != NULL) &&
+ ((*child)->children->children != NULL)) {
+ nodeVPush(ctxt, *child);
+ *child = (*child)->children->children;
+ continue;
+ }
+ break;
+
+ /* These things are ignored (skipped) during validation. */
+ case XML_PI_NODE:
+ case XML_COMMENT_NODE:
+ case XML_XINCLUDE_START:
+ case XML_XINCLUDE_END:
+ break;
+
+ case XML_TEXT_NODE:
+ if (xmlIsBlankNode(*child)
+ && (cont->type == XML_ELEMENT_CONTENT_ELEMENT
+ || cont->type == XML_ELEMENT_CONTENT_SEQ
+ || cont->type == XML_ELEMENT_CONTENT_OR))
+ break;
+ return -1;
+
+ default:
+ return -1;
+ }
+ *child = (*child)->next;
+ }
+
+ return 1;
+}
+
 int xmlValidateElementTypeElement(xmlValidCtxtPtr ctxt, xmlNodePtr *child,
                                   xmlElementContentPtr cont);
 
@@ -2924,42 +2972,9 @@
 
     if (cont == NULL) return(-1);
     DEBUG_VALID_STATE(*child, cont)
- while (*child != NULL) {
- if ((*child)->type == XML_ENTITY_REF_NODE) {
- /*
- * If there is an entity declared an it's not empty
- * Push the current node on the stack and process with the
- * entity content.
- */
- if (((*child)->children != NULL) &&
- ((*child)->children->children != NULL)) {
- nodeVPush(ctxt, *child);
- *child = (*child)->children->children;
- } else
- *child = (*child)->next;
- continue;
- }
- if (((*child)->type == XML_TEXT_NODE) &&
- (xmlIsBlankNode(*child)) &&
- ((cont->type == XML_ELEMENT_CONTENT_ELEMENT) ||
- (cont->type == XML_ELEMENT_CONTENT_SEQ) ||
- (cont->type == XML_ELEMENT_CONTENT_OR))) {
- *child = (*child)->next;
- continue;
- }
- if ((*child)->type == XML_PI_NODE) {
- *child = (*child)->next;
- continue;
- }
- if ((*child)->type == XML_COMMENT_NODE) {
- *child = (*child)->next;
- continue;
- }
- else if ((*child)->type != XML_ELEMENT_NODE) {
+ ret = xmlValidateFindNextElement(ctxt, child, cont);
+ if (ret < 0)
             return(-1);
- }
- break;
- }
     DEBUG_VALID_STATE(*child, cont)
     switch (cont->type) {
         case XML_ELEMENT_CONTENT_PCDATA:
@@ -3032,47 +3047,14 @@
 xmlValidateElementTypeElement(xmlValidCtxtPtr ctxt, xmlNodePtr *child,
                               xmlElementContentPtr cont) {
     xmlNodePtr cur;
- int ret = 1;
+ int ret;
 
     if (cont == NULL) return(-1);
 
     DEBUG_VALID_STATE(*child, cont)
- while (*child != NULL) {
- if ((*child)->type == XML_ENTITY_REF_NODE) {
- /*
- * If there is an entity declared an it's not empty
- * Push the current node on the stack and process with the
- * entity content.
- */
- if (((*child)->children != NULL) &&
- ((*child)->children->children != NULL)) {
- nodeVPush(ctxt, *child);
- *child = (*child)->children->children;
- } else
- *child = (*child)->next;
- continue;
- }
- if (((*child)->type == XML_TEXT_NODE) &&
- (xmlIsBlankNode(*child)) &&
- ((cont->type == XML_ELEMENT_CONTENT_ELEMENT) ||
- (cont->type == XML_ELEMENT_CONTENT_SEQ) ||
- (cont->type == XML_ELEMENT_CONTENT_OR))) {
- *child = (*child)->next;
- continue;
- }
- if ((*child)->type == XML_PI_NODE) {
- *child = (*child)->next;
- continue;
- }
- if ((*child)->type == XML_COMMENT_NODE) {
- *child = (*child)->next;
- continue;
- }
- else if ((*child)->type != XML_ELEMENT_NODE) {
+ ret = xmlValidateFindNextElement(ctxt, child, cont);
+ if (ret < 0)
             return(-1);
- }
- break;
- }
     DEBUG_VALID_STATE(*child, cont)
     cur = *child;
     ret = xmlValidateElementTypeExpr(ctxt, child, cont);
@@ -3082,8 +3064,10 @@
             if (ret == 1) {
                 /* skip ignorable elems */
                 while ((*child != NULL) &&
- (((*child)->type == XML_PI_NODE) ||
- ((*child)->type == XML_COMMENT_NODE))) {
+ ((*child)->type == XML_PI_NODE
+ || (*child)->type == XML_COMMENT_NODE
+ || (*child)->type == XML_XINCLUDE_START
+ || (*child)->type == XML_XINCLUDE_END)) {
                     while ((*child)->next == NULL) {
                         if (((*child)->parent != NULL) &&
                             ((*child)->parent->type == XML_ENTITY_REF_NODE)) {
@@ -3119,8 +3103,8 @@
             do {
                 if (*child == NULL)
                     break; /* while */
- if (((*child)->type == XML_TEXT_NODE) &&
- (xmlIsBlankNode(*child))) {
+ if ((*child)->type == XML_TEXT_NODE
+ && xmlIsBlankNode(*child)) {
                     *child = (*child)->next;
                     continue;
                 }
@@ -3132,43 +3116,8 @@
             *child = cur;
             break;
     }
- while (*child != NULL) {
- if ((*child)->type == XML_ENTITY_REF_NODE) {
- /*
- * If there is an entity declared an it's not empty
- * Push the current node on the stack and process with the
- * entity content.
- */
- if (((*child)->children != NULL) &&
- ((*child)->children->children != NULL)) {
- nodeVPush(ctxt, *child);
- *child = (*child)->children->children;
- } else
- *child = (*child)->next;
- continue;
- }
- if (((*child)->type == XML_TEXT_NODE) &&
- (xmlIsBlankNode(*child)) &&
- ((cont->type == XML_ELEMENT_CONTENT_ELEMENT) ||
- (cont->type == XML_ELEMENT_CONTENT_SEQ) ||
- (cont->type == XML_ELEMENT_CONTENT_OR))) {
- *child = (*child)->next;
- continue;
- }
- if ((*child)->type == XML_PI_NODE) {
- *child = (*child)->next;
- continue;
- }
- if ((*child)->type == XML_COMMENT_NODE) {
- *child = (*child)->next;
- continue;
- }
- else if ((*child)->type != XML_ELEMENT_NODE) {
- return(-1);
- }
- break;
- }
- return(1);
+
+ return xmlValidateFindNextElement(ctxt, child, cont);
 }
 
 /**

-- 
-------------------------------------------------------------------------------
 Paul D. Smith <psmith@baynetworks.com>    HASMAT--HA Software Methods & Tools
 "Please remain calm...I may be mad, but I am a professional." --Mad Scientist
-------------------------------------------------------------------------------
   These are my opinions---Nortel Networks takes no responsibility for them.
----
Message from the list xml@rpmfind.net
Archived at : http://xmlsoft.org/messages/
to unsubscribe: echo "unsubscribe xml" | mail  majordomo@rpmfind.net


Date view Thread view Subject view Author view

This archive was generated by hypermail 2b29 : Tue Nov 28 2000 - 23:44:59 EST