W3C home > Mailing lists > Public > html-tidy@w3.org > April to June 2000

Re: Java Tidy Backlog

From: Andy Quick <ac.quick@sympatico.ca>
Date: Sat, 22 Apr 2000 12:04:31 -0400
Message-ID: <003801bfac76$675e1280$8a42acce@quick>
To: <html-tidy@w3.org>
Cc: <smarks@digisolutions.com>
With the current version of Java tidy, the comment shows up in
the DOM tree as a COMMENT_NODE in both cases.  I use the
following code to print out a representation of a DOM tree.

Andy Quick
> 
> Note: the big difference is the comment element is treated properly in
> the second case. That is 
>  <!--  EM { font-family: garamond, serif; }  A { text-decoration:
> none; }   -->
> does show up as a text node. 
> 
> So my question, perhaps bug, is why does the presence of the Style
> induce what appears to be incorrect behavior. That is, why does the
> presence of the STYLE tag cause the comment tag to be treated as a
> text node and not an element node?

import java.io.PrintWriter;
import java.io.FileInputStream;
import java.io.IOException;

import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import org.w3c.tidy.Tidy;

/**
 * Debugging aid: parses the file named on the command line with Tidy and
 * prints a bracketed, single-line textual representation of the DOM tree.
 */
public class PrintDOM {

    /* --------------------- DEBUG -------------------------- */

    /**
     * Symbolic names indexed by the value returned from
     * {@link Node#getNodeType()}. Index 0 is a placeholder so that
     * ELEMENT_NODE lands on index 1.
     */
    private static final String[] nodeTypeString =
    {
        "dummy",
        "ELEMENT_NODE",
        "ATTRIBUTE_NODE",
        "TEXT_NODE",
        "CDATA_SECTION_NODE",
        "ENTITY_REFERENCE_NODE",
        "ENTITY_NODE",
        "PROCESSING_INSTRUCTION_NODE",
        "COMMENT_NODE",
        "DOCUMENT_NODE",
        "DOCUMENT_TYPE_NODE",
        "DOCUMENT_FRAGMENT_NODE",
        "NOTATION_NODE"
    };

    /**
     * Renders a node and, recursively, all of its descendants as
     * {@code [Node type=T,name=N,value=V,children=C]}.
     *
     * <p>Newlines inside the node value are replaced by {@code '|'} so the
     * whole tree stays on one line. {@code C} is the comma-separated
     * rendering of the children, {@code 0} when there are none, or
     * {@code null} when the node reports no child list at all.
     *
     * @param n the node to render; a {@code null} node is rendered as
     *          the literal text {@code null}
     * @return the textual representation of the subtree rooted at {@code n}
     */
    public String toString(Node n)
    {
        // One shared builder for the whole recursion avoids re-copying the
        // accumulated text at every level of the tree.
        StringBuilder out = new StringBuilder();
        appendNode(out, n);
        return out.toString();
    }

    /** Appends the rendering of {@code n} and its descendants to {@code out}. */
    private static void appendNode(StringBuilder out, Node n)
    {
        if (n == null) {
            out.append("null");
            return;
        }

        out.append("[Node type=").append(typeName(n.getNodeType()));

        String name = n.getNodeName();
        out.append(",name=").append(name != null ? name : "null");

        String value = n.getNodeValue();
        out.append(",value=");
        if (value != null) {
            out.append(value.replace('\n', '|'));
        } else {
            out.append("null");
        }

        out.append(",children=");
        NodeList children = n.getChildNodes();
        if (children == null) {
            out.append("null");
        } else {
            int len = children.getLength();
            if (len == 0) {
                out.append("0");
            }
            for (int i = 0; i < len; i++) {
                if (i > 0) {
                    out.append(",");
                }
                appendNode(out, children.item(i));
            }
        }
        out.append("]");
    }

    /** Maps a node type code to its symbolic name, tolerating unknown codes. */
    private static String typeName(int type)
    {
        if (type >= 0 && type < nodeTypeString.length) {
            return nodeTypeString[type];
        }
        return "UNKNOWN_NODE(" + type + ")";
    }
    /* --------------------- END DEBUG ---------------------- */

    /**
     * Parses the file named by {@code args[0]} and prints its DOM tree to
     * standard output. The file name and any error are echoed to standard
     * error. Exits with status 1 when no file name is given.
     *
     * @param args command-line arguments; {@code args[0]} is the input file
     */
    public static void main(String args[]) {

        if (args.length == 0) {
            System.err.println("usage: java PrintDOM <file>");
            System.exit(1);
        }

        System.err.println(args[0]);

        Tidy tidy = new Tidy();
        PrintDOM t = new PrintDOM();
        FileInputStream in = null;

        try {
            in = new FileInputStream(args[0]);
            tidy.setXmlTags(true);
            System.out.println("Tidy DOM tree");
            System.out.print(t.toString(tidy.parseDOM(in, null)));
        }
        catch (Exception e) {
            // Top-level boundary of a debug tool: report and fall through.
            System.err.println(e.toString());
        }
        finally {
            // Always release the file handle, even when parsing fails.
            if (in != null) {
                try {
                    in.close();
                }
                catch (IOException e) {
                    System.err.println(e.toString());
                }
            }
        }

    }


}
Received on Saturday, 22 April 2000 12:20:15 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Tuesday, 3 April 2012 06:13:43 GMT