W3C home > Mailing lists > Public > www-validator-cvs@w3.org > July 2010

markup-validator commit: Don't filter out undefined entity errors from libxml.

From: Mercurial notifier <nobody@w3.org>
Date: Thu, 15 Jul 2010 16:08:55 +0000
To: markup-validator updates <www-validator-cvs@w3.org>
Message-Id: <E1OZQzX-0001Lx-3T@blinky.w3.org>
changeset:   3127:4e7d67c43cf1
user:        Ville Skyttä <ville.skytta@iki.fi>
date:        Thu Jul 15 19:05:24 2010 +0300
files:       httpd/cgi-bin/check
description:
Don't filter out undefined entity errors from libxml.

We now let libxml fetch external entities (resolving them through catalogs),
so the false-positive "Entity not defined" errors should no longer occur.


diff -r a04b16736235 -r 4e7d67c43cf1 httpd/cgi-bin/check
--- a/httpd/cgi-bin/check	Thu Jul 15 18:30:01 2010 +0300
+++ b/httpd/cgi-bin/check	Thu Jul 15 19:05:24 2010 +0300
@@ -1277,21 +1277,7 @@
 
     &override_charset($File, "UTF-8");
 
-    my $xml_string = join "\n", @{$File->{Content}};
-
-    my $xmlws = qr/[\x20\x09\x0D\x0A]/o;
-
-    # Is the document standalone?  Need to check with a regex because the
-    # parser may fail to return a document we could use for this.
-    my $standalone = (
-        $xml_string =~ /^<\?xml\b[^>]*${xmlws}
-                                      standalone${xmlws}*=${xmlws}*
-                                      (["'])yes\1
-                                     /sox
-    );
-
-    eval { $xmlparser->parse_string($xml_string); };
-    $xml_string = undef;
+    eval { $xmlparser->parse_string(join("\n", @{$File->{Content}})); };
     my @xmlwf_error_list;
 
     if (ref($@)) {
@@ -1310,16 +1296,6 @@
             $err->{msg}  = $err_obj->message();
 
             $err_obj = $err_obj->_prev();
-
-            # The validator will sometimes fail to dereference entities files;
-            # we're filtering the resulting bogus error for non-standalone
-            # documents. @@@TODO: is this still needed?
-            if (!$standalone &&
-                $err->{msg} =~ /Entity '\w+' not defined/)
-            {
-                $err = undef;
-                next;
-            }
 
             unshift(@xmlwf_error_list, $err);
         }
@@ -1411,17 +1387,6 @@
                 $err->{type} = "E";
                 $err->{msg}  = $xmlwf_error_msg;
 
-                # The validator will sometimes fail to dereference entities
-                # files; we're filtering the resulting bogus error for
-                # non-standalone documents. @@@TODO: is this still needed?
-                if (!$standalone &&
-                    $err->{msg} =~ /Entity '\w+' not defined/)
-                {
-                    $xmlwf_error_line = undef;
-                    $xmlwf_error_col  = undef;
-                    $xmlwf_error_msg  = undef;
-                    next;
-                }
                 push(@xmlwf_error_list, $err);
                 $xmlwf_error_line = undef;
                 $xmlwf_error_col  = undef;
Received on Thursday, 15 July 2010 16:08:56 UTC

This archive was generated by hypermail 2.3.1 : Wednesday, 7 January 2015 15:17:43 UTC