W3C home > Mailing lists > Public > www-validator-cvs@w3.org > July 2007

validator/httpd/cgi-bin check,1.551,1.552

From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
Date: Mon, 30 Jul 2007 06:23:46 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1IFOfS-0004IP-4B@lionel-hutz.w3.org>

Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv15894

Modified Files:
	check 
Log Message:
More robust parsing of the error output from XML::LibXML,
taking into account the three-line structure of each error:

:12:  parser error : Error Message
   ... here markup quoted ...
           ^

(first the line number and the error message,
then a second line quoting the offending markup, which we ignore,
then a third line whose caret marks the column number)

This should fix http://www.w3.org/Bugs/Public/show_bug.cgi?id=4892



Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.551
retrieving revision 1.552
diff -u -d -r1.551 -r1.552
--- check	26 Jul 2007 21:41:51 -0000	1.551
+++ check	30 Jul 2007 06:23:43 -0000	1.552
@@ -653,19 +653,32 @@
     my $xmlwf_error_line = undef;
     my $xmlwf_error_col = undef;
     my $xmlwf_error_msg = undef;
+    my $got_error_message = 0;
+    my $got_quoted_line = 0;
     my $num_xmlwf_error = 0;
     foreach my $msg_line (split "\n", $xmlwf_errors){
+
       $msg_line =~ s{[^\x0d\x0a](:\d+:)}{\n$1}g;
       $msg_line =~ s{[^\x0d\x0a]+[\x0d\x0a]$}{};
-      if ($msg_line =~ /(:\d+:)(.*)/ ) {
+      
+      # first we get the actual error message
+      if (($got_error_message eq 0) and ($msg_line =~ /^(:\d+:)( parser error : .*)/ )) {
         $xmlwf_error_line = $1;
         $xmlwf_error_msg = $2;
         $xmlwf_error_line =~ s/:(\d+):/$1/;
         $xmlwf_error_msg =~ s/ parser error :/XML Parsing Error: /;
+        $got_error_message = 1;
       }
-      elsif ($msg_line =~ /(\s+)\^/) {
+      # then we skip the second line, which shows the context (we don't use that)
+      elsif (($got_error_message eq 1) and ($got_quoted_line eq 0)) {
+        $got_quoted_line = 1;
+      }
+      # we now take the third line, with the pointer to the error's column
+      elsif (($msg_line =~ /(\s+)\^/) and ($got_error_message eq 1) and ($got_quoted_line eq 1)) {
         $xmlwf_error_col = length($1);
       }
+
+      #  cleanup for a number of bugs for the column number
       if (defined($xmlwf_error_col)) {
         if ((my $l = length($File->{Content}->[$xmlwf_error_line-1])) < $xmlwf_error_col) {
           # http://bugzilla.gnome.org/show_bug.cgi?id=434196
@@ -684,7 +697,13 @@
         }
       }
 
+      # when we have all the info (one full error message), proceed and move on to the next error
       if ((defined $xmlwf_error_line) and (defined $xmlwf_error_col) and (defined $xmlwf_error_msg)){
+        # Reinitializing for the next batch of 3 lines
+        $got_error_message = 0;
+        $got_quoted_line = 0;
+        
+        # formatting the error message for output
         my $err;
         $err->{src}  = '...'; # do this with show_open_entities()?
         $err->{line} = $xmlwf_error_line;
@@ -695,7 +714,12 @@
 
         # The validator will sometimes fail to dereference entities files
         # we're filtering the bogus resulting error
-        next if ($err->{msg} =~ /Entity '\w+' not defined/);
+        if ($err->{msg} =~ /Entity '\w+' not defined/) {
+          $xmlwf_error_line = undef;
+          $xmlwf_error_col = undef;
+          $xmlwf_error_msg = undef;
+          next;
+        }
         push (@xmlwf_error_list, $err);
         $xmlwf_error_line = undef;
         $xmlwf_error_col = undef;
@@ -708,8 +732,8 @@
       $File->{'Is Valid'} = FALSE;
       push @{$File->{WF_Errors}}, $errmsg;
     }
-
   }
+  
 }
 
 
Received on Monday, 30 July 2007 06:23:53 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Thursday, 26 April 2012 12:54:58 GMT