- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Thu, 05 Feb 2009 21:54:11 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin In directory hutz:/tmp/cvs-serv2350 Modified Files: check Log Message: * first implementation of XML::LibXML structured errors. Will only work well with XML::LibXML > 0.69 since versions 0.67 to 0.69 have a bug reporting a single error instead of many. Also, there are a couple of apparent bugs wrt the offset/column position of the errors - see http://lists.w3.org/Archives/Public/public-qa-dev/2009Feb/0007.html * fixing a bug whereby error column position reported by SGML::Parser::OpenSP would start at 0 * fixing a bug in direct input mode where no content-type would be passed to html5 validation engine, resulting in a crash and... no error reporting Index: check =================================================================== RCS file: /sources/public/validator/httpd/cgi-bin/check,v retrieving revision 1.644 retrieving revision 1.645 diff -u -d -r1.644 -r1.645 --- check 4 Feb 2009 20:06:48 -0000 1.644 +++ check 5 Feb 2009 21:54:08 -0000 1.645 @@ -708,8 +708,43 @@ $xml_string = undef; my $xml_parse_errors_line = undef; my @xmlwf_error_list; + my @xmlwf_obj_error_list; + if (ref($@)) { + # handle a structured error (XML::LibXML::Error object) + # (lib XML::LibXML > 0.66, but will work MUCH better > 0.69 ) + push (@xmlwf_obj_error_list, $@); + # my $prev_err = $@->_prev(); + # die($prev_err->{msg}); + my $err_obj = $@; + while($err_obj->_prev()) { + $err_obj = $err_obj->_prev(); + unshift(@xmlwf_obj_error_list, $err_obj); + } + my $num_xmlwf_error = 0; + foreach my $err_obj (@xmlwf_obj_error_list){ + #die($err_obj->dump()); + my $err; + my $offset; + #if($err_obj->int2()) {$offset = $err_obj->int2();} # this should be the location of the column per http://xmlsoft.org/html/libxml-xmlerror.html + if ($err_obj->num2()) {$offset = $err_obj->num2();} # this is bogus but seems to be the actual behavior + $err->{src} = '...'; # do this with show_open_entities()? 
+ $err->{line} = $err_obj->line(); + $err->{char} = $offset; + $err->{num} = "libxml2-".$err_obj->code(); + $err->{type} = "E"; + $err->{msg} = $err_obj->message(); + # The validator will sometimes fail to dereference entities files + # we're filtering the bogus resulting error + if ($err->{msg} =~ /Entity '\w+' not defined/) { + $err = undef; + next; + } + push (@xmlwf_error_list, $err); + $num_xmlwf_error++; + } + } + if ($@) { - my $xmlwf_errors = $@; my $xmlwf_error_line = undef; my $xmlwf_error_col = undef; @@ -789,10 +824,10 @@ } } - foreach my $errmsg (@xmlwf_error_list){ - $File->{'Is Valid'} = FALSE; - push @{$File->{WF_Errors}}, $errmsg; - } + } + foreach my $errmsg (@xmlwf_error_list){ + $File->{'Is Valid'} = FALSE; + push @{$File->{WF_Errors}}, $errmsg; } } } @@ -837,8 +872,12 @@ # most likely be a source of errors about internal/actual charset # differences as long as our transcoding process does not "fix" the # charset info in XML declaration and meta http-equiv (any others?). - - $req->content_type("$File->{ContentType}; charset=UTF-8"); + if($File->{'Direct Input'}) { # sane default when using html5 validator by direct input + $req->content_type("text/html; charset=UTF-8"); + } + else { + $req->content_type("$File->{ContentType}; charset=UTF-8"); + } $req->content(Encode::encode_utf8(join("\n", @{$File->{Content}}))); } else { @@ -906,7 +945,7 @@ } if($attribute->name eq "last-column") { - $html5_error_col = $attribute->getValue()-1; + $html5_error_col = $attribute->getValue(); } if($attribute->name eq "last-line") { $html5_error_line = $attribute->getValue(); @@ -2018,7 +2057,7 @@ } elsif ($col == length $line) { # If error is at EOL... $length = $col - 1; # ...leave last char to indicate position. } else { # Otherwise grab everything up to pos of error. - $length = $col; + $length = $col-1; } $left = substr $line, $offset, $length; } @@ -2033,7 +2072,7 @@ if ($col == length $line) { # If err is at EOL... 
$offset = $col - 1; # ...then grab last char on line instead. } else { - $offset = $col; # Otherwise just grab the char. + $offset = $col-1; # Otherwise just grab the char. } $char = substr $line, $offset, $length; $char = &ent($char); @@ -2050,7 +2089,7 @@ if ($col == length $line) { # If at EOL... $offset = 0; # Don't bother as there is nothing left to grab. } else { - $offset = $col + 1; # Otherwise get everything from char-after-error. + $offset = $col; # Otherwise get everything from char-after-error. } # Length... @@ -2926,7 +2965,7 @@ $err->{src} = '...'; # do this with show_open_entities()? $err->{line} = $mess->{primary_message}{LineNumber}; - $err->{char} = $mess->{primary_message}{ColumnNumber}; + $err->{char} = $mess->{primary_message}{ColumnNumber}+1; $err->{num} = $mess->{primary_message}{Number}; $err->{type} = $mess->{primary_message}{Severity}; $err->{msg} = $mess->{primary_message}{Text};
Received on Thursday, 5 February 2009 21:54:20 UTC