- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Thu, 05 Feb 2009 21:54:11 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv2350
Modified Files:
check
Log Message:
* first implementation of XML::LibXML structured errors.
Will only work well with XML::LibXML > 0.69, since versions 0.67 to 0.69 have a bug that reports only a single error instead of all of them.
Also, there are a couple of apparent bugs with respect to the offset/column position of the errors -
see http://lists.w3.org/Archives/Public/public-qa-dev/2009Feb/0007.html
* fixing a bug whereby error column position reported by SGML::Parser::OpenSP would start at 0
* fixing a bug in direct input mode where no content-type would be passed to html5 validation engine, resulting in a crash and... no error reporting
Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.644
retrieving revision 1.645
diff -u -d -r1.644 -r1.645
--- check 4 Feb 2009 20:06:48 -0000 1.644
+++ check 5 Feb 2009 21:54:08 -0000 1.645
@@ -708,8 +708,43 @@
$xml_string = undef;
my $xml_parse_errors_line = undef;
my @xmlwf_error_list;
+ my @xmlwf_obj_error_list;
+ if (ref($@)) {
+ # handle a structured error (XML::LibXML::Error object)
+ # (lib XML::LibXML > 0.66, but will work MUCH better > 0.69 )
+ push (@xmlwf_obj_error_list, $@);
+ # my $prev_err = $@->_prev();
+ # die($prev_err->{msg});
+ my $err_obj = $@;
+ while($err_obj->_prev()) {
+ $err_obj = $err_obj->_prev();
+ unshift(@xmlwf_obj_error_list, $err_obj);
+ }
+ my $num_xmlwf_error = 0;
+ foreach my $err_obj (@xmlwf_obj_error_list){
+ #die($err_obj->dump());
+ my $err;
+ my $offset;
+ #if($err_obj->int2()) {$offset = $err_obj->int2();} # this should be the location of the column per http://xmlsoft.org/html/libxml-xmlerror.html
+ if ($err_obj->num2()) {$offset = $err_obj->num2();} # this is bogus but seems to be the actual behavior
+ $err->{src} = '...'; # do this with show_open_entities()?
+ $err->{line} = $err_obj->line();
+ $err->{char} = $offset;
+ $err->{num} = "libxml2-".$err_obj->code();
+ $err->{type} = "E";
+ $err->{msg} = $err_obj->message();
+ # The validator will sometimes fail to dereference entities files
+ # we're filtering the bogus resulting error
+ if ($err->{msg} =~ /Entity '\w+' not defined/) {
+ $err = undef;
+ next;
+ }
+ push (@xmlwf_error_list, $err);
+ $num_xmlwf_error++;
+ }
+ }
+
if ($@) {
-
my $xmlwf_errors = $@;
my $xmlwf_error_line = undef;
my $xmlwf_error_col = undef;
@@ -789,10 +824,10 @@
}
}
- foreach my $errmsg (@xmlwf_error_list){
- $File->{'Is Valid'} = FALSE;
- push @{$File->{WF_Errors}}, $errmsg;
- }
+ }
+ foreach my $errmsg (@xmlwf_error_list){
+ $File->{'Is Valid'} = FALSE;
+ push @{$File->{WF_Errors}}, $errmsg;
}
}
}
@@ -837,8 +872,12 @@
# most likely be a source of errors about internal/actual charset
# differences as long as our transcoding process does not "fix" the
# charset info in XML declaration and meta http-equiv (any others?).
-
- $req->content_type("$File->{ContentType}; charset=UTF-8");
+ if($File->{'Direct Input'}) { # sane default when using html5 validator by direct input
+ $req->content_type("text/html; charset=UTF-8");
+ }
+ else {
+ $req->content_type("$File->{ContentType}; charset=UTF-8");
+ }
$req->content(Encode::encode_utf8(join("\n", @{$File->{Content}})));
}
else {
@@ -906,7 +945,7 @@
}
if($attribute->name eq "last-column") {
- $html5_error_col = $attribute->getValue()-1;
+ $html5_error_col = $attribute->getValue();
}
if($attribute->name eq "last-line") {
$html5_error_line = $attribute->getValue();
@@ -2018,7 +2057,7 @@
} elsif ($col == length $line) { # If error is at EOL...
$length = $col - 1; # ...leave last char to indicate position.
} else { # Otherwise grab everything up to pos of error.
- $length = $col;
+ $length = $col-1;
}
$left = substr $line, $offset, $length;
}
@@ -2033,7 +2072,7 @@
if ($col == length $line) { # If err is at EOL...
$offset = $col - 1; # ...then grab last char on line instead.
} else {
- $offset = $col; # Otherwise just grab the char.
+ $offset = $col-1; # Otherwise just grab the char.
}
$char = substr $line, $offset, $length;
$char = &ent($char);
@@ -2050,7 +2089,7 @@
if ($col == length $line) { # If at EOL...
$offset = 0; # Don't bother as there is nothing left to grab.
} else {
- $offset = $col + 1; # Otherwise get everything from char-after-error.
+ $offset = $col; # Otherwise get everything from char-after-error.
}
# Length...
@@ -2926,7 +2965,7 @@
$err->{src} = '...'; # do this with show_open_entities()?
$err->{line} = $mess->{primary_message}{LineNumber};
- $err->{char} = $mess->{primary_message}{ColumnNumber};
+ $err->{char} = $mess->{primary_message}{ColumnNumber}+1;
$err->{num} = $mess->{primary_message}{Number};
$err->{type} = $mess->{primary_message}{Severity};
$err->{msg} = $mess->{primary_message}{Text};
Received on Thursday, 5 February 2009 21:54:20 UTC