- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Mon, 19 Mar 2007 00:58:46 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin In directory hutz:/tmp/cvs-serv13633/httpd/cgi-bin Modified Files: check Log Message: Adding XML well-formedness parsing for XML docs with ideas from Jacques Distler http://golem.ph.utexas.edu/~distler/blog/archives/001054.html We could be using only the XML parser and not parsing twice, but at the moment I am tempted to keep using opensp, for as long as we have the best library of error messages and explanations for it. Code is not really pretty, and should be amended at some point to use a real SAX ErrorHandler. That said, it very much tolls the bell for the "Validator XML support has some limitations." message. Index: check =================================================================== RCS file: /sources/public/validator/httpd/cgi-bin/check,v retrieving revision 1.483 retrieving revision 1.484 diff -u -d -r1.483 -r1.484 --- check 16 Mar 2007 12:42:36 -0000 1.483 +++ check 19 Mar 2007 00:58:44 -0000 1.484 @@ -604,6 +604,79 @@ # Sanity check Charset information and add any warnings necessary. $File = &charset_conflicts($File); + + +# before we start the parsing, clean slate +$File->{'Is Valid'} = TRUE; +$File->{Errors} = []; + +# preparse with XML parser if necessary +# we should really be using a SAX ErrorHandler, but I can't find +# a way to make it work with XML::LibXML::SAX::Parser... 
** FIXME ** +# ditto, we should try using W3C::Validator::ErrorHandler, +# but it's badly linked to opensp at the moment +if (&is_xml($File)) { + + use XML::LibXML; + my $xmlparser = XML::LibXML->new(); + $xmlparser->line_numbers(1); + eval { + $xmlparser->parse_string(join"\n",@{$File->{Content}}); + }; + my $xml_parse_errors_line = undef; + my @xmlwf_error_list; + if ($@) { + + my $xmlwf_errors = $@; + my $xmlwf_error_line = undef; + my $xmlwf_error_col = undef; + my $xmlwf_error_msg = undef; + my $num_xmlwf_error = 0; + my $last_err_msg = undef; + my $err; + foreach my $msg_line (split "\n", $xmlwf_errors){ + $msg_line =~ s{[^\x0d\x0a](:\d+:)}{\n$1}g; + $msg_line =~ s{[^\x0d\x0a]+[\x0d\x0a]$}{}; + if ($msg_line =~ /(:\d+:)(.*)/ ){ + $xmlwf_error_line = $1; + $xmlwf_error_msg = $2; + $xmlwf_error_line =~ s/:(\d+):/$1/; + $xmlwf_error_msg =~ s/ parser error :/XML Parsing Error: /; + } + if ($msg_line =~ /(.+)\^/){ + $xmlwf_error_col = length($1); + } + + if ((defined $xmlwf_error_line) and (defined $xmlwf_error_col) and (defined $xmlwf_error_msg)){ + $err->{src} = '...'; # do this with show_open_entities()? + $err->{line} = $xmlwf_error_line; + $err->{char} = $xmlwf_error_col; + $err->{num} = 0; + $err->{type} = "E"; + $err->{msg} = $xmlwf_error_msg; + + # ... + $last_err_msg = $err; + push (@xmlwf_error_list, $err); + $err = undef; + $xmlwf_error_line = undef; + $xmlwf_error_col = undef; + $xmlwf_error_msg = undef; + $num_xmlwf_error++; + + } + } + foreach my $errmsg (@xmlwf_error_list){ + $File->{'Is Valid'} = FALSE; + push @{$File->{Errors}}, $errmsg; + } + + } +} + + + + # # Abandon all hope ye who enter here... $File = &parse($File); @@ -631,6 +704,7 @@ # FIXME when fixed s:p:o gets released } + # # Parser configuration $opensp->search_dirs($CFG->{Paths}->{SGML}->{Library}); @@ -680,11 +754,12 @@ # # Set Version to be the FPI initially. $File->{Version} = $File->{DOCTYPE}; - return $File; } + + # # Force "XML" if type is an XML type and an FPI was not found. 
# Otherwise set the type to be the FPI. @@ -2052,10 +2127,6 @@ my $self = { _file => $File, _parser => $parser }; - # ... - $File->{'Is Valid'} = TRUE; - $File->{Errors} = []; - bless $self, $class; }
Received on Monday, 19 March 2007 00:58:50 UTC