- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Tue, 17 Mar 2009 16:27:36 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin In directory hutz:/tmp/cvs-serv8841/httpd/cgi-bin Modified Files: check Log Message: EXPERIMENTAL - trying to see whether we could pass doctypeless SVG, or any doctypeless XML with multiple namespaces found, to validator.nu engine Index: check =================================================================== RCS file: /sources/public/validator/httpd/cgi-bin/check,v retrieving revision 1.651 retrieving revision 1.652 diff -u -d -r1.651 -r1.652 --- check 2 Mar 2009 18:41:14 -0000 1.651 +++ check 17 Mar 2009 16:27:34 -0000 1.652 @@ -845,11 +845,148 @@ $File->{Templates}->{Error}->param(fatal_missing_checker => "HTML5 Validator"); } } +elsif(($File->{DOCTYPE} eq '') and (($File->{Root} eq "svg") or @{$File->{Namespaces}} >1)){ + # we send doctypeless SVG, or any doctypeless XML document with multiple namespaces found, to a different engine + # WARNING this is experimental. + if ($CFG->{External}->{CompoundXML}) { + $File = &compoundxml_validate($File); + &add_warning('W00', { + W00_experimental_name => "validator.nu Conformance Checker", + W00_experimental_URI => "feedback.html" + }); + } +} else { $File = &dtd_validate($File); } &abort_if_error_flagged($File, 0); +sub compoundxml_validate (\$) { + my $File = shift; + my $ua = new W3C::Validator::UserAgent ($CFG, $File); + + $File->{ParserName} = "validator.nu"; + $File->{ParserOpts} = ""; + + my $url = URI->new($CFG->{External}->{CompoundXML}); + $url->query_form(out => "xml"); + + my $req = HTTP::Request->new(POST => $url); + + if ($File->{Opt}->{DOCTYPE} || $File->{Charset}->{Override}) { + # Doctype or charset overridden, need to use $File->{Content} in UTF-8 + # because $File->{Bytes} is not affected by the overrides. This will + # most likely be a source of errors about internal/actual charset + # differences as long as our transcoding process does not "fix" the + # charset info in XML declaration and meta http-equiv (any others?). + if($File->{'Direct Input'}) { # sane default when using html5 validator by direct input + $req->content_type("application/xml; charset=UTF-8"); + } + else { + $req->content_type("$File->{ContentType}; charset=UTF-8"); + } + $req->content(Encode::encode_utf8(join("\n", @{$File->{Content}}))); + } + else { + # Pass original bytes, Content-Type and charset as-is. + # We trust that our and validator.nu's interpretation of line numbers + # is the same (regardless of EOL chars used in the document). + + my @content_type = ($File->{ContentType} => undef); + push(@content_type, charset => $File->{Charset}->{HTTP}) + if $File->{Charset}->{HTTP}; + + $req->content_type(HTTP::Headers::Util::join_header_words(@content_type)); + $req->content_ref(\$File->{Bytes}); + } + + $req->content_language($File->{ContentLang}) if $File->{ContentLang}; + # Intentionally using direct header access instead of $req->last_modified + $req->header('Last-Modified', $File->{Modified}) if $File->{Modified}; + + # If not in debug mode, gzip the request (LWP >= 5.817) + eval { $req->encode("gzip"); } unless $File->{Opt}->{Debug}; + + my $res = $ua->request($req); + if (! $res->is_success()) { + $File->{'Error Flagged'} = TRUE; + $File->{Templates}->{Error}->param(fatal_no_checker => TRUE); + $File->{Templates}->{Error}->param(fatal_missing_checker => "HTML5 Validator"); + } + else { + my $content = $res->can('decoded_content') ? + $res->decoded_content(charset => 'none') : $res->content; + # and now we parse according to http://wiki.whatwg.org/wiki/Validator.nu_XML_Output + # I wish we could use XML::LibXML::Reader here. but SHAME on those major + # unix distributions still shipping with libxml2 2.6.16… 4 years after its release + my $xml_reader = XML::LibXML->new(); + my $xmlDOM; + eval { $xmlDOM = $xml_reader->parse_string( $content);}; + if ($@) { + $File->{'Error Flagged'} = TRUE; + $File->{Templates}->{Error}->param(fatal_no_checker => TRUE); + $File->{Templates}->{Error}->param(fatal_missing_checker => "HTML5 Validator"); + return $File; + } + my @nodelist = $xmlDOM->getElementsByTagName("messages"); + my $messages_node = $nodelist[0]; + my @message_nodes = $messages_node->childNodes; + foreach my $message_node (@message_nodes) { + my $message_type = $message_node->localname; + my $err; + my ($xml_error_line, $xml_error_col, $xml_error_msg, $xml_error_expl); + if ($message_type eq "error") { + $err->{type} = "E"; + $File->{'Is Valid'} = FALSE; + } + elsif ($message_type eq "info") { + $err->{type} = "I"; # by default - we find warnings in the type attribute (below) + } + if ($message_node->hasAttributes()) { + my @attributelist = $message_node->attributes(); + foreach my $attribute (@attributelist) { + if($attribute->name eq "type"){ + if (($attribute->getValue() eq "warning") and ($message_type eq "info")) { + $err->{type} = "W"; + } + + } + if($attribute->name eq "last-column") { + $xml_error_col = $attribute->getValue(); + } + if($attribute->name eq "last-line") { + $xml_error_line = $attribute->getValue(); + } + + } + } + my @child_nodes = $message_node->childNodes; + foreach my $child_node (@child_nodes) { + if ($child_node->localname eq "message") { + $xml_error_msg= $child_node->toString(); + $xml_error_msg =~ s,</?[^>]*>,,gsi; + } + if ($child_node->localname eq "elaboration") { + $xml_error_expl = $child_node->toString(); + $xml_error_expl =~ s,</?elaboration>,,gi; + $xml_error_expl = "\n<div class=\"ve xml\">$xml_error_expl</div>\n"; + } + } + # formatting the error message for output + $err->{src} = '...'; # do this with show_open_entities()? + $err->{line} = $xml_error_line; + $err->{char} = $xml_error_col; + $err->{num} = 'html5'; + $err->{msg} = $xml_error_msg; + $err->{expl} = $xml_error_expl; + push @{$File->{Errors}}, $err; + # @@ TODO message explanation / elaboration + } + } +return $File; +} + + sub html5_validate (\$) { my $File = shift; my $ua = new W3C::Validator::UserAgent ($CFG, $File); @@ -2219,41 +2356,41 @@ # root element and some version attribute is enough # TODO applicable doctypes should be migrated to a config file? - if (($File->{DOCTYPE} eq '') and ($File->{Root} eq "svg") ) { - if (($File->{'Root Version'}) or ($File->{'Root BaseProfile'})) - { - if (! $File->{'Root Version'}) { $File->{'Root Version'} = "0"; } - if (! $File->{'Root BaseProfile'}) { $File->{'Root BaseProfile'} = "0"; } - if ($File->{'Root Version'} eq "1.0"){ - $File->{DOCTYPE} = "-//W3C//DTD SVG 1.0//EN"; - $File->{"DOCTYPEless OK"} = TRUE; - $File->{Opt}->{DOCTYPE} = "SVG 1.0"; - } - if ((($File->{'Root Version'} eq "1.1") or ($File->{'Root Version'} eq "0")) and ($File->{'Root BaseProfile'} eq "tiny")) { - $File->{DOCTYPE} = "-//W3C//DTD SVG Tiny 1.1//EN"; - $File->{"DOCTYPEless OK"} = TRUE; - $File->{Opt}->{DOCTYPE} = "SVG 1.1 Tiny"; - } - elsif ((($File->{'Root Version'} eq "1.1") or ($File->{'Root Version'} eq "0")) and ($File->{'Root BaseProfile'} eq "basic")) { - $File->{DOCTYPE} = "-//W3C//DTD SVG Basic 1.1//EN"; - $File->{Opt}->{DOCTYPE} = "SVG 1.1 Basic"; - $File->{"DOCTYPEless OK"} = TRUE; - } - elsif (($File->{'Root Version'} eq "1.1") and (!$File->{'Root BaseProfile'})) { - $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1//EN"; - $File->{Opt}->{DOCTYPE} = "SVG 1.1"; - $File->{"DOCTYPEless OK"} = TRUE; - } - if ($File->{'Root Version'} eq "0") { $File->{'Root Version'} = undef; } - if ($File->{'Root BaseProfile'} eq "0") { $File->{'Root BaseProfile'} = undef; } - } - else { - # by default for an svg root elt, we use SVG 1.1 - $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1//EN"; - $File->{Opt}->{DOCTYPE} = "SVG 1.1"; - $File->{"DOCTYPEless OK"} = TRUE; - } - } + # if (($File->{DOCTYPE} eq '') and ($File->{Root} eq "svg") ) { + # if (($File->{'Root Version'}) or ($File->{'Root BaseProfile'})) + # { + # if (! $File->{'Root Version'}) { $File->{'Root Version'} = "0"; } + # if (! $File->{'Root BaseProfile'}) { $File->{'Root BaseProfile'} = "0"; } + # if ($File->{'Root Version'} eq "1.0"){ + # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.0//EN"; + # $File->{"DOCTYPEless OK"} = TRUE; + # $File->{Opt}->{DOCTYPE} = "SVG 1.0"; + # } + # if ((($File->{'Root Version'} eq "1.1") or ($File->{'Root Version'} eq "0")) and ($File->{'Root BaseProfile'} eq "tiny")) { + # $File->{DOCTYPE} = "-//W3C//DTD SVG Tiny 1.1//EN"; + # $File->{"DOCTYPEless OK"} = TRUE; + # $File->{Opt}->{DOCTYPE} = "SVG 1.1 Tiny"; + # } + # elsif ((($File->{'Root Version'} eq "1.1") or ($File->{'Root Version'} eq "0")) and ($File->{'Root BaseProfile'} eq "basic")) { + # $File->{DOCTYPE} = "-//W3C//DTD SVG Basic 1.1//EN"; + # $File->{Opt}->{DOCTYPE} = "SVG 1.1 Basic"; + # $File->{"DOCTYPEless OK"} = TRUE; + # } + # elsif (($File->{'Root Version'} eq "1.1") and (!$File->{'Root BaseProfile'})) { + # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1//EN"; + # $File->{Opt}->{DOCTYPE} = "SVG 1.1"; + # $File->{"DOCTYPEless OK"} = TRUE; + # } + # if ($File->{'Root Version'} eq "0") { $File->{'Root Version'} = undef; } + # if ($File->{'Root BaseProfile'} eq "0") { $File->{'Root BaseProfile'} = undef; } + # } + # else { + # # by default for an svg root elt, we use SVG 1.1 + # $File->{DOCTYPE} = "-//W3C//DTD SVG 1.1//EN"; + # $File->{Opt}->{DOCTYPE} = "SVG 1.1"; + # $File->{"DOCTYPEless OK"} = TRUE; + # } + # } if (($File->{"DOCTYPEless OK"}) and ($File->{Opt}->{DOCTYPE})) { # doctypeless document type found, we fake the override # so that the parser will have something to validate against
Received on Tuesday, 17 March 2009 16:27:47 UTC