- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Fri, 16 Mar 2007 12:42:39 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin In directory hutz:/tmp/cvs-serv30141/httpd/cgi-bin Modified Files: check Log Message: rewriting parse mode choice routine, adding comments to code See http://www.w3.org/Bugs/Public/show_bug.cgi?id=22 for explanations and testing Index: check =================================================================== RCS file: /sources/public/validator/httpd/cgi-bin/check,v retrieving revision 1.482 retrieving revision 1.483 diff -u -d -r1.482 -r1.483 --- check 16 Mar 2007 06:56:09 -0000 1.482 +++ check 16 Mar 2007 12:42:36 -0000 1.483 @@ -1735,48 +1735,58 @@ my $File = shift; my $CFG = shift; my $fpi = $File->{DOCTYPE}; - my $mode = $CFG->{Types}->{$fpi}->{'Parse Mode'}; + my $parseModeFromDoctype = $CFG->{Types}->{$fpi}->{'Parse Mode'}; - if (!exists $CFG->{Types}->{$fpi}) - { - &add_warning('W08', {W08_mime => $File->{ContentType}}) - if $File->{Mode} eq 'TBD'; + my $parseModeFromMimeType = $File->{Mode}; + + + if (($parseModeFromMimeType eq 'TBD') and (!exists $CFG->{Types}->{$fpi})) { + # the mime type is text/html (ambiguous, hence TBD mode) + # and the doctype isn't in the catalogue... we scream + &add_warning('W08', {W08_mime => $File->{ContentType}}); return; } + + $parseModeFromDoctype = 'TBD' unless $parseModeFromDoctype eq 'SGML' or $parseModeFromDoctype eq 'XML'; - $mode = 'TBD' unless $mode eq 'SGML' or $mode eq 'XML'; - - # if the document type is not known ($mode = TBD) - # but the content-type clearly shows we should used SGML or XML ($File->{Mode} ne 'TBD') - # then we're happy - we use that - if (($mode eq 'TBD') and ($File->{Mode} ne 'TBD')) { - $mode = $File->{Mode}; + if (($parseModeFromDoctype eq 'TBD') and ($parseModeFromMimeType eq 'TBD')) { + # if both doctype and mime type are useless to give us a parse mode + # => we use SGML as a default + $File->{Mode} = 'SGML'; + + &add_warning('W06', { + W06_mime => $File->{ContentType}, + w06_doctype => $File->{Version} + }); + return; } - - # if document-type recommended mode and content-type recommended mode clash - if (($mode ne 'TBD') and ($mode ne $File->{Mode}) ) { + elsif ($parseModeFromDoctype eq 'TBD') { + # doctype does not give us anything clear (e.g custom DTD) + # but mime type gives clear indication + # => we just use what the content type tells us - move along + return; + } + elsif ($parseModeFromMimeType eq 'TBD') { + # the mime type is text/html (ambiguous, hence TBD mode) + # but by now we're sure that the document type is a good indication + # so we use that. + $File->{Mode} = $parseModeFromDoctype; + return; + } + elsif ($parseModeFromMimeType ne $parseModeFromDoctype) { + # if document-type recommended mode and content-type recommended mode clash + # shoot a warning &add_warning('W07', { W07_mime => $File->{ContentType}, - W07_ct => $File->{Mode}, - W07_dtd => $mode, + W07_ct => $parseModeFromMimeType, + W07_dtd => $parseModeFromDoctype, }); return; - } - - if ($mode eq 'SGML' or $mode eq 'XML') { - $File->{Mode} = $mode - } else { - # if by now we still don't know, use SGML as default. - # but warn that we are not pleased - $File->{Mode} = 'SGML'; - &add_warning('W06', { - W06_mime => $File->{ContentType}, - w06_doctype => $File->{Version} - }); } } + # # Utility sub to tell if mode "is" XML. sub is_xml {shift->{Mode} eq 'XML'};
Received on Friday, 16 March 2007 12:42:48 UTC