validator/httpd/cgi-bin check,1.482,1.483

Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv30141/httpd/cgi-bin

Modified Files:
	check 
Log Message:
Rewrite the parse mode selection routine and add explanatory comments to the code.
See http://www.w3.org/Bugs/Public/show_bug.cgi?id=22 for explanations and testing.



Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.482
retrieving revision 1.483
diff -u -d -r1.482 -r1.483
--- check	16 Mar 2007 06:56:09 -0000	1.482
+++ check	16 Mar 2007 12:42:36 -0000	1.483
@@ -1735,48 +1735,58 @@
   my $File = shift;
   my $CFG = shift;
   my $fpi = $File->{DOCTYPE};
-  my $mode = $CFG->{Types}->{$fpi}->{'Parse Mode'};
+  my $parseModeFromDoctype = $CFG->{Types}->{$fpi}->{'Parse Mode'};
 
-  if (!exists $CFG->{Types}->{$fpi})
-  {
-    &add_warning('W08', {W08_mime => $File->{ContentType}})
-      if $File->{Mode} eq 'TBD';
+  my $parseModeFromMimeType = $File->{Mode};
+  
+  
+  if (($parseModeFromMimeType eq 'TBD') and (!exists $CFG->{Types}->{$fpi})) {
+    # the mime type is text/html (ambiguous, hence TBD mode) 
+    # and the doctype isn't in the catalogue... we scream
+    &add_warning('W08', {W08_mime => $File->{ContentType}});
     return;
   }
+  
+  $parseModeFromDoctype = 'TBD' unless $parseModeFromDoctype eq 'SGML' or $parseModeFromDoctype eq 'XML';
 
-  $mode = 'TBD' unless $mode eq 'SGML' or $mode eq 'XML';
-
-  # if the document type is not known ($mode = TBD) 
-  # but the content-type clearly shows we should used SGML or XML ($File->{Mode} ne 'TBD')
-  # then we're happy - we use that
-  if (($mode eq 'TBD') and ($File->{Mode} ne 'TBD')) {
-    $mode = $File->{Mode};
+  if (($parseModeFromDoctype eq 'TBD') and ($parseModeFromMimeType eq 'TBD')) {
+    # if both doctype and mime type are useless to give us a parse mode
+    # => we use SGML as a default
+    $File->{Mode} = 'SGML';
+    
+    &add_warning('W06', {
+      W06_mime => $File->{ContentType},
+      w06_doctype => $File->{Version}
+    });
+    return;
   }
-
-  #  if document-type recommended mode and content-type recommended mode clash
-  if (($mode ne 'TBD') and ($mode ne $File->{Mode}) ) {
+  elsif ($parseModeFromDoctype eq 'TBD') {
+    # doctype does not give us anything clear (e.g custom DTD)
+    # but mime type gives clear indication 
+    # => we just use what the content type tells us - move along
+    return;
+  }
+  elsif ($parseModeFromMimeType eq 'TBD') {
+    # the mime type is text/html (ambiguous, hence TBD mode) 
+    # but by now we're sure that the document type is a good indication
+    # so we use that.
+    $File->{Mode} = $parseModeFromDoctype;
+    return;
+  }
+  elsif ($parseModeFromMimeType ne $parseModeFromDoctype) {
+    #  if document-type recommended mode and content-type recommended mode clash
+    # shoot a warning
     &add_warning('W07', {
       W07_mime => $File->{ContentType},
-      W07_ct   => $File->{Mode},
-      W07_dtd  => $mode,
+      W07_ct   => $parseModeFromMimeType,
+      W07_dtd  => $parseModeFromDoctype,
     });
     return;
-  }
-
-  if ($mode eq 'SGML' or $mode eq 'XML') {
-    $File->{Mode} = $mode
-  } else {
-    # if by now we still don't know, use SGML as default.
-    # but warn that we are not pleased
     
-    $File->{Mode} = 'SGML';
-    &add_warning('W06', {
-      W06_mime => $File->{ContentType},
-      w06_doctype => $File->{Version}
-    });
   }
 }
 
+
 #
 # Utility sub to tell if mode "is" XML.
 sub is_xml {shift->{Mode} eq 'XML'};

Received on Friday, 16 March 2007 12:42:48 UTC