validator/httpd/cgi-bin check,1.414,1.415

Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv13287

Modified Files:
	check 
Log Message:
Applying patch from:
http://www.w3.org/Bugs/Public/show_bug.cgi?id=1184

This should simplify/clarify preparse_doctype() without any change 
to the actual doctype detection algorithm (yet).

Also leaves the door open to different handling of documents without an FPI
(which currently cause a "no doctype" warning).




Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.414
retrieving revision 1.415
diff -u -d -r1.414 -r1.415
--- check	9 Apr 2005 17:31:42 -0000	1.414
+++ check	19 Apr 2005 05:52:40 -0000	1.415
@@ -1724,16 +1724,10 @@
 sub preparse_doctype {
   my $File = shift;
 
-  #
-  # Reset DOCTYPE, Root (for second invocation, probably not needed anymore).
-  $File->{DOCTYPE}         = '';
-  $File->{Root}            = '';
-
   my $dtd = sub {
     return if $File->{Root};
-    ($File->{Root}, $File->{DOCTYPE}) = shift =~  m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si;
   };
-
+  
   my $start = sub {
     my $tag  = shift;
     my $attr = shift;
@@ -1747,14 +1741,25 @@
     if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}};
   };
 
-  my $p = HTML::Parser->new(api_version => 3);
-  $p->xml_mode(TRUE);
-  $p->ignore_elements('BODY');
-  $p->ignore_elements('body');
-  $p->handler(declaration => $dtd, 'text');
-  $p->handler(start => $start, 'tag,attr');
-  $p->parse(join "\n", @{$File->{Content}});
 
+  if (! $File->{DOCTYPE}){
+    my $allcontent = join "\n", @{$File->{Content}};
+    if ($allcontent =~ /<!DOCTYPE([^>]*)>/iso){
+      my $genericdoctype = $1;
+      $_ = $genericdoctype;
+      if (m:.(\w+)\s+PUBLIC\s+(.*):iso)
+        {  
+          my $publicdoctype = $2;
+          $publicdoctype =~ m/[\"\']([^\"\']+)[\"\']/;
+          $File->{DOCTYPE} = $1;
+        }
+      elsif (m:(\w+)\s+SYSTEM\s+[\"\'](.*)[\"\']:iso)
+        {
+          # nothing yet
+        }
+        
+      }
+  }
   $File->{DOCTYPE} = '' unless defined $File->{DOCTYPE};
   $File->{DOCTYPE} =~ s(^\s+){ }g;
   $File->{DOCTYPE} =~ s(\s+$){ }g;

Received on Tuesday, 19 April 2005 05:52:44 UTC