- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Tue, 19 Apr 2005 05:52:42 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin In directory hutz:/tmp/cvs-serv13287 Modified Files: check Log Message: Applying patch from: http://www.w3.org/Bugs/Public/show_bug.cgi?id=1184 This should simplify/clarify preparse_doctype() without any change to the actual doctype detection algorithm (yet). Also leaves the door open to different handling of documents without an FPI (currently causing a "no doctype" warning) Index: check =================================================================== RCS file: /sources/public/validator/httpd/cgi-bin/check,v retrieving revision 1.414 retrieving revision 1.415 diff -u -d -r1.414 -r1.415 --- check 9 Apr 2005 17:31:42 -0000 1.414 +++ check 19 Apr 2005 05:52:40 -0000 1.415 @@ -1724,16 +1724,10 @@ sub preparse_doctype { my $File = shift; - # - # Reset DOCTYPE, Root (for second invocation, probably not needed anymore). - $File->{DOCTYPE} = ''; - $File->{Root} = ''; - my $dtd = sub { return if $File->{Root}; - ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si; }; - + my $start = sub { my $tag = shift; my $attr = shift; @@ -1747,14 +1741,25 @@ if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}}; }; - my $p = HTML::Parser->new(api_version => 3); - $p->xml_mode(TRUE); - $p->ignore_elements('BODY'); - $p->ignore_elements('body'); - $p->handler(declaration => $dtd, 'text'); - $p->handler(start => $start, 'tag,attr'); - $p->parse(join "\n", @{$File->{Content}}); + if (! 
$File->{DOCTYPE}){ + my $allcontent = join "\n", @{$File->{Content}}; + if ($allcontent =~ /<!DOCTYPE([^>]*)>/iso){ + my $genericdoctype = $1; + $_ = $genericdoctype; + if (m:.(\w+)\s+PUBLIC\s+(.*):iso) + { + my $publicdoctype = $2; + $publicdoctype =~ m/[\"\']([^\"\']+)[\"\']/; + $File->{DOCTYPE} = $1; + } + elsif (m:(\w+)\s+SYSTEM\s+[\"\'](.*)[\"\']:iso) + { + # nothing yet + } + + } + } $File->{DOCTYPE} = '' unless defined $File->{DOCTYPE}; $File->{DOCTYPE} =~ s(^\s+){ }g; $File->{DOCTYPE} =~ s(\s+$){ }g;
Received on Tuesday, 19 April 2005 05:52:44 UTC