- From: Martin Duerst <duerst@dev.w3.org>
- Date: Wed, 22 Sep 2004 00:48:39 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin In directory hutz:/tmp/cvs-serv21045 Modified Files: check Log Message: moved preparse_meta close to other charset-related code Index: check =================================================================== RCS file: /sources/public/validator/httpd/cgi-bin/check,v retrieving revision 1.344 retrieving revision 1.345 diff -u -d -r1.344 -r1.345 --- check 22 Sep 2004 00:46:12 -0000 1.344 +++ check 22 Sep 2004 00:48:37 -0000 1.345 @@ -1930,59 +1930,6 @@ } # -# Do an initial parse of the Document Entity to extract charset from HTML <meta>. -# (still also extracts FPI) -sub preparse_meta { - my $File = shift; - - # - # Reset DOCTYPE, Root, and Charset (for second invocation). - $File->{Charset}->{META} = ''; - $File->{DOCTYPE} = ''; - $File->{Root} = ''; - - my $dtd = sub { - return if $File->{Root}; - ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si; - }; - - my $start = sub { - my $tag = shift; - my $attr = shift; - my %attr = map {lc($_) => $attr->{$_}} keys %{$attr}; - - if ($File->{Root}) { - if (lc $tag eq 'meta') { - if (lc $attr{'http-equiv'} eq 'content-type') { - if ($attr{content} =~ m(charset\s*=[\s\"\']*([^\s;\"\'>]*))si) { - $File->{Charset}->{META} = lc $1; - } - } - } - return unless $tag eq $File->{Root}; - } else { - $File->{Root} = $tag; - } - if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}}; - }; - - my $p = HTML::Parser->new(api_version => 3); - $p->xml_mode(TRUE); - $p->ignore_elements('BODY'); - $p->ignore_elements('body'); - $p->handler(declaration => $dtd, 'text'); - $p->handler(start => $start, 'tag,attr'); - $p->parse(join "\n", @{$File->{Content}}); - - $File->{DOCTYPE} = '' unless defined $File->{DOCTYPE}; - $File->{DOCTYPE} =~ s(^\s+){ }g; - $File->{DOCTYPE} =~ s(\s+$){ }g; - $File->{DOCTYPE} =~ s(\s+) { }g; - - return $File; -} - -# # Print out the raw ESIS output for debugging. sub show_esis ($) { print <<'EOF'; @@ -2220,6 +2167,59 @@ # +# Do an initial parse of the Document Entity to extract charset from HTML <meta>. +# (still also extracts FPI) +sub preparse_meta { + my $File = shift; + + # + # Reset DOCTYPE, Root, and Charset (for second invocation). + $File->{Charset}->{META} = ''; + $File->{DOCTYPE} = ''; + $File->{Root} = ''; + + my $dtd = sub { + return if $File->{Root}; + ($File->{Root}, $File->{DOCTYPE}) = shift =~ m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+(?:[\'\"])([^\"\']+)(?:[\"\']).*>)si; + }; + + my $start = sub { + my $tag = shift; + my $attr = shift; + my %attr = map {lc($_) => $attr->{$_}} keys %{$attr}; + + if ($File->{Root}) { + if (lc $tag eq 'meta') { + if (lc $attr{'http-equiv'} eq 'content-type') { + if ($attr{content} =~ m(charset\s*=[\s\"\']*([^\s;\"\'>]*))si) { + $File->{Charset}->{META} = lc $1; + } + } + } + return unless $tag eq $File->{Root}; + } else { + $File->{Root} = $tag; + } + if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}}; + }; + + my $p = HTML::Parser->new(api_version => 3); + $p->xml_mode(TRUE); + $p->ignore_elements('BODY'); + $p->ignore_elements('body'); + $p->handler(declaration => $dtd, 'text'); + $p->handler(start => $start, 'tag,attr'); + $p->parse(join "\n", @{$File->{Content}}); + + $File->{DOCTYPE} = '' unless defined $File->{DOCTYPE}; + $File->{DOCTYPE} =~ s(^\s+){ }g; + $File->{DOCTYPE} =~ s(\s+$){ }g; + $File->{DOCTYPE} =~ s(\s+) { }g; + + return $File; +} + +# # Check charset conflicts and add any warnings necessary. sub charset_conflicts { my $File = shift;
Received on Wednesday, 22 September 2004 00:48:39 UTC