- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Wed, 07 Jan 2009 22:19:33 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv22761/httpd/cgi-bin

Modified Files:
    check
Log Message:
Improve doctype preparse performance by aborting as soon as possible, fix required HTML::Parser version.

Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.636
retrieving revision 1.637
diff -u -d -r1.636 -r1.637
--- check 7 Jan 2009 22:04:23 -0000 1.636
+++ check 7 Jan 2009 22:19:30 -0000 1.637
@@ -55,7 +55,7 @@
 use Encode::JIS2K qw(); # ditto extra japanese encodings
 use File::Spec::Functions qw(catfile);
 use HTML::Encoding 0.52 qw();
-use HTML::Parser 3.25 qw(); # Need 3.25 for $p->ignore_elements.
+use HTML::Parser 3.24 qw(); # Need 3.24 for $p->parse($code_ref)
 use HTML::Template 2.6 qw(); # Need 2.6 for path param, other things.
 use HTTP::Headers::Util qw();
 use HTTP::Request qw();
@@ -2111,8 +2111,7 @@
   };
 
   my $start = sub {
-    my $tag = shift;
-    my $attr = shift;
+    my ($p, $tag, $attr) = @_;
 
     if ($File->{Root}) {
       return unless $tag eq $File->{Root};
@@ -2122,6 +2121,9 @@
     if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}};
     if ($attr->{version}) {$File->{'Root Version'} = $attr->{version}};
     if ($attr->{baseProfile}) {$File->{'Root BaseProfile'} = $attr->{baseProfile}};
+
+    # We're done parsing.
+    $p->eof();
   };
 
   # we use HTML::Parser as pre-parser. May use html5lib or other in the future
@@ -2130,11 +2132,15 @@
   # if content-type has shown we should pre-parse with XML mode, use that
   # otherwise (mostly text/html cases) use default mode
   $p->xml_mode(TRUE) if ($File->{Mode} =~ /XML/);
-  $p->ignore_elements('BODY');
-  $p->ignore_elements('body');
   $p->handler(declaration => $dtd, 'text');
-  $p->handler(start => $start, 'tag,attr');
-  $p->parse(join "\n", @{$File->{Content}});
+  $p->handler(start => $start, 'self,tag,attr');
+
+  my $line = 0;
+  my $max = scalar(@{$File->{Content}});
+  $p->parse(sub {
+    return ($line < $max) ? $File->{Content}->[$line++] . "\n" : undef;
+  });
+  $p->eof();
 
   # TODO: These \s here are probably wrong now that the strings are utf8_on
   $File->{DOCTYPE} = '' unless defined $File->{DOCTYPE};
Received on Wednesday, 7 January 2009 22:19:41 UTC