- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Wed, 07 Jan 2009 22:19:33 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv22761/httpd/cgi-bin
Modified Files:
check
Log Message:
Improve doctype preparse performance by aborting as soon as possible; fix the required HTML::Parser version.
Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.636
retrieving revision 1.637
diff -u -d -r1.636 -r1.637
--- check 7 Jan 2009 22:04:23 -0000 1.636
+++ check 7 Jan 2009 22:19:30 -0000 1.637
@@ -55,7 +55,7 @@
use Encode::JIS2K qw(); # ditto extra japanese encodings
use File::Spec::Functions qw(catfile);
use HTML::Encoding 0.52 qw();
-use HTML::Parser 3.25 qw(); # Need 3.25 for $p->ignore_elements.
+use HTML::Parser 3.24 qw(); # Need 3.24 for $p->parse($code_ref)
use HTML::Template 2.6 qw(); # Need 2.6 for path param, other things.
use HTTP::Headers::Util qw();
use HTTP::Request qw();
@@ -2111,8 +2111,7 @@
};
my $start = sub {
- my $tag = shift;
- my $attr = shift;
+ my ($p, $tag, $attr) = @_;
if ($File->{Root}) {
return unless $tag eq $File->{Root};
@@ -2122,6 +2121,9 @@
if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}};
if ($attr->{version}) {$File->{'Root Version'} = $attr->{version}};
if ($attr->{baseProfile}) {$File->{'Root BaseProfile'} = $attr->{baseProfile}};
+
+ # We're done parsing.
+ $p->eof();
};
# we use HTML::Parser as pre-parser. May use html5lib or other in the future
@@ -2130,11 +2132,15 @@
# if content-type has shown we should pre-parse with XML mode, use that
# otherwise (mostly text/html cases) use default mode
$p->xml_mode(TRUE) if ($File->{Mode} =~ /XML/);
- $p->ignore_elements('BODY');
- $p->ignore_elements('body');
$p->handler(declaration => $dtd, 'text');
- $p->handler(start => $start, 'tag,attr');
- $p->parse(join "\n", @{$File->{Content}});
+ $p->handler(start => $start, 'self,tag,attr');
+
+ my $line = 0;
+ my $max = scalar(@{$File->{Content}});
+ $p->parse(sub {
+ return ($line < $max) ? $File->{Content}->[$line++] . "\n" : undef;
+ });
+ $p->eof();
# TODO: These \s here are probably wrong now that the strings are utf8_on
$File->{DOCTYPE} = '' unless defined $File->{DOCTYPE};
Received on Wednesday, 7 January 2009 22:19:41 UTC