validator/httpd/cgi-bin check,1.636,1.637

Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv22761/httpd/cgi-bin

Modified Files:
	check 
Log Message:
Improve doctype pre-parse performance by aborting the parse as soon as possible; fix the required HTML::Parser version.

Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.636
retrieving revision 1.637
diff -u -d -r1.636 -r1.637
--- check	7 Jan 2009 22:04:23 -0000	1.636
+++ check	7 Jan 2009 22:19:30 -0000	1.637
@@ -55,7 +55,7 @@
 use Encode::JIS2K             qw(); # ditto extra japanese encodings
 use File::Spec::Functions     qw(catfile);
 use HTML::Encoding       0.52 qw();
-use HTML::Parser         3.25 qw(); # Need 3.25 for $p->ignore_elements.
+use HTML::Parser         3.24 qw(); # Need 3.24 for $p->parse($code_ref)
 use HTML::Template       2.6  qw(); # Need 2.6 for path param, other things.
 use HTTP::Headers::Util       qw();
 use HTTP::Request             qw();
@@ -2111,8 +2111,7 @@
   };
 
   my $start = sub {
-    my $tag  = shift;
-    my $attr = shift;
+    my ($p, $tag, $attr) = @_;
 
     if ($File->{Root}) {
       return unless $tag eq $File->{Root};
@@ -2122,6 +2121,9 @@
     if ($attr->{xmlns}) {$File->{Namespace} = $attr->{xmlns}};
     if ($attr->{version}) {$File->{'Root Version'} = $attr->{version}};
     if ($attr->{baseProfile}) {$File->{'Root BaseProfile'} = $attr->{baseProfile}};
+
+    # We're done parsing.
+    $p->eof();
   };
 
   # we use HTML::Parser as pre-parser. May use html5lib or other in the future
@@ -2130,11 +2132,15 @@
   # if content-type has shown we should pre-parse with XML mode, use that
   # otherwise (mostly text/html cases) use default mode
   $p->xml_mode(TRUE) if ($File->{Mode} =~ /XML/);
-  $p->ignore_elements('BODY');
-  $p->ignore_elements('body');
   $p->handler(declaration => $dtd, 'text');
-  $p->handler(start => $start, 'tag,attr');
-  $p->parse(join "\n", @{$File->{Content}});
+  $p->handler(start => $start, 'self,tag,attr');
+
+  my $line = 0;
+  my $max = scalar(@{$File->{Content}});
+  $p->parse(sub {
+    return ($line < $max) ? $File->{Content}->[$line++] . "\n" : undef;
+  });
+  $p->eof();
 
   # TODO: These \s here are probably wrong now that the strings are utf8_on
   $File->{DOCTYPE} = '' unless defined $File->{DOCTYPE};

Received on Wednesday, 7 January 2009 22:19:41 UTC