- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:04 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 138:708d72ea9a48 user: ville date: Sat May 06 18:24:10 2006 +0000 files: Makefile.PL bin/checklink docs/checklink.html description: Outsource line counting to HTML::Parser; version >= 3.20 is now required. diff -r f00b04f44da5 -r 708d72ea9a48 Makefile.PL --- a/Makefile.PL Sat May 06 18:19:49 2006 +0000 +++ b/Makefile.PL Sat May 06 18:24:10 2006 +0000 @@ -23,7 +23,7 @@ PREREQ_PM => { CGI => 0, Config::General => 2.06, - HTML::Parser => 3.00, + HTML::Parser => 3.20, LWP => 5.66, Net::IP => 0, # Optional, see the docs. Term::ReadKey => 2.00, diff -r f00b04f44da5 -r 708d72ea9a48 bin/checklink --- a/bin/checklink Sat May 06 18:19:49 2006 +0000 +++ b/bin/checklink Sat May 06 18:24:10 2006 +0000 @@ -5,7 +5,7 @@ # (c) 1999-2005 World Wide Web Consortium # based on Renaud Bruyeron's checklink.pl # -# $Id: checklink,v 4.28 2006-05-06 18:19:49 ville Exp $ +# $Id: checklink,v 4.29 2006-05-06 18:24:10 ville Exp $ # # This program is licensed under the W3C(r) Software License: # http://www.w3.org/Consortium/Legal/copyright-software @@ -102,7 +102,7 @@ $DocType $Head $Accept $ContentTypes %Cfg); use HTML::Entities qw(); -use HTML::Parser 3.00 qw(); +use HTML::Parser 3.20 qw(); # >= 3.20 for "line" argspec identifier use HTTP::Request qw(); use HTTP::Response qw(); use Time::HiRes qw(); @@ -123,7 +123,7 @@ $PROGRAM = 'W3C-checklink'; $VERSION = '4.2.1'; $REVISION = sprintf('version %s (c) 1999-2005 W3C', $VERSION); - my ($cvsver) = q$Revision: 4.28 $ =~ /(\d+[\d\.]*\.\d+)/; + my ($cvsver) = q$Revision: 4.29 $ =~ /(\d+[\d\.]*\.\d+)/; $AGENT = sprintf('%s/%s [%s] %s', $PROGRAM, $VERSION, $cvsver, LWP::RobotUA->_agent()); @@ -1196,17 +1196,15 @@ my $p = HTML::Parser::new(@_, api_version => 3); # Start tags - $p->handler(start => 'start', 'self, tagname, attr, text'); + $p->handler(start => 'start', 'self, tagname, attr, text, line'); # Declarations $p->handler(declaration => sub { my $self = shift; $self->declaration(substr($_[0], 2, -1)); - }, 'self, text'); + }, 'self, text, line'); # Other stuff - $p->handler(default => 'text', 'self, text'); - # Line count - $p->{Line} = 1; + $p->handler(default => 'parse_progress', 'self, line') if $Opts{Progress}; # Check <a [..] name="...">? $p->{check_name} = 1; # Check <[..] id="..">? @@ -1242,16 +1240,14 @@ $self->xml_mode(1) if (m%^-//W3C//DTD XHTML %); } -####################################### -# Count the number of lines in a file # -####################################### +################################### +# Print parse progress indication # +################################### -sub new_line +sub parse_progress { - my ($self, $string) = @_; - my $count = ($string =~ tr/\n//); - $self->{Line} = $self->{Line} + $count; - printf("\r%4d%%", int($self->{Line}/$self->{Total}*100)) if $Opts{Progress}; + my ($self, $line) = @_; + printf("\r%4d%%", int($line/$self->{Total}*100)); } ############################# @@ -1280,17 +1276,17 @@ sub add_link { - my ($self, $uri) = @_; - $self->{Links}{$uri}{$self->{Line}}++ if defined($uri); + my ($self, $uri, $line) = @_; + $self->{Links}{$uri}{$line}++ if defined($uri); } sub start { - my ($self, $tag, $attr, $text) = @_; + my ($self, $tag, $attr, $text, $line) = @_; # Anchors my $anchor = $self->get_anchor($tag, $attr); - $self->{Anchors}{$anchor}{$self->{Line}}++ if defined($anchor); + $self->{Anchors}{$anchor}{$line}++ if defined($anchor); # Links if (!$self->{only_anchors}) { @@ -1302,31 +1298,19 @@ $self->{base} = $attr->{href}; } } else { - $self->add_link($attr->{href}); + $self->add_link($attr->{href}, $line); } - $self->add_link($attr->{src}); - $self->add_link($attr->{data}) if ($tag eq 'object'); - $self->add_link($attr->{cite}) if ($tag eq 'blockquote'); + $self->add_link($attr->{src}, $line); + $self->add_link($attr->{data}, $line) if ($tag eq 'object'); + $self->add_link($attr->{cite}, $line) if ($tag eq 'blockquote'); } - # Line counting - $self->new_line($text) if ($text =~ m/\n/); -} - -sub text -{ - my ($self, $text) = @_; - if (!$Opts{Progress}) { - # If we are just extracting information about anchors, - # parsing this part is only cosmetic (progress indicator) - return unless !$self->{only_anchors}; - } - $self->new_line($text) if ($text =~ /\n/); + $self->parse_progress($line) if $Opts{Progress}; } sub declaration { - my ($self, $text) = @_; + my ($self, $text, $line) = @_; # Extract the doctype my @declaration = split(/\s+/, $text, 4); if (($#declaration >= 3) && @@ -1337,7 +1321,7 @@ # Store the doctype $self->doctype($1) if $1; # If there is a link to the DTD, record it - $self->{Links}{$3}{$self->{Line}}++ if (!$self->{only_anchors} && $3); + $self->{Links}{$3}{$line}++ if (!$self->{only_anchors} && $3); } return unless !$self->{only_anchors}; $self->text($text); diff -r f00b04f44da5 -r 708d72ea9a48 docs/checklink.html --- a/docs/checklink.html Sat May 06 18:19:49 2006 +0000 +++ b/docs/checklink.html Sat May 06 18:24:10 2006 +0000 @@ -6,7 +6,7 @@ <title>W3C Link Checker Documentation</title> <link rev="made" href="mailto:www-validator@w3.org" /> <style type="text/css" media="all">@import "linkchecker.css";</style> - <meta name="revision" content="$Id: checklink.html,v 1.32 2005-05-15 13:45:03 ville Exp $" /> + <meta name="revision" content="$Id: checklink.html,v 1.33 2006-05-06 18:24:10 ville Exp $" /> </head> <body> @@ -162,7 +162,7 @@ <li><a href="http://search.cpan.org/dist/W3C-LinkChecker/">W3C-LinkChecker</a> (the link checker itself)</li> <li><a href="http://search.cpan.org/dist/CGI.pm/">CGI.pm</a> (required for CGI mode only)</li> <li><a href="http://search.cpan.org/dist/Config-General/">Config-General</a> (optional, version 2.06 or newer; required only for reading the (optional) configuration file)</li> - <li><a href="http://search.cpan.org/dist/HTML-Parser/">HTML-Parser</a> (version 3.00 or newer)</li> + <li><a href="http://search.cpan.org/dist/HTML-Parser/">HTML-Parser</a> (version 3.20 or newer)</li> <li><a href="http://search.cpan.org/dist/libwww-perl/">libwww-perl</a> (version 5.66 or newer; version 5.70 or newer recommended, except for 5.76 which has a bug that may cause the link checker follow redirects to <code>file:</code> URLs)</li> <li><a href="http://search.cpan.org/dist/Net-IP/">Net-IP</a> (optional but recommended; required for restricting access to <a href="http://www.ietf.org/rfc/rfc1918.txt">private IP addresses</a>)</li> <li><a href="http://search.cpan.org/dist/TermReadKey/">TermReadKey</a> (optional but recommended; required only in command line mode for password input)</li> @@ -297,7 +297,7 @@ alt="Valid XHTML 1.0!" /></a> <a title="Send Feedback for the W3C Link Checker" href="http://validator.w3.org/feedback.html">The W3C Validator Team</a><br /> - $Date: 2005-05-15 13:45:03 $ + $Date: 2006-05-06 18:24:10 $ </address> <p class="copyright"> <a rel="Copyright" href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © 1994-2005
Received on Thursday, 5 August 2010 14:47:14 UTC