- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:04 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 138:708d72ea9a48
user: ville
date: Sat May 06 18:24:10 2006 +0000
files: Makefile.PL bin/checklink docs/checklink.html
description:
Outsource line counting to HTML::Parser; version >= 3.20 is now required.
diff -r f00b04f44da5 -r 708d72ea9a48 Makefile.PL
--- a/Makefile.PL Sat May 06 18:19:49 2006 +0000
+++ b/Makefile.PL Sat May 06 18:24:10 2006 +0000
@@ -23,7 +23,7 @@
PREREQ_PM => {
CGI => 0,
Config::General => 2.06,
- HTML::Parser => 3.00,
+ HTML::Parser => 3.20,
LWP => 5.66,
Net::IP => 0, # Optional, see the docs.
Term::ReadKey => 2.00,
diff -r f00b04f44da5 -r 708d72ea9a48 bin/checklink
--- a/bin/checklink Sat May 06 18:19:49 2006 +0000
+++ b/bin/checklink Sat May 06 18:24:10 2006 +0000
@@ -5,7 +5,7 @@
# (c) 1999-2005 World Wide Web Consortium
# based on Renaud Bruyeron's checklink.pl
#
-# $Id: checklink,v 4.28 2006-05-06 18:19:49 ville Exp $
+# $Id: checklink,v 4.29 2006-05-06 18:24:10 ville Exp $
#
# This program is licensed under the W3C(r) Software License:
# http://www.w3.org/Consortium/Legal/copyright-software
@@ -102,7 +102,7 @@
$DocType $Head $Accept $ContentTypes %Cfg);
use HTML::Entities qw();
-use HTML::Parser 3.00 qw();
+use HTML::Parser 3.20 qw(); # >= 3.20 for "line" argspec identifier
use HTTP::Request qw();
use HTTP::Response qw();
use Time::HiRes qw();
@@ -123,7 +123,7 @@
$PROGRAM = 'W3C-checklink';
$VERSION = '4.2.1';
$REVISION = sprintf('version %s (c) 1999-2005 W3C', $VERSION);
- my ($cvsver) = q$Revision: 4.28 $ =~ /(\d+[\d\.]*\.\d+)/;
+ my ($cvsver) = q$Revision: 4.29 $ =~ /(\d+[\d\.]*\.\d+)/;
$AGENT = sprintf('%s/%s [%s] %s',
$PROGRAM, $VERSION, $cvsver, LWP::RobotUA->_agent());
@@ -1196,17 +1196,15 @@
my $p = HTML::Parser::new(@_, api_version => 3);
# Start tags
- $p->handler(start => 'start', 'self, tagname, attr, text');
+ $p->handler(start => 'start', 'self, tagname, attr, text, line');
# Declarations
$p->handler(declaration =>
sub {
my $self = shift;
$self->declaration(substr($_[0], 2, -1));
- }, 'self, text');
+ }, 'self, text, line');
# Other stuff
- $p->handler(default => 'text', 'self, text');
- # Line count
- $p->{Line} = 1;
+ $p->handler(default => 'parse_progress', 'self, line') if $Opts{Progress};
# Check <a [..] name="...">?
$p->{check_name} = 1;
# Check <[..] id="..">?
@@ -1242,16 +1240,14 @@
$self->xml_mode(1) if (m%^-//W3C//DTD XHTML %);
}
-#######################################
-# Count the number of lines in a file #
-#######################################
+###################################
+# Print parse progress indication #
+###################################
-sub new_line
+sub parse_progress
{
- my ($self, $string) = @_;
- my $count = ($string =~ tr/\n//);
- $self->{Line} = $self->{Line} + $count;
- printf("\r%4d%%", int($self->{Line}/$self->{Total}*100)) if $Opts{Progress};
+ my ($self, $line) = @_;
+ printf("\r%4d%%", int($line/$self->{Total}*100));
}
#############################
@@ -1280,17 +1276,17 @@
sub add_link
{
- my ($self, $uri) = @_;
- $self->{Links}{$uri}{$self->{Line}}++ if defined($uri);
+ my ($self, $uri, $line) = @_;
+ $self->{Links}{$uri}{$line}++ if defined($uri);
}
sub start
{
- my ($self, $tag, $attr, $text) = @_;
+ my ($self, $tag, $attr, $text, $line) = @_;
# Anchors
my $anchor = $self->get_anchor($tag, $attr);
- $self->{Anchors}{$anchor}{$self->{Line}}++ if defined($anchor);
+ $self->{Anchors}{$anchor}{$line}++ if defined($anchor);
# Links
if (!$self->{only_anchors}) {
@@ -1302,31 +1298,19 @@
$self->{base} = $attr->{href};
}
} else {
- $self->add_link($attr->{href});
+ $self->add_link($attr->{href}, $line);
}
- $self->add_link($attr->{src});
- $self->add_link($attr->{data}) if ($tag eq 'object');
- $self->add_link($attr->{cite}) if ($tag eq 'blockquote');
+ $self->add_link($attr->{src}, $line);
+ $self->add_link($attr->{data}, $line) if ($tag eq 'object');
+ $self->add_link($attr->{cite}, $line) if ($tag eq 'blockquote');
}
- # Line counting
- $self->new_line($text) if ($text =~ m/\n/);
-}
-
-sub text
-{
- my ($self, $text) = @_;
- if (!$Opts{Progress}) {
- # If we are just extracting information about anchors,
- # parsing this part is only cosmetic (progress indicator)
- return unless !$self->{only_anchors};
- }
- $self->new_line($text) if ($text =~ /\n/);
+ $self->parse_progress($line) if $Opts{Progress};
}
sub declaration
{
- my ($self, $text) = @_;
+ my ($self, $text, $line) = @_;
# Extract the doctype
my @declaration = split(/\s+/, $text, 4);
if (($#declaration >= 3) &&
@@ -1337,7 +1321,7 @@
# Store the doctype
$self->doctype($1) if $1;
# If there is a link to the DTD, record it
- $self->{Links}{$3}{$self->{Line}}++ if (!$self->{only_anchors} && $3);
+ $self->{Links}{$3}{$line}++ if (!$self->{only_anchors} && $3);
}
return unless !$self->{only_anchors};
$self->text($text);
diff -r f00b04f44da5 -r 708d72ea9a48 docs/checklink.html
--- a/docs/checklink.html Sat May 06 18:19:49 2006 +0000
+++ b/docs/checklink.html Sat May 06 18:24:10 2006 +0000
@@ -6,7 +6,7 @@
<title>W3C Link Checker Documentation</title>
<link rev="made" href="mailto:www-validator@w3.org" />
<style type="text/css" media="all">@import "linkchecker.css";</style>
- <meta name="revision" content="$Id: checklink.html,v 1.32 2005-05-15 13:45:03 ville Exp $" />
+ <meta name="revision" content="$Id: checklink.html,v 1.33 2006-05-06 18:24:10 ville Exp $" />
</head>
<body>
@@ -162,7 +162,7 @@
<li><a href="http://search.cpan.org/dist/W3C-LinkChecker/">W3C-LinkChecker</a> (the link checker itself)</li>
<li><a href="http://search.cpan.org/dist/CGI.pm/">CGI.pm</a> (required for CGI mode only)</li>
<li><a href="http://search.cpan.org/dist/Config-General/">Config-General</a> (optional, version 2.06 or newer; required only for reading the (optional) configuration file)</li>
- <li><a href="http://search.cpan.org/dist/HTML-Parser/">HTML-Parser</a> (version 3.00 or newer)</li>
+ <li><a href="http://search.cpan.org/dist/HTML-Parser/">HTML-Parser</a> (version 3.20 or newer)</li>
<li><a href="http://search.cpan.org/dist/libwww-perl/">libwww-perl</a> (version 5.66 or newer; version 5.70 or newer recommended, except for 5.76 which has a bug that may cause the link checker follow redirects to <code>file:</code> URLs)</li>
<li><a href="http://search.cpan.org/dist/Net-IP/">Net-IP</a> (optional but recommended; required for restricting access to <a href="http://www.ietf.org/rfc/rfc1918.txt">private IP addresses</a>)</li>
<li><a href="http://search.cpan.org/dist/TermReadKey/">TermReadKey</a> (optional but recommended; required only in command line mode for password input)</li>
@@ -297,7 +297,7 @@
alt="Valid XHTML 1.0!" /></a>
<a title="Send Feedback for the W3C Link Checker"
href="http://validator.w3.org/feedback.html">The W3C Validator Team</a><br />
- $Date: 2005-05-15 13:45:03 $
+ $Date: 2006-05-06 18:24:10 $
</address>
<p class="copyright">
<a rel="Copyright" href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © 1994-2005
Received on Thursday, 5 August 2010 14:47:14 UTC