- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:15 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 245:2eff729e3580
user: ville
date: Mon Sep 22 19:33:31 2008 +0000
files: bin/checklink
description:
Add non-robot developer mode, thanks to Michael Ernst.

diff -r f1ad528b8c2d -r 2eff729e3580 bin/checklink
--- a/bin/checklink Thu Sep 11 16:51:41 2008 +0000
+++ b/bin/checklink Mon Sep 22 19:33:31 2008 +0000
@@ -5,7 +5,7 @@
 # (c) 1999-2008 World Wide Web Consortium
 # based on Renaud Bruyeron's checklink.pl
 #
-# $Id: checklink,v 4.115 2008-09-11 16:51:41 ville Exp $
+# $Id: checklink,v 4.116 2008-09-22 19:33:31 ville Exp $
 #
 # This program is licensed under the W3C(r) Software License:
 # http://www.w3.org/Consortium/Legal/copyright-software
@@ -33,9 +33,17 @@
 package W3C::UserAgent;
-use LWP::RobotUA 1.19 qw();
+use LWP::RobotUA 1.19 qw();
+use LWP::UserAgent qw();
-@W3C::UserAgent::ISA = qw(LWP::RobotUA);
+# if 0, ignore robots exclusion (useful for testing)
+use constant USE_ROBOT_UA => 1;
+
+if (USE_ROBOT_UA) {
+  @W3C::UserAgent::ISA = qw(LWP::RobotUA);
+} else {
+  @W3C::UserAgent::ISA = qw(LWP::UserAgent);
+}
 sub new {
@@ -50,7 +58,16 @@
   # WWW::RobotRules <= 5.78 have bugs which cause suboptimal results with
   # User-Agent substring matching against robots.txt files; "User-Agent: *"
   # should work ok with all though, and "User-Agent: W3C-checklink" for >= 5.77
-  my $self = $class->SUPER::new($name, $from, $rules);
+  my $self;
+  if (USE_ROBOT_UA) {
+    $self = $class->SUPER::new($name, $from, $rules);
+  } else {
+    my %cnf;
+    @cnf{qw(agent from)} = ($name, $from);
+    $self = LWP::UserAgent->new(%cnf);
+    $self = bless $self, $class;
+  }
+
   $self->from(undef) unless $from_ok;
   $self->env_proxy();
@@ -242,9 +259,12 @@
   $PROGRAM = 'W3C-checklink';
   $VERSION = '4.3';
   $REVISION = sprintf('version %s (c) 1999-2008 W3C', $VERSION);
-  my ($cvsver) = q$Revision: 4.115 $ =~ /(\d+[\d\.]*\.\d+)/;
+  my ($cvsver) = q$Revision: 4.116 $ =~ /(\d+[\d\.]*\.\d+)/;
   $AGENT = sprintf('%s/%s [%s] %s',
-    $PROGRAM, $VERSION, $cvsver, LWP::RobotUA->_agent());
+    $PROGRAM, $VERSION, $cvsver,
+    (W3C::UserAgent::USE_ROBOT_UA
+      ? LWP::RobotUA->_agent()
+      : LWP::UserAgent->_agent()));
   # Pull in mod_perl modules if applicable.
   eval {
@@ -393,7 +413,9 @@
 my $ua = W3C::UserAgent->new($AGENT); # @@@ TODO: admin address
 # @@@ make number of keep-alive connections customizable
 $ua->conn_cache({ total_capacity => 1}); # 1 keep-alive connection
-$ua->delay($Opts{Sleep_Time}/60);
+if ($ua->can('delay')) {
+  $ua->delay($Opts{Sleep_Time}/60);
+}
 $ua->timeout($Opts{Timeout});
 eval {
   $ua->allow_private_ips($Opts{Allow_Private_IPs});
Received on Thursday, 5 August 2010 14:47:27 UTC