- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:15 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 245:2eff729e3580
user: ville
date: Mon Sep 22 19:33:31 2008 +0000
files: bin/checklink
description:
Add non-robot developer mode, thanks to Michael Ernst.
diff -r f1ad528b8c2d -r 2eff729e3580 bin/checklink
--- a/bin/checklink Thu Sep 11 16:51:41 2008 +0000
+++ b/bin/checklink Mon Sep 22 19:33:31 2008 +0000
@@ -5,7 +5,7 @@
# (c) 1999-2008 World Wide Web Consortium
# based on Renaud Bruyeron's checklink.pl
#
-# $Id: checklink,v 4.115 2008-09-11 16:51:41 ville Exp $
+# $Id: checklink,v 4.116 2008-09-22 19:33:31 ville Exp $
#
# This program is licensed under the W3C(r) Software License:
# http://www.w3.org/Consortium/Legal/copyright-software
@@ -33,9 +33,17 @@
package W3C::UserAgent;
-use LWP::RobotUA 1.19 qw();
+use LWP::RobotUA 1.19 qw();
+use LWP::UserAgent qw();
-@W3C::UserAgent::ISA = qw(LWP::RobotUA);
+# if 0, ignore robots exclusion (useful for testing)
+use constant USE_ROBOT_UA => 1;
+
+if (USE_ROBOT_UA) {
+ @W3C::UserAgent::ISA = qw(LWP::RobotUA);
+} else {
+ @W3C::UserAgent::ISA = qw(LWP::UserAgent);
+}
sub new
{
@@ -50,7 +58,16 @@
# WWW::RobotRules <= 5.78 have bugs which cause suboptimal results with
# User-Agent substring matching against robots.txt files; "User-Agent: *"
# should work ok with all though, and "User-Agent: W3C-checklink" for >= 5.77
- my $self = $class->SUPER::new($name, $from, $rules);
+ my $self;
+ if (USE_ROBOT_UA) {
+ $self = $class->SUPER::new($name, $from, $rules);
+ } else {
+ my %cnf;
+ @cnf{qw(agent from)} = ($name, $from);
+ $self = LWP::UserAgent->new(%cnf);
+ $self = bless $self, $class;
+ }
+
$self->from(undef) unless $from_ok;
$self->env_proxy();
@@ -242,9 +259,12 @@
$PROGRAM = 'W3C-checklink';
$VERSION = '4.3';
$REVISION = sprintf('version %s (c) 1999-2008 W3C', $VERSION);
- my ($cvsver) = q$Revision: 4.115 $ =~ /(\d+[\d\.]*\.\d+)/;
+ my ($cvsver) = q$Revision: 4.116 $ =~ /(\d+[\d\.]*\.\d+)/;
$AGENT = sprintf('%s/%s [%s] %s',
- $PROGRAM, $VERSION, $cvsver, LWP::RobotUA->_agent());
+ $PROGRAM, $VERSION, $cvsver,
+ (W3C::UserAgent::USE_ROBOT_UA
+ ? LWP::RobotUA->_agent()
+ : LWP::UserAgent->_agent()));
# Pull in mod_perl modules if applicable.
eval {
@@ -393,7 +413,9 @@
my $ua = W3C::UserAgent->new($AGENT); # @@@ TODO: admin address
# @@@ make number of keep-alive connections customizable
$ua->conn_cache({ total_capacity => 1}); # 1 keep-alive connection
-$ua->delay($Opts{Sleep_Time}/60);
+if ($ua->can('delay')) {
+ $ua->delay($Opts{Sleep_Time}/60);
+}
$ua->timeout($Opts{Timeout});
eval {
$ua->allow_private_ips($Opts{Allow_Private_IPs});
Received on Thursday, 5 August 2010 14:47:27 UTC