- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Mon, 22 Sep 2008 19:33:33 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv32752

Modified Files:
    checklink
Log Message:
Add non-robot developer mode, thanks to Michael Ernst.

Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.115
retrieving revision 4.116
diff -u -d -r4.115 -r4.116
--- checklink 11 Sep 2008 16:51:41 -0000 4.115
+++ checklink 22 Sep 2008 19:33:31 -0000 4.116
@@ -33,9 +33,17 @@

 package W3C::UserAgent;

-use LWP::RobotUA 1.19 qw();
+use LWP::RobotUA 1.19 qw();
+use LWP::UserAgent qw();

-@W3C::UserAgent::ISA = qw(LWP::RobotUA);
+# if 0, ignore robots exclusion (useful for testing)
+use constant USE_ROBOT_UA => 1;
+
+if (USE_ROBOT_UA) {
+  @W3C::UserAgent::ISA = qw(LWP::RobotUA);
+} else {
+  @W3C::UserAgent::ISA = qw(LWP::UserAgent);
+}

 sub new
 {
@@ -50,7 +58,16 @@
   # WWW::RobotRules <= 5.78 have bugs which cause suboptimal results with
   # User-Agent substring matching against robots.txt files; "User-Agent: *"
   # should work ok with all though, and "User-Agent: W3C-checklink" for >= 5.77
-  my $self = $class->SUPER::new($name, $from, $rules);
+  my $self;
+  if (USE_ROBOT_UA) {
+    $self = $class->SUPER::new($name, $from, $rules);
+  } else {
+    my %cnf;
+    @cnf{qw(agent from)} = ($name, $from);
+    $self = LWP::UserAgent->new(%cnf);
+    $self = bless $self, $class;
+  }
+
   $self->from(undef) unless $from_ok;

   $self->env_proxy();
@@ -244,7 +261,10 @@
   $REVISION = sprintf('version %s (c) 1999-2008 W3C', $VERSION);
   my ($cvsver) = q$Revision$ =~ /(\d+[\d\.]*\.\d+)/;
   $AGENT = sprintf('%s/%s [%s] %s',
-                   $PROGRAM, $VERSION, $cvsver, LWP::RobotUA->_agent());
+                   $PROGRAM, $VERSION, $cvsver,
+                   (W3C::UserAgent::USE_ROBOT_UA
+                    ? LWP::RobotUA->_agent()
+                    : LWP::UserAgent->_agent()));

   # Pull in mod_perl modules if applicable.
   eval {
@@ -393,7 +413,9 @@
 my $ua = W3C::UserAgent->new($AGENT); # @@@ TODO: admin address
 # @@@ make number of keep-alive connections customizable
 $ua->conn_cache({ total_capacity => 1}); # 1 keep-alive connection
-$ua->delay($Opts{Sleep_Time}/60);
+if ($ua->can('delay')) {
+  $ua->delay($Opts{Sleep_Time}/60);
+}
 $ua->timeout($Opts{Timeout});
 eval {
   $ua->allow_private_ips($Opts{Allow_Private_IPs});
Received on Monday, 22 September 2008 19:34:08 UTC