- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Sun, 29 Jul 2007 13:25:24 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv10786
Modified Files:
checklink
Log Message:
Refactor IP address allowance and redirect progress callback code into our UserAgent class.
Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.55
retrieving revision 4.56
diff -u -d -r4.55 -r4.56
--- checklink 28 Jul 2007 10:44:32 -0000 4.55
+++ checklink 29 Jul 2007 13:25:21 -0000 4.56
@@ -34,7 +34,6 @@
package W3C::UserAgent;
use LWP::RobotUA 1.19 qw();
-# @@@ Needs also W3C::LinkChecker but can't use() it here.
@W3C::UserAgent::ISA = qw(LWP::RobotUA);
@@ -55,45 +54,107 @@
$self->from(undef) unless $from_ok;
$self->env_proxy();
+
+ $self->allow_private_ips(1);
+
return $self;
}
+sub allow_private_ips
+{
+ my $self = shift;
+ if (@_) {
+ $self->{Checklink_allow_private_ips} = shift;
+ if (!$self->{Checklink_allow_private_ips}) {
+ # Pull in dependencies
+ require Net::IP;
+ require Socket;
+ require Net::hostent;
+ }
+ }
+ return $self->{Checklink_allow_private_ips};
+}
+
+sub redirect_progress_callback
+{
+ my $self = shift;
+ $self->{Checklink_redirect_callback} = shift if @_;
+ return $self->{Checklink_redirect_callback};
+}
+
sub simple_request
{
my $self = shift;
- my $response = do {
+
+ my $response = $self->ip_disallowed($_[0]->uri());
+
+ $response ||= do {
local $SIG{__WARN__} = sub { # Suppress some warnings, rt.cpan.org #18902
warn($_[0]) if ($_[0] && $_[0] !~ /^RobotRules/);
};
+ # @@@ Why not just $self->SUPER::simple_request?
$self->W3C::UserAgent::SUPER::simple_request(@_);
};
+
if (! defined($self->{FirstResponse})) {
$self->{FirstResponse} = $response->code();
$self->{FirstMessage} = $response->message() || '(no message)';
}
+
return $response;
}
sub redirect_ok
{
my ($self, $request, $response) = @_;
- if ($self->{Checklink_verbose_progress}) {
- # @@@ TODO: when an LWP internal robots.txt request gets redirected,
- # this will a bit confusingly print out info about it. Would need a
- # robust way of determining whether something is a LWP "internal" request.
- &W3C::LinkChecker::hprintf("\n%s %s ", $request->method(),$request->uri());
+
+ if (my $callback = $self->redirect_progress_callback()) {
+ &$callback($request->method(), $request->uri());
}
+
return 0 unless $self->SUPER::redirect_ok($request, $response);
- if (my $res = &W3C::LinkChecker::ip_allowed($request->uri())) {
+
+ if (my $res = $self->ip_disallowed($request->uri())) {
$response->previous($response->clone());
$response->request($request);
$response->code($res->code());
$response->message($res->message());
return 0;
}
+
return 1;
}
+#
+# Checks whether we're allowed to retrieve the document based on it's IP
+# address. Takes an URI object and returns a HTTP::Response containing the
+# appropriate status and error message if the IP was disallowed, 0
+# otherwise. URIs without hostname or IP address are always allowed,
+# including schemes where those make no sense (eg. data:, often javascript:).
+#
+sub ip_disallowed
+{
+ my ($self, $uri) = @_;
+ return 0 if $self->allow_private_ips(); # Short-circuit
+
+ my $hostname = undef;
+ eval { $hostname = $uri->host() }; # Not all URIs implement host()...
+ return 0 unless $hostname;
+
+ my $addr = my $iptype = my $resp = undef;
+ if (my $host = Net::hostent::gethostbyname($hostname)) {
+ $addr = Socket::inet_ntoa($host->addr()) if $host->addr();
+ if ($addr && (my $ip = Net::IP->new($addr))) {
+ $iptype = $ip->iptype();
+ }
+ }
+ if ($iptype && $iptype ne 'PUBLIC') {
+ $resp = HTTP::Response->new(403,
+ 'Checking non-public IP address disallowed by link checker configuration');
+ }
+ return $resp;
+}
+
# -----------------------------------------------------------------------------
package W3C::LinkChecker;
@@ -229,22 +290,6 @@
);
undef $cmdline;
-unless ($Opts{Allow_Private_IPs}) {
- eval {
- require Net::IP;
- require Socket;
- Socket->import('inet_ntoa');
- require Net::hostent;
- };
- if ($@) {
- die <<".EOF.";
-Allow_Private_IPs is false; this feature requires the Net::IP, Socket, and
-Net::hostent modules:
-$@
-.EOF.
- }
-}
-
# Global variables
# What URI's did we process? (used for recursive mode)
my %processed;
@@ -278,6 +323,16 @@
$ua->conn_cache({ total_capacity => 1}); # 1 keep-alive connection
$ua->delay($Opts{Sleep_Time}/60);
$ua->timeout($Opts{Timeout});
+eval {
+ $ua->allow_private_ips($Opts{Allow_Private_IPs});
+};
+if ($@) {
+ die <<".EOF.";
+Allow_Private_IPs is false; this feature requires the Net::IP, Socket, and
+Net::hostent modules:
+$@
+.EOF.
+}
if ($Opts{Command_Line}) {
@@ -1067,15 +1122,12 @@
}
}
- # Tell the user agent if we want progress reports (in redirects) or not.
- $ua->{Checklink_verbose_progress} = $verbose_progress;
-
- # Check if the IP address is allowed.
- my $response = &ip_allowed($request->uri());
- return $response if $response;
+ # Tell the user agent if we want progress reports for redirects or not.
+ $ua->redirect_progress_callback(sub { &hprintf("\n%s %s ", @_); })
+ if $verbose_progress;
# Do the query
- $response = $ua->request($request);
+ my $response = $ua->request($request);
# Get the results
# Record the very first response
@@ -1860,36 +1912,6 @@
}
}
-#
-# Checks whether we're allowed to retrieve the document based on it's IP
-# address. Takes an URI object and returns a HTTP::Response containing the
-# appropriate status and error message if the IP was disallowed, 0
-# otherwise. URIs without hostname or IP address are always allowed,
-# including schemes where those make no sense (eg. data:, often javascript:).
-#
-sub ip_allowed ($)
-{
- my ($uri) = @_;
- return 0 if $Opts{Allow_Private_IPs}; # Short-circuit
-
- my $hostname = undef;
- eval { $hostname = $uri->host() }; # Not all URIs implement host()...
- return 0 unless $hostname;
-
- my $addr = my $iptype = my $resp = undef;
- if (my $host = Net::hostent::gethostbyname($hostname)) {
- $addr = inet_ntoa($host->addr()) if $host->addr();
- if ($addr && (my $ip = Net::IP->new($addr))) {
- $iptype = $ip->iptype();
- }
- }
- if ($iptype && $iptype ne 'PUBLIC') {
- $resp = HTTP::Response->new(403,
- 'Checking non-public IP address disallowed by link checker configuration');
- }
- return $resp;
-}
-
sub links_summary (\%\%\%\%)
{
# Advices to fix the problems
Received on Sunday, 29 July 2007 13:25:32 UTC