- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Tue, 10 Feb 2009 20:11:13 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv30630/bin

Modified Files:
    checklink
Log Message:
Improve handling of URI schemes we don't want checked.

Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.145
retrieving revision 4.146
diff -u -d -r4.145 -r4.146
--- checklink 10 Feb 2009 19:28:52 -0000 4.145
+++ checklink 10 Feb 2009 20:11:11 -0000 4.146
@@ -72,6 +72,9 @@
 
   $self->allow_private_ips(1);
 
+  # TODO: bug 29
+  $self->protocols_forbidden([qw(mailto javascript)]);
+
   return $self;
 }
 
@@ -203,9 +206,10 @@
 use URI::file qw();
 # @@@ Needs also W3C::UserAgent but can't use() it here.
 
-use constant RC_ROBOTS_TXT    => -1;
-use constant RC_DNS_ERROR     => -2;
-use constant RC_IP_DISALLOWED => -3;
+use constant RC_ROBOTS_TXT          => -1;
+use constant RC_DNS_ERROR           => -2;
+use constant RC_IP_DISALLOWED       => -3;
+use constant RC_PROTOCOL_DISALLOWED => -4;
 
 use constant LINE_UNKNOWN => -1;
 
@@ -498,6 +502,11 @@
   CGI::Carp->import(qw(fatalsToBrowser));
   require CGI::Cookie;
 
+  # file: URIs are not allowed in CGI mode
+  my $forbidden = $ua->protocols_forbidden() || [];
+  push(@$forbidden, 'file');
+  $ua->protocols_forbidden($forbidden);
+
   my $query = new CGI;
   # Set a few parameters in CGI mode
   $Opts{Verbose} = 0;
@@ -580,11 +589,7 @@
     } if (MP2() && !$ENV{HTTP_AUTHORIZATION});
 
     $uri =~ s/^\s+//g;
-    if ($uri =~ m/^file:/) {
-      # Only the http scheme is allowed
-      # TODO: bug 29
-      &file_uri($uri);
-    } elsif ($uri !~ m/:/) {
+    if ($uri !~ m/:/) {
       if ($uri =~ m|^//|) {
         $uri = 'http:'.$uri;
       } else {
@@ -1010,10 +1015,6 @@
   my %broken;
   while (my ($u, $ulinks) = each(%links)) {
 
-    # Don't check mailto: URI's
-    # TODO: bug 29
-    next if ($u =~ m/^mailto:/);
-
     if ($Opts{Summary_Only}) {
       # Hack: avoid browser/server timeouts in summary only CGI mode, bug 896
       print ' ' if ($Opts{HTML} && !$Opts{Command_Line});
@@ -1370,6 +1371,9 @@
   $results{$uri}{location}{code} = RC_DNS_ERROR()
     if ($results{$uri}{location}{code} == 500 &&
         $response->message() =~ /Bad hostname '[^\']*'/);
+  $results{$uri}{location}{code} = RC_PROTOCOL_DISALLOWED()
+    if ($results{$uri}{location}{code} == 500 &&
+        $response->message() =~ /Access to '[^\']*' URIs has been disabled/);
   $results{$uri}{location}{type} = $response->header('Content-type');
   $results{$uri}{location}{display} = $results{$uri}{location}{code};
   # Rewind, check for the original code and message.
@@ -1699,21 +1703,6 @@
   # $links is a hash of the links in the documents checked
   # $redirects is a map of the redirects encountered
 
-  # Checking file: URI's is not allowed with a CGI
-  # TODO: bug 29
-  if ($referer ne $uri) {
-    if (!$Opts{Command_Line} && $referer !~ m/^file:/ && $uri =~ m/^file:/) {
-      my $msg = 'Error: \'file:\' URI not allowed';
-      # Can't test? Return 400 Bad request.
-      $results{$uri}{location}{code} = 400;
-      $results{$uri}{location}{record} = 400;
-      $results{$uri}{location}{success} = 0;
-      $results{$uri}{location}{message} = $msg;
-      &hprintf("Error: %d %s\n", 400, $msg) if $Opts{Verbose};
-      return;
-    }
-  }
-
   # Get the document with the appropriate method
   # Only use GET if there are fragments. HEAD is enough if it's not the
   # case.
@@ -2187,6 +2176,7 @@
     RC_DNS_ERROR() => 'The hostname could not be resolved. Check the link for typos.',
     RC_IP_DISALLOWED() => sprintf('The link resolved to a %snon-public IP address%s, and this link checker instance has been configured to not access such addresses. This may be a real error or just a quirk of the name resolver configuration on the server where the link checker runs. Check the link manually, in particular its hostname/IP address.', $Opts{HTML} ?
      ('<a href="http://www.ietf.org/rfc/rfc1918.txt">', '</a>') : ('') x 2),
+    RC_PROTOCOL_DISALLOWED() => 'The link checker does not support checking links with this URI scheme.',
   );
   my %priority = ( 410 => 1,
                    404 => 2,
@@ -2399,7 +2389,8 @@
   my $r = HTTP::Response->new($code);
   if ($r->is_success()) {
     $icon_type = 'error'; # if is success but reported, it's because of broken frags => error
-  } elsif ($code == RC_ROBOTS_TXT() || $code == RC_IP_DISALLOWED()) {
+  } elsif ($code == RC_ROBOTS_TXT() || $code == RC_IP_DISALLOWED() ||
+           $code == RC_PROTOCOL_DISALLOWED()) {
     $icon_type = 'info';
   } elsif ($code == 300) {
     $icon_type = 'info';
@@ -2484,18 +2475,6 @@
   return;
 }
 
-sub file_uri ($)
-{
-  my ($uri) = @_;
-  &html_header($uri);
-  printf(<<'EOF', &encode($uri));
-<h2>Forbidden</h2>
-<p>You cannot check such a URI (<code>%s</code>).</p>
-EOF
-  &html_footer();
-  exit;
-}
-
 sub print_form (\%$$)
 {
   my ($params, $cookie, $check_num) = @_;
Received on Tuesday, 10 February 2009 20:11:21 UTC