- From: Ville Skytta <ville@hutz.w3.org>
- Date: Sun, 11 Apr 2004 11:05:56 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin In directory hutz:/tmp/cvs-serv5916/bin Modified Files: checklink Log Message: Reimplement/replace internal redirect tracking logic using the response chaining feature of libwww-perl (HTTP::Response->previous()). Should be more robust and RobotUA-friendly now. Index: checklink =================================================================== RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v retrieving revision 3.29 retrieving revision 3.30 diff -u -d -r3.29 -r3.30 --- checklink 10 Apr 2004 22:44:51 -0000 3.29 +++ checklink 11 Apr 2004 11:05:53 -0000 3.30 @@ -72,15 +72,9 @@ sub redirect_ok { my ($self, $request, $response) = @_; - if ($self->{Checklink_verbose_progress}) { &W3C::LinkChecker::hprintf("\n%s %s ", $request->method(),$request->uri()); } - - # Build a map of redirects - $self->{Redirects}{$self->{fetching}} = $request->uri(); - $self->{fetching} = $request->uri(); - return $self->SUPER::redirect_ok($request, $response); } @@ -808,7 +802,7 @@ } else { $response = &get_uri($method, $uri); &record_results($uri, $method, $response); - &record_redirects($redirects, $response->{Redirects}); + &record_redirects($redirects, $response); } if (! $response->is_success()) { if (! $in_recursion) { @@ -924,12 +918,6 @@ $ua->timeout($Opts{Timeout}); $ua->proxy('http', 'http://' . $Opts{HTTP_Proxy}) if $Opts{HTTP_Proxy}; - # $ua->{fetching} contains the URI we originally wanted - # $ua->{uri} is modified in the case of a redirect; this is used to - # build $ua->{Redirects} - $ua->{uri} = $ua->{fetching} = $uri; - $ua->{Redirects} = $redirects if defined($redirects); - # Do we want printouts of progress? my $verbose_progress = ! ($Opts{Summary_Only} || (!$doc_count && $Opts{HTML})); @@ -983,19 +971,14 @@ $realm = $1; } print "\n" if $verbose_progress; - return &get_uri($method, $response->request->url, - $start, $ua->{Redirects}, - $code, $realm, $message, 1); + return &get_uri($method, $response->request()->url(), + $start, $redirects, $code, $realm, $message, 1); } - # Record the redirects - $response->{Redirects} = $ua->{Redirects}; # @@@ subtract robot delay from the "fetched in" time? &hprintf(" fetched in %ss\n", &time_diff($start, &get_timestamp())) if $verbose_progress; - $response->{OriginalCode} = $code; - $response->{OriginalMessage} = $message; - $response->{Realm} = $realm if defined($realm); + $response->{Realm} = $realm if defined($realm); return $response; } @@ -1012,10 +995,10 @@ $results{$uri}{location}{code} = $response->code(); $results{$uri}{location}{type} = $response->header('Content-type'); $results{$uri}{location}{display} = $results{$uri}{location}{code}; - $results{$uri}{location}{orig} = $response->{OriginalCode}; - # Did we get a redirect? - if ($response->{OriginalCode} != $response->code()) { - $results{$uri}{location}{orig_message} = $response->{OriginalMessage}; + # Rewind, check for the original code and message. + for (my $tmp = $response->previous(); $tmp; $tmp = $tmp->previous()) { + $results{$uri}{location}{orig} = $tmp->code(); + $results{$uri}{location}{orig_message} = $tmp->message(); } $results{$uri}{location}{success} = $response->is_success(); # Stores the authentication information @@ -1273,7 +1256,6 @@ # Can't test? Return 400 Bad request. $results{$uri}{location}{code} = 400; $results{$uri}{location}{record} = 400; - $results{$uri}{location}{orig} = 400; $results{$uri}{location}{success} = 0; $results{$uri}{location}{message} = $msg; &hprintf("Error: %d %s\n", 400, $msg) if $Opts{Verbose}; @@ -1296,7 +1278,7 @@ # Get the information back from get_uri() &record_results($uri, $method, $response); # Record the redirects - &record_redirects($redirects, $response->{Redirects}); + &record_redirects($redirects, $response); } # We got the response of the HTTP request. Stop here if it was a HEAD. @@ -1408,11 +1390,11 @@ ######################## # Record the redirects in a hash -sub record_redirects (\%\%) +sub record_redirects (\%$) { - my ($redirects, $sub) = @_; - foreach my $r (keys %$sub) { - $redirects->{$r} = $sub->{$r}; + my ($redirects, $response) = @_; + for (my $prev = $response->previous(); $prev; $prev = $prev->previous()) { + $redirects->{$prev->request()->url()} = $response->request()->url(); } } @@ -1598,6 +1580,9 @@ my $s = (scalar(@unique) > 1) ? 's' : ''; undef @unique; + my @http_codes = ($currloc->{code}); + unshift(@http_codes, $currloc->{orig}) if $currloc->{orig}; + if ($Opts{HTML}) { # Style stuff my $idref = ''; @@ -1623,7 +1608,7 @@ printf(" <dt%s>%s</dt> <dd>What to do: <strong%s>%s</strong>%s<br></dd> -<dd>HTTP Code returned: %d%s<br> +<dd>HTTP Code returned: %s<br> HTTP Message: %s%s%s</dd> <dd>Line%s: %s</dd>\n", # Anchor for return codes @@ -1639,13 +1624,9 @@ # Redirect too? $redirect_too ? sprintf(' <span %s>%s</span>', &bgcolor(301), $redirect_too) : '', - # Original HTTP reply - $currloc->{orig}, - # Final HTTP reply - ($currloc->{code} != $currloc->{orig}) - ? ' <span title="redirected to">-></span> '. - &encode($currloc->{code}) - : '', + # Response code chain + join(' <span title="redirected to">-></span> ', + map { &encode($_) } @http_codes), # Realm defined($currloc->{realm}) ? sprintf('Realm: %s<br>', &encode($currloc->{realm})) : '', @@ -1670,17 +1651,13 @@ } } else { my $redirmsg = $redirect_loop ? ' redirect loop detected' : ''; - printf("\n%s\t%s\n Code: %d%s %s\n%s\n", + printf("\n%s\t%s\n Code: %s %s\n%s\n", # List of redirects $redirected ? join("\n-> ", @redirects_urls) . $redirmsg : $u, # List of lines $lines_list ? "Line$s: $lines_list" : '', - # Original HTTP reply - $currloc->{orig}, - # Final HTTP reply - ($currloc->{code} != $currloc->{orig}) - ? ' -> '.$currloc->{code} - : '', + # Response code chain + join(' -> ', @http_codes), # HTTP message $currloc->{message} || '', # What to do @@ -1755,12 +1732,8 @@ $iptype = 'PUBLIC' if ($iptype && $iptype eq 'PRIVATE' && $Opts{Allow_Private_IPs}); if ($iptype && $iptype ne 'PUBLIC') { - my $code = 403; - my $msg = - 'Checking non-public IP address disallowed by service configuration'; - $resp = HTTP::Response->new($code, $msg); - $resp->{OriginalCode} = $code; - $resp->{OriginalMessage} = $msg; + $resp = HTTP::Response->new(403, + 'Checking non-public IP address disallowed by link checker configuration'); } return $resp; }
Received on Sunday, 11 April 2004 07:06:14 UTC