- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Thu, 03 Dec 2009 20:30:22 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv17123

Modified Files:
    checklink
Log Message:
Decode content encodings also when parsing for anchors, thanks to Mark Olson.

Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.168
retrieving revision 4.169
diff -u -d -r4.168 -r4.169
--- checklink 4 Nov 2009 18:44:17 -0000 4.168
+++ checklink 3 Dec 2009 20:30:20 -0000 4.169
@@ -1210,6 +1210,32 @@
   return;
 }
 
+##########################################
+# Decode Content-Encodings in a response #
+##########################################
+
+sub decode_content ($)
+{
+  my $response = shift;
+  my $error = undef;
+
+  # @@@TODO: maybe also decode charsets?
+  my $docref = $response->decoded_content(ref => 1, charset => 'none');
+  if (defined($docref)) {
+    $response->content_ref($docref);
+    # Remove Content-Encoding so it won't be decoded again later.
+    $response->remove_header('Content-Encoding')
+  } else {
+    my $ce = $response->header('Content-Encoding');
+    $ce = defined($ce) ? "'$ce'" : 'undefined';
+    my $ct = $response->header('Content-Type');
+    $ct = defined($ct) ? "'$ct'" : 'undefined';
+    my $request_uri = $response->request->url;
+    $error = "Error decoding document at <$request_uri>, Content-Type $ct, Content-Encoding $ce: '$@'";
+  }
+  return $error;
+}
+
 #######################################
 # Get and parse a resource to process #
 #######################################
@@ -1270,19 +1296,7 @@
     $failed_reason = "Content-Type for <$request_uri> is " .
       (defined($ct) ? "'$ct'" : 'undefined');
   } else {
-    # Pre-decode Content-Encoding.
-    # @@@TODO: maybe also decode charsets?
-    my $docref = $response->decoded_content(ref => 1, charset => 'none');
-    if (defined($docref)) {
-      $response->content_ref($docref);
-      # Remove Content-Encoding so it won't be decoded again later.
-      $response->remove_header('Content-Encoding')
-    } else {
-      my $ce = $response->header('Content-Encoding');
-      $ce = defined($ce) ? "'$ce'" : 'undefined';
-      $ct = defined($ct) ? "'$ct'" : 'undefined';
-      $failed_reason = "Error decoding document at <$request_uri>, Content-Type $ct, Content-Encoding $ce: '$@'";
-    }
+    $failed_reason = decode_content($response);
   }
   if ($failed_reason) {
     # No, there is a problem...
@@ -1850,6 +1864,10 @@
       return;
     }
     # Do it then
+    if (my $error = decode_content($response)) {
+      &hprintf("%s\n.", $error);
+    }
+    # @@@TODO: this isn't the best thing to do if a decode error occurred
     $p = &parse_document($uri, $response->base(), $response, 0, $want_links);
   } else {
     # We already had the information
Received on Thursday, 3 December 2009 20:30:24 UTC