- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Thu, 03 Dec 2009 20:30:22 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv17123
Modified Files:
checklink
Log Message:
Decode content encodings also when parsing for anchors, thanks to Mark Olson.
Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.168
retrieving revision 4.169
diff -u -d -r4.168 -r4.169
--- checklink 4 Nov 2009 18:44:17 -0000 4.168
+++ checklink 3 Dec 2009 20:30:20 -0000 4.169
@@ -1210,6 +1210,32 @@
return;
}
+##########################################
+# Decode Content-Encodings in a response #
+##########################################
+
+sub decode_content ($) # Arg: a response object. Returns undef on success, else an error message string.
+{
+ my $response = shift;
+ my $error = undef; # Stays undef unless the decode below fails.
+
+ # Only Content-Encoding is undone here (charset => 'none'). @@@TODO: maybe also decode charsets?
+ my $docref = $response->decoded_content(ref => 1, charset => 'none');
+ if (defined($docref)) {
+ $response->content_ref($docref); # From here on the response carries the decoded content.
+ # Remove Content-Encoding so it won't be decoded again later.
+ $response->remove_header('Content-Encoding')
+ } else { # No decoded content came back: report what was received instead.
+ my $ce = $response->header('Content-Encoding');
+ $ce = defined($ce) ? "'$ce'" : 'undefined'; # Quote the value, or say 'undefined' if the header is absent.
+ my $ct = $response->header('Content-Type');
+ $ct = defined($ct) ? "'$ct'" : 'undefined'; # Same quoting rule as for Content-Encoding.
+ my $request_uri = $response->request->url;
+ $error = "Error decoding document at <$request_uri>, Content-Type $ct, Content-Encoding $ce: '$@'"; # NOTE(review): presumably decoded_content() leaves its failure reason in $@ - confirm.
+ }
+ return $error;
+}
+
#######################################
# Get and parse a resource to process #
#######################################
@@ -1270,19 +1296,7 @@
$failed_reason = "Content-Type for <$request_uri> is " .
(defined($ct) ? "'$ct'" : 'undefined');
} else {
- # Pre-decode Content-Encoding.
- # @@@TODO: maybe also decode charsets?
- my $docref = $response->decoded_content(ref => 1, charset => 'none');
- if (defined($docref)) {
- $response->content_ref($docref);
- # Remove Content-Encoding so it won't be decoded again later.
- $response->remove_header('Content-Encoding')
- } else {
- my $ce = $response->header('Content-Encoding');
- $ce = defined($ce) ? "'$ce'" : 'undefined';
- $ct = defined($ct) ? "'$ct'" : 'undefined';
- $failed_reason = "Error decoding document at <$request_uri>, Content-Type $ct, Content-Encoding $ce: '$@'";
- }
+ $failed_reason = decode_content($response);
}
if ($failed_reason) {
# No, there is a problem...
@@ -1850,6 +1864,10 @@
return;
}
# Do it then
+ if (my $error = decode_content($response)) {
+ &hprintf("%s\n.", $error);
+ }
+ # @@@TODO: this isn't the best thing to do if a decode error occurred
$p = &parse_document($uri, $response->base(), $response, 0, $want_links);
} else {
# We already had the information
Received on Thursday, 3 December 2009 20:30:24 UTC