- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Mon, 21 Apr 2008 07:09:55 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv17006
Modified Files:
checklink
Log Message:
Add support for Content-Encodings that libwww-perl decodes automatically (#5648).
Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.98
retrieving revision 4.99
diff -u -d -r4.98 -r4.99
--- checklink 19 Feb 2008 22:41:12 -0000 4.98
+++ checklink 21 Apr 2008 07:09:53 -0000 4.99
@@ -1088,13 +1088,25 @@
# Can we parse the document?
my $failed_reason;
my $ct = $response->header('Content-Type');
- my $ce = $response->header('Content-Encoding');
if (!$ct || $ct !~ $ContentTypes) {
$failed_reason = "Content-Type for <$request_uri> is " .
(defined($ct) ? "'$ct'" : 'undefined');
- } elsif (defined($ce) && $ce ne 'identity') {
- # @@@ We could maybe handle gzip...
- $failed_reason = "Content-Encoding for <$request_uri> is '$ce'";
+ } else {
+ if ($response->can('decoded_content')) { # LWP >= 5.802
+ # Pre-decode Content-Encoding.
+ # @@@TODO: maybe also decode charsets?
+ my $docref = $response->decoded_content(ref => 1, charset => 'none');
+ if (defined($docref)) {
+ $response->content_ref($docref);
+ # Remove Content-Encoding so it won't be decoded again later.
+ $response->remove_header('Content-Encoding')
+ } else {
+ my $ce = $response->header('Content-Encoding');
+ $ce = defined($ce) ? "'$ce'" : 'undefined';
+ $ct = defined($ct) ? "'$ct'" : 'undefined';
+ $failed_reason = "Error decoding document at <$request_uri>, Content-Type $ct, Content-Encoding $ce: '$@'";
+ }
+ }
}
if ($failed_reason) {
# No, there is a problem...
@@ -1324,12 +1336,6 @@
return $p;
}
- my $docref = undef;
- # @@@TODO: maybe also do charset decoding some day?
- $docref = $response->decoded_content(ref => 1, charset => "none")
- if ($response->can('decoded_content')); # LWP >= 5.802
- $docref ||= $response->content_ref();
-
my $start;
$p = W3C::LinkChecker->new();
$p->{base} = $base_uri;
@@ -1338,6 +1344,9 @@
print("Parsing...\n");
}
+ # Content-Encoding etc already decoded in get_document().
+ my $docref = $response->content_ref();
+
# Count lines beforehand if needed for progress indicator. In all cases,
# the actual final number of lines processed shown is populated by our
# end_document handler.
Received on Monday, 21 April 2008 07:10:28 UTC