- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:24 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset: 341:0dfa6acfa23d
user: ville
date: Thu Feb 04 19:05:22 2010 +0000
files: bin/checklink
description:
Let LWP decode response charsets.
BOM-based decoding works with LWP >= 5.827.
diff -r e9cbb588608c -r 0dfa6acfa23d bin/checklink
--- a/bin/checklink Thu Jan 14 16:55:18 2010 +0000
+++ b/bin/checklink Thu Feb 04 19:05:22 2010 +0000
@@ -5,7 +5,7 @@
# (c) 1999-2010 World Wide Web Consortium
# based on Renaud Bruyeron's checklink.pl
#
-# $Id: checklink,v 4.177 2010-01-14 16:55:18 ville Exp $
+# $Id: checklink,v 4.178 2010-02-04 19:05:22 ville Exp $
#
# This program is licensed under the W3C(r) Software License:
# http://www.w3.org/Consortium/Legal/copyright-software
@@ -283,7 +283,7 @@
$PROGRAM = 'W3C-checklink';
$VERSION = '4.5';
$REVISION = sprintf('version %s (c) 1999-2010 W3C', $VERSION);
- my ($cvsver) = q$Revision: 4.177 $ =~ /(\d+[\d\.]*\.\d+)/;
+ my ($cvsver) = q$Revision: 4.178 $ =~ /(\d+[\d\.]*\.\d+)/;
$AGENT = sprintf('%s/%s [%s] %s',
$PROGRAM, $VERSION, $cvsver,
(W3C::UserAgent::USE_ROBOT_UA
@@ -1244,9 +1244,9 @@
my $response = shift;
my $error = undef;
- # @@@TODO: maybe also decode charsets?
- my $docref = $response->decoded_content(ref => 1, charset => 'none');
+ my $docref = $response->decoded_content(ref => 1);
if (defined($docref)) {
+ utf8::encode($$docref);
$response->content_ref($docref);
# Remove Content-Encoding so it won't be decoded again later.
$response->remove_header('Content-Encoding')
@@ -1256,7 +1256,12 @@
my $ct = $response->header('Content-Type');
$ct = defined($ct) ? "'$ct'" : 'undefined';
my $request_uri = $response->request->url;
- $error = "Error decoding document at <$request_uri>, Content-Type $ct, Content-Encoding $ce: '$@'";
+ # content_charset() is available in LWP >= 5.827
+ my $cs = $response->can('content_charset') ?
+ $response->content_charset() : undef;
+ $cs = defined($cs) ? "'$cs'" : 'unknown';
+ $error = "Error decoding document at <$request_uri>, Content-Type $ct, " .
+ "Content-Encoding $ce, content charset $cs: '$@'";
}
return $error;
}
Received on Thursday, 5 August 2010 14:47:38 UTC