- From: Mercurial notifier <nobody@w3.org>
- Date: Thu, 05 Aug 2010 14:47:24 +0000
- To: link-checker updates <www-validator-cvs@w3.org>
changeset:   341:0dfa6acfa23d
user:        ville
date:        Thu Feb 04 19:05:22 2010 +0000
files:       bin/checklink
description:
Let LWP decode response charsets.  BOM based decoding works with LWP >= 5.827.


diff -r e9cbb588608c -r 0dfa6acfa23d bin/checklink
--- a/bin/checklink	Thu Jan 14 16:55:18 2010 +0000
+++ b/bin/checklink	Thu Feb 04 19:05:22 2010 +0000
@@ -5,7 +5,7 @@
 # (c) 1999-2010 World Wide Web Consortium
 # based on Renaud Bruyeron's checklink.pl
 #
-# $Id: checklink,v 4.177 2010-01-14 16:55:18 ville Exp $
+# $Id: checklink,v 4.178 2010-02-04 19:05:22 ville Exp $
 #
 # This program is licensed under the W3C(r) Software License:
 # http://www.w3.org/Consortium/Legal/copyright-software
@@ -283,7 +283,7 @@
   $PROGRAM = 'W3C-checklink';
   $VERSION = '4.5';
   $REVISION = sprintf('version %s (c) 1999-2010 W3C', $VERSION);
-  my ($cvsver) = q$Revision: 4.177 $ =~ /(\d+[\d\.]*\.\d+)/;
+  my ($cvsver) = q$Revision: 4.178 $ =~ /(\d+[\d\.]*\.\d+)/;
   $AGENT = sprintf('%s/%s [%s] %s',
                    $PROGRAM, $VERSION, $cvsver,
                    (W3C::UserAgent::USE_ROBOT_UA
@@ -1244,9 +1244,9 @@
   my $response = shift;
   my $error = undef;
 
-  # @@@TODO: maybe also decode charsets?
-  my $docref = $response->decoded_content(ref => 1, charset => 'none');
+  my $docref = $response->decoded_content(ref => 1);
   if (defined($docref)) {
+    utf8::encode($$docref);
     $response->content_ref($docref);
     # Remove Content-Encoding so it won't be decoded again later.
     $response->remove_header('Content-Encoding')
@@ -1256,7 +1256,12 @@
     my $ct = $response->header('Content-Type');
     $ct = defined($ct) ? "'$ct'" : 'undefined';
     my $request_uri = $response->request->url;
-    $error = "Error decoding document at <$request_uri>, Content-Type $ct, Content-Encoding $ce: '$@'";
+    # content_charset() is available in LWP >= 5.827
+    my $cs = $response->can('content_charset') ?
+      $response->content_charset() : undef;
+    $cs = defined($cs) ? "'$cs'" : 'unknown';
+    $error = "Error decoding document at <$request_uri>, Content-Type $ct, " .
+      "Content-Encoding $ce, content charset $cs: '$@'";
   }
   return $error;
 }
Received on Thursday, 5 August 2010 14:47:38 UTC