- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Thu, 04 Feb 2010 19:05:24 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv11613

Modified Files:
    checklink
Log Message:
Let LWP decode response charsets. BOM based decoding works with LWP >= 5.827.

Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.177
retrieving revision 4.178
diff -u -d -r4.177 -r4.178
--- checklink 14 Jan 2010 16:55:18 -0000 4.177
+++ checklink 4 Feb 2010 19:05:22 -0000 4.178
@@ -1244,9 +1244,9 @@
     my $response = shift;
     my $error = undef;
 
-    # @@@TODO: maybe also decode charsets?
-    my $docref = $response->decoded_content(ref => 1, charset => 'none');
+    my $docref = $response->decoded_content(ref => 1);
     if (defined($docref)) {
+        utf8::encode($$docref);
         $response->content_ref($docref);
         # Remove Content-Encoding so it won't be decoded again later.
         $response->remove_header('Content-Encoding')
@@ -1256,7 +1256,12 @@
         my $ct = $response->header('Content-Type');
         $ct = defined($ct) ? "'$ct'" : 'undefined';
         my $request_uri = $response->request->url;
-        $error = "Error decoding document at <$request_uri>, Content-Type $ct, Content-Encoding $ce: '$@'";
+        # content_charset() is available in LWP >= 5.827
+        my $cs = $response->can('content_charset') ?
+            $response->content_charset() : undef;
+        $cs = defined($cs) ? "'$cs'" : 'unknown';
+        $error = "Error decoding document at <$request_uri>, Content-Type $ct, " .
+            "Content-Encoding $ce, content charset $cs: '$@'";
     }
     return $error;
 }
Received on Thursday, 4 February 2010 19:05:25 UTC