W3C home > Mailing lists > Public > www-validator-cvs@w3.org > February 2010

perl/modules/W3C/LinkChecker/bin checklink,4.177,4.178

From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
Date: Thu, 04 Feb 2010 19:05:24 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1Nd712-00035q-6i@lionel-hutz.w3.org>
Update of /sources/public/perl/modules/W3C/LinkChecker/bin
In directory hutz:/tmp/cvs-serv11613

Modified Files:
	checklink 
Log Message:
Let LWP decode response charsets.

BOM-based decoding works with LWP >= 5.827.


Index: checklink
===================================================================
RCS file: /sources/public/perl/modules/W3C/LinkChecker/bin/checklink,v
retrieving revision 4.177
retrieving revision 4.178
diff -u -d -r4.177 -r4.178
--- checklink	14 Jan 2010 16:55:18 -0000	4.177
+++ checklink	4 Feb 2010 19:05:22 -0000	4.178
@@ -1244,9 +1244,9 @@
   my $response = shift;
   my $error = undef;
 
-  # @@@TODO: maybe also decode charsets?
-  my $docref = $response->decoded_content(ref => 1, charset => 'none');
+  my $docref = $response->decoded_content(ref => 1);
   if (defined($docref)) {
+    utf8::encode($$docref);
     $response->content_ref($docref);
     # Remove Content-Encoding so it won't be decoded again later.
     $response->remove_header('Content-Encoding')
@@ -1256,7 +1256,12 @@
     my $ct = $response->header('Content-Type');
     $ct = defined($ct) ? "'$ct'" : 'undefined';
     my $request_uri = $response->request->url;
-    $error = "Error decoding document at <$request_uri>, Content-Type $ct, Content-Encoding $ce: '$@'";
+    # content_charset() is available in LWP >= 5.827
+    my $cs = $response->can('content_charset') ?
+      $response->content_charset() : undef;
+    $cs = defined($cs) ? "'$cs'" : 'unknown';
+    $error = "Error decoding document at <$request_uri>, Content-Type $ct, " .
+      "Content-Encoding $ce, content charset $cs: '$@'";
   }
   return $error;
 }
Received on Thursday, 4 February 2010 19:05:25 UTC

This archive was generated by hypermail 2.3.1 : Wednesday, 7 January 2015 15:17:41 UTC