W3C home > Mailing lists > Public > www-validator-cvs@w3.org > May 2007

validator/httpd/cgi-bin check,1.520,1.521

From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
Date: Fri, 18 May 2007 00:54:55 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1HoqkB-0008EV-GA@lionel-hutz.w3.org>

Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv30796/httpd/cgi-bin

Modified Files:
	check 
Log Message:
The fix for bug 4474 actually broke the transcoding routine with some versions of the Encode library.
http://lists.w3.org/Archives/Public/www-validator-cvs/2007Apr/0159.html

This patch:
* restores the transcoding routine to normal operation
  (hence removing the mistaken "non-sgml character" errors for Unicode content)
* makes sure that content we output is properly encoded from Perl's internal representation to UTF-8
* adds proper decoding/encoding for tidy processing



Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.520
retrieving revision 1.521
diff -u -d -r1.520 -r1.521
--- check	17 May 2007 04:30:00 -0000	1.520
+++ check	18 May 2007 00:54:52 -0000	1.521
@@ -32,6 +32,7 @@
 # Pragmas.
 use strict;
 use warnings;
+use utf8;
 
 #
 # Modules.  See also the BEGIN block further down below.
@@ -846,7 +847,7 @@
     require HTML::Tidy;
     my $tidy = HTML::Tidy->new({config_file => $CFG->{Paths}->{TidyConf}});
 
-    $File->{'Tidy'} = $tidy->clean(join"\n",@{$File->{Content}});
+    $File->{'Tidy'} = Encode::decode('utf-8', $tidy->clean(join"\n",@{$File->{Content}}));
     $File->{'Tidy_OK'} = TRUE;
   };
   if ($@) {
@@ -908,7 +909,8 @@
 #  if $template->param('opt_show_raw_errors');
 #  $T->param(file_outline   => &outline($File)) if $T->param('opt_show_outline');
 
-print $template->output;
+# transcode output from perl's internal to utf-8 and output
+print Encode::encode('UTF-8', $template->output);
 
 #
 # Get rid of $File object and exit.
@@ -2045,7 +2047,7 @@
 
   # Try to transcode
   eval {
-    $output = Encode::encode("utf8", Encode::decode($cs, $input, Encode::FB_CROAK));
+    $output = Encode::decode($cs, $input, Encode::FB_CROAK);
   };
 
   # Transcoding failed
@@ -2071,6 +2073,8 @@
   # tentative fix for http://www.w3.org/Bugs/Public/show_bug.cgi?id=3992
   $output =~ s/(\r\n|\n|\r)/\n/g;
 
+  #debug: we could check if the content has utf8 bit on with 
+  #$output= utf8::is_utf8($output) ? 1 : 0;
   $File->{Content} = [split/\n/, $output];
 
   return $File;
@@ -2123,7 +2127,8 @@
 
   if ($File->{Opt}->{Output} eq 'html') {
     &prep_template($File, $File->{Templates}->{Error});
-    print $File->{Templates}->{Error}->output;
+    # transcode output from perl's internal to utf-8 and output
+    print Encode::encode('UTF-8',$File->{Templates}->{Error}->output);
     exit;
   } else {
 
Received on Friday, 18 May 2007 00:54:58 UTC

This archive was generated by hypermail 2.4.0 : Friday, 17 January 2020 23:02:22 UTC