- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Fri, 18 May 2007 00:54:55 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv30796/httpd/cgi-bin
Modified Files:
check
Log Message:
The fix for bug 4474 actually broke the transcoding routine for some versions of the Encode library.
http://lists.w3.org/Archives/Public/www-validator-cvs/2007Apr/0159.html
This patch:
* fixes the transcoding routine back to normal
(hence removing the mistaken "non-sgml character" errors for Unicode content)
* makes sure that the content we output is properly encoded from Perl's internal representation to UTF-8
* adds proper decoding/encoding for tidy processing
Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.520
retrieving revision 1.521
diff -u -d -r1.520 -r1.521
--- check 17 May 2007 04:30:00 -0000 1.520
+++ check 18 May 2007 00:54:52 -0000 1.521
@@ -32,6 +32,7 @@
# Pragmas.
use strict;
use warnings;
+use utf8;
#
# Modules. See also the BEGIN block further down below.
@@ -846,7 +847,7 @@
require HTML::Tidy;
my $tidy = HTML::Tidy->new({config_file => $CFG->{Paths}->{TidyConf}});
- $File->{'Tidy'} = $tidy->clean(join"\n",@{$File->{Content}});
+ $File->{'Tidy'} = Encode::decode('utf-8', $tidy->clean(join"\n",@{$File->{Content}}));
$File->{'Tidy_OK'} = TRUE;
};
if ($@) {
@@ -908,7 +909,8 @@
# if $template->param('opt_show_raw_errors');
# $T->param(file_outline => &outline($File)) if $T->param('opt_show_outline');
-print $template->output;
+# transcode output from perl's internal to utf-8 and output
+print Encode::encode('UTF-8', $template->output);
#
# Get rid of $File object and exit.
@@ -2045,7 +2047,7 @@
# Try to transcode
eval {
- $output = Encode::encode("utf8", Encode::decode($cs, $input, Encode::FB_CROAK));
+ $output = Encode::decode($cs, $input, Encode::FB_CROAK);
};
# Transcoding failed
@@ -2071,6 +2073,8 @@
# tentative fix for http://www.w3.org/Bugs/Public/show_bug.cgi?id=3992
$output =~ s/(\r\n|\n|\r)/\n/g;
+ #debug: we could check if the content has utf8 bit on with
+ #$output= utf8::is_utf8($output) ? 1 : 0;
$File->{Content} = [split/\n/, $output];
return $File;
@@ -2123,7 +2127,8 @@
if ($File->{Opt}->{Output} eq 'html') {
&prep_template($File, $File->{Templates}->{Error});
- print $File->{Templates}->{Error}->output;
+ # transcode output from perl's internal to utf-8 and output
+ print Encode::encode('UTF-8',$File->{Templates}->{Error}->output);
exit;
} else {
Received on Friday, 18 May 2007 00:54:58 UTC