- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Mon, 28 Apr 2008 03:42:45 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin In directory hutz:/tmp/cvs-serv14629/httpd/cgi-bin Modified Files: check Log Message: Trying to solve the headache of the default fallback character encoding: different specs suggest different encodings, while we try to promote UTF-8 as the best practice. This patch works around the issue by trying a sequence of fallback encodings: UTF-8, then windows-1252, then ISO-8859-1. Index: check =================================================================== RCS file: /sources/public/validator/httpd/cgi-bin/check,v retrieving revision 1.584 retrieving revision 1.585 diff -u -d -r1.584 -r1.585 --- check 23 Apr 2008 04:23:31 -0000 1.584 +++ check 28 Apr 2008 03:42:43 -0000 1.585 @@ -519,8 +519,7 @@ $File->{Charset}->{Use} = "utf-8"; } elsif (&is_xml($File) and not $File->{ContentType} =~ m(^text/)) { $File->{Charset}->{Use} = 'utf-8'; # UTF-8 (image/svg+xml etc.) -} - +} unless ($File->{Charset}->{Use}) { $File->{Charset}->{Use} = $File->{Charset}->{META}; } @@ -563,12 +562,6 @@ } } -unless ($File->{Charset}->{Use}) { # No charset given... - &add_warning('W04', {W04_charset => 'UTF-8'}); - $File->{Tentative} |= T_ERROR; # Can never be valid. - $File->{Charset}->{Use} = 'utf-8'; -} - # # Abort if an error was flagged while finding the encoding. &abort_if_error_flagged($File, O_CHARSET|O_DOCTYPE); @@ -598,9 +591,32 @@ # gb18030 requires Encode::HanExtra but no additional alias -# +$File->{Charset}->{Default} = FALSE; +unless ($File->{Charset}->{Use}) { # No charset given... + $File->{Charset}->{Use} = 'utf-8'; + $File->{Charset}->{Default} = TRUE; + $File->{Tentative} |= T_ERROR; # Can never be valid. 
+ &add_warning('W04', {W04_charset => "UTF-8"}); +} + + # Always transcode, even if the content claims to be UTF-8 $File = transcode($File); +if (($File->{ContentType} == "text/html") and ($File->{Charset}->{Default}) and $File->{'Error Flagged'}) { + $File->{'Error Flagged'} = FALSE; # reset + # we try again, this time with win-1252 + $File->{Charset}->{Use} = 'windows-1252'; + &add_warning('W04', {W04_charset => "windows-1252", W04_also_tried=> "UTF-8"}); + $File = transcode($File); +} +if (($File->{ContentType} == "text/html") and ($File->{Charset}->{Default}) and $File->{'Error Flagged'}) { + $File->{'Error Flagged'} = FALSE; # reset + # we try again, this time with latin1... + $File->{Charset}->{Use} = 'iso-8859-1'; + &add_warning('W04', {W04_charset => "iso-8859-1", W04_also_tried => "UTF-8, windows-1252"}); + $File = transcode($File); +} +# if it still does not work, we abandon hope here &abort_if_error_flagged($File, O_CHARSET); #
Received on Monday, 28 April 2008 03:43:24 UTC