- From: Olivier Thereaux via cvs-syncmail <cvsmail@w3.org>
- Date: Thu, 19 Jul 2007 06:21:05 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv28789
Modified Files:
check
Log Message:
The XML parser checks the value of the encoding attribute in the XML declaration,
so we have to amend it to reflect transcoding: a regexp matches the XML declaration
and replaces the encoding="foo" part with encoding="utf-8".
Note: this may throw off column/offset numbering if there are errors on that line.
See also Bug 4867:
http://www.w3.org/Bugs/Public/show_bug.cgi?id=4867
Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.541
retrieving revision 1.542
diff -u -d -r1.541 -r1.542
--- check 19 Jul 2007 03:59:24 -0000 1.541
+++ check 19 Jul 2007 06:21:03 -0000 1.542
@@ -628,8 +628,14 @@
$xmlparser->line_numbers(1);
# loading the XML catalog for entities resolution
$xmlparser->load_catalog( File::Spec->catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc') );
+ my $xml_string = join"\n",@{$File->{Content}};
+ # the XML parser will check the value of encoding attribute in XML declaration
+ # so we have to amend it to reflect transcoding. see Bug 4867
+ $xml_string =~ s/(<\?xml.*)
+(encoding[\x20|\x09|\x0D|\x0A]*=[\x20|\x09|\x0D|\x0A]*(?:"[A-Za-z][a-zA-Z0-9_-]+"|'[A-Za-z][a-zA-Z0-9_-]+'))
+(.*\?>)/\1encoding="utf-8"\3/sx;
eval {
- $xmlparser->parse_string(join"\n",@{$File->{Content}});
+ $xmlparser->parse_string($xml_string);
};
my $xml_parse_errors_line = undef;
my @xmlwf_error_list;
Received on Thursday, 19 July 2007 06:21:09 UTC