- From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
- Date: Mon, 14 Dec 2009 22:56:02 +0000
- To: www-validator-cvs@w3.org
Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv25509/httpd/cgi-bin
Modified Files:
check
Log Message:
More XML declaration related regex cleanups.
Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.758
retrieving revision 1.759
diff -u -d -r1.758 -r1.759
--- check 14 Dec 2009 22:51:18 -0000 1.758
+++ check 14 Dec 2009 22:56:00 -0000 1.759
@@ -617,24 +617,26 @@
#$xmlparser->load_catalog(catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc'));
my $xml_string = join "\n", @{$File->{Content}};
+ my $xmlws = qr/[\x20\x09\x0D\x0A]/o;
+
# the XML parser will check the value of encoding attribute in XML
# declaration so we have to amend it to reflect transcoding.
# see Bug 4867
$xml_string =~ s/
- (^<\?xml\b[^>]*[\x20\x09\x0D\x0A])
- (encoding[\x20\x09\x0D\x0A]*=[\x20\x09\x0D\x0A]*
+ (^<\?xml\b[^>]*${xmlws})
+ (encoding${xmlws}*=${xmlws}*
(?:(["'])[A-Za-z][a-zA-Z0-9_-]+\3)
)
([^>].*\?>)
- /$1encoding="UTF-8"$4/sx;
+ /$1encoding="UTF-8"$4/sox;
# Is the document standalone? Need to check with a regex because
# the parser may fail to return a document we could use for this.
my $standalone = (
- $xml_string =~ /^<\?xml\b[^>]*[\x20\x09\x0D\x0A]
- standalone[\x20\x09\x0D\x0A]*=[\x20\x09\x0D\x0A]*
+ $xml_string =~ /^<\?xml\b[^>]*${xmlws}
+ standalone${xmlws}*=${xmlws}*
(["'])yes\1
- /sx
+ /sox
);
eval { $xmlparser->parse_string($xml_string); };
@@ -2735,6 +2737,8 @@
$File->{ModeChoice} = '';
my $parseModeFromDoctype = $CFG->{Types}->{$fpi}->{'Parse Mode'} || 'TBD';
+ my $xmlws = qr/[\x20\x09\x0D\x0A]/o;
+
# $File->{Mode} may have been set in parse_content_type
# and it would come from the Media Type
my $parseModeFromMimeType = $File->{Mode};
@@ -2743,23 +2747,22 @@
# the 10 first lines should be safe
my $parseModeFromXMLDecl = (
$begincontent =~
- /^ [\x20\x09\x0D\x0A]* # whitespace before the decl should not be happening
- # but we are greedy for the sake of detection, not validation
- <\?xml # start matching an XML Declaration
- [\x20\x09\x0D\x0A]+ # x20, x09, xD and xA are the allowed "xml white space"
- version [\x20\x09\x0D\x0A]* = # for documents, version info is mandatory
- [\x20\x09\x0D\x0A]* (["'])1.[01]\1 # hardcoding the existing XML versions.
- # Maybe we should use \d\.\d
- (?:[\x20\x09\x0D\x0A]+ encoding
- [\x20\x09\x0D\x0A]* = [\x20\x09\x0D\x0A]*
+ /^ ${xmlws}* # whitespace before the decl should not be happening
+ # but we are greedy for the sake of detection, not validation
+ <\?xml ${xmlws}+ # start matching an XML Declaration
+ version ${xmlws}* = # for documents, version info is mandatory
+ ${xmlws}* (["'])1.[01]\1 # hardcoding the existing XML versions.
+ # Maybe we should use \d\.\d
+ (?:${xmlws}+ encoding
+ ${xmlws}* = ${xmlws}*
(["'])[A-Za-z][a-zA-Z0-9_-]+\2
- )? # encoding info is optional
- (?:[\x20\x09\x0D\x0A]+ standalone
- [\x20\x09\x0D\x0A]* = [\x20\x09\x0D\x0A]*
+ )? # encoding info is optional
+ (?:${xmlws}+ standalone
+ ${xmlws}* = ${xmlws}*
(["'])(?:yes|no)\3
- )? # ditto standalone info, optional
- [\x20\x09\x0D\x0A]* \?> # end of XML Declaration
- /x
+ )? # ditto standalone info, optional
+ ${xmlws}* \?> # end of XML Declaration
+ /ox
?
'XML' :
'TBD'
Received on Monday, 14 December 2009 22:56:16 UTC