W3C home > Mailing lists > Public > www-validator-cvs@w3.org > December 2009

validator/httpd/cgi-bin check,1.758,1.759

From: Ville Skytta via cvs-syncmail <cvsmail@w3.org>
Date: Mon, 14 Dec 2009 22:56:02 +0000
To: www-validator-cvs@w3.org
Message-Id: <E1NKJpi-0006eI-QN@lionel-hutz.w3.org>
Update of /sources/public/validator/httpd/cgi-bin
In directory hutz:/tmp/cvs-serv25509/httpd/cgi-bin

Modified Files:
	check 
Log Message:
More XML declaration related regex cleanups.

Index: check
===================================================================
RCS file: /sources/public/validator/httpd/cgi-bin/check,v
retrieving revision 1.758
retrieving revision 1.759
diff -u -d -r1.758 -r1.759
--- check	14 Dec 2009 22:51:18 -0000	1.758
+++ check	14 Dec 2009 22:56:00 -0000	1.759
@@ -617,24 +617,26 @@
         #$xmlparser->load_catalog(catfile($CFG->{Paths}->{SGML}->{Library}, 'xml.soc'));
         my $xml_string = join "\n", @{$File->{Content}};
 
+        my $xmlws = qr/[\x20\x09\x0D\x0A]/o;
+
         # the XML parser will check the value of encoding attribute in XML
         # declaration so we have to amend it to reflect transcoding.
         # see Bug 4867
         $xml_string =~ s/
-               (^<\?xml\b[^>]*[\x20\x09\x0D\x0A])
-               (encoding[\x20\x09\x0D\x0A]*=[\x20\x09\x0D\x0A]*
+               (^<\?xml\b[^>]*${xmlws})
+               (encoding${xmlws}*=${xmlws}*
                    (?:(["'])[A-Za-z][a-zA-Z0-9_-]+\3)
                )
                ([^>].*\?>)
-           /$1encoding="UTF-8"$4/sx;
+           /$1encoding="UTF-8"$4/sox;
 
         # Is the document standalone?  Need to check with a regex because
         # the parser may fail to return a document we could use for this.
         my $standalone = (
-            $xml_string =~ /^<\?xml\b[^>]*[\x20\x09\x0D\x0A]
-                           standalone[\x20\x09\x0D\x0A]*=[\x20\x09\x0D\x0A]*
+            $xml_string =~ /^<\?xml\b[^>]*${xmlws}
+                           standalone${xmlws}*=${xmlws}*
                            (["'])yes\1
-                          /sx
+                          /sox
         );
 
         eval { $xmlparser->parse_string($xml_string); };
@@ -2735,6 +2737,8 @@
     $File->{ModeChoice} = '';
     my $parseModeFromDoctype = $CFG->{Types}->{$fpi}->{'Parse Mode'} || 'TBD';
 
+    my $xmlws = qr/[\x20\x09\x0D\x0A]/o;
+
     # $File->{Mode} may have been set in parse_content_type
     # and it would come from the Media Type
     my $parseModeFromMimeType = $File->{Mode};
@@ -2743,23 +2747,22 @@
                                 # the 10 first lines should be safe
     my $parseModeFromXMLDecl = (
         $begincontent =~
-            /^ [\x20\x09\x0D\x0A]*                    # whitespace before the decl should not be happening
-                                                      # but we are greedy for the sake of detection, not validation
-      <\?xml                                          # start matching an XML Declaration
-      [\x20\x09\x0D\x0A]+                             # x20, x09, xD and xA are the allowed "xml white space"
-      version [\x20\x09\x0D\x0A]* =                   # for documents, version info is mandatory
-      [\x20\x09\x0D\x0A]* (["'])1.[01]\1              # hardcoding the existing XML versions.
-                                                      # Maybe we should use \d\.\d
-      (?:[\x20\x09\x0D\x0A]+ encoding
-       [\x20\x09\x0D\x0A]* = [\x20\x09\x0D\x0A]*
+            /^ ${xmlws}*                # whitespace before the decl should not be happening
+                                        # but we are greedy for the sake of detection, not validation
+      <\?xml ${xmlws}+                  # start matching an XML Declaration
+      version ${xmlws}* =               # for documents, version info is mandatory
+      ${xmlws}* (["'])1.[01]\1          # hardcoding the existing XML versions.
+                                        # Maybe we should use \d\.\d
+      (?:${xmlws}+ encoding
+       ${xmlws}* = ${xmlws}*
        (["'])[A-Za-z][a-zA-Z0-9_-]+\2
-      )?                                              # encoding info is optional
-      (?:[\x20\x09\x0D\x0A]+ standalone
-       [\x20\x09\x0D\x0A]* = [\x20\x09\x0D\x0A]*
+      )?                                # encoding info is optional
+      (?:${xmlws}+ standalone
+       ${xmlws}* = ${xmlws}*
        (["'])(?:yes|no)\3
-      )?                                              # ditto standalone info, optional
-      [\x20\x09\x0D\x0A]* \?>                         # end of XML Declaration
-    /x
+      )?                                # ditto standalone info, optional
+      ${xmlws}* \?>                     # end of XML Declaration
+    /ox
         ?
             'XML' :
             'TBD'
Received on Monday, 14 December 2009 22:56:16 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Thursday, 26 April 2012 12:55:16 GMT