Re: Adding <meta> charset to HTML format tests

Ian Hickson wrote:
> On Mon, 26 Nov 2007, fantasai wrote:
>>> Should we consider adding
>>>     <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
>>> to the HTML version of the tests so that they work when downloaded?
>> This should do it, although I'm not sure if it's the right place to put 
>> the change. Ian, does that look ok to you, or should this be in the 
>> makefile instead?
> 
> I don't understand why this would be in the Makefile. The right way to do 
> this is to manipulate the DOM tree before you serialise the DOM, though, 
> not regular expressions like the way the proposed patch does it.

Do you think this version (attached), which edits the tree before it is serialised instead of using regular expressions, is better?

> Personally I'm not convinced we should be supporting non-HTTP variants. 
> The encoding should be in the HTTP headers. We'll almost certainly end up 
> with tests that require CGI scripts on the server side to test particular 
> edge cases (like timing issues).

That's fine for official testing, but it's not uncommon to want to have a
local copy of the tests for development work.

~fantasai
? patch.dfif
Index: treeTools.pm
===================================================================
RCS file: /sources/public/CSS/CSS2.1-test-suite/lib/treeTools.pm,v
retrieving revision 1.2
diff -u -r1.2 treeTools.pm
--- treeTools.pm	9 Aug 2006 05:21:17 -0000	1.2
+++ treeTools.pm	7 Mar 2008 22:11:49 -0000
@@ -142,3 +142,15 @@
         return $node;
     }
 }
+
+# Return the first element child of $parent whose tag name is $name in the
+# XHTML namespace; return nothing (undef in scalar context) if none matches.
+sub firstChildElementWithName {
+    my ($name, $parent) = @_;
+    my $wanted = "{http://www.w3.org/1999/xhtml}$name";
+    foreach my $child (@{$parent->{childNodes}}) {
+        next unless ref $child and $child->{nodeType} eq 'element';
+        return $child if $child->{tagName} eq $wanted;
+    }
+    return; # explicit: a sub ending in a foreach has an unspecified value
+}
\ No newline at end of file
Index: format/html4.pm
===================================================================
RCS file: /sources/public/CSS/CSS2.1-test-suite/lib/format/html4.pm,v
retrieving revision 1.1
diff -u -r1.1 html4.pm
--- format/html4.pm	2 Dec 2004 16:06:44 -0000	1.1
+++ format/html4.pm	7 Mar 2008 22:11:49 -0000
@@ -2,6 +2,7 @@
 use strict;
 use utf8;
 use html;
+use treeTools;
 1;
 
 sub output {
@@ -10,8 +11,46 @@
     if (@$namespaces != 1 or $namespaces->[0] ne 'http://www.w3.org/1999/xhtml') {
         return undef; # not an HTML4 test.
     }
+    addEncoding($tree);
     my $output = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN">';
     $output .= "\n";
     $output .= html::treeAsHTML($tree);
+    removeEncoding($tree);
     return $output;
 }
+
+# Prepend <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+# to the first <head> child of $tree's root, if that root is an XHTML <html>.
+sub addEncoding {
+    my ($tree) = @_;
+    my $root = treeTools::rootElement($tree);
+    if ($root->{tagName} eq '{http://www.w3.org/1999/xhtml}html' and
+        my $head = treeTools::firstChildElementWithName('head', $root)) {
+        # Both attribute slots of the new node refer to this one hash.
+        my %attrs = ('http-equiv' => 'Content-Type',
+                     'content' => 'text/html; charset=UTF-8');
+        unshift @{$head->{childNodes}}, {nodeType => 'element',
+            tagName => '{http://www.w3.org/1999/xhtml}meta',
+            localName => 'meta',
+            namespace => 'http://www.w3.org/1999/xhtml',
+            attributes => \%attrs,
+            attributesPrefixed => \%attrs,
+            childNodes => []};
+    }
+}
+
+# Undo addEncoding(): if the first <meta> in <head> is the UTF-8 one, drop it.
+sub removeEncoding {
+    my ($tree) = @_;
+    my $root = treeTools::rootElement($tree);
+    return unless $root->{tagName} eq '{http://www.w3.org/1999/xhtml}html';
+    my $head = treeTools::firstChildElementWithName('head', $root) or return;
+    my $meta = treeTools::firstChildElementWithName('meta', $head) or return;
+    my $attrs = $meta->{attributes};
+    return unless ($attrs->{'http-equiv'} || '') eq 'Content-Type'
+        and ($attrs->{'content'} || '') eq 'text/html; charset=UTF-8';
+    # Remove the matched node itself: a blind shift() would delete whatever
+    # node is first (e.g. a text node) whenever the <meta> is not at index 0.
+    my $kids = $head->{childNodes};
+    @$kids = grep { !ref $_ or $_ != $meta } @$kids;
+}

Received on Friday, 7 March 2008 22:15:12 UTC