- From: Michael Smith via cvs-syncmail <cvsmail@w3.org>
- Date: Mon, 16 Feb 2009 10:45:23 +0000
- To: public-html-commits@w3.org
Update of /sources/public/html5/spec/static
In directory hutz:/tmp/cvs-serv17540
Modified Files:
Makefile Overview.html spec-splitter.py
Log Message:
checkpointing WD version
Index: spec-splitter.py
===================================================================
RCS file: /sources/public/html5/spec/static/spec-splitter.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- spec-splitter.py 10 Jun 2008 09:25:37 -0000 1.2
+++ spec-splitter.py 16 Feb 2009 10:45:21 -0000 1.3
@@ -1,13 +1,4 @@
-try:
-    import psyco
-    psyco.full() # make html5lib faster
-except ImportError:
-    pass
-
 import sys
-import html5lib
-import html5lib.serializer
-import html5lib.treewalkers
 import re
 from lxml import etree # requires lxml 2.0
 from copy import deepcopy
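The first hunk removes the optional psyco accelerator along with the now-conditional html5lib imports. For reference, the deleted header used the standard optional-dependency pattern — shown here standalone as a sketch mirroring the removed lines:

    # optional JIT accelerator for CPython 2.x; absent on most systems
    try:
        import psyco
        psyco.full()   # JIT-compile everything, mainly to speed up html5lib
    except ImportError:
        pass           # no psyco: run at normal interpreter speed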
@@ -16,6 +7,8 @@
 
 absolute_uris = False
 w3c = False
+use_html5lib_parser = False
+use_html5lib_serialiser = False
 file_args = []
 
 for arg in sys.argv[1:]:
@@ -23,6 +16,10 @@
         absolute_uris = True
     elif arg == '--w3c':
         w3c = True
+    elif arg == '--html5lib-parser':
+        use_html5lib_parser = True
+    elif arg == '--html5lib-serialiser':
+        use_html5lib_serialiser = True
     else:
         file_args.append(arg)
 
@@ -31,10 +28,17 @@
     print '(The directory "multipage" must already exist)'
     print
     print 'Options:'
-    print ' --absolute convert relative URIs to absolute (e.g. for images)'
-    print ' --w3c use W3C variant instead of WHATWG'
+    print ' --absolute ............. convert relative URLs to absolute (e.g. for images)'
+    print ' --w3c .................. use W3C variant instead of WHATWG'
+    print ' --html5lib-parser ...... use html5lib parser instead of lxml'
+    print ' --html5lib-serialiser .. use html5lib serialiser instead of lxml'
     sys.exit()
 
+if use_html5lib_parser or use_html5lib_serialiser:
+    import html5lib
+    import html5lib.serializer
+    import html5lib.treewalkers
+
 if w3c:
     index_page = 'Overview'
 else:
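Taken together with the flag parsing above, html5lib is now imported only when one of the new options asks for it; a default run such as "python spec-splitter.py Overview.html multipage" stays on lxml alone and avoids html5lib's import cost. A minimal sketch of the deferred-import pattern, with flag values hard-coded for illustration:

    use_html5lib_parser = False
    use_html5lib_serialiser = True   # as if --html5lib-serialiser were given

    if use_html5lib_parser or use_html5lib_serialiser:
        import html5lib               # loaded only on request
        import html5lib.serializer
        import html5lib.treewalkers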
@@ -44,17 +48,21 @@
 # (which were chosen to split any pages that were larger than about 100-200KB, and
 # may need to be adjusted as the spec changes):
 split_exceptions = [
-    'offline', 'history', 'structured',
-    'the-root', 'text-level', 'embedded0', 'video', 'the-canvas', 'tabular', 'interactive-elements',
-    'parsing', 'tokenisation', 'tree-construction', 'serializing', 'named',
+    'text-level-semantics', 'embedded-content-0', 'video', 'the-canvas-element', 'tabular-data', 'forms', 'interactive-elements',
+    'offline', 'history', 'structured-client-side-storage',
+    'parsing', 'tokenization', 'tree-construction', 'serializing-html-fragments', 'named-character-references',
 ]
 
 print "Parsing..."
 
 # Parse document
-parser = html5lib.html5parser.HTMLParser(tree = html5lib.treebuilders.getTreeBuilder('lxml'))
-doc = parser.parse(open(file_args[0]), encoding='utf-8')
+if use_html5lib_parser:
+    parser = html5lib.html5parser.HTMLParser(tree = html5lib.treebuilders.getTreeBuilder('lxml'))
+    doc = parser.parse(open(file_args[0]), encoding='utf-8')
+else:
+    parser = etree.HTMLParser(encoding='utf-8', recover=False)
+    doc = etree.parse(open(file_args[0]), parser)
 
 print "Splitting..."
@@ -86,7 +94,7 @@
 
 # Prepare the link-fixup script
 if not w3c:
-    link_fixup_script = etree.XML('<script src="fragment-links.js"/>')
+    link_fixup_script = etree.XML('<script src="link-fixup.js"/>')
     doc.find('head')[-1].tail = '\n '
     doc.find('head').append(link_fixup_script)
     link_fixup_script.tail = '\n '
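The script element is injected with lxml's etree API, where whitespace following an element lives in that element's .tail rather than in the parent's text; the two .tail assignments keep the serialised <head> readably indented. The same idiom standalone (the <head> content here is made up):

    from lxml import etree

    head = etree.XML('<head><title>HTML 5</title></head>')
    script = etree.XML('<script src="link-fixup.js"/>')
    head[-1].tail = '\n '    # whitespace between the old last child and the script
    head.append(script)
    script.tail = '\n '      # whitespace after the inserted script
    print etree.tostring(head)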
@@ -105,7 +113,7 @@
         id_pages[e.get('id')] = page
 
 # Updates all the href="#id" to point to page#id
-missing_warnings = []
+missing_warnings = set()
 def fix_refs(page, node):
     for e in node.findall('.//a[@href]'):
         if e.get('href')[0] == '#':
@@ -114,9 +122,11 @@
                 if id_pages[id] != page: # only do non-local links
                     e.set('href', '%s#%s' % (get_page_filename(id_pages[id]), id))
             else:
-                if id not in missing_warnings:
-                    print "warning: can't find target for #%s" % id
-                    missing_warnings.append(id)
+                missing_warnings.add(id)
+
+def report_broken_refs():
+    for id in sorted(missing_warnings):
+        print "warning: can't find target for #%s" % id
 
 pages = [] # for saving all the output, so fix_refs can be called in a second pass
 
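missing_warnings becomes a set that fix_refs fills silently; report_broken_refs then prints each missing target once, after all pages have been processed, so duplicates collapse and the warnings no longer interleave with the per-section progress output, while sorted() keeps the order deterministic across runs. A tiny sketch with made-up ids:

    missing_warnings = set()
    for id in ['video', 'forms', 'video']:   # hypothetical dangling references
        missing_warnings.add(id)
    for id in sorted(missing_warnings):
        print "warning: can't find target for #%s" % id
    # one warning each for #forms and #video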
@@ -147,7 +157,7 @@
     title = getNodeText(heading)
     name = heading.get('id')
     if name == index_page: name = 'section-%s' % name
-    print ' %s' % name
+    print ' <%s> %s' % (heading.tag, name)
 
     page = deepcopy(doc)
     page_body = page.find('body')
@@ -219,6 +229,8 @@
 doc.find('body').insert(1, nav) # after the header
 
+report_broken_refs()
+
 print "Outputting..."
# Output all the pages
@@ -228,14 +240,17 @@
         f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">\n')
     else:
         f.write('<!DOCTYPE HTML>\n')
-    tokens = html5lib.treewalkers.getTreeWalker('lxml')(doc)
-    serializer = html5lib.serializer.HTMLSerializer(quote_attr_values=True, inject_meta_charset=False)
-    for text in serializer.serialize(tokens, encoding='us-ascii'):
-        f.write(text)
+    if use_html5lib_serialiser:
+        tokens = html5lib.treewalkers.getTreeWalker('lxml')(doc)
+        serializer = html5lib.serializer.HTMLSerializer(quote_attr_values=True, inject_meta_charset=False)
+        for text in serializer.serialize(tokens, encoding='us-ascii'):
+            f.write(text)
+    else:
+        f.write(etree.tostring(doc, pretty_print=False, method="html"))
 
 # Generate the script to fix broken links
 f = open('%s/fragment-links.js' % (file_args[1]), 'w')
-f.write('var fragment_links = { ' + ','.join("'%s':'%s'" % (k,v) for (k,v) in id_pages.items()) + ' };\n')
+f.write('var fragment_links = { ' + ','.join("'%s':'%s'" % (k.replace("\\", "\\\\").replace("'", "\\'"), v) for (k,v) in id_pages.items()) + ' };\n')
 f.write("""
 var fragid = window.location.hash.substr(1);
 if (!fragid) { /* handle section-foo.html links from the old multipage version, and broken foo.html from the new version */
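The last hunk hardens the generated fragment_links table: each id is escaped before being embedded in a single-quoted JavaScript string literal, and the backslash replacement must run first so that the quote escaping is not itself doubled. The same logic as a named helper, with a contrived id:

    def js_escape(s):
        # order matters: double backslashes first, then escape single quotes
        return s.replace("\\", "\\\\").replace("'", "\\'")

    print "'%s':'%s'" % (js_escape("o'brien"), 'semantics')
    # -> 'o\'brien':'semantics'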
Index: Overview.html
===================================================================
RCS file: /sources/public/html5/spec/static/Overview.html,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- Overview.html 10 Jun 2008 16:44:07 -0000 1.7
+++ Overview.html 16 Feb 2009 10:45:21 -0000 1.8
@@ -1,6389 +1,5735 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
-<!-- when publishing, change bits marked ZZZ -->
-
-<html lang=en-US>
- <head>
- <title>HTML 5</title>
-
- <style type="text/css">
- dt, dfn { font-weight: bold; font-style: normal; }
- img.extra { float: right; }
- body ins, body del { display: block; }
[...88651 lines suppressed...]
+ and "base-sixteen", the order that 0-9 A-Z a-z is mentioned,
+ and the detail to which the spec explains how to interpret a
+ string as a hexadecimal number.
+ XXX * expose the form data set, either as an object (on which one can
+ invoke the JSON serialiser), or in the form of a method on
+ HTMLFormElement that returns the form data set serialised
+ according to a particular encoding (defaulting to the form's
+ enctype="" one, probably). This would allow forms to be used
+ with XHR-like systems without having to manually construct the
+ form data set the way that is done today.
+ XXX * placeholder="" for <textarea>, e.g. as seen on:
+ http://code.google.com/p/support/issues/detail?id=1#makechanges
+ XXX * become more consistent about what markup we use to mark up
+ productions (nothing? <i>? <code>?)
+ XXX * expose the value of a radio button group
+ - either on the NodeList returned by HTMLFormControlCollection
+ - or on the radio button itself
+ - or both, so it works even when the form controls have names
+ that vary more than HTMLFormControlCollection allows?
-->
Index: Makefile
===================================================================
RCS file: /sources/public/html5/spec/static/Makefile,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- Makefile 10 Jun 2008 12:56:19 -0000 1.2
+++ Makefile 16 Feb 2009 10:45:21 -0000 1.3
@@ -26,12 +26,19 @@
 all: images multipage/fragment-links.js multipage/images
 
-release: all valid check
+release: all valid CHECKLOG single-page/Overview.html
 
 multipage/fragment-links.js: Overview.html
 	-mkdir multipage
 	$(PYTHON) $(PYTHONFLAGS) $(SPLITTER) $(SPLITTERFLAGS) $< multipage
 
+multipage/single-page/Overview.html: Overview.html multipage/single-page/images
+	cp -p $< $@
+
+multipage/single-page/images: images
+	-mkdir multipage/single-page
+	cp -pR $< $@
+
 valid: Overview.html
 	$(CURL) $(CURLFLAGS) $(VNUFLAGS) -F doc=@$< $(VNU)
@@ -54,11 +61,14 @@
 images: ../images
 	-mkdir images
 	cp -pR $</*.png $@
+	cp -pR $</*.svg $@
 
 multipage/images: ../images
 	-mkdir -p multipage/images
 	cp -pR $</*.png $@
+	cp -pR $</*.svg $@
 
 clean:
 	$(RM) -r multipage
 	$(RM) -r images
+	$(RM) CHECKLOG
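In the Makefile, the release target now also builds CHECKLOG and a single-page copy of the spec, both image rules copy .svg files alongside .png, and clean removes CHECKLOG. Two make idioms carry the weight here: a leading "-" on a recipe line (as in "-mkdir multipage") tells make to ignore a failing command, so re-runs do not abort when the directory already exists, and "cp -p" preserves timestamps so dependents are not rebuilt needlessly. Roughly, the new target chain (a sketch, with recipes as in the diff):

    multipage/single-page/Overview.html: Overview.html multipage/single-page/images
    	cp -p $< $@                      # copy the spec, preserving its timestamp

    multipage/single-page/images: images
    	-mkdir multipage/single-page     # "-" = ignore "directory exists" errors
    	cp -pR $< $@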