From: Michael Smith via cvs-syncmail <cvsmail@w3.org>
Date: Mon, 16 Feb 2009 10:45:23 +0000
To: public-html-commits@w3.org
Update of /sources/public/html5/spec/static
In directory hutz:/tmp/cvs-serv17540

Modified Files:
	Makefile Overview.html spec-splitter.py
Log Message:
checkpointing WD version

Index: spec-splitter.py
===================================================================
RCS file: /sources/public/html5/spec/static/spec-splitter.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- spec-splitter.py	10 Jun 2008 09:25:37 -0000	1.2
+++ spec-splitter.py	16 Feb 2009 10:45:21 -0000	1.3
@@ -1,13 +1,4 @@
-try:
-    import psyco
-    psyco.full() # make html5lib faster
-except ImportError:
-    pass
-
 import sys
-import html5lib
-import html5lib.serializer
-import html5lib.treewalkers
 import re
 from lxml import etree # requires lxml 2.0
 from copy import deepcopy
@@ -16,6 +7,8 @@
 
 absolute_uris = False
 w3c = False
+use_html5lib_parser = False
+use_html5lib_serialiser = False
 file_args = []
 
 for arg in sys.argv[1:]:
@@ -23,6 +16,10 @@
         absolute_uris = True
     elif arg == '--w3c':
         w3c = True
+    elif arg == '--html5lib-parser':
+        use_html5lib_parser = True
+    elif arg == '--html5lib-serialiser':
+        use_html5lib_serialiser = True
     else:
         file_args.append(arg)
 
@@ -31,10 +28,17 @@
     print '(The directory "multipage" must already exist)'
     print
     print 'Options:'
-    print '  --absolute  convert relative URIs to absolute (e.g. for images)'
-    print '  --w3c       use W3C variant instead of WHATWG'
+    print '  --absolute ............. convert relative URLs to absolute (e.g. for images)'
+    print '  --w3c .................. use W3C variant instead of WHATWG'
+    print '  --html5lib-parser ...... use html5lib parser instead of lxml'
+    print '  --html5lib-serialiser .. use html5lib serialiser instead of lxml'
     sys.exit()
 
+if use_html5lib_parser or use_html5lib_serialiser:
+    import html5lib
+    import html5lib.serializer
+    import html5lib.treewalkers
+
 if w3c:
     index_page = 'Overview'
 else:
@@ -44,17 +48,21 @@
 # (which were chosen to split any pages that were larger than about 100-200KB, and
 # may need to be adjusted as the spec changes):
 split_exceptions = [
-    'offline', 'history', 'structured',
-    'the-root', 'text-level', 'embedded0', 'video', 'the-canvas', 'tabular', 'interactive-elements',
-    'parsing', 'tokenisation', 'tree-construction', 'serializing', 'named',
+    'text-level-semantics', 'embedded-content-0', 'video', 'the-canvas-element', 'tabular-data', 'forms', 'interactive-elements',
+    'offline', 'history', 'structured-client-side-storage',
+    'parsing', 'tokenization', 'tree-construction', 'serializing-html-fragments', 'named-character-references',
 ]
 
 print "Parsing..."
 
 # Parse document
-parser = html5lib.html5parser.HTMLParser(tree = html5lib.treebuilders.getTreeBuilder('lxml'))
-doc = parser.parse(open(file_args[0]), encoding='utf-8')
+if use_html5lib_parser:
+    parser = html5lib.html5parser.HTMLParser(tree = html5lib.treebuilders.getTreeBuilder('lxml'))
+    doc = parser.parse(open(file_args[0]), encoding='utf-8')
+else:
+    parser = etree.HTMLParser(encoding='utf-8', recover=False)
+    doc = etree.parse(open(file_args[0]), parser)
 
 print "Splitting..."

@@ -86,7 +94,7 @@
 # Prepare the link-fixup script
 if not w3c:
-    link_fixup_script = etree.XML('<script src="fragment-links.js"/>')
+    link_fixup_script = etree.XML('<script src="link-fixup.js"/>')
     doc.find('head')[-1].tail = '\n  '
     doc.find('head').append(link_fixup_script)
     link_fixup_script.tail = '\n  '
 
@@ -105,7 +113,7 @@
         id_pages[e.get('id')] = page
 
 # Updates all the href="#id" to point to page#id
-missing_warnings = []
+missing_warnings = set()
 def fix_refs(page, node):
     for e in node.findall('.//a[@href]'):
         if e.get('href')[0] == '#':
@@ -114,9 +122,11 @@
                 if id_pages[id] != page: # only do non-local links
                     e.set('href', '%s#%s' % (get_page_filename(id_pages[id]), id))
             else:
-                if id not in missing_warnings:
-                    print "warning: can't find target for #%s" % id
-                    missing_warnings.append(id)
+                missing_warnings.add(id)
+
+def report_broken_refs():
+    for id in sorted(missing_warnings):
+        print "warning: can't find target for #%s" % id
 
 pages = [] # for saving all the output, so fix_refs can be called in a second pass
 
@@ -147,7 +157,7 @@
     title = getNodeText(heading)
     name = heading.get('id')
     if name == index_page: name = 'section-%s' % name
-    print '  %s' % name
+    print '  <%s> %s' % (heading.tag, name)
 
     page = deepcopy(doc)
     page_body = page.find('body')
@@ -219,6 +229,8 @@
 
 doc.find('body').insert(1, nav) # after the header
 
+report_broken_refs()
+
 print "Outputting..."
 
 # Output all the pages
@@ -228,14 +240,17 @@
         f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">\n')
     else:
         f.write('<!DOCTYPE HTML>\n')
-    tokens = html5lib.treewalkers.getTreeWalker('lxml')(doc)
-    serializer = html5lib.serializer.HTMLSerializer(quote_attr_values=True, inject_meta_charset=False)
-    for text in serializer.serialize(tokens, encoding='us-ascii'):
-        f.write(text)
+    if use_html5lib_serialiser:
+        tokens = html5lib.treewalkers.getTreeWalker('lxml')(doc)
+        serializer = html5lib.serializer.HTMLSerializer(quote_attr_values=True, inject_meta_charset=False)
+        for text in serializer.serialize(tokens, encoding='us-ascii'):
+            f.write(text)
+    else:
+        f.write(etree.tostring(doc, pretty_print=False, method="html"))
 
 # Generate the script to fix broken links
 f = open('%s/fragment-links.js' % (file_args[1]), 'w')
-f.write('var fragment_links = { ' + ','.join("'%s':'%s'" % (k,v) for (k,v) in id_pages.items()) + ' };\n')
+f.write('var fragment_links = { ' + ','.join("'%s':'%s'" % (k.replace("\\", "\\\\").replace("'", "\\'"), v) for (k,v) in id_pages.items()) + ' };\n')
 f.write("""
 var fragid = window.location.hash.substr(1);
 if (!fragid) {
     /* handle section-foo.html links from the old multipage version,
        and broken foo.html from the new version */
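A note for readers following the spec-splitter.py changes above: the net effect is that lxml now handles both parsing and serialising by default, and html5lib is only imported when one of the new flags is passed. A condensed, standalone sketch of that selection logic (flag spellings and API calls as in the script; argument handling simplified, error handling omitted):

    import sys
    from lxml import etree  # requires lxml 2.0

    use_html5lib_parser = '--html5lib-parser' in sys.argv[1:]
    filename = sys.argv[-1]  # input file, as in the script's usage

    if use_html5lib_parser:
        # html5lib parses like a browser but is slower; it still builds
        # an lxml tree, so the rest of the script is unaffected.
        import html5lib
        parser = html5lib.html5parser.HTMLParser(
            tree=html5lib.treebuilders.getTreeBuilder('lxml'))
        doc = parser.parse(open(filename), encoding='utf-8')
    else:
        # recover=False makes lxml reject malformed markup outright
        # instead of silently repairing it.
        parser = etree.HTMLParser(encoding='utf-8', recover=False)
        doc = etree.parse(open(filename), parser)

In other words, lxml becomes a hard dependency while html5lib becomes optional, and with recover=False a malformed Overview.html now fails the split instead of being quietly fixed up.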
Index: Overview.html
===================================================================
RCS file: /sources/public/html5/spec/static/Overview.html,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- Overview.html	10 Jun 2008 16:44:07 -0000	1.7
+++ Overview.html	16 Feb 2009 10:45:21 -0000	1.8
@@ -1,6389 +1,5735 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
-<!-- when publishing, change bits marked ZZZ -->
-
-<html lang=en-US>
- <head>
-  <title>HTML 5</title>
-
-  <style type="text/css">
-   dt, dfn { font-weight: bold; font-style: normal; }
-   img.extra { float: right; }
-   body ins, body del { display: block; }
[...88651 lines suppressed...]
+     and "base-sixteen", the order that 0-9 A-Z a-z is mentioned,
+     and the detail to which the spec explains how to interpret a
+     string as a hexadecimal number.
+ XXX * expose the form data set, either as an object (on which one can
+       invoke the JSON serialiser), or in the form of a method on
+       HTMLFormElement that returns the form data set serialised
+       according to a particular encoding (defaulting to the form's
+       enctype="" one, probably). This would allow forms to be used
+       with XHR-like systems without having to manually construct the
+       form data set the way that is done today.
+ XXX * placeholder="" for <textarea>, e.g. as seen on:
+       http://code.google.com/p/support/issues/detail?id=1#makechanges
+ XXX * become more consistent about what markup we use to mark up
+       productions (nothing? <i>? <code>?)
+ XXX * expose the value of a radio button group
+       - either on the NodeList returned by HTMLFormControlCollection
+       - or on the radio button itself
+       - or both, so it works even when the form controls have names
+         that vary more than HTMLFormControlCollection allows?
+-->

Index: Makefile
===================================================================
RCS file: /sources/public/html5/spec/static/Makefile,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- Makefile	10 Jun 2008 12:56:19 -0000	1.2
+++ Makefile	16 Feb 2009 10:45:21 -0000	1.3
@@ -26,12 +26,19 @@
 
 all: images multipage/fragment-links.js multipage/images
 
-release: all valid check
+release: all valid CHECKLOG single-page/Overview.html
 
 multipage/fragment-links.js: Overview.html
 	-mkdir multipage
 	$(PYTHON) $(PYTHONFLAGS) $(SPLITTER) $(SPLITTERFLAGS) $< multipage
 
+multipage/single-page/Overview.html: Overview.html multipage/single-page/images
+	cp -p $< $@
+
+multipage/single-page/images: images
+	-mkdir multipage/single-page
+	cp -pR $< $@
+
 valid: Overview.html
 	$(CURL) $(CURLFLAGS) $(VNUFLAGS) -F doc=@$< $(VNU)
 
@@ -54,11 +61,14 @@
 images: ../images
 	-mkdir images
 	cp -pR $</*.png $@
+	cp -pR $</*.svg $@
 
 multipage/images: ../images
 	-mkdir -p multipage/images
 	cp -pR $</*.png $@
+	cp -pR $</*.svg $@
 
 clean:
 	$(RM) -r multipage
 	$(RM) -r images
+	$(RM) CHECKLOG
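One more note, on the spec-splitter.py hunk above that writes fragment-links.js: fragment IDs are now escaped before being interpolated into the JavaScript object literal, so an ID containing a backslash or an apostrophe can no longer terminate the string early and break the generated script. A minimal sketch of that quoting (the js_escape helper name and the sample IDs are illustrative; the script itself inlines the replace calls):

    def js_escape(s):
        # Double backslashes first; otherwise the backslash added while
        # escaping an apostrophe would itself get doubled.
        return s.replace("\\", "\\\\").replace("'", "\\'")

    id_pages = {"don't-panic": 'page-a', 'parsing': 'page-b'}  # made-up ids
    pairs = ','.join("'%s':'%s'" % (js_escape(k), v)
                     for (k, v) in sorted(id_pages.items()))
    print 'var fragment_links = { ' + pairs + ' };'
    # -> var fragment_links = { 'don\'t-panic':'page-a','parsing':'page-b' };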
Received on Monday, 16 February 2009 10:45:48 UTC