html5/spec-author-view patch.spec-splitter.1,1.8,1.9 spec.html,1.1872,1.1873 from Michael Smith via cvs-syncmail on 2012-01-14 (public-html-commits@w3.org from January 2012)

From: Michael Smith via cvs-syncmail <cvsmail@w3.org>
Date: Sat, 14 Jan 2012 11:01:15 +0000
To: public-html-commits@w3.org
Message-Id: <E1Rm1MJ-0001oc-61@lionel-hutz.w3.org>
Update of /sources/public/html5/spec-author-view
In directory hutz:/tmp/cvs-serv6958

Modified Files:
	patch.spec-splitter.1 spec.html 
Log Message:
Make <wbr> less magical. (whatwg r6898)

[updated by splitter]


Index: patch.spec-splitter.1
===================================================================
RCS file: /sources/public/html5/spec-author-view/patch.spec-splitter.1,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- patch.spec-splitter.1	5 Jul 2011 12:00:03 -0000	1.8
+++ patch.spec-splitter.1	14 Jan 2012 11:01:02 -0000	1.9
@@ -1,16 +1,23 @@
 Index: html5-tools/spec-splitter/spec-splitter.py
 ===================================================================
---- html5-tools/spec-splitter/spec-splitter.py	(revision 190)
+--- html5-tools/spec-splitter/spec-splitter.py	(revision 199)
 +++ html5-tools/spec-splitter/spec-splitter.py	(working copy)
-@@ -9,6 +9,7 @@
- w3c = False
+@@ -6,17 +6,23 @@
+ print "HTML5 Spec Splitter"
+ 
+ absolute_uris = False
++w3c = False
  use_html5lib_parser = False
  use_html5lib_serialiser = False
 +make_index_of_terms = False
  file_args = []
  
  for arg in sys.argv[1:]:
-@@ -20,6 +21,8 @@
+     if arg == '--absolute':
+         absolute_uris = True
++    elif arg == '--w3c':
++        w3c = True
+     elif arg == '--html5lib-parser':
          use_html5lib_parser = True
      elif arg == '--html5lib-serialiser':
          use_html5lib_serialiser = True
@@ -19,10 +26,30 @@
      else:
          file_args.append(arg)
  
-@@ -48,17 +51,86 @@
+@@ -26,6 +32,7 @@
+     print
+     print 'Options:'
+     print '  --absolute ............. convert relative URLs to absolute (e.g. for images)'
++    print '  --w3c .................. use W3C variant instead of WHATWG'
+     print '  --html5lib-parser ...... use html5lib parser instead of lxml'
+     print '  --html5lib-serialiser .. use html5lib serialiser instead of lxml'
+     sys.exit()
+@@ -35,24 +42,96 @@
+     import html5lib.serializer
+     import html5lib.treewalkers
+ 
+-index_page = 'index'
++if w3c:
++    index_page = 'Overview'
++else:
++    index_page = 'index'
+ 
+ # The document is split on all <h2> elements, plus the following specific elements
  # (which were chosen to split any pages that were larger than about 100-200KB, and
  # may need to be adjusted as the spec changes):
  split_exceptions = [
+-    'common-microsyntaxes', 'urls', 'fetching-resources', 'common-dom-interfaces', 'namespaces', # <-- infrastructure
+-    'elements', # <-- dom
 +    'the-a-element', 'the-abbr-element', 'the-address-element',
 +    'the-area-element', 'the-article-element', 'the-aside-element',
 +    'the-audio-element', 'the-b-element', 'the-base-element',
@@ -58,7 +85,7 @@
 +    'the-time-element', 'the-title-element', 'the-tr-element',
 +    'the-track-element', 'the-u-element', 'the-ul-element',
 +    'the-var-element', 'the-video-element', 'the-wbr-element',
-+
+ 
 +    'styling',
 +    'usage-summary',
 +    'attributes-common-to-ins-and-del-elements',
@@ -82,9 +109,9 @@
 +    'headings-and-sections',
 +
 +    'dynamic-markup-insertion',
-     'common-microsyntaxes', 'urls', # <-- infrastructure
-     'elements', 'content-models', 'apis-in-html-documents', # <-- dom
- 
++    'common-microsyntaxes', 'urls', # <-- infrastructure
++    'elements', 'content-models', 'apis-in-html-documents', # <-- dom
++
 +    'attributes-common-to-form-controls',
 +    'textFieldSelection',
 +    'constraints',
@@ -93,22 +120,25 @@
 +    'common-idioms-without-dedicated-elements',
 +
      'scripting-1', 'sections', 'grouping-content', 'text-level-semantics', 'edits',
--    'embedded-content-1', 'the-iframe-element', 'video', 'the-canvas-element', 'the-map-element', 'tabular-data',
+-    'embedded-content-1', 'the-iframe-element', 'the-video-element', 'the-canvas-element', 'the-map-element', 'tabular-data',
 -    'forms', 'the-input-element', 'states-of-the-type-attribute', 'number-state', 'common-input-element-attributes', 'the-button-element', 'association-of-controls-and-forms',
+-    'interactive-elements', 'commands', 'common-idioms', 'selectors', # <-- semantics
 +    'embedded-content-1', 'tabular-data',
 +    'forms', 'states-of-the-type-attribute', 'number-state', 'common-input-element-attributes', 'the-button-element', 'association-of-controls-and-forms',
-     'interactive-elements', 'commands', # <-- semantics
++    'interactive-elements', 'commands', # <-- semantics
  
-     'predefined-vocabularies-0', 'converting-html-to-other-formats', # <-- microdata
++    'predefined-vocabularies-0', 'converting-html-to-other-formats', # <-- microdata
      'origin-0', 'timers', 'offline', 'history', 'links', # <-- browsers
 +    'user-prompts',
 +    'system-state-and-capabilities',
      'dnd', # <-- editing
 +    'editing-apis',
  
+-    'workers', 'network', 'web-messaging', 'webstorage',
      'parsing', 'tokenization', 'tree-construction', 'the-end', 'named-character-references', # <-- syntax
  ]
-@@ -78,6 +150,10 @@
+ 
+@@ -71,6 +150,10 @@
  
  doctitle = doc.find('.//title').text
  
@@ -119,7 +149,16 @@
  # Absolutise some references, so the spec can be hosted elsewhere
  if absolute_uris:
      for a in ('href', 'src'):
-@@ -102,14 +178,14 @@
+@@ -87,7 +170,7 @@
+ # Create an empty body, for the page content to be added into later
+ default_body = etree.Element('body')
+ if original_body.get('class'): default_body.set('class', original_body.get('class'))
+-if original_body.get('onload'): default_body.set('onload', 'fixBrokenLink(); %s' % original_body.get('onload'))
++default_body.set('onload', 'fixBrokenLink(); %s' % original_body.get('onload'))
+ original_body.getparent().replace(original_body, default_body)
+ 
+ # Extract the header, so we can reuse it in every page
+@@ -95,14 +178,14 @@
  
  # Make a stripped-down version of it
  short_header = deepcopy(header)
@@ -136,7 +175,23 @@
                  items.append( (depth, c.get('href')[1:], c) )
              elif c.tag == 'ol':
                  extract_toc_items(items, c, depth+1)
-@@ -166,8 +242,8 @@
+@@ -110,10 +193,11 @@
+ extract_toc_items(toc_items, original_body.find('.//ol[@class="toc"]'), 0)
+ 
+ # Prepare the link-fixup script
+-link_fixup_script = etree.XML('<script src="link-fixup.js"/>')
+-doc.find('head')[-1].tail = '\n  '
+-doc.find('head').append(link_fixup_script)
+-link_fixup_script.tail = '\n  '
++if not w3c:
++    link_fixup_script = etree.XML('<script src="link-fixup.js"/>')
++    doc.find('head')[-1].tail = '\n  '
++    doc.find('head').append(link_fixup_script)
++    link_fixup_script.tail = '\n  '
+ 
+ # Stuff for fixing up references:
+ 
+@@ -158,8 +242,8 @@
  # Contents/intro page:
  
  page = deepcopy(doc)
@@ -146,7 +201,40 @@
  
  # Keep copying stuff from the front of the source document into this
  # page, until we find the first heading that isn't class="no-toc"
-@@ -205,8 +281,8 @@
+@@ -172,25 +256,19 @@
+ 
+ # Section/subsection pages:
+ 
+-def first_elm(e):
+-    for c in e.iterchildren(tag=etree.Element):
+-        return c
+-    return None
+-
+ def should_split(e):
+     if e.tag == 'h2': return True
+     if e.get('id') in split_exceptions: return True
+-    # handle wrapping <div>
+-    if e.tag == 'div':
+-        c = first_elm(e)
+-        if c:
+-            if c.tag == 'h2': return True
+-            if c.get('id') in split_exceptions: return True
++    if e.tag == 'div' and e.get('class') == 'impl':
++        c = e.getchildren()
++        if len(c):
++            if c[0].tag == 'h2': return True
++            if c[0].get('id') in split_exceptions: return True
+     return False
+ 
+ def get_heading_text_and_id(e):
+-    if e.tag == 'div':
+-        node = first_elm(e)
++    if e.tag == 'div' and e.get('class') == 'impl':
++        node = e.getchildren()[0]
+     else:
+         node = e
+     title = re.sub('\s+', ' ', etree.tostring(node, method='text').strip())
+@@ -203,8 +281,8 @@
      print '  <%s> %s - %s' % (heading.tag, name, title)
  
      page = deepcopy(doc)
@@ -156,15 +244,20 @@
  
      page.find('//title').text = title + u' \u2014 ' + doctitle
  
-@@ -242,6 +318,7 @@
-         nav = etree.Element('div') # HTML 4 compatibility
-     else:
-         nav = etree.Element('nav')
+@@ -236,7 +314,11 @@
+ 
+     head = doc.find('head')
+ 
+-    nav = etree.Element('nav')
++    if w3c:
++        nav = etree.Element('div') # HTML 4 compatibility
++    else:
++        nav = etree.Element('nav')
 +    nav.set('class', 'prev_next')
      nav.text = '\n   '
      nav.tail = '\n\n  '
  
-@@ -258,7 +335,7 @@
+@@ -253,7 +335,7 @@
      a = etree.XML('<a href="%s.html#contents">Table of contents</a>' % index_page)
      a.tail = '\n  '
      nav.append(a)
@@ -173,7 +266,7 @@
      link.tail = '\n  '
      head.append(link)
  
-@@ -305,6 +382,22 @@
+@@ -300,6 +382,22 @@
  
      doc.find('body').insert(1, nav) # after the header
  
@@ -196,18 +289,22 @@
  report_broken_refs()
  
  print "Outputting..."
-@@ -312,13 +405,10 @@
- # Output all the pages
- for name, doc, title in pages:
-     f = open('%s/%s' % (file_args[1], get_page_filename(name)), 'w')
--    if w3c:
--        f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">\n')
--    else:
--        f.write('<!DOCTYPE html>\n')
+@@ -310,14 +408,16 @@
      if use_html5lib_serialiser:
          tokens = html5lib.treewalkers.getTreeWalker('lxml')(doc)
          serializer = html5lib.serializer.HTMLSerializer(quote_attr_values=True, inject_meta_charset=False)
 +        f.write("<!doctype html>\n")
          for text in serializer.serialize(tokens, encoding='us-ascii'):
-             if text != '<!DOCTYPE html>': # some versions of lxml emit this; get rid of it if so
-                 f.write(text)
+-            f.write(text)
++            if text != '<!DOCTYPE html>': # some versions of lxml emit this; get rid of it if so
++                f.write(text)
+     else:
+         f.write(etree.tostring(doc, pretty_print=False, method="html"))
+ 
+ # Generate the script to fix broken links
+ f = open('%s/fragment-links.js' % (file_args[1]), 'w')
+-links = ','.join('"%s":"%s"' % (k.replace("\\", "\\\\").replace('"', '\\"'), v) for (k,v) in id_pages.items())
++links = ','.join("'%s':'%s'" % (k.replace("\\", "\\\\").replace("'", "\\'"), v) for (k,v) in id_pages.items())
+ f.write('var fragment_links = { ' + re.sub(r"([^\x20-\x7f])", lambda m: "\\u%04x" % ord(m.group(1)), links) + ' };\n')
+ f.write("""
+ var fragid = window.location.hash.substr(1);

Index: spec.html
===================================================================
RCS file: /sources/public/html5/spec-author-view/spec.html,v
retrieving revision 1.1872
retrieving revision 1.1873
diff -u -d -r1.1872 -r1.1873
--- spec.html	13 Jan 2012 23:57:40 -0000	1.1872
+++ spec.html	14 Jan 2012 11:01:02 -0000	1.1873
@@ -1,66185 +0,0 @@
-<!doctype html>
-<html lang="en-US-x-Hixie"><head><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><title>HTML5: Edition for Web Authors</title><link href="style.css" rel="stylesheet"><link href="http://www.w3.org/StyleSheets/TR/W3C-ED" rel="stylesheet" type="text/css"><script src="link-fixup.js" type="text/javascript"></script><style type="text/css">
-
-   .applies thead th > * { display: block; }
-   .applies thead code { display: block; }
-   .applies tbody th { whitespace: nowrap; }
-   .applies td { text-align: center; }
-   .applies .yes { background: yellow; }
-
-   .matrix, .matrix td { border: hidden; text-align: right; }
-   .matrix { margin-left: 2em; }
[...66154 lines suppressed...]
-  and
-  Øistein E. Andersen,
-
-  for their useful comments, both large and small, that have led to
-  changes to this specification over the years.</p><p>Thanks also to everyone who has ever posted about HTML to their
-  blogs, public mailing lists, or forums, including all the
-  contributors to the <a href="http://www.w3.org/html/wg/lists/">various W3C HTML WG
-  lists</a> and the <a href="http://www.whatwg.org/mailing-list">various WHATWG lists</a>.
-
-  </p><p>Special thanks to Richard Williamson for creating the first
-  implementation of <code><a href="#the-canvas-element" id="canvas_31">canvas</a></code> in Safari, from which the
-  canvas feature was designed.</p><p>Special thanks also to the Microsoft employees who first
-  implemented the event-based drag-and-drop mechanism, <code title="attr-contenteditable"><a href="#attr-contenteditable" id="contenteditable_3">contenteditable</a></code>, and other
-  features first widely deployed by the Windows Internet Explorer
-  browser.</p><p>Thanks to the many sources that provided inspiration for the
-  examples used in the specification.</p><p>Thanks also to the Microsoft blogging community for some ideas,
-  to the attendees of the W3C Workshop on Web Applications and
-  Compound Documents for inspiration, to the #mrt crew, the #mrt.no
-  crew, and the #whatwg crew, and to Pillar and Hedral for their ideas
-  and support.</p></body></html>
Received on Saturday, 14 January 2012 11:01:37 UTC