html5/spec-author-view patch.anolis,1.1,1.2

Update of /sources/public/html5/spec-author-view
In directory hutz:/tmp/cvs-serv21531

Modified Files:
	patch.anolis 
Log Message:
added mechanism for generating an index of terms; checkpointing

Index: patch.anolis
===================================================================
RCS file: /sources/public/html5/spec-author-view/patch.anolis,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- patch.anolis	10 Aug 2010 10:45:42 -0000	1.1
+++ patch.anolis	15 Aug 2010 22:55:06 -0000	1.2
@@ -1,6 +1,30 @@
+diff -r 16550726fd0d anolis
+--- anolis	Sun Aug 30 16:53:19 2009 -0500
++++ anolis	Sun Aug 15 20:17:47 2010 +0900
+@@ -206,7 +206,7 @@
+                       dest="output_encoding", help="Output encoding")
+ 
+     parser.set_defaults(
+-        processes=["filter", "sub", "toc", "xref", "annotate"],
++        processes=["filter", "sub", "toc", "xref", "annotate", "terms"],
+         parser="html5lib",
+         serializer="html5lib",
+         newline_char=u"\n",
+diff -r 16550726fd0d anolislib/generator.py
+--- anolis/anolislib/generator.py	Sun Aug 30 16:53:19 2009 -0500
++++ anolis/anolislib/generator.py	Sun Aug 15 20:17:47 2010 +0900
+@@ -28,7 +28,7 @@
+ from lxml import etree
+ 
+ 
+-def process(tree, processes=["sub", "toc", "xref"], **kwargs):
++def process(tree, processes=["sub", "toc", "xref", "terms"], **kwargs):
+     """ Process the given tree. """
+ 
+     # Find number of passes to do
 diff -r 16550726fd0d anolislib/processes/filter.py
 --- anolis/anolislib/processes/filter.py	Sun Aug 30 16:53:19 2009 -0500
-+++ anolis/anolislib/processes/filter.py	Tue Aug 10 19:18:46 2010 +0900
++++ anolis/anolislib/processes/filter.py	Sun Aug 15 20:17:47 2010 +0900
 @@ -5,23 +5,4 @@
          return
      selector = cssselect.CSSSelector(kwargs["filter"])
@@ -26,34 +50,270 @@
 -
 -        
 +        element.drop_tree()
-diff -r 16550726fd0d anolislib/processes/xref.py
---- anolis/anolislib/processes/xref.py	Sun Aug 30 16:53:19 2009 -0500
-+++ anolis/anolislib/processes/xref.py	Tue Aug 10 19:18:46 2010 +0900
-@@ -25,7 +25,7 @@
- 
- from anolislib import utils
- 
--instance_elements = frozenset([u"span", u"abbr", u"code", u"var", u"i"])
-+instance_elements = frozenset([u"a", u"span", u"abbr", u"code", u"var", u"i"])
- w3c_instance_elements = frozenset([u"abbr", u"acronym", u"b", u"bdo", u"big",
-                                    u"code", u"del", u"em", u"i", u"ins",
-                                    u"kbd", u"label", u"legend", u"q", u"samp",
-@@ -96,13 +96,17 @@
-                                 break
- 
-                     if goodParentingAndChildren:
--                        if element.tag == u"span":
-+                        id = utils.generateID(element, **kwargs)
-+                        if element.tag == u"span"\
-+                           or element.tag == u"a":
-                             element.tag = u"a"
-                             element.set(u"href", u"#" + self.dfns[term])
-+                            element.set(u"id", id)
-                         else:
-                             link = etree.Element(u"a",
-                                                  {u"href":
-                                                   u"#" + self.dfns[term]})
-+                            link.set(u"id", id)
-                             if w3c_compat or w3c_compat_xref_a_placement:
-                                 for node in element:
-                                     link.append(node)
+diff -r 16550726fd0d anolislib/processes/terms.py
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ anolis/anolislib/processes/terms.py	Sun Aug 15 20:17:47 2010 +0900
+@@ -0,0 +1,263 @@
++# coding=UTF-8
++# Copyright (c) 2010 Michael(tm) Smith
++#
++# Permission is hereby granted, free of charge, to any person obtaining a copy
++# of this software and associated documentation files (the "Software"), to deal
++# in the Software without restriction, including without limitation the rights
++# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++# copies of the Software, and to permit persons to whom the Software is
++# furnished to do so, subject to the following conditions:
++#
++# The above copyright notice and this permission notice shall be included in
++# all copies or substantial portions of the Software.
++#
++# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
++# THE SOFTWARE.
++
++import re
++
++from lxml import etree
++from copy import deepcopy
++
++from anolislib import utils
++
++class terms(object):
++    """Build and add an index of terms."""
++
++    terms = None
++
++    def __init__(self, ElementTree, **kwargs):
++        self.terms = etree.Element(u"div")
++        self.buildTerms(ElementTree, **kwargs)
++        self.addTerms(ElementTree, **kwargs)
++
++    def buildTerms(self, ElementTree, w3c_compat=False, **kwargs):
++        self.terms.text = "\n"
++        # make a list of all the defining instances of "terms" in the document
++        # -- <dfn> elements
++        dfnList = ElementTree.findall("//dfn")
++        if dfnList:
++            # sort the list of <dfn> terms by the lowercase value of the DOM
++            # textContent of the <dfn> element (concatenation of the <dfn>
++            # text nodes and that of any of its descendant elements)
++            dfnList.sort(key=lambda dfn: dfn.text_content().lower())
++            for dfn in dfnList:
++                # we don't need the tail, so copy the <dfn> and drop the tail
++                term = deepcopy(dfn)
++                term.tail = None
++                termID = None
++                if dfn.get("id"):
++                    # if this <dfn> itself has an id, we'll use it as part of the
++                    # id on the index entry for this term
++                    termID = dfn.get("id")
++                elif dfn.getparent().get("id"):
++                    # if this <dfn> itself has no id, use the id of its parent
++                    # node as the id on the index entry for this term
++                    termID = dfn.getparent().get("id")
++                # if we found an id, then create an index entry for this <dfn>
++                # term; otherwise, do nothing further
++                if termID:
++                    indexEntry = etree.Element(u"dl",{u"id": termID+"_index"})
++                    indexEntry.text = "\n"
++                    # termName is the name of the term as it appears in the index
++                    termName = etree.Element(u"dt")
++                    # textContent of the DOM textContent of this <dfn> element
++                    textContent = dfn.text_content()
++                    # normalize the text content of each <dfn> in the document
++                    # and normalize the text content of this <dfn>, then
++                    # do a case-insensitive comparison of them and count how
++                    # many matches we have
++                    expr = "count(//dfn\
++                            [normalize-space(translate(.,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'))\
++                            =normalize-space(translate($content,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'))])"
++                    if ElementTree.xpath(expr, content = textContent) > 1:
++                        # we have more than one <dfn> in the document whose
++                        # content is a case-insensitive match for the
++                        # textContent of this <dfn>; so, we qualify the name of
++                        # each such term in the index listing by appending the
++                        # id of the <dfn> to it
++                        termName.text = textContent+" "
++                        indexEntryID = etree.Element(u"span",{u"class": u"index-id", u"title": u""})
++                        indexEntryID.text = "("+termID+")"
++                        indexEntryID.tail = " "
++                        termName.append(indexEntryID)
++                    else:
++                        # otherwise, the textContent of this <dfn> is unique
++                        # among the <dfn>s in this document, so we just use the
++                        # textContent as the name of the term
++                        termName.text = textContent+" "
++                    indexEntry.append(termName)
++                    # add a hyperlink back to the <dfn> that is the defining
++                    # instance of the term
++                    dfnLink = etree.Element(u"a", {u"href": "#"+termID, u"class": "dfn-ref"})
++                    dfnLink.text = u"\u203B"
++                    termName.append(dfnLink)
++                    termName.tail = "\n"
++                    # #########################################################
++                    # make a list of all the instances of terms in the document
++                    # that are hyperlinked references back to the <dfn> term
++                    # that is the defining instance of the term
++                    instanceList = ElementTree.xpath("//a[substring-after(@href,'#') = $targetID]", targetID = termID)
++                    # if we found any instances of hyperlinked references to
++                    # this <dfn> term, then we process the list
++                    if instanceList:
++                        instanceItem = None
++                        lastLinkToHeading = None
++                        lastInstanceItem = None
++                        for instance in instanceList:
++                            # each of these term instances is an <a> hyperlink
++                            # without an id attribute, but we need each of
++                            # these <a> instance hyperlinks to have an id
++                            # attribute so that we can link back to it from the
++                            # index of terms; so we create an id for each
++                            instanceID = utils.generateID(instance, **kwargs)
++                            instance.set(u"id",instanceID)
++                            # make a copy of the node of the h1-h6 heading for the
++                            # section that contains this instance hyperlink
++                            linkToHeading = deepcopy(self.getAncestorHeading(instance))
++                            # some headings may have id attributes, but we don't
++                            # want the id attribute, so drop it if we find one
++                            if "id" in linkToHeading.attrib:
++                                del linkToHeading.attrib["id"]
++                            # some headings may contain descendants that are <a>
++                            # links, and/or that have id attributes
++                            embeddedLinks = linkToHeading.xpath(".//*[@href or @id]")
++                            # because we later transform the copy of this
++                            # heading itself into a hyperlink, it can't
++                            # contain descendant links; so, we un-linkify
++                            # any elements that have @href attributes
++                            # by turning it into an @href-less <span>
++                            for element in embeddedLinks:
++                                if "href" in element.attrib:
++                                    del element.attrib["href"]
++                                    element.tag = "span"
++                                # this might be an <a> element that we added an
++                                # id attribute to earlier and/or may be a
++                                # <dfn> that already had an id attribute; but we
++                                # don't want to copy the id attributes here, so
++                                # drop any id attribute we find
++                                if "id" in element.attrib:
++                                    del element.attrib["id"]
++                            # if this heading is not the same as one that we've
++                            # already added to the index entry for this term,
++                            # then process the heading
++                            if lastLinkToHeading is None or linkToHeading.text_content() != lastLinkToHeading.text_content():
++                                instanceItem = etree.Element(u"dd")
++                                instanceItem.text = "\n"
++                                lastLinkToHeading = linkToHeading
++                                n = 1
++                                # change this copy from being an h1-h6 node, to
++                                # just being an <a> hyperlink
++                                linkToHeading.tag = "a"
++                                # make this item link back to the actual place
++                                # in the document where we found this particular
++                                # instance of the term; we use the value of the
++                                # id attribute that we added to the instance earlier
++                                linkToHeading.set(u"href","#"+instanceID)
++                                # we wait to add the item for the previous
++                                # instance at this point because we need to
++                                # delay adding in order to see if for this
++                                # instance there are multiple references to the
++                                # same ancestor heading (if there are, we append
++                                # link numbers to them, instead of repeating the
++                                # heading; see below)
++                                if lastInstanceItem is not None:
++                                    #print(etree.tostring(lastInstanceItem,method="text"))
++                                    indexEntry.append(lastInstanceItem)
++                                lastInstanceItem = instanceItem
++                                linkToHeading.tail = "\n"
++                                instanceItem.append(linkToHeading)
++                                instanceItem.tail = "\n"
++                            # otherwise, this heading is the same as one that
++                            # we've already added to the index entry for this
++                            # term; so instead of reprocessing the heading, we
++                            # just append one or more link numbers to it
++                            else:
++                                n += 1
++                                counterLink = etree.Element(u"a",{u"href": instanceID, u"class": "index-counter"})
++                                counterLink.text = "("+str(n)+")"
++                                counterLink.tail = "\n"
++                                instanceItem.append(counterLink)
++                            # if the value of our n counter is still at 1 at
++                            # this point, it means the document contains only
++                            # one instance of a reference to this term, so we need
++                            # to add that instance now
++                            if n == 1:
++                                indexEntry.append(instanceItem)
++                    # otherwise, the document contains no hyperlinked references
++                    # to this term at all, so we just add a note to indicate that
++                    else:
++                        noRefsNote = etree.Element(u"dd",{u"class": "index-norefs"})
++                        noRefsNote.text = "No references in this file."
++                        indexEntry.append(noRefsNote)
++                    self.terms.append(indexEntry)
++                    indexEntry.tail = "\n"
++        self.terms.tail = "\n"
++
++    def getAncestorHeading(self, descendantNode):
++        """ Given a node, return the node of the heading for the section that contains it."""
++        node = descendantNode
++        while (node is not None):
++            if isinstance(node.tag,str) and re.match("^[hH][1-6]$",node.tag):
++                return node
++            elif node.getprevious() == None:
++                node = node.getparent()
++            else:
++                node = node.getprevious()
++                if isinstance(node.tag,str) and node.get("class") == "impl":
++                    node = xpath("($thisnode/node())[last()]", thisNode = node)
++        return None
++
++    def addTerms(self, ElementTree, **kwargs):
++        to_remove = set()
++        in_terms = False
++        for node in ElementTree.iter():
++            if in_terms:
++                if node.tag is etree.Comment and \
++                   node.text.strip(utils.spaceCharacters) == u"end-index-terms":
++                    if node.getparent() is not terms_parent:
++                        raise DifferentParentException(u"begin-index-terms and end-index-terms have different parents")
++                    in_terms = False
++                else:
++                    to_remove.add(node)
++            elif node.tag is etree.Comment:
++                if node.text.strip(utils.spaceCharacters) == u"begin-index-terms":
++                    terms_parent = node.getparent()
++                    in_terms = True
++                    node.tail = None
++                    node.addnext(deepcopy(self.terms))
++                    self.indentNode(node.getnext(), 0, **kwargs)
++                elif node.text.strip(utils.spaceCharacters) == u"index-terms":
++                    node.addprevious(etree.Comment(u"begin-index-terms"))
++                    self.indentNode(node.getprevious(), 0, **kwargs)
++                    node.addprevious(deepcopy(self.terms))
++                    self.indentNode(node.getprevious(), 0, **kwargs)
++                    node.addprevious(etree.Comment(u"end-index-terms"))
++                    self.indentNode(node.getprevious(), 0, **kwargs)
++                    node.getprevious().tail = node.tail
++                    to_remove.add(node)
++        for node in to_remove:
++            node.getparent().remove(node)
++
++    def indentNode(self, node, indent=0, newline_char=u"\n", indent_char=u" ",
++                   **kwargs):
++        whitespace = newline_char + indent_char * indent
++        if node.getprevious() is not None:
++            if node.getprevious().tail is None:
++                node.getprevious().tail = whitespace
++            else:
++                node.getprevious().tail += whitespace
++        else:
++            if node.getparent().text is None:
++                node.getparent().text = whitespace
++            else:
++                node.getparent().text += whitespace
++
++class DifferentParentException(utils.AnolisException):
++    """begin-index-terms and end-index-terms do not have the same parent."""
++    pass

Received on Sunday, 15 August 2010 22:55:09 UTC