RE: XSLT script for IMS

Hi Kevin

I've written some code that can do some canonicalization on names and
included it in templateSaxon.xsl - for people who are interested I include a
code fragment below.

There are still problems with canonicalizing names in the files - for
example they use both "John W. Dower" and "John Dower" for the same person -
but as Andy said there is a limit to how much we need to address this problem
at the moment.

Dr Mark H. Butler
Research Scientist                HP Labs Bristol
mark-h_butler@hp.com
Internet: http://www-uk.hpl.hp.com/people/marbut/

<!-- This function reorders names of the form "butler, mark" into
"mark butler" (forename, one space, surname) and normalizes whitespace.
  - Names without a comma are returned with whitespace normalized only.
  - Only the text before the first comma (surname) and the text between the
    first and second comma (forename) are used; anything after a second
    comma is discarded.
  - Case is left untouched and spaces are not replaced here.
-->

<xsl:function name="str:orderName">
	<xsl:param name="name"/>
	<xsl:choose>
		<xsl:when test="contains($name, ',')">
			<xsl:variable name="tokenizedName" select="tokenize($name, ',')"/>
			<!-- Sequence predicates are used instead of item-at(), which
			     existed only in early XPath 2.0 drafts. -->
			<xsl:variable name="surname" select="$tokenizedName[1]"/>
			<xsl:variable name="forename" select="$tokenizedName[2]"/>
			<!-- normalize-space() trims the blank that follows the comma
			     and collapses internal runs of whitespace. -->
			<xsl:value-of
				select="normalize-space(concat($forename, ' ', $surname))"/>
		</xsl:when>
		<xsl:otherwise>
			<xsl:value-of select="normalize-space($name)"/>
		</xsl:otherwise>
	</xsl:choose>
</xsl:function>

<!-- This function does further canonicalization on names so they can be
used as the trailing part of a URI. Starting from the result of
str:orderName($name):
  1. It replaces every space, and every colon that is immediately followed
     by a space, with a single underscore. A colon that is not followed by
     a space is left unchanged.
  2. It converts the name to lower-case using lower-case(), so capitals
     outside A-Z are folded too.
-->

<xsl:function name="str:canonicalizeName">
	<xsl:param name="name"/>
	<!-- ':? ' matches either ": " or a lone " ", which is the same as
	     replacing ": " first and then every remaining " ". -->
	<xsl:variable name="spacefreename"
		select="replace(str:orderName($name), ':? ', '_')"/>
	<xsl:value-of select="lower-case($spacefreename)"/>
</xsl:function>

<!-- Named template "contrib": emits one lom:Entity for the Entity child of
     the context node. Its rdf:about is the &ocw; entity text followed by
     "contributors#" and the canonicalized name; its vc:FN child holds the
     reordered, whitespace-normalized name. -->
<xsl:template name="contrib">
	<xsl:variable name="contributor" select="Entity"/>
	<lom:Entity>
		<xsl:attribute name="rdf:about">
			<xsl:text>&ocw;contributors#</xsl:text>
			<xsl:value-of select="str:canonicalizeName($contributor)"/>
		</xsl:attribute>
		<vc:FN>
			<xsl:value-of select="str:orderName($contributor)"/>
		</vc:FN>
	</lom:Entity>
</xsl:template>

Received on Friday, 24 October 2003 08:15:41 UTC