- From: Alex Muir <alex.g.muir@gmail.com>
- Date: Wed, 20 Jan 2010 13:19:28 +0000
- To: XProc Dev <xproc-dev@w3.org>
- Message-ID: <88b533b91001200519j792a9e48oc382af6f608ba7c6@mail.gmail.com>
Hi,
I have a working XProc script that loads some HTML files using XSLT, makes
a few modifications, and then tidies the content before outputting the
files. It works for most of the files, but occasionally the XML parser that
reads tidy's output throws an exception which the p:try does not seem to be
able to handle. I assume this is because the exception is thrown from within
the p:exec step. The pipeline is below, followed by example exception output.
Let me know if I'm doing something wrong.
I'm running Calabash.
Thanks much,
<p:declare-step xmlns:p="http://www.w3.org/ns/xproc"
                xmlns:c="http://www.w3.org/ns/xproc-step"
                xmlns:cx="http://xmlcalabash.com/ns/extensions"
                name="LoadAndTidy">
  <!--
    LoadAndTidy
    Lists the files in $source-folder and, for every file whose position in
    that listing lies between $startingFileNumber and $endingFileNumber:
      1. runs LoadAndPrepareFileForTidy.xsl, passing the file location as the
         input_uri parameter;
      2. pipes the result through the external HTML Tidy executable (p:exec);
      3. stores the tidied XML in $output-folder.
    If the p:try body fails, the error document is stored in
    $exception-folder instead. Whatever the p:store steps emit on their
    result port is collected and wrapped in one forEachFile element.

    The namespace URIs above are each written on a single line on purpose:
    a line break inside an xmlns value becomes a space after attribute
    normalization and silently produces a different namespace.
  -->
  <p:input port="source">
    <p:document href="blank.xml"/>
  </p:input>
  <p:output port="result" sequence="true"/>

  <!-- Signature of the Calabash cx:message extension step used for logging. -->
  <p:declare-step type="cx:message">
    <p:input port="source"/>
    <p:output port="result"/>
    <p:option name="message" required="true"/>
  </p:declare-step>

  <!-- ***** Starting and Ending File Numbers ***** -->
  <p:variable name="startingFileNumber" select="'497'"/>
  <p:variable name="endingFileNumber" select="'3000'"/>

  <!-- source and output folder variables -->
  <p:variable name="source-folder" select="'../../2009/'"/>
  <p:variable name="output-folder" select="'../../2009/tidy/'"/>
  <p:variable name="error-folder" select="'../../2009/tidy/error/'"/>
  <p:variable name="exception-folder" select="'../../2009/tidy/exception/'"/>

  <p:directory-list>
    <p:with-option name="path" select="$source-folder">
      <p:empty/>
    </p:with-option>
  </p:directory-list>

  <p:for-each name="forEachFile">
    <!-- Only the c:file entries inside the configured position window. -->
    <p:iteration-source
        select="//c:file[position() ge number($startingFileNumber)
                         and position() le number($endingFileNumber)]"/>

    <p:variable name="fileName" select="c:file/@name"/>
    <!-- Position of the current file in the full listing, not in the window. -->
    <p:variable name="startingIterationPosition"
                select="number(p:iteration-position()) + number($startingFileNumber) - 1"/>

    <cx:message>
      <p:with-option name="message"
                     select="concat('-----------------------------',
                                    'Iteration-position:', ' ', $startingIterationPosition,
                                    ' File: ', $fileName,
                                    '-----------------------------')"/>
    </cx:message>

    <!--
      NOTE(review): the failure reported for this pipeline is an XML parse
      error on tidy's output, raised while p:exec reads the process output
      with result-is-xml="true". The stack trace shows it being thrown in a
      separate reader thread, which would explain why this p:try never
      reaches p:catch. A possible workaround (unverified) is to run p:exec
      with result-is-xml="false" and parse the text in a following step
      inside this p:try.
    -->
    <p:try>
      <p:group>
        <cx:message>
          <p:with-option name="message" select="'###### LoadAndPrepareFileForTidy'"/>
        </cx:message>

        <p:xslt name="LoadAndPrepareFileForTidy">
          <p:input port="source"/>
          <p:input port="stylesheet">
            <p:document href="../XSLT/LoadAndPrepareFileForTidy.xsl"/>
          </p:input>
          <p:with-param name="input_uri" select="concat($source-folder, $fileName)"/>
          <p:input port="parameters">
            <p:empty/>
          </p:input>
        </p:xslt>

        <cx:message>
          <p:with-option name="message" select="'###### Begin Tidy'"/>
        </cx:message>

        <p:exec command="..\Resources\html-tidy\tidy"
                source-is-xml="false"
                result-is-xml="true"
                wrap-result-lines="false">
          <!--
            Every string literal below sits on one line and carries its own
            leading space, so reformatting this attribute can never fuse two
            tidy switches together or inject empty arguments (p:exec splits
            args on single spaces). For the same reason spaces are stripped
            from the file name used for the error file, as is already done for
            the stored output. The dot in '\.html' is escaped because the
            second argument of replace() is a regular expression.
          -->
          <p:with-option name="args"
                         select="concat(
                                   '-asxml --add-xml-decl yes --quote-nbsp yes',
                                   ' --indent yes --markup yes --show-warnings yes',
                                   ' --word-2000 yes --clean yes --logical-emphasis yes',
                                   ' --error-file ', $error-folder,
                                   replace(replace($fileName, '\.html', ''), ' ', ''), '.txt',
                                   ' --doctype omit --numeric-entities yes --output-xml yes',
                                   ' --new-blocklevel-tags unparsed-text,DOCUMENT',
                                   ' --new-inline-tags PAGE')"/>
        </p:exec>

        <!-- Drop the c:result wrapper that p:exec puts around tidy's output. -->
        <p:unwrap match="c:result"/>

        <cx:message>
          <p:with-option name="message" select="'###### tidy Complete'"/>
        </cx:message>

        <p:documentation> Store XML file Output </p:documentation>
        <p:identity name="out_file"/>
        <p:store name="store">
          <!-- Target: output folder + file name without '.html' + '-' + position + '.xml', spaces removed. -->
          <p:with-option name="href"
                         select="replace(
                                   replace(
                                     concat($output-folder, $fileName, '-',
                                            $startingIterationPosition, '.xml'),
                                     '\.html', ''),
                                   ' ', '')">
            <p:pipe step="out_file" port="result"/>
          </p:with-option>
        </p:store>

        <p:documentation> Create result XML </p:documentation>
        <p:identity>
          <p:input port="source">
            <p:pipe step="store" port="result"/>
          </p:input>
        </p:identity>
      </p:group>

      <p:catch name="catch">
        <p:group name="output">
          <!-- Make the error document the default input of the steps below. -->
          <p:identity>
            <p:input port="source">
              <p:pipe port="error" step="catch"/>
            </p:input>
          </p:identity>

          <cx:message>
            <p:with-option name="message"
                           select="concat('************** EXCEPTION: ', $fileName)"/>
          </cx:message>

          <p:documentation> Store XML file Output </p:documentation>
          <p:store name="store">
            <!-- Same naming scheme as the normal output, but under the exception folder. -->
            <p:with-option name="href"
                           select="replace(
                                     replace(
                                       concat($exception-folder, $fileName, '-',
                                              $startingIterationPosition, '.xml'),
                                       '\.html', ''),
                                     ' ', '')">
              <p:pipe step="catch" port="error"/>
            </p:with-option>
          </p:store>

          <p:documentation> Create result XML </p:documentation>
          <p:identity>
            <p:input port="source">
              <p:pipe step="store" port="result"/>
            </p:input>
          </p:identity>
        </p:group>
      </p:catch>
    </p:try>
  </p:for-each>

  <p:documentation>Wrap result XML </p:documentation>
  <p:wrap-sequence wrapper="forEachFile"/>
  <p:identity/>
</p:declare-step>
Exception Output:
Jan 20, 2010 8:03:19 AM com.xmlcalabash.core.XProcRuntime info
INFO:
file:/C:/Users/alex/XSLT/Project%20Files%20Form%2010-K/XPROC/HTMLTidy.xpl:71:
Exec: ..\Resources\html-tidy\tidy -asxml -
-add-xml-decl yes --quote-nbsp yes --indent yes --markup yes
--show-warnings yes --word-2000 yes --clean yes --logical-emphas
is yes --error-file
../../i4ContentOutput/SEC/10-k/2009/tidy/error/DebtResolveInc_CIK0001106645.txt
--doctype omit --numeric-e
ntities yes --output-xml yes --new-blocklevel-tags
unparsed-text,DOCUMENT,TEXT --new-inline-tags PAGE,TYPE,SEQUENCE,FILENAME,D
ESCRIPTION,FilingAttributes
Error on line 13097 column 61
SXXP0003: Error reported by XML parser: The prefix "font-size" for
attribute
"font-size:9pt" associated with an element type "p" is not bound.
Exception in thread "Thread-799" com.xmlcalabash.core.XProcException: XProc
error err:XD0011
at
com.xmlcalabash.library.Exec$ProcessOutputReader.run(Exec.java:354)
at java.lang.Thread.run(Unknown Source)
Caused by: net.sf.saxon.s9api.SaxonApiException:
org.xml.sax.SAXParseException: The prefix "font-size" for attribute
"font-siz
e:9pt" associated with an element type "p" is not bound.
at
net.sf.saxon.s9api.DocumentBuilder.build(DocumentBuilder.java:290)
at
com.xmlcalabash.library.Exec$ProcessOutputReader.run(Exec.java:352)
... 1 more
Caused by: net.sf.saxon.trans.XPathException: org.xml.sax.SAXParseException:
The prefix "font-size" for attribute "font-size:9
pt" associated with an element type "p" is not bound.
at net.sf.saxon.event.Sender.sendSAXSource(Sender.java:418)
at net.sf.saxon.event.Sender.send(Sender.java:193)
at net.sf.saxon.event.Sender.send(Sender.java:50)
at net.sf.saxon.Configuration.buildDocument(Configuration.java:2973)
at
com.saxonica.validate.SchemaAwareConfiguration.buildDocument(SchemaAwareConfiguration.java:999)
at
net.sf.saxon.s9api.DocumentBuilder.build(DocumentBuilder.java:287)
... 2 more
Caused by: org.xml.sax.SAXParseException: The prefix "font-size" for
attribute "font-size:9pt" associated with an element type
"p" is not bound.
at
com.sun.org.apache.xerces.internal.util.ErrorHandlerWrapper.createSAXParseException(Unknown
Source)
at
com.sun.org.apache.xerces.internal.util.ErrorHandlerWrapper.fatalError(Unknown
Source)
at
com.sun.org.apache.xerces.internal.impl.XMLErrorReporter.reportError(Unknown
Source)
at
com.sun.org.apache.xerces.internal.impl.XMLErrorReporter.reportError(Unknown
Source)
at
com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.scanStartElement(Unknown
Source)
at
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(Unknown
Source)
at
com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(Unknown
Source)
at
com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(Unknown
Source)
at
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown
Source)
at
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown
Source)
at
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown
Source)
at
com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source)
at
com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown
Source)
at
com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown
Source)
at net.sf.saxon.event.Sender.sendSAXSource(Sender.java:404)
... 7 more
--
Alex
https://sites.google.com/a/utg.edu.gm/alex
Received on Wednesday, 20 January 2010 13:20:03 UTC