- From: Alex Muir <alex.g.muir@gmail.com>
- Date: Wed, 20 Jan 2010 13:19:28 +0000
- To: XProc Dev <xproc-dev@w3.org>
- Message-ID: <88b533b91001200519j792a9e48oc382af6f608ba7c6@mail.gmail.com>
Hi, I have a working XProc script that loads some HTML files using XSLT, makes a few modifications, and then tidies the content before outputting the files. It works for most of the files, but occasionally the XML parsers in tidy throw an exception which the p:try does not seem to be able to handle, I assume because the exception is launched from the exec. Example exception output is below. Let me know if I'm doing something wrong. I'm running Calabash. Thanks much. <p:declare-step xmlns:p="http://www.w3.org/ns/xproc" xmlns:c="http://www.w3.org/ns/xproc-step" xmlns:cx="http://xmlcalabash.com/ns/extensions" name="LoadAndTidy"> <p:input port="source"> <p:document href="blank.xml"/> </p:input> <p:output port="result" sequence="true"/> <p:declare-step type="cx:message"> <p:input port="source"/> <p:output port="result"/> <p:option name="message" required="true"/> </p:declare-step> <!-- ***** Starting and Ending File Numbers ***** --> <p:variable name="startingFileNumber" select="'497'"/> <p:variable name="endingFileNumber" select="'3000'"/> <!-- source and output folder variables --> <p:variable name="source-folder" select="'../../2009/'"/> <p:variable name="output-folder" select="'../../2009/tidy/'"/> <p:variable name="error-folder" select="'../../2009/tidy/error/'"/> <p:variable name="exception-folder" select="'../../2009/tidy/exception/'"/> <p:directory-list> <p:with-option name="path" select="$source-folder"> <p:empty/> </p:with-option> </p:directory-list> <p:for-each name="forEachFile"> <p:iteration-source select="//c:file[position() ge number($startingFileNumber) and position() le number($endingFileNumber)]"/> <p:variable name="fileName" select="c:file/@name"/> <p:variable name="startingIterationPosition" select="number(p:iteration-position()) + number($startingFileNumber)-1"/> <cx:message> <p:with-option name="message" select="concat('-----------------------------', 'Iteration-position:',' ', $startingIterationPosition, ' File: ', 
$fileName,'-----------------------------')" /> </cx:message> <p:try> <p:group> <cx:message> <p:with-option name="message" select="'###### LoadAndPrepareFileForTidy'"/> </cx:message> <p:xslt name="LoadAndPrepareFileForTidy"> <p:input port="source"/> <p:input port="stylesheet"> <p:document href="../XSLT/LoadAndPrepareFileForTidy.xsl"/> </p:input> <p:with-param name="input_uri" select="concat($source-folder,$fileName)"/> <p:input port="parameters"> <p:empty/> </p:input> </p:xslt> <cx:message> <p:with-option name="message" select="'###### Begin Tidy'"/> </cx:message> <p:exec command="..\Resources\html-tidy\tidy" source-is-xml="false" result-is-xml="true" wrap-result-lines="false"> <p:with-option name="args" select="concat('-asxml --add-xml-decl yes --quote-nbsp yes --indent yes --markup yes --show-warnings yes --word-2000 yes --clean yes --logical-emphasis yes --error-file ',$error-folder, replace($fileName,'.html',''),'.txt --doctype omit --numeric-entities yes --output-xml yes --new-blocklevel-tags unparsed-text,DOCUMENT--new-inline-tags PAGE')" /> </p:exec> <p:unwrap match="c:result"/> <cx:message> <p:with-option name="message" select="'###### tidy Complete'"/> </cx:message> <p:documentation> Store XML file Output </p:documentation> <p:identity name="out_file"/> <p:store name="store"> <p:with-option name="href" select="replace(replace(concat($output-folder,$fileName,'-', $startingIterationPosition, '.xml'),'.html',''),' ','')"> <p:pipe step="out_file" port="result"/> </p:with-option> </p:store> <p:documentation> Create result XML </p:documentation> <p:identity> <p:input port="source"> <p:pipe step="store" port="result"/> </p:input> </p:identity> </p:group> <p:catch name="catch"> <p:group name="output"> <p:identity> <p:input port="source"> <p:pipe port="error" step="catch"/> </p:input> </p:identity> <cx:message> <p:with-option name="message" select="concat('************** EXCEPTION: ', $fileName)"/> </cx:message> <p:documentation> Store XML file Output 
</p:documentation> <p:store name="store"> <p:with-option name="href" select="replace(replace(concat($exception-folder, $fileName,'-', $startingIterationPosition, '.xml'),'.html',''),' ','')"> <p:pipe step="catch" port="error"/> </p:with-option> </p:store> <p:documentation> Create result XML </p:documentation> <p:identity> <p:input port="source"> <p:pipe step="store" port="result"/> </p:input> </p:identity> </p:group> </p:catch> </p:try> </p:for-each> <p:documentation>Wrap result XML </p:documentation> <p:wrap-sequence wrapper="forEachFile"/> <p:identity/> </p:declare-step> Exception Output: Jan 20, 2010 8:03:19 AM com.xmlcalabash.core.XProcRuntime info INFO: file:/C:/Users/alex/XSLT/Project%20Files%20Form%2010-K/XPROC/HTMLTidy.xpl:71: Exec: ..\Resources\html-tidy\tidy -asxml --add-xml-decl yes --quote-nbsp yes --indent yes --markup yes --show-warnings yes --word-2000 yes --clean yes --logical-emphasis yes --error-file ../../i4ContentOutput/SEC/10-k/2009/tidy/error/DebtResolveInc_CIK0001106645.txt --doctype omit --numeric-entities yes --output-xml yes --new-blocklevel-tags unparsed-text,DOCUMENT,TEXT --new-inline-tags PAGE,TYPE,SEQUENCE,FILENAME,DESCRIPTION,FilingAttributes Error on line 13097 column 61 SXXP0003: Error reported by XML parser: The prefix "font-size" for attribute "font-size:9pt" associated with an element type "p" is not bound. Exception in thread "Thread-799" com.xmlcalabash.core.XProcException: XProc error err:XD0011 at com.xmlcalabash.library.Exec$ProcessOutputReader.run(Exec.java:354) at java.lang.Thread.run(Unknown Source) Caused by: net.sf.saxon.s9api.SaxonApiException: org.xml.sax.SAXParseException: The prefix "font-size" for attribute "font-size:9pt" associated with an element type "p" is not bound. at net.sf.saxon.s9api.DocumentBuilder.build(DocumentBuilder.java:290) at com.xmlcalabash.library.Exec$ProcessOutputReader.run(Exec.java:352) ... 
1 more Caused by: net.sf.saxon.trans.XPathException: org.xml.sax.SAXParseException: The prefix "font-size" for attribute "font-size:9pt" associated with an element type "p" is not bound. at net.sf.saxon.event.Sender.sendSAXSource(Sender.java:418) at net.sf.saxon.event.Sender.send(Sender.java:193) at net.sf.saxon.event.Sender.send(Sender.java:50) at net.sf.saxon.Configuration.buildDocument(Configuration.java:2973) at com.saxonica.validate.SchemaAwareConfiguration.buildDocument(SchemaAwareConfiguration.java:999) at net.sf.saxon.s9api.DocumentBuilder.build(DocumentBuilder.java:287) ... 2 more Caused by: org.xml.sax.SAXParseException: The prefix "font-size" for attribute "font-size:9pt" associated with an element type "p" is not bound. at com.sun.org.apache.xerces.internal.util.ErrorHandlerWrapper.createSAXParseException(Unknown Source) at com.sun.org.apache.xerces.internal.util.ErrorHandlerWrapper.fatalError(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLErrorReporter.reportError(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLErrorReporter.reportError(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.scanStartElement(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown Source) at 
com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source) at net.sf.saxon.event.Sender.sendSAXSource(Sender.java:404) ... 7 more -- Alex https://sites.google.com/a/utg.edu.gm/alex
Received on Wednesday, 20 January 2010 13:20:03 UTC