W3C home > Mailing lists > Public > xproc-dev@w3.org > January 2010

Try catch Exception handling with exec to tidy not working in Calabash

From: Alex Muir <alex.g.muir@gmail.com>
Date: Wed, 20 Jan 2010 13:19:28 +0000
Message-ID: <88b533b91001200519j792a9e48oc382af6f608ba7c6@mail.gmail.com>
To: XProc Dev <xproc-dev@w3.org>
Hi,

I have an XProc script that loads some HTML files using XSLT, makes a few
modifications, and then tidies the content before writing the files out. It
works for most of the files, but occasionally XML parsing of the tidy output
throws an exception which the p:try does not seem to be able to handle, I
assume because the exception is launched from the exec. Example exception
output is below, after the script:

Let me know if I'm doing something wrong.

I'm running Calabash

Thanks Much


<!-- LoadAndTidy: lists the files in $source-folder and, for every file whose
     position falls between $startingFileNumber and $endingFileNumber, runs
     LoadAndPrepareFileForTidy.xsl, feeds the result to the external tidy
     executable and stores the tidied XML in $output-folder. When a step inside
     the p:try fails, the error document is stored in $exception-folder
     instead. The pipeline result is the sequence of p:store results wrapped
     in a forEachFile element. -->
<p:declare-step xmlns:p="http://www.w3.org/ns/xproc"
  xmlns:c="http://www.w3.org/ns/xproc-step"
  xmlns:cx="http://xmlcalabash.com/ns/extensions" name="LoadAndTidy">

  <p:input port="source">
    <p:document href="blank.xml"/>
  </p:input>
  <p:output port="result" sequence="true"/>

  <!-- Signature of the cx:message extension step used for progress logging. -->
  <p:declare-step type="cx:message">
    <p:input port="source"/>
    <p:output port="result"/>
    <p:option name="message" required="true"/>
  </p:declare-step>


  <!-- ***** Starting and Ending File Numbers ***** -->
  <!-- 1-based positions in the directory listing; both bounds are inclusive. -->
  <p:variable name="startingFileNumber" select="'497'"/>
  <p:variable name="endingFileNumber" select="'3000'"/>

  <!-- source and output folder variables -->
  <p:variable name="source-folder" select="'../../2009/'"/>
  <p:variable name="output-folder" select="'../../2009/tidy/'"/>
  <p:variable name="error-folder" select="'../../2009/tidy/error/'"/>
  <p:variable name="exception-folder" select="'../../2009/tidy/exception/'"/>

  <!-- Produce the listing of $source-folder that the loop below iterates over. -->
  <p:directory-list>
    <p:with-option name="path" select="$source-folder">
      <p:empty/>
    </p:with-option>
  </p:directory-list>

  <p:for-each name="forEachFile">

    <!-- Only the c:file entries inside the configured position window. -->
    <p:iteration-source
      select="//c:file[position() ge number($startingFileNumber)
                       and position() le number($endingFileNumber)]"/>

    <p:variable name="fileName" select="c:file/@name"/>
    <!-- Position of the current file in the full listing, not in the window. -->
    <p:variable name="startingIterationPosition"
      select="number(p:iteration-position()) + number($startingFileNumber) - 1"/>

    <cx:message>
      <p:with-option name="message"
        select="concat('-----------------------------', 'Iteration-position:',
                       '  ', $startingIterationPosition, '  File: ', $fileName,
                       '-----------------------------')"/>
    </cx:message>

    <p:try>
      <p:group>

        <cx:message>
          <p:with-option name="message" select="'###### LoadAndPrepareFileForTidy'"/>
        </cx:message>
        <!-- The stylesheet receives the path of the file to load as the
             input_uri parameter; the source port is the default readable port. -->
        <p:xslt name="LoadAndPrepareFileForTidy">
          <p:input port="source"/>
          <p:input port="stylesheet">
            <p:document href="../XSLT/LoadAndPrepareFileForTidy.xsl"/>
          </p:input>
          <p:with-param name="input_uri" select="concat($source-folder, $fileName)"/>
          <p:input port="parameters">
            <p:empty/>
          </p:input>
        </p:xslt>

        <cx:message>
          <p:with-option name="message" select="'######   Begin Tidy'"/>
        </cx:message>
        <!-- NOTE(review): the posted stack trace shows the XML parse failure
             being raised on Exec's output-reader thread, which may be why the
             enclosing p:try does not catch it. Reading the output as text
             (result-is-xml="false") and parsing it in a later step is a
             possible workaround; confirm against the Calabash version in use. -->
        <!-- The argument string is built from separate literals so that no
             literal spans a line break. Arguments are separated by single
             spaces so that splitting on the separator cannot yield an empty
             argument, and the new-inline-tags option is now a separate token
             from the preceding tag list. The file extension is stripped with
             an escaped dot so that only a literal ".html" is removed. -->
        <p:exec command="..\Resources\html-tidy\tidy" source-is-xml="false"
          result-is-xml="true" wrap-result-lines="false">
          <p:with-option name="args"
            select="concat(
              '-asxml --add-xml-decl yes --quote-nbsp yes --indent yes --markup yes',
              ' --show-warnings yes --word-2000 yes --clean yes --logical-emphasis yes',
              ' --error-file ', $error-folder, replace($fileName, '\.html', ''), '.txt',
              ' --doctype omit --numeric-entities yes --output-xml yes',
              ' --new-blocklevel-tags unparsed-text,DOCUMENT',
              ' --new-inline-tags PAGE')"/>
        </p:exec>


        <!-- Drop the c:result wrapper that p:exec puts around the output. -->
        <p:unwrap match="c:result"/>
        <cx:message>
          <p:with-option name="message" select="'######   tidy Complete'"/>
        </cx:message>


        <p:documentation> Store XML file Output </p:documentation>
        <p:identity name="out_file"/>
        <!-- Target name: folder + file name + '-' + position + '.xml', with the
             literal ".html" and all spaces removed. -->
        <p:store name="store">
          <p:with-option name="href"
            select="replace(replace(concat($output-folder, $fileName, '-',
                    $startingIterationPosition, '.xml'), '\.html', ''), ' ', '')">
            <p:pipe step="out_file" port="result"/>
          </p:with-option>
        </p:store>

        <p:documentation> Create result XML </p:documentation>
        <!-- The result of this branch is the output of p:store. -->
        <p:identity>
          <p:input port="source">
            <p:pipe step="store" port="result"/>
          </p:input>
        </p:identity>

      </p:group>
      <p:catch name="catch">
        <p:group name="output">

          <!-- Make the error document the default readable port for the
               steps that follow. -->
          <p:identity>
            <p:input port="source">
              <p:pipe port="error" step="catch"/>
            </p:input>
          </p:identity>

          <cx:message>
            <p:with-option name="message"
              select="concat('************** EXCEPTION: ', $fileName)"/>
          </cx:message>

          <p:documentation> Store XML file Output </p:documentation>
          <!-- Same naming scheme as the success branch, but written to
               $exception-folder. -->
          <p:store name="store">
            <p:with-option name="href"
              select="replace(replace(concat($exception-folder, $fileName, '-',
                      $startingIterationPosition, '.xml'), '\.html', ''), ' ', '')">
              <p:pipe step="catch" port="error"/>
            </p:with-option>
          </p:store>

          <p:documentation> Create result XML </p:documentation>
          <p:identity>
            <p:input port="source">
              <p:pipe step="store" port="result"/>
            </p:input>
          </p:identity>

        </p:group>
      </p:catch>
    </p:try>
  </p:for-each>

  <p:documentation>Wrap result XML </p:documentation>
  <!-- Collect the per-file results into a single forEachFile document. -->
  <p:wrap-sequence wrapper="forEachFile"/>
  <p:identity/>
</p:declare-step>


Exception Output:
Jan 20, 2010 8:03:19 AM com.xmlcalabash.core.XProcRuntime info
INFO:
file:/C:/Users/alex/XSLT/Project%20Files%20Form%2010-K/XPROC/HTMLTidy.xpl:71:
Exec: ..\Resources\html-tidy\tidy -asxml -
-add-xml-decl yes --quote-nbsp yes --indent yes --markup yes
--show-warnings yes --word-2000 yes --clean yes --logical-emphas
is yes --error-file
../../i4ContentOutput/SEC/10-k/2009/tidy/error/DebtResolveInc_CIK0001106645.txt
--doctype omit --numeric-e
ntities yes --output-xml yes --new-blocklevel-tags
unparsed-text,DOCUMENT,TEXT --new-inline-tags PAGE,TYPE,SEQUENCE,FILENAME,D
ESCRIPTION,FilingAttributes
Error on line 13097 column 61
  SXXP0003: Error reported by XML parser: The prefix "font-size" for
attribute
  "font-size:9pt" associated with an element type "p" is not bound.
Exception in thread "Thread-799" com.xmlcalabash.core.XProcException: XProc
error err:XD0011
        at
com.xmlcalabash.library.Exec$ProcessOutputReader.run(Exec.java:354)
        at java.lang.Thread.run(Unknown Source)
Caused by: net.sf.saxon.s9api.SaxonApiException:
org.xml.sax.SAXParseException: The prefix "font-size" for attribute
"font-siz
e:9pt" associated with an element type "p" is not bound.
        at
net.sf.saxon.s9api.DocumentBuilder.build(DocumentBuilder.java:290)
        at
com.xmlcalabash.library.Exec$ProcessOutputReader.run(Exec.java:352)
        ... 1 more
Caused by: net.sf.saxon.trans.XPathException: org.xml.sax.SAXParseException:
The prefix "font-size" for attribute "font-size:9
pt" associated with an element type "p" is not bound.
        at net.sf.saxon.event.Sender.sendSAXSource(Sender.java:418)
        at net.sf.saxon.event.Sender.send(Sender.java:193)
        at net.sf.saxon.event.Sender.send(Sender.java:50)
        at net.sf.saxon.Configuration.buildDocument(Configuration.java:2973)
        at
com.saxonica.validate.SchemaAwareConfiguration.buildDocument(SchemaAwareConfiguration.java:999)
        at
net.sf.saxon.s9api.DocumentBuilder.build(DocumentBuilder.java:287)
        ... 2 more
Caused by: org.xml.sax.SAXParseException: The prefix "font-size" for
attribute "font-size:9pt" associated with an element type
 "p" is not bound.
        at
com.sun.org.apache.xerces.internal.util.ErrorHandlerWrapper.createSAXParseException(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.util.ErrorHandlerWrapper.fatalError(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.impl.XMLErrorReporter.reportError(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.impl.XMLErrorReporter.reportError(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.scanStartElement(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source)
        at
com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown
Source)
        at
com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown
Source)
        at net.sf.saxon.event.Sender.sendSAXSource(Sender.java:404)
        ... 7 more


-- 

Alex
https://sites.google.com/a/utg.edu.gm/alex
Received on Wednesday, 20 January 2010 13:20:03 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Wednesday, 20 January 2010 13:20:03 GMT