- From: <vojtech.toman@emc.com>
- Date: Mon, 29 Nov 2010 03:18:34 -0500
- To: <xproc-dev@w3.org>
- Message-ID: <3799D0FD120AD940B731A37E36DAF3FE32B10FC825@MX20A.corp.emc.com>
When processing XHTML (or any other vocabulary that refers to W3C schemas - or, in fact, any vocabulary that depends on remote resources), it is always good practice to use local XML catalogs so that: a) your processing can still work if there is no connection to the remote site; and b) you don't put the remote server under unnecessary load by frequently requesting resources that you could/should cache locally.
Calabash probably provides a way of registering an XML catalog or a catalog resolver.
Regards,
Vojtech
--
Vojtech Toman
Consultant Software Engineer
EMC | Information Intelligence Group
vojtech.toman@emc.com
http://developer.emc.com/xmltech
From: xproc-dev-request@w3.org [mailto:xproc-dev-request@w3.org] On Behalf Of Tony Rogers
Sent: Monday, November 29, 2010 12:39 AM
To: XProc Dev
Subject: Baffling error with http-request
Hello,
I'm collecting data for a final project in one of my classes with this script. I need to collect a bunch of data using HTTP requests, and the first step of this project was going swimmingly. I was about to start work on the second step when suddenly the first step started generating a very strange error.
(Note: I'm using Oxygen 12, bundled with Calabash 0.9.23, on a Mac.)
I have absolutely no idea what the hell this error means...
SystemID: /Users/amrogers/Developer/Projects/oXygen_workspace/edu.umd/terpconnect/model/documents/201008/INFM298I/Final Project/xproc.xpl
Engine name: Calabash XProc
Severity: error
Description: net.sf.saxon.s9api.SaxonApiException: org.apache.commons.httpclient.HttpException: 404 Not Found for: http://www.w3.org/TR/xhtml11/DTD/xhtml-datatypes-1.mod 404 Not Found for: http://www.w3.org/TR/xhtml11/DTD/xhtml-datatypes-1.mod
Here's my pipeline:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Main pipeline: passes an inline list of forum links to the
  local:get-each-url step (imported from library.xpl) and copies that
  step's result to this pipeline's result port, serialized as indented
  UTF-8 XML.
-->
<p:pipeline
    xmlns:p="http://www.w3.org/ns/xproc"
    xmlns:c="http://www.w3.org/ns/xproc-step"
    xmlns:cx="http://xmlcalabash.com/ns/extensions"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:local="#empty"
    version="1.0">

  <p:serialization
      port="result"
      encoding="UTF-8"
      indent="true"
      method="xml"
      omit-xml-declaration="false"
  />

  <p:import href="library.xpl" />

  <!--
    Get results of forum HTTP requests
  -->
  <local:get-each-url name="get-forums-data">
    <p:input port="source">
      <p:inline>
        <!-- test WTF is going wrong, use the first only -->
        <links xml:base="http://us.battle.net/sc2/en/forum/">
          <link href="40568/" title="general" />
          <!--<link href="13432/" title="wol-campaign" />
          <link href="13433/" title="terran" />
          <link href="13434/" title="protoss" />
          <link href="13435/" title="zerg" />
          <link href="13436/" title="multiplayer-and-esports" />
          <link href="13437/" title="custom-maps" />-->
        </links>
      </p:inline>
    </p:input>
  </local:get-each-url>

  <!--<p:store href="results/forums.xml">
    <p:input port="source">
      <p:pipe step="get-forums-data" port="result" />
    </p:input>
  </p:store>-->

  <!-- Last step of the pipeline: its output becomes the pipeline result. -->
  <p:identity>
    <p:input port="source">
      <p:pipe step="get-forums-data" port="result" />
    </p:input>
  </p:identity>

  <!--<p:filter select="//xhtml:table[ @id = 'posts' ]" xmlns:xhtml="http://www.w3.org/1999/xhtml">
    <p:input port="source">
      <p:pipe step="get-forums-data" port="result" />
    </p:input>
  </p:filter>
  <p:wrap-sequence wrapper="c:results" />-->

</p:pipeline>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Step library for the forum-collection pipeline.
  Declares local:get-each-url. Two further steps
  (local:get-threads-in-forum and local:url-from-id) are kept below as
  commented-out drafts.
-->
<p:library
    xmlns:c="http://www.w3.org/ns/xproc-step"
    xmlns:cx="http://xmlcalabash.com/ns/extensions"
    xmlns:local="#empty"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:p="http://www.w3.org/ns/xproc"
    version="1.0">

  <!--<p:import href="http://xmlcalabash.com/extension/steps/library-1.0.xpl" />-->

  <!--
    get-each-url

    Input  "source": a document whose root "links" element carries an
                     xml:base attribute and contains "link" elements
                     with href (and title) attributes.
    Output "result": a single c:results element wrapping the output of
                     p:http-request for every link.

    Processing:
      1. every href attribute is made absolute against /links/@xml:base;
      2. each link is renamed to c:request, given a Cookie header plus
         method="GET" and detailed="true", stripped of its title
         attribute, and sent with p:http-request;
      3. the responses are wrapped in c:results.
  -->
  <p:declare-step type="local:get-each-url">
    <p:input port="source" primary="true" kind="document"/>
    <p:output port="result" primary="true" />

    <p:make-absolute-uris match="//@href">
      <p:with-option name="base-uri" select="/links/@xml:base" />
    </p:make-absolute-uris>

    <p:for-each>
      <p:iteration-source select="//link" />

      <p:rename
          match="//link"
          new-name="c:request"
      />

      <!-- Build up the c:request -->
      <p:group>
        <p:insert match="c:request" position="first-child">
          <p:input port="insertion">
            <p:inline>
              <c:header name="Cookie" value="forumView=advanced"/>
            </p:inline>
          </p:input>
        </p:insert>

        <!--
          A computed Date header is intentionally not inserted here.
          The earlier attempt placed a p:with-option inside a p:inline;
          inline content is copied literally and never evaluated, so it
          produced a c:header without the name and value attributes and
          with a stray p:with-option child. Its XPath was also invalid:
          there is no zero-argument dateTime() function (current-dateTime()
          was presumably meant) and format-dateTime() takes two or five
          arguments, not three.
          To add a computed header, insert a complete c:header (name plus
          a placeholder value) and then set the value with p:add-attribute,
          supplying attribute-value through a p:with-option.
          NOTE(review): format-dateTime() is an XSLT function and may not
          be available in the processor's XPath context; confirm before
          relying on it.
        -->

        <p:add-attribute match="c:request" attribute-name="method" attribute-value="GET"/>
        <p:add-attribute match="c:request" attribute-name="detailed" attribute-value="true"/>
        <p:namespace-rename apply-to="all" from="#empty" to=""/>
        <!-- title is bookkeeping for the caller and is removed from the request -->
        <p:delete match="//@title"/>
      </p:group>

      <!-- Print request for debugging -->
      <!--<cx:message>
        <p:with-option name="message" select="/*" />
      </cx:message>-->

      <!--
        NOTE(review): when a response is XHTML with a DOCTYPE, parsing it
        may trigger requests for the referenced DTD modules; registering
        a local XML catalog with the processor avoids depending on the
        remote site. Confirm how the processor in use is configured.
      -->
      <p:http-request/>
    </p:for-each>

    <p:wrap-sequence wrapper="c:results"/>

    <!--
    <p:identity>
      <p:input port="source">
        <p:pipe port="result" step="store-results"/>
      </p:input>
    </p:identity>
    -->
  </p:declare-step>

  <!--
    threads-from-forum
  -->
  <!--<p:declare-step type="local:get-threads-in-forum">
    <p:input port="source" kind="document" />
    <p:output port="result" sequence="false" />
    <p:for-each >
      <p:iteration-source select="//my:thread/@xml:id" />
      <p:variable name="prefix" select="'http://us.battle.net/sc2/en/forum/topic/'" />
      <local:link href="" />
    </p:for-each>
    <p:wrap-sequence
      wrapper="links"
      wrapper-namespace="#empty"
      wrapper-prefix="local"
    />
  </p:declare-step>-->

  <!--
    URL-from-id
  -->
  <!--<p:declare-step type="local:url-from-id">
    <p:documentation>
      Using a combination of the specified options, this step takes as options
      a resource type (must be 'forum','thread', or 'user') and an id (a number)
      and outputs the appropriate URL for the resource.
    </p:documentation>
    <!-\- ports -\->
    <p:input port="source" primary="false"><p:empty /></p:input>
    <p:output port="url" primary="false"></p:output>
    <!-\- options -\->
    <p:option name="resource-type" required="true" />
    <p:option name="id" required="true" />
    <!-\- variables -\->
    <p:variable name="forum-url-prefix" select="'http://us.battle.net/sc2/en/forum/'"/>
    <p:variable name="thread-url-prefix" select="'http://us.battle.net/sc2/en/forum/topic/'" />
    <p:variable name="user-url-prefix" select="'http://us.battle.net/sc2/en/profile/'" />
    <p:variable name="general" select="concat($forum-url-prefix,'40568/')"/>
    <p:variable name="wol-campaign" select="concat($forum-url-prefix,'13432/')"/>
    <p:variable name="terran" select="concat($forum-url-prefix,'13433/')"/>
    <p:variable name="protoss" select="concat($forum-url-prefix,'13434/')"/>
    <p:variable name="zerg" select="concat($forum-url-prefix,'13435/')"/>
    <p:variable name="multiplayer-and-esports"
      select="concat($forum-url-prefix,'13436/')"/>
    <p:variable name="custom-maps" select="concat($forum-url-prefix,'13437/')"/>
    <p:variable name="blizzcon" select="concat($forum-url-prefix,'692681/')"/>
    <!-\- step execution -\->
    <p:choose>
      <p:when test="$resource-type = 'forum'">
        <link href="concat($forum-url-prefix,$id)" />
      </p:when>
      <p:when test="$resource-type = 'thread'">
        <link href="concat($thread-url-prefix,$id)" />
      </p:when>
      <p:when test="$resource-type = 'user'">
        <link href="concat($user-url-prefix,$id)" />
      </p:when>
      <p:otherwise>
        <p:error></p:error>
      </p:otherwise>
    </p:choose>
  </p:declare-step>-->

</p:library>
Received on Monday, 29 November 2010 08:20:07 UTC