Non-blocking stream download hangs or fails

We wish to write a "robot" which does not extract links to follow from
crawled pages, but is given a batch of URLs to fetch (sort of a batch-mode
bulk-fetch routine).

We tried using the non-blocking robot profile, added proxy support, and
issued requests for several URLs at once.  The output of each request was
sent to a byte-counting stream.  The after-handler fires immediately, with
a status code of -1 for every page.

Would appreciate some guidance with this, thanks.  Code follows:

#include "WWWLib.h"			      /* Global Library Include
file */
#include "WWWMIME.h"				    /* MIME
parser/generator */
#include "WWWNews.h"				       /* News access
module */
#include "WWWHTTP.h"				       /* HTTP access
module */
#include "WWWFTP.h"
#include "WWWFile.h"
#include "WWWGophe.h"
#include "WWWInit.h"

#define APP_VERSION		"1.0"
#define DEFAULT_OUTPUT_FILE     "get.out"

/*
**  Formats `fmt` with the argument list `pArgs` and writes the result to
**  stdout.  Returns whatever vfprintf() returns: the number of characters
**  written, or a negative value on output error.
*/
PRIVATE int printer (const char * fmt, va_list pArgs)
{
    return (vfprintf(stdout, fmt, pArgs));
}

/*
**  Formats `fmt` with the argument list `pArgs` and writes the result to
**  stderr.  Returns whatever vfprintf() returns: the number of characters
**  written, or a negative value on output error.
*/
PRIVATE int tracer (const char * fmt, va_list pArgs)
{
    return (vfprintf(stderr, fmt, pArgs));
}

/*
**  Per-request bookkeeping.  One instance is allocated for every request
**  and attached to it with HTRequest_setContext(), so that the after-filter
**  can get it back through HTRequest_context().
*/
class Context {
public:				/* main() fills these fields in directly */
  int *		premain;	/* points at the caller's count of requests */
  HTStream *	stream;		/* stream the request's data is loaded into */
};

**  We get called here from the event loop when we are done
**  loading. Here we terminate the program as we have nothing
**  better to do.
int terminate_handler (HTRequest * request, HTResponse * response,
    		       void * param, int status)
  Context * context = (Context*) HTRequest_context(request);
  // Check for status
  HTPrint("Load resulted in status=%d context=%x\n", status, context);
  // We are done with this request
  return 0;

int main (int argc, char ** argv)
  char * urlSet[] = {

  char * urlNewSet[] = {

  HTProfile_newRobot("Memex", "0");
  HTProxy_add("http", "");

  /* Need our own trace and print functions */

  /* Add our own filter to terminate the application */
  HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);

  /* Set the timeout for how long we are going to wait for a response */

  char fname[] = "wgotDDD.html";
  int remain = sizeof(urlSet)/sizeof(char*);

  for ( int ux = 0; ux < sizeof(urlSet)/sizeof(char*); ux++ ) {
    sprintf(fname, "wgot%d.html", ux);
    const char * url = urlSet[ux];
    HTRequest * request = HTRequest_new();
    HTRequest_setOutputFormat (request, WWW_RAW);
    HTStream * getLength = HTContentCounter(HTBlackHole(), request, -1);
    Context * ctx = (Context*) HT_CALLOC(1, sizeof(Context));
    ctx->premain = &remain;
    ctx->stream = getLength;
    HTRequest_setContext(request, ctx);
    BOOL status = HTLoadToStream(url, getLength, request);
    HTPrint("%d %s\n", status, url);

  return 0;

Received on Monday, 1 January 2001 06:00:46 UTC