- From: Jim Giza 617 290 0710 x6453 <giza@cheops.hq.ileaf.com>
- Date: Fri, 1 Dec 95 14:36:56 EST
- To: www-lib@w3.org
Hello, similar to the post of this morning, I am also looking for
details on how best to get at the HREFs within the retrieved HTML
file. Specifically, after HTLoadAbsolute succeeds, how do I get
at any HREFs contained within? Is there an object chain to traverse,
or does it require parsing through the HTML?
Thanks.
-----------------------------------------------------------------------
/*
findurl.c - Resolve URLs out on the World-Wide Web using the public
domain libwww.
06oct95 giza@ultranet.com Began.
*/
#include <stdio.h>
#include <stdlib.h>
#include <sysexits.h>
#include <WWWLib.h>
#include "findurl.h"
/* Character which, given as the output file name, selects stdout. */
#define DASH '-'

/* Forward declarations for the functions defined in this file. */
void main(int argc, char **argv);   /* argc = argument count, argv = argument vector */
void show_usage(char *pname);
BOOLEAN resolve_url(char *url, char *fname);
/*
resolve_url - Use the W3C Reference Library to load a URL and write the
unparsed document source to a file.

url     URL handed to HTLoadAbsolute.
fname   Name of the output file; exactly "-" selects stdout.

Returns TRUE only when HTLoadAbsolute reports HT_LOADED. Returns FALSE when
the output file cannot be opened, when no request object can be created, or
when the load itself fails.

Note, do *not* explicitly close the file once it has been handed to
HTFWriter_new. The library does that for you.
*/
BOOLEAN resolve_url(char *url, char *fname)
{
    BOOLEAN return_value = FALSE;
    HTRequest *request;
    FILE *fp;
    int status;
    /* Only the exact name "-" means stdout; "-page.html" is an ordinary file. */
    int to_stdout = (DASH == fname[0] && '\0' == fname[1]);

    if (to_stdout)
        fp = stdout;
    else
        fp = fopen(fname, "w");
    if (NULL == fp)
    {
        perror(fname);
        return(FALSE);
    }

    request = HTRequest_new();
    if (NULL == request)
    {
        /* The stream was never handed to the library, so it is still ours to close. */
        if (!to_stdout)
            fclose(fp);
        return(FALSE);
    }

    request->method = HTMethod_enum("GET");
    request->output_format = WWW_SOURCE;
    request->BlockingIO = TRUE;
    request->output_stream = HTFWriter_new(fp, FALSE);

    status = HTLoadAbsolute(url, request);
    switch (status)
    {
    case HT_LOADED:
        if (NULL != getenv("DEBUG"))
        {
            /*
            NOTE(review): HTAnchor_address is understood to return a freshly
            allocated string owned by the caller - confirm against the libwww
            version in use.
            */
            char *address = HTAnchor_address((HTAnchor *) request->anchor);
            if (NULL != address)
            {
                /*
                Diagnostics go to stderr: stdout may be the document stream,
                and the library may already have closed it by now.
                */
                fprintf(stderr, "URI: [%s]\n", address);
                free(address);
            }
        }
        return_value = TRUE;
        break;
    case HT_ERROR:
        fprintf(stderr, "HT_ERROR: Error has occured\n");
        break;
    default:
        fprintf(stderr, "HTLoadAbsolute returned error code %d\n", status);
        break;
    }

    HTRequest_delete(request);
    return(return_value);
}
/*
show_usage - Write the two-line command synopsis to stderr.

pname   Program name substituted into the first line of the synopsis.
*/
void show_usage(char *pname)
{
    /* One call, two adjacent literals: the emitted bytes are the same as two separate prints. */
    fprintf(stderr,
            "Usage:\t%s URL-goes-here name-of-outfile-here\n"
            "if name-of-output-file is \"-\", then stdout is used\n",
            pname);
}
/*
main - Command-line entry point: findurl URL outfile

Validates the command line, initializes the W3C Reference Library, resolves
the URL into the named output file and shuts the library down again.

Exit status: EX_USAGE for a bad command line, EX_UNAVAILABLE when the URL
could not be resolved, EX_OK otherwise.

NOTE(review): the return type is left as void to match the forward
declaration earlier in this file; standard C expects int main. Every path
leaves through exit(), so the process status is still well defined.
*/
void main(int argc, char **argv)
{
    char *url, *fname;
    int status = EX_OK;

    /* Validate the command line. */
    if (3 != argc || (NULL==(url=argv[1])) || (NULL==(fname=argv[2])))
    {
        show_usage(argv[0]);
        exit(EX_USAGE);
    }

    /* Initialize the W3C Reference library. */
    if (NULL != getenv("DEBUG"))
        WWW_TraceFlag = 1;
    HTLibInit();

    /* Resolve the URL. resolve_url returns a BOOLEAN, so test its truth value
       rather than comparing it against the pointer constant NULL. */
    if (!resolve_url(url, fname))
    {
        fprintf(stderr, "Unable to resolve URL %s into file %s\n", url, fname);
        /* Report the failure to the caller instead of always exiting with EX_OK. */
        status = EX_UNAVAILABLE;
    }

    /* Close the W3C Reference Library. */
    HTLibTerminate();

    /* All done. */
    exit(status);
}
Received on Friday, 1 December 1995 14:37:21 UTC