- From: Jim Giza 617 290 0710 x6453 <giza@cheops.hq.ileaf.com>
- Date: Fri, 1 Dec 95 14:36:56 EST
- To: www-lib@w3.org
Hello, similiar to the post of this morning, I am also looking for details on how to best get at the HREF's within the retrieved HTML file. Specifically, after HTLoadAbsolute succeeds, how do I get at any HREFs contained within? Is there an object chain to traverse, or does it require parsing through the html? Thanks. ----------------------------------------------------------------------- /* findurl.c - Resolve URLs out on the World-Wide Web using the public domain libwww. 06oct95 giza@ultranet.com Began. */ #include <stdio.h> #include <stdlib.h> #include <sysexits.h> #include <WWWLib.h> #include "findurl.h" #define DASH '-' void main(int argv, char **argc); void show_usage(char *pname); BOOLEAN resolve_url(char *url, char *fname); /* Code to use W3C Reference library to resolve URLs. Note, do *not* explicitly close the file opened in this function. The library does that for you. Returns TRUE upon success. */ BOOLEAN resolve_url(char *url, char *fname) { FILE *fp; BOOLEAN return_value = FALSE; HTRequest *request; if (DASH == *fname ) fp = stdout; else fp = fopen(fname, "w"); if ( (NULL != fp) && (NULL != (request = HTRequest_new()))) { int i; request->method = HTMethod_enum("GET"); request->output_format = WWW_SOURCE; request->BlockingIO = TRUE; request->output_stream = HTFWriter_new(fp, FALSE); switch((i=HTLoadAbsolute(url,request))) { case HT_LOADED: if (NULL != getenv("DEBUG")) printf("URI: [%s]\n", HTAnchor_address((HTAnchor*)request->anchor)); return_value = TRUE; break; case HT_ERROR: fprintf(stderr, "HT_ERROR: Error has occured\n"); break; default: fprintf(stderr, "HTLoadAbsolute returned error code %d\n", i); break; } HTRequest_delete(request); } return(return_value); } void show_usage(char *pname) { fprintf(stderr,"Usage:\t%s URL-goes-here name-of-outfile-here\n",pname); fprintf(stderr,"if name-of-output-file is \"-\", then stdout is used\n"); } void main(int argc, char **argv) { char *url, *fname; /* Validate the command line. */ if (3 != argc || (NULL==(url=argv[1])) || (NULL==(fname=argv[2]))) { show_usage(argv[0]); exit(EX_USAGE); } /* Initialize the W3C Reference library. */ if (NULL != getenv("DEBUG")) WWW_TraceFlag = 1; HTLibInit(); /* Resovle the URL. */ if (NULL == resolve_url(url, fname)) fprintf(stderr, "Unable to resolve URL %s into file %s\n", url, fname); /* Close the W3C Reference Library. */ HTLibTerminate(); /* All done. */ exit(EX_OK); }
Received on Friday, 1 December 1995 14:37:21 UTC