- From: Asaf Segal <asaf@cs.huji.ac.il>
- Date: Sun, 17 Dec 2000 17:23:28 +0200
- To: n_d <n_d@sina.com>
- CC: www-lib@w3.org
n_d wrote:
> Hi
> I'm a newbie to libwww. I have a problem in do HTTP GET
> operations, what I wanna do just is to fetch a html page and
> all links within the page.
>
> part of my code:
> //start
> HTProfile_newHTMLNoCacheClient("mytest", "1.0");
> HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);
> HText_registerLinkCallback(foundlink);
> HTRequest_setOutputFormat(request, WWW_SOURCE);
> ...
> anchor = HTAnchor_findAddress(url);
> chunk = HTLoadAnchorToChunk(anchor, request);
> HTEventList_loop(request);
> //end
>
> The trouble is I can't got both html page and links.
> Upper code running result is I can get html page in variable "chunk"
> But CANNOT get links -- function "foundlink" which I registered was
> never called.
>
> If I got rid of "HTRequest_setOutputFormat(request, WWW_SOURCE)"
> the result is just opposite.
>
> Can I get both html page and links? Any help will be appreciated!
>
> ______________________________________
>
> ===================================================================
> 新浪免费电子邮箱 http://mail.sina.com.cn
>
> 花样年华 为青春保鲜!
> http://ad.doubleclick.net/clk;2219239;4732092;g?http://www.fa-ya.com
Here is some code that is based on the showlinks example.
It has two problems that I'd be happy to get a solution for:
The program doesn't stop: how can I know in the terminate handler that
there aren't any more requests?
When I try to get an image the program prompts for a file name. How can I
avoid it?
If I try HTRequest_setOutputFormat(new_request, WWW_SOURCE); I get:
Interrupted!
Fatal Error: Data transfer interrupted
How can I read the file into a string without this message?
#include "WWWLib.h"
#include "WWWInit.h"
#include "WWWHTML.h"
/*
** Print callback installed via HTPrint_setCallback(): sends the formatted
** message to stdout and hands back whatever vfprintf() reports.
*/
PRIVATE int printer (const char * fmt, va_list pArgs)
{
    int written = vfprintf(stdout, fmt, pArgs);
    return written;
}
/*
** Trace callback installed via HTTrace_setCallback(): sends the formatted
** message to stderr and hands back whatever vfprintf() reports.
*/
PRIVATE int tracer (const char * fmt, va_list pArgs)
{
    int written = vfprintf(stderr, fmt, pArgs);
    return written;
}
/*
** After-filter registered with HTNet_addAfter() in main(). It is invoked
** once a request has finished and releases that request object.
**
** Returns HT_OK. The function is declared to return int, so it must not
** fall off the end without a value: the caller reading a missing return
** value is undefined behaviour.
**
** NOTE(review): this handler neither stops the event loop nor tears down
** libwww, so the program keeps running after the last request completes.
*/
PRIVATE int terminate_handler (HTRequest * request, HTResponse * response,
                               void * param, int status)
{
    /* We are done with this request */
    HTRequest_delete(request);

    return HT_OK;
}
/*
** Link callback registered with HText_registerLinkCallback() in main().
**
** For every link that carries an anchor it prints the destination address
** and, when the element can be identified, the element name. Links found
** in FRAME or IMG elements are additionally fetched with a new request.
**
**   text, attribute_number, present, value   not used here
**   element_number                           looked up in the HTML DTD
**   anchor                                   child anchor for the link; may be NULL
*/
PRIVATE void foundLink (HText * text,
                        int element_number, int attribute_number,
                        HTChildAnchor * anchor,
                        const BOOL * present, const char ** value)
{
    HTAnchor * dest = NULL;
    char * address = NULL;
    SGML_dtd * dtd = NULL;
    HTTag * tag = NULL;

    if (!anchor) return;

    /*
    ** Find out which link we got. The anchor we are passed is
    ** a child anchor of the anchor we are currently parsing. We
    ** have to go from this child anchor to the actual destination.
    */
    dest = HTAnchor_followMainLink((HTAnchor *) anchor);
    address = HTAnchor_address(dest);

    /* Nothing to print or fetch without an address; also keeps NULL
    ** away from the "%s" conversion and from HTParse() below. */
    if (!address) return;

    dtd = HTML_dtd();
    tag = SGML_findTag(dtd, element_number);
    HTPrint("Found link `%s\'", address);
    if (tag) {
        char * tagname = HTTag_name(tag);
        HTPrint(" in element: <%s>\n",
                tagname ? tagname : "<unknown>");

        /* tagname may be NULL (see the fallback above), so test it
        ** before handing it to strcmp(). */
        if (tagname &&
            ((strcmp(tagname, "FRAME") == 0) ||
             (strcmp(tagname, "IMG") == 0))) {
            char * uri = HTParse(address, NULL, PARSE_ALL);
            if (uri) {
                HTRequest * new_request = HTRequest_new();
                HTAnchor * new_anchor = HTAnchor_findAddress(uri);
                HTRequest_setAnchor(new_request, new_anchor);
                HTLoadAnchor(new_anchor, new_request);

                /* The parsed string is ours to release; it was leaked
                ** for every FRAME/IMG link before. */
                HT_FREE(uri);
            }
        }
    } else {
        /* Terminate the output line even when the element is unknown */
        HTPrint("\n");
    }
    HT_FREE(address);
}
/*
** Entry point: readpage <uri>
**
** Sets up a libwww HTML client profile without cache, installs the print,
** trace, termination and link callbacks defined in this file, then loads
** the URI given as the first command line argument and runs the event
** loop. Without a usable URI a short usage text is printed instead.
**
** Always returns 0.
*/
int main (int argc, char ** argv)
{
    char * uri = NULL;

    /* Create a new HTML client profile without a persistent cache */
    HTProfile_newHTMLNoCacheClient ("ReadPage", "1.0");

    /* Need our own trace and print functions */
    HTPrint_setCallback(printer);
    HTTrace_setCallback(tracer);

    /* Set trace messages and alert messages */
#if 0
    HTSetTraceMessageMask("sop");
#endif

    /* Add our own termination filter */
    HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL,
                   HT_FILTER_LAST);

    /*
    ** Register our HTML link handler. We don't actually create a HText
    ** object as this is not needed. We only register the specific link
    ** callback.
    */
    HText_registerLinkCallback(foundLink);

    /* Setup a timeout on the request for 15 secs */
    HTHost_setEventTimeout(15000);

    /* Handle command line args */
    if (argc >= 2)
        uri = HTParse(argv[1], NULL, PARSE_ALL);

    if (uri) {
        HTRequest * request = NULL;
        HTAnchor * anchor = NULL;
        BOOL status = NO;

        printf("Parsing %s\n", uri);

        /* Create a request */
        request = HTRequest_new();

        /* Get an anchor object for the URI */
        anchor = HTAnchor_findAddress(uri);

        /*
        ** The earlier experiment that forced WWW_SOURCE output into a
        ** /dev/null stream was disabled, so the FILE handle opened for
        ** it was never used or closed; it is no longer opened at all.
        */

        /* Issue the GET; links are reported through foundLink() */
        status = HTLoadAnchor(anchor, request);

        /* Go into the event loop... */
        if (status == YES) HTEventList_loop(request);

        /* The parsed URI string is ours to release */
        HT_FREE(uri);
    } else {
        /* argv[1] does not exist when no argument was given; do not
        ** pass a NULL pointer to the "%s" conversion. */
        HTPrint("invalid address: %s\n", argc >= 2 ? argv[1] : "(none)");
        HTPrint("Type the URI to print out a list of embedded links\n");
        HTPrint("\treadpage <uri>\n");
        HTPrint("For example:\n");
        HTPrint("\treadpage http://www.w3.org\n");
    }

    /* Release what HTProfile_newHTMLNoCacheClient() set up */
    HTProfile_delete();

    return 0;
}
Received on Sunday, 17 December 2000 10:23:31 UTC