W3C home > Mailing lists > Public > www-lib@w3.org > April to June 1998

Authentication and libwww (and others...)

From: William Snow <Will.Snow@Eng.Sun.COM>
Date: Thu, 11 Jun 1998 09:26:10 -0700 (PDT)
Message-Id: <199806111626.JAA16909@thresher.eng.sun.com>
To: www-lib@w3.org

I'm trying to spider a website that uses authentication, but 
I want to hardcode in the user and password instead of asking
the user when the actual request is made.

I've tried using HTAA_newModule to add in my own before/after/
delete modules (only the before module actually does anything;
it basically just calls HTRequest_addCredentials with the
right information, uuencoded and all).

Thanks for any help,

--will snow
will.snow@eng.sun.com



The pseudo-code looks like this:


/*
 * Load `url` into an in-memory chunk through a freshly created request
 * (output format WWW_SOURCE, preemptive flag set) and print the URL
 * together with the size reported for the chunk.
 *
 * Returns 0 when a chunk was obtained and reported, -1 when `url` is
 * NULL, the request could not be created, or the load yielded no chunk.
 */
int fetchURL(char *url) {
	HTChunk * chunk = NULL;
	HTRequest * request = NULL;
	int size;

	if (url == NULL) {
		return -1;
	}

	request = HTRequest_new();
	if (request == NULL) {
		return -1;
	}

	HTRequest_setOutputFormat(request, WWW_SOURCE);
	HTRequest_setPreemptive(request, YES);

	chunk = HTLoadToChunk(url, request);
	if (chunk == NULL) {
		/* Nothing was loaded: release the request and report failure
		 * instead of querying the size of a NULL chunk. */
		HTRequest_delete(request);
		return -1;
	}

	/* NOTE(review): libwww's chunk API appears to spell this accessor
	 * HTChunk_size -- confirm against HTChunk.h before compiling. */
	size = HTChunkSize(chunk);

	printf("URL: %s Size: %d\n", url, size);

	/* do other stuff here */

	HTChunk_delete(chunk);
	HTRequest_delete(request);

	return 0;
}

/*
 * "Before" authentication callback: builds a Basic credential from a
 * hard-coded "user:password" pair and adds it to `request`.
 *
 * request - request the credential header is added to
 * context - unused
 * mode    - when equal to HT_NO_PROXY_ACCESS the credential is sent as
 *           "Proxy-Authorization", otherwise as "Authorization"
 *
 * Always returns HT_OK.
 */
PUBLIC int authBefore(HTRequest *request, void * context, int mode) 
{
	BOOL proxy = mode==HT_NO_PROXY_ACCESS ? YES : NO;

	static char ct[] = "foo:bar"; /* cleartext uid/passwd */
	char ci[512];                 /* encoded form of ct */
	char co[512+32];              /* "Basic " + ci; 32 spare bytes cover the prefix */

	(void) context;

	/* Assumes HTUU_encode NUL-terminates its output -- the strcat below
	 * relies on it. TODO confirm against HTUU.h. */
	HTUU_encode((unsigned char *) ct, (unsigned int) strlen(ct), ci);
	strcpy(co, "Basic ");
	strcat(co, ci);

	/* Pass the buffer built above; the original referenced an
	 * undeclared identifier here. */
	HTRequest_addCredentials(request,
			proxy ? "Proxy-Authorization" : "Authorization", co);

	return HT_OK;
}
PUBLIC int authDel(void * context) {
    printf("authDel\n");
}

/*
 * "After" authentication callback: prints a trace line and reports
 * success. None of the arguments are inspected.
 *
 * Always returns HT_OK.
 */
PUBLIC int authAfter(HTRequest *request,
                     HTResponse * response,
                     void * context,
                     int status)
{
    /* The callback signature is fixed; silence unused-parameter warnings. */
    (void) request;
    (void) response;
    (void) context;
    (void) status;

    printf("authAfter\n");

    return HT_OK;
}


/*
 * Entry point: expects the URL to fetch as the first command-line
 * argument. Sets up the robot profile, registers the "basic"
 * authentication callbacks, fetches the URL and tears the profile down.
 *
 * Returns 1 (after printing a usage line to stderr) when no URL was
 * given, 0 otherwise.
 */
int main(int argc, char **argv) {
	char *url;

	/* Without this check argv[1] is NULL when no argument is supplied. */
	if (argc < 2 || argv[1] == NULL) {
		fprintf(stderr, "usage: %s <url>\n",
			(argc > 0 && argv[0] != NULL) ? argv[0] : "spider");
		return 1;
	}
	url = argv[1];

	HTProfile_newRobot("Foo", "1.0");
	HTAA_newModule("basic", authBefore, authAfter, authDel);
	fetchURL(url);
	HTProfile_delete();

	return 0;
}
Received on Thursday, 11 June 1998 12:29:36 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Monday, 23 April 2007 18:18:27 GMT