- From: Vic Bancroft <bancroft@america.net>
- Date: Sat, 15 Oct 2005 12:28:44 -0400
- To: Sam Varshavchik <mrsam@courier-mta.com>
- CC: jose.kahan@w3.org, Harald Hoyer <harald@redhat.com>, www-lib@w3.org
Getting ready for a new version of cumulative bug fixes, does it look like 5.5.0? Sam Varshavchik wrote: > Back in June, I tried to figure out who contact about this. [...] I > must've overlooked the mailing list. Ya, we tend to keep a low profile . . . in any case, we should go ahead and put out a new revision/version. The ChangeLog.diff prior to your patch looks like, Index: ChangeLog =================================================================== RCS file: /sources/public/libwww/ChangeLog,v retrieving revision 1.50 diff -r1.50 ChangeLog 0a1,64 > 2005-08-01 Vic Bancroft <bancroft@america.net> > > * Library/src/: HTAlert.c, HTHeader.c, HTInit.c, HTNet.c, > HTProfil.c, HTProt.c, HTTrans.c: Patch to greatly speed up > repeated requests, from Arthur Smith > > 2005-07-25 Vic Bancroft <bancroft@america.net> > > * Library/src/HTSQL.c: modifications to compile without using > deprecated mysql functions > * config/: config.sub, ltmain.sh: updates for recent version of > libtool > > 2005-04-04 Jose Kahan <jose@w3.org> > > * INSTALL.html, Library/src/HTEvtLst.c: cleaning > > 2005-03-09 Vic Bancroft <bancroft@america.net> > > * libwww-config.in: include -lwwwssl, thanks to mgoddard at > itgs-presearch.com > > 2005-02-28 Vic Bancroft <bancroft@america.net> > 07:28 vbancrof > > * Library/src/SSL/HTSSLWriter.c: avoids an eternal loop in libwww > (thanks to Steinar Bang) > > 2005-02-27 Vic Bancroft <bancroft@america.net> > > * Library/src/SSL/HTSSL.html, Robot/src/RobotMain.c: fix for webbot > -v option check and documentation addition > * configure.ac, Library/src/SSL/HTSSL.c, > Library/src/SSL/windows/wwwssl.def, Robot/src/HTRobMan.html, > Robot/src/Makefile.am, Robot/src/RobotMain.c: basic support for > client side certificates using PEM format > > 2005-01-23 Vic Bancroft <bancroft@america.net> > > * Library/src/SSL/: HTSSL.c, HTSSLReader.c, HTSSLWriter.c: add > openssl to include for ssl.h and rand.h > * config/: config.guess, config.sub, ltmain.sh: update after > 
running libtoolize > * Robot/src/Makefile.am: use SSL directory for libwwwssl.la > * Robot/src/RobotMain.c: include HTSSL.h > * configure.ac: fix aclocal underquoting warnings > * Robot/src/: RobotMain.c, Makefile.am: update to enable https > protocol > > 2005-01-05 Martin Duerst <duerst@w3.org> > > * Library/src/HTTPReq.c: fixed , to _ in HTTRACE call > * Library/src/HTTPReq.c: removed LIBWWW_USEIDN, because unnecessary > * modules/idn/unicode_template.c: forgot one file > * Library/src/HTDNS.html: moved IDN to main branch > * Library/src/HTDNS.c: moved IDN to main branch > * Library/src/HTTPReq.c: added "LIBWWW_USEIDN" conditional > * Library/src/HTTPReq.c: moved IDN to main branch > > 2004-01-29 Jose Kahan <jose@w3.org> > > * Library/Overview.html: JK: Added the libwww survey results > > > While waiting for the reply, I ran into even more problems with > HTBound.c, so I just ended up rewriting it from the beginning. Okay, your patch looks like, 16a18,19 > ** SV Jun 05 Rewrote HTBoundary_put_block. Fixed many bugs+segfaults. > ** SV Jul 05 Fix double-counting of processed bytes. 25a29,30 > #include "HTNetMan.h" > #include "HTChannl.h" 28c33,34 < #define PUTBLOCK(b, l) (*me->target->isa->put_block)(me->target, b, l) --- > #define PUTBLOCK(b, l) (me->target ? 
(*me->target->isa->put_block)(me->target, b, l):HT_OK) > 33a40 > HTNet * net; 39,41d45 < BOOL body; /* Body or preamble|epilog */ < HTEOLState state; < int dash; /* Number of dashes */ 43c47,52 < char * bpos; --- > > BOOL keptcrlf; > int (*state)(HTStream *, const char *, int); > > char *boundary_ptr; > 45a55,56 > PRIVATE int HTBoundary_flush (HTStream * me); > 47a59,73 > PRIVATE int start_of_line (HTStream * me, const char * b, int l); > PRIVATE int seen_dash (HTStream * me, const char * b, int l); > PRIVATE int seen_doubledash (HTStream * me, const char * b, int l); > PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l); > PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l); > PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l); > PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l); > PRIVATE int seen_delimiter_terminal_CR(HTStream * me, const char * b, int l); > PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra); > PRIVATE int seen_nothing(HTStream * me, const char * b, int l); > PRIVATE int seen_cr(HTStream * me, const char * b, int l); > PRIVATE void process_boundary(HTStream *me, int isterminal); > > #define UNUSED(l) (l=l) /* Shut up about unused variables */ > 50,79c76,179 < const char *start = b; < const char *end = b; < while (l-- > 0) { < if (me->state == EOL_FCR) { < me->state = (*b == LF) ? EOL_FLF : EOL_BEGIN; < } else if (me->state == EOL_FLF) { < if (me->dash == 2) { < while (l>0 && *me->bpos && *me->bpos==*b) l--, me->bpos++, b++; < if (!*me->bpos) { < HTTRACE(STREAM_TRACE, "Boundary.... 
`%s\' found\n" _ me->boundary); < me->bpos = me->boundary; < me->body = YES; < me->state = EOL_DOT; < } else if (l>0) { < me->dash = 0; < me->bpos = me->boundary; < me->state = EOL_BEGIN; < } < } < if (*b == '-') { < me->dash++; < } else if (*b != CR && *b != LF) { < me->dash = 0; < me->state = EOL_BEGIN; < } < } else if (me->state == EOL_SLF) { /* Look for closing '--' */ < if (me->dash == 4) { < if (end > start) { < int status = PUTBLOCK(start, end-start); < if (status != HT_OK) return status; --- > /* > ** The HTBoundary object gets attached downstream of HTMime. > ** The HTBoundary object creates another HTMime object downstream of > ** the HTBoundary object. > ** > ** When we push data downstream to the second HTBoundary object, it > ** updates the bytes read count in the HTNet object. > ** > ** When we return to the parent HTMime object, it updates the > ** bytes read count in the HTNet object again. Oops. > ** > ** Same thing happens with the consumed byte count. We can prevent > ** the consumed byte counts from being updated by temporarily setting > ** the input channel stream pointer to NULL, but for the byte counts > ** we have to save them and restore them before exiting. > ** > ** This bug was discovered by chance when a multipart/partial response > ** was partially received, and as a result of double-counting the > ** real response got cut off (because HTMime thought that more bytes > ** were processed than actually were, thus it processed only the > ** partial count of the remaining bytes in the response). When the > ** multipart/partial response was received all at once this bug did > ** not get triggered. 
> */ > > HTHost *host=HTNet_host(me->net); > HTChannel *c=HTHost_channel(host); > HTInputStream *i=HTChannel_input(c); > > long saveBytesRead=HTNet_bytesRead(me->net); > long saveHeaderBytesRead=HTNet_headerBytesRead(me->net); > > if (i) > HTChannel_setInput(c, NULL); > > HTTRACE(STREAM_TRACE, "Boundary: processing %d bytes\n" _ l); > /* Main loop consumes all input */ > > while (l) > { > int n= (*me->state)(me, b, l); > > if (n == 0) > return HT_ERROR; > b += n; > l -= n; > } > > if (i) > HTChannel_setInput(c, i); > HTNet_setBytesRead(me->net, saveBytesRead); > HTNet_setHeaderBytesRead(me->net, saveHeaderBytesRead); > > return HT_OK; > } > > /* > ** Start of line, keptcrlf=YES if we've kept the preceding CRLF from downstream > ** and we'll pass it along if we decide that this is not a boundary delimiter. > */ > > PRIVATE int start_of_line (HTStream * me, const char * b, int l) > { > if (*b != '-') > return not_delimiter(me, b, l, 0); > > HTTRACE(STREAM_TRACE, "Boundary: start of line: input '-'\n"); > > me->state= seen_dash; > > return 1; > } > > /* > ** Line: - > */ > > PRIVATE int seen_dash (HTStream * me, const char * b, int l) > { > if (*b != '-') > return not_delimiter(me, b, l, 1); > > HTTRACE(STREAM_TRACE, "Boundary: start of line: input '--'\n"); > > me->state= seen_doubledash; > me->boundary_ptr=me->boundary; > return 1; > } > > /* > ** Line: -- > */ > > PRIVATE int seen_doubledash (HTStream * me, const char * b, int l) > { > me->state=seen_doubledash; > > if (*me->boundary_ptr) > { > if (*b != *me->boundary_ptr) > { > return not_delimiter(me, b, l, > me->boundary_ptr - me->boundary > + 2); 81,96c181,411 < HTTRACE(STREAM_TRACE, "Boundary.... 
Ending\n"); < start = b; < me->dash = 0; < me->state = EOL_BEGIN; < } < if (*b == '-') { < me->dash++; < } else if (*b != CR && *b != LF) { < me->dash = 0; < me->state = EOL_BEGIN; < } < me->body = NO; < } else if (me->state == EOL_DOT) { < int status; < if (me->body) { < if (me->target) FREE_TARGET; --- > ++me->boundary_ptr; > return 1; > } > > /* > ** Line: --delimiter > */ > > if (*b == '-') > { > HTTRACE(STREAM_TRACE, > "Boundary: start of line: input '--%s-'\n" > _ me->boundary); > > me->state=seen_delimiter_dash; > return 1; > } > > HTTRACE(STREAM_TRACE, > "Boundary: Found: '--%s'\n" _ me->boundary); > > return seen_delimiter_nonterminal(me, b, l); > } > > /* > ** Line: --delimiter > ** > ** Waiting for CRLF. > */ > > > PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l) > { > UNUSED(l); > > me->state=seen_delimiter_nonterminal; > if (*b == CR) > me->state=seen_delimiter_nonterminal_CR; > > return 1; > } > > /* > ** Line: --delimiter<CR> > */ > > PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l) > { > HTTRACE(STREAM_TRACE, > "Boundary: Found: '--%s<CR>'\n" _ me->boundary); > > if (*b != LF) > return seen_delimiter_nonterminal(me, b, l); > > HTTRACE(STREAM_TRACE, > "Boundary: Found: '--%s<CR><LF>'\n" _ me->boundary); > > process_boundary(me, NO); > return 1; > } > > /* > ** Line: --delimiter- > */ > > PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l) > { > if (*b != '-') > return seen_delimiter_nonterminal(me, b, l); > > HTTRACE(STREAM_TRACE, > "Boundary: start of line: input '--%s--'\n" > _ me->boundary); > > me->state=seen_delimiter_terminal; > return 1; > } > > /* > ** Line: --delimiter-- > */ > > PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l) > { > UNUSED(l); > > me->state=seen_delimiter_terminal; > > if (*b == CR) > me->state=seen_delimiter_terminal_CR; > return 1; > } > /* > ** Line: --delimiter--<CR> > */ > > PRIVATE int 
seen_delimiter_terminal_CR(HTStream * me, const char * b, int l) > { > HTTRACE(STREAM_TRACE, > "Boundary: Found '--%s--<CR>'\n" > _ me->boundary); > > if (*b != LF) > return seen_delimiter_terminal(me, b, l); > HTTRACE(STREAM_TRACE, > "Boundary: Found '--%s--<CR><LF>'\n" > _ me->boundary); > > process_boundary(me, YES); > return 1; > } > > /* > ** Beginning of the line does not contain a delimiter. > ** > ** > ** extra: Count of characters in a partially matched delimiter. Since it's > ** not a delimiter this is content that needs to go downstream. > */ > > PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra) > { > HTTRACE(STREAM_TRACE, "Boundary: not a delimiter line\n"); > > if (me->keptcrlf) > { > HTTRACE(STREAM_TRACE, "Boundary: Sending previous line's <CR><LF>\n"); > /* > ** Did not process CRLF from previous line, because prev CRLF > ** is considered a part of the delimiter. See MIME RFC. > */ > > me->keptcrlf=NO; > if (PUTBLOCK("\r\n", 2) != HT_OK) > return 0; > } > > /* > ** Potentially matched some of: --DELIMITER > */ > > if (extra) > { > HTTRACE(STREAM_TRACE, "Boundary: Sending partially-matched %d characters\n" _ extra); > > if (PUTBLOCK("--", extra > 2 ? 2:extra) != HT_OK) > return 0; > > if (extra > 2) > if (PUTBLOCK(me->boundary, extra-2) != HT_OK) > return 0; > } > return seen_nothing(me, b, l); > } > > /* > ** We're not looking for a delimiter. Look for the next line of input > ** in the data that could potentially be a delimiter. > */ > > PRIVATE int seen_nothing(HTStream * me, const char * b, int l) > { > int i; > > me->state=seen_nothing; > > for (i=0; i<l; i++) > { > if (b[i] != CR) > continue; > > /* > ** If we have at least four more characters in unconsumed > ** input, and they're not \r\n--, we can safely skip over > ** them. > */ > > if (l-i > 4 && > strncmp(b+i, "\r\n--", 4)) > continue; > break; > } > > if (i == 0) > { > /* Could only be a CR here. 
*/ > > me->state=seen_cr; > return 1; > } > > HTTRACE(STREAM_TRACE, "Boundary: Processed %d (out of %d) bytes\n" > _ i _ l); > > if (PUTBLOCK(b, i) != HT_OK) > return 0; > > return i; > } > > /* > ** State: seen a CR > */ > > PRIVATE int seen_cr(HTStream * me, const char * b, int l) > { > HTTRACE(STREAM_TRACE, "Boundary: Processed <CR>\n"); > > if (*b != LF) > { > HTTRACE(STREAM_TRACE, "Boundary: ... <LF> didn't follow\n"); > if (PUTBLOCK("\r", 1) != HT_OK) > return 0; > return seen_nothing(me, b, l); > } > > HTTRACE(STREAM_TRACE, "Boundary: Processed <CR><LF>\n"); > me->state=start_of_line; > me->keptcrlf=YES; > return 1; > } > > PRIVATE void process_boundary(HTStream *me, int isterminal) > { > HTBoundary_flush(me); > if (me->target) FREE_TARGET; > me->target=NULL; > me->state=start_of_line; > me->keptcrlf=NO; > > if (!isterminal) 100,121d414 < if (end > start) { < if ((status = PUTBLOCK(start, end-start)) != HT_OK) < return status; < } < } else { < if (me->debug) < if ((status = PUTDEBUG(start, end-start)) != HT_OK) < return status; < } < start = b; < if (*b == '-') me->dash++; < me->state = EOL_SLF; < } else if (*b == CR) { < me->state = EOL_FCR; < end = b; < } else if (*b == LF) { < if (me->state != EOL_FCR) end = b; < me->state = EOL_FLF; < } < b++; < } < return (start<b && me->body) ? PUTBLOCK(start, b-start) : HT_OK; 123a417 > 136c430,432 < return (*me->target->isa->flush)(me->target); --- > if (me->target == NULL) > return HT_OK; > return (*me->target->isa->flush)(me->target); 184a481,484 > > UNUSED(param); > UNUSED(input_format); > 190c490,491 < me->request = request; --- > me->net = HTRequest_net(request); > me->request = request; 194c495,498 < me->state = EOL_FLF; --- > > me->state = start_of_line; > me->keptcrlf=NO; > 196c500 < me->bpos = me->boundary; --- > We might want to apply that to our sandboxes and see if anything else breaks . . . more, l8r, v -- "The future is here. It's just not evenly distributed yet." 
-- William Gibson, quoted by Whitfield Diffie
Received on Saturday, 15 October 2005 16:28:55 UTC