W3C home > Mailing lists > Public > www-lib@w3.org > July to September 2000

Setting the size of a chunk + some optimizations

From: Wayne Davison <wayne@clari.net>
Date: Mon, 7 Aug 2000 17:32:08 -0700 (PDT)
To: www-lib@w3.org
Message-ID: <Pine.GSO.4.21.0008071137270.4520-100000@house.clari.net>
In some code of mine that uses an HTChunk object, I needed to be able
to set the size of the chunk upwards so that I could read some data
into it directly.  While I was adding this function, I noticed that
both HTChunk_clear() and HTChunk_truncate() zeroed-out all of the
allocated memory (which is typically a waste of time unless the
buffer is being written to outside of the indicated size).  Since
the code that appends strings onto the buffer assumes that the data
outside the size is all zeros, I think that it is a good idea to
provide a way to zero just the "size" bytes (while still providing a
way to zero all the allocated bytes, if you so desire).

The appended patch makes the following changes:

 + The new function HTChunk_setSize() is added.  This allows you to
   size a chunk upwards or downwards.  (HTChunk_truncate() still
   exists and still enforces the sizing of the string downwards.)
   Both these string-sizing functions only zero the relevant bytes
   in the "size" region (if the string is being shortened).  Note
   that HTChunk_clear() was left unchanged -- it still clears all of
   the Chunk's allocated memory.

 + Fixed the HT_OUTOFMEM() calls in HTChunk_ensure() to have the
   proper location string.

 + Some code in HTBind.c was optimized to use HTChunk_truncate().

 + HTFTP.c and HTNews.c were optimized to use HTChunk_setSize() rather
   than using HTChunk_ensure() and writing data past the "size".

 + Optimized the code in HTMIME.c and HTML.c to use
   HTChunk_truncate(chunk,0) rather than HTChunk_clear(chunk).

 + Documented the new HTChunk_setSize() function.

 + Improved the comments for HTChunk_clear().

 + Fixed the comments for HTChunk_ensure() (it was describing the
   extra-size parameter incorrectly).

 + Fixed the comments for HTChunk_toCString() (to indicate that the
   function destroys the chunk when handing off the CString data).

 + Fixed a few spelling errors in HTChunk.html.

There are probably other places in the code that could be optimized
to use truncate rather than clear, but I didn't have time to check
each one to see if the chunk's data was properly respecting the size
indicator or not.

..wayne..

---8<------8<------8<------8<---cut here--->8------>8------>8------>8---
Index: Library/src/HTBind.c
@@ -228,7 +228,7 @@
 	    for (; *ptr; ptr++)
 		*ptr = TOLOWER(*ptr);
 	    suff->type = HTAtom_for(HTChunk_data(chunk));
-	    HTChunk_clear(chunk);
+	    HTChunk_truncate(chunk,0);
 	}
 	if (encoding) {
 	    HTChunk_puts(chunk, encoding);
@@ -236,7 +236,7 @@
 	    for (; *ptr; ptr++)
 		*ptr = TOLOWER(*ptr);
 	    suff->encoding = HTAtom_for(HTChunk_data(chunk));
-	    HTChunk_clear(chunk);
+	    HTChunk_truncate(chunk,0);
 	}
 	if (transfer) {
 	    HTChunk_puts(chunk, transfer);
@@ -244,7 +244,7 @@
 	    for (; *ptr; ptr++)
 		*ptr = TOLOWER(*ptr);
 	    suff->transfer = HTAtom_for(HTChunk_data(chunk));
-	    HTChunk_clear(chunk);
+	    HTChunk_truncate(chunk,0);
 	}
 	if (language) {
 	    HTChunk_puts(chunk, language);
@@ -252,7 +252,7 @@
 	    for (; *ptr; ptr++)
 		*ptr = TOLOWER(*ptr);
 	    suff->language = HTAtom_for(HTChunk_data(chunk));
-	    HTChunk_clear(chunk);
+	    HTChunk_truncate(chunk,0);
 	}
 	HTChunk_delete(chunk);
 	suff->quality = value;
Index: Library/src/HTChunk.c
@@ -74,16 +74,31 @@
     return ch ? ch->size : -1;
 }
 
-PUBLIC BOOL HTChunk_truncate (HTChunk * ch, int position)
+PUBLIC BOOL HTChunk_truncate (HTChunk * ch, int length)
 {
-    if (ch && position>=0 && position < ch->size) {
-	ch->size = position;
-	if (ch->data) memset(ch->data+position, '\0', ch->allocated-position);
+    if (ch && length >= 0 && length < ch->size) {
+	memset(ch->data+length, '\0', ch->size-length);
+	ch->size = length;
 	return YES;
     }
     return NO;
 }
 
+/* Set the "size" of the Chunk's data.  The actual allocated length must
+ * be at least 1 byte longer to hold the mandatory null terminator. */
+PUBLIC BOOL HTChunk_setSize (HTChunk * ch, int length)
+{
+    if (ch && length >= 0) {
+	if (length < ch->size)
+	    memset(ch->data+length, '\0', ch->size-length);
+	else if (length >= ch->allocated)
+	    HTChunk_ensure(ch, length - ch->size);
+	ch->size = length;
+	return YES;
+    }
+    return NO;
+}
+
 /*	Create a chunk from an allocated string
 **	---------------------------------------
 */
@@ -175,17 +190,17 @@
 */
 PUBLIC void HTChunk_ensure (HTChunk * ch, int len)
 {
-    if (ch && len) {
+    if (ch && len > 0) {
 	int needed = ch->size+len;
 	if (needed >= ch->allocated) {
 	    ch->allocated = needed - needed%ch->growby + ch->growby;
 	    if (ch->data) {
 		if ((ch->data = (char  *) HT_REALLOC(ch->data, ch->allocated)) == NULL)
-		    HT_OUTOFMEM("HTChunk_putb");
+		    HT_OUTOFMEM("HTChunk_ensure");
 	        memset((void *) (ch->data + ch->size), '\0', ch->allocated-ch->size);
 	    } else {
 		if ((ch->data = (char  *) HT_CALLOC(1, ch->allocated)) == NULL)
-		    HT_OUTOFMEM("ch->data ");
+		    HT_OUTOFMEM("HTChunk_ensure");
 	    }
 	}
     }
Index: Library/src/HTChunk.html
@@ -16,8 +16,8 @@
 <P>
 The Chunk Class defines a way to automatically handle dynamic strings and
 other data types. You create a chunk with an initial size and it will then
-automatically grow to accomodate added data to the chunk. It is a general
-utility module. It is garanteed that the array is <CODE>'\0' </CODE>terminated
+automatically grow to accommodate added data to the chunk. It is a general
+utility module. It is guaranteed that the array is <CODE>'\0' </CODE>terminated
 at all times (and hence is a valid C type string). The method
 <A HREF="HTChunk.html#Terminate">HTChunkTerminate</A> can be used to explicitly
 add a terminating <CODE>'\0'</CODE> and then to include this character in
@@ -60,7 +60,10 @@
 <P>
 Keep the chunk in memory but clear all data kept inside. This can be used
 if you know that you can reuse the allocated memory instead of allocating
-new memory.
+new memory.  This zeros out all the allocated data (even data past the
+indicated size) and sets the size of the chunk to 0.  If you have not used
+any bytes past the indicated size, it is more efficient to truncate the
+chunk to 0 instead.
 <PRE>
 extern void HTChunk_clear (HTChunk * ch);
 </PRE>
@@ -68,11 +71,13 @@
   Ensure a Chunk has a Certain Amount of Free Space
 </H2>
 <P>
-Make sure that a chunk has a certain size. If this is not the case then the
-chunk is expanded. Nothing is done if the current size if bigger than the
-size requested.
+Make sure that a chunk has enough memory allocated to grow by the
+indicated extra size. If this is not the case, then the chunk is expanded
+(in multiples of the chunk's "growby" size).  Nothing is done if the
+current size plus the requested extra space fits within the chunk's
+currently allocated memory.
 <PRE>
-extern void HTChunk_ensure (HTChunk * ch, int s);
+extern void HTChunk_ensure (HTChunk * ch, int extra_size);
 </PRE>
 <H2>
   Append a character to a chunk
@@ -118,15 +123,19 @@
 extern int HTChunk_size (HTChunk * ch);
 </PRE>
 <H2>
-  Truncate Chunk
+  Setting the Size of a Chunk
 </H2>
 <P>
-If for some reason you want to cut off a piece of a chunk then you can
-use this function. It sets the size of the chunk to be
-<CODE>position</CODE>. Clearing the chunk is equivalent to a position
-of 0.
+If you want to cut off a piece of a chunk or extend it to make room
+for some direct buffer manipulation, then you can use one of these
+functions.  Both of these calls set the size of the chunk to be
+<CODE>size</CODE>, but the truncate call only allows you to make the
+string shorter. If the string is made shorter, the formerly-used bytes
+are cleared, so truncating a chunk to 0 is analogous to clearing it,
+but slightly more efficient.
 <PRE>
-extern BOOL HTChunk_truncate (HTChunk * ch, int position);
+extern BOOL HTChunk_truncate (HTChunk * ch, int size);
+extern BOOL HTChunk_setSize (HTChunk * ch, int size);
 </PRE>
 <H2>
   Zero Terminate a chunk
@@ -144,9 +153,9 @@
 </H2>
 <P>
 A Chunk may be built from an allocated string. The chunk assumes control
-of the passed string, elminating the need for additional allocations and
+of the passed string, eliminating the need for additional allocations and
 string copies.<BR>
-Once a string is built, the chunk may be destroyed and the string kept around.
+When you take control of the CString from a chunk, it is destroyed.
 <PRE>
 extern HTChunk * HTChunk_fromCString	(char * str, int grow);
 extern char * HTChunk_toCString		(HTChunk * ch);
Index: Library/src/HTFTP.c
@@ -387,8 +387,7 @@
 {
     int len = strlen(token) + (pars ? strlen(pars)+1:0) + 2;
     HTStream * input = HTRequest_inputStream(request);
-    HTChunk_clear(ctrl->cmd);
-    HTChunk_ensure(ctrl->cmd, len);
+    HTChunk_setSize(ctrl->cmd, len);
     if (pars && *pars)
 	sprintf(HTChunk_data(ctrl->cmd), "%s %s%c%c", token, pars, CR, LF);
     else
Index: Library/src/HTMIME.c
@@ -356,8 +356,8 @@
                         }
 		    }
 	        } else {				/* EOL_LINE */
-		    HTChunk_clear(me->token);
-		    HTChunk_clear(me->value);
+		    HTChunk_truncate(me->token,0);
+		    HTChunk_truncate(me->value,0);
 		    me->haveToken = NO;
 		    me->hash = 0;
 		    value = NULL;
Index: Library/src/HTML.c
@@ -420,7 +420,7 @@
 	break;
 
     case HTML_TITLE:
-        HTChunk_clear(me->title);
+        HTChunk_truncate(me->title,0);
 	break;
     }
 
Index: Library/src/HTNews.c
@@ -308,8 +308,7 @@
 {
     HTStream * input = HTRequest_inputStream(request);
     int len = strlen(token) + (pars ? strlen(pars)+1:0) + 2;
-    HTChunk_clear(news->cmd);
-    HTChunk_ensure(news->cmd, len);
+    HTChunk_setSize(news->cmd, len);
     if (pars && *pars)
 	sprintf(HTChunk_data(news->cmd), "%s %s%c%c", token, pars, CR, LF);
     else
---8<------8<------8<------8<---cut here--->8------>8------>8------>8---
Received on Monday, 7 August 2000 20:32:14 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Monday, 23 April 2007 18:18:37 GMT