To: marca@ncsa.uiuc.edu
Subject: xmosaic & libHTML: it works!
Date: Fri, 29 Jan 93 14:41:57 CST
From: Dan Connolly

I replaced the HTMLparse.c file in your distribution with one that uses libHTML to do most of the dirty work. It builds the same data structures your parsing code built (modulo some whitespace, I think).

This new version of the HTMLParse routine puts all the whitespace returned by the SGML parser in the mark->text. I think your routines used to strip some whitespace out based on the preformat variable. You'll have to change either the data() or save_text() routines in HTMLparse.c or your routines in HTMLformat.c to deal with the different handling of whitespace.

You can get libHTML on info.cern.ch in pub/www/dev/libHTML-930121.tar.Z.

I had to change HTML.h to use upper-case for the element names to be consistent with the way libHTML handles names.

I hope you'll be able to use this parsing code! It handles all the comments and other lexical weirdnesses of SGML.

Oh! I made kind of a gross hack to deal with attributes. libHTML parses attributes and returns an array of name/value pairs (each its own char*). I didn't want to stringify all the values and stick them in mark->start, only to have them parsed back out again (incorrectly, I might add :-). So I just stored the value of the HREF attribute on A tags in mark->start. All other attributes are lost. We should probably come up with a better strategy to save the attributes in the mark_up structure so the formatter will have them.

Dan

# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
# # Wrapped by imagine!connolly on Fri Jan 29 14:32:33 CST 1993 # Contents: HTML.h HTMLparse.c HTMLformat.c echo x - HTML.h sed 's/^@//' > "HTML.h" <<'@//E*O*F HTML.h//' #ifndef HTML_H #define HTML_H #ifdef MOTIF #include #else #include #include #endif /* MOTIF */ #include /* * Public functions */ #ifdef _NO_PROTO extern char *HTMLGetText (); #else extern char *HTMLGetText (Widget w); #endif /* _NO_PROTO */ /* * defines and structures used for the formatted element list */ #define E_TEXT 1 #define E_BULLET 2 #define E_LINEFEED 3 struct ele_rec { int type; XFontStruct *font; Boolean selected; Boolean internal; int x, y; int line_number; unsigned long fg; unsigned long bg; char *anchorHRef; char *edata; int edata_len; struct ele_rec *next; struct ele_rec *prev; }; struct page_rec { int pnum; int pheight; int line_num; struct ele_rec *elist; }; /* * defines and structures used for the HTML parser, and the * parsed object list. */ /* Mark types */ #define M_UNKNOWN -1 #define M_NONE 0 #define M_TITLE 1 #define M_HEADER_1 2 #define M_HEADER_2 3 #define M_HEADER_3 4 #define M_HEADER_4 5 #define M_HEADER_5 6 #define M_HEADER_6 7 #define M_ANCHOR 8 #define M_PARAGRAPH 9 #define M_ADDRESS 10 #define M_PLAIN_TEXT 11 #define M_UNUM_LIST 12 #define M_LIST_ITEM 13 #define M_DESC_LIST 14 #define M_DESC_TITLE 15 #define M_DESC_TEXT 16 #define M_PREFORMAT 17 #define M_PLAIN_FILE 18 /* syntax of Mark types */ #define MT_TITLE "TITLE" #define MT_HEADER_1 "H1" #define MT_HEADER_2 "H2" #define MT_HEADER_3 "H3" #define MT_HEADER_4 "H4" #define MT_HEADER_5 "H5" #define MT_HEADER_6 "H6" #define MT_ANCHOR "A" #define MT_PARAGRAPH "P" #define MT_ADDRESS "ADDRESS" #define MT_PLAIN_TEXT "XMP" #define MT_UNUM_LIST "UL" #define MT_LIST_ITEM "LI" #define MT_DESC_LIST "DL" #define MT_DESC_TITLE "DT" #define MT_DESC_TEXT "DD" #define MT_PREFORMAT "PRE" #define MT_PLAIN_FILE "PLAINTEXT" /* amperstand escapes */ #define A_LESS_THAN "<" #define A_GREATER_THAN ">" #define A_AMPERSTAND "&" /* anchor 
tags */ #define AT_NAME "name" #define AT_HREF "href" struct mark_up { int type; int is_end; char *start; char *text; char *end; struct mark_up *next; }; /* * New resource names */ #define WbNmarginWidth "marginWidth" #define WbNmarginHeight "marginHeight" #define WbNtext "text" #define WbNanchorColor "anchorColor" #define WbNactiveAnchorFG "activeAnchorFG" #define WbNactiveAnchorBG "activeAnchorBG" #define WbNautoSize "autoSize" #define WbNtitleFont "titleFont" #define WbNheader1Font "header1Font" #define WbNheader2Font "header2Font" #define WbNheader3Font "header3Font" #define WbNheader4Font "header4Font" #define WbNheader5Font "header5Font" #define WbNheader6Font "header6Font" #define WbNaddressFont "addressFont" #define WbNplainFont "plainFont" #define WbNanchorCallback "anchorCallback" #define WbNdocumentPageCallback "documentPageCallback" #define WbNpageHeight "pageHeight" /* * New resource classes */ #define WbCMarginWidth "MarginWidth" #define WbCMarginHeight "MarginHeight" #define WbCText "Text" #define WbCAnchorColor "AnchorColor" #define WbCActiveAnchorFG "ActiveAnchorFG" #define WbCActiveAnchorBG "ActiveAnchorBG" #define WbCAutoSize "AutoSize" #define WbCTitleFont "TitleFont" #define WbCHeader1Font "Header1Font" #define WbCHeader2Font "Header2Font" #define WbCHeader3Font "Header3Font" #define WbCHeader4Font "Header4Font" #define WbCHeader5Font "Header5Font" #define WbCHeader6Font "Header6Font" #define WbCAddressFont "AddressFont" #define WbCPlainFont "PlainFont" #define WbCPageHeight "PageHeight" typedef struct _HTMLClassRec *HTMLWidgetClass; typedef struct _HTMLRec *HTMLWidget; extern WidgetClass htmlWidgetClass; #endif /* HTML_H */ @//E*O*F HTML.h// chmod u=rw,g=r,o=r HTML.h echo x - HTMLparse.c sed 's/^@//' > "HTMLparse.c" <<'@//E*O*F HTMLparse.c//' #include #include "HTML.h" #include "HMDoc.h" #include "HTMLdtd.h" #include "object.h" extern void FreeObjList(); extern struct mark_up *AddObj(); static HMStartTagProc startTag; static HMEndTagProc 
endTag; static HMDataProc data; HMDoc_Class MarkList = {0, 0, 0, startTag, endTag, data, html_entity_text}; static HMGetcProc buffer_getc; static void save_text(HMDoc* doc); struct _HMDoc{ int preformat; char* nextchar; char* plaintext; char* buffer; int cur, tot; /* size_t? @@*/ struct mark_up *list; struct mark_up *current; }; /* * Main parser of HTML text. Takes raw text, and produces a linked * list of mark objects. Mark objects are either text strings, or * starting and ending mark delimiters. * The old list is passed in so it can be freed, and in the future we * may want to add code to append to the old list. */ struct mark_up * HTMLParse(old_list, str) struct mark_up *old_list; char *str; { HMDoc doc; /* * Free up the previous Object List if one exists */ if(old_list) FreeObjList(old_list); if (str == NULL){ return(NULL); } doc.nextchar = str; doc.buffer = 0; doc.cur = doc.tot = 0; doc.plaintext = 0; doc.list = NULL; doc.current = NULL; doc.preformat = 0; SGML_parseInstance((HMStream)&(doc.nextchar), buffer_getc, &doc, &MarkList); save_text(&doc); if(doc.plaintext){ /* * If last tag was we lump all the rest of * the text in. 
*/ char* text = doc.plaintext; char* tptr = (char *)malloc(strlen(text) + 1); struct mark_up *mark = (struct mark_up *)malloc(sizeof(struct mark_up)); if (tptr == NULL) { fprintf(stderr, "Cannot malloc space for text\n"); return(doc.list); } strcpy(tptr, text); text = tptr; if (mark == NULL) { fprintf(stderr, "Cannot malloc for mark_up struct\n"); return(doc.list); } mark->type = M_NONE; mark->is_end = 0; mark->start = NULL; mark->text = text; mark->end = NULL; mark->next = NULL; doc.current = AddObj(&(doc.list), doc.current, mark, 1); } return doc.list; } static int buffer_getc(HMStream in) { unsigned char** p = (unsigned char**)in; int c = **p; if(c == 0) /* end of string*/ return EOF; else{ /* bump buffer pointer */ (*p)++; return c; } } static void save_text(HMDoc* doc) { if(doc->cur>0){ struct mark_up *mark = NEW(struct mark_up, 1); mark->type = M_NONE; mark->is_end = 0; mark->start = 0; mark->text = doc->buffer; mark->end = 0; mark->next = 0; doc->current = AddObj(&(doc->list), doc->current, mark, doc->preformat); doc->buffer = 0; doc->cur = doc->tot = 0; } } static int startTag(HMDoc* doc, const char* gi, const HMBinding* attrs, int nattrs) { struct mark_up *mark = NEW(struct mark_up, 1); save_text(doc); mark->type = ChooseMarkType(gi); mark->is_end = 0; switch(mark->type){ caseM_PLAIN_FILE: /* <PLAINTEXT> ends SGML text entity */ doc->plaintext = doc->nextchar; doc->nextchar = ""; /* simulate EOF on next getchar */ break; case M_PLAIN_TEXT: case M_PREFORMAT: doc->preformat = 1; break; } mark->start = 0; if(mark->type == M_ANCHOR){ int i; for(i = 0; i<nattrs; i++){ if(!strcmp(attrs[i].name, "HREF")){ mark->start = strdup(attrs[i].value); /* check return val@@ */ break; } } } mark->end = 0; mark->text = 0; doc->current = AddObj(&(doc->list), doc->current, mark, doc->preformat); return HTML_content(gi); } static void endTag(HMDoc* doc, const char* gi) { struct mark_up *mark = NEW(struct mark_up, 1); save_text(doc); mark->type = ChooseMarkType(gi); mark->is_end 
= 1; mark->start = 0; mark->end = 0; /*@@*/ mark->text = 0; doc->current = AddObj(&(doc->list), doc->current, mark, doc->preformat); switch(mark->type){ case M_PLAIN_TEXT: /*@@ any end tag should end <XMP> and <LITERAL> */ case M_PREFORMAT: doc->preformat = 0; break; } } static void data(HMDoc* doc, const char* chars, int nchars) { if(nchars + doc->cur >= doc->tot){ /* no room in the buffer! */ /* allocate some more; remember how much */ char* newbuf = NEW(char, (doc->tot = ((doc->tot * 3/2) + nchars + 1))); /* copy the old stuff if necessary */ if(doc->cur > 0){ strcpy(newbuf, doc->buffer); free(doc->buffer); } doc->buffer = newbuf; } /* copy the new stuff */ memcpy(doc->buffer + doc->cur, chars, nchars); doc->cur += nchars; doc->buffer[doc->cur] = 0; /* null terminate the buffer */ } /* * Determine mark type from the identifier */ int ChooseMarkType(str) char *str; { if (str == NULL) { return(M_NONE); } if (!strcmp(str, MT_TITLE)) { return(M_TITLE); } else if (!strcmp(str, MT_HEADER_1)) { return(M_HEADER_1); } else if (!strcmp(str, MT_HEADER_2)) { return(M_HEADER_2); } else if (!strcmp(str, MT_HEADER_3)) { return(M_HEADER_3); } else if (!strcmp(str, MT_HEADER_4)) { return(M_HEADER_4); } else if (!strcmp(str, MT_HEADER_5)) { return(M_HEADER_5); } else if (!strcmp(str, MT_HEADER_6)) { return(M_HEADER_6); } else if (!strcmp(str, MT_ADDRESS)) { return(M_ADDRESS); } else if (!strcmp(str, MT_PLAIN_TEXT)) { return(M_PLAIN_TEXT); } else if (!strcmp(str, MT_PLAIN_FILE)) { return(M_PLAIN_FILE); } else if (!strcmp(str, MT_PARAGRAPH)) { return(M_PARAGRAPH); } else if (!strcmp(str, MT_UNUM_LIST)) { return(M_UNUM_LIST); } else if (!strcmp(str, MT_LIST_ITEM)) { return(M_LIST_ITEM); } else if (!strcmp(str, MT_DESC_LIST)) { return(M_DESC_LIST); } else if (!strcmp(str, MT_DESC_TITLE)) { return(M_DESC_TITLE); } else if (!strcmp(str, MT_DESC_TEXT)) { return(M_DESC_TEXT); } else if (!strcmp(str, MT_PREFORMAT)) { return(M_PREFORMAT); } else if (!strcmp(str, MT_ANCHOR)) { 
return(M_ANCHOR); } else{ #ifdef VERBOSE fprintf(stderr, "warning: unknown mark (%s)\n", str); #endif return(M_UNKNOWN); } } @//E*O*F HTMLparse.c// chmod u=rw,g=r,o=r HTMLparse.c echo x - HTMLformat.c sed 's/^@//' > "HTMLformat.c" <<'@//E*O*F HTMLformat.c//' #ifdef TIMING #include <sys/time.h> struct timeval Tv; struct timezone Tz; #endif #include <stdio.h> #include "HTMLP.h" #define INDENT_SPACES 2 #define D_NONE 0 #define D_TITLE 1 #define D_TEXT 2 extern struct ele_rec *AddEle(); extern void FreeLineList(); extern void FreeObjList(); extern int SwapElements(); extern struct ele_rec **MakeLineList(); extern struct mark_up *HTMLParse(); static XFontStruct *currentFont; static XFontStruct *saveFont; static unsigned long Fg; static unsigned long Bg; static int Width; static int LineNumber; static int LineHeight; static int TextIndent; static int MarginW; static int Centered; static int Ignore; static int Preformat; static int PF_LF_State; /* Pre-formatted linefeed state. Hack for bad HTMLs */ static int DescType; /* D_NONE, D_TITLE, D_TEXT */ static Boolean NeedSpace; static Boolean Internal; static int CenterWidth; static int CenterX; static int CenterY; static struct ele_rec *CenterStart; static struct ele_rec *CenterEnd; static struct ele_rec *Current; static char *AnchorText; static struct mark_up *Last; /* * Create a formatted element */ struct ele_rec * CreateElement(type, fp, x, y, edata) int type; XFontStruct *fp; int x, y; char *edata; { struct ele_rec *eptr; eptr = (struct ele_rec *)malloc(sizeof(struct ele_rec)); if (eptr == NULL) { fprintf(stderr, "Cannot allocate space for element buffer\n"); exit(1); } eptr->type = type; eptr->font = fp; eptr->selected = False; eptr->internal = Internal; eptr->x = x; eptr->y = y; eptr->line_number = LineNumber; eptr->fg = Fg; eptr->bg = Bg; switch(type) { case E_TEXT: eptr->edata_len = strlen(edata); eptr->edata = (char *)malloc(eptr->edata_len + 1); if (eptr->edata == NULL) { eptr->edata_len = 0; fprintf(stderr, 
"Cannot allocate space for copy of text element data\n"); exit(1); } strcpy(eptr->edata, edata); /* * if this is an anchor, puts its href value into * the element. */ if (AnchorText != NULL) { eptr->anchorHRef = strdup(AnchorText); } else { eptr->anchorHRef = NULL; } break; case E_BULLET: eptr->edata = NULL; eptr->edata_len = 0; eptr->anchorHRef = NULL; break; case E_LINEFEED: eptr->edata = NULL; eptr->edata_len = 0; /* * if this linefeed is part of a broken anchor put * its href value into the element so we can reconnect * it when activated. */ if (AnchorText != NULL) { eptr->anchorHRef = strdup(AnchorText); } else { eptr->anchorHRef = NULL; } break; default: fprintf(stderr, "CreateElement: Unknown type %d\n", type); eptr->edata = NULL; eptr->edata_len = 0; eptr->anchorHRef = NULL; break; } return(eptr); } /* * Set the formatted element into the format list. Use a pre-allocated * list position if possible, otherwise allocate a new list position. */ void SetElement(hw, type, fp, x, y, edata) HTMLWidget hw; int type; XFontStruct *fp; int x, y; char *edata; { struct ele_rec *eptr; int len; /* * There is not pre-allocated format list, or we have reached * the end of the pre-allocated list. Create a new element, and * add it. */ if ((hw->html.formatted_elements == NULL)|| ((Current != NULL)&&(Current->next == NULL))) { eptr = CreateElement(type, fp, x, y, edata); Current = AddEle(&(hw->html.formatted_elements), Current, eptr); return; } /* * If current is null, but we have a pre-allocated format list, then * this is the first SetElement() call for this formated text, and * we must set current to the head of the formatted list. Otherwise * we move current to the next pre-allocated list position. 
*/ if (Current == NULL) { Current = hw->html.formatted_elements; } else { Current = Current->next; } eptr = Current; if (eptr == NULL) { fprintf(stderr, "SetElement: Error, setting a null element\n"); exit(1); } eptr->type = type; eptr->font = fp; eptr->selected = False; eptr->internal = Internal; eptr->x = x; eptr->y = y; eptr->line_number = LineNumber; eptr->fg = Fg; eptr->bg = Bg; switch(type) { case E_TEXT: len = strlen(edata); if (len > eptr->edata_len) { if (eptr->edata != NULL) { free((char *)eptr->edata); } eptr->edata = (char *)malloc(len + 1); if (eptr->edata == NULL) { eptr->edata_len = 0; fprintf(stderr, "Cannot allocate space for copy of text element data\n"); exit(1); } } eptr->edata_len = len; /* Trying to get around coredump... */ if (edata[0]) strcpy(eptr->edata, edata); else eptr->edata = strdup (" "); /* * if this is an anchor, puts its href value into * the element. */ if (eptr->anchorHRef != NULL) { free((char *)eptr->anchorHRef); } if (AnchorText != NULL) { eptr->anchorHRef = strdup(AnchorText); } else { eptr->anchorHRef = NULL; } break; case E_BULLET: if (eptr->edata != NULL) { free((char *)eptr->edata); } eptr->edata = NULL; eptr->edata_len = 0; if (eptr->anchorHRef != NULL) { free((char *)eptr->anchorHRef); } eptr->anchorHRef = NULL; break; case E_LINEFEED: if (eptr->edata != NULL) { free((char *)eptr->edata); } eptr->edata = NULL; eptr->edata_len = 0; /* * if this linefeed is part of a broken anchor put * its href value into the element so we can reconnect * it when activated. 
*/ if (eptr->anchorHRef != NULL) { free((char *)eptr->anchorHRef); } if (AnchorText != NULL) { eptr->anchorHRef = strdup(AnchorText); } else { eptr->anchorHRef = NULL; } break; default: fprintf(stderr, "SetElement: Unknown type %d\n", type); if (eptr->edata != NULL) { free((char *)eptr->edata); } eptr->edata = NULL; eptr->edata_len = 0; if (eptr->anchorHRef != NULL) { free((char *)eptr->anchorHRef); } eptr->anchorHRef = NULL; break; } } /* * Initialize local static variables for beginning of a block * of centered text. */ void BeginCenter(x, y) int x, y; { CenterWidth = 0; CenterX = x; CenterY = y; CenterStart = Current; CenterEnd = CenterStart; } /* * Center the current line of text */ void FlushCenter(xp, yp) int *xp, *yp; { int x, y, offset; struct ele_rec *eptr; /* * If there is nothing to be centered, just return */ if (CenterStart == NULL) { return; } /* * CenterStart, and CenterEnd point to a list of formatted elements * which must be centered. Find center, and offset these elements * to place them there. */ CenterEnd = Current; x = MarginW + ((Width - (2 * MarginW) - CenterWidth) / 2); y = CenterY; offset = x - CenterStart->x; /* * Realign from CenterStart to CenterEnd */ eptr = CenterStart; while (eptr != NULL) { eptr->x = eptr->x + offset; if (eptr == CenterEnd) { break; } eptr = eptr->next; } /* * Clean up */ *xp = x + CenterWidth; *yp = eptr->y; CenterStart = NULL; CenterEnd = NULL; } /* * Place a linefeed at the end of a line. * Create and add the element record for it. */ void LinefeedPlace(hw, x, y) HTMLWidget hw; int *x, *y; { SetElement(hw, E_LINEFEED, currentFont, *x, *y, (char *)NULL); /* * After adding an element, we need to check, and if we are starting * a centered block, we need to save location info */ if (Centered == 1) { Centered = 2; BeginCenter(*x, *y); } } /* * We have encountered a line break. Incrment the line counter, * and move down some space. If we are centering, flush the line * just before this linefeed, and then continue. 
*/ void LineFeed(hw, x, y) HTMLWidget hw; int *x, *y; { /* * Manipulate linefeed state for special pre-formatted linefeed * hack for broken HTMLs */ if (Preformat) { switch(PF_LF_State) { /* * First soft linefeed */ case 0: PF_LF_State = 1; break; /* * Collapse multiple soft linefeeds within a pre */ case 1: return; break; /* * Ignore soft linefeeds after hard linefeeds * within a pre */ case 2: return; break; default: PF_LF_State = 1; break; } } LinefeedPlace(hw, x, y); if (Centered == 2) { FlushCenter(x, y); Centered = 1; } *x = TextIndent; *y = *y + LineHeight; NeedSpace = False; LineNumber++; } /* * hack to make broken HTMLs within pre-formatted text have nice * looking linefeeds. */ void HardLineFeed(hw, x, y) HTMLWidget hw; int *x, *y; { /* * Manipulate linefeed state for special pre-formatted linefeed * hack for broken HTMLs */ if (Preformat) { switch(PF_LF_State) { /* * First hard linefeed */ case 0: PF_LF_State = 2; break; /* * Previous soft linefeed should have been ignored, so * ignore this hard linefeed, but set state like it * was not ignored. */ case 1: PF_LF_State = 2; return; break; /* * Honor multiple hard linefeeds. */ case 2: break; default: PF_LF_State = 2; break; } } LinefeedPlace(hw, x, y); if (Centered == 2) { FlushCenter(x, y); Centered = 1; } *x = TextIndent; *y = *y + LineHeight; NeedSpace = False; LineNumber++; } /* * Place the bullet at the beginning of an unnumbered * list item. Create and add the element record for it. */ void BulletPlace(hw, x, y) HTMLWidget hw; int *x, *y; { int width; NeedSpace = False; width = hw->html.font->max_bounds.width; SetElement(hw, E_BULLET, hw->html.font, *x, *y, (char *)NULL); /* * After adding an element, we need to check, and if we are starting * a centered block, we need to save location info */ if (Centered == 1) { Centered = 2; BeginCenter(*x, *y); } } /* * Place a piece of pre-formatted text. Add an element record for it. 
*/ void PreformatPlace(hw, mptr, x, y, width) HTMLWidget hw; struct mark_up *mptr; int *x, *y; unsigned int width; { char *text; char *start; char *end; char *ptr; char tchar; int len; int dir, ascent, descent; XCharStruct all; text = mptr->text; end = text; while (*end != '\0') { /* * make start and end point to one word. A word is either * a lone linefeed, or all whitespace before a word, plus * the text of the word itself. */ start = end; /* * Throw out carriage returns and form-feeds */ if ((*end == '\r')||(*end == '\f')) { start++; end++; } else if (*end == '\n') { end++; } else { while ((*end == ' ')||(*end == '\t')) { end++; } while ((!isspace(*end))&&(*end != '\0')) { end++; } } /* * Add the word to the end of this line, or insert * a linefeed if the word is a lone linefeed. */ if (start != end) { tchar = *end; *end = '\0'; ptr = (char *)malloc(strlen(start) + 1); strcpy(ptr, start); XTextExtents(currentFont, ptr, strlen(ptr), &dir, &ascent, &descent, &all); if (*start == '\n') { HardLineFeed(hw, x, y); } else { SetElement(hw, E_TEXT, currentFont, *x, *y, (char *)ptr); PF_LF_State = 0; /* * After adding an element, we need to check, * and if we are starting a centered block, * we need to save location info. */ if (Centered == 1) { Centered = 2; BeginCenter(*x, *y); } *x = *x + all.width; CenterWidth = CenterWidth + all.width; } free(ptr); NeedSpace = True; *end = tchar; } } } /* * Format and place a piece of text. Add an element record for it. */ void FormatPlace(hw, mptr, x, y, width) HTMLWidget hw; struct mark_up *mptr; int *x, *y; unsigned int width; { char *text; char *start; char *end; char *ptr; char tchar; int stripped_space; int len; int dir, ascent, descent; XCharStruct all; text = mptr->text; end = text; while (*end != '\0') { /* * make start and end point to one word. * set flag if we removed any leading white space. 
*/ stripped_space = 0; start = end; while (isspace(*start)) { stripped_space = 1; start++; } end = start; while ((!isspace(*end))&&(*end != '\0')) { end++; } /* * Add the word to the end of this line, or insert * a linefeed an put the word at the start of the next line. */ if (start != end) { tchar = *end; *end = '\0'; ptr = (char *)malloc(strlen(start) + 2); if ((NeedSpace == True)&& ((!ispunct((int)*start))||(stripped_space))) { strcpy(ptr, " "); } else { strcpy(ptr, ""); } strcat(ptr, start); XTextExtents(currentFont, ptr, strlen(ptr), &dir, &ascent, &descent, &all); if ((*x + all.width + MarginW) <= width) { SetElement(hw, E_TEXT, currentFont, *x, *y, (char *)ptr); /* * After adding an element, we need to check, * and if we are starting a centered block, * we need to save location info. */ if (Centered == 1) { Centered = 2; BeginCenter(*x, *y); } } else { LineFeed(hw, x, y); XTextExtents(currentFont, (char *)(ptr + 1), (strlen(ptr) - 1), &dir, &ascent, &descent, &all); SetElement(hw, E_TEXT, currentFont, *x, *y, (char *)(ptr + 1)); /* * After adding an element, we need to check, * and if we are starting a centered block, * we need to save location info. */ if (Centered == 1) { Centered = 2; BeginCenter(*x, *y); } } NeedSpace = True; *x = *x + all.width; CenterWidth = CenterWidth + all.width; *end = tchar; } } } /* * Change our drawing font */ void NewFont(fp) XFontStruct *fp; { LineHeight = fp->max_bounds.ascent + fp->max_bounds.descent; } /* * Make necessary changes to formatting, based on the type of the * parsed HTML text we are formatting. * Some calls create elements that are added to the formatted element list. 
*/ void TriggerMarkChanges(hw, mptr, x, y) HTMLWidget hw; struct mark_up *mptr; int *x, *y; { struct mark_up *mark; XFontStruct *font; int type, width; mark = mptr; type = mark->type; font = NULL; /* * If Ignore is set, we ignore all further elements until we get to the * end of the Ignore */ if ((Ignore)&&(type != M_TITLE)) { return; } switch(type) { /* * Place the text. Different functions based on whether it * is pre-formatted or not. */ case M_NONE: if (Preformat) { PreformatPlace(hw, mptr, x, y, Width); } else { FormatPlace(hw, mptr, x, y, Width); } break; /* * Titles are centered */ case M_TITLE: if (mark->is_end) { Ignore = 0; } else { Ignore = 1; } #ifdef DISPLAY_CENTERED_TITLES LineFeed(hw, x, y); if (mark->is_end) { Centered = 0; font = hw->html.font; } else { if (Centered == 0) { Centered = 1; } font = hw->html.title_font; } #endif /* DISPLAY_CENTERED_TITLES */ break; /* * Headers are preceeded and followed by a linefeed, * and the change the font. */ case M_HEADER_1: LineFeed(hw, x, y); if (mark->is_end) { font = hw->html.font; NewFont(font); currentFont = font; LineFeed(hw, x, y); } else { font = hw->html.header1_font; } break; case M_HEADER_2: LineFeed(hw, x, y); if (mark->is_end) { font = hw->html.font; NewFont(font); currentFont = font; LineFeed(hw, x, y); } else { font = hw->html.header2_font; } break; case M_HEADER_3: LineFeed(hw, x, y); if (mark->is_end) { font = hw->html.font; NewFont(font); currentFont = font; LineFeed(hw, x, y); } else { font = hw->html.header3_font; } break; case M_HEADER_4: LineFeed(hw, x, y); if (mark->is_end) { font = hw->html.font; } else { font = hw->html.header4_font; } break; case M_HEADER_5: LineFeed(hw, x, y); if (mark->is_end) { font = hw->html.font; } else { font = hw->html.header5_font; } break; case M_HEADER_6: LineFeed(hw, x, y); if (mark->is_end) { font = hw->html.font; } else { font = hw->html.header6_font; } break; /* * Anchors just change the text color. * No linefeeds, so they can be imbedded anywhere. 
*/ case M_ANCHOR: if (mark->is_end) { /* * Without motif we use our own foreground resource instead of * using the manager's */ #ifdef MOTIF Fg = hw->manager.foreground; #else Fg = hw->html.foreground; #endif /* MOTIF */ AnchorText = NULL; } else { Fg = hw->html.anchor_fg; AnchorText = mark->start; } break; /* * Just insert a linefeed, or ignore if this is prefomatted * text because the <P> will be followed be a linefeed. */ case M_PARAGRAPH: LineFeed(hw, x, y); LineFeed(hw, x, y); break; /* * Addresses are just like headers. A linefeed before and * after, and change the font. */ case M_ADDRESS: LineFeed(hw, x, y); if (mark->is_end) { font = hw->html.font; } else { font = hw->html.address_font; } break; /* * Plain text. A single pre-formatted chunk of text * in its own font. */ case M_PLAIN_TEXT: LineFeed(hw, x, y); if (mark->is_end) { Preformat = 0; font = hw->html.font; } else { Preformat = 1; PF_LF_State = 0; font = hw->html.plain_font; } break; /* * Plain text. The rest of the text is pre-formatted. * There is not end for this mark. */ case M_PLAIN_FILE: LineFeed(hw, x, y); Preformat = 1; PF_LF_State = 0; font = hw->html.plain_font; break; /* * Unnumbered list. Change the value * of the TxtIndent (can be nested) * Linefeed at the end of the list. */ case M_UNUM_LIST: /* LineFeed(hw, x, y); */ width = hw->html.font->max_bounds.width; if (mark->is_end) { TextIndent = TextIndent - ((INDENT_SPACES + 1) * width); LineFeed(hw, x, y); } else { TextIndent = TextIndent + ((INDENT_SPACES + 1) * width); } break; /* * Place the bullet element at the beginning of this item. 
*/ case M_LIST_ITEM: LineFeed(hw, x, y); BulletPlace(hw, x, y); break; /* * Description lists, not yet implemented */ case M_DESC_LIST: width = hw->html.font->max_bounds.width; if (mark->is_end) { if (DescType == D_TEXT) { TextIndent = TextIndent - ((INDENT_SPACES + 1) * width); } DescType = D_NONE; LineFeed(hw, x, y); } else { if (DescType == D_TITLE) { TextIndent = TextIndent + ((INDENT_SPACES + 1) * width); } DescType = D_TITLE; } break; case M_DESC_TITLE: width = hw->html.font->max_bounds.width; /* * Special hack. Don't indent again for * multiple <dt>'s in a row. */ if (DescType == D_TEXT) { TextIndent = TextIndent - ((INDENT_SPACES + 1) * width); } DescType = D_TITLE; LineFeed(hw, x, y); break; case M_DESC_TEXT: width = hw->html.font->max_bounds.width; /* * Special hack. Don't indent again for * multiple <dd>'s in a row. */ if (DescType == D_TITLE) { TextIndent = TextIndent + ((INDENT_SPACES + 1) * width); } DescType = D_TEXT; LineFeed(hw, x, y); break; case M_PREFORMAT: if (mark->is_end) { Preformat = 0; LineFeed(hw, x, y); if (saveFont != NULL) { hw->html.font = saveFont; saveFont = NULL; } font = hw->html.font; } else { LineFeed(hw, x, y); Preformat = 1; PF_LF_State = 0; if (saveFont == NULL) { saveFont = hw->html.font; hw->html.font = hw->html.plain_font; } font = hw->html.font; } break; default: break; } if ((font != NULL)&&(font != currentFont)) { NewFont(font); currentFont = font; } } /* * Format all the objects in the passed Widget's * parsed object list to fit the locally global Width. * Passes in the x,y coords of where to start placing the * formatted text. * Returns the ending x,y in same variables. * Title objects are ignored, and not formatted. * * The locally global variables are assumed to have been initialized * before this function was called. 
*/ void FormatChunk(hw, x, y) HTMLWidget hw; int *x, *y; { struct mark_up *mptr; /* * Format all objects */ mptr = hw->html.html_objects; Last = NULL; while (mptr != NULL) { TriggerMarkChanges(hw, mptr, x, y); /* * Save last non-text mark */ if (mptr->type != M_NONE) { Last = mptr; } mptr = mptr->next; } } /* * Called by the widget to format all the objects in the * parsed object list to fit its current window size. * Returns the max_height of the entire document. * Title objects are ignored, and not formatted. */ int FormatAll(hw, width) HTMLWidget hw; int width; { int x, y; int npages, pheight; struct mark_up *mptr; struct page_rec *plist; #ifdef TIMING gettimeofday(&Tv, &Tz); fprintf(stderr, "FormatAll enter (%d.%d)\n", Tv.tv_sec, Tv.tv_usec); #endif /* * Initialize local variables, some from the widget */ MarginW = hw->html.margin_width; /* * Without motif we use our own foreground resource instead of * using the manager's */ #ifdef MOTIF Fg = hw->manager.foreground; #else Fg = hw->html.foreground; #endif /* MOTIF */ Bg = hw->core.background_pixel; Width = width; TextIndent = MarginW; LineNumber = 1; Centered = 0; Ignore = 0; Preformat = 0; DescType = D_NONE; NeedSpace = False; Internal = False; AnchorText = NULL; #ifdef THROW_AWAY_OLD_LIST /* * Free up previously formatted elements */ FreeLineList(hw->html.formatted_elements); hw->html.formatted_elements = NULL; #endif /* * Clear any previous selections */ hw->html.select_start = NULL; hw->html.select_end = NULL; /* * Set up a starting font, and starting x, y, position */ NewFont(hw->html.font); currentFont = hw->html.font; saveFont = NULL; x = TextIndent; y = hw->html.margin_height; /* * Start a null element list, to be filled in as we go. */ Current = NULL; /* * Format all objects for width */ FormatChunk(hw, &x, &y); /* * Restore the proper font from unterminated preformatted text * sequences. 
*/ if (saveFont != NULL) { hw->html.font = saveFont; saveFont = NULL; } /* * Free and extra of the pre-allocated list. * Terminate the element list. */ if ((Current != NULL)&&(Current->next != NULL)) { FreeLineList(Current->next); Current->next = NULL; } else if ((Current == NULL)&&(hw->html.formatted_elements != NULL)) { FreeLineList(hw->html.formatted_elements); hw->html.formatted_elements = NULL; } /* * Do all the cool multi-page stuff if this document is longer * than 1 page */ pheight = y - hw->html.margin_height; if (pheight > hw->html.page_height) { int i, page; int dy, dline; char anch[256]; char buf[4096]; struct mark_up *mptr; struct mark_up *mlist; struct ele_rec *elist; struct ele_rec *eptr; elist = hw->html.formatted_elements; mlist = hw->html.html_objects; npages = (pheight + hw->html.page_height - 1) / hw->html.page_height; plist = (struct page_rec *) malloc(sizeof(struct page_rec) * npages); for (page = 1; page <= npages; page++) { if (elist == NULL) { plist[page - 1].elist = NULL; plist[page - 1].pheight = 0; plist[page - 1].pnum = page; plist[page - 1].line_num = 1; continue; } sprintf(anch, "-- <A HREF=\"Internal Page %d\">[Go To Previous Page]</A>", (page - 1)); sprintf(buf, "<H3>Document Page %d of %d %s</H3><H3>Document Pages ", page, npages, ((page == 1) ? 
"-- <A HREF=\"http://hoohoo.ncsa.uiuc.edu:80/mosaic-docs/document-page-explanation.html\">(Document pages explained.)</A>" : anch)); for (i = 1; i < npages; i++) { sprintf(anch, "<A HREF=\"Internal Page %d\">%d</A>, ", i, i); strcat(buf, anch); } sprintf(anch, "<A HREF=\"Internal Page %d\">%d</A>", npages, npages); strcat(buf, anch); strcat(buf, "</H3><P>"); mptr = HTMLParse(NULL, buf); hw->html.html_objects = mptr; hw->html.formatted_elements = NULL; /* * Re-Initialize local variables */ /* * Without motif we use our own foreground resource instead of * using the manager's */ #ifdef MOTIF Fg = hw->manager.foreground; #else Fg = hw->html.foreground; #endif /* MOTIF */ Bg = hw->core.background_pixel; TextIndent = MarginW; LineNumber = 1; Centered = 0; Ignore = 0; Preformat = 0; DescType = D_NONE; NeedSpace = False; Internal = True; AnchorText = NULL; NewFont(hw->html.font); currentFont = hw->html.font; saveFont = NULL; x = MarginW; y = hw->html.margin_height; Current = NULL; FormatChunk(hw, &x, &y); dy = y - elist->y; dline = LineNumber + 1 - elist->line_number; Current->next = elist; elist->prev = Current; eptr = elist; while ((eptr != NULL)&& (eptr->y < (page * hw->html.page_height))) { eptr->y += dy; eptr->line_number += dline; eptr = eptr->next; Current = Current->next; } elist = eptr; sprintf(anch, "-- <A HREF=\"Internal Page %d\">[Go To Next Page]</A>", (page + 1)); sprintf(buf, "<H3>Document Page %d of %d %s</H3><H3>Document Pages ", page, npages, ((page == npages) ? 
"" : anch)); for (i = 1; i < npages; i++) { sprintf(anch, "<A HREF=\"Internal Page %d\">%d</A>, ", i, i); strcat(buf, anch); } sprintf(anch, "<A HREF=\"Internal Page %d\">%d</A>", npages, npages); strcat(buf, anch); strcat(buf, "</H3><P>"); mptr = HTMLParse(mptr, buf); hw->html.html_objects = mptr; x = MarginW; if (elist == NULL) { y = Current->y + LineHeight; LineNumber = Current->line_number + 1; } else { y = elist->y + dy; LineNumber = elist->line_number + dline; } Current->next = NULL; FormatChunk(hw, &x, &y); y = y + hw->html.margin_height; Current->next = NULL; FreeObjList(mptr); plist[page - 1].elist = hw->html.formatted_elements; plist[page - 1].pheight = y; plist[page - 1].pnum = page; plist[page - 1].line_num = LineNumber; } hw->html.html_objects = mlist; } else { /* * Add the bottom margin to the max height. */ y = y + hw->html.margin_height; npages = 1; plist = (struct page_rec *)malloc(sizeof(struct page_rec)); plist[0].elist = hw->html.formatted_elements; plist[0].pheight = y; plist[0].pnum = 1; plist[0].line_num = LineNumber; } if (hw->html.current_page < 1) { hw->html.current_page = 1; } if (hw->html.current_page > npages) { hw->html.current_page = npages; } hw->html.formatted_elements = plist[hw->html.current_page - 1].elist; LineNumber = plist[hw->html.current_page - 1].line_num; /* * Make the line array indexed into the element list * and store it into the widget */ hw->html.line_count = LineNumber; hw->html.line_array = MakeLineList(hw->html.formatted_elements, LineNumber); hw->html.pages = plist; hw->html.page_cnt = npages; #ifdef TIMING gettimeofday(&Tv, &Tz); fprintf(stderr, "FormatAll exit (%d.%d)\n", Tv.tv_sec, Tv.tv_usec); #endif return(plist[hw->html.current_page - 1].pheight); } /* * Redraw a linefeed. * Basically a filled rectangle at the end of a line. 
*/ void LinefeedRefresh(hw, eptr) HTMLWidget hw; struct ele_rec *eptr; { int x1, y1; unsigned int width, height; x1 = eptr->x; y1 = eptr->y; width = hw->core.width - x1; height = eptr->font->max_bounds.ascent + eptr->font->max_bounds.descent; if (eptr->selected == True) { XSetForeground(XtDisplay(hw), hw->html.drawGC, eptr->fg); } else { XSetForeground(XtDisplay(hw), hw->html.drawGC, eptr->bg); } XFillRectangle(XtDisplay(hw), XtWindow(hw), hw->html.drawGC, x1, y1, width, height); } /* * Redraw a formatted text element */ void TextRefresh(hw, eptr) HTMLWidget hw; struct ele_rec *eptr; { int ascent; XSetFont(XtDisplay(hw), hw->html.drawGC, eptr->font->fid); ascent = eptr->font->max_bounds.ascent; if (eptr->selected == True) { XSetForeground(XtDisplay(hw), hw->html.drawGC, eptr->bg); XSetBackground(XtDisplay(hw), hw->html.drawGC, eptr->fg); } else { XSetForeground(XtDisplay(hw), hw->html.drawGC, eptr->fg); XSetBackground(XtDisplay(hw), hw->html.drawGC, eptr->bg); } XDrawImageString(XtDisplay(hw), XtWindow(hw), hw->html.drawGC, eptr->x, eptr->y + ascent, (char *)eptr->edata, eptr->edata_len); } /* * Redraw a formatted bullet element */ void BulletRefresh(hw, eptr) HTMLWidget hw; struct ele_rec *eptr; { int width, line_height; int y1; width = eptr->font->max_bounds.width; line_height = eptr->font->max_bounds.ascent + eptr->font->max_bounds.descent; y1 = eptr->y + (line_height / 2) - (width / 4); XSetFont(XtDisplay(hw), hw->html.drawGC, eptr->font->fid); XSetForeground(XtDisplay(hw), hw->html.drawGC, eptr->fg); XSetBackground(XtDisplay(hw), hw->html.drawGC, eptr->bg); XFillArc(XtDisplay(hw), XtWindow(hw), hw->html.drawGC, (eptr->x - width), y1, (width / 2), (width / 2), 0, 23040); } void RefreshTextRange(hw, start, end) HTMLWidget hw; struct ele_rec *start; struct ele_rec *end; { struct ele_rec *eptr; eptr = start; while ((eptr != NULL)&&(eptr != end)) { if (eptr->type == E_TEXT) { TextRefresh(hw, eptr); } eptr = eptr->next; } if (eptr != NULL) { if (eptr->type == 
E_TEXT) { TextRefresh(hw, eptr); } } } /* * Refresh all elements on a single line into the widget's window */ void PlaceLine(hw, line) HTMLWidget hw; int line; { struct ele_rec *eptr; /* * Initialize local variables, some from the widget */ eptr = hw->html.line_array[line]; while ((eptr != NULL)&&(eptr->line_number == (line + 1))) { switch(eptr->type) { case E_TEXT: TextRefresh(hw, eptr); break; case E_BULLET: BulletRefresh(hw, eptr); break; case E_LINEFEED: LinefeedRefresh(hw, eptr); break; } eptr = eptr->next; } } /* * Locate the element (if any) that is at the passed location * in the widget. If there is no corresponding element, return * NULL */ struct ele_rec * LocateElement(hw, x, y) HTMLWidget hw; int x, y; { struct ele_rec *eptr; int i, start, end, line; int tx1, tx2, ty1, ty2; #ifdef DEBUG fprintf(stderr, "locate %d,%d\n", x, y); #endif /* * Narrow the search down to a 2 line range * before beginning to search element by element */ start = -1; end = -1; for (i=0; i<hw->html.line_count; i++) { if (hw->html.line_array[i] == NULL) { continue; } else if (hw->html.line_array[i]->y <= y) { start = i; continue; } else { end = i; break; } } /* * Search may have already failed, or it may be a one line * range. */ if ((start == -1)&&(end == -1)) { return(NULL); } else if (start == -1) { start = end; } else if (end == -1) { end = start; } #ifdef DEBUG fprintf(stderr, "search %d to %d\n", hw->html.line_array[start]->y, hw->html.line_array[end]->y); #endif /* * Search element by element, for now we only search * text elements and linefeeds. 
*/ eptr = hw->html.line_array[start]; ty1 = eptr->y; ty2 = eptr->y + eptr->font->max_bounds.ascent + eptr->font->max_bounds.descent; line = eptr->line_number; while ((eptr != NULL)&&(eptr->line_number <= (end + 1))) { if (eptr->line_number != line) { ty1 = ty2; ty2 = eptr->y + eptr->font->max_bounds.ascent + eptr->font->max_bounds.descent; line = eptr->line_number; } if (eptr->type == E_TEXT) { int dir, ascent, descent; XCharStruct all; tx1 = eptr->x; XTextExtents(eptr->font, (char *)eptr->edata, eptr->edata_len, &dir, &ascent, &descent, &all); tx2 = eptr->x + all.width; if ((x >= tx1)&&(x <= tx2)&&(y >= ty1)&&(y <= ty2)) { return(eptr); } } else if (eptr->type == E_LINEFEED) { tx1 = eptr->x; if ((x >= tx1)&&(y >= ty1)&&(y <= ty2)) { return(eptr); } else if (eptr->next == NULL) { return(eptr); } else if (eptr->next != NULL) { int tmpy; tmpy = eptr->next->y + eptr->next->font->max_bounds.ascent + eptr->next->font->max_bounds.descent; tx2 = eptr->next->x; if ((x < tx2)&&(y >= ty2)&&(y <= tmpy)) { return(eptr); } } } eptr = eptr->next; } return(NULL); } /* * Parse all the formatted text elements from start to end * into an ascii text string, and return it. */ char * ParseTextToString(elist, startp, endp) struct ele_rec *elist; struct ele_rec *startp; struct ele_rec *endp; { int length, line; char *text; struct ele_rec *eptr; struct ele_rec *start; struct ele_rec *end; if (startp == NULL) { return(NULL); } if (SwapElements(elist, startp, endp)) { start = endp; end = startp; } else { start = startp; end = endp; } length = 0; eptr = start; line = eptr->line_number; while ((eptr != NULL)&&(eptr != end)) { /* * Skip the special internal text added for multi-page * documents. 
*/ if (eptr->internal == True) { eptr = eptr->next; continue; } if (eptr->type == E_TEXT) { length = length + eptr->edata_len; } else if (eptr->type == E_LINEFEED) { length = length + 1; } eptr = eptr->next; } if ((eptr != NULL)&&(eptr->internal == False)) { if (eptr->type == E_TEXT) { length = length + eptr->edata_len; } else if (eptr->type == E_LINEFEED) { length = length + 1; } } text = (char *)malloc(length + 1); if (text == NULL) { fprintf(stderr, "ParseTextToString malloc failed\n"); return(NULL); } strcpy(text, ""); eptr = start; line = eptr->line_number; while ((eptr != NULL)&&(eptr != end)) { /* * Skip the special internal text added for multi-page * documents. */ if (eptr->internal == True) { eptr = eptr->next; continue; } if (eptr->type == E_TEXT) { strcat(text, (char *)eptr->edata); } else if (eptr->type == E_LINEFEED) { strcat(text, "\n"); } eptr = eptr->next; } if ((eptr != NULL)&&(eptr->internal == False)) { if (eptr->type == E_TEXT) { strcat(text, (char *)eptr->edata); } else if (eptr->type == E_LINEFEED) { strcat(text, "\n"); } } return(text); } @//E*O*F HTMLformat.c// chmod u=rw,g=r,o=r HTMLformat.c exit 0