Thanks Bruce for updating your "Terminology and Framework." document. I haven't had time yet to review it, but thought that it would be worth reporting on my work. I have made steady progress on my Prolog implementation of the lexical tokenization and operator precedence analysis of input strings. The parser infers a '.' operator between adjacent tokens which aren't operators, e.g. "y=a x" becomes "y = a . x". This would be used to infer multiplication by a matching map rule. The parse works pretty well and is included below. It doesn't work for strings like: y = 1 + integral of sin(x) wrt x * cos(x) This example screws because the + and * operators bind tighter than "of" and "wrt" respectively. To make it work, users would have to bracket the integral expression, e.g. y = 1 + {integral of sin(x) wrt x} * cos(x) I find this unacceptable and am now looking at how to exploit the map rules to override the normal left and right operator precedence values to ensure the templates are satisfied. This then combines bottom up parsing for expressions with top down parsing for templates. The final tokenizer will function correctly with '\' before tokens so that a LaTeX like notation can be expressed by people who don't like the above style of tokens. The current tokenizer embodies the bracketing rules discussed in previous phone calls. You can tryout the tokenizer e.g: tokenizer("{(1+x}", L). The parser is used by: test("1 + {sin a x} over e^x"). which then prints the parse tree with < and > around each node. I will present this work at the SGML and Math workshop. Regards, Dave ------ prolog file for parser and tokenizer follows -------------- /* operator precedence parser */ /* from and to associate to left for limits while up and down associate to the right for powers */ prec(_, 0, eof, 0). prec(prefix, 800, sym:'+', 799). prec(prefix, 800, sym:'-', 799). prec(infix, 750, sym:'^', 749). % right associative prec(infix, 750, sym:'_', 749). % right associative prec(infix, 750, name:up, 749). prec(infix, 750, name:down, 749). prec(infix, 600, sym:'.', 599). prec(infix, 599, sym:'*', 600). prec(infix, 599, sym:'/', 600). prec(infix, 499, name:over, 500). prec(infix, 499, name:atop, 500). prec(infix, 499, name:choose, 500). prec(infix, 399, sym:'+', 400). prec(infix, 399, sym:'-', 400). prec(infix, 349, sym:',', 350). prec(infix, 299, name:to, 300). % left associative prec(infix, 299, name:from, 300). % left associative prec(infix, 299, name:wrt, 300). prec(infix, 300, name:of, 299). prec(infix, 199, sym:'=', 200). prec(prefix, 1, left:S, 2) :- not(S='{'). prec(postfix, 2, right:S, 1) :- not(S='}'). prec(prefix, 0, left:'{', 1). prec(postfix, 1, right:'}', 0). infix(X) :- prec(infix, _, Y, _), X=Y. % to see whats happening prefix(X) :- prec(prefix, _, X, _). postfix(X) :- prec(postfix, _, X, _). op(X) :- prec(_, _, X, _). % if true then reduce Op1 before Op2 higher(Fix, Op1, Op2) :- prec(Fix, _, Op1, R1), prec(_, L2, Op2, _), R1 >= L2. test1 :- tokenize("1+2*3", T), parseExpression(T, E), print(E). test2 :- tokenize("1+2+3", T), parseExpression(T, E), print(E). test3 :- tokenize("1+X^2", T), parseExpression(T, E), print(E). test4 :- once(tokenize("+1", T)), parseExpression(T, E), print(E). test5 :- once(tokenize("+x/y", T)), parseExpression(T, E), print(E). test6 :- once(tokenize("x/-y", T)), parseExpression(T, E), print(E). test7 :- once(tokenize("x + + y", T)), parseExpression(T, E), print(E). test8 :- once(tokenize("[1+x]", T)), parseExpression(T, E), print(E). test9 :- once(tokenize("y=integral from 0 to pi/2 of sin a x wrt x", T)), parseExpression(T, E), print(E). test(X) :- once(tokenize(X, T)), parseExpression(T, E), print(E). parseExpression(T, E) :- parseExpr(T, [], E). parseExpr([X], [], X) :- !. parseExpr(L1, L3, R) :- parseExpr(L1, L2), parseExpr(L2, L3, R). % reduce { brackets } parseExpr([left:'{', X, right:'}'|T], [X|T]) :- !. % reduce other brackets parseExpr([left:L, X, right:R|T], [node(L, X, R)|T]) :- !. % reduce bracketed expressions first parseExpr([L, Op1, R, left:S|T1], [L, Op1|T2]) :- infix(Op1), not(op(R)), parseExpr([R, left:S|T1], T2). parseExpr([Op1, R, left:S|T1], [Op1|T2]) :- prefix(Op1), not(op(R)), parseExpr([R, left:S|T1], T2). parseExpr([L1, left:S|T1], [L1|T2]) :- not(op(L1)), parseExpr([left:S|T1], T2). % reduce infix operator parseExpr([L, Op, R], [node(L, Op, R)]) :- infix(Op). parseExpr([L, Op1, R, Op2|T], [node(L, Op1, R), Op2|T]) :- higher(infix, Op1, Op2), !. parseExpr([L, Op1, R, Op2|T1], [L, Op1|T2]) :- infix(Op1), parseExpr([R, Op2|T1], T2). % reduce prefix operator parseExpr([Op, R], [node(Op, R)]) :- prefix(Op). parseExpr([Op1, R, Op2|T], [node(Op1, R), Op2|T]):- higher(prefix, Op1, Op2), !. parseExpr([Op1, R, Op2|T1], [Op1|T2]):- prefix(Op1), !, parseExpr([R, Op2|T1], T2). % reduce postfix operator parseExpr([L, Op|T], [node(L, Op)|T]) :- postfix(Op). % infer infix '.' between two non-operators parseExpr([L1, L2|T], [L1, sym:'.', L2|T]) :- not(op(L1)), not(op(L2)). /* tokenizer for HTML-Math */ portray(name:N) :- write(N). portray(sym:S) :- write(S). portray(number:N) :- write(N). portray(left:S) :- write('\\'), write(S). portray(right:S) :- write('\\'), write(S). portray(node(L, Op, R)) :- write('<'), print(L), write(' '), print(Op),write(' '), print(R), write('>'). portray(node(Op, R)) :- write('<'), print(Op), write(' '), print(R), write('>'). portray('.') :- write('.'). letter(X) :- 97 =< X, X =< 122. letter(X) :- 65 =< X, X =< 90. digit(X) :- 48 =< X, X =< 57. symchar(X) :- member(X, "~!@#$%^&*(_-+=|][':;?/><.,`"). symchar(34). /* double quote char */ leftbracket(X) :- member(X, "[({"). rightbracket(X) :- member(X, "])}"). namechar(X) :- letter(X). namechar(X) :- digit(X). tokenize([], []). tokenize(U, [T2|R]) :- readtoken(U, T1, V), discardwhite(V, T1, T2, W), tokenize(W, R). discardwhite(U, white, T2, W) :- readtoken(U, T1, V), discardwhite(V, T1, T2, W). discardwhite(L, T, T, L) :- not (T=white). readtoken([94|T], sym:'^', T). readtoken([95|T], sym:'_', T). readtoken([42|T], sym:'*', T). readtoken([47|T], sym:'/', T). readtoken([43|T], sym:'+', T). readtoken([45|T], sym:'-', T). readtoken([61|T], sym:'=', T). readtoken([44|T], sym:',', T). readtoken([S|T], left:N, T) :- leftbracket(S), name(N, [S]). readtoken([S|T], right:N, T) :- rightbracket(S), name(N, [S]). readtoken([], eof, []). readtoken([H|T], name:Name, L) :- letter(H), readname(T, N, L), name(Name,[H|N]). readtoken([H|T], white, T) :- H =< 32. readtoken([H|T], number:Number, L) :- digit(H), readnumber(T, N, L), name(Number, [H|N]). readtoken([H|T], sym:Sym, L) :- symchar(H), readsym(T, N, L), name(Sym, [H|N]). readname([], [], []). readname([H|T], [H|N], L) :- namechar(H), readname(T, N, L). readname([H|T], [], [H|T]). readnumber([], [], []). readnumber([H|T], [H|N], L) :- digit(H), readnumber(T, N, L). readnumber([H|T], [], [H|T]). readsym([], [], []). readsym([H|T], [H|N], L) :- symchar(H), readsym(T, N, L). readsym([H|T], [], [H|T]).Received on Thursday, 18 April 1996 16:50:53 UTC
This archive was generated by hypermail 2.4.0 : Saturday, 15 April 2023 17:19:56 UTC