- From: Michael Smith via cvs-syncmail <cvsmail@w3.org>
- Date: Mon, 03 Aug 2009 05:32:50 +0000
- To: public-html-commits@w3.org
Update of /sources/public/html5/tools/hv/rnv-1.7.8 In directory hutz:/tmp/cvs-serv11387 Added Files: Makefile Makefile.bcc Makefile.bsd Makefile.gnu arx.c ary.c ary.h build_vms.com changes.txt drv.c drv.h dsl.c dsl.h dxl.c dxl.h er.c er.h erbit.h ht.c ht.h license.txt ll.h m.c m.h readme.txt rn.c rn.h rnc.c rnc.h rnd.c rnd.h rnl.c rnl.h rnv.c rnv.h rnx.c rnx.h rvp.c rx.c rx.h rx_cls_ranges.c rx_cls_u.c s.c s.h sc.c sc.h src.txt test.c u.c u.h xcl.c xmlc.c xmlc.h xsd.c xsd.h xsd_tm.c xsd_tm.h xsdck.c Log Message: initial add --- NEW FILE: rx.h ---
/* $Id: rx.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */
/* rx.h: interface of the regular expression matcher implemented in rx.c */

#include <stdarg.h>

#ifndef RX_H
#define RX_H

/* error numbers handed to rx_verror_handler; the text after each number is
   the message printed for it by rx_default_verror_handler in rx.c */
#define RX_ER_BADCH 0 /* bad character */
#define RX_ER_UNFIN 1 /* unfinished expression */
#define RX_ER_NOLSQ 2 /* '[' expected */
#define RX_ER_NORSQ 3 /* ']' expected */
#define RX_ER_NOLCU 4 /* '{' expected */
#define RX_ER_NORCU 5 /* '}' expected */
#define RX_ER_NOLPA 6 /* '(' expected */
#define RX_ER_NORPA 7 /* ')' expected */
#define RX_ER_BADCL 8 /* unknown class */
#define RX_ER_NODGT 9 /* digit expected */
#define RX_ER_DNUOB 10 /* reversed bounds */
#define RX_ER_NOTRC 11 /* range or class expected */

/* called for the first error found in an expression; ap carries the
   expression text and the character offset of the error */
extern void (*rx_verror_handler)(int erno,va_list ap);

/* non-zero lets rx.c drop its cache of compiled expressions when it grows */
extern int rx_compact;

extern void rx_default_verror_handler(int erno,va_list ap);

extern void rx_init(void);
extern void rx_clear(void);

/* just compiles the expression to check the syntax */
extern int rx_check(char *rx);

/* returns positive value if the s[0..n] ~= rx, 0 if not; 0 is also returned
   when rx does not compile (the functions in rx.c never return -1);
   rx and s are in utf-8, rx is 0-terminated, s is n bytes long;
   rmatch replaces white space in s with 0x20, cmatch collapses white space.
 */
extern int rx_match(char *rx,char *s,int n);
extern int rx_rmatch(char *rx,char *s,int n);
extern int rx_cmatch(char *rx,char *s,int n);

#endif
--- NEW FILE: rnc.h ---
/* $Id: rnc.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */
/* rnc.h: interface of the module that opens and parses rnc sources */

#include <stdarg.h>

#ifndef RNC_H
#define RNC_H 1

/* error numbers handed to rnc_verror_handler;
   NOTE(review): the messages live in rnc.c, which is not visible from this
   header -- the numbering in tens looks like one group per processing stage,
   confirm against rnc.c before relying on it */
#define RNC_ER_IO 0
#define RNC_ER_UTF 10
#define RNC_ER_XESC 20
#define RNC_ER_LEXP 30
#define RNC_ER_LLIT 31
#define RNC_ER_LILL 32
#define RNC_ER_SEXP 40
#define RNC_ER_SILL 41
#define RNC_ER_NOTGR 42
#define RNC_ER_EXT 50
#define RNC_ER_DUPNS 51
#define RNC_ER_DUPDT 52
#define RNC_ER_DFLTNS 53
#define RNC_ER_DFLTDT 54
#define RNC_ER_NONS 55
#define RNC_ER_NODT 56
#define RNC_ER_NCEX 57
#define RNC_ER_2HEADS 58
#define RNC_ER_COMBINE 59
#define RNC_ER_OVRIDE 60
#define RNC_ER_EXPT 61
#define RNC_ER_INCONT 62
#define RNC_ER_NOSTART 70
#define RNC_ER_UNDEF 71

/* one symbol of the source: its text s with length slen, the position
   line,col it was found at, and its kind sym
   (presumably a token code defined in rnc.c -- verify there) */
struct rnc_cym {
  char *s; int slen;
  int line,col;
  int sym;
};

/* state of one source being read; filled in by rnc_open, rnc_stropen or
   rnc_bind and released by rnc_close;
   NOTE(review): the fields are maintained by rnc.c, their exact meaning
   cannot be told from this header */
struct rnc_source {
  int flags;
  char *fn; int fd;
  char *buf; int i,n;
  int complete;
  int line,col,prevline/*when error reported*/;
  int u,v,w; int nx;
  int cur;
  struct rnc_cym sym[2]; /* two symbol slots, cur selects one of them */
};

extern void (*rnc_verror_handler)(int er_no,va_list ap);

extern void rnc_default_verror_handler(int erno,va_list ap);

extern void rnc_init(void);
extern void rnc_clear(void);

/* three ways to attach a source: by file name, from a string of len bytes,
   or to an already open file descriptor */
extern int rnc_open(struct rnc_source *sp,char *fn);
extern int rnc_stropen(struct rnc_source *sp,char *fn,char *s,int len);
extern int rnc_bind(struct rnc_source *sp,char *fn,int fd);
extern int rnc_close(struct rnc_source *sp);

extern int rnc_parse(struct rnc_source *sp);

extern int rnc_errors(struct rnc_source *sp);

#endif
--- NEW FILE: erbit.h ---
/* $Id: erbit.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#ifndef ERBIT_H
#define ERBIT_H 1

/* one distinct bit above the low 12 bits per module
   (presumably or-ed into that module's error numbers so that a combined
   number still tells its origin -- confirm against the callers) */
#define ERBIT_RNC 0x01000
#define ERBIT_RND 0x02000
#define ERBIT_RNL 0x04000
#define ERBIT_RX 0x08000
#define ERBIT_XSD 0x10000
#define ERBIT_DRV 0x20000
#define ERBIT_RNV 0x40000

#endif
--- NEW FILE: s.c ---
/* $Id: s.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#include <string.h>
/*strcpy,strlen*/ #include <assert.h> #include "xmlc.h" #include "m.h" #include "s.h" int s_cmpn(char *s1,char *s2,int n2) { char *end=s2+n2; for(;;++s1,++s2) { if(s2==end) return *s1; if(*s1=='\0') return -*s2; if(*s1!=*s2) return *s1-*s2; } } int s_tokcmpn(char *s1,char *s2,int n2) { char *end2=s2+n2; /* all white space characters are one byte long */ while(xmlc_white_space(*s1)) ++s1; while(s2!=end2&&xmlc_white_space(*s2)) ++s2; for(;;) { if(s2==end2) { while(xmlc_white_space(*s1)) ++s1; return *s1; } if(*s1=='\0') { while(s2!=end2&&xmlc_white_space(*s2)) ++s2; return s2==end2?0:-*s2; } if(xmlc_white_space(*s1)&&xmlc_white_space(*s2)) { do ++s1; while(xmlc_white_space(*s1)); do ++s2; while(s2!=end2&&xmlc_white_space(*s2)); } else { if(*s1!=*s2) return *s1-*s2; ++s1; ++s2; } } } int s_hval(char *s) { int h=0; while(*s) h=h*31+*(s++); return h; } char *s_clone(char *s) { return strcpy((char*)m_alloc(strlen(s)+1,sizeof(char)),s); } char *s_abspath(char *r,char *b) { if(*r!='/') { char *c=b,*sep=(char*)0; for(;;) {if(!(*c)) break; if(*c++=='/') sep=c;} if(sep) { char *p=r,*q; while(*p++); q=p+(sep-b); do *(--q)=*(--p); while(p!=r); while(b!=sep) *r++=*b++; } } return r; } int s_tab(char *s,char *tab[],int size) {return s_ntab(s,strlen(s),tab,size);} int s_ntab(char *s,int len,char *tab[],int size) { int n=0,m=size-1,i,cmp; for(;;) { if(n>m) return size; i=(n+m)/2; if((cmp=s_cmpn(tab[i],s,len))==0) return i; else {if(cmp>0) m=i-1; else n=i+1;} } } void s_test() { assert(s_cmpn("","",0)==0); assert(s_cmpn("/xyz","/xyz",4)==0); assert(s_cmpn("xyz","yz",2)<0); assert(s_cmpn("xyz","xxyz",4)>0); { char r[256]; s_abspath(strcpy(r,"/x"),"/y"); assert(strcmp(r,"/x")==0); s_abspath(strcpy(r,"x"),"/y"); assert(strcmp(r,"/x")==0); s_abspath(strcpy(r,"x"),"/y/"); assert(strcmp(r,"/y/x")==0); s_abspath(strcpy(r,"x"),"y/"); assert(strcmp(r,"y/x")==0); s_abspath(strcpy(r,"x"),"y"); assert(strcmp(r,"x")==0); s_abspath(strcpy(r,""),"y"); assert(strcmp(r,"")==0); } 
assert(s_tokcmpn("","",0)==0); assert(s_tokcmpn(""," ",1)==0); assert(s_tokcmpn("A","A",1)==0); assert(s_tokcmpn(" A B","A B ",5)==0); assert(s_tokcmpn("AB","A B",3)>0); assert(s_tokcmpn("","A",1)<0); } --- NEW FILE: er.h --- /* $Id: er.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #ifndef ER_H #define ER_H 1 #include <stdarg.h> extern int (*er_printf)(char *format,...); extern int (*er_vprintf)(char *format,va_list ap); extern int er_default_printf(char *format,...); extern int er_default_vprintf(char *format,va_list ap); #endif --- NEW FILE: rx.c --- /* $Id: rx.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #include <string.h> /*strlen,strcpy,strcmp*/ #include <assert.h> #include "u.h" /*u_get,u_strlen*/ #include "xmlc.h" #include "m.h" #include "s.h" #include "ht.h" #include "ll.h" #include "er.h" #include "rx.h" #define LEN_P RX_LEN_P #define PRIME_P RX_PRIME_P #define LIM_P RX_LIM_P #define LEN_2 RX_LEN_2 #define PRIME_2 RX_PRIME_2 #define LEN_R RX_LEN_R #define PRIME_R RX_PRIME_R #define R_AVG_SIZE 16 /* it is good to have few patterns when deltas are memoized */ #define P_ERROR 0 #define P_NOT_ALLOWED 1 #define P_EMPTY 2 #define P_CHOICE 3 #define P_GROUP 4 #define P_ONE_OR_MORE 5 /*+*/ #define P_EXCEPT 6 /*single-single*/ #define P_RANGE 7 /*lower,upper inclusive*/ #define P_CLASS 8 /*complement is .-*/ #define P_ANY 9 #define P_CHAR 10 #define P_SIZE 3 #define P_AVG_SIZE 2 static int p_size[]={1,1,1,3,3,2,3,3,2,1,2}; #define P_TYP(i) (pattern[i]&0xF) #define P_IS(i,x) (x==P_TYP(i)) #define P_CHK(i,x) assert(P_IS(i,x)) #define P_unop(TYP,p,p1) P_CHK(p,TYP); p1=pattern[p+1] #define P_binop(TYP,p,p1,p2) P_unop(TYP,p,p1); p2=pattern[p+2] #define NotAllowed(p) P_CHK(p,P_NotAllowed) #define Empty(p) P_CHK(p,P_Empty) #define Any(p) P_CHK(p,P_Any) #define Choice(p,p1,p2) P_binop(P_CHOICE,p,p1,p2) #define Group(p,p1,p2) P_binop(P_GROUP,p,p1,p2) #define OneOrMore(p,p1) P_unop(P_ONE_OR_MORE,p,p1) #define Except(p,p1,p2) P_binop(P_EXCEPT,p,p1,p2) #define Range(p,cf,cl) 
P_binop(P_RANGE,p,cf,cl) #define Class(p,cn) P_unop(P_CLASS,p,cn) #define Char(p,c) P_unop(P_CHAR,p,c) #define P_NUL 0x100 #define setNullable(x) if(x) pattern[i_p]|=P_NUL #define nullable(p) (pattern[p]&P_NUL) int rx_compact=0; /* 'compact' in drv and rx do different things. In drv, it limits the size of the table of memoized deltas. In rx, it limits the size of the buffer for cached regular expressions; memoized deltas are always limited by LIM_M, since the whole repertoire of unicode characters can blow up the buffer. */ static char *regex; static int *pattern; static int (*r2p)[2]; static struct hashtable ht_r,ht_p,ht_2; static int i_p,len_p,i_r,len_r,i_2,len_2; static int empty,notAllowed,any; static int accept_p(void) { int j; if((j=ht_get(&ht_p,i_p))==-1) { ht_put(&ht_p,j=i_p); i_p+=p_size[P_TYP(i_p)]; if(i_p+P_SIZE>len_p) pattern=(int*)m_stretch(pattern,len_p=2*(i_p+P_SIZE),i_p,sizeof(int)); } return j; } #define P_NEW(x) (pattern[i_p]=x) #define P_newunop(TYP,p1) P_NEW(TYP); pattern[i_p+1]=p1 #define P_newbinop(TYP,p1,p2) P_newunop(TYP,p1); pattern[i_p+2]=p2 static int newNotAllowed(void) {P_NEW(P_NOT_ALLOWED); return accept_p();} static int newEmpty(void) {P_NEW(P_EMPTY); setNullable(1); return accept_p();} static int newAny(void) {P_NEW(P_ANY); return accept_p();} static int newChoice(int p1,int p2) {P_newbinop(P_CHOICE,p1,p2); setNullable(nullable(p1)||nullable(p2)); return accept_p();} static int newGroup(int p1,int p2) {P_newbinop(P_GROUP,p1,p2); setNullable(nullable(p1)&&nullable(p2)); return accept_p();} static int newOneOrMore(int p1) {P_newunop(P_ONE_OR_MORE,p1); setNullable(nullable(p1)); return accept_p();} static int newExcept(int p1,int p2) {P_newbinop(P_EXCEPT,p1,p2); return accept_p();} static int newRange(int cf,int cl) {P_newbinop(P_RANGE,cf,cl); return accept_p();} static int newClass(int cn) {P_newunop(P_CLASS,cn); return accept_p();} static int newChar(int c) {P_newunop(P_CHAR,c); return accept_p();} static int one_or_more(int p) { 
if(P_IS(p,P_EMPTY)) return p; if(P_IS(p,P_NOT_ALLOWED)) return p; return newOneOrMore(p); } static int group(int p1,int p2) { if(P_IS(p1,P_NOT_ALLOWED)) return p1; if(P_IS(p2,P_NOT_ALLOWED)) return p2; if(P_IS(p1,P_EMPTY)) return p2; if(P_IS(p2,P_EMPTY)) return p1; return newGroup(p1,p2); } static int samechoice(int p1,int p2) { if(P_IS(p1,P_CHOICE)) { int p11,p12; Choice(p1,p11,p12); return p12==p2||samechoice(p11,p2); } else return p1==p2; } static int choice(int p1,int p2) { if(P_IS(p1,P_NOT_ALLOWED)) return p2; if(P_IS(p2,P_NOT_ALLOWED)) return p1; if(P_IS(p2,P_CHOICE)) { int p21,p22; Choice(p2,p21,p22); p1=choice(p1,p21); return choice(p1,p22); } if(samechoice(p1,p2)) return p1; if(nullable(p1) && (P_IS(p2,P_EMPTY))) return p1; if(nullable(p2) && (P_IS(p1,P_EMPTY))) return p2; return newChoice(p1,p2); } static int cls(int cn) { if(cn<0) return newExcept(any,newClass(-cn)); if(cn==0) return notAllowed; return newClass(cn); } static int equal_r(int r1,int r2) {return strcmp(regex+r1,regex+r2)==0;} static int hash_r(int r) {return s_hval(regex+r);} static int equal_p(int p1,int p2) { int *pp1=pattern+p1,*pp2=pattern+p2; if(P_TYP(p1)!=P_TYP(p2)) return 0; switch(p_size[P_TYP(p1)]) { case 3: if(pp1[2]!=pp2[2]) return 0; case 2: if(pp1[1]!=pp2[1]) return 0; case 1: return 1; default: assert(0); } return 0; } static int hash_p(int p) { int *pp=pattern+p; int h=0; switch(p_size[P_TYP(p)]) { case 1: h=pp[0]&0xF; break; case 2: h=(pp[0]&0xF)|(pp[1]<<4); break; case 3: h=(pp[0]&0xF)|((pp[1]^pp[2])<<4); break; default: assert(0); } return h*PRIME_P; } static int equal_2(int x1,int x2) {return r2p[x1][0]==r2p[x2][0];} static int hash_2(int x) {return r2p[x][0]*PRIME_2;} static int add_r(char *rx) { int len=strlen(rx)+1; if(i_r+len>len_r) regex=(char*)m_stretch(regex,len_r=2*(i_r+len),i_r,sizeof(char)); strcpy(regex+i_r,rx); return len; } #define ERRPOS #define err(msg) (*er_vprintf)(msg" in \"%s\" at offset %i\n",ap) void rx_default_verror_handler(int erno,va_list ap) { 
(*er_printf)("regular expressions: "); switch(erno) { case RX_ER_BADCH: err("bad character"); break; case RX_ER_UNFIN: err("unfinished expression"); break; case RX_ER_NOLSQ: err("'[' expected"); break; case RX_ER_NORSQ: err("']' expected"); break; case RX_ER_NOLCU: err("'{' expected"); break; case RX_ER_NORCU: err("'}' expected"); break; case RX_ER_NOLPA: err("'(' expected"); break; case RX_ER_NORPA: err("')' expected"); break; case RX_ER_BADCL: err("unknown class"); break; case RX_ER_NODGT: err("digit expected"); break; case RX_ER_DNUOB: err("reversed bounds"); break; case RX_ER_NOTRC: err("range or class expected"); break; default: assert(0); } } void (*rx_verror_handler)(int erno,va_list ap)=&rx_default_verror_handler; static void error_handler(int erno,...) { va_list ap; va_start(ap,erno); (*rx_verror_handler)(erno,ap); va_end(ap); } #define LEN_M RX_LEN_M #define PRIME_M RX_PRIME_M #define LIM_M RX_LIM_M #define M_SIZE 3 #define M_SET(p) memo[i_m][M_SIZE-1]=p #define M_RET(m) memo[m][M_SIZE-1] static int (*memo)[M_SIZE]; static int i_m,len_m; static struct hashtable ht_m; static int new_memo(int p,int c) { int *me=memo[i_m]; ht_deli(&ht_m,i_m); me[0]=p; me[1]=c; return ht_get(&ht_m,i_m); } static int equal_m(int m1,int m2) { int *me1=memo[m1],*me2=memo[m2]; return (me1[0]==me2[0])&&(me1[1]==me2[1]); } static int hash_m(int m) { int *me=memo[m]; return (me[0]^me[1])*PRIME_M; } static void accept_m(void) { if(ht_get(&ht_m,i_m)!=-1) ht_del(&ht_m,i_m); ht_put(&ht_m,i_m++); if(i_m>=LIM_M) i_m=0; if(i_m==len_m) memo=(int(*)[M_SIZE])m_stretch(memo,len_m=i_m*2,i_m,sizeof(int[M_SIZE])); } static void windup(void); static int initialized=0; void rx_init(void) { if(!initialized) { initialized=1; pattern=(int *)m_alloc(len_p=P_AVG_SIZE*LEN_P,sizeof(int)); r2p=(int (*)[2])m_alloc(len_2=LEN_2,sizeof(int[2])); regex=(char*)m_alloc(len_r=R_AVG_SIZE*LEN_R,sizeof(char)); memo=(int (*)[M_SIZE])m_alloc(len_m=LEN_M,sizeof(int[M_SIZE])); ht_init(&ht_p,LEN_P,&hash_p,&equal_p); 
ht_init(&ht_2,LEN_2,&hash_2,&equal_2); ht_init(&ht_r,LEN_R,&hash_r,&equal_r); ht_init(&ht_m,LEN_M,&hash_m,&equal_m); windup(); } } void rx_clear(void) { ht_clear(&ht_p); ht_clear(&ht_2); ht_clear(&ht_r); ht_clear(&ht_m); windup(); } static void windup(void) { i_p=i_r=i_2=i_m=0; pattern[0]=P_ERROR; accept_p(); empty=newEmpty(); notAllowed=newNotAllowed(); any=newAny(); } #define SYM_END 0 #define SYM_CLS 1 #define SYM_ESC 2 #define SYM_CHR 3 static int r0,ri,sym,val,errors; static void error(int erno) { if(!errors) error_handler(erno,regex+r0,u_strlen(regex+r0)-u_strlen(regex+ri)); ++errors; } #include "rx_cls_u.c" static int chclass(void) { int u,cl,rj; ri+=u_get(&u,regex+ri); if(u=='\0') {--ri; error(RX_ER_NOLCU); return 0;} if(u!='{') {error(RX_ER_NOLCU); return 0;} rj=ri; for(;;) { if(regex[rj]=='\0') {ri=rj; error(RX_ER_NORCU); return 0;} if(regex[rj]=='}') { if((cl=s_ntab(regex+ri,rj-ri,clstab,NUM_CLS_U))==NUM_CLS_U) {error(RX_ER_BADCL); cl=0;} ri=rj+1; return cl; } ++rj; } } #define CLS_NL (NUM_CLS_U+1) #define CLS_S (NUM_CLS_U+2) #define CLS_I (NUM_CLS_U+3) #define CLS_C (NUM_CLS_U+4) #define CLS_W (NUM_CLS_U+5) #define NUM_CLS (NUM_CLS_U+6) static void getsym(void) { int u; if(regex[ri]=='\0') sym=SYM_END; else { ri+=u_get(&u,regex+ri); if(u=='\\') { ri+=u_get(&u,regex+ri); switch(u) { case '\0': --ri; error(RX_ER_UNFIN); sym=SYM_END; break; case 'p': sym=SYM_CLS; val=chclass(); break; case 'P': sym=SYM_CLS; val=-chclass(); break; case 's': sym=SYM_CLS; val=CLS_S; break; case 'S': sym=SYM_CLS; val=-CLS_S; break; case 'i': sym=SYM_CLS; val=CLS_I; break; case 'I': sym=SYM_CLS; val=-CLS_I; break; case 'c': sym=SYM_CLS; val=CLS_C; break; case 'C': sym=SYM_CLS; val=-CLS_C; break; case 'd': sym=SYM_CLS; val=CLS_U_Nd; break; case 'D': sym=SYM_CLS; val=-CLS_U_Nd; break; case 'w': sym=SYM_CLS; val=CLS_W; break; case 'W': sym=SYM_CLS; val=-CLS_W; break; case 'n': sym=SYM_ESC; val=0xA; break; case 'r': sym=SYM_ESC; val=0xD; break; case 't': sym=SYM_ESC; val=0x9; 
break;
      /* an escaped metacharacter stands for itself */
      case '\\': case '|': case '.': case '-': case '^': case '?': case '*': case '+':
      case '{': case '}': case '[': case ']': case '(': case ')':
        sym=SYM_ESC; val=u; break;
      /* any other escape is reported, then treated as the character itself */
      default: error(RX_ER_BADCH); sym=SYM_ESC; val=u; break;
      }
    } else {
      switch(u) {
      /* '.' is the complement of the newline class */
      case '.': sym=SYM_CLS; val=-CLS_NL; break;
      default: sym=SYM_CHR; val=u; break;
      }
    }
  }
}

/* reports erno unless the current symbol is the plain character v;
   advances to the next symbol either way */
static void chk_get(int v,int erno) {if(sym!=SYM_CHR||val!=v) error(erno); getsym();}


/* an unescaped '[', ']' or '-' cannot be a member of a character group;
   expands to an if statement, use only as a full statement */
#define chkrch(val) if((val)=='['||(val)==']'||(val)=='-') error(RX_ER_NOTRC)

/* parses the members of a character group -- single characters, ranges
   a-b and classes -- and returns the choice of all of them; parsing stops
   in front of an unescaped ']' or '-', in front of "-[" that follows a
   character, or at the end of the expression (reported as missing ']') */
static int chgroup(void) {
  int p=notAllowed,c;
  for(;;) {
    switch(sym) {
    case SYM_CHR: chkrch(val); /* falls through: a plain character is handled like an escaped one */
    case SYM_ESC: c=val; getsym();
      if(sym==SYM_CHR&&val=='-') {
        if(regex[ri]=='[') {
          /* "-[" starts a subtraction: c is a member, the '-' is left as
             the current symbol for chexpr */
          p=choice(p,newChar(c));
          goto END_OF_GROUP;
        } else {
          getsym();
          switch(sym) {
          case SYM_CHR: chkrch(val); /* falls through */
          case SYM_ESC: p=choice(p,newRange(c,val)); getsym(); break;
          default: error(RX_ER_BADCH); getsym(); break;
          }
        }
      } else {
        p=choice(p,newChar(c));
      }
      break;
    case SYM_CLS: p=choice(p,cls(val)); getsym(); break;
    case SYM_END: error(RX_ER_NORSQ); goto END_OF_GROUP;
    default: assert(0);
    }
    if(sym==SYM_CHR&&(val==']'||val=='-')) goto END_OF_GROUP;
  }
  END_OF_GROUP:;
  return p;
}

/* parses what stands between '[' and ']': an optional '^' that complements
   the group, the group itself, and an optional "-[...]" whose characters
   are removed from the result; the closing ']' is left to the caller */
static int chexpr(void) {
  int p;
  if(sym==SYM_CHR&&val=='^') { getsym();
    p=newExcept(any,chgroup());
  } else {
    p=chgroup();
  }
  if(sym==SYM_CHR&&val=='-') { getsym();
    chk_get('[',RX_ER_NOLSQ); p=newExcept(p,chexpr()); chk_get(']',RX_ER_NORSQ);
  }
  return p;
}

static int expression(void);
/* parses one atom: a character, an escape, a class, a bracketed character
   expression or a parenthesized expression; after an error the returned
   index is 0, the P_ERROR pattern */
static int atom(void) {
  int p=0;
  switch(sym) {
  case SYM_CHR:
    switch(val) {
    case '[': getsym(); p=chexpr(); chk_get(']',RX_ER_NORSQ); break;
    case '(': getsym(); p=expression(); chk_get(')',RX_ER_NORPA); break;
    /* a metacharacter where an atom must start */
    case '{': case '?': case '*': case '+': case '|':
    case ')': case ']': case '}': error(RX_ER_BADCH); getsym(); break;
    default: p=newChar(val); getsym(); break;
    }
    break;
  case SYM_ESC: p=newChar(val); getsym(); break;
  case SYM_CLS: p=cls(val); getsym(); break;
  default: error(RX_ER_BADCH); getsym(); break;
  }
  return p;
}

/* reads a run of decimal digits and returns its value, 0 if there is none */
static int number(void) {
  int
n=0,m; for(;;) { if(sym!=SYM_CHR) goto END_OF_DIGITS; switch(val) { case '0': m=0; break; case '1': m=1; break; case '2': m=2; break; case '3': m=3; break; case '4': m=4; break; case '5': m=5; break; case '6': m=6; break; case '7': m=7; break; case '8': m=8; break; case '9': m=9; break; default: goto END_OF_DIGITS; } n=n*10+m; getsym(); } END_OF_DIGITS:; return n; } static int quantifier(int p0) { int p=empty,n,n0; n=n0=number(); while(n--) p=group(p,p0); if(sym==SYM_CHR) { if(val==',') { getsym(); if(sym==SYM_CHR && val=='}') { p=group(p,choice(empty,one_or_more(p0))); } else { n=number()-n0; if(n<0) {error(RX_ER_DNUOB); n=0;} while(n--) p=group(p,choice(empty,p0)); } } } else error(RX_ER_NODGT); return p; } static int piece(void) { int p; p=atom(); if(sym==SYM_CHR) { switch(val) { case '{': getsym(); p=quantifier(p); chk_get('}',RX_ER_NOLCU); break; case '?': getsym(); p=choice(empty,p); break; case '*': getsym(); p=choice(empty,one_or_more(p)); break; case '+': getsym(); p=one_or_more(p); break; default: break; } } return p; } static int branch(void) { int p; p=empty; while(!(sym==SYM_END||(sym==SYM_CHR&&(val=='|'||val==')')))) p=group(p,piece()); return p; } static int expression(void) { int p; p=branch(); while(sym==SYM_CHR&&val=='|') { getsym(); p=choice(p,branch()); } return p; } static void bind(int r) { r0=ri=r; sym=-1; errors=0; getsym(); } static int compile(char *rx) { int r=0,p=0,d_r; d_r=add_r(rx); if((r=ht_get(&ht_r,i_r))==-1) { if(rx_compact&&i_p>=P_AVG_SIZE*LIM_P) {rx_clear(); d_r=add_r(rx);} ht_put(&ht_r,r=i_r); i_r+=d_r; bind(r); p=expression(); if(sym!=SYM_END) error(RX_ER_BADCH); r2p[i_2][0]=r; r2p[i_2][1]=p; ht_put(&ht_2,i_2++); if(i_2==len_2) r2p=(int(*)[2])m_stretch(r2p,len_2=2*i_2,i_2,sizeof(int[2])); } else { r2p[i_2][0]=r; p=r2p[ht_get(&ht_2,i_2)][1]; } return p; } #include "rx_cls_ranges.c" static int in_class(int c,int cn) { switch(cn) { case 0: return 0; case CLS_U_C: return 
in_class(c,CLS_U_Cc)||in_class(c,CLS_U_Cf)||in_class(c,CLS_U_Co);
/* a class backed by one table: CLS_U_<NAME> is looked up in <NAME>Ranges,
   a table of pairs whose length is taken from the array itself */
#define RANGE_CASE(NAME) \
  case CLS_U_##NAME: return u_in_ranges(c,NAME##Ranges,sizeof(NAME##Ranges)/sizeof(int[2]))
  RANGE_CASE(Cc);
  RANGE_CASE(Cf);
  RANGE_CASE(Co);
  /* blocks */
  RANGE_CASE(IsAlphabeticPresentationForms);
  RANGE_CASE(IsArabic);
  RANGE_CASE(IsArabicPresentationForms_A);
  RANGE_CASE(IsArabicPresentationForms_B);
  RANGE_CASE(IsArmenian);
  RANGE_CASE(IsArrows);
  RANGE_CASE(IsBasicLatin);
  RANGE_CASE(IsBengali);
  RANGE_CASE(IsBlockElements);
  RANGE_CASE(IsBopomofo);
  RANGE_CASE(IsBopomofoExtended);
  RANGE_CASE(IsBoxDrawing);
  RANGE_CASE(IsBraillePatterns);
  RANGE_CASE(IsByzantineMusicalSymbols);
  RANGE_CASE(IsCJKCompatibility);
  RANGE_CASE(IsCJKCompatibilityForms);
  RANGE_CASE(IsCJKCompatibilityIdeographs);
  RANGE_CASE(IsCJKCompatibilityIdeographsSupplement);
  RANGE_CASE(IsCJKRadicalsSupplement);
  RANGE_CASE(IsCJKSymbolsandPunctuation);
  RANGE_CASE(IsCJKUnifiedIdeographs);
  RANGE_CASE(IsCJKUnifiedIdeographsExtensionA);
  RANGE_CASE(IsCJKUnifiedIdeographsExtensionB);
  RANGE_CASE(IsCherokee);
  RANGE_CASE(IsCombiningDiacriticalMarks);
  RANGE_CASE(IsCombiningHalfMarks);
  RANGE_CASE(IsCombiningMarksforSymbols);
  RANGE_CASE(IsControlPictures);
  RANGE_CASE(IsCurrencySymbols);
  RANGE_CASE(IsCyrillic);
  RANGE_CASE(IsDeseret);
  RANGE_CASE(IsDevanagari);
  RANGE_CASE(IsDingbats);
  RANGE_CASE(IsEnclosedAlphanumerics);
  RANGE_CASE(IsEnclosedCJKLettersandMonths);
  RANGE_CASE(IsEthiopic);
  RANGE_CASE(IsGeneralPunctuation);
  RANGE_CASE(IsGeometricShapes);
  RANGE_CASE(IsGeorgian);
  RANGE_CASE(IsGothic);
  RANGE_CASE(IsGreek);
  RANGE_CASE(IsGreekExtended);
  RANGE_CASE(IsGujarati);
  RANGE_CASE(IsGurmukhi);
  RANGE_CASE(IsHalfwidthandFullwidthForms);
  RANGE_CASE(IsHangulCompatibilityJamo);
  RANGE_CASE(IsHangulJamo);
  RANGE_CASE(IsHangulSyllables);
  RANGE_CASE(IsHebrew);
  RANGE_CASE(IsHiragana);
  RANGE_CASE(IsIPAExtensions);
  RANGE_CASE(IsIdeographicDescriptionCharacters);
  RANGE_CASE(IsKanbun);
  RANGE_CASE(IsKangxiRadicals);
  RANGE_CASE(IsKannada);
  RANGE_CASE(IsKatakana);
  RANGE_CASE(IsKhmer);
  RANGE_CASE(IsLao);
  RANGE_CASE(IsLatin_1Supplement);
  RANGE_CASE(IsLatinExtended_A);
  RANGE_CASE(IsLatinExtended_B);
  RANGE_CASE(IsLatinExtendedAdditional);
  RANGE_CASE(IsLetterlikeSymbols);
  RANGE_CASE(IsMalayalam);
  RANGE_CASE(IsMathematicalAlphanumericSymbols);
  RANGE_CASE(IsMathematicalOperators);
  RANGE_CASE(IsMiscellaneousSymbols);
  RANGE_CASE(IsMiscellaneousTechnical);
  RANGE_CASE(IsMongolian);
  RANGE_CASE(IsMusicalSymbols);
  RANGE_CASE(IsMyanmar);
  RANGE_CASE(IsNumberForms);
  RANGE_CASE(IsOgham);
  RANGE_CASE(IsOldItalic);
  RANGE_CASE(IsOpticalCharacterRecognition);
  RANGE_CASE(IsOriya);
  RANGE_CASE(IsPrivateUse);
  RANGE_CASE(IsRunic);
  RANGE_CASE(IsSinhala);
  RANGE_CASE(IsSmallFormVariants);
  RANGE_CASE(IsSpacingModifierLetters);
  RANGE_CASE(IsSpecials);
  RANGE_CASE(IsSuperscriptsandSubscripts);
  RANGE_CASE(IsSyriac);
  RANGE_CASE(IsTags);
  RANGE_CASE(IsTamil);
  RANGE_CASE(IsTelugu);
  RANGE_CASE(IsThaana);
  RANGE_CASE(IsThai);
  RANGE_CASE(IsTibetan);
  RANGE_CASE(IsUnifiedCanadianAboriginalSyllabics);
  RANGE_CASE(IsYiRadicals);
  RANGE_CASE(IsYiSyllables);
  /* a one-letter category is the union of its two-letter members */
  case CLS_U_L: return in_class(c,CLS_U_Ll)||in_class(c,CLS_U_Lm)||in_class(c,CLS_U_Lo)||in_class(c,CLS_U_Lt)||in_class(c,CLS_U_Lu);
  RANGE_CASE(Ll);
  RANGE_CASE(Lm);
  RANGE_CASE(Lo);
  RANGE_CASE(Lt);
  RANGE_CASE(Lu);
  case CLS_U_M: return in_class(c,CLS_U_Mc)||in_class(c,CLS_U_Me)||in_class(c,CLS_U_Mn);
  RANGE_CASE(Mc);
  RANGE_CASE(Me);
  RANGE_CASE(Mn);
  case CLS_U_N: return in_class(c,CLS_U_Nd)||in_class(c,CLS_U_Nl)||in_class(c,CLS_U_No);
  RANGE_CASE(Nd);
  RANGE_CASE(Nl);
  RANGE_CASE(No);
  case CLS_U_P: return in_class(c,CLS_U_Pc)||in_class(c,CLS_U_Pd)||in_class(c,CLS_U_Pe)||in_class(c,CLS_U_Pf)||in_class(c,CLS_U_Pi)||in_class(c,CLS_U_Po)||in_class(c,CLS_U_Ps);
  RANGE_CASE(Pc);
  RANGE_CASE(Pd);
  RANGE_CASE(Pe);
  RANGE_CASE(Pf);
  RANGE_CASE(Pi);
  RANGE_CASE(Po);
  RANGE_CASE(Ps);
  case CLS_U_S: return in_class(c,CLS_U_Sc)||in_class(c,CLS_U_Sk)||in_class(c,CLS_U_Sm)||in_class(c,CLS_U_So);
#undef RANGE_CASE
  case
CLS_U_Sc: return u_in_ranges(c,ScRanges,sizeof(ScRanges)/sizeof(int[2])); case CLS_U_Sk: return u_in_ranges(c,SkRanges,sizeof(SkRanges)/sizeof(int[2])); case CLS_U_Sm: return u_in_ranges(c,SmRanges,sizeof(SmRanges)/sizeof(int[2])); case CLS_U_So: return u_in_ranges(c,SoRanges,sizeof(SoRanges)/sizeof(int[2])); case CLS_U_Z: return in_class(c,CLS_U_Zl)||in_class(c,CLS_U_Zp)||in_class(c,CLS_U_Zs); case CLS_U_Zl: return u_in_ranges(c,ZlRanges,sizeof(ZlRanges)/sizeof(int[2])); case CLS_U_Zp: return u_in_ranges(c,ZpRanges,sizeof(ZpRanges)/sizeof(int[2])); case CLS_U_Zs: return u_in_ranges(c,ZsRanges,sizeof(ZsRanges)/sizeof(int[2])); case CLS_NL: return c=='\n'||c=='\r'; case CLS_S: return xmlc_white_space(c); case CLS_I: return xmlc_base_char(c)||xmlc_ideographic(c)||c=='_'||c==':'; case CLS_C: return in_class(c,CLS_I)||xmlc_digit(c)||xmlc_combining_char(c)||xmlc_extender(c)||c=='.'||c=='-'; case CLS_W: return !(in_class(c,CLS_U_P)||in_class(c,CLS_U_Z)||in_class(c,CLS_U_C)); default: assert(0); } return 0; } static int drv(int p,int c) { int p1,p2,cf,cl,cn,ret,m; assert(!P_IS(p,P_ERROR)); m=new_memo(p,c); if(m!=-1) return M_RET(m); switch(P_TYP(p)) { case P_NOT_ALLOWED: case P_EMPTY: ret=notAllowed; break; case P_CHOICE: Choice(p,p1,p2); ret=choice(drv(p1,c),drv(p2,c)); break; case P_GROUP: Group(p,p1,p2); {int p11=group(drv(p1,c),p2); ret=nullable(p1)?choice(p11,drv(p2,c)):p11;} break; case P_ONE_OR_MORE: OneOrMore(p,p1); ret=group(drv(p1,c),choice(empty,p)); break; case P_EXCEPT: Except(p,p1,p2); ret=nullable(drv(p1,c))&&!nullable(drv(p2,c))?empty:notAllowed; break; case P_RANGE: Range(p,cf,cl); ret=cf<=c&&c<=cl?empty:notAllowed; break; case P_CLASS: Class(p,cn); ret=in_class(c,cn)?empty:notAllowed; break; case P_ANY: ret=empty; break; case P_CHAR: Char(p,cf); ret=c==cf?empty:notAllowed; break; default: ret=0; assert(0); } new_memo(p,c); M_SET(ret); accept_m(); return ret; } int rx_check(char *rx) {(void)compile(rx); return !errors;} int rx_match(char *rx,char *s,int 
n) { int p=compile(rx); if(!errors) { char *end=s+n; int u; for(;;) { if(p==notAllowed) return 0; if(s==end) return nullable(p); s+=u_get(&u,s); p=drv(p,u); } } else return 0; } int rx_rmatch(char *rx,char *s,int n) { int p=compile(rx); if(!errors) { char *end=s+n; int u; for(;;) { if(p==notAllowed) return 0; if(s==end) return nullable(p); s+=u_get(&u,s); if(xmlc_white_space(u)) u=' '; p=drv(p,u); } } else return 0; } int rx_cmatch(char *rx,char *s,int n) { int p=compile(rx); if(!errors) { char *end=s+n; int u; SKIP_SPACE: for(;;) { if(s==end) return nullable(p); s+=u_get(&u,s); if(!xmlc_white_space(u)) break; } for(;;) { if(p==notAllowed) return 0; if(xmlc_white_space(u)) { u=' '; p=drv(p,u); if(p==notAllowed) { for(;;) { if(s==end) return 1; s+=u_get(&u,s); if(!xmlc_white_space(u)) return 0; } } else goto SKIP_SPACE; } p=drv(p,u); if(s==end) goto SKIP_SPACE; s+=u_get(&u,s); } } else return 0; } --- NEW FILE: er.c --- /* $Id: er.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #include <stdio.h> #include "er.h" int (*er_printf)(char *format,...)=&er_default_printf; int (*er_vprintf)(char *format,va_list ap)=&er_default_vprintf; int er_default_printf(char *format,...) 
{ int ret; va_list ap; va_start(ap,format); ret=(*er_vprintf)(format,ap); va_end(ap); return ret; } int er_default_vprintf(char *format,va_list ap) {return vfprintf(stderr,format,ap);} --- NEW FILE: s.h --- /* $Id: s.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #ifndef S_H #define S_H 1 /* compares two strings, s1 is null terminated, s2 is n characters long */ extern int s_cmpn(char *s1,char *s2,int n2); /* compares two tokens, s1 is null terminated, s2 is n characters long */ extern int s_tokcmpn(char *s1,char *s2,int n2); /* hash value for a zero-terminated string */ extern int s_hval(char *s); /* strdup is a non-standard function */ extern char *s_clone(char *s); /* compute the absolute path from a relative path and a base path; the caller must ensure that there is enough space in r: size(r) > strlen(r)+strlen(b) returns a pointer to the string containing the relative path */ extern char *s_abspath(char *r,char *b); /* find a string in a sorted array, return the index, or size on failure */ extern int s_tab(char *s,char *tab[],int size); extern int s_ntab(char *s,int len,char *tab[],int size); extern void s_test(void); #endif --- NEW FILE: xsd_tm.h --- /* $Id: xsd_tm.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #ifndef XSD_TM_H #define XSD_TM_H 1 struct xsd_tm {int days,secs,mics,tz;}; /* fmt is a combination of ymdtz */ extern void xsd_mktm(struct xsd_tm *tmp,char *fmt,char *val); extern void xsd_mktmn(struct xsd_tm *tmp,char *fmt,char *s,int n); /* -1 - less, 0 - equal, 1 - greater, other - unknown */ extern int xsd_tmcmp(struct xsd_tm *tmp1, struct xsd_tm *tmp2); #endif --- NEW FILE: xsd_tm.c --- /* $Id: xsd_tm.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #include <stdlib.h> /*strtol*/ #include <limits.h> #include <string.h> /*strlen*/ #include <assert.h> #include "xsd_tm.h" static int leap(int yr) {return !(yr%4)&&((yr%100)||!(yr%400));} static int y2d(int yr) {return yr*365+yr/4-yr/100+yr/400;} static int ymd2dn(int yr,int mo,int dy) { switch(mo) { case 12: dy+=30; 
case 11: dy+=31; case 10: dy+=30; case 9: dy+=31; case 8: dy+=31; case 7: dy+=30; case 6: dy+=31; case 5: dy+=30; case 4: dy+=31; case 3: dy+=28; case 2: dy+=31; case 1: break; } if(mo>2&&leap(yr)) ++dy; return dy; } static int ymd2ds(int yr,int mo,int dy) { return (yr>=0?y2d(yr-1):y2d(yr)-366)+ymd2dn(yr,mo,dy); } #define DAYSECS 86400 #define TZSECS 50400 static void addsecs(struct xsd_tm *tmp,int secs) { tmp->secs+=secs; if(tmp->secs<0) { --tmp->days; tmp->secs+=DAYSECS; } else if(tmp->secs>=DAYSECS) { ++tmp->days; tmp->secs-=DAYSECS; } } void xsd_mktmn(struct xsd_tm *tmp,char *fmt,char *s,int n) { char *end=s+n; int yr=2000,mo=1,dy=1,hr=0,mi=0,zh=15,zm=0; double se=0.0; for(;;) { if(s==end||!*fmt) break; switch(*s) { case '-': switch(*fmt) { case 'y': ++fmt; yr=strtol(s,&s,10); continue; case 'z': ++fmt; ++s; zh=strtol(s,&s,10); ++s; zm=strtol(s,&s,10); continue; } break; case '+': assert(*fmt=='z'); ++fmt; zh=-strtol(s,&s,10); ++s; zm=-strtol(s,&s,10); continue; case 'Z': assert(*fmt=='z'); ++fmt; zh=0; zm=0; ++s; continue; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': switch(*(fmt++)) { case 'y': yr=strtol(s,&s,10); continue; case 'm': mo=strtol(s,&s,10); continue; case 'd': dy=strtol(s,&s,10); continue; case 't': hr=strtol(s,&s,10); ++s; mi=strtol(s,&s,10); ++s; se=strtod(s,&s); continue; } break; } ++s; } tmp->mics=(int)((se-(int)se)*1000000+0.5); tmp->secs=(int)se+60*(mi+60*hr); tmp->days=ymd2ds(yr,mo,dy); if((tmp->tz=(zh!=15))) addsecs(tmp,60*(zm+60*zh)); } void xsd_mktm(struct xsd_tm *tmp,char *fmt,char *val) {xsd_mktmn(tmp,fmt,val,strlen(val));} static int tmcmp(struct xsd_tm *tmp1, struct xsd_tm *tmp2) { int dd=tmp1->days-tmp2->days, ds=tmp1->secs-tmp2->secs, dm=tmp1->mics-tmp2->mics; return dd<0?-1:dd>0?1:ds<0?-1:ds>0?1:dm<0?-1:dm>0?1:0; } extern int xsd_tmcmp(struct xsd_tm *tmp1, struct xsd_tm *tmp2) { if(tmp1->tz==tmp2->tz) { return tmcmp(tmp1,tmp2); } else if(tmp1->tz) { struct xsd_tm tm; 
tm.mics=tmp2->mics; tm.days=tmp2->days; tm.secs=tmp2->secs; addsecs(&tm,TZSECS); if(tmcmp(tmp1,&tm)==1) return 1; tm.days=tmp2->days; tm.secs=tmp2->secs; addsecs(&tm,-TZSECS); if(tmcmp(tmp1,&tm)==-1) return -1; return 2; } else return -xsd_tmcmp(tmp2,tmp1); } --- NEW FILE: readme.txt --- RNV -- Relax NG Compact Syntax Validator in C Version 1.7 Table of Contents News since 1.6 New since 1.5 Acknowledgements Package Contents Installation Invocation Limitations Applications ARX RVP User-Defined Datatype Libraries Datatype Library Plug-in Scheme Datatypes New versions Abstract RNV is an implementation of Relax NG Compact Syntax, http://relaxng.org/compact-20021121.html. It is written in ANSI C, the command-line utility uses Expat, http://www.jclark.com/xml/expat.html. It is distributed under the BSD license, see license.txt for details. RNV is a part of an on-going work, and the current code can have bugs and shortcomings; however, it validates documents against a number of grammars. I use it. News since 1.6 The format for error messages is similar to that of Jing (file name, line and column are colon-separated). Entity and DTD processing has been moved out of RNV; use XX, available from the same download location, to expand entities. New since 1.5 Better reporting: required and permitted content is reported separately; it helps debug grammars. Several bugfixes; I relied on an acquired test suite and published schemata, but have found that I can make more bugs than they cover, thus a reworked and extended test suite is now used for testing. The code has also been cleaned up and simplified in places during porting to Plan9. Acknowledgements I would like to thank those who have helped me develop RNV. Dave Pawson has been the first user of the program. Alexander Peshkov helps me with testing and I have been able to correct very well hidden errors with his help. 
Sebastian Rahtz has encouraged me to continue working on RNV since the first release, and has helped me to improve it on more than one occasion. Package Contents Note I have put rnv.exe and arx.exe, Win32 executables statically linked with a current version of Expat from http://expat.sourceforge.net/, into a separate distribution archive (with name ending in -win32bin). It contains only the program binaries and should be available from the same location as the source distribution. The package consists of: * the license, license.txt; * the source code, *.[ch]; * the source code map, src.txt; * Makefile.bsd for BSD make; * Makefile.gnu for GNU Make; * Makefile.bcc for Win32 and Borland C/C++ Compiler; * tools/xck, a simple shell script I am using to validate documents; * tools/*.rnc, sample Relax NG grammars; * scm/*.scm, program modules in Scheme, for Scheme Datatypes Library; * the log of changes, changes.txt; * this file, readme.txt. * Other scripts, samples and plug-ins appear in tools/ eventually. Installation On Unix-like systems, run make -f Makefile.gnu or make -f Makefile.bsd, depending on which flavour of make you have; Makefile.bsd should probably work on SysV, but, unfortunately, I have had nowhere to check this for the last couple of years. (If you are using Expat 1.2, define EXPAT_H as xmlparse.h instead of expat.h). On Windows, use rnv.exe. To recompile from the sources, use Makefile.bcc with Borland C/C++ Compiler, or create a makefile or project for your environment. Invocation The command-line syntax is rnv {-q|-p|-c|-s|-v|-h} grammar.rnc {document1.xml} If no documents are specified, RNV attempts to read the XML document from the standard input. 
The options are: -q names of files being processed are not printed; in error messages, expected elements and attributes are not listed; -n <num> sets the maximum number of reported expected elements and attributes, -q sets this to 0 and can be overridden; -p copies the input to the output; -c if the only argument is a grammar, checks the grammar and exits; -s uses less memory and runs slower; -v prints version number; -h displays usage summary and exits. Limitations * RNV assumes that the encoding of the syntax file is UTF-8. * Support for XML Schema Part 2: Datatypes is partial. + ordering for duration is not implemented; + only local parts of QName values are checked for equality, ENTITY values are only checked for lexical validity. * The schema parser does not check that all restrictions are obeyed, in particular, restrictions 7.3 and 7.4 are not checked. * RNV for Win32 platforms is a Unix program compiled on Win32. It expects file paths to be written with forward slashes; if a schema is in a different directory and includes or refers to external files, then the schema's path must be written in the Unix way for the relative paths to work. For example, under Windows, rnv that uses ..\schema\docbook.rnc to validate userguide.dbx should be invoked as rnv.exe ../schema/docbook.rnc userguide.dbx Applications The distribution includes several utilities built upon RNV; they are listed and described in the following sections. ARX ARX is a tool to automatically determine the type of a document from its name and contents. It is inspired by James Clark's schema location approach for nXML, http://groups.yahoo.com/group/emacs-nxml-mode/message/259, and is a development of the idea described in http://relaxng.org/pipermail/relaxng-user/2003-December/000214.html. ARX is a command-line utility. The invocation syntax is arx {-n|-v|-h} document.xml arx.conf {arx.conf} ARX either prints a string corresponding to the document's type or nothing if the type cannot be determined. 
The options are: -n turns off prepending base path of the configuration file to the result, even if it looks like a relative path (useful when the configuration file and the grammars are in separate directories, or for association with something that is not a file); -v prints current version; -h displays usage summary and exits. The configuration file must conform to the following grammar: arx = grammars route* grammars = "grammars" "{" type2string+ "}" type2string = type "=" literal type = nmtoken route = match|nomatch|valid|invalid match = "=~" regexp "=>" type nomatch = "!~" regexp "=>" type valid = "valid" "{" rng "}" "=>" type invalid = "!valid" "{" rng "}" "=>" type literal=string in '"', '"' inside must be preceded by '\' regexp=string in '/', '/' inside must be preceded by '\' rng=Relax NG Compact Syntax Comments start with # and continue till the end of line. Rules are processed sequentially, the first matching rule determines the file's type. Relax NG templates are matched against file contents, regular expressions are applied to file names. The sample below associates documents with grammars for XSLT, DocBook or XSL FO. grammars { docbook="docbook.rnc" xslt="xslt.rnc" xslfo="fo.rnc" } valid { start = element (book|article|chapter|reference) {any} any = (element * {any}|attribute * {text}|text)* } => docbook !valid { default namespace xsl = "http://www.w3.org/1999/XSL/Transform" start = element *-xsl:* {not-xsl} not-xsl = (element *-xsl:* {not-xsl}|attribute * {text}|text)* } => xslt =~/.*\.xsl/ => xslt =~/.*\.fo/ => xslfo ARX can also be used to link documents to any type of information or processing. RVP RVP is an abbreviation for Relax NG Validation Pipe. It reads validation primitives from the standard input and reports the result to the standard output; its main purpose is to ease embedding of a Relax NG validator into various languages and environments. An application would launch RVP as a parallel process and use a simple protocol to perform validation. 
The protocol, in BNF, is: query ::= ( quit | start | start-tag-open | attribute | start-tag-close | text | end-tag) z. quit ::= "quit". start ::= "start" [gramno]. start-tag-open ::= "start-tag-open" patno name. attribute ::= "attribute" patno name value. start-tag-close ::= "start-tag-close" patno name. text ::= ("text"|"mixed") patno text. end-tag ::= "end-tag" patno name. response ::= (ok | er | error) z. ok ::= "ok" patno. er ::= "er" patno erno. error ::= "error" patno erno error. z ::= "\0" . * RVP assumes that the last colon in a name separates the local part from the namespace URI (it is what one gets if one specifies `:' as namespace separator to Expat). * Error codes can be grabbed from rvp sources by grep _ER_ *.h and OR-ing them with corresponding masks from erbit.h. Additionally, error 0 is the protocol format error. * Either er or error responses are returned, not both; -q chooses between concise and verbose forms (invocation syntax described later). * start passes the index of a grammar (first grammar in the list of command-line arguments has number 0); if the number is omitted, 0 is assumed. * quit is not the opposite of start; instead, it quits RVP. The command-line syntax is: rvp {-q|-s|-v|-h} {schema.rnc} The options are: -q returns only error numbers, suppresses messages; -s takes less memory and runs slower; -v prints current version; -h displays usage summary and exits. To assist embedding RVP, samples in Perl (tools/rvp.pl) and Python (tools/rvp.py) are provided. The scripts use Expat wrappers for each of the languages to parse documents; they take a Relax NG grammar (in the compact syntax) as the command line argument and read the XML from the standard input. For example, the following commands validate rnv.dbx against docbook.rnc: perl rvp.pl docbook.rnc < rnv.dbx python rvp.py docbook.rnc < rnv.dbx The scripts are kept simple and unobscured to illustrate the technique, rather than being designed as general-purpose modules. 
Programmers using Perl, Python, Ruby and other languages are encouraged to implement and share reusable RVP-based components for their languages of choice. User-Defined Datatype Libraries Relax NG relies on XML Schema Datatypes to check validity of data in an XML document. The specification allows the implementation to support other datatype libraries; a library is required to provide two services, datatypeAllows and datatypeEqual. A powerful and popular technique is the use of string regular expressions to restrict values of attributes and character data. However, XML Schema regular expressions must be written as single strings, without any parameterization; they often grow to several dozens of characters in length and are very hard to read or debug. A solution to this problem would be to allow the user to define custom datatypes and to specify them in a high-level programming language. The user can then either use regular expressions as such, employ lex for lexical analysis, or any other technique which is best suited for each particular case (for example XSL FO datatypes would benefit from a custom datatype library). With many datatype libraries eventually implemented, it is likely that a clearer picture of the right language for validation of data will eventually emerge. RNV provides two different ways to implement this solution; I believe that they correspond to different tastes and traditions. In both cases, a high-level language can be used to implement a datatype library, the language is not related to the implementation language of RNV, and RNV need not be recompiled to add a new datatype library. Datatype Library Plug-in A datatype plug-in is an executable. RNV invokes it as either program allows type key value ... data or program equal type data1 data2 program is the executable's name, the rest is the command line; key and value pairs are datatype parameters and can be repeated. 
The program is executed for each datatype in library http://davidashen.net/relaxng/pluggable-datatypes; the exit status is 0 for success, non-zero for failure. Both RNV and RVP can use pluggable datatypes, and must be compiled with DXL_EXC set to 1 (make DXL_EXC=1) to support them, in which case they accept an additional command-line option -d with the name of the plugin as the argument. An implementation of XML Schema datatypes as a plugin (in C) is included in the distribution, see xsdck.c. For example, rnv -d xsdck xslt-dxl.rnc $HOME/work/docbook/xsl/*/*.xsl will validate all DocBook XSL stylesheets on my workstation against a grammar for XSLT 1.0 modified to use RNV Pluggable Datatypes Library instead of XML Schema Datatypes. Scheme Datatypes Another way to add custom datatypes to RNV is to use the built-in Scheme interpreter (SCM, http://www.swiss.ai.mit.edu/~jaffer/SCM.html) to implement the library in Scheme, a dialect of Lisp. This solution is more flexible and robust than the previous one, but requires knowledge of a particular programming language (or at least desire to learn it, and the result is definitely worth the effort). To support it, SCM must be installed on the computer, and RNV or RVP must be compiled with DSL_SCM set to 1 (make DSL_SCM=1), in which case they accept an additional option -e with the name of a scheme program as an argument. The datatype library is bound to http://davidashen.net/relaxng/scheme-datatypes; a sample implementation is in scm/dsl.scm. For example, rnv -e scm/dsl.scm xslt-dsl.rnc $HOME/work/docbook/xsl/*/*.xsl checks the stylesheets against an XSLT 1.0 grammar modified to use an RNV Scheme Datatypes Library implemented in scm/dsl.scm. A Datatype Library in Scheme must provide two functions in the top-level environment: (dsl-equal? string string string) and (dsl-allows? string '((string . 
string)*) string) To assist development of datatype libraries, a Scheme implementation of XML Schema Regular Expressions is included in the distribution as scm/rx.scm. The Regular Expression library is not just a way to re-implement the built-in datatypes. Owing to flexibility of the language it is much easier to write and debug regular expressions in Scheme, even if they are to be used with built-in XML Schema Datatypes in the end. For example, a regular expression for e-mail address, with insignificant simplifications, is: pattern= "(\(([^\(\)\\]|\\.)*\) )?" ~ "([a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+" ~ "(\.[a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+)*" ~ """|"([^"\\]|\\.)*")""" ~ "@" ~ "([a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+" ~ "(\.[a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+)*" ~ "|\[([^\[\]\\]|\\.)*\])" ~ "( \(([^\(\)\\]|\\.)*\))?" which, even split into four lines, is ugly-looking and hard to read. Meanwhile, it consists of a few repeating subexpressions, which could easily be factored out, but the syntax does not have the means for that. Using Scheme interpreter, it is as simple as (define addr-spec-regex (let* ( (atom "[a-zA-Z0-9!#$%&'*+\\-/=?\\^_`{|}~]+") (person "\"([^"\\\\]|\\\\.)\"") (location "\\[([^\\[\\]\\\\]|\\\\.)*\\]") (domain (string-append atom "(\\." atom ")*"))) (string-append "(" domain "|" person ")" "@" "(" domain "|" location ")"))) This code is much simpler to read and debug, and then the parts can be joined and added to the grammar for production use. Furthermore, it is easy to implement the parsing of structured regular expressions embedded into parameters of datatypes in Relax NG itself. dsl.scm, the sample datatype library, can handle parameter s-pattern with regular expressions split into named parts, and the example above becomes: s-pattern=""" comment = "\(([^\(\)\\]|\\.)*\)" atom = "[a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+" atoms = atom "(\." 
atom ")*" person = "\"([^\"\\]|\\.)*\"" location = "\[([^\[\]\\]|\\.)*\]" local-part = "(" atom "|" person ")" domain = "(" atoms "|" location ")" start = "(" comment " )?" local-part "@" domain "( " comment ")?" """ addr-spec-dsl.rnc is included in the distribution. New versions Visit http://davidashen.net/ for news and downloads. --- NEW FILE: sc.c --- /* $Id: sc.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #include <assert.h> /*assert*/ #include "m.h" #include "ll.h" #include "sc.h" #define BASE -1 #define LOCK -2 #define LEN SC_LEN static void windup(struct sc_stack *stp) { stp->top=0; sc_open(stp); } void sc_init(struct sc_stack *stp) { stp->tab=(int(*)[SC_RECSIZE])m_alloc(stp->len=LEN,sizeof(int[SC_RECSIZE])); windup(stp); } void sc_clear(struct sc_stack *stp) { windup(stp); } void sc_open(struct sc_stack *stp) { stp->tab[stp->base=stp->top++][1]=BASE; if(stp->top==stp->len) stp->tab=(int(*)[SC_RECSIZE])m_stretch( stp->tab,stp->len*=stp->top*2,stp->top,sizeof(int[SC_RECSIZE])); } int sc_void(struct sc_stack *stp) { return stp->base==0; } void sc_lock(struct sc_stack *stp) { stp->tab[stp->base][1]=LOCK; } int sc_locked(struct sc_stack *stp) { return stp->tab[stp->base][1]==LOCK; } void sc_close(struct sc_stack *stp) { stp->top=stp->base; while(stp->tab[--stp->base][1]>BASE); } int sc_find(struct sc_stack *stp,int key) { int i=stp->top; stp->tab[stp->base][0]=key; while(stp->tab[--i][0]!=key); return i!=stp->base?i:0; } int sc_add(struct sc_stack *stp,int key,int val,int aux) { int i=stp->top; assert(!sc_locked(stp)); stp->tab[i][0]=key; stp->tab[i][1]=val; stp->tab[i][2]=aux; if(++stp->top==stp->len) stp->tab=(int(*)[SC_RECSIZE])m_stretch( stp->tab,stp->len=stp->top*2,stp->top,sizeof(int[SC_RECSIZE])); return i; } --- NEW FILE: test.c --- #include "s.h" #include "xsd.h" int main() { s_test(); xsd_test(); return 0; } --- NEW FILE: sc.h --- /* $Id: sc.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #ifndef SC_H #define SC_H 1 #define SC_RECSIZE 3 /* 0 - key, 1 - value, 2 
- auxiliary */ struct sc_stack { int (*tab)[SC_RECSIZE]; int len,base,top; }; extern void sc_init(struct sc_stack *stp); extern void sc_clear(struct sc_stack *stp); extern void sc_open(struct sc_stack *stp); extern void sc_lock(struct sc_stack *stp); extern void sc_close(struct sc_stack *stp); extern int sc_void(struct sc_stack *sp); extern int sc_locked(struct sc_stack *stp); extern int sc_find(struct sc_stack *stp,int key); /* returns 0 if not found, index in tab otherwise */ extern int sc_add(struct sc_stack *stp,int key,int val,int aux); /* returns index for the new record */ #endif --- NEW FILE: rx_cls_u.c --- /* $Id: rx_cls_u.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #define CLS_U_ 0 #define CLS_U_C 1 #define CLS_U_Cc 2 #define CLS_U_Cf 3 #define CLS_U_Co 4 #define CLS_U_IsAlphabeticPresentationForms 5 #define CLS_U_IsArabic 6 #define CLS_U_IsArabicPresentationForms_A 7 #define CLS_U_IsArabicPresentationForms_B 8 #define CLS_U_IsArmenian 9 #define CLS_U_IsArrows 10 #define CLS_U_IsBasicLatin 11 #define CLS_U_IsBengali 12 #define CLS_U_IsBlockElements 13 #define CLS_U_IsBopomofo 14 #define CLS_U_IsBopomofoExtended 15 #define CLS_U_IsBoxDrawing 16 #define CLS_U_IsBraillePatterns 17 #define CLS_U_IsByzantineMusicalSymbols 18 #define CLS_U_IsCJKCompatibility 19 #define CLS_U_IsCJKCompatibilityForms 20 #define CLS_U_IsCJKCompatibilityIdeographs 21 #define CLS_U_IsCJKCompatibilityIdeographsSupplement 22 #define CLS_U_IsCJKRadicalsSupplement 23 #define CLS_U_IsCJKSymbolsandPunctuation 24 #define CLS_U_IsCJKUnifiedIdeographs 25 #define CLS_U_IsCJKUnifiedIdeographsExtensionA 26 #define CLS_U_IsCJKUnifiedIdeographsExtensionB 27 #define CLS_U_IsCherokee 28 #define CLS_U_IsCombiningDiacriticalMarks 29 #define CLS_U_IsCombiningHalfMarks 30 #define CLS_U_IsCombiningMarksforSymbols 31 #define CLS_U_IsControlPictures 32 #define CLS_U_IsCurrencySymbols 33 #define CLS_U_IsCyrillic 34 #define CLS_U_IsDeseret 35 #define CLS_U_IsDevanagari 36 #define CLS_U_IsDingbats 37 #define 
CLS_U_IsEnclosedAlphanumerics 38 #define CLS_U_IsEnclosedCJKLettersandMonths 39 #define CLS_U_IsEthiopic 40 #define CLS_U_IsGeneralPunctuation 41 #define CLS_U_IsGeometricShapes 42 #define CLS_U_IsGeorgian 43 #define CLS_U_IsGothic 44 #define CLS_U_IsGreek 45 #define CLS_U_IsGreekExtended 46 #define CLS_U_IsGujarati 47 #define CLS_U_IsGurmukhi 48 #define CLS_U_IsHalfwidthandFullwidthForms 49 #define CLS_U_IsHangulCompatibilityJamo 50 #define CLS_U_IsHangulJamo 51 #define CLS_U_IsHangulSyllables 52 #define CLS_U_IsHebrew 53 #define CLS_U_IsHiragana 54 #define CLS_U_IsIPAExtensions 55 #define CLS_U_IsIdeographicDescriptionCharacters 56 #define CLS_U_IsKanbun 57 #define CLS_U_IsKangxiRadicals 58 #define CLS_U_IsKannada 59 #define CLS_U_IsKatakana 60 #define CLS_U_IsKhmer 61 #define CLS_U_IsLao 62 #define CLS_U_IsLatin_1Supplement 63 #define CLS_U_IsLatinExtended_A 64 #define CLS_U_IsLatinExtended_B 65 #define CLS_U_IsLatinExtendedAdditional 66 #define CLS_U_IsLetterlikeSymbols 67 #define CLS_U_IsMalayalam 68 #define CLS_U_IsMathematicalAlphanumericSymbols 69 #define CLS_U_IsMathematicalOperators 70 #define CLS_U_IsMiscellaneousSymbols 71 #define CLS_U_IsMiscellaneousTechnical 72 #define CLS_U_IsMongolian 73 #define CLS_U_IsMusicalSymbols 74 #define CLS_U_IsMyanmar 75 #define CLS_U_IsNumberForms 76 #define CLS_U_IsOgham 77 #define CLS_U_IsOldItalic 78 #define CLS_U_IsOpticalCharacterRecognition 79 #define CLS_U_IsOriya 80 #define CLS_U_IsPrivateUse 81 #define CLS_U_IsRunic 82 #define CLS_U_IsSinhala 83 #define CLS_U_IsSmallFormVariants 84 #define CLS_U_IsSpacingModifierLetters 85 #define CLS_U_IsSpecials 86 #define CLS_U_IsSuperscriptsandSubscripts 87 #define CLS_U_IsSyriac 88 #define CLS_U_IsTags 89 #define CLS_U_IsTamil 90 #define CLS_U_IsTelugu 91 #define CLS_U_IsThaana 92 #define CLS_U_IsThai 93 #define CLS_U_IsTibetan 94 #define CLS_U_IsUnifiedCanadianAboriginalSyllabics 95 #define CLS_U_IsYiRadicals 96 #define CLS_U_IsYiSyllables 97 #define CLS_U_L 98 #define 
CLS_U_Ll 99 #define CLS_U_Lm 100 #define CLS_U_Lo 101 #define CLS_U_Lt 102 #define CLS_U_Lu 103 #define CLS_U_M 104 #define CLS_U_Mc 105 #define CLS_U_Me 106 #define CLS_U_Mn 107 #define CLS_U_N 108 #define CLS_U_Nd 109 #define CLS_U_Nl 110 #define CLS_U_No 111 #define CLS_U_P 112 #define CLS_U_Pc 113 #define CLS_U_Pd 114 #define CLS_U_Pe 115 #define CLS_U_Pf 116 #define CLS_U_Pi 117 #define CLS_U_Po 118 #define CLS_U_Ps 119 #define CLS_U_S 120 #define CLS_U_Sc 121 #define CLS_U_Sk 122 #define CLS_U_Sm 123 #define CLS_U_So 124 #define CLS_U_Z 125 #define CLS_U_Zl 126 #define CLS_U_Zp 127 #define CLS_U_Zs 128 #define NUM_CLS_U 129 static char *clstab[NUM_CLS_U]={"", "C", "Cc", "Cf", "Co", "IsAlphabeticPresentationForms", "IsArabic", "IsArabicPresentationForms-A", "IsArabicPresentationForms-B", "IsArmenian", "IsArrows", "IsBasicLatin", "IsBengali", "IsBlockElements", "IsBopomofo", "IsBopomofoExtended", "IsBoxDrawing", "IsBraillePatterns", "IsByzantineMusicalSymbols", "IsCJKCompatibility", "IsCJKCompatibilityForms", "IsCJKCompatibilityIdeographs", "IsCJKCompatibilityIdeographsSupplement", "IsCJKRadicalsSupplement", "IsCJKSymbolsandPunctuation", "IsCJKUnifiedIdeographs", "IsCJKUnifiedIdeographsExtensionA", "IsCJKUnifiedIdeographsExtensionB", "IsCherokee", "IsCombiningDiacriticalMarks", "IsCombiningHalfMarks", "IsCombiningMarksforSymbols", "IsControlPictures", "IsCurrencySymbols", "IsCyrillic", "IsDeseret", "IsDevanagari", "IsDingbats", "IsEnclosedAlphanumerics", "IsEnclosedCJKLettersandMonths", "IsEthiopic", "IsGeneralPunctuation", "IsGeometricShapes", "IsGeorgian", "IsGothic", "IsGreek", "IsGreekExtended", "IsGujarati", "IsGurmukhi", "IsHalfwidthandFullwidthForms", "IsHangulCompatibilityJamo", "IsHangulJamo", "IsHangulSyllables", "IsHebrew", "IsHiragana", "IsIPAExtensions", "IsIdeographicDescriptionCharacters", "IsKanbun", "IsKangxiRadicals", "IsKannada", "IsKatakana", "IsKhmer", "IsLao", "IsLatin-1Supplement", "IsLatinExtended-A", "IsLatinExtended-B", 
"IsLatinExtendedAdditional", "IsLetterlikeSymbols", "IsMalayalam", "IsMathematicalAlphanumericSymbols", "IsMathematicalOperators", "IsMiscellaneousSymbols", "IsMiscellaneousTechnical", "IsMongolian", "IsMusicalSymbols", "IsMyanmar", "IsNumberForms", "IsOgham", "IsOldItalic", "IsOpticalCharacterRecognition", "IsOriya", "IsPrivateUse", "IsRunic", "IsSinhala", "IsSmallFormVariants", "IsSpacingModifierLetters", "IsSpecials", "IsSuperscriptsandSubscripts", "IsSyriac", "IsTags", "IsTamil", "IsTelugu", "IsThaana", "IsThai", "IsTibetan", "IsUnifiedCanadianAboriginalSyllabics", "IsYiRadicals", "IsYiSyllables", "L", "Ll", "Lm", "Lo", "Lt", "Lu", "M", "Mc", "Me", "Mn", "N", "Nd", "Nl", "No", "P", "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps", "S", "Sc", "Sk", "Sm", "So", "Z", "Zl", "Zp", "Zs" }; --- NEW FILE: ht.h --- /* $Id: ht.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #ifndef HT_H #define HT_H 1 struct hashtable { int (*hash)(int i); int (*equal)(int i1,int i2); int tablen,used,limit; int *table; }; extern void ht_init(struct hashtable *ht,int len,int (*hash)(int),int (*equal)(int,int)); extern void ht_clear(struct hashtable *ht); extern void ht_dispose(struct hashtable *ht); extern int ht_get(struct hashtable *ht,int i); extern void ht_put(struct hashtable *ht,int i); extern int ht_del(struct hashtable *ht,int i); extern int ht_deli(struct hashtable *ht,int i); /* delete only if i refers to itself */ #endif --- NEW FILE: u.c --- /* $Id: u.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #include "u.h" #define ux(u,c) (((u)<<6)|(c&0x3F)) #define u1(t) t[0] #define u2(t) ux(t[0]&0x1F,t[1]) #define u3(t) ux(ux(t[0]&0xF,t[1]),t[2]) #define u4(t) ux(ux(ux(t[0]&0x7,t[1]),t[2]),t[3]) #define u5(t) ux(ux(ux(ux(t[0]&0x3,t[1]),t[2]),t[3]),t[4]) #define u6(t) ux(ux(ux(ux(ux(t[0]&0x1,t[1]),t[2]),t[3]),t[4]),t[5]) #define vx(c,u) c=0x80|((u)&0x3F) #define v1(t,u) t[0]=u #define v2(t,u) t[0]=0xC0|(u>>6);vx(t[1],u) #define v3(t,u) t[0]=0xE0|(u>>12);vx(t[1],u>>6);vx(t[2],u) #define v4(t,u) 
t[0]=0xF0|(u>>18);vx(t[1],u>>12);vx(t[2],u>>6);vx(t[3],u) #define v5(t,u) t[0]=0xF8|(u>>24);vx(t[1],u>>18);vx(t[2],u>>12);vx(t[3],u>>6);vx(t[4],u) #define v6(t,u) t[0]=0xFC|(u>>30);vx(t[1],u>>24);vx(t[2],u>>18);vx(t[3],u>>12);vx(t[4],u>>6);vx(t[5],u) #define B1 0xFFFFFF80 #define B2 0xFFFFF800 #define B3 0xFFFF0000 #define B4 0xFFE00000 #define B5 0xFC000000 #define B6 0x80000000 #define BOM "\xEF\xBB\xBF" #define BOMLEN 3 int u_bom(char *s,int n) { char *bom=(char*)(BOM+BOMLEN); if(n>=BOMLEN) { n=BOMLEN; s+=n; while(n--!=0) if(*(--s)!=*(--bom)) return 0; return BOMLEN; } return 0; } int u_get(int *up,char *s) { unsigned char *t=(unsigned char*)s; if(*t<0x80) {*up=u1(t); return 1;} if(*t<0xC0) return 0; if(*t<0xE0) {*up=u2(t); return (*up&B1)?2:0;} if(*t<0xF0) {*up=u3(t); return (*up&B2)?3:0;} if(*t<0xF8) {*up=u4(t); return (*up&B3)?4:0;} if(*t<0xFC) {*up=u5(t); return (*up&B4)?5:0;} if(*t<0xFE) {*up=u6(t); return (*up&B5)?6:0;} return 0; } int u_put(char *s,int u) { unsigned char *t=(unsigned char*)s; if(!(u&B1)) {v1(t,u); return 1;} if(!(u&B2)) {v2(t,u); return 2;} if(!(u&B3)) {v3(t,u); return 3;} if(!(u&B4)) {v4(t,u); return 4;} if(!(u&B5)) {v5(t,u); return 5;} if(!(u&B6)) {v6(t,u); return 6;} return 0; } int u_strlen(char *s) {int n=0; while(*(s+n)) ++n; return u_strnlen(s,n);} int u_strnlen(char *s,int n) { int i,len=0,u; char *end=s+n; for(;;) { if(s==end) break; i=u_get(&u,s); if(i==0) {len=-1; break;} s+=i; if(s>end) {len=-1; break;} ++len; } return len; } int u_in_ranges(int u,int r[][2],int len) { int n=0,m=len-1,i; for(;;) { if(n>m) return 0; i=(n+m)/2; if(u<r[i][0]) m=i-1; else if(u>r[i][1]) n=i+1; else return 1; } } --- NEW FILE: xsd.c --- /* $Id: xsd.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #include <limits.h> /*INT_MAX*/ #include <stdlib.h> /*atof,atol,strtol*/ #include <string.h> /*strlen*/ #include <math.h> /*HUGE_VAL*/ #include <assert.h> #include "u.h" #include "xmlc.h" #include "s.h" #include "erbit.h" #include "rx.h" #include "xsd_tm.h" 
#include "er.h" #include "xsd.h" #define err(msg) (*er_vprintf)(msg"\n",ap) void xsd_default_verror_handler(int erno,va_list ap) { (*er_printf)("XML Schema datatypes: "); if(erno&ERBIT_RX) { rx_default_verror_handler(erno&~ERBIT_RX,ap); } else { switch(erno) { case XSD_ER_TYP: err("unknown type %s"); break; case XSD_ER_PAR: err("unknown parameter %s"); break; case XSD_ER_PARVAL: err("invalid parameter value %s=\"%s\""); break; case XSD_ER_VAL: err("invalid typed value \"%s\" for type %s"); break; case XSD_ER_NPAT: err("no more than 16 patterns per type are supported"); break; case XSD_ER_WS: err("the builtin derived datatype that specifies the desired value for the whiteSpace facet should be used instead of 'whiteSpace'"); break; case XSD_ER_ENUM: err("'value' should be used instead of 'enumeration'"); break; default: assert(0); } } } void (*xsd_verror_handler)(int erno,va_list ap)=&xsd_default_verror_handler; static void error_handler(int erno,...) { va_list ap; va_start(ap,erno); (*xsd_verror_handler)(erno,ap); va_end(ap); } static void verror_handler_rx(int erno,va_list ap) {(*xsd_verror_handler)(erno|ERBIT_RX,ap);} static void windup(void); static int initialized=0; void xsd_init(void) { if(!initialized) { initialized=1; rx_init(); rx_verror_handler=&verror_handler_rx; windup(); } } void xsd_clear(void) { windup(); } static void windup(void) { } #define FCT_ENUMERATION 0 #define FCT_FRACTION_DIGITS 1 #define FCT_LENGTH 2 #define FCT_MAX_EXCLUSIVE 3 #define FCT_MAX_INCLUSIVE 4 #define FCT_MAX_LENGTH 5 #define FCT_MIN_EXCLUSIVE 6 #define FCT_MIN_INCLUSIVE 7 #define FCT_MIN_LENGTH 8 #define FCT_PATTERN 9 #define FCT_TOTAL_DIGITS 10 #define FCT_WHITE_SPACE 11 #define NFCT 12 static char *fcttab[NFCT]={ "enumeration", "fractionDigits", "length", "maxExclusive", "maxInclusive", "maxLength", "minExclusive", "minInclusive", "minLength", "pattern", "totalDigits", "whiteSpace"}; #define FCT_IBOUNDS (1<<FCT_MIN_INCLUSIVE|1<<FCT_MAX_INCLUSIVE) #define FCT_EBOUNDS 
(1<<FCT_MIN_EXCLUSIVE|1<<FCT_MAX_EXCLUSIVE) #define FCT_BOUNDS (FCT_IBOUNDS|FCT_EBOUNDS) #define WS_PRESERVE 0 #define WS_REPLACE 1 #define WS_COLLAPSE 2 static int (*match[])(char *r,char *s,int n)={&rx_match,&rx_rmatch,&rx_cmatch}; #define TYP_ENTITIES 0 #define TYP_ENTITY 1 #define TYP_ID 2 #define TYP_IDREF 3 #define TYP_IDREFS 4 #define TYP_NCNAME 5 #define TYP_NMTOKEN 6 #define TYP_NMTOKENS 7 #define TYP_NOTATION 8 #define TYP_NAME 9 #define TYP_QNAME 10 #define TYP_ANY_URI 11 #define TYP_BASE64_BINARY 12 #define TYP_BOOLEAN 13 #define TYP_BYTE 14 #define TYP_DATE 15 #define TYP_DATE_TIME 16 #define TYP_DECIMAL 17 #define TYP_DOUBLE 18 #define TYP_DURATION 19 #define TYP_FLOAT 20 #define TYP_G_DAY 21 #define TYP_G_MONTH 22 #define TYP_G_MONTH_DAY 23 #define TYP_G_YEAR 24 #define TYP_G_YEAR_MONTH 25 #define TYP_HEX_BINARY 26 #define TYP_INT 27 #define TYP_INTEGER 28 #define TYP_LANGUAGE 29 #define TYP_LONG 30 #define TYP_NEGATIVE_INTEGER 31 #define TYP_NON_NEGATIVE_INTEGER 32 #define TYP_NON_POSITIVE_INTEGER 33 #define TYP_NORMALIZED_STRING 34 #define TYP_POSITIVE_INTEGER 35 #define TYP_SHORT 36 #define TYP_STRING 37 #define TYP_TIME 38 #define TYP_TOKEN 39 #define TYP_UNSIGNED_BYTE 40 #define TYP_UNSIGNED_INT 41 #define TYP_UNSIGNED_LONG 42 #define TYP_UNSIGNED_SHORT 43 #define NTYP 44 static char *typtab[NTYP]={ "ENTITIES", "ENTITY", "ID", "IDREF", "IDREFS", "NCName", "NMTOKEN", "NMTOKENS", "NOTATION", "Name", "QName", "anyURI", "base64Binary", "boolean", "byte", "date", "dateTime", "decimal", "double", "duration", "float", "gDay", "gMonth", "gMonthDay", "gYear", "gYearMonth", "hexBinary", "int", "integer", "language", "long", "negativeInteger", "nonNegativeInteger", "nonPositiveInteger", "normalizedString", "positiveInteger", "short", "string", "time", "token", "unsignedByte", "unsignedInt", "unsignedLong", "unsignedShort"}; #define ERR_PARAMETER "invalid XML Schema datatype parameter '%s'" #define ERR_DATATYPE "invalid XML Schema datatype name '%s'" 
#define ERR_VALUE "invalid value '%s' for XML Schema datatype '%s'" struct dura {int yr,mo,dy,hr,mi;double se;}; static void durainit(struct dura *d) {d->yr=d->mo=d->dy=d->hr=d->mi=0; d->se=0.0;} static void s2dura(struct dura *dp,char *s,int n) { char *end=s+n,*np="0"; int sign=1,time=0; durainit(dp); while(s!=end) { switch(*s) { case '-': sign=-1; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '.': np=s; break; case 'T': time=1; break; case 'Y': dp->yr=sign*atoi(np); break; case 'M': if(time) dp->mi=sign*atoi(np); else dp->mo=sign*atoi(np); break; case 'D': dp->dy=sign*atoi(np); break; case 'H': dp->hr=sign*atoi(np); break; case 'S': dp->se=sign*atof(np); break; } ++s; } } static int duracmp(char *s1,char *s2,int n) { struct dura d1,d2; s2dura(&d1,s1,strlen(s1)); s2dura(&d2,s2,n); if(d1.yr!=d2.yr) return d1.yr-d2.yr; if(d1.mo!=d2.mo) return d1.mo-d2.mo; if(d1.dy!=d2.dy) return d1.dy-d2.dy; if(d1.hr!=d2.hr) return d1.hr-d2.hr; if(d1.mi!=d2.mi) return d1.mi-d2.mi; if(d1.se!=d2.se) return d1.se<d2.se?-1:1; return 0; } static int dtcmpn(char *s1,char *s2,int n,char *fmt) { struct xsd_tm tm1,tm2; xsd_mktm(&tm1,fmt,s1); xsd_mktmn(&tm2,fmt,s2,n); return xsd_tmcmp(&tm1,&tm2); } static int toklenn(char *s,int n) { char *end=s+n; int u,len=0; SKIP_SPACE: for(;;) { if(s==end) return len?len-1:0; s+=u_get(&u,s); if(!xmlc_white_space(u)) break; } ++len; for(;;) { if(s==end) return len; ++len; s+=u_get(&u,s); if(xmlc_white_space(u)) goto SKIP_SPACE; } } static int tokcntn(char *s,int n) { char *end=s+n; int u,cnt=0; SKIP_SPACE: for(;;) { if(s==end) return cnt; s+=u_get(&u,s); if(!xmlc_white_space(u)) break; } ++cnt; for(;;) { if(s==end) return cnt; s+=u_get(&u,s); if(xmlc_white_space(u)) goto SKIP_SPACE; } } static int b64lenn(char *s,int n) { char *end=s+n; int l=0,len; for(;;) { if(end==s) break; --end; if(!xmlc_white_space(*end)&&*end!='=') {++end; break;} } while(s!=end) {if(!xmlc_white_space(*s)) ++l; ++s;} 
len=l/4*3; switch(l%4) { case 0: break; case 1: len=-1; break; case 2: len+=1; break; case 3: len+=2; break; } return len; } static int fdiglenn(char *s,int n) { char *end=s+n; int len=0; for(;;) { if(end==s) break; --end; if(*end!='0'&&!xmlc_white_space(*end)) {++end; break;} } for(;;) { if(s==end) break; if(*(s++)=='.') { while(s++!=end) ++len; break; } } return len; } static int diglenn(char *s,int n) { char *end=s+n; int len=0; for(;;) { if(s==end) break; if(!(xmlc_white_space(*s)||*s=='+'||*s=='-'||*s=='0')) break; ++s; } for(;;) { if(s==end||*s=='.'||xmlc_white_space(*s)) break; ++len; ++s; } if(len==0) len=1; if(*s=='.') len+=fdiglenn(s,end-s); return len; } #define NPAT 16 struct facets { int set; char *pattern[NPAT+1]; int npat; int length, minLength, maxLength, totalDigits, fractionDigits; char *maxExclusive, *maxInclusive, *minExclusive, *minInclusive; int whiteSpace; }; /* PAT_DECIMAL is unsigned decimal, signed decimal matches PAT_FIXED */ #define PAT_ORDINAL "([0-9]+)" #define PAT_FRACTIONAL "(\\.[0-9]+)" #define PAT_DECIMAL "("PAT_ORDINAL"\\.?|"PAT_ORDINAL"?"PAT_FRACTIONAL")" #define PAT_POSITIVE "\\+?"PAT_ORDINAL #define PAT_NON_NEGATIVE "\\+?"PAT_ORDINAL #define PAT_NON_POSITIVE "\\-"PAT_ORDINAL"|0+" #define PAT_NEGATIVE "\\-"PAT_ORDINAL #define PAT_INTEGER "([+\\-]?"PAT_ORDINAL")" #define PAT_FIXED "([+\\-]?"PAT_DECIMAL")" #define PAT_FLOATING PAT_FIXED"([Ee]"PAT_INTEGER")?|INF|-INF|NaN" #define PAT_HEX_BINARY "[0-9a-fA-F]+" #define PAT_BASE64 "([A-Za-z0-9+/] ?)" #define PAT_BASE64_2 "([AQgw] ?)" #define PAT_BASE64_1 "([AEIMQUYcgkosw048] ?)" #define PAT_BASE64_BINARY \ "("PAT_BASE64"{4})*" \ "("PAT_BASE64 PAT_BASE64_2"= ?=" \ "|"PAT_BASE64"{2}" PAT_BASE64_1"=)?" #define PAT_ANY_URI "(([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?(#[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?" 
#define PAT_NCNAME "[\\i-[:]][\\c-[:]]*" #define PAT_QNAME "("PAT_NCNAME":)?"PAT_NCNAME #define PAT_NMTOKEN "\\c+" #define PAT_NAME "\\i\\c*" #define PAT_NCNAMES PAT_NCNAME"( "PAT_NCNAME")*" #define PAT_NMTOKENS PAT_NMTOKEN"( "PAT_NMTOKEN")*" #define PAT_NAMES PAT_NAME"( "PAT_NAME")*" #define PAT_LANGUAGE "([a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*)" #define PAT_DURAY "("PAT_ORDINAL"Y)" #define PAT_DURAM "("PAT_ORDINAL"M)" #define PAT_DURAD "("PAT_ORDINAL"D)" #define PAT_DURADATE \ "(" PAT_DURAY PAT_DURAM"?"PAT_DURAD"?" \ "|" PAT_DURAY"?"PAT_DURAM PAT_DURAD"?" \ "|" PAT_DURAY"?"PAT_DURAM"?"PAT_DURAD ")" #define PAT_DURAH "("PAT_ORDINAL"H)" #define PAT_DURAM "("PAT_ORDINAL"M)" #define PAT_DURAS "("PAT_DECIMAL"S)" #define PAT_DURATIME \ "(T(" PAT_DURAH PAT_DURAM"?"PAT_DURAS"?" \ "|" PAT_DURAM"?"PAT_DURAM PAT_DURAS"?" \ "|" PAT_DURAS"?"PAT_DURAM"?"PAT_DURAS "))" #define PAT_DURATION "-?P("PAT_DURADATE PAT_DURATIME"|"PAT_DURADATE"|"PAT_DURATIME")" #define PAT_ZONE "(Z|[+\\-](0[0-9]|1[0-4]):[0-5][0-9])" #define PAT_YEAR0 "[0-9]{4,}" #define PAT_MONTH0 "(0[1-9]|1[0-2])" #define PAT_DAY0 "([0-2][0-9]|3[01])" #define PAT_YEAR "-?"PAT_YEAR0 PAT_ZONE"?" #define PAT_MONTH "--"PAT_MONTH0"--"PAT_ZONE"?" #define PAT_DAY "---"PAT_DAY0 PAT_ZONE"?" #define PAT_YEAR_MONTH "-?"PAT_YEAR0"-"PAT_MONTH0 PAT_ZONE"?" #define PAT_MONTH_DAY "--"PAT_MONTH0"-"PAT_DAY0 PAT_ZONE"?" #define PAT_DATE0 PAT_YEAR0"-"PAT_MONTH0"-"PAT_DAY0 #define PAT_TIME0 "([0-1][0-9]|2[0-3]):[0-5][0-9]:([0-5][0-9]|60)"PAT_FRACTIONAL"?" #define PAT_DATE "-?"PAT_DATE0 PAT_ZONE"?" #define PAT_TIME PAT_TIME0 PAT_ZONE"?" #define PAT_DATE_TIME "-?"PAT_DATE0"T"PAT_TIME0 PAT_ZONE"?" 
static void anchdec(int *plus,int *zero,char **beg,char **dp,char **end,char *s,int n) { char *end0=s+n; *beg=s; *zero=1; *plus=1; for(;;) { if(end0==*beg) break; --end0; if(!xmlc_white_space(*end0)) {++end0; break;} } *end=end0; for(;;) { if(*end==*beg) break; --*end; if(!(**end=='0'||**end=='+'||**end=='-')) { if(**end!='.') *zero=0; ++*end; break; } } *dp=*end; for(;;) { if(*beg==*end) break; if(**beg=='-') *plus=0; else if(!(**beg=='0'||**beg=='+'||xmlc_white_space(**beg))) { if(**beg!='.') *zero=0; for(;;) { if(*dp==*beg) {*dp=*end=end0; break;} --*dp; if(**dp=='.') break; } break; } ++*beg; } } static int deccmp(char *s1,int n1,char *s2,int n2) { int p1,p2,z1,z2,cmp; char *d1,*e1,*d2,*e2,*c1,*c2; anchdec(&p1,&z1,&s1,&d1,&e1,s1,n1); anchdec(&p2,&z2,&s2,&d2,&e2,s2,n2); if(z1&&z2) return 0; if(p1!=p2) return p1-p2; cmp=0; if(d1-s1!=d2-s2) cmp=d1-s1-(d2-s2); if(cmp!=0) return p1?cmp:-cmp; c1=s1; c2=s2; for(;;) { if(c1==d1) break; if(*c1!=*c2) {cmp=*c1-*c2; break;} ++c1; ++c2; } if(cmp!=0) return p1?cmp:-cmp; if(c1!=e1) ++c1; if(c2!=e2) ++c2; for(;;) { if(c1==e1) {cmp=-(c2!=e2); break;} if(c2==e2) {cmp=1; break;} if(*c1!=*c2) {cmp=*c1-*c2; break;} ++c1; ++c2; } return p1?cmp:-cmp; } static int chkdec(struct facets *fp,char *s,int n) { int ok=1; if(fp->set&(1<<FCT_MIN_EXCLUSIVE)) ok=ok&&deccmp(s,n,fp->minExclusive,strlen(fp->minExclusive))>0; if(fp->set&(1<<FCT_MIN_INCLUSIVE)) ok=ok&&deccmp(s,n,fp->minInclusive,strlen(fp->minInclusive))>=0; if(fp->set&(1<<FCT_MAX_INCLUSIVE)) ok=ok&&deccmp(s,n,fp->maxInclusive,strlen(fp->maxInclusive))<=0; if(fp->set&(1<<FCT_MAX_EXCLUSIVE)) ok=ok&&deccmp(s,n,fp->maxExclusive,strlen(fp->maxExclusive))<0; return ok; } static double atodn(char *s,int n) { return s_tokcmpn("-INF",s,n)==0?-HUGE_VAL : s_tokcmpn("INF",s,n)==0?HUGE_VAL : atof(s); } static double atod(char *s) {return atodn(s,strlen(s));} static int chkdbl(struct facets *fp,char *s,int n) { int ok=1,nan=s_tokcmpn("NaN",s,n)==0; double d=atodn(s,n); 
if(fp->set&(1<<FCT_MIN_EXCLUSIVE)) ok=ok&&!nan&&d>atod(fp->minExclusive); if(fp->set&(1<<FCT_MIN_INCLUSIVE)) ok=ok&&!nan&&d>=atod(fp->minInclusive); if(fp->set&(1<<FCT_MAX_INCLUSIVE)) ok=ok&&!nan&&d<=atod(fp->maxInclusive); if(fp->set&(1<<FCT_MAX_EXCLUSIVE)) ok=ok&&!nan&&d<atod(fp->maxExclusive); return ok; } static int chktmlim(char *typ,char *fmt,char *val,int cmpmin,int cmpmax,struct xsd_tm *tmp) { struct xsd_tm tmf; int cmp; if(!xsd_allows(typ,"",val,strlen(val))) {(*error_handler)(XSD_ER_PARVAL); return 0;} xsd_mktm(&tmf,fmt,val); cmp=xsd_tmcmp(tmp,&tmf); return cmpmin<=cmp&&cmp<=cmpmax; } static int chktm(char *typ,char *fmt,struct facets *fp,char *s,int n) { int ok=1; struct xsd_tm tms; if(!xsd_allows(typ,"",s,n)) return 0; xsd_mktmn(&tms,fmt,s,n); if(fp->set&(1<<FCT_MIN_EXCLUSIVE)) ok=ok&&chktmlim(typ,fmt,fp->minExclusive,1,1,&tms); if(fp->set&(1<<FCT_MIN_INCLUSIVE)) ok=ok&&chktmlim(typ,fmt,fp->minInclusive,0,1,&tms); if(fp->set&(1<<FCT_MAX_INCLUSIVE)) ok=ok&&chktmlim(typ,fmt,fp->maxInclusive,-1,0,&tms); if(fp->set&(1<<FCT_MAX_EXCLUSIVE)) ok=ok&&chktmlim(typ,fmt,fp->maxExclusive,-1,-1,&tms); return ok; } int xsd_allows(char *typ,char *ps,char *s,int n) { int ok=1,length; int dt=s_tab(typ,typtab,NTYP); struct facets fct; fct.set=0; fct.npat=0; switch(dt) { case TYP_INTEGER: fct.pattern[fct.npat++]=PAT_INTEGER; dt=TYP_DECIMAL; break; case TYP_POSITIVE_INTEGER: fct.pattern[fct.npat++]=PAT_POSITIVE; dt=TYP_DECIMAL; fct.set|=1<<FCT_MIN_INCLUSIVE; fct.minInclusive="1"; break; case TYP_NON_NEGATIVE_INTEGER: fct.pattern[fct.npat++]=PAT_NON_NEGATIVE; dt=TYP_DECIMAL; fct.set|=1<<FCT_MIN_INCLUSIVE; fct.minInclusive="0"; break; case TYP_NON_POSITIVE_INTEGER: fct.pattern[fct.npat++]=PAT_NON_POSITIVE; dt=TYP_DECIMAL; fct.set|=1<<FCT_MAX_INCLUSIVE; fct.maxInclusive="0"; break; case TYP_NEGATIVE_INTEGER: fct.pattern[fct.npat++]=PAT_NEGATIVE; dt=TYP_DECIMAL; fct.set|=1<<FCT_MAX_INCLUSIVE; fct.maxInclusive="-1"; break; case TYP_BYTE: fct.pattern[fct.npat++]=PAT_INTEGER; 
dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS; fct.minInclusive="-128"; fct.maxInclusive="127"; break; case TYP_UNSIGNED_BYTE: fct.pattern[fct.npat++]=PAT_NON_NEGATIVE; dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS; fct.minInclusive="0"; fct.maxInclusive="255"; break; case TYP_SHORT: fct.pattern[fct.npat++]=PAT_INTEGER; dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS; fct.minInclusive="-32768"; fct.maxInclusive="32767"; break; case TYP_UNSIGNED_SHORT: fct.pattern[fct.npat++]=PAT_NON_NEGATIVE; dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS; fct.minInclusive="0"; fct.maxInclusive="65535"; break; case TYP_INT: fct.pattern[fct.npat++]=PAT_INTEGER; dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS; fct.minInclusive="-2147483648"; fct.maxInclusive="2147483647"; break; case TYP_UNSIGNED_INT: fct.pattern[fct.npat++]=PAT_NON_NEGATIVE; dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS; fct.minInclusive="0"; fct.maxInclusive="4294967295"; break; case TYP_LONG: fct.pattern[fct.npat++]=PAT_INTEGER; dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS; fct.minInclusive="-9223372036854775808"; fct.maxInclusive="9223372036854775807"; break; case TYP_UNSIGNED_LONG: fct.pattern[fct.npat++]=PAT_NON_NEGATIVE; dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS; fct.minInclusive="0"; fct.maxInclusive="18446744073709551615"; break; } { int n; while((n=strlen(ps))) { char *key=ps,*val=key+n+1,*end,i; switch(i=s_tab(key,fcttab,NFCT)) { case FCT_LENGTH: fct.length=(int)strtol(val,&end,10); if(!*val||*end) (*error_handler)(XSD_ER_PARVAL,key,val); break; case FCT_MAX_LENGTH: fct.maxLength=(int)strtol(val,&end,10); if(!*val||*end) (*error_handler)(XSD_ER_PARVAL,key,val); break; case FCT_MIN_LENGTH: fct.minLength=(int)strtol(val,&end,10); if(!*val||*end) (*error_handler)(XSD_ER_PARVAL,key,val); break; case FCT_FRACTION_DIGITS: fct.fractionDigits=(int)strtol(val,&end,10); if(!*val||*end) (*error_handler)(XSD_ER_PARVAL,key,val); break; case FCT_TOTAL_DIGITS: fct.totalDigits=(int)strtol(val,&end,10); if(!*val||*end) (*error_handler)(XSD_ER_PARVAL,key,val); break; case FCT_PATTERN: 
if(fct.npat==NPAT) (*error_handler)(XSD_ER_NPAT); else { fct.pattern[fct.npat++]=val; } break; case FCT_MAX_EXCLUSIVE: fct.maxExclusive=val; break; case FCT_MAX_INCLUSIVE: fct.maxInclusive=val; break; case FCT_MIN_EXCLUSIVE: fct.minExclusive=val; break; case FCT_MIN_INCLUSIVE: fct.minInclusive=val; break; case FCT_WHITE_SPACE: (*error_handler)(XSD_ER_WS); break; case FCT_ENUMERATION: (*error_handler)(XSD_ER_ENUM); break; case NFCT: (*error_handler)(XSD_ER_PAR,key); break; default: assert(0); } fct.set|=1<<i; ps=val+strlen(val)+1; } } fct.whiteSpace=WS_COLLAPSE; length=INT_MAX; switch(dt) { /*primitive*/ case TYP_STRING: fct.whiteSpace=WS_PRESERVE; length=u_strnlen(s,n); break; case TYP_BOOLEAN: fct.pattern[fct.npat++]="true|false|1|0"; break; case TYP_DECIMAL: fct.pattern[fct.npat++]=PAT_FIXED; if(fct.set&(1<<FCT_FRACTION_DIGITS)) ok=ok&&fdiglenn(s,n)<=fct.fractionDigits; if(fct.set&(1<<FCT_TOTAL_DIGITS)) ok=ok&&diglenn(s,n)<=fct.totalDigits; if(fct.set&FCT_BOUNDS) ok=ok&chkdec(&fct,s,n); break; case TYP_FLOAT: case TYP_DOUBLE: /* float and double is the same type */ fct.pattern[fct.npat++]=PAT_FLOATING; if(fct.set&FCT_BOUNDS) ok=ok&chkdbl(&fct,s,n); break; case TYP_DURATION: fct.pattern[fct.npat++]=PAT_DURATION; break; case TYP_DATE_TIME: fct.pattern[fct.npat++]=PAT_DATE_TIME; if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"ymdtz",&fct,s,n); break; case TYP_DATE: fct.pattern[fct.npat++]=PAT_DATE; if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"ymdz",&fct,s,n); break; case TYP_TIME: fct.pattern[fct.npat++]=PAT_TIME; if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"tz",&fct,s,n); break; case TYP_G_YEAR_MONTH: fct.pattern[fct.npat++]=PAT_YEAR_MONTH; if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"ymz",&fct,s,n); break; case TYP_G_YEAR: fct.pattern[fct.npat++]=PAT_YEAR; if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"yz",&fct,s,n); break; case TYP_G_MONTH_DAY: fct.pattern[fct.npat++]=PAT_MONTH_DAY; if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"mdz",&fct,s,n); break; case TYP_G_DAY: fct.pattern[fct.npat++]=PAT_DAY; 
if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"dz",&fct,s,n); break; case TYP_G_MONTH: fct.pattern[fct.npat++]=PAT_MONTH; if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"mz",&fct,s,n); break; case TYP_HEX_BINARY: fct.pattern[fct.npat++]=PAT_HEX_BINARY; length=(toklenn(s,n)+1)/2; break; case TYP_BASE64_BINARY: fct.pattern[fct.npat++]=PAT_BASE64_BINARY; length=b64lenn(s,n); break; case TYP_ANY_URI: fct.pattern[fct.npat++]=PAT_ANY_URI; length=toklenn(s,n); break; case TYP_QNAME: case TYP_NOTATION: fct.pattern[fct.npat++]=PAT_QNAME; fct.set&=~(1<<FCT_LENGTH|1<<FCT_MIN_LENGTH|1<<FCT_MAX_LENGTH); /* the errata states that any value is valid */ break; /*derived*/ case TYP_NORMALIZED_STRING: fct.whiteSpace=WS_REPLACE; length=u_strnlen(s,n); break; case TYP_TOKEN: length=toklenn(s,n); break; case TYP_LANGUAGE: fct.pattern[fct.npat++]=PAT_LANGUAGE; length=toklenn(s,n); break; case TYP_NMTOKEN: fct.pattern[fct.npat++]=PAT_NMTOKEN; length=toklenn(s,n); break; case TYP_NMTOKENS: fct.pattern[fct.npat++]=PAT_NMTOKENS; length=tokcntn(s,n); break; case TYP_NAME: fct.pattern[fct.npat++]=PAT_NAME; length=toklenn(s,n); break; case TYP_NCNAME: fct.pattern[fct.npat++]=PAT_NCNAME; length=toklenn(s,n); break; case TYP_ID: fct.pattern[fct.npat++]=PAT_NCNAME; length=toklenn(s,n); break; case TYP_IDREF: fct.pattern[fct.npat++]=PAT_NCNAME; length=toklenn(s,n); break; case TYP_IDREFS: fct.pattern[fct.npat++]=PAT_NCNAMES; length=tokcntn(s,n); break; case TYP_ENTITY: fct.pattern[fct.npat++]=PAT_NCNAME; length=toklenn(s,n); break; case TYP_ENTITIES: fct.pattern[fct.npat++]=PAT_NCNAMES; length=tokcntn(s,n); break; case NTYP: (*error_handler)(XSD_ER_TYP,typ); break; default: assert(0); } while(fct.npat--) ok=ok&&match[fct.whiteSpace](fct.pattern[fct.npat],s,n); if(fct.set&(1<<FCT_LENGTH)) ok=ok&&length==fct.length; if(fct.set&(1<<FCT_MAX_LENGTH)) ok=ok&&length<=fct.maxLength; if(fct.set&(1<<FCT_MIN_LENGTH)) ok=ok&&length>=fct.minLength; return ok; } static int dblcmpn(char *val,char *s,char n) { double d1,d2; 
return s_tokcmpn(val,s,n)==0?0 : s_tokcmpn(val,"NaN",3)==0||s_tokcmpn("NaN",s,n)==0?1 : (d1=atod(val),d2=atodn(s,n),d1<d2?-1:d1>d2?1:0); } static int hexcmpn(char *s1,char *s2,int n) { char *end=s2+n; for(;;++s1,++s2) { while(*s1&&xmlc_white_space(*s1)) ++s1; while(s2!=end&&xmlc_white_space(*s2)) ++s2; if(s2==end) return *s1; if(!*s1) return -*s2; switch(*s1) { case 'a': case 'A': if(*s2=='a'||*s2=='A') continue; case 'b': case 'B': if(*s2=='b'||*s2=='B') continue; case 'c': case 'C': if(*s2=='c'||*s2=='C') continue; case 'd': case 'D': if(*s2=='d'||*s2=='D') continue; case 'e': case 'E': if(*s2=='e'||*s2=='E') continue; case 'f': case 'F': if(*s2=='f'||*s2=='F') continue; default: if(*s1!=*s2) return *s1-*s2; } } } static int b64cmpn(char *s1,char *s2,int n) { char *end=s2+n; for(;;++s1,++s2) { while(*s1&&xmlc_white_space(*s1)) ++s1; while(s2!=end&&xmlc_white_space(*s2)) ++s2; if(s2==end) return *s1; if(!*s1) return -*s2; if(*s1!=*s2) return *s1-*s2; } } static int nrmcmpn(char *s1,char *s2,int n) { char *end=s2+n; for(;;++s1,++s2) { if(s2==end) return *s1; if(!*s1) return -*s2; if(!(*s1==*s2||(xmlc_white_space(*s1)&&xmlc_white_space(*s2)))) return *s1-*s2; } } static int qncmpn(char *s1,char *s2,int n2) { /* context is not passed over; compare local parts */ char *ln1=s1,*ln2=s2; int n=n2; while(*ln1&&*ln1!=':') ++ln1; while(n!=0&&*ln2!=':') {++ln2; --n;} if(*ln1) { return n?s_tokcmpn(ln1+1,ln2+1,n-1):s_tokcmpn(ln1+1,s2,n2); } else { return n?s_tokcmpn(s1,ln2+1,n-1):s_tokcmpn(s1,s2,n2); } } int xsd_equal(char *typ,char *val,char *s,int n) { if(!xsd_allows(typ,"",val,strlen(val))) { (*error_handler)(XSD_ER_VAL,val); return 0; } if(!xsd_allows(typ,"",s,n)) return 0; switch(s_tab(typ,typtab,NTYP)) { /*primitive*/ case TYP_STRING: return s_cmpn(val,s,n)==0; case TYP_BOOLEAN: return (s_tokcmpn("true",val,strlen(val))==0||s_tokcmpn("1",val,strlen(val))==0)==(s_tokcmpn("true",s,n)==0||s_tokcmpn("1",s,n)==0); case TYP_DECIMAL: return deccmp(val,strlen(val),s,n)==0; case 
TYP_FLOAT: case TYP_DOUBLE: return dblcmpn(val,s,n)==0; case TYP_DURATION: return duracmp(val,s,n)==0; case TYP_DATE_TIME: return dtcmpn(val,s,n,"ymdtz")==0; case TYP_DATE: return dtcmpn(val,s,n,"ymdz")==0; case TYP_TIME: return dtcmpn(val,s,n,"tz")==0; case TYP_G_YEAR_MONTH: return dtcmpn(val,s,n,"ymz")==0; case TYP_G_YEAR: return dtcmpn(val,s,n,"yz")==0; case TYP_G_MONTH_DAY: return dtcmpn(val,s,n,"mdz")==0; case TYP_G_DAY: return dtcmpn(val,s,n,"dz")==0; case TYP_G_MONTH: return dtcmpn(val,s,n,"mz")==0; case TYP_HEX_BINARY: return hexcmpn(val,s,n)==0; case TYP_BASE64_BINARY: return b64cmpn(val,s,n)==0; case TYP_ANY_URI: return s_tokcmpn(val,s,n)==0; case TYP_QNAME: case TYP_NOTATION: return qncmpn(val,s,n)==0; /*derived*/ case TYP_NORMALIZED_STRING: return nrmcmpn(val,s,n)==0; case TYP_TOKEN: case TYP_LANGUAGE: case TYP_NMTOKEN: case TYP_NMTOKENS: case TYP_NAME: case TYP_NCNAME: case TYP_ID: case TYP_IDREF: case TYP_IDREFS: case TYP_ENTITY: case TYP_ENTITIES: return s_tokcmpn(val,s,n)==0; case TYP_INTEGER: case TYP_POSITIVE_INTEGER: case TYP_NON_NEGATIVE_INTEGER: case TYP_NON_POSITIVE_INTEGER: case TYP_NEGATIVE_INTEGER: case TYP_BYTE: case TYP_UNSIGNED_BYTE: case TYP_SHORT: case TYP_UNSIGNED_SHORT: case TYP_INT: case TYP_UNSIGNED_INT: case TYP_LONG: case TYP_UNSIGNED_LONG: return deccmp(val,strlen(val),s,n)==0; case NTYP: (*error_handler)(XSD_ER_TYP,typ); return 0; default: assert(0); } return 0; } void xsd_test() { rx_init(); assert(toklenn("",0)==0); assert(toklenn("A",1)==1); assert(toklenn(" A ",4)==1); assert(toklenn(" A B ",7)==3); assert(tokcntn("",0)==0); assert(tokcntn("A",1)==1); assert(tokcntn("AB CD",5)==2); assert(tokcntn(" AB C ",9)==2); assert(diglenn(" +14.25",7)==4); assert(diglenn("1",1)==1); assert(diglenn("0",1)==1); assert(diglenn("+00.0",5)==1); assert(fdiglenn(".1",2)==1); assert(fdiglenn("+0.0140",7)==3); assert(fdiglenn("0",1)==0); assert(deccmp("0",1,"0.0",3)==0); assert(deccmp("1 ",2," 1",2)==0); assert(deccmp("0.",2,".0",2)==0); 
assert(deccmp("1",1,"1.0",3)==0); assert(deccmp("01.1",4,"1.10",4)==0); assert(deccmp("+1",2,"1.0",3)==0); assert(deccmp("+0.",3,"-0",2)==0); assert(deccmp("0",1,"0.1",3)<0); assert(deccmp("1.",2,".0",2)>0); assert(deccmp("+1",2,"-1",2)>0); assert(hexcmpn("","",0)==0); assert(hexcmpn("ABC123","ABC123",6)==0); assert(hexcmpn("aBCd","AbCd",4)==0); assert(hexcmpn("ABC 123"," ABC123",7)==0); assert(hexcmpn("ABC124","ABC123",6)>0); assert(rx_match(PAT_BASE64_BINARY,"",0)); assert(rx_match(PAT_BASE64_BINARY,"YmFz",4)); assert(rx_match(PAT_BASE64_BINARY,"YA==",4)); assert(rx_match(PAT_BASE64_BINARY,"Y w = =",7)); assert(rx_match(PAT_BASE64_BINARY,"YF8=",4)); assert(!rx_match(PAT_BASE64_BINARY,"YmF@",4)); assert(!rx_match(PAT_BASE64_BINARY,"YmFgH",5)); assert(!rx_match(PAT_BASE64_BINARY,"Y===",4)); assert(!rx_match(PAT_BASE64_BINARY,"YF=O",4)); assert(!rx_match(PAT_BASE64_BINARY,"YFZ=",4)); assert(b64cmpn("","",0)==0); assert(b64cmpn("ABC123","ABC123",6)==0); assert(b64cmpn("ABC 123"," ABC123",7)==0); assert(b64cmpn("ABC124","ABC123",6)>0); assert(b64cmpn("ABC123","abc123",6)<0); assert(nrmcmpn("A B","A B",3)==0); assert(nrmcmpn("A B","A C",3)<0); assert(nrmcmpn("A B","A\nB",3)==0); assert(nrmcmpn(" A","A ",2)<0); } --- NEW FILE: changes.txt --- /* $Id: changes.txt,v 1.1 2009/08/03 05:32:46 mike Exp $ */ 1.7.8 Thu Nov 2 12:52:18 AMT 2006 dvd: build script for OpenVMS by Jim Duff, http://eight-cubed.com/ 1.7.7 Sun Jan 29 11:13:23 PST 2006 dvd: rnv exits with non-zero return code if validation fails. 1.7.6 Fri Jan 7 01:07:37 AMT 2005 dvd: base64: I hope I got it right this time. Thu Jan 6 15:46:55 AMT 2005 dvd: base64 regular expression checks full syntax of RFC2045. 1.7.5 Thu Jan 6 02:18:51 AMT 2005 dvd: build error fixed 1.7.4 Wed Jan 5 13:55:41 AMT 2005 dvd: unit tests for s.c and xsd.c. 1.7.3 Wed Jan 5 05:57:55 AMT 2005 dvd: bug in xsd.c:b64cmpn fixed (thanks to Sridhar Guthula), tiny code cleanup in comparison routines. 
1.7.2 Tue Nov 9 15:28:23 AMT 2004 dvd: fixed a bug in rvp.py, didn't update the current pattern after text or mixed 1.7.1 Thu Aug 19 00:58:58 AMST 2004 dvd: discovered and fixed a bug with parsing of builtin datatypes, no one seems to use them explicitly. 1.7.0 Sat Mar 27 19:26:39 AMT 2004 dvd: entities are not expanded, use XX if you need to. error messages have the same syntax as Jing's. 1.6.6 Sun Mar 21 22:41:10 AMT 2004 dvd: turned on parsing of external parameter entities; with the next next release rnv will be split in two; the core utility will just validate an XML stream, an optional preprocessor will handle entities, dtds and xinclude 1.6.5 Sat Mar 13 21:24:31 AMT 2004 dvd: continued overhaul of rn.c and other places 1.6.4 Fri Mar 12 13:15:33 AMT 2004 dvd: looked again at the code in rn.c and rnl.c, streamlined and simplified many things Fri Mar 12 04:44:17 AMT 2004 dvd: better logic in mark_p 1.6.3 Thu Mar 11 22:47:17 AMT 2004 dvd: Bug fix in mark_p -- no more dangling references to unused patterns. 1.6.2 Mon Mar 8 13:22:52 AMT 2004 dvd: Added processing of davidashen-net-xg-file and -pos PIs to work with xg. 1.6.1 Mon Mar 1 12:55:48 AMT 2004 dvd: Added ranlib to Makefile to make rnv build smoothly on Mac OS X. 1.6.0 Thu Feb 26 21:14:39 AMT 2004 dvd: fixed mixed to be interleave, not choice. The test for this was wrong. 
Wed Feb 25 03:59:45 AMT 2004 dvd: got rid of ##; prone to name conflicts 1.5.8 Wed Feb 18 16:53:05 AMT 2004 dvd: enhanced diagnostics: first required elements and attributes are reported, then optional 1.5.7 Mon Feb 16 20:03:13 AMT 2004 dvd: rewrote the pattern space compression algorithm -- it should be correct now, and if it is still not, debugging should be much easier 1.5.6 Tue Feb 10 19:37:07 AMT 2004 dvd: actually turned path checks on 1.5.5 Fri Jan 30 11:07:01 AMT 2004 dvd: added check for recursion in structured regexps 1.5.4 Fri Jan 30 00:10:32 AMT 2004 dvd: implemented structured regexps for scheme-datatypes 1.5.3 Thu Jan 29 03:19:29 AMT 2004 dvd: made it compile on SunOS 5.8 (name clashes) 1.5.2 Wed Jan 28 15:35:38 AMT 2004 dvd: added handler for external system entities Makefile.bcc is back; people still need win32, and cygwin is bulky. Wed Jan 28 01:43:42 AMT 2004 dvd: removed static pointers from rnv and qualified name passed from Expat is modified instead. This is safe. Mon Jan 26 21:13:30 AMT 2004 dvd: ++ moved out of assert in xsd_tm.c 1.5.1 Mon Jan 26 01:25:42 AMT 2004 dvd: Internal release, regexp in Scheme implemented Sat Jan 24 22:42:58 AMT 2004 dvd: fixed rx.c: upper bound in quantifier Fri Jan 23 21:51:34 AMT 2004 dvd: newRef+1 is initialized to 0 (bugfix, integrity check in ht) Wed Jan 21 02:38:09 AMT 2004 dvd: SCM is now working as embedded engine; it was not obvious that the initialization call had to be lower on the stack than all other calls to SCM; static mode added to m, if M_STATIC is non-zero it is the size of available memory in bytes -- good for debugging. Tue Jan 20 22:24:50 AMT 2004 dvd: changed rx to conform to XSD errata (- is not a valid range) added -d and -e to rnv; I was not sure for a long time whether I should do it. Tue Jan 20 04:45:39 AMT 2004 dvd: rearranged a lot of things; put error reporting through a single function er_vprintf, useful for interfacing to scripting languages; some success with dsl -- dsl-equal? 
works Tue Jan 20 00:24:56 AMT 2004 dvd: renamed strops to s, memops to m, functions have s_ and m_ prefixes Mon Jan 19 13:43:57 AMT 2004 dvd: fixed typo in diagnostics when literal is missing after ~ Thu Jan 15 18:10:02 AMT 2004 dvd: added -d command to rvp to load external type checker; dxl.c dxl.h implement http://davidashen.net/relaxng/pluggable-datatypes datatype; xsdck makes xsd: pluggable. 1.4.1 Tue Jan 13 14:29:08 AMT 2004 dvd: added sys/types.h where appropriate; fixed Makefile.bcc to include rnl.{c,h,obj} 1.4.0 Mon Jan 12 17:39:58 AMT 2004 dvd: changed rvp.py to use os.read|os.write for performance Mon Jan 12 03:53:56 AMT 2004 dvd: rvp is implemented; reads validation events from input, sends diagnostics to output; embedding examples in perl and python Sat Jan 10 03:18:12 AMT 2004 dvd: factored out loading of rnc into rnl; now, instead of calling rnc and rnd separately, rnl_(fn|fd|s) is called. 1.3.5 Tue Jan 6 11:56:06 AMT 2004 dvd: learned that UTF-8 BOM is reality, added processing; fixed check for name characters in arx, made it >0x7f||nmtoken (no good reason to process unicode); added end-of-line to error messages from Expat 1.3.4 Mon Jan 5 22:43:05 AMT 2004 dvd: grammar samples added to the distribution, entries for other grammars commented out in tools/arx.conf Mon Jan 5 17:00:57 AMT 2004 dvd: isany is now ary_isany (ary is helpers for arx) Mon Jan 5 00:56:25 AMT 2004 dvd: rnx_isany is added. 
It is used by arx and checks for a pattern that matches any element; should be written as any = (element * {any}|attribute * {text}|text)* (triple choice, elements in any order) Sun Jan 4 03:38:10 AMT 2004 dvd: Renamings 1.3.3 Sat Jan 3 18:58:36 AMT 2004 dvd: arx, a grammar (and not just grammar) association utility has been added; performance improvements, Makefile.bcc to compile the binaries under win32+Borland C/C++ builder 1.3.2 Fri Jan 2 04:30:30 AMT 2004 dvd: multiple cleanups and speedups; ht_deli added Tue Dec 30 23:40:59 AMT 2003 dvd: uri2rnc.pl and rnv.vim as a proof-of-concept; learned a lot of things about vim Tue Dec 30 17:24:05 AMT 2003 dvd: added warning about include in includeContent Tue Dec 30 04:19:22 AMT 2003 dvd: expected now correctly returns attributes behind elements Mon Dec 29 20:26:01 AMT 2003 dvd: error handling refactored, validation logic separated from command-line interface 1.3.1 Mon Dec 29 01:58:38 AMT 2003 dvd: invalid elements are correctly skipped (they were supposed to before), but the code contained a fallout that prevented them from doing so 1.3.0 Mon Dec 29 01:57:46 AMT 2003 dvd: pattern pools are now one-dimensional, pointer arithmetics is a little more complex, but performance and memory use are better; besides, this makes pattern datatype extensible, which is a good thing 1.2.1 Fri Dec 26 15:50:49 AMT 2003 dvd: error reporting redone; errors for xsd and rx are routed through validation errors' handler Fri Dec 26 01:55:37 AMT 2003 dvd: debugged on NIST tests; the only tests which don't pass when they should are those with doubles due to overflow and underflow. Thu Dec 25 19:30:55 AMT 2003 dvd: added puorg_nr to implement attribute_open correctly; empty elements are correctly validated against data and values; equality and ordering for dateTime (and its partial variants) implemented; rx,xsd use overloadable error handlers with variable lists. 
1.2.0 Wed Dec 24 04:40:25 AMT 2003 dvd: XML Schema Part 2: regular expressions and datatypes. many things are implemented, with exception of context-dependent checks and a calendar library; optimizations and cleanups. Fri Dec 19 22:19:05 AMT 2003 dvd: rn_params joined with rn_string 1.1.0 Fri Dec 19 03:56:46 AMT 2003 dvd: drv_attribute_open|drv_attribute_close and memoization implemented; hash values are multiplied by large primes - helps with current hashing algorithms; garbage collection added to rn (rn_compress, rn_compress_last); compact mode implemented for validation, the buffer for memoized patterns does not exceed a limit when the validator runs in compact mode. 1.0.9 Wed Dec 17 23:36:05 AMT 2003 dvd: drv_mixed_text added, results memoized; code cleanups to compile easier; added targets for static and shared libraries to the Makefile, many small touch-ups 1.0.8; Tue Dec 16 14:05:16 AMT 2003 dvd: memory for error message in rnv.c is allocated properly 1.0.7; Tue Dec 16 00:12:04 AMT 2003 dvd: added reporting of attribute value for invalid attributes --- NEW FILE: dxl.h --- /* $Id: dxl.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #ifndef DXL_H #define DXL_H 1 #ifndef DXL_EXC #define DXL_EXC 0 #endif #define DXL_URL "http://davidashen.net/relaxng/pluggable-datatypes" extern char *dxl_cmd; extern int dxl_allows(char *typ,char *ps,char *s,int n); extern int dxl_equal(char *typ,char *val,char *s,int n); #endif --- NEW FILE: u.h --- /* $Id: u.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #ifndef U_H #define U_H 1 #define U_MAXLEN 6 /* returns BOM length if the string starts with BOM */ extern int u_bom(char *s,int n); /* computes a unicode character u off the head of s; returns number of bytes read. 0 means error. 
*/ extern int u_get(int *up,char *s); /* encodes u in utf-8, returns number of octets taken */ extern int u_put(char *s,int u); /* number of unicode characters in the string; -1 means error */ extern int u_strlen(char *s); extern int u_strnlen(char *s,int n); /* checks whether a character falls within one of sorted ranges */ extern int u_in_ranges(int u,int r[][2],int len); #endif --- NEW FILE: ht.c --- /* $Id: ht.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #include <stdlib.h> /*NULL*/ #include <assert.h> /*assert*/ #include "m.h" #include "ht.h" #define LOAD_FACTOR 2 void ht_init(struct hashtable *ht,int len,int (*hash)(int),int (*equal)(int,int)) { assert(len>0); ht->tablen=1; len*=LOAD_FACTOR; while(ht->tablen<len) ht->tablen<<=1; ht->limit=ht->tablen/LOAD_FACTOR; ht->table=(int*)m_alloc(ht->tablen<<1,sizeof(int)); /* the second half is hash values */ ht->hash=hash; ht->equal=equal; ht_clear(ht); } void ht_clear(struct hashtable *ht) { int i; ht->used=0; for(i=0;i!=ht->tablen;++i) ht->table[i]=-1; } void ht_dispose(struct hashtable *ht) { m_free(ht->table); ht->table=NULL; } #define first(ht,hv) (hv&(ht->tablen-1)) #define next(ht,i) (i==0?ht->tablen-1:i-1) int ht_get(struct hashtable *ht,int i) { int hv=ht->hash(i),j; for(j=first(ht,hv);;j=next(ht,j)) { int tj=ht->table[j]; if(tj==-1) break; if(ht->equal(i,tj)) return tj; } return -1; } void ht_put(struct hashtable *ht,int i) { int hv=ht->hash(i),j; if(ht->used==ht->limit) { int tablen=ht->tablen; int *table=ht->table; ht->tablen<<=1; ht->limit<<=1; ht->table=(int*)m_alloc(ht->tablen<<1,sizeof(int)); for(j=0;j!=ht->tablen;++j) ht->table[j]=-1; for(j=0;j!=tablen;++j) { if(table[j]!=-1) { int hvj=table[j|tablen]; int k; for(k=first(ht,hvj);ht->table[k]!=-1;k=next(ht,k)); ht->table[k]=table[j]; ht->table[k|ht->tablen]=hvj; } } m_free(table); } for(j=first(ht,hv);ht->table[j]!=-1;j=next(ht,j)) assert(!ht->equal(i,ht->table[j])); ht->table[j]=i; ht->table[ht->tablen|j]=hv; ++ht->used; } static int del(struct hashtable 
*ht,int i,int eq) { if(ht->used!=0) { int hv=ht->hash(i),j; for(j=first(ht,hv);;j=next(ht,j)) { int tj=ht->table[j]; if(tj==-1) break; if(eq?i==tj:ht->equal(i,tj)) { do { int k=j,j0; ht->table[j]=-1; for(;;) { j=next(ht,j); if(ht->table[j]==-1) break; j0=first(ht,ht->table[j|ht->tablen]); if((k<=j0||j0<j)&&(j0<j||j<=k)&&(j<=k||k<=j0)) break; } ht->table[k]=ht->table[j]; ht->table[k|ht->tablen]=ht->table[j|ht->tablen]; } while(ht->table[j]!=-1); --ht->used; return tj; } } } return -1; } int ht_del(struct hashtable *ht,int i) {return del(ht,i,0);} int ht_deli(struct hashtable *ht,int i) {return del(ht,i,1);} --- NEW FILE: rx_cls_ranges.c --- /* $Id: rx_cls_ranges.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */ /* blocks */ static int IsBasicLatinRanges[][2]={{0x0000,0x007F}}; static int IsLatin_1SupplementRanges[][2]={{0x0080,0x00FF}}; static int IsLatinExtended_ARanges[][2]={{0x0100,0x017F}}; static int IsLatinExtended_BRanges[][2]={{0x0180,0x024F}}; static int IsIPAExtensionsRanges[][2]={{0x0250,0x02AF}}; static int IsSpacingModifierLettersRanges[][2]={{0x02B0,0x02FF}}; static int IsCombiningDiacriticalMarksRanges[][2]={{0x0300,0x036F}}; static int IsGreekRanges[][2]={{0x0370,0x03FF}}; static int IsCyrillicRanges[][2]={{0x0400,0x04FF}}; static int IsArmenianRanges[][2]={{0x0530,0x058F}}; static int IsHebrewRanges[][2]={{0x0590,0x05FF}}; static int IsArabicRanges[][2]={{0x0600,0x06FF}}; static int IsSyriacRanges[][2]={{0x0700,0x074F}}; static int IsThaanaRanges[][2]={{0x0780,0x07BF}}; static int IsDevanagariRanges[][2]={{0x0900,0x097F}}; static int IsBengaliRanges[][2]={{0x0980,0x09FF}}; static int IsGurmukhiRanges[][2]={{0x0A00,0x0A7F}}; static int IsGujaratiRanges[][2]={{0x0A80,0x0AFF}}; static int IsOriyaRanges[][2]={{0x0B00,0x0B7F}}; static int IsTamilRanges[][2]={{0x0B80,0x0BFF}}; static int IsTeluguRanges[][2]={{0x0C00,0x0C7F}}; static int IsKannadaRanges[][2]={{0x0C80,0x0CFF}}; static int IsMalayalamRanges[][2]={{0x0D00,0x0D7F}}; static int 
IsSinhalaRanges[][2]={{0x0D80,0x0DFF}}; static int IsThaiRanges[][2]={{0x0E00,0x0E7F}}; static int IsLaoRanges[][2]={{0x0E80,0x0EFF}}; static int IsTibetanRanges[][2]={{0x0F00,0x0FFF}}; static int IsMyanmarRanges[][2]={{0x1000,0x109F}}; static int IsGeorgianRanges[][2]={{0x10A0,0x10FF}}; static int IsHangulJamoRanges[][2]={{0x1100,0x11FF}}; static int IsEthiopicRanges[][2]={{0x1200,0x137F}}; static int IsCherokeeRanges[][2]={{0x13A0,0x13FF}}; static int IsUnifiedCanadianAboriginalSyllabicsRanges[][2]={{0x1400,0x167F}}; static int IsOghamRanges[][2]={{0x1680,0x169F}}; static int IsRunicRanges[][2]={{0x16A0,0x16FF}}; static int IsKhmerRanges[][2]={{0x1780,0x17FF}}; static int IsMongolianRanges[][2]={{0x1800,0x18AF}}; static int IsLatinExtendedAdditionalRanges[][2]={{0x1E00,0x1EFF}}; static int IsGreekExtendedRanges[][2]={{0x1F00,0x1FFF}}; static int IsGeneralPunctuationRanges[][2]={{0x2000,0x206F}}; static int IsSuperscriptsandSubscriptsRanges[][2]={{0x2070,0x209F}}; static int IsCurrencySymbolsRanges[][2]={{0x20A0,0x20CF}}; static int IsCombiningMarksforSymbolsRanges[][2]={{0x20D0,0x20FF}}; static int IsLetterlikeSymbolsRanges[][2]={{0x2100,0x214F}}; static int IsNumberFormsRanges[][2]={{0x2150,0x218F}}; static int IsArrowsRanges[][2]={{0x2190,0x21FF}}; static int IsMathematicalOperatorsRanges[][2]={{0x2200,0x22FF}}; static int IsMiscellaneousTechnicalRanges[][2]={{0x2300,0x23FF}}; static int IsControlPicturesRanges[][2]={{0x2400,0x243F}}; static int IsOpticalCharacterRecognitionRanges[][2]={{0x2440,0x245F}}; static int IsEnclosedAlphanumericsRanges[][2]={{0x2460,0x24FF}}; static int IsBoxDrawingRanges[][2]={{0x2500,0x257F}}; static int IsBlockElementsRanges[][2]={{0x2580,0x259F}}; static int IsGeometricShapesRanges[][2]={{0x25A0,0x25FF}}; static int IsMiscellaneousSymbolsRanges[][2]={{0x2600,0x26FF}}; static int IsDingbatsRanges[][2]={{0x2700,0x27BF}}; static int IsBraillePatternsRanges[][2]={{0x2800,0x28FF}}; static int 
IsCJKRadicalsSupplementRanges[][2]={{0x2E80,0x2EFF}}; static int IsKangxiRadicalsRanges[][2]={{0x2F00,0x2FDF}}; static int IsIdeographicDescriptionCharactersRanges[][2]={{0x2FF0,0x2FFF}}; static int IsCJKSymbolsandPunctuationRanges[][2]={{0x3000,0x303F}}; static int IsHiraganaRanges[][2]={{0x3040,0x309F}}; static int IsKatakanaRanges[][2]={{0x30A0,0x30FF}}; static int IsBopomofoRanges[][2]={{0x3100,0x312F}}; static int IsHangulCompatibilityJamoRanges[][2]={{0x3130,0x318F}}; static int IsKanbunRanges[][2]={{0x3190,0x319F}}; static int IsBopomofoExtendedRanges[][2]={{0x31A0,0x31BF}}; static int IsEnclosedCJKLettersandMonthsRanges[][2]={{0x3200,0x32FF}}; static int IsCJKCompatibilityRanges[][2]={{0x3300,0x33FF}}; static int IsCJKUnifiedIdeographsExtensionARanges[][2]={{0x3400,0x4DB5}}; static int IsCJKUnifiedIdeographsRanges[][2]={{0x4E00,0x9FFF}}; static int IsYiSyllablesRanges[][2]={{0xA000,0xA48F}}; static int IsYiRadicalsRanges[][2]={{0xA490,0xA4CF}}; static int IsHangulSyllablesRanges[][2]={{0xAC00,0xD7A3}}; static int IsCJKCompatibilityIdeographsRanges[][2]={{0xF900,0xFAFF}}; static int IsAlphabeticPresentationFormsRanges[][2]={{0xFB00,0xFB4F}}; static int IsArabicPresentationForms_ARanges[][2]={{0xFB50,0xFDFF}}; static int IsCombiningHalfMarksRanges[][2]={{0xFE20,0xFE2F}}; static int IsCJKCompatibilityFormsRanges[][2]={{0xFE30,0xFE4F}}; static int IsSmallFormVariantsRanges[][2]={{0xFE50,0xFE6F}}; static int IsArabicPresentationForms_BRanges[][2]={{0xFE70,0xFEFE}}; static int IsSpecialsRanges[][2]={{0xFEFF,0xFEFF},{0xFFF0,0xFFFD}}; static int IsHalfwidthandFullwidthFormsRanges[][2]={{0xFF00,0xFFEF}}; static int IsOldItalicRanges[][2]={{0x10300,0x1032F}}; static int IsGothicRanges[][2]={{0x10330,0x1034F}}; static int IsDeseretRanges[][2]={{0x10400,0x1044F}}; static int IsByzantineMusicalSymbolsRanges[][2]={{0x1D000,0x1D0FF}}; static int IsMusicalSymbolsRanges[][2]={{0x1D100,0x1D1FF}}; static int IsMathematicalAlphanumericSymbolsRanges[][2]={{0x1D400,0x1D7FF}}; 
static int IsCJKUnifiedIdeographsExtensionBRanges[][2]={{0x20000,0x2A6D6}}; static int IsCJKCompatibilityIdeographsSupplementRanges[][2]={{0x2F800,0x2FA1F}}; static int IsTagsRanges[][2]={{0xE0000,0xE007F}}; static int IsPrivateUseRanges[][2]={{0xE000,0xF8FF},{0xF0000,0xFFFFD},{0x100000,0x10FFFD}}; /* classes */ static int LuRanges[][2]={{0x41,0x5A},{0xC0,0xD6},{0xD8,0xDE},{0x100,0x100},{0x102,0x102},{0x104,0x104},{0x106,0x106},{0x108,0x108},{0x10A,0x10A},{0x10C,0x10C},{0x10E,0x10E},{0x110,0x110},{0x112,0x112},{0x114,0x114},{0x116,0x116},{0x118,0x118},{0x11A,0x11A},{0x11C,0x11C},{0x11E,0x11E},{0x120,0x120},{0x122,0x122},{0x124,0x124},{0x126,0x126},{0x128,0x128},{0x12A,0x12A},{0x12C,0x12C},{0x12E,0x12E},{0x130,0x130},{0x132,0x132},{0x134,0x134},{0x136,0x136},{0x139,0x139},{0x13B,0x13B},{0x13D,0x13D},{0x13F,0x13F},{0x141,0x141},{0x143,0x143},{0x145,0x145},{0x147,0x147},{0x14A,0x14A},{0x14C,0x14C},{0x14E,0x14E},{0x150,0x150},{0x152,0x152},{0x154,0x154},{0x156,0x156},{0x158,0x158},{0x15A,0x15A},{0x15C,0x15C},{0x15E,0x15E},{0x160,0x160},{0x162,0x162},{0x164,0x164},{0x166,0x166},{0x168,0x168},{0x16A,0x16A},{0x16C,0x16C},{0x16E,0x16E},{0x170,0x170},{0x172,0x172},{0x174,0x174},{0x176,0x176},{0x178,0x179},{0x17B,0x17B},{0x17D,0x17D},{0x181,0x182},{0x184,0x184},{0x186,0x187},{0x189,0x18B},{0x18E,0x191},{0x193,0x194},{0x196,0x198},{0x19C,0x19D},{0x19F,0x1A0},{0x1A2,0x1A2},{0x1A4,0x1A4},{0x1A6,0x1A7},{0x1A9,0x1A9},{0x1AC,0x1AC},{0x1AE,0x1AF},{0x1B1,0x1B3},{0x1B5,0x1B5},{0x1B7,0x1B8},{0x1BC,0x1BC},{0x1C4,0x1C4},{0x1C7,0x1C7},{0x1CA,0x1CA},{0x1CD,0x1CD},{0x1CF,0x1CF},{0x1D1,0x1D1},{0x1D3,0x1D3},{0x1D5,0x1D5},{0x1D7,0x1D7},{0x1D9,0x1D9},{0x1DB,0x1DB},{0x1DE,0x1DE},{0x1E0,0x1E0},{0x1E2,0x1E2},{0x1E4,0x1E4},{0x1E6,0x1E6},{0x1E8,0x1E8},{0x1EA,0x1EA},{0x1EC,0x1EC},{0x1EE,0x1EE},{0x1F1,0x1F1},{0x1F4,0x1F4},{0x1F6,0x1F8},{0x1FA,0x1FA},{0x1FC,0x1FC},{0x1FE,0x1FE},{0x200,0x200},{0x202,0x202},{0x204,0x204},{0x206,0x206},{0x208,0x208},{0x20A,0x20A},{0x20C,0x20C},{0x20E,0x20E},{0x210,0x210},{
0x212,0x212},{0x214,0x214},{0x216,0x216},{0x218,0x218},{0x21A,0x21A},{0x21C,0x21C},{0x21E,0x21E},{0x222,0x222},{0x224,0x224},{0x226,0x226},{0x228,0x228},{0x22A,0x22A},{0x22C,0x22C},{0x22E,0x22E},{0x230,0x230},{0x232,0x232},{0x386,0x386},{0x388,0x38A},{0x38C,0x38C},{0x38E,0x38F},{0x391,0x3A1},{0x3A3,0x3AB},{0x3D2,0x3D4},{0x3DA,0x3DA},{0x3DC,0x3DC},{0x3DE,0x3DE},{0x3E0,0x3E0},{0x3E2,0x3E2},{0x3E4,0x3E4},{0x3E6,0x3E6},{0x3E8,0x3E8},{0x3EA,0x3EA},{0x3EC,0x3EC},{0x3EE,0x3EE},{0x3F4,0x3F4},{0x400,0x42F},{0x460,0x460},{0x462,0x462},{0x464,0x464},{0x466,0x466},{0x468,0x468},{0x46A,0x46A},{0x46C,0x46C},{0x46E,0x46E},{0x470,0x470},{0x472,0x472},{0x474,0x474},{0x476,0x476},{0x478,0x478},{0x47A,0x47A},{0x47C,0x47C},{0x47E,0x47E},{0x480,0x480},{0x48C,0x48C},{0x48E,0x48E},{0x490,0x490},{0x492,0x492},{0x494,0x494},{0x496,0x496},{0x498,0x498},{0x49A,0x49A},{0x49C,0x49C},{0x49E,0x49E},{0x4A0,0x4A0},{0x4A2,0x4A2},{0x4A4,0x4A4},{0x4A6,0x4A6},{0x4A8,0x4A8},{0x4AA,0x4AA},{0x4AC,0x4AC},{0x4AE,0x4AE},{0x4B0,0x4B0},{0x4B2,0x4B2},{0x4B4,0x4B4},{0x4B6,0x4B6},{0x4B8,0x4B8},{0x4BA,0x4BA},{0x4BC,0x4BC},{0x4BE,0x4BE},{0x4C0,0x4C1},{0x4C3,0x4C3},{0x4C7,0x4C7},{0x4CB,0x4CB},{0x4D0,0x4D0},{0x4D2,0x4D2},{0x4D4,0x4D4},{0x4D6,0x4D6},{0x4D8,0x4D8},{0x4DA,0x4DA},{0x4DC,0x4DC},{0x4DE,0x4DE},{0x4E0,0x4E0},{0x4E2,0x4E2},{0x4E4,0x4E4},{0x4E6,0x4E6},{0x4E8,0x4E8},{0x4EA,0x4EA},{0x4EC,0x4EC},{0x4EE,0x4EE},{0x4F0,0x4F0},{0x4F2,0x4F2},{0x4F4,0x4F4},{0x4F8,0x4F8},{0x531,0x556},{0x10A0,0x10C5},{0x1E00,0x1E00},{0x1E02,0x1E02},{0x1E04,0x1E04},{0x1E06,0x1E06},{0x1E08,0x1E08},{0x1E0A,0x1E0A},{0x1E0C,0x1E0C},{0x1E0E,0x1E0E},{0x1E10,0x1E10},{0x1E12,0x1E12},{0x1E14,0x1E14},{0x1E16,0x1E16},{0x1E18,0x1E18},{0x1E1A,0x1E1A},{0x1E1C,0x1E1C},{0x1E1E,0x1E1E},{0x1E20,0x1E20},{0x1E22,0x1E22},{0x1E24,0x1E24},{0x1E26,0x1E26},{0x1E28,0x1E28},{0x1E2A,0x1E2A},{0x1E2C,0x1E2C},{0x1E2E,0x1E2E},{0x1E30,0x1E30},{0x1E32,0x1E32},{0x1E34,0x1E34},{0x1E36,0x1E36},{0x1E38,0x1E38},{0x1E3A,0x1E3A},{0x1E3C,0x1E3C},{0x1E3E,0x1E3E},{0x1E40,0x1E40},{0x
1E42,0x1E42},{0x1E44,0x1E44},{0x1E46,0x1E46},{0x1E48,0x1E48},{0x1E4A,0x1E4A},{0x1E4C,0x1E4C},{0x1E4E,0x1E4E},{0x1E50,0x1E50},{0x1E52,0x1E52},{0x1E54,0x1E54},{0x1E56,0x1E56},{0x1E58,0x1E58},{0x1E5A,0x1E5A},{0x1E5C,0x1E5C},{0x1E5E,0x1E5E},{0x1E60,0x1E60},{0x1E62,0x1E62},{0x1E64,0x1E64},{0x1E66,0x1E66},{0x1E68,0x1E68},{0x1E6A,0x1E6A},{0x1E6C,0x1E6C},{0x1E6E,0x1E6E},{0x1E70,0x1E70},{0x1E72,0x1E72},{0x1E74,0x1E74},{0x1E76,0x1E76},{0x1E78,0x1E78},{0x1E7A,0x1E7A},{0x1E7C,0x1E7C},{0x1E7E,0x1E7E},{0x1E80,0x1E80},{0x1E82,0x1E82},{0x1E84,0x1E84},{0x1E86,0x1E86},{0x1E88,0x1E88},{0x1E8A,0x1E8A},{0x1E8C,0x1E8C},{0x1E8E,0x1E8E},{0x1E90,0x1E90},{0x1E92,0x1E92},{0x1E94,0x1E94},{0x1EA0,0x1EA0},{0x1EA2,0x1EA2},{0x1EA4,0x1EA4},{0x1EA6,0x1EA6},{0x1EA8,0x1EA8},{0x1EAA,0x1EAA},{0x1EAC,0x1EAC},{0x1EAE,0x1EAE},{0x1EB0,0x1EB0},{0x1EB2,0x1EB2},{0x1EB4,0x1EB4},{0x1EB6,0x1EB6},{0x1EB8,0x1EB8},{0x1EBA,0x1EBA},{0x1EBC,0x1EBC},{0x1EBE,0x1EBE},{0x1EC0,0x1EC0},{0x1EC2,0x1EC2},{0x1EC4,0x1EC4},{0x1EC6,0x1EC6},{0x1EC8,0x1EC8},{0x1ECA,0x1ECA},{0x1ECC,0x1ECC},{0x1ECE,0x1ECE},{0x1ED0,0x1ED0},{0x1ED2,0x1ED2},{0x1ED4,0x1ED4},{0x1ED6,0x1ED6},{0x1ED8,0x1ED8},{0x1EDA,0x1EDA},{0x1EDC,0x1EDC},{0x1EDE,0x1EDE},{0x1EE0,0x1EE0},{0x1EE2,0x1EE2},{0x1EE4,0x1EE4},{0x1EE6,0x1EE6},{0x1EE8,0x1EE8},{0x1EEA,0x1EEA},{0x1EEC,0x1EEC},{0x1EEE,0x1EEE},{0x1EF0,0x1EF0},{0x1EF2,0x1EF2},{0x1EF4,0x1EF4},{0x1EF6,0x1EF6},{0x1EF8,0x1EF8},{0x1F08,0x1F0F},{0x1F18,0x1F1D},{0x1F28,0x1F2F},{0x1F38,0x1F3F},{0x1F48,0x1F4D},{0x1F59,0x1F59},{0x1F5B,0x1F5B},{0x1F5D,0x1F5D},{0x1F5F,0x1F5F},{0x1F68,0x1F6F},{0x1FB8,0x1FBB},{0x1FC8,0x1FCB},{0x1FD8,0x1FDB},{0x1FE8,0x1FEC},{0x1FF8,0x1FFB},{0x2102,0x2102},{0x2107,0x2107},{0x210B,0x210D},{0x2110,0x2112},{0x2115,0x2115},{0x2119,0x211D},{0x2124,0x2124},{0x2126,0x2126},{0x2128,0x2128},{0x212A,0x212D},{0x2130,0x2131},{0x2133,0x2133},{0xFF21,0xFF3A},{0x10400,0x10425},{0x1D400,0x1D419},{0x1D434,0x1D44D},{0x1D468,0x1D481},{0x1D49C,0x1D49C},{0x1D49E,0x1D49F},{0x1D4A2,0x1D4A2},{0x1D4A5,0x1D4A6},{0x1D4A9,0x1D4AC},{0x
1D4AE,0x1D4B5},{0x1D4D0,0x1D4E9},{0x1D504,0x1D505},{0x1D507,0x1D50A},{0x1D50D,0x1D514},{0x1D516,0x1D51C},{0x1D538,0x1D539},{0x1D53B,0x1D53E},{0x1D540,0x1D544},{0x1D546,0x1D546},{0x1D54A,0x1D550},{0x1D56C,0x1D585},{0x1D5A0,0x1D5B9},{0x1D5D4,0x1D5ED},{0x1D608,0x1D621},{0x1D63C,0x1D655},{0x1D670,0x1D689},{0x1D6A8,0x1D6C0},{0x1D6E2,0x1D6FA},{0x1D71C,0x1D734},{0x1D756,0x1D76E},{0x1D790,0x1D7A8}}; static int LlRanges[][2]={{0x61,0x7A},{0xAA,0xAA},{0xB5,0xB5},{0xBA,0xBA},{0xDF,0xF6},{0xF8,0xFF},{0x101,0x101},{0x103,0x103},{0x105,0x105},{0x107,0x107},{0x109,0x109},{0x10B,0x10B},{0x10D,0x10D},{0x10F,0x10F},{0x111,0x111},{0x113,0x113},{0x115,0x115},{0x117,0x117},{0x119,0x119},{0x11B,0x11B},{0x11D,0x11D},{0x11F,0x11F},{0x121,0x121},{0x123,0x123},{0x125,0x125},{0x127,0x127},{0x129,0x129},{0x12B,0x12B},{0x12D,0x12D},{0x12F,0x12F},{0x131,0x131},{0x133,0x133},{0x135,0x135},{0x137,0x138},{0x13A,0x13A},{0x13C,0x13C},{0x13E,0x13E},{0x140,0x140},{0x142,0x142},{0x144,0x144},{0x146,0x146},{0x148,0x149},{0x14B,0x14B},{0x14D,0x14D},{0x14F,0x14F},{0x151,0x151},{0x153,0x153},{0x155,0x155},{0x157,0x157},{0x159,0x159},{0x15B,0x15B},{0x15D,0x15D},{0x15F,0x15F},{0x161,0x161},{0x163,0x163},{0x165,0x165},{0x167,0x167},{0x169,0x169},{0x16B,0x16B},{0x16D,0x16D},{0x16F,0x16F},{0x171,0x171},{0x173,0x173},{0x175,0x175},{0x177,0x177},{0x17A,0x17A},{0x17C,0x17C},{0x17E,0x180},{0x183,0x183},{0x185,0x185},{0x188,0x188},{0x18C,0x18D},{0x192,0x192},{0x195,0x195},{0x199,0x19B},{0x19E,0x19E},{0x1A1,0x1A1},{0x1A3,0x1A3},{0x1A5,0x1A5},{0x1A8,0x1A8},{0x1AA,0x1AB},{0x1AD,0x1AD},{0x1B0,0x1B0},{0x1B4,0x1B4},{0x1B6,0x1B6},{0x1B9,0x1BA},{0x1BD,0x1BF},{0x1C6,0x1C6},{0x1C9,0x1C9},{0x1CC,0x1CC},{0x1CE,0x1CE},{0x1D0,0x1D0},{0x1D2,0x1D2},{0x1D4,0x1D4},{0x1D6,0x1D6},{0x1D8,0x1D8},{0x1DA,0x1DA},{0x1DC,0x1DD},{0x1DF,0x1DF},{0x1E1,0x1E1},{0x1E3,0x1E3},{0x1E5,0x1E5},{0x1E7,0x1E7},{0x1E9,0x1E9},{0x1EB,0x1EB},{0x1ED,0x1ED},{0x1EF,0x1F0},{0x1F3,0x1F3},{0x1F5,0x1F5},{0x1F9,0x1F9},{0x1FB,0x1FB},{0x1FD,0x1FD},{0x1FF,0x1FF},{0x201,0x2
01},{0x203,0x203},{0x205,0x205},{0x207,0x207},{0x209,0x209},{0x20B,0x20B},{0x20D,0x20D},{0x20F,0x20F},{0x211,0x211},{0x213,0x213},{0x215,0x215},{0x217,0x217},{0x219,0x219},{0x21B,0x21B},{0x21D,0x21D},{0x21F,0x21F},{0x223,0x223},{0x225,0x225},{0x227,0x227},{0x229,0x229},{0x22B,0x22B},{0x22D,0x22D},{0x22F,0x22F},{0x231,0x231},{0x233,0x233},{0x250,0x2AD},{0x390,0x390},{0x3AC,0x3CE},{0x3D0,0x3D1},{0x3D5,0x3D7},{0x3DB,0x3DB},{0x3DD,0x3DD},{0x3DF,0x3DF},{0x3E1,0x3E1},{0x3E3,0x3E3},{0x3E5,0x3E5},{0x3E7,0x3E7},{0x3E9,0x3E9},{0x3EB,0x3EB},{0x3ED,0x3ED},{0x3EF,0x3F3},{0x3F5,0x3F5},{0x430,0x45F},{0x461,0x461},{0x463,0x463},{0x465,0x465},{0x467,0x467},{0x469,0x469},{0x46B,0x46B},{0x46D,0x46D},{0x46F,0x46F},{0x471,0x471},{0x473,0x473},{0x475,0x475},{0x477,0x477},{0x479,0x479},{0x47B,0x47B},{0x47D,0x47D},{0x47F,0x47F},{0x481,0x481},{0x48D,0x48D},{0x48F,0x48F},{0x491,0x491},{0x493,0x493},{0x495,0x495},{0x497,0x497},{0x499,0x499},{0x49B,0x49B},{0x49D,0x49D},{0x49F,0x49F},{0x4A1,0x4A1},{0x4A3,0x4A3},{0x4A5,0x4A5},{0x4A7,0x4A7},{0x4A9,0x4A9},{0x4AB,0x4AB},{0x4AD,0x4AD},{0x4AF,0x4AF},{0x4B1,0x4B1},{0x4B3,0x4B3},{0x4B5,0x4B5},{0x4B7,0x4B7},{0x4B9,0x4B9},{0x4BB,0x4BB},{0x4BD,0x4BD},{0x4BF,0x4BF},{0x4C2,0x4C2},{0x4C4,0x4C4},{0x4C8,0x4C8},{0x4CC,0x4CC},{0x4D1,0x4D1},{0x4D3,0x4D3},{0x4D5,0x4D5},{0x4D7,0x4D7},{0x4D9,0x4D9},{0x4DB,0x4DB},{0x4DD,0x4DD},{0x4DF,0x4DF},{0x4E1,0x4E1},{0x4E3,0x4E3},{0x4E5,0x4E5},{0x4E7,0x4E7},{0x4E9,0x4E9},{0x4EB,0x4EB},{0x4ED,0x4ED},{0x4EF,0x4EF},{0x4F1,0x4F1},{0x4F3,0x4F3},{0x4F5,0x4F5},{0x4F9,0x4F9},{0x561,0x587},{0x1E01,0x1E01},{0x1E03,0x1E03},{0x1E05,0x1E05},{0x1E07,0x1E07},{0x1E09,0x1E09},{0x1E0B,0x1E0B},{0x1E0D,0x1E0D},{0x1E0F,0x1E0F},{0x1E11,0x1E11},{0x1E13,0x1E13},{0x1E15,0x1E15},{0x1E17,0x1E17},{0x1E19,0x1E19},{0x1E1B,0x1E1B},{0x1E1D,0x1E1D},{0x1E1F,0x1E1F},{0x1E21,0x1E21},{0x1E23,0x1E23},{0x1E25,0x1E25},{0x1E27,0x1E27},{0x1E29,0x1E29},{0x1E2B,0x1E2B},{0x1E2D,0x1E2D},{0x1E2F,0x1E2F},{0x1E31,0x1E31},{0x1E33,0x1E33},{0x1E35,0x1E35},{0x1E37,0x1E37},{0x1E39,0x
1E39},{0x1E3B,0x1E3B},{0x1E3D,0x1E3D},{0x1E3F,0x1E3F},{0x1E41,0x1E41},{0x1E43,0x1E43},{0x1E45,0x1E45},{0x1E47,0x1E47},{0x1E49,0x1E49},{0x1E4B,0x1E4B},{0x1E4D,0x1E4D},{0x1E4F,0x1E4F},{0x1E51,0x1E51},{0x1E53,0x1E53},{0x1E55,0x1E55},{0x1E57,0x1E57},{0x1E59,0x1E59},{0x1E5B,0x1E5B},{0x1E5D,0x1E5D},{0x1E5F,0x1E5F},{0x1E61,0x1E61},{0x1E63,0x1E63},{0x1E65,0x1E65},{0x1E67,0x1E67},{0x1E69,0x1E69},{0x1E6B,0x1E6B},{0x1E6D,0x1E6D},{0x1E6F,0x1E6F},{0x1E71,0x1E71},{0x1E73,0x1E73},{0x1E75,0x1E75},{0x1E77,0x1E77},{0x1E79,0x1E79},{0x1E7B,0x1E7B},{0x1E7D,0x1E7D},{0x1E7F,0x1E7F},{0x1E81,0x1E81},{0x1E83,0x1E83},{0x1E85,0x1E85},{0x1E87,0x1E87},{0x1E89,0x1E89},{0x1E8B,0x1E8B},{0x1E8D,0x1E8D},{0x1E8F,0x1E8F},{0x1E91,0x1E91},{0x1E93,0x1E93},{0x1E95,0x1E9B},{0x1EA1,0x1EA1},{0x1EA3,0x1EA3},{0x1EA5,0x1EA5},{0x1EA7,0x1EA7},{0x1EA9,0x1EA9},{0x1EAB,0x1EAB},{0x1EAD,0x1EAD},{0x1EAF,0x1EAF},{0x1EB1,0x1EB1},{0x1EB3,0x1EB3},{0x1EB5,0x1EB5},{0x1EB7,0x1EB7},{0x1EB9,0x1EB9},{0x1EBB,0x1EBB},{0x1EBD,0x1EBD},{0x1EBF,0x1EBF},{0x1EC1,0x1EC1},{0x1EC3,0x1EC3},{0x1EC5,0x1EC5},{0x1EC7,0x1EC7},{0x1EC9,0x1EC9},{0x1ECB,0x1ECB},{0x1ECD,0x1ECD},{0x1ECF,0x1ECF},{0x1ED1,0x1ED1},{0x1ED3,0x1ED3},{0x1ED5,0x1ED5},{0x1ED7,0x1ED7},{0x1ED9,0x1ED9},{0x1EDB,0x1EDB},{0x1EDD,0x1EDD},{0x1EDF,0x1EDF},{0x1EE1,0x1EE1},{0x1EE3,0x1EE3},{0x1EE5,0x1EE5},{0x1EE7,0x1EE7},{0x1EE9,0x1EE9},{0x1EEB,0x1EEB},{0x1EED,0x1EED},{0x1EEF,0x1EEF},{0x1EF1,0x1EF1},{0x1EF3,0x1EF3},{0x1EF5,0x1EF5},{0x1EF7,0x1EF7},{0x1EF9,0x1EF9},{0x1F00,0x1F07},{0x1F10,0x1F15},{0x1F20,0x1F27},{0x1F30,0x1F37},{0x1F40,0x1F45},{0x1F50,0x1F57},{0x1F60,0x1F67},{0x1F70,0x1F7D},{0x1F80,0x1F87},{0x1F90,0x1F97},{0x1FA0,0x1FA7},{0x1FB0,0x1FB4},{0x1FB6,0x1FB7},{0x1FBE,0x1FBE},{0x1FC2,0x1FC4},{0x1FC6,0x1FC7},{0x1FD0,0x1FD3},{0x1FD6,0x1FD7},{0x1FE0,0x1FE7},{0x1FF2,0x1FF4},{0x1FF6,0x1FF7},{0x207F,0x207F},{0x210A,0x210A},{0x210E,0x210F},{0x2113,0x2113},{0x212F,0x212F},{0x2134,0x2134},{0x2139,0x2139},{0xFB00,0xFB06},{0xFB13,0xFB17},{0xFF41,0xFF5A},{0x10428,0x1044D},{0x1D41A,0x1D433},{0x1D44E
,0x1D454},{0x1D456,0x1D467},{0x1D482,0x1D49B},{0x1D4B6,0x1D4B9},{0x1D4BB,0x1D4BB},{0x1D4BD,0x1D4C0},{0x1D4C2,0x1D4C3},{0x1D4C5,0x1D4CF},{0x1D4EA,0x1D503},{0x1D51E,0x1D537},{0x1D552,0x1D56B},{0x1D586,0x1D59F},{0x1D5BA,0x1D5D3},{0x1D5EE,0x1D607},{0x1D622,0x1D63B},{0x1D656,0x1D66F},{0x1D68A,0x1D6A3},{0x1D6C2,0x1D6DA},{0x1D6DC,0x1D6E1},{0x1D6FC,0x1D714},{0x1D716,0x1D71B},{0x1D736,0x1D74E},{0x1D750,0x1D755},{0x1D770,0x1D788},{0x1D78A,0x1D78F},{0x1D7AA,0x1D7C2},{0x1D7C4,0x1D7C9}}; static int LtRanges[][2]={{0x1C5,0x1C5},{0x1C8,0x1C8},{0x1CB,0x1CB},{0x1F2,0x1F2},{0x1F88,0x1F8F},{0x1F98,0x1F9F},{0x1FA8,0x1FAF},{0x1FBC,0x1FBC},{0x1FCC,0x1FCC},{0x1FFC,0x1FFC}}; static int LmRanges[][2]={{0x2B0,0x2B8},{0x2BB,0x2C1},{0x2D0,0x2D1},{0x2E0,0x2E4},{0x2EE,0x2EE},{0x37A,0x37A},{0x559,0x559},{0x640,0x640},{0x6E5,0x6E6},{0xE46,0xE46},{0xEC6,0xEC6},{0x1843,0x1843},{0x3005,0x3005},{0x3031,0x3035},{0x309D,0x309E},{0x30FC,0x30FE},{0xFF70,0xFF70},{0xFF9E,0xFF9F}}; static int LoRanges[][2]={{0x1BB,0x1BB},{0x1C0,0x1C3},{0x5D0,0x5EA},{0x5F0,0x5F2},{0x621,0x63A},{0x641,0x64A},{0x671,0x6D3},{0x6D5,0x6D5},{0x6FA,0x6FC},{0x710,0x710},{0x712,0x72C},{0x780,0x7A5},{0x905,0x939},{0x93D,0x93D},{0x950,0x950},{0x958,0x961},{0x985,0x98C},{0x98F,0x990},{0x993,0x9A8},{0x9AA,0x9B0},{0x9B2,0x9B2},{0x9B6,0x9B9},{0x9DC,0x9DD},{0x9DF,0x9E1},{0x9F0,0x9F1},{0xA05,0xA0A},{0xA0F,0xA10},{0xA13,0xA28},{0xA2A,0xA30},{0xA32,0xA33},{0xA35,0xA36},{0xA38,0xA39},{0xA59,0xA5C},{0xA5E,0xA5E},{0xA72,0xA74},{0xA85,0xA8B},{0xA8D,0xA8D},{0xA8F,0xA91},{0xA93,0xAA8},{0xAAA,0xAB0},{0xAB2,0xAB3},{0xAB5,0xAB9},{0xABD,0xABD},{0xAD0,0xAD0},{0xAE0,0xAE0},{0xB05,0xB0C},{0xB0F,0xB10},{0xB13,0xB28},{0xB2A,0xB30},{0xB32,0xB33},{0xB36,0xB39},{0xB3D,0xB3D},{0xB5C,0xB5D},{0xB5F,0xB61},{0xB85,0xB8A},{0xB8E,0xB90},{0xB92,0xB95},{0xB99,0xB9A},{0xB9C,0xB9C},{0xB9E,0xB9F},{0xBA3,0xBA4},{0xBA8,0xBAA},{0xBAE,0xBB5},{0xBB7,0xBB9},{0xC05,0xC0C},{0xC0E,0xC10},{0xC12,0xC28},{0xC2A,0xC33},{0xC35,0xC39},{0xC60,0xC61},{0xC85,0xC8C},{0xC8E,0xC90},{0xC92,0xCA8}
,{0xCAA,0xCB3},{0xCB5,0xCB9},{0xCDE,0xCDE},{0xCE0,0xCE1},{0xD05,0xD0C},{0xD0E,0xD10},{0xD12,0xD28},{0xD2A,0xD39},{0xD60,0xD61},{0xD85,0xD96},{0xD9A,0xDB1},{0xDB3,0xDBB},{0xDBD,0xDBD},{0xDC0,0xDC6},{0xE01,0xE30},{0xE32,0xE33},{0xE40,0xE45},{0xE81,0xE82},{0xE84,0xE84},{0xE87,0xE88},{0xE8A,0xE8A},{0xE8D,0xE8D},{0xE94,0xE97},{0xE99,0xE9F},{0xEA1,0xEA3},{0xEA5,0xEA5},{0xEA7,0xEA7},{0xEAA,0xEAB},{0xEAD,0xEB0},{0xEB2,0xEB3},{0xEBD,0xEBD},{0xEC0,0xEC4},{0xEDC,0xEDD},{0xF00,0xF00},{0xF40,0xF47},{0xF49,0xF6A},{0xF88,0xF8B},{0x1000,0x1021},{0x1023,0x1027},{0x1029,0x102A},{0x1050,0x1055},{0x10D0,0x10F6},{0x1100,0x1159},{0x115F,0x11A2},{0x11A8,0x11F9},{0x1200,0x1206},{0x1208,0x1246},{0x1248,0x1248},{0x124A,0x124D},{0x1250,0x1256},{0x1258,0x1258},{0x125A,0x125D},{0x1260,0x1286},{0x1288,0x1288},{0x128A,0x128D},{0x1290,0x12AE},{0x12B0,0x12B0},{0x12B2,0x12B5},{0x12B8,0x12BE},{0x12C0,0x12C0},{0x12C2,0x12C5},{0x12C8,0x12CE},{0x12D0,0x12D6},{0x12D8,0x12EE},{0x12F0,0x130E},{0x1310,0x1310},{0x1312,0x1315},{0x1318,0x131E},{0x1320,0x1346},{0x1348,0x135A},{0x13A0,0x13F4},{0x1401,0x166C},{0x166F,0x1676},{0x1681,0x169A},{0x16A0,0x16EA},{0x1780,0x17B3},{0x1820,0x1842},{0x1844,0x1877},{0x1880,0x18A8},{0x2135,0x2138},{0x3006,0x3006},{0x3041,0x3094},{0x30A1,0x30FA},{0x3105,0x312C},{0x3131,0x318E},{0x31A0,0x31B7},{0x3400,0x4DB5},{0x4E00,0x9FA5},{0xA000,0xA48C},{0xAC00,0xD7A3},{0xF900,0xFA2D},{0xFB1D,0xFB1D},{0xFB1F,0xFB28},{0xFB2A,0xFB36},{0xFB38,0xFB3C},{0xFB3E,0xFB3E},{0xFB40,0xFB41},{0xFB43,0xFB44},{0xFB46,0xFBB1},{0xFBD3,0xFD3D},{0xFD50,0xFD8F},{0xFD92,0xFDC7},{0xFDF0,0xFDFB},{0xFE70,0xFE72},{0xFE74,0xFE74},{0xFE76,0xFEFC},{0xFF66,0xFF6F},{0xFF71,0xFF9D},{0xFFA0,0xFFBE},{0xFFC2,0xFFC7},{0xFFCA,0xFFCF},{0xFFD2,0xFFD7},{0xFFDA,0xFFDC},{0x10300,0x1031E},{0x10330,0x10349},{0x20000,0x2A6D6},{0x2F800,0x2FA1D}}; static int
MnRanges[][2]={{0x300,0x34E},{0x360,0x362},{0x483,0x486},{0x591,0x5A1},{0x5A3,0x5B9},{0x5BB,0x5BD},{0x5BF,0x5BF},{0x5C1,0x5C2},{0x5C4,0x5C4},{0x64B,0x655},{0x670,0x670},{0x6D6,0x6DC},{0x6DF,0x6E4},{0x6E7,0x6E8},{0x6EA,0x6ED},{0x711,0x711},{0x730,0x74A},{0x7A6,0x7B0},{0x901,0x902},{0x93C,0x93C},{0x941,0x948},{0x94D,0x94D},{0x951,0x954},{0x962,0x963},{0x981,0x981},{0x9BC,0x9BC},{0x9C1,0x9C4},{0x9CD,0x9CD},{0x9E2,0x9E3},{0xA02,0xA02},{0xA3C,0xA3C},{0xA41,0xA42},{0xA47,0xA48},{0xA4B,0xA4D},{0xA70,0xA71},{0xA81,0xA82},{0xABC,0xABC},{0xAC1,0xAC5},{0xAC7,0xAC8},{0xACD,0xACD},{0xB01,0xB01},{0xB3C,0xB3C},{0xB3F,0xB3F},{0xB41,0xB43},{0xB4D,0xB4D},{0xB56,0xB56},{0xB82,0xB82},{0xBC0,0xBC0},{0xBCD,0xBCD},{0xC3E,0xC40},{0xC46,0xC48},{0xC4A,0xC4D},{0xC55,0xC56},{0xCBF,0xCBF},{0xCC6,0xCC6},{0xCCC,0xCCD},{0xD41,0xD43},{0xD4D,0xD4D},{0xDCA,0xDCA},{0xDD2,0xDD4},{0xDD6,0xDD6},{0xE31,0xE31},{0xE34,0xE3A},{0xE47,0xE4E},{0xEB1,0xEB1},{0xEB4,0xEB9},{0xEBB,0xEBC},{0xEC8,0xECD},{0xF18,0xF19},{0xF35,0xF35},{0xF37,0xF37},{0xF39,0xF39},{0xF71,0xF7E},{0xF80,0xF84},{0xF86,0xF87},{0xF90,0xF97},{0xF99,0xFBC},{0xFC6,0xFC6},{0x102D,0x1030},{0x1032,0x1032},{0x1036,0x1037},{0x1039,0x1039},{0x1058,0x1059},{0x17B7,0x17BD},{0x17C6,0x17C6},{0x17C9,0x17D3},{0x18A9,0x18A9},{0x20D0,0x20DC},{0x20E1,0x20E1},{0x302A,0x302F},{0x3099,0x309A},{0xFB1E,0xFB1E},{0xFE20,0xFE23},{0x1D167,0x1D169},{0x1D17B,0x1D182},{0x1D185,0x1D18B},{0x1D1AA,0x1D1AD}}; static int
McRanges[][2]={{0x903,0x903},{0x93E,0x940},{0x949,0x94C},{0x982,0x983},{0x9BE,0x9C0},{0x9C7,0x9C8},{0x9CB,0x9CC},{0x9D7,0x9D7},{0xA3E,0xA40},{0xA83,0xA83},{0xABE,0xAC0},{0xAC9,0xAC9},{0xACB,0xACC},{0xB02,0xB03},{0xB3E,0xB3E},{0xB40,0xB40},{0xB47,0xB48},{0xB4B,0xB4C},{0xB57,0xB57},{0xB83,0xB83},{0xBBE,0xBBF},{0xBC1,0xBC2},{0xBC6,0xBC8},{0xBCA,0xBCC},{0xBD7,0xBD7},{0xC01,0xC03},{0xC41,0xC44},{0xC82,0xC83},{0xCBE,0xCBE},{0xCC0,0xCC4},{0xCC7,0xCC8},{0xCCA,0xCCB},{0xCD5,0xCD6},{0xD02,0xD03},{0xD3E,0xD40},{0xD46,0xD48},{0xD4A,0xD4C},{0xD57,0xD57},{0xD82,0xD83},{0xDCF,0xDD1},{0xDD8,0xDDF},{0xDF2,0xDF3},{0xF3E,0xF3F},{0xF7F,0xF7F},{0x102C,0x102C},{0x1031,0x1031},{0x1038,0x1038},{0x1056,0x1057},{0x17B4,0x17B6},{0x17BE,0x17C5},{0x17C7,0x17C8},{0x1D165,0x1D166},{0x1D16D,0x1D172}}; static int MeRanges[][2]={{0x488,0x489},{0x6DD,0x6DE},{0x20DD,0x20E0},{0x20E2,0x20E3}}; static int NdRanges[][2]={{0x30,0x39},{0x660,0x669},{0x6F0,0x6F9},{0x966,0x96F},{0x9E6,0x9EF},{0xA66,0xA6F},{0xAE6,0xAEF},{0xB66,0xB6F},{0xBE7,0xBEF},{0xC66,0xC6F},{0xCE6,0xCEF},{0xD66,0xD6F},{0xE50,0xE59},{0xED0,0xED9},{0xF20,0xF29},{0x1040,0x1049},{0x1369,0x1371},{0x17E0,0x17E9},{0x1810,0x1819},{0xFF10,0xFF19},{0x1D7CE,0x1D7FF}}; static int NlRanges[][2]={{0x16EE,0x16F0},{0x2160,0x2183},{0x3007,0x3007},{0x3021,0x3029},{0x3038,0x303A},{0x1034A,0x1034A}}; static int NoRanges[][2]={{0xB2,0xB3},{0xB9,0xB9},{0xBC,0xBE},{0x9F4,0x9F9},{0xBF0,0xBF2},{0xF2A,0xF33},{0x1372,0x137C},{0x2070,0x2070},{0x2074,0x2079},{0x2080,0x2089},{0x2153,0x215F},{0x2460,0x249B},{0x24EA,0x24EA},{0x2776,0x2793},{0x3192,0x3195},{0x3220,0x3229},{0x3280,0x3289},{0x10320,0x10323}}; static int PcRanges[][2]={{0x5F,0x5F},{0x203F,0x2040},{0x30FB,0x30FB},{0xFE33,0xFE34},{0xFE4D,0xFE4F},{0xFF3F,0xFF3F},{0xFF65,0xFF65}}; static int PdRanges[][2]={{0x2D,0x2D},{0xAD,0xAD},{0x58A,0x58A},{0x1806,0x1806},{0x2010,0x2015},{0x301C,0x301C},{0x3030,0x3030},{0xFE31,0xFE32},{0xFE58,0xFE58},{0xFE63,0xFE63},{0xFF0D,0xFF0D}}; static int 
PsRanges[][2]={{0x28,0x28},{0x5B,0x5B},{0x7B,0x7B},{0xF3A,0xF3A},{0xF3C,0xF3C},{0x169B,0x169B},{0x201A,0x201A},{0x201E,0x201E},{0x2045,0x2045},{0x207D,0x207D},{0x208D,0x208D},{0x2329,0x2329},{0x3008,0x3008},{0x300A,0x300A},{0x300C,0x300C},{0x300E,0x300E},{0x3010,0x3010},{0x3014,0x3014},{0x3016,0x3016},{0x3018,0x3018},{0x301A,0x301A},{0x301D,0x301D},{0xFD3E,0xFD3E},{0xFE35,0xFE35},{0xFE37,0xFE37},{0xFE39,0xFE39},{0xFE3B,0xFE3B},{0xFE3D,0xFE3D},{0xFE3F,0xFE3F},{0xFE41,0xFE41},{0xFE43,0xFE43},{0xFE59,0xFE59},{0xFE5B,0xFE5B},{0xFE5D,0xFE5D},{0xFF08,0xFF08},{0xFF3B,0xFF3B},{0xFF5B,0xFF5B},{0xFF62,0xFF62}}; static int PeRanges[][2]={{0x29,0x29},{0x5D,0x5D},{0x7D,0x7D},{0xF3B,0xF3B},{0xF3D,0xF3D},{0x169C,0x169C},{0x2046,0x2046},{0x207E,0x207E},{0x208E,0x208E},{0x232A,0x232A},{0x3009,0x3009},{0x300B,0x300B},{0x300D,0x300D},{0x300F,0x300F},{0x3011,0x3011},{0x3015,0x3015},{0x3017,0x3017},{0x3019,0x3019},{0x301B,0x301B},{0x301E,0x301F},{0xFD3F,0xFD3F},{0xFE36,0xFE36},{0xFE38,0xFE38},{0xFE3A,0xFE3A},{0xFE3C,0xFE3C},{0xFE3E,0xFE3E},{0xFE40,0xFE40},{0xFE42,0xFE42},{0xFE44,0xFE44},{0xFE5A,0xFE5A},{0xFE5C,0xFE5C},{0xFE5E,0xFE5E},{0xFF09,0xFF09},{0xFF3D,0xFF3D},{0xFF5D,0xFF5D},{0xFF63,0xFF63}}; static int PiRanges[][2]={{0xAB,0xAB},{0x2018,0x2018},{0x201B,0x201C},{0x201F,0x201F},{0x2039,0x2039}}; static int PfRanges[][2]={{0xBB,0xBB},{0x2019,0x2019},{0x201D,0x201D},{0x203A,0x203A}}; static int 
PoRanges[][2]={{0x21,0x23},{0x25,0x27},{0x2A,0x2A},{0x2C,0x2C},{0x2E,0x2F},{0x3A,0x3B},{0x3F,0x40},{0x5C,0x5C},{0xA1,0xA1},{0xB7,0xB7},{0xBF,0xBF},{0x37E,0x37E},{0x387,0x387},{0x55A,0x55F},{0x589,0x589},{0x5BE,0x5BE},{0x5C0,0x5C0},{0x5C3,0x5C3},{0x5F3,0x5F4},{0x60C,0x60C},{0x61B,0x61B},{0x61F,0x61F},{0x66A,0x66D},{0x6D4,0x6D4},{0x700,0x70D},{0x964,0x965},{0x970,0x970},{0xDF4,0xDF4},{0xE4F,0xE4F},{0xE5A,0xE5B},{0xF04,0xF12},{0xF85,0xF85},{0x104A,0x104F},{0x10FB,0x10FB},{0x1361,0x1368},{0x166D,0x166E},{0x16EB,0x16ED},{0x17D4,0x17DA},{0x17DC,0x17DC},{0x1800,0x1805},{0x1807,0x180A},{0x2016,0x2017},{0x2020,0x2027},{0x2030,0x2038},{0x203B,0x203E},{0x2041,0x2043},{0x2048,0x204D},{0x3001,0x3003},{0xFE30,0xFE30},{0xFE49,0xFE4C},{0xFE50,0xFE52},{0xFE54,0xFE57},{0xFE5F,0xFE61},{0xFE68,0xFE68},{0xFE6A,0xFE6B},{0xFF01,0xFF03},{0xFF05,0xFF07},{0xFF0A,0xFF0A},{0xFF0C,0xFF0C},{0xFF0E,0xFF0F},{0xFF1A,0xFF1B},{0xFF1F,0xFF20},{0xFF3C,0xFF3C},{0xFF61,0xFF61},{0xFF64,0xFF64}}; static int ZsRanges[][2]={{0x20,0x20},{0xA0,0xA0},{0x1680,0x1680},{0x2000,0x200B},{0x202F,0x202F},{0x3000,0x3000}}; static int ZlRanges[][2]={{0x2028,0x2028}}; static int ZpRanges[][2]={{0x2029,0x2029}}; static int SmRanges[][2]={{0x2B,0x2B},{0x3C,0x3E},{0x7C,0x7C},{0x7E,0x7E},{0xAC,0xAC},{0xB1,0xB1},{0xD7,0xD7},{0xF7,0xF7},{0x2044,0x2044},{0x207A,0x207C},{0x208A,0x208C},{0x2190,0x2194},{0x219A,0x219B},{0x21A0,0x21A0},{0x21A3,0x21A3},{0x21A6,0x21A6},{0x21AE,0x21AE},{0x21CE,0x21CF},{0x21D2,0x21D2},{0x21D4,0x21D4},{0x2200,0x22F1},{0x2308,0x230B},{0x2320,0x2321},{0x25B7,0x25B7},{0x25C1,0x25C1},{0x266F,0x266F},{0xFB29,0xFB29},{0xFE62,0xFE62},{0xFE64,0xFE66},{0xFF0B,0xFF0B},{0xFF1C,0xFF1E},{0xFF5C,0xFF5C},{0xFF5E,0xFF5E},{0xFFE2,0xFFE2},{0xFFE9,0xFFEC},{0x1D6C1,0x1D6C1},{0x1D6DB,0x1D6DB},{0x1D6FB,0x1D6FB},{0x1D715,0x1D715},{0x1D735,0x1D735},{0x1D74F,0x1D74F},{0x1D76F,0x1D76F},{0x1D789,0x1D789},{0x1D7A9,0x1D7A9},{0x1D7C3,0x1D7C3}}; static int 
ScRanges[][2]={{0x24,0x24},{0xA2,0xA5},{0x9F2,0x9F3},{0xE3F,0xE3F},{0x17DB,0x17DB},{0x20A0,0x20AF},{0xFE69,0xFE69},{0xFF04,0xFF04},{0xFFE0,0xFFE1},{0xFFE5,0xFFE6}}; static int SkRanges[][2]={{0x5E,0x5E},{0x60,0x60},{0xA8,0xA8},{0xAF,0xAF},{0xB4,0xB4},{0xB8,0xB8},{0x2B9,0x2BA},{0x2C2,0x2CF},{0x2D2,0x2DF},{0x2E5,0x2ED},{0x374,0x375},{0x384,0x385},{0x1FBD,0x1FBD},{0x1FBF,0x1FC1},{0x1FCD,0x1FCF},{0x1FDD,0x1FDF},{0x1FED,0x1FEF},{0x1FFD,0x1FFE},{0x309B,0x309C},{0xFF3E,0xFF3E},{0xFF40,0xFF40},{0xFFE3,0xFFE3}}; static int SoRanges[][2]={{0xA6,0xA7},{0xA9,0xA9},{0xAE,0xAE},{0xB0,0xB0},{0xB6,0xB6},{0x482,0x482},{0x6E9,0x6E9},{0x6FD,0x6FE},{0x9FA,0x9FA},{0xB70,0xB70},{0xF01,0xF03},{0xF13,0xF17},{0xF1A,0xF1F},{0xF34,0xF34},{0xF36,0xF36},{0xF38,0xF38},{0xFBE,0xFC5},{0xFC7,0xFCC},{0xFCF,0xFCF},{0x2100,0x2101},{0x2103,0x2106},{0x2108,0x2109},{0x2114,0x2114},{0x2116,0x2118},{0x211E,0x2123},{0x2125,0x2125},{0x2127,0x2127},{0x2129,0x2129},{0x212E,0x212E},{0x2132,0x2132},{0x213A,0x213A},{0x2195,0x2199},{0x219C,0x219F},{0x21A1,0x21A2},{0x21A4,0x21A5},{0x21A7,0x21AD},{0x21AF,0x21CD},{0x21D0,0x21D1},{0x21D3,0x21D3},{0x21D5,0x21F3},{0x2300,0x2307},{0x230C,0x231F},{0x2322,0x2328},{0x232B,0x237B},{0x237D,0x239A},{0x2400,0x2426},{0x2440,0x244A},{0x249C,0x24E9},{0x2500,0x2595},{0x25A0,0x25B6},{0x25B8,0x25C0},{0x25C2,0x25F7},{0x2600,0x2613},{0x2619,0x266E},{0x2670,0x2671},{0x2701,0x2704},{0x2706,0x2709},{0x270C,0x2727},{0x2729,0x274B},{0x274D,0x274D},{0x274F,0x2752},{0x2756,0x2756},{0x2758,0x275E},{0x2761,0x2767},{0x2794,0x2794},{0x298,0x27AF},{0x27B1,0x27BE},{0x2800,0x28FF},{0x2E80,0x2E99},{0x2E9B,0x2EF3},{0x2F00,0x2FD5},{0x2FF0,0x2FFB},{0x3004,0x3004},{0x3012,0x3013},{0x3020,0x3020},{0x3036,0x3037},{0x303E,0x303F},{0x3190,0x3191},{0x3196,0x319F},{0x3200,0x321C},{0x322A,0x3243},{0x3260,0x327B},{0x327F,0x327F},{0x328A,0x32B0},{0x32C0,0x32CB},{0x32D0,0x32FE},{0x3300,0x3376},{0x337B,0x33DD},{0x33E0,0x33FE},{0xA490,0xA4A1},{0xA4A4,0xA4B3},{0xA4B5,0xA4C0},{0xA4C2,0xA4C4},{0xA4C6,0xA4C6},{0xFFE4,0x
FFE4},{0xFFE8,0xFFE8},{0xFFED,0xFFEE},{0xFFFC,0xFFFD},{0x1D000,0x1D0F5},{0x1D100,0x1D126},{0x1D12A,0x1D164},{0x1D16A,0x1D16C},{0x1D183,0x1D184},{0x1D18C,0x1D1A9},{0x1D1AE,0x1D1DD}}; static int CcRanges[][2]={{0x0,0x1F},{0x7F,0x9F}}; static int CfRanges[][2]={{0x70F,0x70F},{0x180B,0x180E},{0x200C,0x200F},{0x202A,0x202E},{0x206A,0x206F},{0xFEFF,0xFEFF},{0xFFF9,0xFFFB},{0x1D173,0x1D17A},{0xE0001,0xE0001},{0xE0020,0xE007F}}; static int CoRanges[][2]={{0xE000,0xF8FF},{0xF0000,0xFFFFD},{0x100000,0x10FFFD}}; --- NEW FILE: rn.c --- /* $Id: rn.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #include <string.h> /* strcmp,strlen,strcpy*/ #include "m.h" #include "s.h" /* s_hval */ #include "ht.h" #include "ll.h" #include "rn.h" #include "rnx.h" #define LEN_P RN_LEN_P #define PRIME_P RN_PRIME_P #define LIM_P RN_LIM_P #define LEN_NC RN_LEN_NC #define PRIME_NC RN_PRIME_NC #define LEN_S RN_LEN_S #define P_SIZE 3 #define NC_SIZE 3 #define P_AVG_SIZE 2 #define NC_AVG_SIZE 2 #define S_AVG_SIZE 16 #define erased(i) (rn_pattern[i]&RN_P_FLG_ERS) #define erase(i) (rn_pattern[i]|=RN_P_FLG_ERS) static int p_size[]={1,1,1,1,3,3,3,2,2,3,3,3,3,3,2,3}; static int nc_size[]={1,3,2,1,3,3,3}; int *rn_pattern; int *rn_nameclass; char *rn_string; int rn_empty,rn_text,rn_notAllowed,rn_dt_string,rn_dt_token,rn_xsd_uri; static struct hashtable ht_p, ht_nc, ht_s; static int i_p,i_nc,i_s,BASE_P,base_p,i_ref; static int len_p,len_nc,len_s; static int adding_ps; void rn_new_schema(void) {base_p=i_p; i_ref=0;} void rn_del_p(int i) {ht_deli(&ht_p,i);} void rn_add_p(int i) {if(ht_get(&ht_p,i)==-1) ht_put(&ht_p,i);} int rn_contentType(int i) {return rn_pattern[i]&0x1C00;} void rn_setContentType(int i,int t1,int t2) {rn_pattern[i]|=(t1>t2?t1:t2);} int rn_groupable(int p1,int p2) { int ct1=rn_contentType(p1),ct2=rn_contentType(p2); return ((ct1&ct2&RN_P_FLG_CTC)||((ct1|ct2)&RN_P_FLG_CTE)); } static int add_s(char *s) { int len=strlen(s)+1; if(i_s+len>len_s) rn_string=(char*)m_stretch(rn_string, 
len_s=2*(i_s+len),i_s,sizeof(char)); strcpy(rn_string+i_s,s); return len; } /* the two functions below are structuraly identical; they used to be expanded from a macro using ##, but then I eliminated all occurences of ## -- it was an obstacle to porting; sam script to turn the first into the second is s/([^a-z])p([^a-z])/\1nc\2/g s/([^A-Z])P([^A-Z])/\1NC\2/g s/_pattern/_nameclass/g */ static int accept_p(void) { int j; if((j=ht_get(&ht_p,i_p))==-1) { ht_put(&ht_p,j=i_p); i_p+=p_size[RN_P_TYP(i_p)]; if(i_p+P_SIZE>len_p) rn_pattern=(int *)m_stretch(rn_pattern, len_p=2*(i_p+P_SIZE),i_p,sizeof(int)); } return j; } static int accept_nc(void) { int j; if((j=ht_get(&ht_nc,i_nc))==-1) { ht_put(&ht_nc,j=i_nc); i_nc+=nc_size[RN_NC_TYP(i_nc)]; if(i_nc+NC_SIZE>len_nc) rn_nameclass=(int *)m_stretch(rn_nameclass, len_nc=2*(i_nc+NC_SIZE),i_nc,sizeof(int)); } return j; } int rn_newString(char *s) { int d_s,j; assert(!adding_ps); d_s=add_s(s); if((j=ht_get(&ht_s,i_s))==-1) { ht_put(&ht_s,j=i_s); i_s+=d_s; } return j; } #define P_NEW(x) rn_pattern[i_p]=x int rn_newNotAllowed(void) { P_NEW(RN_P_NOT_ALLOWED); return accept_p(); } int rn_newEmpty(void) { P_NEW(RN_P_EMPTY); rn_setNullable(i_p,1); return accept_p(); } int rn_newText(void) { P_NEW(RN_P_TEXT); rn_setNullable(i_p,1); rn_setCdata(i_p,1); return accept_p(); } int rn_newChoice(int p1,int p2) { P_NEW(RN_P_CHOICE); rn_pattern[i_p+1]=p1; rn_pattern[i_p+2]=p2; rn_setNullable(i_p,rn_nullable(p1)||rn_nullable(p2)); rn_setCdata(i_p,rn_cdata(p1)||rn_cdata(p2)); return accept_p(); } int rn_newInterleave(int p1,int p2) { P_NEW(RN_P_INTERLEAVE); rn_pattern[i_p+1]=p1; rn_pattern[i_p+2]=p2; rn_setNullable(i_p,rn_nullable(p1)&&rn_nullable(p2)); rn_setCdata(i_p,rn_cdata(p1)||rn_cdata(p2)); return accept_p(); } int rn_newGroup(int p1,int p2) { P_NEW(RN_P_GROUP); rn_pattern[i_p+1]=p1; rn_pattern[i_p+2]=p2; rn_setNullable(i_p,rn_nullable(p1)&&rn_nullable(p2)); rn_setCdata(i_p,rn_cdata(p1)||rn_cdata(p2)); return accept_p(); } int 
rn_newOneOrMore(int p1) { P_NEW(RN_P_ONE_OR_MORE); rn_pattern[i_p+1]=p1; rn_setNullable(i_p,rn_nullable(p1)); rn_setCdata(i_p,rn_cdata(p1)); return accept_p(); } int rn_newList(int p1) { P_NEW(RN_P_LIST); rn_pattern[i_p+1]=p1; rn_setCdata(i_p,1); return accept_p(); } int rn_newData(int dt,int ps) { P_NEW(RN_P_DATA); rn_pattern[i_p+1]=dt; rn_pattern[i_p+2]=ps; rn_setCdata(i_p,1); return accept_p(); } int rn_newDataExcept(int p1,int p2) { P_NEW(RN_P_DATA_EXCEPT); rn_pattern[i_p+1]=p1; rn_pattern[i_p+2]=p2; rn_setCdata(i_p,1); return accept_p(); } int rn_newValue(int dt,int s) { P_NEW(RN_P_VALUE); rn_pattern[i_p+1]=dt; rn_pattern[i_p+2]=s; rn_setCdata(i_p,1); return accept_p(); } int rn_newAttribute(int nc,int p1) { P_NEW(RN_P_ATTRIBUTE); rn_pattern[i_p+2]=nc; rn_pattern[i_p+1]=p1; return accept_p(); } int rn_newElement(int nc,int p1) { P_NEW(RN_P_ELEMENT); rn_pattern[i_p+2]=nc; rn_pattern[i_p+1]=p1; return accept_p(); } int rn_newAfter(int p1,int p2) { P_NEW(RN_P_AFTER); rn_pattern[i_p+1]=p1; rn_pattern[i_p+2]=p2; rn_setCdata(i_p,rn_cdata(p1)); return accept_p(); } int rn_newRef(void) { P_NEW(RN_P_REF); rn_pattern[i_p+1]=0; return ht_deli(&ht_p,accept_p()); } int rn_one_or_more(int p) { if(RN_P_IS(p,RN_P_EMPTY)) return p; if(RN_P_IS(p,RN_P_NOT_ALLOWED)) return p; if(RN_P_IS(p,RN_P_TEXT)) return p; return rn_newOneOrMore(p); } int rn_group(int p1,int p2) { if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) return p1; if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) return p2; if(RN_P_IS(p1,RN_P_EMPTY)) return p2; if(RN_P_IS(p2,RN_P_EMPTY)) return p1; return rn_newGroup(p1,p2); } static int samechoice(int p1,int p2) { if(RN_P_IS(p1,RN_P_CHOICE)) { int p11,p12; rn_Choice(p1,p11,p12); return p12==p2||samechoice(p11,p2); } else return p1==p2; } int rn_choice(int p1,int p2) { if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) return p2; if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) return p1; if(RN_P_IS(p2,RN_P_CHOICE)) { int p21,p22; rn_Choice(p2,p21,p22); p1=rn_choice(p1,p21); return rn_choice(p1,p22); } if(samechoice(p1,p2)) return 
p1; if(rn_nullable(p1) && (RN_P_IS(p2,RN_P_EMPTY))) return p1; if(rn_nullable(p2) && (RN_P_IS(p1,RN_P_EMPTY))) return p2; return rn_newChoice(p1,p2); } int rn_ileave(int p1,int p2) { if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) return p1; if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) return p2; if(RN_P_IS(p1,RN_P_EMPTY)) return p2; if(RN_P_IS(p2,RN_P_EMPTY)) return p1; return rn_newInterleave(p1,p2); } int rn_after(int p1,int p2) { if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) return p1; if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) return p2; return rn_newAfter(p1,p2); } #define NC_NEW(x) rn_nameclass[i_nc]=x int rn_newQName(int uri,int name) { NC_NEW(RN_NC_QNAME); rn_nameclass[i_nc+1]=uri; rn_nameclass[i_nc+2]=name; return accept_nc(); } int rn_newNsName(int uri) { NC_NEW(RN_NC_NSNAME); rn_nameclass[i_nc+1]=uri; return accept_nc(); } int rn_newAnyName(void) { NC_NEW(RN_NC_ANY_NAME); return accept_nc(); } int rn_newNameClassExcept(int nc1,int nc2) { NC_NEW(RN_NC_EXCEPT); rn_nameclass[i_nc+1]=nc1; rn_nameclass[i_nc+2]=nc2; return accept_nc(); } int rn_newNameClassChoice(int nc1,int nc2) { NC_NEW(RN_NC_CHOICE); rn_nameclass[i_nc+1]=nc1; rn_nameclass[i_nc+2]=nc2; return accept_nc(); } int rn_newDatatype(int lib,int typ) { NC_NEW(RN_NC_DATATYPE); rn_nameclass[i_nc+1]=lib; rn_nameclass[i_nc+2]=typ; return accept_nc(); } int rn_i_ps(void) {adding_ps=1; return i_s;} void rn_add_pskey(char *s) {i_s+=add_s(s);} void rn_add_psval(char *s) {i_s+=add_s(s);} void rn_end_ps(void) {i_s+=add_s(""); adding_ps=0;} static int hash_p(int i); static int hash_nc(int i); static int hash_s(int i); static int equal_p(int p1,int p2); static int equal_nc(int nc1,int nc2); static int equal_s(int s1,int s2); static void windup(void); static int initialized=0; void rn_init(void) { if(!initialized) { initialized=1; rn_pattern=(int *)m_alloc(len_p=P_AVG_SIZE*LEN_P,sizeof(int)); rn_nameclass=(int *)m_alloc(len_nc=NC_AVG_SIZE*LEN_NC,sizeof(int)); rn_string=(char*)m_alloc(len_s=S_AVG_SIZE*LEN_S,sizeof(char)); ht_init(&ht_p,LEN_P,&hash_p,&equal_p); 
ht_init(&ht_nc,LEN_NC,&hash_nc,&equal_nc); ht_init(&ht_s,LEN_S,&hash_s,&equal_s); windup(); } } void rn_clear(void) { ht_clear(&ht_p); ht_clear(&ht_nc); ht_clear(&ht_s); windup(); } static void windup(void) { i_p=i_nc=i_s=0; adding_ps=0; rn_pattern[0]=RN_P_ERROR; accept_p(); rn_nameclass[0]=RN_NC_ERROR; accept_nc(); rn_newString(""); rn_notAllowed=rn_newNotAllowed(); rn_empty=rn_newEmpty(); rn_text=rn_newText(); BASE_P=i_p; rn_dt_string=rn_newDatatype(0,rn_newString("string")); rn_dt_token=rn_newDatatype(0,rn_newString("token")); rn_xsd_uri=rn_newString("http://www.w3.org/2001/XMLSchema-datatypes"); } static int hash_p(int p) { int *pp=rn_pattern+p; int h=0; switch(p_size[RN_P_TYP(p)]) { case 1: h=pp[0]&0xF; break; case 2: h=(pp[0]&0xF)|(pp[1]<<4); break; case 3: h=(pp[0]&0xF)|((pp[1]^pp[2])<<4); break; default: assert(0); } return h*PRIME_P; } static int hash_nc(int nc) { int *ncp=rn_nameclass+nc; int h=0; switch(nc_size[RN_NC_TYP(nc)]) { case 1: h=ncp[0]&0x7; break; case 2: h=(ncp[0]&0x7)|(ncp[1]<<3); break; case 3: h=(ncp[0]&0x7)|((ncp[1]^ncp[2])<<3); break; default: assert(0); } return h*PRIME_NC; } static int hash_s(int i) {return s_hval(rn_string+i);} static int equal_p(int p1,int p2) { int *pp1=rn_pattern+p1,*pp2=rn_pattern+p2; if(RN_P_TYP(p1)!=RN_P_TYP(p2)) return 0; switch(p_size[RN_P_TYP(p1)]) { case 3: if(pp1[2]!=pp2[2]) return 0; case 2: if(pp1[1]!=pp2[1]) return 0; case 1: return 1; default: assert(0); } return 0; } static int equal_nc(int nc1,int nc2) { int *ncp1=rn_nameclass+nc1,*ncp2=rn_nameclass+nc2; if(RN_NC_TYP(nc1)!=RN_NC_TYP(nc2)) return 0; switch(nc_size[RN_NC_TYP(nc1)]) { case 3: if(ncp1[2]!=ncp2[2]) return 0; case 2: if(ncp1[1]!=ncp2[1]) return 0; case 1: return 1; default: assert(0); } return 0; } static int equal_s(int s1,int s2) {return strcmp(rn_string+s1,rn_string+s2)==0;} /* marks patterns reachable from start, assumes that the references are resolved */ #define pick_p(p) do { \ if(p>=since && !rn_marked(p)) {flat[n_f++]=p; 
rn_mark(p);} \ } while(0) static void mark_p(int start,int since) { int p,p1,p2,nc,i,n_f; int *flat=(int*)m_alloc(i_p-since,sizeof(int)); n_f=0; pick_p(start); for(i=0;i!=n_f;++i) { p=flat[i]; switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_DATA: case RN_P_VALUE: break; case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY; case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY; case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY; case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY; BINARY: pick_p(p2); goto UNARY; case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY; case RN_P_LIST: rn_List(p,p1); goto UNARY; case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY; case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY; UNARY: pick_p(p1); break; default: assert(0); } } m_free(flat); } /* assumes that used patterns are marked */ #define redir_p() do { \ if(q<since || xlat[q-since]!=-1) { \ rn_unmark(p); xlat[p-since]=q; \ changed=1; \ } else { \ ht_deli(&ht_p,q); ht_put(&ht_p,p); \ } \ } while(0) static void sweep_p(int *starts,int n_st,int since) { int p,p1,p2,nc,q,changed,touched; int *xlat; xlat=(int*)m_alloc(i_p-since,sizeof(int)); changed=0; for(p=since;p!=i_p;p+=p_size[RN_P_TYP(p)]) { if(rn_marked(p)) xlat[p-since]=p; else xlat[p-since]=-1; } for(p=since;p!=i_p;p+=p_size[RN_P_TYP(p)]) { if(xlat[p-since]==p && (q=ht_get(&ht_p,p))!=p) redir_p(); } while(changed) { changed=0; for(p=since;p!=i_p;p+=p_size[RN_P_TYP(p)]) { if(xlat[p-since]==p) { touched=0; switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_DATA: case RN_P_VALUE: break; case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY; case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY; case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY; case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY; BINARY: if(p2>=since && (q=xlat[p2-since])!=p2) { ht_deli(&ht_p,p); touched=1; rn_pattern[p+2]=q; } goto UNARY; case RN_P_ONE_OR_MORE: 
rn_OneOrMore(p,p1); goto UNARY; case RN_P_LIST: rn_List(p,p1); goto UNARY; case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY; case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY; UNARY: if(p1>=since && (q=xlat[p1-since])!=p1) { if(!touched) ht_deli(&ht_p,p); touched=1; rn_pattern[p+1]=q; } break; default: assert(0); } if(touched) { changed=1; /* recursion through redirection */ if((q=ht_get(&ht_p,p))==-1) { ht_put(&ht_p,p); } else { redir_p(); } } } } } while(n_st--!=0) { if(*starts>=since) *starts=xlat[*starts-since]; ++starts; } m_free(xlat); } static void unmark_p(int since) { int p; for(p=since;p!=i_p;p+=p_size[RN_P_TYP(p)]) { if(rn_marked(p)) rn_unmark(p); else {ht_deli(&ht_p,p); erase(p);} } } static void compress_p(int *starts,int n_st,int since) { int p,psiz, p1,p2,nc, q,i_q, newlen_p; int *xlat=(int*)m_alloc(i_p-since,sizeof(int)); p=q=since; while(p!=i_p) { psiz=p_size[RN_P_TYP(p)]; if(erased(p)) { xlat[p-since]=-1; } else { ht_deli(&ht_p,p); xlat[p-since]=q; q+=psiz; } p+=psiz; } i_q=q; p=since; while(p!=i_p) { psiz=p_size[RN_P_TYP(p)]; /* rn_pattern[p] changes */ if(xlat[p-since]!=-1) { switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_DATA: case RN_P_VALUE: break; case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY; case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY; case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY; case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY; BINARY: if(p2>=since && (q=xlat[p2-since])!=p2) rn_pattern[p+2]=q; goto UNARY; case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY; case RN_P_LIST: rn_List(p,p1); goto UNARY; case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY; case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY; UNARY: if(p1>=since && (q=xlat[p1-since])!=p1) rn_pattern[p+1]=q; break; default: assert(0); } if((q=xlat[p-since])!=p) { int i; for(i=0;i!=psiz;++i) rn_pattern[q+i]=rn_pattern[p+i]; assert(q+psiz<i_p); } ht_put(&ht_p,q); } p+=psiz; } while(n_st--!=0) { 
if(*starts>=since) *starts=xlat[*starts-since]; ++starts; } m_free(xlat); if(i_q!=i_p) { i_p=i_q; newlen_p=i_p*2; if(len_p>P_AVG_SIZE*LIM_P&&newlen_p<len_p) { rn_pattern=(int*)m_stretch(rn_pattern, len_p=newlen_p>P_AVG_SIZE*LEN_P?newlen_p:P_AVG_SIZE*LEN_P, i_p,sizeof(int)); } } } void rn_compress(int *starts,int n_st) { int i; for(i=0;i!=n_st;++i) mark_p(starts[i],BASE_P); sweep_p(starts,n_st,BASE_P); unmark_p(BASE_P); compress_p(starts,n_st,BASE_P); } int rn_compress_last(int start) { mark_p(start,base_p); sweep_p(&start,1,base_p); unmark_p(base_p); compress_p(&start,1,base_p); return start; } --- NEW FILE: rvp.c --- /* $Id: rvp.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */ /* validation pipe: synopsis rvp -qsdevh grammar.rnc reads from 0, writes to 1, 2 for grammar parse errors only, then redirected. -q switches to numerical error codes -s takes less space but more time -d plugs in an external type checker -e the argument is a Scheme program providing a datatype library -v displays version -h help message exit code: 0 on valid, non-zero on invalid protocol query ::= (start | quit | start-tag-open | attribute | start-tag-close | text | end-tag) z. quit ::= "quit". start ::= "start" [gramno]. start-tag-open ::= "start-tag-open" patno name. attribute ::= "attribute" patno name value. start-tag-close :: = "start-tag-close" patno name. text ::= ("text"|"mixed") patno text. end-tag ::= "end-tag" patno name. response ::= (ok | er | error) z. ok ::= "ok" patno. er ::= "er" patno erno. error ::= "error" patno erno error. z ::= "\0" . 
conventions: last colon in name separates namespace uri and local part -q?er:error error==0 yields message 'protocol error' and happens when a query is not understood start assumes gramno=0 if the argument is omitted */ #include <stdlib.h> #include <stdarg.h> #include <stdio.h> #include <fcntl.h> /*open,close*/ #include <sys/types.h> #include UNISTD_H /*open,read,close*/ #include <string.h> /*strerror*/ #include <setjmp.h> #include <errno.h> #include <assert.h> #include "m.h" #include "s.h" #include "erbit.h" #include "drv.h" #include "rnl.h" #include "rnv.h" #include "dxl.h" #include "dsl.h" #include "er.h" extern int rn_notAllowed, drv_compact, rx_compact; #define ATT 0 #define ENT 1 #define MIX 2 #define QUIT 3 #define START 4 #define STC 5 #define STO 6 #define TXT 7 #define NKWD 8 char *kwdtab[NKWD]={ "attribute", "end-tag", "mixed", "quit", "start", "start-tag-close", "start-tag-open", "text" }; #define OK "ok %u" #define ER "er %u" #define ERROR "error %u" #define LEN_B 1024 static FILE *nstderr; static int explain=1, lasterr, *starts, n_st; static int len_q,n_q; char *quebuf; static int erp[2]; /* *erp to read error messages */ static jmp_buf IOER; static void verror_handler(int erno,va_list ap) { lasterr=erno; rnv_default_verror_handler(erno&~ERBIT_RNV,ap); } static void verror_handler_rnv(int erno,va_list ap) {verror_handler(erno|ERBIT_RNV,ap);} static int initialized=0; static void init(void) { if(!initialized) {initialized=1; rnl_init(); rnv_init(); rnv_verror_handler=&verror_handler_rnv; drv_add_dtl(DXL_URL,&dxl_equal,&dxl_allows); drv_add_dtl(DSL_URL,&dsl_equal,&dsl_allows); quebuf=(char*)m_alloc(len_q=LEN_B,sizeof(char)); } } static int tok(int i) { for(;;) { switch(quebuf[i]) { case '\t': case '\n': case '\r': case ' ': break; default: return i; } ++i; } } static int endtok(int i) { for(;;) { switch(quebuf[i]) { case '\0': case '\t': case '\n': case '\r': case ' ': return i; default: break; } ++i; } } static void writeall(int fd,char *buf,int len) { 
int ofs=0; do { int n=write(fd,buf+ofs,len); if(n==-1) longjmp(IOER,1); ofs+=n; len-=n; } while(len); } static void resp(int ok,int patno,int prevno) { int len; static char buf[LEN_B]; char *f=(char*)(ok?OK:explain?ERROR:ER); len=sprintf(buf,f,patno); assert(len<LEN_B); writeall(1,buf,len); if(!ok) { len=sprintf(buf," %u",lasterr); assert(len<LEN_B); writeall(1,buf,len); if(explain) {buf[0]=' '; writeall(1,buf,1);} } for(;;) { /* read always, write if verbose */ len=read(erp[0],buf,LEN_B); if(len<0) {if(errno==EAGAIN) break; else longjmp(IOER,1);} if(len==0) break; if(!ok&&explain&&prevno!=rn_notAllowed) writeall(1,buf,len); } buf[0]='\0'; writeall(1,buf,1); } static int query(void) { int i,j,n,dn, kwd, patno,prevno, ok=0; char *name; n=0; for(;;) { if(n==n_q) { if(len_q-n_q<LEN_B) quebuf=(char*)m_stretch(quebuf,len_q=n_q+LEN_B,n_q,sizeof(char)); dn=read(0,quebuf+n_q,LEN_B); if(dn<0) longjmp(IOER,1); if(dn==0) {errno=EIO; longjmp(IOER,1);} n_q+=dn; } if(quebuf[n++]=='\0') break; } j=endtok(i=tok(0)); if((kwd=s_ntab(quebuf+i,j-i,kwdtab,NKWD))==QUIT) {resp(1,0,0); return 0;} switch(kwd) { case START: j=endtok((i=tok(j))); patno=0; while(i!=j) patno=patno*10+quebuf[i++]-'0'; if(patno>=n_st) goto PROTER; ok=1; patno=starts[patno]; break; case STO: case ATT: case STC: case TXT: case MIX: case ENT: j=endtok((i=tok(j))); if(i==j) goto PROTER; patno=0; do patno=patno*10+quebuf[i++]-'0'; while(i!=j); if(patno==0) goto PROTER; /* 0 is ERROR, not allowed */ switch(kwd) { case STO: case ATT: case STC: case ENT: j=endtok((i=tok(j))); if(i==j||(kwd==ATT&&quebuf[j]=='\0')) goto PROTER; name=quebuf+i; quebuf[j]='\0'; switch(kwd) { case STO: ok=rnv_start_tag_open(&patno,&prevno,name); break; case ATT: ok=rnv_attribute(&patno,&prevno,name,quebuf+j+1); break; case STC: ok=rnv_start_tag_close(&patno,&prevno,name); break; case ENT: ok=rnv_end_tag(&patno,&prevno,name); break; } break; case TXT: case MIX: if(quebuf[j]) ++j; i=j; while(quebuf[j]) ++j; 
ok=rnv_text(&patno,&prevno,quebuf+i,j-i,kwd==MIX); break; } break; case NKWD: PROTER: (*er_printf)("protocol error\n"); lasterr=0; patno=0; ok=0; break; default: assert(0); } resp(ok,patno,prevno); i=0; while(n!=n_q) quebuf[i++]=quebuf[n++]; n_q=i; return 1; } static void version(void) {(*er_printf)("rvp version %s\n",RVP_VERSION);} static void usage(void) {(*er_printf)("usage: rvp {-[qs" #if DXL_EXC "d" #endif #if DSL_SCM "e" #endif "vh?]} {schema.rnc}\n");} int main(int argc,char **argv) { int i, ok; init(); --argc; while(*(++argv)&&**argv=='-') { --argc; i=1; for(;;) { switch(*(*argv+i)) { case '\0': goto END_OF_OPTIONS; case 'h': case '?': usage(); return 0; case 'v': version(); break; case 's': drv_compact=1; rx_compact=1; break; #if DXL_EXC case 'd': dxl_cmd=*(argv+1); if(*(argv+1)) ++argv; goto END_OF_OPTIONS; #endif #if DSL_SCM case 'e': dsl_ld(*(argv+1)); if(*(argv+1)) ++argv; goto END_OF_OPTIONS; #endif case 'q': explain=0; break; default: (*er_printf)("unknown option '-%c'\n",*(*argv+i)); break; } ++i; } END_OF_OPTIONS:; } if(*argv==NULL) {usage(); return 1;} starts=(int*)m_alloc(argc,sizeof(int)); ok=1; n_st=0; do { ok=(starts[n_st++]=rnl_fn(*(argv++)))&&ok; } while(*argv); if(ok) { int fd2; nstderr=stderr; if(setjmp(IOER)) { fprintf(nstderr,"%s\n",strerror(errno)); return EXIT_FAILURE; } if((fd2=dup(2))==-1) longjmp(IOER,1); nstderr=fdopen(fd2,"w"); if(pipe(erp)==-1||dup2(erp[1],2)==-1) longjmp(IOER,1); fcntl(erp[0],F_SETFL,O_NONBLOCK); setbuf(stderr,NULL); while(query()); return EXIT_SUCCESS; } return EXIT_FAILURE; } --- NEW FILE: xsdck.c --- #include <string.h> #include <stdlib.h> #include "m.h" #include "er.h" #include "xsd.h" int main(int argc,char **argv) { xsd_init(); ++argv; --argc; if(!*argv) goto USAGE; if(strcmp(*argv,"equal")==0) { if(argc!=4) goto USAGE; return !xsd_equal(*(argv+1),*(argv+2),*(argv+3),strlen(*(argv+3))); } else if(strcmp(*argv,"allows")==0) { int len,i; char *ps,*p,*a; if(argc<3||!(argc&1)) goto USAGE; len=argc-2; 
for(i=2;i!=argc-1;++i) len+=strlen(*(argv+i)); ps=(char*)m_alloc(len,sizeof(char)); ps[len-1]='\0'; p=ps; for(i=2;i!=argc-1;++i) { a=*(argv+i); while((*(p++)=*(a++))); } return !xsd_allows(*(argv+1),ps,*(argv+argc-1),strlen(*(argv+argc-1))); } USAGE: (*er_printf)("xsdck: invalid arguments\n"); return 255; } --- NEW FILE: xsd.h --- /* $Id: xsd.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #include <stdarg.h> #ifndef XSD_H #define XSD_H 1 #define XSD_ER_TYP 0 #define XSD_ER_PAR 1 #define XSD_ER_PARVAL 2 #define XSD_ER_VAL 3 #define XSD_ER_NPAT 4 #define XSD_ER_WS 5 #define XSD_ER_ENUM 6 extern void (*xsd_verror_handler)(int erno,va_list ap); extern void xsd_default_verror_handler(int erno,va_list ap); extern void xsd_init(void); extern void xsd_clear(void); extern int xsd_allows(char *typ,char *ps,char *s,int n); extern int xsd_equal(char *typ,char *val,char *s,int n); extern void xsd_test(void); #endif --- NEW FILE: xmlc.c --- /* $Id: xmlc.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #include "u.h" #include "xmlc.h" /* sorted range arrays */ int 
/* XML 1.0 BaseChar class as inclusive {first,last} code point ranges. The rows
   must stay in ascending order (see "sorted range arrays" above).
   Two rows were damaged and are restored from the XML 1.0 character class list:
     {0xad,0xa8d}   -> {0xa8d,0xa8d}    (single code point between 0xa8b and 0xa8f)
     {0x1109,01109} -> {0x1109,0x1109}  (01109 is not even a valid octal constant) */
BASE_CHAR[][2]={{0x41,0x5a},{0x61,0x7a},{0xc0,0xd6},{0xd8,0xf6},{0xf8,0xff},{0x100,0x131},{0x134,0x13e},{0x141,0x148},{0x14a,0x17e},{0x180,0x1c3},{0x1cd,0x1f0},{0x1f4,0x1f5},{0x1fa,0x217},{0x250,0x2a8},{0x2bb,0x2c1},{0x386,0x386},{0x388,0x38a},{0x38c,0x38c},{0x38e,0x3a1},{0x3a3,0x3ce},{0x3d0,0x3d6},{0x3da,0x3da},{0x3dc,0x3dc},{0x3de,0x3de},{0x3e0,0x3e0},{0x3e2,0x3f3},{0x401,0x40c},{0x40e,0x44f},{0x451,0x45c},{0x45e,0x481},{0x490,0x4c4},{0x4c7,0x4c8},{0x4cb,0x4cc},{0x4d0,0x4eb},{0x4ee,0x4f5},{0x4f8,0x4f9},{0x531,0x556},{0x559,0x559},{0x561,0x586},{0x5d0,0x5ea},{0x5f0,0x5f2},{0x621,0x63a},{0x641,0x64a},{0x671,0x6b7},{0x6ba,0x6be},{0x6c0,0x6ce},{0x6d0,0x6d3},{0x6d5,0x6d5},{0x6e5,0x6e6},{0x905,0x939},{0x93d,0x93d},{0x958,0x961},{0x985,0x98c},{0x98f,0x990},{0x993,0x9a8},{0x9aa,0x9b0},{0x9b2,0x9b2},{0x9b6,0x9b9},{0x9dc,0x9dd},{0x9df,0x9e1},{0x9f0,0x9f1},{0xa05,0xa0a},{0xa0f,0xa10},{0xa13,0xa28},{0xa2a,0xa30},{0xa32,0xa33},{0xa35,0xa36},{0xa38,0xa39},{0xa59,0xa5c},{0xa5e,0xa5e},{0xa72,0xa74},{0xa85,0xa8b},{0xa8d,0xa8d},{0xa8f,0xa91},{0xa93,0xaa8},{0xaaa,0xab0},{0xab2,0xab3},{0xab5,0xab9},{0xabd,0xabd},{0xae0,0xae0},{0xb05,0xb0c},{0xb0f,0xb10},{0xb13,0xb28},{0xb2a,0xb30},{0xb32,0xb33},{0xb36,0xb39},{0xb3d,0xb3d},{0xb5c,0xb5d},{0xb5f,0xb61},{0xb85,0xb8a},{0xb8e,0xb90},{0xb92,0xb95},{0xb99,0xb9a},{0xb9c,0xb9c},{0xb9e,0xb9f},{0xba3,0xba4},{0xba8,0xbaa},{0xbae,0xbb5},{0xbb7,0xbb9},{0xc05,0xc0c},{0xc0e,0xc10},{0xc12,0xc28},{0xc2a,0xc33},{0xc35,0xc39},{0xc60,0xc61},{0xc85,0xc8c},{0xc8e,0xc90},{0xc92,0xca8},{0xcaa,0xcb3},{0xcb5,0xcb9},{0xcde,0xcde},{0xce0,0xce1},{0xd05,0xd0c},{0xd0e,0xd10},{0xd12,0xd28},{0xd2a,0xd39},{0xd60,0xd61},{0xe01,0xe2e},{0xe30,0xe30},{0xe32,0xe33},{0xe40,0xe45},{0xe81,0xe82},{0xe84,0xe84},{0xe87,0xe88},{0xe8a,0xe8a},{0xe8d,0xe8d},{0xe94,0xe97},{0xe99,0xe9f},{0xea1,0xea3},{0xea5,0xea5},{0xea7,0xea7},{0xeaa,0xeab},{0xead,0xeae},{0xeb0,0xeb0},{0xeb2,0xeb3},{0xebd,0xebd},{0xec0,0xec4},{0xf40,0xf47},{0xf49,0xf69},{0x10a0,0x10c5},{0x10d0,0x10f6},{0x1100,0x1100},{0x1102,0x1103},{0x1105,0x1107},{0x1109,0x1109},{0x110b,0x110c},{0x110e,0x1112},{0x113c,0x113c},{0x113e,0x113e},{0x1140,0x1140},{0x114c,0x114c},{0x114e,0x114e},{0x1150,0x1150},{0x1154,0x1155},{0x1159,0x1159},{0x115f,0x1161},{0x1163,0x1163},{0x1165,0x1165},{0x1167,0x1167},{0x1169,0x1169},{0x116d,0x116e},{0x1172,0x1173},{0x1175,0x1175},{0x119e,0x119e},{0x11a8,0x11a8},{0x11ab,0x11ab},{0x11ae,0x11af},{0x11b7,0x11b8},{0x11ba,0x11ba},{0x11bc,0x11c2},{0x11eb,0x11eb},{0x11f0,0x11f0},{0x11f9,0x11f9},{0x1e00,0x1e9b},{0x1ea0,0x1ef9},{0x1f00,0x1f15},{0x1f18,0x1f1d},{0x1f20,0x1f45},{0x1f48,0x1f4d},{0x1f50,0x1f57},{0x1f59,0x1f59},{0x1f5b,0x1f5b},{0x1f5d,0x1f5d},{0x1f5f,0x1f7d},{0x1f80,0x1fb4},{0x1fb6,0x1fbc},{0x1fbe,0x1fbe},{0x1fc2,0x1fc4},{0x1fc6,0x1fcc},{0x1fd0,0x1fd3},{0x1fd6,0x1fdb},{0x1fe0,0x1fec},{0x1ff2,0x1ff4},{0x1ff6,0x1ffc},{0x2126,0x2126},{0x212a,0x212b},{0x212e,0x212e},{0x2180,0x2182},{0x3041,0x3094},{0x30a1,0x30fa},{0x3105,0x312c},{0xac00,0xd7a3}};
/* XML 1.0 Ideographic class */
int IDEOGRAPHIC[][2]={{0x3007,0x3007},{0x3021,0x3029},{0x4e00,0x9fa5}};
/* type specifier of COMBINING_CHAR; its declarator opens the next archive line */
int 
COMBINING_CHAR[][2]={{0x300,0x345},{0x360,0x361},{0x483,0x486},{0x591,0x5a1},{0x5a3,0x5b9},{0x5bb,0x5bd},{0x5bf,0x5bf},{0x5c1,0x5c2},{0x5c4,0x5c4},{0x64b,0x652},{0x670,0x670},{0x6d6,0x6dc},{0x6dd,0x6df},{0x6e0,0x6e4},{0x6e7,0x6e8},{0x6ea,0x6ed},{0x901,0x903},{0x93c,0x93c},{0x93e,0x94c},{0x94d,0x94d},{0x951,0x954},{0x962,0x963},{0x981,0x983},{0x9bc,0x9bc},{0x9be,0x9be},{0x9bf,0x9bf},{0x9c0,0x9c4},{0x9c7,0x9c8},{0x9cb,0x9cd},{0x9d7,0x9d7},{0x9e2,0x9e3},{0xa02,0xa02},{0xa3c,0xa3c},{0xa3e,0xa3e},{0xa3f,0xa3f},{0xa40,0xa42},{0xa47,0xa48},{0xa4b,0xa4d},{0xa70,0xa71},{0xa81,0xa83},{0xabc,0xabc},{0xabe,0xac5},{0xac7,0xac9},{0xacb,0xacd},{0xb01,0xb03},{0xb3c,0xb3c},{0xb3e,0xb43},{0xb47,0xb48},{0xb4b,0xb4d},{0xb56,0xb57},{0xb82,0xb83},{0xbbe,0xbc2},{0xbc6,0xbc8},{0xbca,0xbcd},{0xbd7,0xbd7},{0xc01,0xc03},{0xc3e,0xc44},{0xc46,0xc48},{0xc4a,0xc4d},{0xc55,0xc56},{0xc82,0xc83},{0xcbe,0xcc4},{0xcc6,0xcc8},{0xcca,0xccd},{0xcd5,0xcd6},{0xd02,0xd03},{0xd3e,0xd43},{0xd46,0xd48},{0xd4a,0xd4d},{0xd57,0xd57},{0xe31,0xe31},{0x34,0xe3a},{0xe47,0xe4e},{0xeb1,0xeb1},{0xeb4,0xeb9},{0xebb,0xebc},{0xec8,0xecd},{0xf18,0xf19},{0xf35,0xf35},{0xf37,0xf37},{0xf39,0xf39},{0xf3e,0xf3e},{0xf3f,0xf3f},{0xf71,0xf84},{0xf86,0xf8b},{0xf90,0xf95},{0xf97,0xf97},{0xf99,0xfad},{0xfb1,0xfb7},{0xfb9,0xfb9},{0x20d0,0x20dc},{0x20e1,0x20e1},{0x302a,0x302f},{0x3099,0x3099},{0x309a,0x309a}}; int DIGIT[][2]={{0x30,0x39},{0x660,0x669},{0x6f0,0x6f9},{0x966,0x96f},{0x9e6,0x9ef},{0xa66,0xa6f},{0xae6,0xaef},{0xb66,0xb6f},{0xbe7,0xbef},{0xc66,0xc6f},{0xce6,0xcef},{0xd66,0xd6f},{0xe50,0xe59},{0xed0,0xed9},{0xf20,0xf29}}; int EXTENDER[][2]={{0xb7,0xb7},{0x2d0,0x2d1},{0x387,0x387},{0x640,0x640},{0xe46,0xe46},{0xec6,0xec6},{0x3005,0x3005},{0x3031,0x3035},{0x309d,0x309e},{0x30fc,0x30fe}}; #define isa(u,CHAR_CLASS) u_in_ranges(u,CHAR_CLASS,sizeof(CHAR_CLASS)/sizeof(int([2]))) int xmlc_white_space(int u) {return u=='\t'||u=='\n'||u=='\r'||u==' ';} int xmlc_base_char(int u) {return isa(u,BASE_CHAR);} int xmlc_ideographic(int u) 
{return isa(u,IDEOGRAPHIC);} int xmlc_combining_char(int u) {return isa(u,COMBINING_CHAR);} int xmlc_digit(int u) {return isa(u,DIGIT);} int xmlc_extender(int u) {return isa(u,EXTENDER);} --- NEW FILE: rnl.c --- /* $Id: rnl.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #include <stdarg.h> #include "erbit.h" #include "rn.h" #include "rnc.h" #include "rnd.h" #include "rnl.h" void rnl_default_verror_handler(int erno,va_list ap) { if(erno&ERBIT_RNC) { rnc_default_verror_handler(erno&~ERBIT_RNC,ap); } else if(erno&ERBIT_RND) { rnd_default_verror_handler(erno&~ERBIT_RND,ap); } } void (*rnl_verror_handler)(int er_no,va_list ap)=&rnl_default_verror_handler; static void verror_handler_rnc(int erno,va_list ap) {rnl_verror_handler(erno|ERBIT_RNC,ap);} static void verror_handler_rnd(int erno,va_list ap) {rnl_verror_handler(erno|ERBIT_RND,ap);} static int initialized=0; void rnl_init(void) { if(!initialized) { initialized=1; rn_init(); rnc_init(); rnc_verror_handler=&verror_handler_rnc; rnd_init(); rnd_verror_handler=&verror_handler_rnd; } } void rnl_clear(void) {} static int load(struct rnc_source *sp) { int start=-1; if(!rnc_errors(sp)) start=rnc_parse(sp); rnc_close(sp); if(!rnc_errors(sp)&&(start=rnd_fixup(start))) { start=rn_compress_last(start); } else start=0; return start; } int rnl_fn(char *fn) { struct rnc_source src; rnc_open(&src,fn); return load(&src); } int rnl_fd(char *fn,int fd) { struct rnc_source src; rnc_bind(&src,fn,fd); return load(&src); } int rnl_s(char *fn,char *s,int len) { struct rnc_source src; rnc_stropen(&src,fn,s,len); return load(&src); } --- NEW FILE: rnc.c --- /* $Id: rnc.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #include <fcntl.h> /* open, close */ #include <sys/types.h> #include UNISTD_H /* open,read,close */ #include <string.h> /* memcpy,strlen,strcpy,strcat */ #include <errno.h> /*errno*/ #include <assert.h> /*assert*/ #include "u.h" #include "xmlc.h" #include "m.h" #include "s.h" /* s_clone */ #include "rn.h" #include "sc.h" #include "er.h" 
#include "rnc.h" #define NKWD 19 [...1156 lines suppressed...] rn_new_schema(); sc_open(&nss); add_well_known_nss(0); open_scope(sp); start=topLevel(sp); if(start!=-1) sc_add(&defs,0,start,0); if((i=sc_find(&defs,0))) { start=defs.tab[i][1]; } else { error(1,sp,RNC_ER_NOSTART,sp->fn,CUR(sp).line,CUR(sp).col); start=0; } close_scope(sp); sc_close(&nss); return start; } --- NEW FILE: src.txt --- # $Id: src.txt,v 1.1 2009/08/03 05:32:48 mike Exp $ # source files xcl.c -- Expat-based command-line interface arx.c -- document and type association utility ll.h -- lengths of arrays erbit.h -- error classes er.c er.h -- low-level error output functions rnv.c rnv.h -- higher-level validator logic rn.c rn.h -- RNG patterns rnc.c rnc.h -- RNC loader rnd.c rnd.h -- second pass for RNC loader, restrictions and traits rnx.c rnx.h -- auxiliary functions for rn* drv.c drv.h -- derivative of RNG patterns, validation core xsd.c xsd.h -- xml schema datatypes xsd_tm.c xsd_tm.h -- dateTime datatype implementation dxl.c dxl.h -- executable datatypes dsl.c dsl.h -- scheme datatypes sc.c sc.h -- scope tables for rnc ht.c ht.h -- hash table s.c s.h -- common string operations m.c m.h -- common memory operations xmlc.c xmlc.h -- xml character classifiers u.c u.h -- utf-8 rx.c rx.h -- regular expressions engine rx_cls_u.c rx_cls_ranges.c -- auto-generated tables; included by rx.c # hierarchy ... ht,s,m,xmlc,u,er -- used in many places xcl,arx,rvp . dsl . dxl . rnv ... erbit.h ... ll ... rn .... ll ... rnc .... sc ..... ll ... rnd ... rnx .... ll ... drv .... ll .... xsd ..... erbit.h ..... xsd_tm ..... rx ...... rx_cls_u ...... rx_cls_ranges ...... 
ll --- NEW FILE: rnd.h --- /* $Id: rnd.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #include <stdarg.h> #ifndef RND_H #define RND_H 1 #define RND_ER_LOOPST 0 #define RND_ER_LOOPEL 1 #define RND_ER_CTYPE 2 #define RND_ER_BADSTART 3 #define RND_ER_BADMORE 4 #define RND_ER_BADEXPT 5 #define RND_ER_BADLIST 6 #define RND_ER_BADATTR 7 extern void (*rnd_verror_handler)(int er_no,va_list ap); extern void rnd_default_verror_handler(int erno,va_list ap); extern void rnd_init(void); extern void rnd_clear(void); extern int rnd_fixup(int start); #endif --- NEW FILE: rnl.h --- /* $Id: rnl.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #ifndef RNL_H #define RNL_H 1 extern void (*rnl_verror_handler)(int er_no,va_list ap); extern void rnl_default_verror_handler(int erno,va_list ap); extern void rnl_init(void); extern void rnl_clear(void); extern int rnl_fn(char *fn); extern int rnl_fd(char *fn,int fd); extern int rnl_s(char *fn,char *s,int len); #endif --- NEW FILE: m.h --- /* $Id: m.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #ifndef M_H #define M_H 1 extern void m_free(void *p); extern void *m_alloc(int length,int size); extern void *m_stretch(void *p,int newlen,int oldlen,int size); #endif --- NEW FILE: rnd.c --- /* $Id: rnd.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #include <stdlib.h> #include <assert.h> #include "m.h" #include "rn.h" #include "rnx.h" #include "ll.h" #include "er.h" #include "rnd.h" #define LEN_F RND_LEN_F static int len_f,n_f; static int *flat; static int errors; #define err(msg) (*er_vprintf)("error: "msg"\n",ap) void rnd_default_verror_handler(int er_no,va_list ap) { switch(er_no) { case RND_ER_LOOPST: err("loop in start pattern"); break; case RND_ER_LOOPEL: err("loop in pattern for element '%s'"); break; case RND_ER_CTYPE: err("content of element '%s' does not have a content-type"); break; case RND_ER_BADSTART: err("bad path in start pattern"); break; case RND_ER_BADMORE: err("bad path before '*' or '+' in element '%s'"); break; case RND_ER_BADEXPT: err("bad path 
after '-' in element '%s'"); break; case RND_ER_BADLIST: err("bad path after 'list' in element '%s'"); break; case RND_ER_BADATTR: err("bad path in attribute '%s' of element '%s'"); break; default: assert(0); } } void (*rnd_verror_handler)(int er_no,va_list ap)=&rnd_default_verror_handler; static int initialized=0; void rnd_init(void) { if(!initialized) { rn_init(); initialized=1; } } void rnd_clear(void) {} static void error(int er_no,...) { va_list ap; va_start(ap,er_no); (*rnd_verror_handler)(er_no,ap); va_end(ap); ++errors; } static int de(int p) { int p0=p,p1; RN_P_CHK(p,RN_P_REF); for(;;) { rn_Ref(p,p1); if(!RN_P_IS(p1,RN_P_REF)||p1==p0) break; p=p1; } return p1; } static void flatten(int p) { if(!rn_marked(p)) {flat[n_f++]=p; rn_mark(p);}} static void deref(int start) { int p,p1,p2,nc,i,changed; flat=(int*)m_alloc(len_f=LEN_F,sizeof(int)); n_f=0; if(RN_P_IS(start,RN_P_REF)) start=de(start); flatten(start); i=0; do { p=flat[i++]; switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_DATA: case RN_P_VALUE: break; case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY; case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY; case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY; case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY; BINARY: changed=0; if(RN_P_IS(p1,RN_P_REF)) {p1=de(p1); changed=1;} if(RN_P_IS(p2,RN_P_REF)) {p2=de(p2); changed=1;} if(changed) {rn_del_p(p); rn_pattern[p+1]=p1; rn_pattern[p+2]=p2; rn_add_p(p);} if(n_f+2>len_f) flat=(int*)m_stretch(flat,len_f=2*(n_f+2),n_f,sizeof(int)); flatten(p1); flatten(p2); break; case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY; case RN_P_LIST: rn_List(p,p1); goto UNARY; case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY; case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY; UNARY: changed=0; if(RN_P_IS(p1,RN_P_REF)) {p1=de(p1); changed=1;} if(changed) {rn_del_p(p); rn_pattern[p+1]=p1; rn_add_p(p);} if(n_f+1>len_f) 
flat=(int*)m_stretch(flat,len_f=2*(n_f+1),n_f,sizeof(int));
      flatten(p1);
      break;
    case RN_P_REF: /* because of a loop, but will be handled in rnd_loops (presumably loops() below) */
      break;
    default: assert(0);
    }
  } while(i!=n_f);
  /* flatten() marked every pattern it queued; remove those marks again */
  for(i=0;i!=n_f;++i) rn_unmark(flat[i]);
}

/* Returns 1 if the walk from p reaches a pattern that is still marked (one
   already on the current walk) or an RN_P_REF pattern, 0 otherwise.  An
   element pattern ends the walk with 0, so only cycles that do not pass
   through an element are reported.  p is unmarked again before returning;
   because of the short-circuit '||' the second operand of a binary pattern is
   only visited when the first one is loop-free. */
static int loop(int p) {
  int nc,p1,p2,ret=1;
  if(rn_marked(p)) return 1;
  rn_mark(p);
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_DATA: case RN_P_VALUE: case RN_P_ELEMENT: ret=0; break;
  case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
  case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
  BINARY: ret=loop(p1)||loop(p2); break;
  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
  case RN_P_LIST: rn_List(p,p1); goto UNARY;
  case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY;
  UNARY: ret=loop(p1); break;
  case RN_P_REF: ret=1; break;
  default: assert(0);
  }
  rn_unmark(p);
  return ret;
}

/* Runs loop() on flat[0] (the start pattern) and then on the content of each
   element pattern found further on in flat[].  A loop in the start pattern is
   reported as RND_ER_LOOPST, one in element content as RND_ER_LOOPEL together
   with the element's name class rendered by rnx_nc2str() (freed here).
   NOTE(review): the scan for elements resumes at i=1, so when flat[0] is
   itself an element its content is never passed to loop() -- confirm that
   this is intended. */
static void loops(void) {
  int i=0,p=flat[i],nc=-1,p1;
  for(;;) {
    if(loop(p)) {
      if(i==0) error(RND_ER_LOOPST); else {
        char *s=rnx_nc2str(nc);
        error(RND_ER_LOOPEL,s);
        m_free(s);
      }
    }
    /* advance to the next element pattern; its content becomes the next p */
    for(;;) {++i;
      if(i==n_f) return;
      p=flat[i];
      if(RN_P_IS(p,RN_P_ELEMENT)) {
        rn_Element(p,nc,p1); p=p1;
        break;
      }
    }
  }
}

/* Computes and stores the content type of p unless rn_contentType(p) is
   already non-zero, visiting sub-patterns first.  For interleave, group and
   one-or-more nothing is stored when rn_groupable() rejects the operands, so
   p is left without a content type.  The RN_P_FLG_CT* values are presumably
   the empty/complex/simple content types -- confirm in rn.h. */
static void ctype(int p) {
  int p1,p2,nc;
  if(!rn_contentType(p)) {
    switch(RN_P_TYP(p)) {
    case RN_P_NOT_ALLOWED: rn_setContentType(p,RN_P_FLG_CTE,0); break;
    case RN_P_EMPTY: rn_setContentType(p,RN_P_FLG_CTE,0); break;
    case RN_P_TEXT: rn_setContentType(p,RN_P_FLG_CTC,0); break;
    case RN_P_CHOICE: rn_Choice(p,p1,p2); ctype(p1); ctype(p2);
      rn_setContentType(p,rn_contentType(p1),rn_contentType(p2)); break;
    case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); ctype(p1); ctype(p2);
      if(rn_groupable(p1,p2)) rn_setContentType(p,rn_contentType(p1),rn_contentType(p2)); break;
    case RN_P_GROUP: rn_Group(p,p1,p2); ctype(p1); ctype(p2);
      if(rn_groupable(p1,p2)) rn_setContentType(p,rn_contentType(p1),rn_contentType(p2)); break;
    case
RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); ctype(p1); if(rn_groupable(p1,p1)) rn_setContentType(p,rn_contentType(p1),0); break; case RN_P_LIST: rn_setContentType(p,RN_P_FLG_CTS,0); break; case RN_P_DATA: rn_setContentType(p,RN_P_FLG_CTS,0); break; case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); ctype(p1); ctype(p2); if(rn_contentType(p2)) rn_setContentType(p,RN_P_FLG_CTS,0); break; case RN_P_VALUE: rn_setContentType(p,RN_P_FLG_CTS,0); break; case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); ctype(p1); if(rn_contentType(p1)) rn_setContentType(p,RN_P_FLG_CTE,0); break; case RN_P_ELEMENT: rn_setContentType(p,RN_P_FLG_CTC,0); break; default: assert(0); } } } static void ctypes(void) { int i,p,p1,nc; for(i=0;i!=n_f;++i) { p=flat[i]; if(RN_P_IS(p,RN_P_ELEMENT)) { rn_Element(p,nc,p1); ctype(p1); if(!rn_contentType(p1)) { char *s=rnx_nc2str(nc); error(RND_ER_CTYPE,s); m_free(s); } } } } static int bad_start(int p) { int p1,p2; switch(RN_P_TYP(p)) { case RN_P_EMPTY: case RN_P_TEXT: case RN_P_INTERLEAVE: case RN_P_GROUP: case RN_P_ONE_OR_MORE: case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE: case RN_P_ATTRIBUTE: return 1; case RN_P_NOT_ALLOWED: case RN_P_ELEMENT: return 0; case RN_P_CHOICE: rn_Choice(p,p1,p2); return bad_start(p1)||bad_start(p2); default: assert(0); } return 1; } static int bad_data_except(int p) { int p1,p2; switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_VALUE: case RN_P_DATA: return 0; case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY; case RN_P_DATA_EXCEPT: rn_Choice(p,p1,p2); goto BINARY; BINARY: return bad_data_except(p1)||bad_data_except(p2); case RN_P_EMPTY: case RN_P_TEXT: case RN_P_INTERLEAVE: case RN_P_GROUP: case RN_P_ONE_OR_MORE: case RN_P_LIST: case RN_P_ATTRIBUTE: case RN_P_ELEMENT: return 1; default: assert(0); } return 1; } static int bad_one_or_more(int p,int in_group) { int nc,p1,p2; switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_DATA: case RN_P_VALUE: case RN_P_ELEMENT: return 0; 
case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
  /* from a group or interleave downwards an attribute pattern is an error */
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); in_group=1; goto BINARY;
  case RN_P_GROUP: rn_Group(p,p1,p2); in_group=1; goto BINARY;
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
  BINARY: return bad_one_or_more(p1,in_group)||bad_one_or_more(p2,in_group);
  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
  case RN_P_LIST: rn_List(p,p1); goto UNARY;
  case RN_P_ATTRIBUTE: if(in_group) return 1;
    rn_Attribute(p,nc,p1); goto UNARY;
  UNARY: return bad_one_or_more(p1,in_group);
  default: assert(0);
  }
  return 1;
}

/* Returns 1 if p, the content of a list pattern, contains text, interleave,
   attribute or element; it descends through choice, group, data-except,
   one-or-more and nested list patterns.
   NOTE(review): RELAX NG section 7.1.3 also prohibits list//list, but a nested
   list is only descended into here -- confirm that this leniency is intended. */
static int bad_list(int p) {
  int p1,p2;
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_DATA: case RN_P_VALUE: return 0;
  case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
  case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
  BINARY: return bad_list(p1)||bad_list(p2);
  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
  case RN_P_LIST: rn_List(p,p1); goto UNARY;
  UNARY: return bad_list(p1);
  case RN_P_TEXT: case RN_P_INTERLEAVE: case RN_P_ATTRIBUTE: case RN_P_ELEMENT: return 1;
  default: assert(0);
  }
  return 1;
}

/* Returns 1 if p, the content of an attribute pattern, contains an attribute
   or element pattern anywhere below it; 0 otherwise. */
static int bad_attribute(int p) {
  int p1,p2;
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_DATA: case RN_P_VALUE: return 0;
  case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
  case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
  BINARY: return bad_attribute(p1)||bad_attribute(p2);
  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
  case RN_P_LIST: rn_List(p,p1); goto UNARY;
  UNARY: return bad_attribute(p1);
  case RN_P_ATTRIBUTE: case RN_P_ELEMENT: return 1;
  default: assert(0);
  }
  return 1;
}

/* Walks the content p of one element and reports prohibited constructs found
   under data-except, one-or-more, list and attribute patterns.  nc is the
   enclosing element's name class and is only used in the error messages.
   Nested element patterns are not entered. */
static void path(int p,int nc) {
  int p1,p2,nc1;
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_DATA: case RN_P_VALUE: case RN_P_ELEMENT:
break;
  case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
  case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
  /* each check below reports at most one error per offending pattern and
     then still descends, so nested offenders are reported as well */
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2);
    if(bad_data_except(p2)) {char *s=rnx_nc2str(nc); error(RND_ER_BADEXPT,s); m_free(s);}
    goto BINARY;
  BINARY: path(p1,nc); path(p2,nc); break;
  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1);
    if(bad_one_or_more(p1,0)) {char *s=rnx_nc2str(nc); error(RND_ER_BADMORE,s); m_free(s);}
    goto UNARY;
  case RN_P_LIST: rn_List(p,p1);
    if(bad_list(p1)) {char *s=rnx_nc2str(nc); error(RND_ER_BADLIST,s); m_free(s);}
    goto UNARY;
  case RN_P_ATTRIBUTE: rn_Attribute(p,nc1,p1);
    /* the attribute's name class comes first in the message, then the element's */
    if(bad_attribute(p1)) {char *s=rnx_nc2str(nc),*s1=rnx_nc2str(nc1); error(RND_ER_BADATTR,s1,s); m_free(s1); m_free(s);}
    goto UNARY;
  UNARY: path(p1,nc); break;
  default: assert(0);
  }
}

/* Reports RND_ER_BADSTART if bad_start() rejects flat[0], then runs path()
   over the content of every element pattern in flat[]. */
static void paths(void) {
  int i,p,p1,nc;
  if(bad_start(flat[0])) error(RND_ER_BADSTART);
  for(i=0;i!=n_f;++i) {
    p=flat[i];
    if(RN_P_IS(p,RN_P_ELEMENT)) {
      rn_Element(p,nc,p1);
      path(p1,nc);
    }
  }
}

/* Runs the restriction checks in order: loops first, then content types and
   prohibited paths, the latter two only if no loop error was counted. */
static void restrictions(void) {
  loops(); if(errors) return; /* loops can cause endless loops in subsequent calls */
  ctypes();
  paths();
}

/* Propagates the nullable flag through flat[], repeating full passes until a
   pass sets no new flag: a choice is nullable if either operand is, an
   interleave or group if both are, a one-or-more if its operand is.
   NOTE(review): there is no case for RN_P_EMPTY or RN_P_TEXT, so a
   non-nullable pattern of either type would reach assert(0) -- presumably
   they are created nullable elsewhere; confirm in rn.c. */
static void nullables(void) {
  int i,p,p1,p2,changed;
  do {
    changed=0;
    for(i=0;i!=n_f;++i) {
      p=flat[i];
      if(!rn_nullable(p)) {
        switch(RN_P_TYP(p)) {
        case RN_P_NOT_ALLOWED:
        case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE: case RN_P_LIST:
        case RN_P_ATTRIBUTE: case RN_P_ELEMENT: break;
        case RN_P_CHOICE: rn_Choice(p,p1,p2); rn_setNullable(p,rn_nullable(p1)||rn_nullable(p2)); break;
        case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); rn_setNullable(p,rn_nullable(p1)&&rn_nullable(p2)); break;
        case RN_P_GROUP: rn_Group(p,p1,p2); rn_setNullable(p,rn_nullable(p1)&&rn_nullable(p2)); break;
        case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); rn_setNullable(p,rn_nullable(p1)); break;
        default: assert(0);
        }
        changed=changed||rn_nullable(p);
      }
    }
  } while(changed);
}

/* Propagates the cdata flag through flat[] with the same repeat-until-stable
   scheme as nullables(). */
static void cdatas(void) {
  int i,p,p1,p2,changed;
  do {
    changed=0;
for(i=0;i!=n_f;++i) {
      p=flat[i];
      if(!rn_cdata(p)) {
        /* a choice, interleave or group gets the flag if either operand has
           it, a one-or-more if its operand has it.
           NOTE(review): text, data, data-except, value and list have no case
           here and would reach assert(0) if they arrived without the flag --
           presumably it is set when they are created; confirm in rn.c. */
        switch(RN_P_TYP(p)) {
        case RN_P_NOT_ALLOWED: case RN_P_EMPTY:
        case RN_P_ATTRIBUTE: case RN_P_ELEMENT: break;
        case RN_P_CHOICE: rn_Choice(p,p1,p2); rn_setCdata(p,rn_cdata(p1)||rn_cdata(p2)); break;
        case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); rn_setCdata(p,rn_cdata(p1)||rn_cdata(p2)); break;
        case RN_P_GROUP: rn_Group(p,p1,p2); rn_setCdata(p,rn_cdata(p1)||rn_cdata(p2)); break;
        case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); rn_setCdata(p,rn_cdata(p1)); break;
        default: assert(0);
        }
        changed=changed||rn_cdata(p);
      }
    }
  } while(changed);
}

/* Computes the derived flags of all flattened patterns: nullable, then cdata. */
static void traits(void) {
  nullables();
  cdatas();
}

/* Frees the flat[] work list and returns its first entry, i.e. the start
   pattern that deref() queued first. */
static int release(void) {
  int start=flat[0];
  m_free(flat); flat=NULL;
  return start;
}

/* Second pass over a loaded grammar.  Resets the error counter, flattens the
   patterns reachable from start (replacing references by their targets),
   checks the restrictions and, if nothing was reported, computes the nullable
   and cdata traits.  Returns the start pattern as queued by deref() -- start
   itself, or its target when start is a reference -- or 0 if any error was
   reported. */
int rnd_fixup(int start) {
  errors=0; deref(start);
  if(!errors) {restrictions(); if(!errors) traits();}
  start=release();
  return errors?0:start;
}
--- NEW FILE: drv.h --- /* $Id: drv.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #include <stdarg.h> #ifndef DRV_H #define DRV_H 1 #define DRV_ER_NODTL 0 extern void (*drv_verror_handler)(int erno,va_list ap); extern int drv_compact; extern void drv_default_verror_handler(int erno,va_list ap); extern void drv_init(void); extern void drv_clear(void); /* Expat passes character data unterminated. 
Hence functions that can deal with cdata expect the length of the data */ extern void drv_add_dtl(char *suri,int (*equal)(char *typ,char *val,char *s,int n),int (*allows)(char *typ,char *ps,char *s,int n)); extern int drv_start_tag_open(int p,char *suri,char *sname); extern int drv_start_tag_open_recover(int p,char *suri,char *sname); extern int drv_attribute_open(int p,char *suri,char *s); extern int drv_attribute_open_recover(int p,char *suri,char *s); extern int drv_attribute_close(int p); extern int drv_attribute_close_recover(int p); extern int drv_start_tag_close(int p); extern int drv_start_tag_close_recover(int p); extern int drv_text(int p,char *s,int n); extern int drv_text_recover(int p,char *s,int n); extern int drv_mixed_text(int p); extern int drv_mixed_text_recover(int p); extern int drv_end_tag(int p); extern int drv_end_tag_recover(int p); #endif --- NEW FILE: build_vms.com --- $! Replace these two strings to indicate where your expat install is located. $ expat_headers = "my_disk:[ref.c_include]" $ expat_olb = "my_disk:[olb]expat.olb" $ $ create vms.h $ deck #ifndef VMS_H #define VMS_H #define UNISTD_H <unistd.h> #define EXPAT_H "expat.h" #define RNV_VERSION "1.7.7" #define ARX_VERSION "1.7.7" #define RVP_VERSION "1.7.7" #endif /* VMS_H */ $ eod $ $ cc := cc/first_include=vms.h/incl='expat_headers' $ modules = "XCL,RNV,ARY,DRV,DSL,DXL,ER,HT,M,RN,RNC,RND,RNL,RNX," + - "RX,RX_CLS_RANGES,RX_CLS_U,S,SC,U,XMLC,XSD,XSD_TM" $ $ library/create/object rnv.olb $ count = 0 $loop: $ module = f$element (count, ",", modules) $ if module .eqs. "," $ then $ goto end_loop $ endif $ cc 'module' $ library/object/insert rnv.olb 'module' $ delete/nolog 'module'.obj;* $ count = count + 1 $ goto loop $end_loop: $ link/exe=rnv.exe rnv/lib/include=xcl,'expat_olb'/lib $ $! Now for the supporting cast... 
$ cc test $ link test,rnv/lib $ cc arx $ link arx,rnv/lib,'expat_olb'/lib $ cc rvp $ link rvp,rnv/lib $ purge/nolog *.exe $ purge/nolog *.olb $ delete/nolog vms.h;* $ exit --- NEW FILE: drv.c --- /* $Id: drv.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #include "xmlc.h" /*xmlc_white_space*/ #include "m.h" #include "s.h" /*s_tokcmpn*/ #include "ht.h" #include "rn.h" #include "xsd.h" #include "ll.h" #include "erbit.h" #include "er.h" #include "drv.h" struct dtl { int uri; int (*equal)(char *typ,char *val,char *s,int n); int (*allows)(char *typ,char *ps,char *s,int n); }; #define LEN_DTL DRV_LEN_DTL #define LEN_M DRV_LEN_M #define PRIME_M DRV_PRIME_M #define LIM_M DRV_LIM_M #define M_SIZE 5 #define M_STO 0 #define M_STC 1 #define M_ATT 2 #define M_TXT 3 #define M_END 4 #define M_SET(p) memo[i_m][M_SIZE-1]=p #define M_RET(m) memo[m][M_SIZE-1] int drv_compact=0; static struct dtl *dtl; static int len_dtl,n_dtl; static int (*memo)[M_SIZE]; static int i_m,len_m; static struct hashtable ht_m; #define err(msg) (*er_vprintf)(msg"\n",ap); void drv_default_verror_handler(int erno,va_list ap) { if(erno&ERBIT_XSD) { xsd_default_verror_handler(erno&~ERBIT_XSD,ap); } else { switch(erno) { case DRV_ER_NODTL: err("no datatype library for URI '%s'"); break; default: assert(0); } } } void (*drv_verror_handler)(int erno,va_list ap)=&drv_default_verror_handler; static void error_handler(int erno,...) 
{ va_list ap; va_start(ap,erno); (*drv_verror_handler)(erno,ap); va_end(ap); } static void verror_handler_xsd(int erno,va_list ap) {(*drv_verror_handler)(erno|ERBIT_XSD,ap);} static void new_memo(int typ) { if(drv_compact) ht_deli(&ht_m,i_m); memo[i_m][0]=typ; } static int equal_m(int m1,int m2) { int *me1=memo[m1],*me2=memo[m2]; return (me1[0]==me2[0])&&(me1[1]==me2[1])&&(me1[2]==me2[2])&&(me1[3]==me2[3]); } static int hash_m(int m) { int *me=memo[m]; return ((me[0]&0x7)|((me[1]^me[2]^me[3])<<3))*PRIME_M; } static int newStartTagOpen(int p,int uri,int name) { int *me=memo[i_m]; new_memo(M_STO); me[1]=p; me[2]=uri; me[3]=name; return ht_get(&ht_m,i_m); } static int newAttributeOpen(int p,int uri,int name) { int *me=memo[i_m]; new_memo(M_ATT); me[1]=p; me[2]=uri; me[3]=name; return ht_get(&ht_m,i_m); } static int newStartTagClose(int p) { int *me=memo[i_m]; new_memo(M_STC); me[1]=p; me[2]=me[3]=0; return ht_get(&ht_m,i_m); } static int newMixedText(int p) { int *me=memo[i_m]; new_memo(M_TXT); me[1]=p; me[2]=me[3]=0; return ht_get(&ht_m,i_m); } static int newEndTag(int p) { int *me=memo[i_m]; new_memo(M_END); me[1]=p; me[2]=me[3]=0; return ht_get(&ht_m,i_m); } static void accept_m(void) { if(ht_get(&ht_m,i_m)!=-1) { if(drv_compact) ht_del(&ht_m,i_m); else return; } ht_put(&ht_m,i_m++); if(drv_compact&&i_m==LIM_M) i_m=0; if(i_m==len_m) memo=(int(*)[M_SIZE])m_stretch(memo,len_m=2*i_m,i_m,sizeof(int[M_SIZE])); } static int fallback_equal(char *typ,char *val,char *s,int n) {return 1;} static int fallback_allows(char *typ,char *ps,char *s,int n) {return 1;} static int builtin_equal(char *typ,char *val,char *s,int n) { int dt=rn_newDatatype(0,typ-rn_string); if(dt==rn_dt_string) return s_cmpn(val,s,n)==0; else if(dt==rn_dt_token) return s_tokcmpn(val,s,n)==0; else assert(0); return 0; } static int builtin_allows(char *typ,char *ps,char *s,int n) {return 1;} static void windup(void); static int initialized=0; void drv_init(void) { if(!initialized) { initialized=1; 
rn_init(); xsd_init(); xsd_verror_handler=&verror_handler_xsd; memo=(int (*)[M_SIZE])m_alloc(len_m=LEN_M,sizeof(int[M_SIZE])); dtl=(struct dtl*)m_alloc(len_dtl=LEN_DTL,sizeof(struct dtl)); ht_init(&ht_m,LEN_M,&hash_m,&equal_m); windup(); } } static void windup(void) { i_m=0; n_dtl=0; drv_add_dtl(rn_string+0,&fallback_equal,&fallback_allows); /* guard at 0 */ drv_add_dtl(rn_string+0,&builtin_equal,&builtin_allows); drv_add_dtl(rn_string+rn_xsd_uri,&xsd_equal,&xsd_allows); } void drv_clear(void) { ht_clear(&ht_m); windup(); } void drv_add_dtl(char *suri,int (*equal)(char *typ,char *val,char *s,int n),int (*allows)(char *typ,char *ps,char *s,int n)) { if(n_dtl==len_dtl) dtl=(struct dtl *)m_stretch(dtl,len_dtl=n_dtl*2,n_dtl,sizeof(struct dtl)); dtl[n_dtl].uri=rn_newString(suri); dtl[n_dtl].equal=equal; dtl[n_dtl].allows=allows; ++n_dtl; } static struct dtl *getdtl(int uri) { int i; dtl[0].uri=uri; i=n_dtl; while(dtl[--i].uri!=uri); if(i==0) error_handler(DRV_ER_NODTL,rn_string+uri); return dtl+i; } static int ncof(int nc,int uri,int name) { int uri2,name2,nc1,nc2; switch(RN_NC_TYP(nc)) { case RN_NC_QNAME: rn_QName(nc,uri2,name2); return uri2==uri&&name2==name; case RN_NC_NSNAME: rn_NsName(nc,uri2); return uri2==uri; case RN_NC_ANY_NAME: return 1; case RN_NC_EXCEPT: rn_NameClassExcept(nc,nc1,nc2); return ncof(nc1,uri,name)&&!ncof(nc2,uri,name); case RN_NC_CHOICE: rn_NameClassChoice(nc,nc1,nc2); return ncof(nc1,uri,name)||ncof(nc2,uri,name); default: assert(0); } return 0; } static int apply_after(int (*f)(int q1,int q2),int p1,int p0) { int p11,p12; switch(RN_P_TYP(p1)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_INTERLEAVE: case RN_P_GROUP: case RN_P_ONE_OR_MORE: case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE: case RN_P_ATTRIBUTE: case RN_P_ELEMENT: return rn_notAllowed; case RN_P_CHOICE: rn_Choice(p1,p11,p12); return rn_choice(apply_after(f,p11,p0),apply_after(f,p12,p0)); case RN_P_AFTER: rn_After(p1,p11,p12); return 
rn_after(p11,(*f)(p12,p0)); default: assert(0); } return 0; } static int start_tag_open(int p,int uri,int name,int recover) { int nc,p1,p2,m,ret=0; if(!recover) { m=newStartTagOpen(p,uri,name); if(m!=-1) return M_RET(m); } switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE: case RN_P_ATTRIBUTE: ret=rn_notAllowed; break; case RN_P_CHOICE: rn_Choice(p,p1,p2); ret=rn_choice(start_tag_open(p1,uri,name,recover),start_tag_open(p2,uri,name,recover)); break; case RN_P_ELEMENT: rn_Element(p,nc,p1); ret=ncof(nc,uri,name)?rn_after(p1,rn_empty):rn_notAllowed; break; case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); ret=rn_choice( apply_after(&rn_ileave,start_tag_open(p1,uri,name,recover),p2), apply_after(&rn_ileave,start_tag_open(p2,uri,name,recover),p1)); break; case RN_P_GROUP: rn_Group(p,p1,p2); { int p11=apply_after(&rn_group,start_tag_open(p1,uri,name,recover),p2); ret=(rn_nullable(p1)||recover)?rn_choice(p11,start_tag_open(p2,uri,name,recover)):p11; } break; case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); ret=apply_after(&rn_group,start_tag_open(p1,uri,name,recover),rn_choice(p,rn_empty)); break; case RN_P_AFTER: rn_After(p,p1,p2); ret=apply_after(&rn_after,start_tag_open(p1,uri,name,recover),p2); break; default: assert(0); } if(!recover) { newStartTagOpen(p,uri,name); M_SET(ret); accept_m(); } return ret; } int drv_start_tag_open(int p,char *suri,char *sname) {return start_tag_open(p,rn_newString(suri),rn_newString(sname),0);} int drv_start_tag_open_recover(int p,char *suri,char *sname) {return start_tag_open(p,rn_newString(suri),rn_newString(sname),1);} static int puorg_rn(int p2,int p1) {return rn_group(p1,p2);} static int attribute_open(int p,int uri,int name) { int nc,p1,p2,m,ret=0; m=newAttributeOpen(p,uri,name); if(m!=-1) return M_RET(m); switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case 
RN_P_VALUE: case RN_P_ELEMENT: ret=rn_notAllowed; break; case RN_P_CHOICE: rn_Choice(p,p1,p2); ret=rn_choice(attribute_open(p1,uri,name),attribute_open(p2,uri,name)); break; case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); ret=ncof(nc,uri,name)?rn_after(p1,rn_empty):rn_notAllowed; break; case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); ret=rn_choice( apply_after(&rn_ileave,attribute_open(p1,uri,name),p2), apply_after(&rn_ileave,attribute_open(p2,uri,name),p1)); break; case RN_P_GROUP: rn_Group(p,p1,p2); ret=rn_choice( apply_after(&rn_group,attribute_open(p1,uri,name),p2), apply_after(&puorg_rn,attribute_open(p2,uri,name),p1)); break; case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); ret=apply_after(&rn_group,attribute_open(p1,uri,name),rn_choice(p,rn_empty)); break; case RN_P_AFTER: rn_After(p,p1,p2); ret=apply_after(&rn_after,attribute_open(p1,uri,name),p2); break; default: assert(0); } newAttributeOpen(p,uri,name); M_SET(ret); accept_m(); return ret; } int drv_attribute_open(int p,char *suri,char *sname) {return attribute_open(p,rn_newString(suri),rn_newString(sname));} int drv_attribute_open_recover(int p,char *suri,char *sname) {return p;} extern int drv_attribute_close(int p) {return drv_end_tag(p);} extern int drv_attribute_close_recover(int p) {return drv_end_tag_recover(p);} static int start_tag_close(int p,int recover) { int p1,p2,ret=0,m; if(!recover) { m=newStartTagClose(p); if(m!=-1) return M_RET(m); } switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE: case RN_P_ELEMENT: ret=p; break; case RN_P_CHOICE: rn_Choice(p,p1,p2); ret=rn_choice(start_tag_close(p1,recover),start_tag_close(p2,recover)); break; case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); ret=rn_ileave(start_tag_close(p1,recover),start_tag_close(p2,recover)); break; case RN_P_GROUP: rn_Group(p,p1,p2); ret=rn_group(start_tag_close(p1,recover),start_tag_close(p2,recover)); break; case RN_P_ONE_OR_MORE: 
rn_OneOrMore(p,p1); ret=rn_one_or_more(start_tag_close(p1,recover)); break; case RN_P_ATTRIBUTE: ret=recover?rn_empty:rn_notAllowed; break; case RN_P_AFTER: rn_After(p,p1,p2); ret=rn_after(start_tag_close(p1,recover),p2); break; default: assert(0); } if(!recover) { newStartTagClose(p); M_SET(ret); accept_m(); } return ret; } int drv_start_tag_close(int p) {return start_tag_close(p,0);} int drv_start_tag_close_recover(int p) {return start_tag_close(p,1);} static int text(int p,char *s,int n); static int list(int p,char *s,int n) { char *end=s+n,*sp; for(;;) { while(s!=end&&xmlc_white_space(*s)) ++s; sp=s; while(sp!=end&&!xmlc_white_space(*sp)) ++sp; if(s==end) break; p=text(p,s,sp-s); s=sp; } return p; } static int text(int p,char *s,int n) { /* matches text, including whitespace */ int p1,p2,dt,ps,lib,typ,val,ret=0; switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_ATTRIBUTE: case RN_P_ELEMENT: ret=rn_notAllowed; break; case RN_P_TEXT: ret=p; break; case RN_P_AFTER: rn_After(p,p1,p2); ret=rn_after(text(p1,s,n),p2); break; case RN_P_CHOICE: rn_Choice(p,p1,p2); ret=rn_choice(text(p1,s,n),text(p2,s,n)); break; case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); ret=rn_choice(rn_ileave(text(p1,s,n),p2),rn_ileave(p1,text(p2,s,n))); break; case RN_P_GROUP: rn_Group(p,p1,p2); { int p11=rn_group(text(p1,s,n),p2); ret=rn_nullable(p1)?rn_choice(p11,text(p2,s,n)):p11; } break; case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); ret=rn_group(text(p1,s,n),rn_choice(p,rn_empty)); break; case RN_P_LIST: rn_List(p,p1); ret=rn_nullable(list(p1,s,n))?rn_empty:rn_notAllowed; break; case RN_P_DATA: rn_Data(p,dt,ps); rn_Datatype(dt,lib,typ); ret=getdtl(lib)->allows(rn_string+typ,rn_string+ps,s,n)?rn_empty:rn_notAllowed; break; case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); ret=text(p1,s,n)==rn_empty&&!rn_nullable(text(p2,s,n))?rn_empty:rn_notAllowed; break; case RN_P_VALUE: rn_Value(p,dt,val); rn_Datatype(dt,lib,typ); 
ret=getdtl(lib)->equal(rn_string+typ,rn_string+val,s,n)?rn_empty:rn_notAllowed; break; default: assert(0); } return ret; } static int textws(int p,char *s,int n) { int p1=text(p,s,n),ws=1; char *end=s+n; while(s!=end) {if(!xmlc_white_space(*s)) {ws=0; break;} ++s;} return ws?rn_choice(p,p1):p1; } int drv_text(int p,char *s,int n) {return textws(p,s,n);} int drv_text_recover(int p,char *s,int n) {return p;} static int mixed_text(int p) { /* matches text in mixed context */ int p1,p2,ret=0,m; m=newMixedText(p); if(m!=-1) return M_RET(m); switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_ATTRIBUTE: case RN_P_ELEMENT: case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE: ret=rn_notAllowed; break; case RN_P_TEXT: ret=p; break; case RN_P_AFTER: rn_After(p,p1,p2); ret=rn_after(mixed_text(p1),p2); break; case RN_P_CHOICE: rn_Choice(p,p1,p2); ret=rn_choice(mixed_text(p1),mixed_text(p2)); break; case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); ret=rn_choice(rn_ileave(mixed_text(p1),p2),rn_ileave(p1,mixed_text(p2))); break; case RN_P_GROUP: rn_Group(p,p1,p2); { int p11=rn_group(mixed_text(p1),p2); ret=rn_nullable(p1)?rn_choice(p11,mixed_text(p2)):p11; } break; case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); ret=rn_group(mixed_text(p1),rn_choice(p,rn_empty)); break; default: assert(0); } newMixedText(p); M_SET(ret); accept_m(); return ret; } int drv_mixed_text(int p) {return mixed_text(p);} int drv_mixed_text_recover(int p) {return p;} static int end_tag(int p,int recover) { int p1,p2,ret=0,m; if(!recover) { m=newEndTag(p); if(m!=-1) return M_RET(m); } switch(RN_P_TYP(p)) { case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_INTERLEAVE: case RN_P_GROUP: case RN_P_ONE_OR_MORE: case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE: case RN_P_ATTRIBUTE: case RN_P_ELEMENT: ret=rn_notAllowed; break; case RN_P_CHOICE: rn_Choice(p,p1,p2); ret=rn_choice(end_tag(p1,recover),end_tag(p2,recover)); break; case RN_P_AFTER: 
rn_After(p,p1,p2); ret=(rn_nullable(p1)||recover)?p2:rn_notAllowed; break; default: assert(0); } if(!recover) { newEndTag(p); M_SET(ret); accept_m(); } return ret; } int drv_end_tag(int p) {return end_tag(p,0);} int drv_end_tag_recover(int p) {return end_tag(p,1);} --- NEW FILE: dsl.h --- /* $Id: dsl.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #ifndef DSL_H #define DSL_H 1 #ifndef DSL_SCM #define DSL_SCM 0 #endif #define DSL_URL "http://davidashen.net/relaxng/scheme-datatypes" extern void dsl_ld(char *dl); extern int dsl_allows(char *typ,char *ps,char *s,int n); extern int dsl_equal(char *typ,char *val,char *s,int n); #endif --- NEW FILE: m.c --- /* $Id: m.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #include <stdlib.h> #include <string.h> #include "er.h" #include "m.h" #ifndef M_STATIC #define M_STATIC 0 #endif #if M_STATIC #ifndef M_FILL #define M_FILL '\0' #endif static char memory[M_STATIC]; static char *mp=memory,*pmp=memory; void m_free(void *p) { if(p==pmp) { mp=pmp; pmp=(char*)-1; } } void *m_alloc(int length,int size) { char *p=mp, *q=mp; int n=length*size; pmp=mp; mp+=(n+sizeof(int)-1)/sizeof(int)*sizeof(int); if(mp>=memory+M_STATIC) { (*er_printf)("failed to allocate %i bytes of memory\n",length*size); exit(1); } if(M_FILL!=-1) while(q!=mp) *(q++)=M_FILL; return (char*)p; } #else void m_free(void *p) { free(p); } void *m_alloc(int length,int size) { void *p=malloc(length*size); if(p==NULL) { (*er_printf)("failed to allocate %i bytes of memory\n",length*size); exit(1); } return p; } #endif void *m_stretch(void *p,int newlen,int oldlen,int size) { void *newp=m_alloc(newlen,size); memcpy(newp,p,oldlen*size); m_free(p); return newp; } --- NEW FILE: Makefile.gnu --- VERSION=1.7.8 CC=cc # optional features M_STATIC=0 M_FILL=0 DSL_SCM=0 DXL_EXC=0 EXPAT_H="<expat.h>" UNISTD_H="<unistd.h>" SCM_H="<scm/scm.h>" INC=-I/usr/local/include ${CPPFLAGS} LBL=-L/usr/local/lib ${LDFLAGS} DEF=\ -DM_STATIC=${M_STATIC} \ -DM_FILL=${M_FILL} \ -DEXPAT_H=${EXPAT_H} \ 
-DUNISTD_H=${UNISTD_H} \ -DRNV_VERSION="\"${VERSION}\"" \ -DARX_VERSION="\"${VERSION}\"" \ -DRVP_VERSION="\"${VERSION}\"" WARN=-Wall -Wstrict-prototypes -Wmissing-prototypes -Wcast-align OPT=-O -g CFLAGS=${INC} ${DEF} ${WARN} ${OPT} LFLAGS=${OPT} ${LBL} LIBEXPAT=-lexpat LIB_SCM=-lscm -lm \ `sh -c '[ -f /usr/lib/libdl.a ] && echo -ldl \ ; [ -f /usr/lib/libsocket.a ] && echo -lsocket \ '` LIB=${LIBEXPAT} ifeq (${DSL_SCM},1) DEF+=-DDSL_SCM=${DSL_SCM} -DSCM_H=${SCM_H} LIB+=${LIB_SCM} endif ifeq (${DXL_EXC},1) DEF+=-DDXL_EXC=${DXL_EXC} endif LIBRNVA=librnv.a LIBRNVSO=librnv.so LIBRNV=${LIBRNVA} SRC=\ ll.h \ erbit.h \ xcl.c \ arx.c \ rvp.c \ xsdck.c \ test.c \ ary.c ary.h \ rn.c rn.h \ rnc.c rnc.h \ rnd.c rnd.h \ rnl.c rnl.h \ rnv.c rnv.h \ rnx.c rnx.h \ drv.c drv.h \ xsd.c xsd.h \ xsd_tm.c xsd_tm.h \ dxl.c dxl.h \ dsl.c dsl.h \ sc.c sc.h \ ht.c ht.h \ er.c er.h \ u.c u.h \ xmlc.c xmlc.h \ s.c s.h \ m.c m.h \ rx.c rx.h \ rx_cls_u.c \ rx_cls_ranges.c OBJ=\ rn.o \ rnc.o \ rnd.o \ rnl.o \ rnv.o \ rnx.o \ drv.o \ ary.o \ xsd.o \ xsd_tm.o \ dxl.o \ dsl.o \ sc.o \ u.o \ ht.o \ er.o \ xmlc.o \ s.o \ m.o \ rx.o .SUFFIXES: .c .o .c.o: ${CC} ${CFLAGS} -c -o $@ $< all: rnv arx rvp xsdck test rnv: xcl.o ${LIBRNV} ${CC} ${LFLAGS} -o rnv xcl.o ${LIBRNV} ${LIB} arx: arx.o ${LIBRNV} ${CC} ${LFLAGS} -o arx arx.o ${LIBRNV} ${LIB} rvp: rvp.o ${LIBRNV} ${CC} ${LFLAGS} -o rvp rvp.o ${LIBRNV} ${LIB} xsdck: xsdck.o ${LIBRNV} ${CC} ${LFLAGS} -o xsdck xsdck.o ${LIBRNV} ${LIB} test: test.o ${LIBRNV} ${CC} ${LFLAGS} -o test test.o ${LIBRNV} ${LIB} ${LIBRNVA}: ${OBJ} ar rc $@ ${OBJ} ranlib ${LIBRNVA} ${LIBRNVSO}: ${OBJ} gcc -shared -o $@ ${OBJ} depend: ${SRC} makedepend -Y ${DEF} ${SRC} clean: -rm -f *.o tst/c/*.o *.a *.so rnv arx rnd_test *_test *.core *.gmon *.gprof rnv*.zip rnv.txt rnv.pdf rnv.html rnv.xml rnd_test: ${LIBRNV} tst/c/rnd_test.c ${CC} ${LFLAGS} -I. 
-o rnd_test tst/c/rnd_test.c ${LIBRNV} ${LIB}
--- NEW FILE: Makefile.bcc ---
# $Id: Makefile.bcc,v 1.1 2009/08/03 05:32:45 mike Exp $
#
# Borland C++ (bcc32) build of rnv.exe, arx.exe and test.exe.
# VERSION is kept in step with Makefile and Makefile.gnu of the same release.
VERSION=1.7.8
CC=bcc32

EXPAT_H="<expat.h>"
UNISTD_H="<io.h>"

INC=-I. -Ic:\\expat\\source\\lib -Ic:\\borland\\bcc55\\include
LBL=-L. -Lc:\\borland\\bcc55\\lib

DEF=-DEXPAT_H=${EXPAT_H} -DUNISTD_H=${UNISTD_H} -DRNV_VERSION="\"${VERSION}\"" -DARX_VERSION="\"${VERSION}\""
WARN=-w-pia- -w-par- -w-aus- -w-ccc-
OPT=-O2

CFLAGS=${INC} ${DEF} ${WARN} ${OPT}
LFLAGS=${OPT} ${LBL}

LIBEXPAT=libexpats_mtd.lib
LIB=${LIBEXPAT}

# every module is listed with its own header (rnl.c goes with rnl.h)
SRC=\
ll.h \
erbit.h \
xcl.c \
arx.c \
test.c \
ary.c ary.h \
rn.c rn.h \
rnc.c rnc.h \
rnd.c rnd.h \
rnl.c rnl.h \
rnv.c rnv.h \
rnx.c rnx.h \
drv.c drv.h \
xsd.c xsd.h \
dsl.c dsl.h \
dxl.c dxl.h \
xsd_tm.c xsd_tm.h \
sc.c sc.h \
ht.c ht.h \
er.c er.h \
u.c u.h \
xmlc.c xmlc.h \
s.c s.h \
m.c m.h \
rx.c rx.h \
rx_cls_u.c \
rx_cls_ranges.c

OBJ=\
rn.obj \
rnc.obj \
rnd.obj \
rnl.obj \
rnv.obj \
rnx.obj \
drv.obj \
ary.obj \
xsd.obj \
dsl.obj \
dxl.obj \
xsd_tm.obj \
sc.obj \
ht.obj \
er.obj \
u.obj \
xmlc.obj \
s.obj \
m.obj \
rx.obj

.SUFFIXES: .c .obj

.c.obj:
	${CC} ${CFLAGS} -c -o$@ $<

all: rnv.exe arx.exe test.exe

rnv.exe: xcl.obj ${OBJ}
	${CC} ${LFLAGS} -ernv.exe xcl.obj ${OBJ} ${LIB}

arx.exe: arx.obj ${OBJ}
	${CC} ${LFLAGS} -earx.exe arx.obj ${OBJ} ${LIB}

test.exe: test.obj ${OBJ}
	${CC} ${LFLAGS} -etest.exe test.obj ${OBJ} ${LIB}

# both a Unix-style and a DOS-style removal are attempted; failures are ignored
clean:
	-rm -f *.obj *.exe
	-del *.obj *.exe

xcl.obj: m.h erbit.h rnl.h rnv.h rnx.h er.h ll.h
arx.obj: u.h m.h s.h xmlc.h ht.h erbit.h rnl.h rnv.h rx.h er.h ary.h
ary.obj: rn.h ary.h
rn.obj: m.h s.h ht.h ll.h rn.h
rnc.obj: u.h xmlc.h m.h s.h rn.h sc.h er.h rnc.h
rnd.obj: m.h rn.h rnx.h ll.h er.h rnd.h
rnl.obj: erbit.h rn.h rnc.h rnd.h rnl.h
rnv.obj: m.h xmlc.h erbit.h drv.h er.h rnv.h
rnx.obj: m.h s.h rn.h ll.h rnx.h
drv.obj: xmlc.h m.h s.h ht.h rn.h xsd.h ll.h erbit.h er.h drv.h
xsd.obj: u.h xmlc.h s.h erbit.h rx.h xsd_tm.h er.h xsd.h
xsd_tm.obj: xsd_tm.h
dxl.obj: m.h er.h dxl.h
dsl.obj: dsl.h
sc.obj: m.h ll.h sc.h ht.obj: m.h ht.h er.obj: er.h u.obj: u.h xmlc.obj: u.h xmlc.h s.obj: xmlc.h m.h s.h m.obj: er.h m.h rx.obj: u.h xmlc.h m.h s.h ht.h ll.h er.h rx.h rx_cls_u.c rx_cls_ranges.c --- NEW FILE: dsl.c --- /* $Id: dsl.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #include <stdlib.h> #include "dsl.h" static char *dsl_scm=NULL; #if DSL_SCM #include <string.h> #include <assert.h> #include UNISTD_H #include SCM_H #include "m.h" #include "er.h" /* simple rules better */ static char *implpath(void) { char *path=getenv("SCM_INIT_PATH"); return path&&access(path,R_OK)!=-1?path:(char*)IMPLINIT; } static void init_user_scm_dsl(void) {} static SCM toplvl(void) {return MAKINUM(scm_ldfile(dsl_scm));} void dsl_ld(char *dl) { assert(dsl_scm==NULL); dsl_scm=dl; init_user_scm=&init_user_scm_dsl; { char *argv[]={NULL,NULL}; argv[0]=dsl_scm; /*Init.scm wants args*/ scm_init_from_argv(sizeof(argv)/sizeof(char*)-1,argv,0,0,0); } if(MAKINUM(0)!=scm_top_level(implpath(),&toplvl)) { (*er_printf)("dsl: cannot load %s\n",dsl_scm); dsl_scm=NULL; } } /* these are parsed with shere macro, not used with sprintf */ #define ALLOWS "(dsl-allows? \"%s\" '(%s) \"%s\")" #define PARAM "(\"%s\".\"%s\")" #define EQUAL "(dsl-equal? 
\"%s\" \"%s\" \"%s\")" static int strnesc(char *d,char *s,int n) { char *t=d; while(n--!=0) {if(*s=='\\'||*s=='\"') *(t++)='\\'; *(t++)=*(s++);} *t=0; return t-d; } static int stresc(char *d,char *s) {return strnesc(d,s,strlen(s));} #define shere(bp,sp) while(!((*(bp++)=*(sp++))=='%'&&(*(bp++)=*(sp++))=='s')); bp-=2; int dsl_allows(char *typ,char *ps,char *s,int n) { char *buf,*sp,*bp, *p; int np,lenp; SCM ret=BOOL_F; if(dsl_scm) { p=ps; np=0; while(*p) {++np; while(*(p++)); while(*(p++));} lenp=p-ps-2*np; buf=(char*)m_alloc( strlen(ALLOWS)+np*strlen(PARAM)+2*(strlen(typ)+lenp+n)+1, sizeof(char)); bp=buf; sp=ALLOWS; shere(bp,sp); bp+=stresc(bp,typ); shere(bp,sp); /* parameters */ p=ps; while(np--) { char *sp=PARAM; shere(bp,sp); bp+=stresc(bp,p); while(*(p++)); shere(bp,sp); bp+=stresc(bp,p); while(*(p++)); while(*sp) *(bp++)=*(sp++); } shere(bp,sp); bp+=strnesc(bp,s,n); while((*(bp++)=*(sp++))); ret=scm_evstr(buf); m_free(buf); } return ret!=BOOL_F; } int dsl_equal(char *typ,char *val,char *s,int n) { char *buf,*sp,*bp; SCM ret=BOOL_F; if(dsl_scm) { buf=(char*)m_alloc( strlen(EQUAL)+2*(strlen(typ)+strlen(val)+n)+1, sizeof(char)); bp=buf; sp=EQUAL; shere(bp,sp); bp+=stresc(bp,typ); shere(bp,sp); bp+=stresc(bp,val); shere(bp,sp); bp+=strnesc(bp,s,n); while((*(bp++)=*(sp++))); ret=scm_evstr(buf); m_free(buf); } return ret!=BOOL_F; } #else void dsl_ld(char *dl) {} int dsl_allows(char *typ,char *ps,char *s,int n) {return 0;} int dsl_equal(char *typ,char *val,char *s,int n) {return 0;} #endif --- NEW FILE: license.txt --- Copyright (c) 2003, 2004 David Tolpin, Davidashen http://davidashen.net/ All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of Davidashen nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
--- NEW FILE: arx.c --- /* $Id: arx.c,v 1.1 2009/08/03 05:32:45 mike Exp $ */ /* Regular Associations for XML arx grammar: arx = grammars route* grammars = "grammars" "{" type2string+ "}" type2string = type "=" literal type = nmtoken route = match|nomatch|valid|invalid match = "=~" regexp "=>" type nomatch = "!~" regexp "=>" type valid = "valid" "{" rng "}" "=>" type invalid = "!valid" "{" rng "}" "=>" type literal=string in '"', '"' inside quoted by '\' regexp=string in '/', '/' inside quoted by '\' rng=relax ng compact syntax comments start with # and continue till end of line */ #include <stdlib.h> #include <string.h> #include <stdio.h> #include <sys/types.h> #include UNISTD_H #include <fcntl.h> #include <stdarg.h> #include <errno.h> #include <assert.h> #include EXPAT_H #include "u.h" #include "m.h" #include "s.h" #include "xmlc.h" #include "ht.h" #include "erbit.h" #include "rnl.h" #include "rnv.h" #include "rx.h" #include "er.h" #include "ary.h" extern int rn_notAllowed; /* rules */ #define VALID 1 #define INVAL 2 #define MATCH 3 #define NOMAT 4 #define LEN_2 16 #define LEN_R 64 #define LEN_S 64 #define S_AVG_SIZE 64 #define LEN_V 64 #define LEN_T 1024 #define LIM_T 65536 #define BUFSIZE 1024 static char *xml; static int len_2,len_r,len_s,i_2,i_r,i_s; static int (*t2s)[2],(*rules)[3]; static char *string; static struct hashtable ht_s; static int path2abs; /* arx parser */ static char *arxfn; static int arxfd, i_b,len_b, cc, line,col,prevline,rnc, sym,len_v, errors; static char buf[BUFSIZE]; static char *value; /* xml validator */ static XML_Parser expat=NULL; static int current,previous; static int mixed=0; static int ok,wf,any; static char *text; static int len_txt; static int n_txt; static int add_s(char *s) { int len=strlen(s)+1,j; if(i_s+len>len_s) string=(char*)m_stretch( string,len_s=2*(i_s+len),i_s,sizeof(char)); strcpy(string+i_s,s); if((j=ht_get(&ht_s,i_s))==-1) { ht_put(&ht_s,j=i_s); i_s+=len; } return j; } static int hash_s(int i) {return 
s_hval(string+i);} static int equal_s(int s1,int s2) {return strcmp(string+s1,string+s2)==0;} static void silent_verror_handler(int erno,va_list ap) { if(erno&ERBIT_DRV) rnv_default_verror_handler(erno,ap); /* low-level diagnostics */ } static void windup(void); static int initialized=0; static void init(void) { if(!initialized) {initialized=1; rnl_init(); rnv_init(); rnv_verror_handler=&silent_verror_handler; string=(char*)m_alloc(len_v=LEN_S*S_AVG_SIZE,sizeof(char)); t2s=(int(*)[2])m_alloc(len_2=LEN_2,sizeof(int[2])); rules=(int(*)[3])m_alloc(len_r=LEN_R,sizeof(int[3])); ht_init(&ht_s,LEN_S,&hash_s,&equal_s); value=(char*)m_alloc(len_v=LEN_V,sizeof(char)); text=(char*)m_alloc(len_txt=LEN_T,sizeof(char)); windup(); } } static void clear(void) { if(len_txt>LIM_T) {m_free(text); text=(char*)m_alloc(len_txt=LEN_T,sizeof(char));} ht_clear(&ht_s); windup(); } static void windup(void) { text[n_txt=0]='\0'; i_2=1; i_r=i_s=0; } /* parser */ #define SYM_EOF 0 #define SYM_GRMS 1 #define SYM_IDNT 2 #define SYM_LTRL 3 #define SYM_RGXP 4 #define SYM_RENG 5 #define SYM_MTCH 6 #define SYM_NMTC 7 #define SYM_VALD 8 #define SYM_NVAL 9 #define SYM_LCUR 10 #define SYM_RCUR 11 #define SYM_ASGN 12 #define SYM_INVL 13 static char *sym2str(int sym) { switch(sym) { case SYM_EOF: return "end of file"; case SYM_GRMS: return "'grammars'"; case SYM_IDNT: return "identifier"; case SYM_LTRL: return "literal"; case SYM_RGXP: return "regular expression"; case SYM_RENG: return "Relax NG"; case SYM_MTCH: return "'=~'"; case SYM_NMTC: return "'!~'"; case SYM_VALD: return "'valid'"; case SYM_NVAL: return "'!valid'"; case SYM_LCUR: return "'{'"; case SYM_RCUR: return "'}'"; case SYM_ASGN: return "'='"; case SYM_INVL: return "invalid character"; default: assert(0); } return NULL; } #define ARX_ER_IO 0 #define ARX_ER_SYN 1 #define ARX_ER_EXP 2 #define ARX_ER_REX 3 #define ARX_ER_RNG 4 #define ARX_ER_NOQ 5 #define ARX_ER_TYP 6 /* there is nothing in the grammar I need utf-8 processing for */ #define 
err(msg) (*er_vprintf)(msg"\n",ap) static void verror_handler(int erno,va_list ap) { (*er_printf)("%s:%i:%i: error: ",arxfn,line,col); switch(erno) { case ARX_ER_IO: err("I/O error: %s"); break; case ARX_ER_SYN: err("syntax error"); break; case ARX_ER_EXP: err("%s expected, %s found"); break; case ARX_ER_REX: err("invalid regular expression"); break; case ARX_ER_RNG: err("invalid Relax NG grammar"); break; case ARX_ER_NOQ: err("unterminated literal or regular expression"); break; case ARX_ER_TYP: err("undeclared type '%s'"); break; } } static void error(int erno,...) { if(line!=prevline) { va_list ap; va_start(ap,erno); verror_handler(erno,ap); va_end(ap); prevline=line; } ++errors; } static void getcc(void) { for(;;) { int cc0=cc; if(i_b==len_b) {i_b=0; if((len_b=read(arxfd,buf,BUFSIZE))==-1) error(ARX_ER_IO,strerror(errno));} cc=i_b>=len_b?-1:((unsigned char*)buf)[i_b++]; if(cc==-1) {if(cc0=='\n') break; else cc='\n';} if(cc=='\n' && cc0=='\r') continue; if(cc0=='\n' || cc0=='\r') {++line; col=0;} else ++col; break; } } static int nmtoken(int cc) {return cc>0x7F||xmlc_base_char(cc)||xmlc_digit(cc)||cc=='_'||cc=='.'||cc=='-'||cc==':';} static int getid(void) { if(nmtoken(cc)) { int i=0; do { value[i++]=cc; if(i==len_v) value=(char*)m_stretch(value,len_v=2*i,i,sizeof(char)); getcc(); } while(nmtoken(cc)); value[i]='\0'; return 1; } else return 0; } static void getq(void) { int cq=cc; int i=0; for(;;) { getcc(); if(cc==cq) { if(i!=0&&value[i-1]=='\\') --i; else {getcc(); break;} } else if(cc<' ') {error(ARX_ER_NOQ); break;} value[i++]=cc; if(i==len_v) value=(char*)m_stretch(value,len_v=2*i,i,sizeof(char)); } value[i]='\0'; } static void getrng(void) { int ircur=-1,i=0; int cc0; for(;;) { cc0=cc; getcc(); if(cc=='}') ircur=i; else if(cc=='>') {if(cc0=='=') {getcc(); break;}} /* use => as terminator */ else if(cc==-1) {error(ARX_ER_EXP,"=>",sym2str(SYM_EOF)); break;} value[i++]=cc; if(i==len_v) value=(char*)m_stretch(value,len_v=2*i,i,sizeof(char)); } if(ircur==-1) 
{error(ARX_ER_EXP,sym2str(SYM_RCUR),sym2str(SYM_EOF)); ircur=0;} value[ircur]='\0'; } static void getsym(void) { for(;;) { if(0<=cc&&cc<=' ') {getcc(); continue;} switch(cc) { case -1: sym=SYM_EOF; return; case '#': do getcc(); while(cc!='\n'&&cc!='\r'); getcc(); continue; case '{': if(sym==SYM_VALD||sym==SYM_NVAL) { getrng(); sym=SYM_RENG; } else { getcc(); sym=SYM_LCUR; } return; case '}': getcc(); sym=SYM_RCUR; return; case '!': getcc(); if(cc=='~') { getcc(); sym=SYM_NMTC; } else { if(getid()) { if(strcmp("valid",value)!=0) {error(ARX_ER_EXP,sym2str(SYM_NVAL),value);} sym=SYM_NVAL; } else {error(ARX_ER_SYN); sym=SYM_INVL;} } return; case '=': getcc(); switch(cc) { case '~': getcc(); sym=SYM_MTCH; return; case '>': getcc(); if(sym!=SYM_RGXP) error(ARX_ER_SYN); continue; default: sym=SYM_ASGN; return; } case '"': getq(); sym=SYM_LTRL; return; case '/': getq(); sym=SYM_RGXP; return; default: if(getid()) { sym=strcmp("grammars",value)==0?SYM_GRMS : strcmp("valid",value)==0?SYM_VALD:SYM_IDNT; } else {getcc(); error(ARX_ER_SYN); sym=SYM_INVL;} return; } } } static int chksym(int x) { if(sym!=x) {error(ARX_ER_EXP,sym2str(x),sym2str(sym)); return 0;} return 1; } static void chk_get(int x) { (void)chksym(x); getsym(); } static int typ2str(void) { int i=i_2,typ=add_s(value); t2s[0][0]=typ; for(;;) if(t2s[--i][0]==typ) break; if(i==0) error(ARX_ER_TYP,value); return t2s[i][1]; } static int arx(char *fn) { if((arxfd=open(arxfn=fn,O_RDONLY))==-1) { (*er_printf)("error (%s): %s\n",arxfn,strerror(errno)); return 0; } else { errors=0; len_b=read(arxfd,buf,BUFSIZE); i_b=u_bom(buf,len_b); prevline=-1; line=1; col=0; rnc=0; cc=' '; getsym(); chk_get(SYM_GRMS); chk_get(SYM_LCUR); do { if(i_2==len_2) t2s=(int(*)[2])m_stretch(t2s,len_2=i_2*2,i_2,sizeof(int[2])); if(chksym(SYM_IDNT)) t2s[i_2][0]=add_s(value); getsym(); chk_get(SYM_ASGN); if(chksym(SYM_LTRL)) { if(path2abs) { int len=strlen(arxfn)+strlen(value)+1; if(len>len_v) {value=(char*)m_stretch(value,len,len_v,sizeof(char)); 
len_v=len;} s_abspath(value,arxfn); } t2s[i_2][1]=add_s(value); } getsym(); ++i_2; } while(sym==SYM_IDNT); chk_get(SYM_RCUR); for(;;) { if(i_r==len_r) rules=(int(*)[3])m_stretch(rules,len_r=i_r*2,i_r,sizeof(int[3])); switch(sym) { case SYM_MTCH: rules[i_r][0]=MATCH; goto REGEXP; case SYM_NMTC: rules[i_r][0]=NOMAT; goto REGEXP; REGEXP: getsym(); if(chksym(SYM_RGXP)) { if(!rx_check(value)) error(ARX_ER_REX); rules[i_r][1]=add_s(value); } getsym(); if(chksym(SYM_IDNT)) rules[i_r][2]=typ2str(); goto NEXT; case SYM_VALD: rules[i_r][0]=VALID; goto RNG; case SYM_NVAL: rules[i_r][0]=INVAL; goto RNG; RNG: getsym(); if(chksym(SYM_RENG)) { char *rncfn=(char*)m_alloc(strlen(arxfn)+strlen("#rnc[]")+12,sizeof(char)); sprintf(rncfn,"%s#rnc[%i]",arxfn,rnc++); if(!(rules[i_r][1]=rnl_s(rncfn,value,strlen(value)))) error(ARX_ER_RNG); m_free(rncfn); } getsym(); if(chksym(SYM_IDNT)) rules[i_r][2]=typ2str(); goto NEXT; default: goto LAST; } NEXT: ++i_r; getsym(); } LAST: chk_get(SYM_EOF); close(arxfd); return !errors; } } static void flush_text(void) { ok=rnv_text(¤t,&previous,text,n_txt,mixed)&&ok; text[n_txt=0]='\0'; } static void start_element(void *userData,const char *name,const char **attrs) { if(current!=rn_notAllowed) { mixed=1; flush_text(); ok=rnv_start_tag(¤t,&previous,(char*)name,(char**)attrs)&&ok; mixed=0; any=any||ary_isany(current); } } static void end_element(void *userData,const char *name) { if(current!=rn_notAllowed) { flush_text(); ok=rnv_end_tag(¤t,&previous,(char*)name)&&ok; mixed=1; } } static void characters(void *userData,const char *s,int len) { if(current!=rn_notAllowed) { int newlen_txt=n_txt+len+1; if(newlen_txt<=LIM_T&&LIM_T<len_txt) newlen_txt=LIM_T; else if(newlen_txt<len_txt) newlen_txt=len_txt; if(len_txt!=newlen_txt) text=(char*)m_stretch(text,len_txt=newlen_txt,n_txt,sizeof(char)); memcpy(text+n_txt,s,len); n_txt+=len; text[n_txt]='\0'; /* '\0' guarantees that the text is bounded, and strto[ld] work for data */ } } static void validate(int start,int 
fd) { void *buf; int len; previous=current=start; expat=XML_ParserCreateNS(NULL,':'); XML_SetElementHandler(expat,&start_element,&end_element); XML_SetCharacterDataHandler(expat,&characters); ok=1; any=0; for(;;) { buf=XML_GetBuffer(expat,BUFSIZE); len=read(fd,buf,BUFSIZE); if(len<0) { (*er_printf)("error (%s): %s\n",xml,strerror(errno)); wf=ok=0; break; } if(!XML_ParseBuffer(expat,len,len==0)) wf=ok=0; if(!ok||any||len==0) break; } XML_ParserFree(expat); return; } static void version(void) {(*er_printf)("arx version %s\n",ARX_VERSION);} static void usage(void) {(*er_printf)("usage: arx {-[nvh?]} document.xml arx.conf {arx.conf}\n");} int main(int argc,char **argv) { int fd; init(); path2abs=1; while(*(++argv)&&**argv=='-') { int i=1; for(;;) { switch(*(*argv+i)) { case '\0': goto END_OF_OPTIONS; case 'h': case '?': usage(); return 1; case 'n': path2abs=0; break; case 'v': version(); break; default: (*er_printf)("unknown option '-%c'\n",*(*argv+i)); break; } ++i; } END_OF_OPTIONS:; } if(!(*(argv)&&*(argv+1))) {usage(); return 1;} xml=*(argv++); if((wf=(fd=open(xml,O_RDONLY))!=-1)) close(fd); do { if(arx(*(argv++))) { int i; for(i=0;i!=i_r;++i) { switch(rules[i][0]) { case VALID: if((ok=wf)) {validate(rules[i][1],fd=open(xml,O_RDONLY)); close(fd);} break; case INVAL: if((ok=wf)) {validate(rules[i][1],fd=open(xml,O_RDONLY)); close(fd); ok=wf&&!ok;} break; case MATCH: ok=rx_match(string+rules[i][1],xml,strlen(xml)); break; case NOMAT: ok=!rx_match(string+rules[i][1],xml,strlen(xml)); break; default: assert(0); } if(ok) { printf("%s\n",string+rules[i][2]); return EXIT_SUCCESS; } } } clear(); } while(*argv); return EXIT_FAILURE; } --- NEW FILE: dxl.c --- /* $Id: dxl.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #include <stdlib.h> #include "dxl.h" char *dxl_cmd=NULL; #if DXL_EXC #include <string.h> #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> #include <errno.h> #include <assert.h> #include "m.h" #include "er.h" int dxl_allows(char *typ,char *ps,char 
*s,int n) { int pid,status; if(!dxl_cmd) return 0; if((pid=fork())==0) { char **argv; int argc; char *p; int arg, i; argc=5; p=ps; arg=0; for(;;) { if(*p=='\0') { if(arg) {arg=0; ++argc;} else break; } else arg=1; ++p; } argv=(char**)m_alloc(argc,sizeof(char*)); argv[--argc]=NULL; argv[--argc]=(char*)m_alloc(n+1,sizeof(char)); argv[argc][n]='\0'; strncpy(argv[argc],s,n); argv[0]=dxl_cmd; argv[1]="allows"; argv[2]=typ; i=3; if(i<argc) { for(;;) { argv[i++]=ps; if(i==argc) break; while(*(ps++)); } } execv(dxl_cmd,argv); (*er_printf)("dxl: cannot execute %s: %s\n",dxl_cmd,strerror(errno)); } else if(pid>0) { wait(&status); return !WEXITSTATUS(status); } (*er_printf)("dxl: %s\n",strerror(errno)); return 0; } int dxl_equal(char *typ,char *val,char *s,int n) { int pid,status; if(!dxl_cmd) return 0; if((pid=fork())==0) { char *argv[]={NULL,"equal",NULL,NULL,NULL,NULL}; argv[0]=dxl_cmd; argv[2]=typ; argv[3]=val; argv[4]=(char*)m_alloc(n+1,sizeof(char)); argv[4][n]='\0'; strncpy(argv[4],s,n); execvp(dxl_cmd,argv); (*er_printf)("dxl: cannot execute %s\n",dxl_cmd,strerror(errno)); } else if(pid>0) { wait(&status); return !WEXITSTATUS(status); } (*er_printf)("dxl: %s\n",strerror(errno)); return 0; } #else int dxl_allows(char *typ,char *ps,char *s,int n) {return 0;} int dxl_equal(char *typ,char *val,char *s,int n) {return 0;} #endif --- NEW FILE: ll.h --- /* $Id: ll.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #ifndef LL_H #define LL_H 1 /* all limits that can affect speed or memory consumption; prefixes correspond to module names */ #define RN_LEN_P 1024 #define RN_PRIME_P 0x3fd #define RN_LIM_P (4*RN_LEN_P) #define RN_LEN_NC 256 #define RN_PRIME_NC 0xfb #define RN_LEN_S 256 #define SC_LEN 64 #define RND_LEN_F 1024 #define DRV_LEN_DTL 4 #define DRV_LEN_M 4096 #define DRV_PRIME_M 0xffd #define DRV_LIM_M (8*DRV_LEN_M) #define RNX_LEN_EXP 16 #define RNX_LIM_EXP 64 #define XCL_LEN_T 1024 #define XCL_LIM_T 16384 #define RX_LEN_P 256 #define RX_PRIME_P 0xfb #define RX_LIM_P 
(4*RX_LEN_P) #define RX_LEN_R 32 #define RX_PRIME_R 0x1f #define RX_LEN_2 RX_PRIME_R #define RX_PRIME_2 RX_PRIME_R #define RX_LEN_M 1024 #define RX_PRIME_M 0x3fd #define RX_LIM_M (8*RX_LEN_M) #endif --- NEW FILE: ary.h --- /* $Id: ary.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #ifndef ARY_H #define ARY_H 1 extern int ary_isany(int p); #endif --- NEW FILE: rnv.h --- /* $Id: rnv.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #include <stdarg.h> #ifndef RNV_H #define RNV_H 1 #define RNV_ER_ELEM 0 #define RNV_ER_AKEY 1 #define RNV_ER_AVAL 2 #define RNV_ER_EMIS 3 #define RNV_ER_AMIS 4 #define RNV_ER_UFIN 5 #define RNV_ER_TEXT 6 #define RNV_ER_NOTX 7 extern void (*rnv_verror_handler)(int erno,va_list ap); extern void rnv_default_verror_handler(int erno,va_list ap); extern void rnv_init(void); extern void rnv_clear(void); extern int rnv_text(int *curp,int *prevp,char *text,int n_t,int mixed); extern int rnv_start_tag(int *curp,int *prevp,char *name,char **attrs); extern int rnv_start_tag_open(int *curp,int *prevp,char *name); extern int rnv_attribute(int *curp,int *prevp,char *name,char *val); extern int rnv_start_tag_close(int *curp,int *prevp,char *name); extern int rnv_end_tag(int *curp,int *prevp,char *name); #endif --- NEW FILE: Makefile --- VERSION=1.7.8 CC=cc # optional features M_STATIC=0 M_FILL=0 DSL_SCM=0 DXL_EXC=0 EXPAT_H="<expat.h>" UNISTD_H="<unistd.h>" SCM_H="<scm/scm.h>" INC=-I/usr/local/include ${CPPFLAGS} LBL=-L/usr/local/lib ${LDFLAGS} DEF=\ -DM_STATIC=${M_STATIC} \ -DM_FILL=${M_FILL} \ -DEXPAT_H=${EXPAT_H} \ -DUNISTD_H=${UNISTD_H} \ -DRNV_VERSION="\"${VERSION}\"" \ -DARX_VERSION="\"${VERSION}\"" \ -DRVP_VERSION="\"${VERSION}\"" WARN=-Wall -Wstrict-prototypes -Wmissing-prototypes -Wcast-align OPT=-O -g CFLAGS=${INC} ${DEF} ${WARN} ${OPT} LFLAGS=${OPT} ${LBL} LIBEXPAT=-lexpat LIB_SCM=-lscm -lm \ `sh -c '[ -f /usr/lib/libdl.a ] && echo -ldl \ ; [ -f /usr/lib/libsocket.a ] && echo -lsocket \ '` LIB=${LIBEXPAT} ifeq (${DSL_SCM},1) DEF+=-DDSL_SCM=${DSL_SCM} 
-DSCM_H=${SCM_H} LIB+=${LIB_SCM} endif ifeq (${DXL_EXC},1) DEF+=-DDXL_EXC=${DXL_EXC} endif LIBRNVA=librnv.a LIBRNVSO=librnv.so LIBRNV=${LIBRNVA} SRC=\ ll.h \ erbit.h \ xcl.c \ arx.c \ rvp.c \ xsdck.c \ test.c \ ary.c ary.h \ rn.c rn.h \ rnc.c rnc.h \ rnd.c rnd.h \ rnl.c rnl.h \ rnv.c rnv.h \ rnx.c rnx.h \ drv.c drv.h \ xsd.c xsd.h \ xsd_tm.c xsd_tm.h \ dxl.c dxl.h \ dsl.c dsl.h \ sc.c sc.h \ ht.c ht.h \ er.c er.h \ u.c u.h \ xmlc.c xmlc.h \ s.c s.h \ m.c m.h \ rx.c rx.h \ rx_cls_u.c \ rx_cls_ranges.c OBJ=\ rn.o \ rnc.o \ rnd.o \ rnl.o \ rnv.o \ rnx.o \ drv.o \ ary.o \ xsd.o \ xsd_tm.o \ dxl.o \ dsl.o \ sc.o \ u.o \ ht.o \ er.o \ xmlc.o \ s.o \ m.o \ rx.o .SUFFIXES: .c .o .c.o: ${CC} ${CFLAGS} -c -o $@ $< all: rnv arx rvp xsdck test rnv: xcl.o ${LIBRNV} ${CC} ${LFLAGS} -o rnv xcl.o ${LIBRNV} ${LIB} arx: arx.o ${LIBRNV} ${CC} ${LFLAGS} -o arx arx.o ${LIBRNV} ${LIB} rvp: rvp.o ${LIBRNV} ${CC} ${LFLAGS} -o rvp rvp.o ${LIBRNV} ${LIB} xsdck: xsdck.o ${LIBRNV} ${CC} ${LFLAGS} -o xsdck xsdck.o ${LIBRNV} ${LIB} test: test.o ${LIBRNV} ${CC} ${LFLAGS} -o test test.o ${LIBRNV} ${LIB} ${LIBRNVA}: ${OBJ} ar rc $@ ${OBJ} ranlib ${LIBRNVA} ${LIBRNVSO}: ${OBJ} gcc -shared -o $@ ${OBJ} depend: ${SRC} makedepend -Y ${DEF} ${SRC} clean: -rm -f *.o tst/c/*.o *.a *.so rnv arx rnd_test *_test *.core *.gmon *.gprof rnv*.zip rnv.txt rnv.pdf rnv.html rnv.xml rnd_test: ${LIBRNV} tst/c/rnd_test.c ${CC} ${LFLAGS} -I. 
-o rnd_test tst/c/rnd_test.c ${LIBRNV} ${LIB} --- NEW FILE: xmlc.h --- /* $Id: xmlc.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #ifndef XMLC_H #define XMLC_H 1 /* character classes required for parsing XML */ extern int xmlc_white_space(int u); extern int xmlc_base_char(int u); extern int xmlc_ideographic(int u); extern int xmlc_combining_char(int u); extern int xmlc_digit(int u); extern int xmlc_extender(int u); extern int u_in_ranges(int u,int r[][2],int len); #endif --- NEW FILE: ary.c --- /* $Id: ary.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */ #include "rn.h" #include "ary.h" /* ary_isany::Pattern->Bool ary_isany p = let isanycontent p@(OneOrMore (Choice (Choice (Element AnyName p1) (Attribute AnyName Text)) Text)) = p == p1 isanycontent _ = False isanymixed (OneOrMore (Choice (Element AnyName p1) Text)) = isanycontent p1 isanymixed _ = False in case p of (After p1 Empty) -> isanymixed p1 (After p1 p2) -> isanymixed p1 && ary_isany p2 _ -> False */ static int isanycont(int p) { int p0,nc,p1,p2,i,res,flat[3]; p0=p; if(!RN_P_IS(p0,RN_P_ONE_OR_MORE)) return 0; rn_OneOrMore(p0,p1); p0=p1; if(!RN_P_IS(p0,RN_P_CHOICE)) return 0; rn_Choice(p0,p1,p2); flat[0]=p2; p0=p1; if(!RN_P_IS(p0,RN_P_CHOICE)) return 0; rn_Choice(p0,p1,p2); flat[1]=p1; flat[2]=p2; res=0; for(i=0;i!=3;++i) { p0=flat[i]; switch(RN_P_TYP(p0)) { case RN_P_ELEMENT: rn_Element(p0,nc,p1); if(!(RN_NC_IS(nc,RN_NC_ANY_NAME)&&p==p1)) return 0; res|=1; break; case RN_P_ATTRIBUTE: rn_Attribute(p0,nc,p1); if(!(RN_NC_IS(nc,RN_NC_ANY_NAME)&&p1==rn_text)) return 0; res|=2; break; case RN_P_TEXT: break; default: return 0; } } return res==3; } static int isanymix(int p) { int p0,nc,p1,p2,i,res,flat[2]; p0=p; if(!RN_P_IS(p0,RN_P_ONE_OR_MORE)) return 0; rn_OneOrMore(p0,p1); p0=p1; if(!RN_P_IS(p0,RN_P_CHOICE)) return 0; rn_Choice(p0,p1,p2); flat[0]=p1; flat[1]=p2; res=0; for(i=0;i!=2;++i) { p0=flat[i]; switch(RN_P_TYP(p0)) { case RN_P_ELEMENT: rn_Element(p0,nc,p1); if(!(RN_NC_IS(nc,RN_NC_ANY_NAME)&& isanycont(p1))) return 
0; res|=1; break; case RN_P_TEXT: break; default: return 0; } } return res==1; } int ary_isany(int p) { int p1,p2; if(!RN_P_IS(p,RN_P_AFTER)) return 0; rn_After(p,p1,p2); return isanymix(p1)&&(p2==rn_empty||ary_isany(p2)); } --- NEW FILE: rnx.h --- /* $Id: rnx.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #ifndef RNX_H #define RNX_H 1 extern void rnx_init(void); extern void rnx_clear(void); extern int rnx_n_exp,*rnx_exp; extern void rnx_expected(int p,int req); extern char *rnx_p2str(int p); extern char *rnx_nc2str(int nc); #endif --- NEW FILE: rnv.c --- /* $Id: rnv.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #include <string.h> /*strncpy,strrchr*/ #include <assert.h> #include "m.h" #include "xmlc.h" /*xmlc_white_space*/ #include "erbit.h" #include "drv.h" #include "er.h" #include "rnv.h" extern int rn_notAllowed; #define err(msg) (*er_vprintf)(msg"\n",ap); void rnv_default_verror_handler(int erno,va_list ap) { if(erno&ERBIT_DRV) { drv_default_verror_handler(erno&~ERBIT_DRV,ap); } else { switch(erno) { case RNV_ER_ELEM: err("element %s^%s not allowed"); break; case RNV_ER_AKEY: err("attribute %s^%s not allowed"); break; case RNV_ER_AVAL: err("attribute %s^%s with invalid value \"%s\""); break; case RNV_ER_EMIS: err("incomplete content"); break; case RNV_ER_AMIS: err("missing attributes of %s^%s"); break; case RNV_ER_UFIN: err("unfinished content of element %s^%s"); break; case RNV_ER_TEXT: err("invalid data or text not allowed"); break; case RNV_ER_NOTX: err("text not allowed"); break; default: assert(0); } } } void (*rnv_verror_handler)(int erno,va_list ap)=&rnv_default_verror_handler; static void error_handler(int erno,...) 
{ va_list ap; va_start(ap,erno); (*rnv_verror_handler)(erno,ap); va_end(ap); } static void verror_handler_drv(int erno,va_list ap) {(*rnv_verror_handler)(erno|ERBIT_DRV,ap);} static void windup(void); static int initialized=0; void rnv_init(void) { if(!initialized) {initialized=1; drv_init(); drv_verror_handler=&verror_handler_drv; windup(); } } void rnv_clear(void) { windup(); } static void windup(void) { } static char *qname_open(char **surip,char **snamep,char *name) { char *sep; if((sep=strrchr(name,':'))) { *snamep=sep+1; *surip=name; *sep='\0'; } else { *snamep=name; while(*name) ++name; *surip=name; } return sep; /* NULL if no namespace */ } static void qname_close(char *sep) {if(sep) *sep=':';} static int whitespace(char *text,int n_txt) { char *s=text,*end=text+n_txt; for(;;) { if(s==end) return 1; if(!xmlc_white_space(*(s++))) return 0; } } int rnv_text(int *curp,int *prevp,char *text,int n_txt,int mixed) { int ok=1; if(mixed) { if(!whitespace(text,n_txt)) { *curp=drv_mixed_text(*prevp=*curp); if(*curp==rn_notAllowed) { ok=0; *curp=drv_mixed_text_recover(*prevp); error_handler(RNV_ER_NOTX); } } } else { *curp=drv_text(*prevp=*curp,text,n_txt); if(*curp==rn_notAllowed) { ok=0; *curp=drv_text_recover(*prevp,text,n_txt); error_handler(RNV_ER_TEXT); } } return ok; } int rnv_start_tag_open(int *curp,int *prevp,char *name) { int ok=1; char *suri,*sname,*sep; sep=qname_open(&suri,&sname,name); *curp=drv_start_tag_open(*prevp=*curp,suri,sname); if(*curp==rn_notAllowed) { ok=0; *curp=drv_start_tag_open_recover(*prevp,suri,sname); error_handler(*curp==rn_notAllowed?RNV_ER_ELEM:RNV_ER_EMIS,suri,sname); } qname_close(sep); return ok; } int rnv_attribute(int *curp,int *prevp,char *name,char *val) { int ok=1; char *suri,*sname,*sep; sep=qname_open(&suri,&sname,name); *curp=drv_attribute_open(*prevp=*curp,suri,sname); if(*curp==rn_notAllowed) { ok=0; *curp=drv_attribute_open_recover(*prevp,suri,sname); error_handler(RNV_ER_AKEY,suri,sname); } else { 
*curp=drv_text(*prevp=*curp,(char*)val,strlen(val)); if(*curp==rn_notAllowed || (*curp=drv_attribute_close(*prevp=*curp))==rn_notAllowed) { ok=0; *curp=drv_attribute_close_recover(*prevp); error_handler(RNV_ER_AVAL,suri,sname,val); } } qname_close(sep); return ok; } int rnv_start_tag_close(int *curp,int *prevp,char *name) { int ok=1; char *suri,*sname,*sep; *curp=drv_start_tag_close(*prevp=*curp); if(*curp==rn_notAllowed) { ok=0; *curp=drv_start_tag_close_recover(*prevp); sep=qname_open(&suri,&sname,name); error_handler(RNV_ER_AMIS,suri,sname); qname_close(sep); } return ok; } int rnv_start_tag(int *curp,int *prevp,char *name,char **attrs) { int ok=1; ok=rnv_start_tag_open(curp,prevp,name)&&ok; while(*curp!=rn_notAllowed) { if(!(*attrs)) break; ok = rnv_attribute(curp,prevp,*attrs,*(attrs+1))&&ok; attrs+=2; } if(*curp!=rn_notAllowed) ok=rnv_start_tag_close(curp,prevp,name)&&ok; return ok; } int rnv_end_tag(int *curp,int *prevp,char *name) { int ok=1; char *suri,*sname,*sep; *curp=drv_end_tag(*prevp=*curp); if(*curp==rn_notAllowed) { ok=0; sep=qname_open(&suri,&sname,name); error_handler(RNV_ER_UFIN,suri,sname); qname_close(sep); *curp=drv_end_tag_recover(*prevp); } return ok; } --- NEW FILE: rnx.c --- /* $Id: rnx.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #include <stdlib.h> /*NULL*/ #include <string.h> /*strcat*/ #include "m.h" #include "s.h" #include "rn.h" #include "ll.h" #include "rnx.h" #define LEN_EXP RNX_LEN_EXP #define LIM_EXP RNX_LIM_EXP int rnx_n_exp,*rnx_exp=NULL; static int len_exp; static int initialized=0; void rnx_init(void) { if(!initialized) { initialized=1; rnx_exp=(int*)m_alloc(len_exp=LEN_EXP,sizeof(int)); } } void rnx_clear(void) {} static void expected(int p,int first,int req) { int p1,p2,px=0,i; if(req && rn_nullable(p)) return; switch(RN_P_TYP(p)) { case RN_P_ERROR: break; case RN_P_NOT_ALLOWED: break; case RN_P_EMPTY: break; case RN_P_TEXT: px=p; break; case RN_P_CHOICE: rn_Choice(p,p1,p2); expected(p1,first,req); expected(p2,first,req); 
break; case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); expected(p1,first,req); expected(p2,first,req); break; case RN_P_GROUP: rn_Group(p,p1,p2); expected(p1,first,req); expected(p2,first&&rn_nullable(p1),req); break; case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); expected(p1,first,req); break; case RN_P_LIST: rn_List(p,p1); expected(p1,first,req); break; case RN_P_DATA: px=p; break; case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); expected(p1,first,req); break; case RN_P_VALUE: px=p; break; case RN_P_ATTRIBUTE: px=p; break; case RN_P_ELEMENT: px=p; break; case RN_P_AFTER: rn_After(p,p1,p2); expected(p1,first,req); if(rn_nullable(p1)) px=p; break; case RN_P_REF: break; default: assert(0); } if(px&&(first||RN_P_IS(px,RN_P_ATTRIBUTE))) { for(i=0;i!=rnx_n_exp;++i) { if(rnx_exp[i]==px) {px=0; break;} } if(px) { if(rnx_n_exp==len_exp) rnx_exp=(int*)m_stretch(rnx_exp,len_exp=2*rnx_n_exp,rnx_n_exp,sizeof(int)); rnx_exp[rnx_n_exp++]=px; } } } void rnx_expected(int p,int req) { if(req) { if(len_exp>LIM_EXP) { m_free(rnx_exp); rnx_exp=(int*)m_alloc(len_exp=LIM_EXP,sizeof(int)); } rnx_n_exp=0; } expected(p,1,req); } char *rnx_p2str(int p) { char *s=NULL,*s1; int dt,ps,val,nc,p1; switch(RN_P_TYP(p)) { case RN_P_ERROR: s=s_clone("error"); break; case RN_P_NOT_ALLOWED: s=s_clone("notAllowed"); break; case RN_P_EMPTY: s=s_clone("empty"); break; case RN_P_TEXT: s=s_clone("text"); break; case RN_P_CHOICE: s=s_clone("choice (|)"); break; case RN_P_INTERLEAVE: s=s_clone("interleave (&)"); break; case RN_P_GROUP: s=s_clone("group (,)"); break; case RN_P_ONE_OR_MORE: s=s_clone("one or more (+)"); break; case RN_P_LIST: s=s_clone("list"); break; case RN_P_DATA: rn_Data(p,dt,ps); s1=rnx_nc2str(dt); s=(char*)m_alloc(strlen("data ")+1+strlen(s1),sizeof(char)); strcpy(s,"data "); strcat(s,s1); m_free(s1); break; case RN_P_DATA_EXCEPT: s=s_clone("dataExcept (-)"); break; case RN_P_VALUE: rn_Value(p,dt,val); s1=rnx_nc2str(dt); s=(char*)m_alloc(strlen("value \"\" 
")+1+strlen(s1)+strlen(rn_string+val),sizeof(char)); strcpy(s,"value "); strcat(s,s1); strcat(s," \""); strcat(s,rn_string+val); strcat(s,"\""); m_free(s1); break; case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); s1=rnx_nc2str(nc); s=(char*)m_alloc(strlen("attribute ")+1+strlen(s1),sizeof(char)); strcpy(s,"attribute "); strcat(s,s1); m_free(s1); break; case RN_P_ELEMENT: rn_Element(p,nc,p1); s1=rnx_nc2str(nc); s=(char*)m_alloc(strlen("element ")+1+strlen(s1),sizeof(char)); strcpy(s,"element "); strcat(s,s1); m_free(s1); break; case RN_P_REF: s=s_clone("ref"); break; case RN_P_AFTER: s=s_clone("after"); break; default: assert(0); } return s; } char *rnx_nc2str(int nc) { char *s=NULL,*s1,*s2; int nc1,nc2,uri,name; switch(RN_NC_TYP(nc)) { case RN_NC_ERROR: s=s_clone("?"); break; case RN_NC_NSNAME: rn_NsName(nc,uri); s=(char*)m_alloc(strlen(rn_string+uri)+3,sizeof(char)); strcpy(s,rn_string+uri); strcat(s,":*"); break; case RN_NC_QNAME: rn_QName(nc,uri,name); s=(char*)m_alloc(strlen(rn_string+uri)+strlen(rn_string+name)+2,sizeof(char)); strcpy(s,rn_string+uri); strcat(s,"^"); strcat(s,rn_string+name); break; case RN_NC_ANY_NAME: s=s_clone("*"); break; case RN_NC_EXCEPT: rn_NameClassExcept(nc,nc1,nc2); s1=rnx_nc2str(nc1); s2=rnx_nc2str(nc2); s=(char*)m_alloc(strlen(s1)+strlen(s2)+2,sizeof(char)); strcpy(s,s1); strcat(s,"-"); strcat(s,s2); m_free(s1); m_free(s2); break; case RN_NC_CHOICE: rn_NameClassChoice(nc,nc1,nc2); s1=rnx_nc2str(nc1); s2=rnx_nc2str(nc2); s=(char*)m_alloc(strlen(s1)+strlen(s2)+2,sizeof(char)); strcpy(s,s1); strcat(s,"|"); strcat(s,s2); m_free(s1); m_free(s2); break; case RN_NC_DATATYPE: rn_Datatype(nc,uri,name); s=(char*)m_alloc(strlen(rn_string+uri)+strlen(rn_string+name)+2,sizeof(char)); strcpy(s,rn_string+uri); strcat(s,"^"); strcat(s,rn_string+name); break; default: assert(0); } return s; } --- NEW FILE: xcl.c --- /* $Id: xcl.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */ #include <stdlib.h> #include <stdarg.h> #include <fcntl.h> /*open,close*/ #include 
<sys/types.h> #include UNISTD_H /*open,read,close*/ #include <string.h> /*strerror*/ #include <errno.h> #include <assert.h> #include EXPAT_H #include "m.h" #include "s.h" #include "erbit.h" #include "drv.h" #include "rnl.h" #include "rnv.h" #include "rnx.h" #include "ll.h" #include "dxl.h" #include "dsl.h" #include "er.h" extern int rn_notAllowed,rx_compact,drv_compact; #define LEN_T XCL_LEN_T #define LIM_T XCL_LIM_T #define BUFSIZE 1024 /* maximum number of candidates to display */ #define NEXP 16 #define XCL_ER_IO 0 #define XCL_ER_XML 1 #define XCL_ER_XENT 2 #define PIXGFILE "davidashen-net-xg-file" #define PIXGPOS "davidashen-net-xg-pos" static int peipe,verbose,nexp,rnck; static char *xml; static XML_Parser expat=NULL; static int start,current,previous; static int mixed=0; static int lastline,lastcol,level; static char *xgfile=NULL,*xgpos=NULL; static int ok; /* Expat does not normalize strings on input */ static char *text; static int len_txt; static int n_txt; #define err(msg) (*er_vprintf)(msg"\n",ap); static void verror_handler(int erno,va_list ap) { if(erno&ERBIT_RNL) { rnl_default_verror_handler(erno&~ERBIT_RNL,ap); } else { int line=XML_GetCurrentLineNumber(expat),col=XML_GetCurrentColumnNumber(expat); if(line!=lastline||col!=lastcol) { lastline=line; lastcol=col; if(xgfile) (*er_printf)("%s:%s: error: ",xgfile,xgpos); else (*er_printf)("%s:%i:%i: error: ",xml,line,col); if(erno&ERBIT_RNV) { rnv_default_verror_handler(erno&~ERBIT_RNV,ap); if(nexp) { int req=2, i=0; char *s; while(req--) { rnx_expected(previous,req); if(i==rnx_n_exp) continue; if(rnx_n_exp>nexp) break; (*er_printf)((char*)(req?"required:\n":"allowed:\n")); for(;i!=rnx_n_exp;++i) { (*er_printf)("\t%s\n",s=rnx_p2str(rnx_exp[i])); m_free(s); } } } } else { switch(erno) { case XCL_ER_IO: err("%s"); break; case XCL_ER_XML: err("%s"); break; case XCL_ER_XENT: err("pipe through xx to expand external entities"); break; default: assert(0); } } } } } static void verror_handler_rnl(int erno,va_list 
ap) {verror_handler(erno|ERBIT_RNL,ap);} static void verror_handler_rnv(int erno,va_list ap) {verror_handler(erno|ERBIT_RNV,ap);} static void windup(void); static int initialized=0; static void init(void) { if(!initialized) {initialized=1; rnl_init(); rnl_verror_handler=&verror_handler_rnl; rnv_init(); rnv_verror_handler=&verror_handler_rnv; rnx_init(); drv_add_dtl(DXL_URL,&dxl_equal,&dxl_allows); drv_add_dtl(DSL_URL,&dsl_equal,&dsl_allows); text=(char*)m_alloc(len_txt=LEN_T,sizeof(char)); windup(); } } static void clear(void) { if(len_txt>LIM_T) {m_free(text); text=(char*)m_alloc(len_txt=LEN_T,sizeof(char));} windup(); } static void windup(void) { text[n_txt=0]='\0'; level=0; lastline=lastcol=-1; } static void error_handler(int erno,...) { va_list ap; va_start(ap,erno); verror_handler(erno,ap); va_end(ap); } static void flush_text(void) { ok=rnv_text(¤t,&previous,text,n_txt,mixed)&&ok; text[n_txt=0]='\0'; } static void start_element(void *userData,const char *name,const char **attrs) { if(current!=rn_notAllowed) { mixed=1; flush_text(); ok=rnv_start_tag(¤t,&previous,(char*)name,(char**)attrs)&&ok; mixed=0; } else { ++level; } } static void end_element(void *userData,const char *name) { if(current!=rn_notAllowed) { flush_text(); ok=rnv_end_tag(¤t,&previous,(char*)name)&&ok; mixed=1; } else { if(level==0) current=previous; else --level; } } static void characters(void *userData,const char *s,int len) { if(current!=rn_notAllowed) { int newlen_txt=n_txt+len+1; if(newlen_txt<=LIM_T&&LIM_T<len_txt) newlen_txt=LIM_T; else if(newlen_txt<len_txt) newlen_txt=len_txt; if(len_txt!=newlen_txt) text=(char*)m_stretch(text,len_txt=newlen_txt,n_txt,sizeof(char)); memcpy(text+n_txt,s,len); n_txt+=len; text[n_txt]='\0'; /* '\0' guarantees that the text is bounded, and strto[ld] work for data */ } } static void processingInstruction(void *userData, const char *target,const char *data) { if(strcmp(PIXGFILE,target)==0) { if(xgfile) m_free(xgfile); xgfile=s_clone((char*)data); } else 
if(strcmp(PIXGPOS,target)==0) { if(xgpos) m_free(xgpos); xgpos=s_clone((char*)data); *strchr(xgpos,' ')=':'; } } static int pipeout(void *buf,int len) { int ofs=0,iw,lenw=len; for(;;) { if((iw=write(1,(char*)buf+ofs,lenw))==-1) {error_handler(XCL_ER_IO,strerror(errno)); return 0;} ofs+=iw; lenw-=iw; if(lenw==0) return 1; } } static int process(int fd) { void *buf; int len; for(;;) { buf=XML_GetBuffer(expat,BUFSIZE); len=read(fd,buf,BUFSIZE); if(len<0) { error_handler(XCL_ER_IO,xml,strerror(errno)); goto ERROR; } if(peipe) peipe=peipe&&pipeout(buf,len); if(!XML_ParseBuffer(expat,len,len==0)) goto PARSE_ERROR; if(len==0) break; } return ok; PARSE_ERROR: error_handler(XCL_ER_XML,XML_ErrorString(XML_GetErrorCode(expat))); while(peipe&&(len=read(fd,buf,BUFSIZE))!=0) peipe=peipe&&pipeout(buf,len); ERROR: return 0; } static int externalEntityRef(XML_Parser p,const char *context, const char *base,const char *systemId,const char *publicId) { error_handler(XCL_ER_XENT); return 1; } static void validate(int fd) { previous=current=start; expat=XML_ParserCreateNS(NULL,':'); XML_SetParamEntityParsing(expat,XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetElementHandler(expat,&start_element,&end_element); XML_SetCharacterDataHandler(expat,&characters); XML_SetExternalEntityRefHandler(expat,&externalEntityRef); XML_SetProcessingInstructionHandler(expat,&processingInstruction); ok=process(fd); XML_ParserFree(expat); } static void version(void) {(*er_printf)("rnv version %s\n",RNV_VERSION);} static void usage(void) {(*er_printf)("usage: rnv {-[qnspc" #if DXL_EXC "d" #endif #if DSL_SCM "e" #endif "vh?]} schema.rnc {document.xml}\n");} int main(int argc,char **argv) { init(); peipe=0; verbose=1; nexp=NEXP; rnck=0; while(*(++argv)&&**argv=='-') { int i=1; for(;;) { switch(*(*argv+i)) { case '\0': goto END_OF_OPTIONS; case 'q': verbose=0; nexp=0; break; case 'n': if(*(argv+1)) nexp=atoi(*(++argv)); goto END_OF_OPTIONS; case 's': drv_compact=1; rx_compact=1; break; case 'p': peipe=1; break; case 
'c': rnck=1; break; #if DXL_EXC case 'd': dxl_cmd=*(argv+1); if(*(argv+1)) ++argv; goto END_OF_OPTIONS; #endif #if DSL_SCM case 'e': dsl_ld(*(argv+1)); if(*(argv+1)) ++argv; goto END_OF_OPTIONS; #endif case 'v': version(); break; case 'h': case '?': usage(); return 1; default: (*er_printf)("unknown option '-%c'\n",*(*argv+i)); break; } ++i; } END_OF_OPTIONS:; } if(!*(argv)) {usage(); return 1;} if((ok=start=rnl_fn(*(argv++)))) { if(*argv) { do { int fd; xml=*argv; if((fd=open(xml,O_RDONLY))==-1) { (*er_printf)("I/O error (%s): %s\n",xml,strerror(errno)); ok=0; continue; } if(verbose) (*er_printf)("%s\n",xml); validate(fd); close(fd); clear(); } while(*(++argv)); if(!ok&&verbose) (*er_printf)("error: some documents are invalid\n"); } else { if(!rnck) { xml="stdin"; validate(0); clear(); if(!ok&&verbose) (*er_printf)("error: invalid input\n"); } } } return ok?EXIT_SUCCESS:EXIT_FAILURE; } --- NEW FILE: rn.h --- /* $Id: rn.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */ #ifndef RN_H #define RN_H 1 #include <assert.h> /* Patterns */ #define RN_P_ERROR 0 #define RN_P_NOT_ALLOWED 1 #define RN_P_EMPTY 2 #define RN_P_TEXT 3 #define RN_P_CHOICE 4 #define RN_P_INTERLEAVE 5 #define RN_P_GROUP 6 #define RN_P_ONE_OR_MORE 7 #define RN_P_LIST 8 #define RN_P_DATA 9 #define RN_P_DATA_EXCEPT 10 #define RN_P_VALUE 11 #define RN_P_ATTRIBUTE 12 #define RN_P_ELEMENT 13 #define RN_P_REF 14 #define RN_P_AFTER 15 /* Patterns and nameclasses are stored in arrays of integers. an integer is either an index in the same or another array, or a value that denotes record type etc. Each record has a macro that accesses its fields by assigning them to variables in the local scope, and a creator. 
*/ /* Pattern Bindings */ #define RN_P_TYP(i) (rn_pattern[i]&0xFF) #define RN_P_IS(i,x) (x==RN_P_TYP(i)) #define RN_P_CHK(i,x) assert(RN_P_IS(i,x)) #define RN_P_FLG_NUL 0x00000100 #define RN_P_FLG_TXT 0x00000200 #define RN_P_FLG_CTE 0x00000400 #define RN_P_FLG_CTC 0x00000800 #define RN_P_FLG_CTS 0x00001000 #define RN_P_FLG_ERS 0x40000000 #define RN_P_FLG_MRK 0x80000000 #define rn_marked(i) (rn_pattern[i]&RN_P_FLG_MRK) #define rn_mark(i) (rn_pattern[i]|=RN_P_FLG_MRK) #define rn_unmark(i) (rn_pattern[i]&=~RN_P_FLG_MRK) #define rn_nullable(i) (rn_pattern[i]&RN_P_FLG_NUL) #define rn_setNullable(i,x) if(x) rn_pattern[i]|=RN_P_FLG_NUL #define rn_cdata(i) rn_pattern[i]&RN_P_FLG_TXT #define rn_setCdata(i,x) if(x) rn_pattern[i]|=RN_P_FLG_TXT /* assert: p1 at 1, p2 at 2 */ #define rn_NotAllowed(i) RN_P_CHK(i,RN_P_NOT_ALLOWED) #define rn_Empty(i) RN_P_CHK(i,RN_P_EMPTY) #define rn_Text(i) RN_P_CHK(i,RN_P_TEXT) #define rn_Choice(i,p1,p2) RN_P_CHK(i,RN_P_CHOICE); p1=rn_pattern[i+1]; p2=rn_pattern[i+2] #define rn_Interleave(i,p1,p2) RN_P_CHK(i,RN_P_INTERLEAVE); p1=rn_pattern[i+1]; p2=rn_pattern[i+2] #define rn_Group(i,p1,p2) RN_P_CHK(i,RN_P_GROUP); p1=rn_pattern[i+1]; p2=rn_pattern[i+2] #define rn_OneOrMore(i,p1) RN_P_CHK(i,RN_P_ONE_OR_MORE); p1=rn_pattern[i+1] #define rn_List(i,p1) RN_P_CHK(i,RN_P_LIST); p1=rn_pattern[i+1] #define rn_Data(i,dt,ps) RN_P_CHK(i,RN_P_DATA); dt=rn_pattern[i+1]; ps=rn_pattern[i+2] #define rn_DataExcept(i,p1,p2) RN_P_CHK(i,RN_P_DATA_EXCEPT); p1=rn_pattern[i+1]; p2=rn_pattern[i+2] #define rn_Value(i,dt,s) RN_P_CHK(i,RN_P_VALUE); dt=rn_pattern[i+1]; s=rn_pattern[i+2] #define rn_Attribute(i,nc,p1) RN_P_CHK(i,RN_P_ATTRIBUTE); p1=rn_pattern[i+1]; nc=rn_pattern[i+2] #define rn_Element(i,nc,p1) RN_P_CHK(i,RN_P_ELEMENT); p1=rn_pattern[i+1]; nc=rn_pattern[i+2] #define rn_After(i,p1,p2) RN_P_CHK(i,RN_P_AFTER); p1=rn_pattern[i+1]; p2=rn_pattern[i+2] #define rn_Ref(i,p) RN_P_CHK(i,RN_P_REF); p=rn_pattern[i+1] /* Name Classes */ #define RN_NC_ERROR 0 #define 
RN_NC_QNAME 1 #define RN_NC_NSNAME 2 #define RN_NC_ANY_NAME 3 #define RN_NC_EXCEPT 4 #define RN_NC_CHOICE 5 #define RN_NC_DATATYPE 6 /* Name Class Bindings */ #define RN_NC_TYP(i) (rn_nameclass[i]&0xFF) #define RN_NC_IS(i,x) (x==RN_NC_TYP(i)) #define RN_NC_CHK(i,x) assert(RN_NC_IS(i,x)) #define rn_QName(i,uri,name) RN_NC_CHK(i,RN_NC_QNAME); uri=rn_nameclass[i+1]; name=rn_nameclass[i+2] #define rn_NsName(i,uri) RN_NC_CHK(i,RN_NC_NSNAME); uri=rn_nameclass[i+1] #define rn_AnyName(i) RN_NC_CHK(i,RN_NC_ANY_NAME) #define rn_NameClassExcept(i,nc1,nc2) RN_NC_CHK(i,RN_NC_EXCEPT); nc1=rn_nameclass[i+1]; nc2=rn_nameclass[i+2] #define rn_NameClassChoice(i,nc1,nc2) RN_NC_CHK(i,RN_NC_CHOICE); nc1=rn_nameclass[i+1]; nc2=rn_nameclass[i+2] #define rn_Datatype(i,lib,typ) RN_NC_CHK(i,RN_NC_DATATYPE); lib=rn_nameclass[i+1]; typ=rn_nameclass[i+2] extern int rn_empty,rn_text,rn_notAllowed,rn_dt_string,rn_dt_token,rn_xsd_uri; extern char *rn_string; extern int *rn_pattern; extern int *rn_nameclass; extern void rn_new_schema(void); extern int rn_contentType(int i); extern void rn_setContentType(int i,int t1,int t2); extern int rn_groupable(int p1,int p2); extern void rn_del_p(int i); extern void rn_add_p(int i); extern int rn_newString(char *s); extern int rn_newNotAllowed(void); extern int rn_newEmpty(void); extern int rn_newText(void); extern int rn_newChoice(int p1,int p2); extern int rn_newInterleave(int p1,int p2); extern int rn_newGroup(int p1,int p2); extern int rn_newOneOrMore(int p1); extern int rn_newList(int p1); extern int rn_newData(int dt,int ps); extern int rn_newDataExcept(int p1,int p2); extern int rn_newValue(int dt,int s); extern int rn_newAttribute(int nc,int p1); extern int rn_newElement(int nc,int p1); extern int rn_newAfter(int p1,int p2); extern int rn_newRef(void); extern int rn_one_or_more(int p); extern int rn_group(int p1,int p2); extern int rn_choice(int p1,int p2); extern int rn_ileave(int p1,int p2); extern int rn_after(int p1,int p2); extern int 
rn_newAnyName(void); extern int rn_newAnyNameExcept(int nc); extern int rn_newQName(int uri,int name); extern int rn_newNsName(int uri); extern int rn_newNameClassExcept(int nc1,int nc2); extern int rn_newNameClassChoice(int nc1,int nc2); extern int rn_newDatatype(int lib,int typ); extern int rn_i_ps(void); extern void rn_add_pskey(char *s); extern void rn_add_psval(char *s); extern void rn_end_ps(void); extern void rn_init(void); extern void rn_clear(void); extern void rn_compress(int *starts,int n); extern int rn_compress_last(int start); #endif --- NEW FILE: Makefile.bsd --- VERSION=1.7.8 CC=cc # optional features M_STATIC=0 M_FILL=0 DSL_SCM=0 DXL_EXC=0 EXPAT_H="<expat.h>" UNISTD_H="<unistd.h>" SCM_H="<scm/scm.h>" INC=-I/usr/local/include ${CPPFLAGS} LBL=-L/usr/local/lib ${LDFLAGS} DEF=\ -DM_STATIC=${M_STATIC} \ -DM_FILL=${M_FILL} \ -DEXPAT_H=${EXPAT_H} \ -DUNISTD_H=${UNISTD_H} \ -DRNV_VERSION="\"${VERSION}\"" \ -DARX_VERSION="\"${VERSION}\"" \ -DRVP_VERSION="\"${VERSION}\"" WARN=-Wall -Wstrict-prototypes -Wmissing-prototypes -Wcast-align OPT=-O -g CFLAGS=${INC} ${DEF} ${WARN} ${OPT} LFLAGS=${OPT} ${LBL} LIBEXPAT=-lexpat LIB_SCM=-lscm -lm \ `sh -c '[ -f /usr/lib/libdl.a ] && echo -ldl \ ; [ -f /usr/lib/libsocket.a ] && echo -lsocket \ '` LIB=${LIBEXPAT} .if ${DSL_SCM} DEF+=-DDSL_SCM=${DSL_SCM} -DSCM_H=${SCM_H} LIB+=${LIB_SCM} .endif .if ${DXL_EXC} DEF+=-DDXL_EXC=${DXL_EXC} .endif LIBRNVA=librnv.a LIBRNVSO=librnv.so LIBRNV=${LIBRNVA} SRC=\ ll.h \ erbit.h \ xcl.c \ arx.c \ rvp.c \ xsdck.c \ test.c \ ary.c ary.h \ rn.c rn.h \ rnc.c rnc.h \ rnd.c rnd.h \ rnl.c rnl.h \ rnv.c rnv.h \ rnx.c rnx.h \ drv.c drv.h \ xsd.c xsd.h \ xsd_tm.c xsd_tm.h \ dxl.c dxl.h \ dsl.c dsl.h \ sc.c sc.h \ ht.c ht.h \ er.c er.h \ u.c u.h \ xmlc.c xmlc.h \ s.c s.h \ m.c m.h \ rx.c rx.h \ rx_cls_u.c \ rx_cls_ranges.c OBJ=\ rn.o \ rnc.o \ rnd.o \ rnl.o \ rnv.o \ rnx.o \ drv.o \ ary.o \ xsd.o \ xsd_tm.o \ dxl.o \ dsl.o \ sc.o \ u.o \ ht.o \ er.o \ xmlc.o \ s.o \ m.o \ rx.o .SUFFIXES: .c .o 
# Rules. Every recipe line below must start with a TAB character.

.c.o:
	${CC} ${CFLAGS} -c -o $@ $<

all: rnv arx rvp xsdck test

rnv: xcl.o ${LIBRNV}
	${CC} ${LFLAGS} -o rnv xcl.o ${LIBRNV} ${LIB}

arx: arx.o ${LIBRNV}
	${CC} ${LFLAGS} -o arx arx.o ${LIBRNV} ${LIB}

rvp: rvp.o ${LIBRNV}
	${CC} ${LFLAGS} -o rvp rvp.o ${LIBRNV} ${LIB}

xsdck: xsdck.o ${LIBRNV}
	${CC} ${LFLAGS} -o xsdck xsdck.o ${LIBRNV} ${LIB}

test: test.o ${LIBRNV}
	${CC} ${LFLAGS} -o test test.o ${LIBRNV} ${LIB}

${LIBRNVA}: ${OBJ}
	ar rc $@ ${OBJ}
	ranlib ${LIBRNVA}

# linked with the configured compiler rather than a hard-coded gcc
${LIBRNVSO}: ${OBJ}
	${CC} -shared -o $@ ${OBJ}

depend: ${SRC}
	makedepend -Y ${DEF} ${SRC}

# removes every program that `all` builds, including rvp, xsdck and test
clean:
	-rm -f *.o tst/c/*.o *.a *.so rnv arx rvp xsdck test rnd_test *_test *.core *.gmon *.gprof rnv*.zip rnv.txt rnv.pdf rnv.html rnv.xml

rnd_test: ${LIBRNV} tst/c/rnd_test.c
	${CC} ${LFLAGS} -I. -o rnd_test tst/c/rnd_test.c ${LIBRNV} ${LIB}
Received on Monday, 3 August 2009 05:33:09 UTC