W3C home > Mailing lists > Public > public-html-commits@w3.org > August 2009

html5/tools/hv/rnv-1.7.8 Makefile,NONE,1.1 Makefile.bcc,NONE,1.1 Makefile.bsd,NONE,1.1 Makefile.gnu,NONE,1.1 arx.c,NONE,1.1 ary.c,NONE,1.1 ary.h,NONE,1.1 build_vms.com,NONE,1.1 changes.txt,NONE,1.1 drv.c,NONE,1.1 drv.h,NONE,1.1 dsl.c,NONE,1.1 dsl.h,NONE,1.1 dxl.c,NONE,1.1 dxl.h,NONE,1.1 er.c,NONE,1.1 er.h,NONE,1.1 erbit.h,NONE,1.1 ht.c,NONE,1.1 ht.h,NONE,1.1 license.txt,NONE,1.1 ll.h,NONE,1.1 m.c,NONE,1.1 m.h,NONE,1.1 readme.txt,NONE,1.1 rn.c,NONE,1.1 rn.h,NONE,1.1 rnc.c,NONE,1.1 rnc.h,NONE,1.1 rnd.c,NONE,1.1 rnd.h,NONE,1.1 rnl.c,NONE,1.1 rnl.h,NONE,1.1 rnv.c,NONE,1.1 rnv.h,NONE,1.1 rnx.c,NONE,1.1 rnx.h,NONE,1.1 rvp.c,NONE,1.1 rx.c,NONE,1.1 rx.h,NONE,1.1 rx_cls_ranges.c,NONE,1.1 rx_cls_u.c,NONE,1.1 s.c,NONE,1.1 s.h,NONE,1.1 sc.c,NONE,1.1 sc.h,NONE,1.1 src.txt,NONE,1.1 test.c,NONE,1.1 u.c,NONE,1.1 u.h,NONE,1.1 xcl.c,NONE,1.1 xmlc.c,NONE,1.1 xmlc.h,NONE,1.1 xsd.c,NONE,1.1 xsd.h,NONE,1.1 xsd_tm.c,NONE,1.1 xsd_tm.h,NONE,1.1 xsdck.c,NONE,1.1

From: Michael Smith via cvs-syncmail <cvsmail@w3.org>
Date: Mon, 03 Aug 2009 05:32:50 +0000
To: public-html-commits@w3.org
Message-Id: <E1MXqAF-0002yT-6N@lionel-hutz.w3.org>
Update of /sources/public/html5/tools/hv/rnv-1.7.8
In directory hutz:/tmp/cvs-serv11387

Added Files:
	Makefile Makefile.bcc Makefile.bsd Makefile.gnu arx.c ary.c 
	ary.h build_vms.com changes.txt drv.c drv.h dsl.c dsl.h dxl.c 
	dxl.h er.c er.h erbit.h ht.c ht.h license.txt ll.h m.c m.h 
	readme.txt rn.c rn.h rnc.c rnc.h rnd.c rnd.h rnl.c rnl.h rnv.c 
	rnv.h rnx.c rnx.h rvp.c rx.c rx.h rx_cls_ranges.c rx_cls_u.c 
	s.c s.h sc.c sc.h src.txt test.c u.c u.h xcl.c xmlc.c xmlc.h 
	xsd.c xsd.h xsd_tm.c xsd_tm.h xsdck.c 
Log Message:
initial add

--- NEW FILE: rx.h ---
/* $Id: rx.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */

/* Public interface of the regular expression matcher (rx.c). */

#include <stdarg.h>

#ifndef RX_H
#define RX_H

/* error numbers handed to rx_verror_handler; the text after each one is
   the message printed for it by rx_default_verror_handler */
#define RX_ER_BADCH 0 /* bad character */
#define RX_ER_UNFIN 1 /* unfinished expression */
#define RX_ER_NOLSQ 2 /* '[' expected */
#define RX_ER_NORSQ 3 /* ']' expected */
#define RX_ER_NOLCU 4 /* '{' expected */
#define RX_ER_NORCU 5 /* '}' expected */
#define RX_ER_NOLPA 6 /* '(' expected */
#define RX_ER_NORPA 7 /* ')' expected */
#define RX_ER_BADCL 8 /* unknown class */
#define RX_ER_NODGT 9 /* digit expected */
#define RX_ER_DNUOB 10 /* reversed bounds */
#define RX_ER_NOTRC 11 /* range or class expected */

/* error hook, initially rx_default_verror_handler; rx.c calls it with the
   error number and a va_list holding the expression text (char*) and the
   offset of the error (int) */
extern void (*rx_verror_handler)(int erno,va_list ap);
/* non-zero limits the size of the buffer of cached regular expressions */
extern int rx_compact;

/* prints "regular expressions: <message> in "<rx>" at offset <n>" through
   the er_printf/er_vprintf hooks */
extern void rx_default_verror_handler(int erno,va_list ap);

/* rx_init allocates the module tables (a repeated call does nothing);
   rx_clear empties them again */
extern void rx_init(void);
extern void rx_clear(void);

/* just compiles the expression to check the syntax */
extern int rx_check(char *rx);

/*
 returns positive value if the s[0..n] ~= rx, 0 if not, -1 on regex error;
 rx and s are in utf-8, rx is 0-terminated, s is n bytes long;
 rmatch replaces white space in s with 0x20,
 cmatch collapses white space.
 */
extern int rx_match(char *rx,char *s,int n);
extern int rx_rmatch(char *rx,char *s,int n);
extern int rx_cmatch(char *rx,char *s,int n);

#endif

--- NEW FILE: rnc.h ---
/* $Id: rnc.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */

/* Public interface of the rnc module.
   NOTE(review): the implementation (rnc.c) is not visible from this header;
   the comments below state only what the declarations show. */

#include <stdarg.h>

#ifndef RNC_H
#define RNC_H 1

/* error numbers handed to rnc_verror_handler; the numbering is grouped in
   tens (0, 10, 20, 30.., 40.., 50.., 70..) */
#define RNC_ER_IO 0
#define RNC_ER_UTF 10
#define RNC_ER_XESC 20
#define RNC_ER_LEXP 30
#define RNC_ER_LLIT 31
#define RNC_ER_LILL 32
#define RNC_ER_SEXP 40
#define RNC_ER_SILL 41
#define RNC_ER_NOTGR 42
#define RNC_ER_EXT 50
#define RNC_ER_DUPNS 51
#define RNC_ER_DUPDT 52
#define RNC_ER_DFLTNS 53
#define RNC_ER_DFLTDT 54
#define RNC_ER_NONS 55
#define RNC_ER_NODT 56
#define RNC_ER_NCEX 57
#define RNC_ER_2HEADS 58
#define RNC_ER_COMBINE 59
#define RNC_ER_OVRIDE 60
#define RNC_ER_EXPT 61
#define RNC_ER_INCONT 62
#define RNC_ER_NOSTART 70
#define RNC_ER_UNDEF 71

/* one symbol slot of a source: text buffer with its length, a line/column
   pair and a symbol code -- NOTE(review): presumably the position where the
   symbol starts; confirm in rnc.c */
struct rnc_cym {
  char *s; int slen;
  int line,col;
  int sym;
};

/* state of one input source; callers pass it to every rnc_* call below */
struct rnc_source {
  int flags;
  char *fn; int fd;
  char *buf; int i,n;
  int complete;
  int line,col,prevline/*when error reported*/;
  int u,v,w; int nx;
  int cur;
  struct rnc_cym sym[2];
};

/* error hook: called with an error number and the message arguments */
extern void (*rnc_verror_handler)(int er_no,va_list ap);

extern void rnc_default_verror_handler(int erno,va_list ap);

extern void rnc_init(void);
extern void rnc_clear(void);

/* three ways to attach input to a source: by file name, from a string of
   len bytes, or from a file descriptor -- NOTE(review): return value
   conventions are not visible here */
extern int rnc_open(struct rnc_source *sp,char *fn);
extern int rnc_stropen(struct rnc_source *sp,char *fn,char *s,int len);
extern int rnc_bind(struct rnc_source *sp,char *fn,int fd);
extern int rnc_close(struct rnc_source *sp);

extern int rnc_parse(struct rnc_source *sp);

extern int rnc_errors(struct rnc_source *sp);

#endif

--- NEW FILE: erbit.h ---
/* $Id: erbit.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#ifndef ERBIT_H
#define ERBIT_H 1

/* one distinct bit per module (rnc, rnd, rnl, rx, xsd, drv, rnv), all above
   the low 12 bits.
   NOTE(review): presumably OR'ed with a module's own error number to tell
   which module reported it -- confirm against the callers */
#define ERBIT_RNC 0x01000
#define ERBIT_RND 0x02000
#define ERBIT_RNL 0x04000
#define ERBIT_RX  0x08000
#define ERBIT_XSD 0x10000
#define ERBIT_DRV 0x20000
#define ERBIT_RNV 0x40000

#endif

--- NEW FILE: s.c ---
/* $Id: s.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#include <string.h> /*strcpy,strlen*/
#include <assert.h>
#include "xmlc.h"
#include "m.h"
#include "s.h"

/* Compares the 0-terminated string s1 with the n2 bytes starting at s2.
   Returns 0 when they are equal, otherwise the difference at the first
   position where they disagree (the remaining byte of s1, or the negated
   byte of s2, when one of them ends first). */
int s_cmpn(char *s1,char *s2,int n2) {
  int k;
  for(k=0;k!=n2;++k) {
    if(s1[k]=='\0') return -s2[k];
    if(s1[k]!=s2[k]) return s1[k]-s2[k];
  }
  /* s2 is exhausted: equal only if s1 ends here too */
  return s1[k];
}

/* Like s_cmpn, but compares token-wise: leading and trailing white space is
   ignored on both sides and every inner run of white space matches any
   other run. s1 is 0-terminated, s2 is n2 bytes long. */
int s_tokcmpn(char *s1,char *s2,int n2) {
  char *stop=s2+n2;
 /* all white space characters are one byte long */
  for(;xmlc_white_space(*s1);++s1);
  for(;s2!=stop&&xmlc_white_space(*s2);++s2);
  while(s2!=stop) {
    if(*s1=='\0') {
      /* s1 is finished; s2 may only have white space left */
      while(s2!=stop&&xmlc_white_space(*s2)) ++s2;
      return s2==stop?0:-*s2;
    }
    if(xmlc_white_space(*s1)&&xmlc_white_space(*s2)) {
      /* a separator on both sides: skip both runs entirely */
      ++s1; while(xmlc_white_space(*s1)) ++s1;
      ++s2; while(s2!=stop&&xmlc_white_space(*s2)) ++s2;
    } else if(*s1==*s2) {
      ++s1; ++s2;
    } else {
      return *s1-*s2;
    }
  }
  /* s2 is finished; s1 may only have white space left */
  while(xmlc_white_space(*s1)) ++s1;
  return *s1;
}

/* Hash value of the 0-terminated string s: h = h*31 + byte, over all bytes.
   The sum is accumulated in unsigned arithmetic, which wraps around by
   definition; accumulating in a signed int overflows (undefined behaviour)
   for any string longer than a few characters. Each byte is converted from
   char exactly as before, so the wrapped result is unchanged. */
int s_hval(char *s) {
  unsigned int h=0;
  for(;*s!='\0';++s) h=h*31u+(unsigned int)*s;
  return (int)h;
}

/* Returns a copy of the 0-terminated string s in memory obtained from
   m_alloc. */
char *s_clone(char *s) {
  char *copy=(char*)m_alloc(strlen(s)+1,sizeof(char));
  strcpy(copy,s);
  return copy;
}

/* Resolves the path r against the base path b, in place.
   If r does not start with '/' and b contains a '/', the part of b up to and
   including its last '/' is inserted in front of r; otherwise r is left
   untouched. The caller must provide room in r for the inserted prefix.
   Returns r, the start of the resulting path (the pointer used to be
   returned after it had been advanced past the inserted prefix). */
char *s_abspath(char *r,char *b) {
  if(*r!='/') {
    char *slash=strrchr(b,'/');
    if(slash) {
      size_t dirlen=(size_t)(slash-b)+1; /* directory part, '/' included */
      memmove(r+dirlen,r,strlen(r)+1); /* shift r, terminator included */
      memcpy(r,b,dirlen);
    }
  }
  return r;
}

/* Looks the 0-terminated string s up in the sorted table tab of size
   entries; see s_ntab for the result. */
int s_tab(char *s,char *tab[],int size) {
  return s_ntab(s,
                strlen(s),
                tab,
                size);
}
/* Binary search for the len bytes at s in tab, an array of size strings
   sorted in s_cmpn order. Returns the index of the matching entry, or size
   when there is none. */
int s_ntab(char *s,int len,char *tab[],int size) {
  int lo=0,hi=size-1;
  while(lo<=hi) {
    int mid=(lo+hi)/2;
    int cmp=s_cmpn(tab[mid],s,len);
    if(cmp==0) return mid;
    if(cmp>0) hi=mid-1; else lo=mid+1;
  }
  return size;
}

/* Self-test of the string helpers; every expectation is an assert. */
void s_test() {
  char path[256];

  /* s_cmpn: equality, ordering, and strings of different length */
  assert(s_cmpn("","",0)==0);
  assert(s_cmpn("/xyz","/xyz",4)==0);
  assert(s_cmpn("xyz","yz",2)<0);
  assert(s_cmpn("xyz","xxyz",4)>0);

  /* s_abspath: absolute paths stay, relative ones get the base directory */
  strcpy(path,"/x"); s_abspath(path,"/y");
  assert(strcmp(path,"/x")==0);
  strcpy(path,"x"); s_abspath(path,"/y");
  assert(strcmp(path,"/x")==0);
  strcpy(path,"x"); s_abspath(path,"/y/");
  assert(strcmp(path,"/y/x")==0);
  strcpy(path,"x"); s_abspath(path,"y/");
  assert(strcmp(path,"y/x")==0);
  strcpy(path,"x"); s_abspath(path,"y");
  assert(strcmp(path,"x")==0);
  strcpy(path,""); s_abspath(path,"y");
  assert(strcmp(path,"")==0);

  /* s_tokcmpn: white space is trimmed and collapsed before comparing */
  assert(s_tokcmpn("","",0)==0);
  assert(s_tokcmpn(""," ",1)==0);
  assert(s_tokcmpn("A","A",1)==0);
  assert(s_tokcmpn(" A   B","A B  ",5)==0);
  assert(s_tokcmpn("AB","A B",3)>0);
  assert(s_tokcmpn("","A",1)<0);
}

--- NEW FILE: er.h ---
/* $Id: er.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#ifndef ER_H
#define ER_H 1

#include <stdarg.h>

/* output hooks for diagnostics: modules print through these pointers,
   e.g. (*er_printf)("...") and (*er_vprintf)(format,ap) in rx.c, so the
   destination can be replaced by assigning other functions to them */
extern int (*er_printf)(char *format,...);
extern int (*er_vprintf)(char *format,va_list ap);

/* NOTE(review): presumably the initial values of the two hooks above --
   confirm in er.c */
extern int er_default_printf(char *format,...);
extern int er_default_vprintf(char *format,va_list ap);

#endif

--- NEW FILE: rx.c ---
/* $Id: rx.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */

#include <string.h> /*strlen,strcpy,strcmp*/
#include <assert.h>
#include "u.h" /*u_get,u_strlen*/
#include "xmlc.h"
#include "m.h"
#include "s.h"
#include "ht.h"
#include "ll.h"
#include "er.h"
#include "rx.h"

/* short local names for the table sizing constants; the RX_* values are not
   defined in this file (NOTE(review): presumably they come from ll.h).
   _P: patterns, _2: regex-to-pattern pairs, _R: regex strings */
#define LEN_P RX_LEN_P
#define PRIME_P RX_PRIME_P
#define LIM_P RX_LIM_P
#define LEN_2 RX_LEN_2
#define PRIME_2 RX_PRIME_2
#define LEN_R RX_LEN_R
#define PRIME_R RX_PRIME_R

/* the regex buffer starts with room for R_AVG_SIZE bytes per LEN_R entry */
#define R_AVG_SIZE 16

/* it is good to have few patterns when deltas are memoized */
/* Pattern type codes, stored in the low 4 bits of a pattern's first int.
   It is good to have few patterns when deltas are memoized. */
#define P_ERROR 0
#define P_NOT_ALLOWED  1
#define P_EMPTY 2
#define P_CHOICE 3
#define P_GROUP 4
#define P_ONE_OR_MORE 5 /*+*/
#define P_EXCEPT 6 /*single-single*/
#define P_RANGE 7 /*lower,upper inclusive*/
#define P_CLASS 8 /*complement is .-*/
#define P_ANY 9
#define P_CHAR 10

/* P_SIZE: free ints kept after i_p, equal to the largest entry of p_size;
   P_AVG_SIZE: ints per pattern assumed when the table is first allocated */
#define P_SIZE 3
#define P_AVG_SIZE 2

/* number of ints a pattern occupies, indexed by its P_* type code */
static int p_size[]={1,1,1,3,3,2,3,3,2,1,2};

/* type of the pattern at index i, and checks on it */
#define P_TYP(i) (pattern[i]&0xF)
#define P_IS(i,x)  ((x)==P_TYP(i))
#define P_CHK(i,x)  assert(P_IS(i,x))

/* Accessors: assert that pattern p has the expected type, then copy its
   operands into the given variables. They expand to several statements and
   must be used as a statement of their own. */
#define P_unop(TYP,p,p1) P_CHK(p,TYP); p1=pattern[(p)+1]
#define P_binop(TYP,p,p1,p2) P_unop(TYP,p,p1); p2=pattern[(p)+2]
/* the three operand-less checks named type codes that do not exist
   (P_NotAllowed, P_Empty, P_Any) and could not be used; they now name the
   codes defined above */
#define NotAllowed(p) P_CHK(p,P_NOT_ALLOWED)
#define Empty(p) P_CHK(p,P_EMPTY)
#define Any(p) P_CHK(p,P_ANY)
#define Choice(p,p1,p2) P_binop(P_CHOICE,p,p1,p2)
#define Group(p,p1,p2) P_binop(P_GROUP,p,p1,p2)
#define OneOrMore(p,p1) P_unop(P_ONE_OR_MORE,p,p1)
#define Except(p,p1,p2) P_binop(P_EXCEPT,p,p1,p2)
#define Range(p,cf,cl) P_binop(P_RANGE,p,cf,cl)
#define Class(p,cn) P_unop(P_CLASS,p,cn)
#define Char(p,c) P_unop(P_CHAR,p,c)

/* flag bit, outside the type bits: the pattern matches the empty string */
#define P_NUL 0x100

/* marks the pattern under construction (at i_p) as nullable when x holds;
   wrapped in do/while so that it is safe next to an else */
#define setNullable(x) do {if(x) pattern[i_p]|=P_NUL;} while(0)
#define nullable(p) (pattern[p]&P_NUL)

int rx_compact=0;
/* 'compact' in drv and rx do different things.
 In drv, it limits the size of the table of memoized deltas. In rx, it limits the size
 of the buffer for cached regular expressions; memoized deltas are always limited by LIM_M,
 since the whole repertoire of unicode characters can blow up the buffer.
 */

/* buffer of 0-terminated regex strings; add_r copies new ones in at i_r */
static char *regex;
/* patterns, each p_size[type] ints long; the next one is built at i_p */
static int *pattern;
/* pairs {offset of a regex in regex[], index of its compiled pattern} */
static int (*r2p)[2];
/* hash tables over regex[], pattern[] and r2p[] respectively */
static struct hashtable ht_r,ht_p,ht_2;
/* i_x: next free position in each of the three arrays, len_x: its capacity */
static int i_p,len_p,i_r,len_r,i_2,len_2;
/* indices of the three constant patterns, set up by windup */
static int empty,notAllowed,any;

/* Commits the pattern just written at i_p. If an equal pattern is already
   in the table its index is returned and the new one is left to be
   overwritten; otherwise the new pattern is registered, i_p moves past it
   and the array grows when fewer than P_SIZE ints would remain free. */
static int accept_p(void) {
  int found=ht_get(&ht_p,i_p);
  if(found!=-1) return found;
  found=i_p;
  ht_put(&ht_p,found);
  i_p+=p_size[P_TYP(found)];
  if(i_p+P_SIZE>len_p) {
    len_p=2*(i_p+P_SIZE);
    pattern=(int*)m_stretch(pattern,len_p,i_p,sizeof(int));
  }
  return found;
}

#define P_NEW(x) (pattern[i_p]=x)

#define P_newunop(TYP,p1) P_NEW(TYP); pattern[i_p+1]=p1
#define P_newbinop(TYP,p1,p2) P_newunop(TYP,p1); pattern[i_p+2]=p2
static int newNotAllowed(void) {P_NEW(P_NOT_ALLOWED); return accept_p();}
static int newEmpty(void) {P_NEW(P_EMPTY); setNullable(1); return accept_p();}
static int newAny(void) {P_NEW(P_ANY); return accept_p();}
static int newChoice(int p1,int p2) {P_newbinop(P_CHOICE,p1,p2); setNullable(nullable(p1)||nullable(p2)); return accept_p();}
static int newGroup(int p1,int p2) {P_newbinop(P_GROUP,p1,p2); setNullable(nullable(p1)&&nullable(p2)); return accept_p();}
static int newOneOrMore(int p1) {P_newunop(P_ONE_OR_MORE,p1); setNullable(nullable(p1)); return accept_p();}
static int newExcept(int p1,int p2) {P_newbinop(P_EXCEPT,p1,p2); return accept_p();}
static int newRange(int cf,int cl) {P_newbinop(P_RANGE,cf,cl); return accept_p();}
static int newClass(int cn) {P_newunop(P_CLASS,cn); return accept_p();}
static int newChar(int c) {P_newunop(P_CHAR,c); return accept_p();}

/* p+ ; empty and notAllowed are returned unchanged */
static int one_or_more(int p) {
  int typ=P_TYP(p);
  if(typ==P_EMPTY||typ==P_NOT_ALLOWED) return p;
  return newOneOrMore(p);
}

/* p1 followed by p2; notAllowed on either side absorbs the group, empty on
   either side drops out */
static int group(int p1,int p2) {
  int t1=P_TYP(p1),t2=P_TYP(p2);
  if(t1==P_NOT_ALLOWED) return p1;
  if(t2==P_NOT_ALLOWED) return p2;
  if(t1==P_EMPTY) return p2;
  if(t2==P_EMPTY) return p1;
  return newGroup(p1,p2);
}

/* Non-zero when p2 already is one of the alternatives of p1: walks down the
   left spine of the choice p1, comparing every right operand and finally
   the leftmost pattern with p2. */
static int samechoice(int p1,int p2) {
  while(P_IS(p1,P_CHOICE)) {
    int left,right; Choice(p1,left,right);
    if(right==p2) return 1;
    p1=left;
  }
  return p1==p2;
}

/* p1 | p2, simplified: notAllowed drops out, a choice on the right is
   folded into the left one alternative at a time, an alternative already
   present is not repeated, and empty is not added to a nullable pattern. */
static int choice(int p1,int p2) {
  if(P_IS(p1,P_NOT_ALLOWED)) return p2;
  if(P_IS(p2,P_NOT_ALLOWED)) return p1;
  if(P_IS(p2,P_CHOICE)) {
    int left,right; Choice(p2,left,right);
    return choice(choice(p1,left),right);
  }
  if(samechoice(p1,p2)) return p1;
  if(P_IS(p2,P_EMPTY)&&nullable(p1)) return p1;
  if(P_IS(p1,P_EMPTY)&&nullable(p2)) return p2;
  return newChoice(p1,p2);
}

/* Pattern for the character class cn: a positive number is the class
   itself, a negative one its complement (any except the class), and 0
   gives notAllowed. */
static int cls(int cn) {
  if(cn==0) return notAllowed;
  if(cn>0) return newClass(cn);
  return newExcept(any,newClass(-cn));
}

/* equality and hash for ht_r; the keys are offsets of 0-terminated strings
   in regex[] */
static int equal_r(int r1,int r2) {
  return !strcmp(&regex[r1],&regex[r2]);
}

static int hash_r(int r) {
  return s_hval(&regex[r]);
}

/* Equality for ht_p: two patterns are equal when they have the same type
   and the same operands; the flag bits of the first int are not compared. */
static int equal_p(int p1,int p2) {
  int n,k;
  if(P_TYP(p1)!=P_TYP(p2)) return 0;
  n=p_size[P_TYP(p1)];
  if(n<1||n>3) {assert(0); return 0;}
  /* operands occupy positions 1..n-1 */
  for(k=n-1;k>=1;--k) {
    if(pattern[p1+k]!=pattern[p2+k]) return 0;
  }
  return 1;
}
/* Hash for ht_p: the type code in the low 4 bits, the operand (or the xor
   of both operands) above them, multiplied by PRIME_P. */
static int hash_p(int p) {
  int typ=P_TYP(p),n=p_size[typ],key;
  if(n==1) key=typ;
  else if(n==2) key=typ|(pattern[p+1]<<4);
  else if(n==3) key=typ|((pattern[p+1]^pattern[p+2])<<4);
  else {assert(0); key=0;}
  return key*PRIME_P;
}

/* equality and hash for ht_2; entries of r2p are identified by the regex
   offset in their first element */
static int equal_2(int x1,int x2) {
  int r1=r2p[x1][0],r2=r2p[x2][0];
  return r1==r2;
}

static int hash_2(int x) {
  int r=r2p[x][0];
  return r*PRIME_2;
}

static int add_r(char *rx) {
  int len=strlen(rx)+1;
  if(i_r+len>len_r) regex=(char*)m_stretch(regex,len_r=2*(i_r+len),i_r,sizeof(char));
  strcpy(regex+i_r,rx);
  return len;
}

#define ERRPOS

/* every message ends with the expression and the offset of the error */
#define RX_ERR_FMT(msg) msg" in \"%s\" at offset %i\n"
#define err(msg) (*er_vprintf)(RX_ERR_FMT(msg),ap)

/* Default error handler: prints "regular expressions: ", then the message
   for erno formatted with the arguments in ap (the expression text and the
   offset), through the er_printf/er_vprintf hooks. */
void rx_default_verror_handler(int erno,va_list ap) {
  char *fmt;
  (*er_printf)("regular expressions: ");
  switch(erno) {
  case RX_ER_BADCH: fmt=RX_ERR_FMT("bad character"); break;
  case RX_ER_UNFIN: fmt=RX_ERR_FMT("unfinished expression"); break;
  case RX_ER_NOLSQ: fmt=RX_ERR_FMT("'[' expected"); break;
  case RX_ER_NORSQ: fmt=RX_ERR_FMT("']' expected"); break;
  case RX_ER_NOLCU: fmt=RX_ERR_FMT("'{' expected"); break;
  case RX_ER_NORCU: fmt=RX_ERR_FMT("'}' expected"); break;
  case RX_ER_NOLPA: fmt=RX_ERR_FMT("'(' expected"); break;
  case RX_ER_NORPA: fmt=RX_ERR_FMT("')' expected"); break;
  case RX_ER_BADCL: fmt=RX_ERR_FMT("unknown class"); break;
  case RX_ER_NODGT: fmt=RX_ERR_FMT("digit expected"); break;
  case RX_ER_DNUOB: fmt=RX_ERR_FMT("reversed bounds"); break;
  case RX_ER_NOTRC: fmt=RX_ERR_FMT("range or class expected"); break;
  default: assert(0); return; /* unknown error number: nothing more to print */
  }
  (*er_vprintf)(fmt,ap);
}

/* the installed error handler; replaceable by the application */
void (*rx_verror_handler)(int erno,va_list ap)=rx_default_verror_handler;

/* packs the variable arguments into a va_list for the installed handler */
static void error_handler(int erno,...) {
  va_list args;
  va_start(args,erno);
  (*rx_verror_handler)(erno,args);
  va_end(args);
}

/* sizing constants of the memo table; the RX_* values are not defined in
   this file */
#define LEN_M RX_LEN_M
#define PRIME_M RX_PRIME_M
#define LIM_M RX_LIM_M

/* ints per memo entry: two key fields and, last, the stored result */
#define M_SIZE 3

/* M_SET stores p as the result of the entry being built at i_m,
   M_RET reads the result of entry m */
#define M_SET(p) memo[i_m][M_SIZE-1]=p
#define M_RET(m) memo[m][M_SIZE-1]

/* memo entries {p,c,result}; i_m is the slot being filled (accept_m wraps
   it back to 0 at LIM_M), len_m the allocated number of entries */
static int (*memo)[M_SIZE];
static int i_m,len_m;
/* hash table over memo[], keyed on the first two fields of an entry */
static struct hashtable ht_m;

static int new_memo(int p,int c) {
  int *me=memo[i_m];
  ht_deli(&ht_m,i_m);
  me[0]=p; me[1]=c;
  return ht_get(&ht_m,i_m);
}

/* equality for ht_m: both key fields of the two memo entries agree */
static int equal_m(int m1,int m2) {
  if(memo[m1][0]!=memo[m2][0]) return 0;
  if(memo[m1][1]!=memo[m2][1]) return 0;
  return 1;
}
static int hash_m(int m) {
  int *me=memo[m];
  return (me[0]^me[1])*PRIME_M;
}

/* Commits the memo entry at i_m: an equal entry already in the table is
   removed, the new one is inserted, and i_m moves to the next slot, wrapping
   to 0 at LIM_M. The array is doubled when i_m reaches its end. */
static void accept_m(void) {
  int slot=i_m;
  if(ht_get(&ht_m,slot)!=-1) ht_del(&ht_m,slot);
  ht_put(&ht_m,slot);
  i_m=slot+1;
  if(i_m>=LIM_M) i_m=0;
  if(i_m==len_m) {
    len_m=2*i_m;
    memo=(int(*)[M_SIZE])m_stretch(memo,len_m,i_m,sizeof(int[M_SIZE]));
  }
}

static void windup(void);
static int initialized=0;

/* Allocates the pattern, regex-to-pattern, regex and memo arrays, sets up
   their hash tables and creates the constant patterns. Only the first call
   does anything. */
void rx_init(void) {
  if(initialized) return;
  initialized=1;

  len_p=P_AVG_SIZE*LEN_P;
  pattern=(int *)m_alloc(len_p,sizeof(int));
  len_2=LEN_2;
  r2p=(int (*)[2])m_alloc(len_2,sizeof(int[2]));
  len_r=R_AVG_SIZE*LEN_R;
  regex=(char*)m_alloc(len_r,sizeof(char));
  len_m=LEN_M;
  memo=(int (*)[M_SIZE])m_alloc(len_m,sizeof(int[M_SIZE]));

  ht_init(&ht_p,LEN_P,&hash_p,&equal_p);
  ht_init(&ht_2,LEN_2,&hash_2,&equal_2);
  ht_init(&ht_r,LEN_R,&hash_r,&equal_r);
  ht_init(&ht_m,LEN_M,&hash_m,&equal_m);

  windup();
}

/* Empties the four hash tables and restarts the arrays from the constant
   patterns; the arrays themselves stay allocated. */
void rx_clear(void) {
  ht_clear(&ht_p);
  ht_clear(&ht_2);
  ht_clear(&ht_r);
  ht_clear(&ht_m);
  windup();
}

/* Resets all fill positions and re-creates the fixed patterns: the error
   pattern at index 0, then empty, notAllowed and any. */
static void windup(void) {
  i_p=0; i_r=0; i_2=0; i_m=0;
  pattern[0]=P_ERROR;
  accept_p();
  empty=newEmpty();
  notAllowed=newNotAllowed();
  any=newAny();
}

/* symbol kinds produced by getsym */
#define SYM_END 0
#define SYM_CLS 1
#define SYM_ESC 2
#define SYM_CHR 3

/* parser state: r0 is the offset in regex[] where the expression starts,
   ri the read position; sym and val are the current symbol and its value;
   errors counts the errors found in the current expression */
static int r0,ri,sym,val,errors;

/* Counts an error; only the first one in an expression is reported, with
   the expression text and the offset, in characters, of the read position. */
static void error(int erno) {
  if(errors==0) {
    error_handler(erno,regex+r0,u_strlen(regex+r0)-u_strlen(regex+ri));
  }
  errors+=1;
}

#include "rx_cls_u.c"

static int chclass(void) {
  int u,cl,rj;
  ri+=u_get(&u,regex+ri);
  if(u=='\0') {--ri; error(RX_ER_NOLCU); return 0;}
  if(u!='{') {error(RX_ER_NOLCU); return 0;}
  rj=ri;
  for(;;) {
    if(regex[rj]=='\0') {ri=rj; error(RX_ER_NORCU); return 0;}
    if(regex[rj]=='}') {
      if((cl=s_ntab(regex+ri,rj-ri,clstab,NUM_CLS_U))==NUM_CLS_U) {error(RX_ER_BADCL); cl=0;}
      ri=rj+1;
      return cl;
    }
    ++rj;
  }
}

#define CLS_NL (NUM_CLS_U+1)
#define CLS_S (NUM_CLS_U+2)
#define CLS_I (NUM_CLS_U+3)
#define CLS_C (NUM_CLS_U+4)
#define CLS_W (NUM_CLS_U+5)
#define NUM_CLS (NUM_CLS_U+6)

static void getsym(void) {
  int u;
  if(regex[ri]=='\0') sym=SYM_END; else {
    ri+=u_get(&u,regex+ri);
    if(u=='\\') {
      ri+=u_get(&u,regex+ri);
      switch(u) {
      case '\0': --ri; error(RX_ER_UNFIN); sym=SYM_END; break;
      case 'p': sym=SYM_CLS; val=chclass(); break;
      case 'P': sym=SYM_CLS; val=-chclass(); break;
      case 's': sym=SYM_CLS; val=CLS_S; break;
      case 'S': sym=SYM_CLS; val=-CLS_S; break;
      case 'i': sym=SYM_CLS; val=CLS_I; break;
      case 'I': sym=SYM_CLS; val=-CLS_I; break;
      case 'c': sym=SYM_CLS; val=CLS_C; break;
      case 'C': sym=SYM_CLS; val=-CLS_C; break;
      case 'd': sym=SYM_CLS; val=CLS_U_Nd; break;
      case 'D': sym=SYM_CLS; val=-CLS_U_Nd; break;
      case 'w': sym=SYM_CLS; val=CLS_W; break;
      case 'W': sym=SYM_CLS; val=-CLS_W; break;
      case 'n': sym=SYM_ESC; val=0xA; break;
      case 'r': sym=SYM_ESC; val=0xD; break;
      case 't': sym=SYM_ESC; val=0x9; break;
      case '\\': case '|': case '.': case '-': case '^': case '?': case '*': case '+':
      case '{': case '}': case '[': case ']': case '(': case ')':
	sym=SYM_ESC; val=u; break;
      default: error(RX_ER_BADCH); sym=SYM_ESC; val=u; break;
      }
    } else {
      switch(u) {
      case '.': sym=SYM_CLS; val=-CLS_NL; break;
      default: sym=SYM_CHR; val=u; break;
      }
    }
  }
}

static void chk_get(int v,int erno) {if(sym!=SYM_CHR||val!=v) error(erno); getsym();}


/* reports an error if an unescaped '[', ']' or '-' is used as a character
   of a group; a bare if, so it must stay a statement of its own */
#define chkrch(val) if((val)=='['||(val)==']'||(val)=='-') error(RX_ER_NOTRC)

/* Parses the items of a character group: single characters, ranges
   "c1-c2" and class escapes, each added to p with choice(). Stops, leaving
   the symbol unread, at an unescaped ']' or '-' (the '-' of a subtraction is
   handled by chexpr), or at the end of the expression, which is an error.
   Returns the pattern for the items read. */
static int chgroup(void) {
  int p=notAllowed,c;
  for(;;) {
    switch(sym) {
    /* a plain character is checked, then handled like an escaped one */
    case SYM_CHR: chkrch(val);
    case SYM_ESC: c=val; getsym();
      if(sym==SYM_CHR&&val=='-') {
	/* "c-[" : c is a single character, the '-' starts a subtraction */
	if(regex[ri]=='[') {
	  p=choice(p,newChar(c));
	  goto END_OF_GROUP;
	} else {
	  /* "c-d" : a range; its upper bound is the next symbol */
	  getsym();
	  switch(sym) {
	  case SYM_CHR: chkrch(val);
	  case SYM_ESC: p=choice(p,newRange(c,val)); getsym(); break;
	  default: error(RX_ER_BADCH); getsym(); break;
	  }
	}
      } else {
	p=choice(p,newChar(c));
      }
      break;
    case SYM_CLS: p=choice(p,cls(val)); getsym(); break;
    case SYM_END: error(RX_ER_NORSQ); goto END_OF_GROUP;
    default: assert(0);
    }
    if(sym==SYM_CHR&&(val==']'||val=='-')) goto END_OF_GROUP;
  }
  END_OF_GROUP:;
  return p;
}

/* Parses the inside of a bracket expression: an optional '^' (negation), a
   character group, and an optional subtraction "-[...]". The enclosing
   brackets are consumed by the caller. */
static int chexpr(void) {
  int p;
  int negated=(sym==SYM_CHR&&val=='^');
  if(negated) getsym();
  p=chgroup();
  if(negated) p=newExcept(any,p);
  if(sym==SYM_CHR&&val=='-') {
    int sub;
    getsym();
    chk_get('[',RX_ER_NOLSQ);
    sub=chexpr();
    p=newExcept(p,sub);
    chk_get(']',RX_ER_NORSQ);
  }
  return p;
}

static int expression(void);
static int atom(void) {
  int p=0;
  switch(sym) {
  case SYM_CHR:
    switch(val) {
    case '[': getsym(); p=chexpr(); chk_get(']',RX_ER_NORSQ); break;
    case '(': getsym(); p=expression(); chk_get(')',RX_ER_NORPA); break;
    case '{': case '?': case '*': case '+': case '|':
    case ')': case ']': case '}': error(RX_ER_BADCH); getsym(); break;
    default: p=newChar(val); getsym(); break;
    }
    break;
  case SYM_ESC: p=newChar(val); getsym(); break;
  case SYM_CLS: p=cls(val); getsym(); break;
  default: error(RX_ER_BADCH); getsym(); break;
  }
  return p;
}

/* Reads a run of decimal digits and returns its value; returns 0 without
   consuming anything when the current symbol is not a digit. */
static int number(void) {
  int n=0;
  while(sym==SYM_CHR&&val>='0'&&val<='9') {
    n=n*10+(val-'0');
    getsym();
  }
  return n;
}

static int quantifier(int p0) {
  int p=empty,n,n0;
  n=n0=number();
  while(n--) p=group(p,p0);
  if(sym==SYM_CHR) {
    if(val==',') {
      getsym();
      if(sym==SYM_CHR && val=='}') {
	p=group(p,choice(empty,one_or_more(p0)));
      } else {
	n=number()-n0; if(n<0) {error(RX_ER_DNUOB); n=0;}
	while(n--) p=group(p,choice(empty,p0));
      }
    }
  } else error(RX_ER_NODGT);
  return p;
}

static int piece(void) {
  int p;
  p=atom();
  if(sym==SYM_CHR) {
    switch(val) {
    case '{': getsym(); p=quantifier(p); chk_get('}',RX_ER_NOLCU); break;
    case '?': getsym(); p=choice(empty,p); break;
    case '*': getsym(); p=choice(empty,one_or_more(p)); break;
    case '+': getsym(); p=one_or_more(p); break;
    default: break;
    }
  }
  return p;
}

/* Parses a sequence of pieces, up to the end of the expression or an
   unescaped '|' or ')', and returns their group; empty if there are none. */
static int branch(void) {
  int p=empty;
  for(;;) {
    if(sym==SYM_END) break;
    if(sym==SYM_CHR&&(val=='|'||val==')')) break;
    p=group(p,piece());
  }
  return p;
}

/* Parses branches separated by '|' and returns their choice. */
static int expression(void) {
  int p=branch();
  while(sym==SYM_CHR&&val=='|') {
    int alt;
    getsym();
    alt=branch();
    p=choice(p,alt);
  }
  return p;
}

/* Starts parsing the expression stored at offset r of regex[]: resets the
   read position and the error count, then reads the first symbol. */
static void bind(int r) {
  r0=r;
  ri=r;
  sym=-1;
  errors=0;
  getsym();
}

static int compile(char *rx) {
  int r=0,p=0,d_r;
  d_r=add_r(rx);
  if((r=ht_get(&ht_r,i_r))==-1) {
    if(rx_compact&&i_p>=P_AVG_SIZE*LIM_P) {rx_clear(); d_r=add_r(rx);}
    ht_put(&ht_r,r=i_r);
    i_r+=d_r;
    bind(r); p=expression(); if(sym!=SYM_END) error(RX_ER_BADCH);
    r2p[i_2][0]=r; r2p[i_2][1]=p;
    ht_put(&ht_2,i_2++);
    if(i_2==len_2) r2p=(int(*)[2])m_stretch(r2p,len_2=2*i_2,i_2,sizeof(int[2]));
  } else {
    r2p[i_2][0]=r;
    p=r2p[ht_get(&ht_2,i_2)][1];
  }
  return p;
}

#include "rx_cls_ranges.c"

static int in_class(int c,int cn) {
  switch(cn) {
  case 0: return 0;
  case CLS_U_C: return in_class(c,CLS_U_Cc)||in_class(c,CLS_U_Cf)||in_class(c,CLS_U_Co);
  case CLS_U_Cc: return u_in_ranges(c,CcRanges,sizeof(CcRanges)/sizeof(int[2]));
  case CLS_U_Cf: return u_in_ranges(c,CfRanges,sizeof(CfRanges)/sizeof(int[2]));
  case CLS_U_Co: return u_in_ranges(c,CoRanges,sizeof(CoRanges)/sizeof(int[2]));
  case CLS_U_IsAlphabeticPresentationForms: return u_in_ranges(c,IsAlphabeticPresentationFormsRanges,sizeof(IsAlphabeticPresentationFormsRanges)/sizeof(int[2]));
  case CLS_U_IsArabic: return u_in_ranges(c,IsArabicRanges,sizeof(IsArabicRanges)/sizeof(int[2]));
  case CLS_U_IsArabicPresentationForms_A: return u_in_ranges(c,IsArabicPresentationForms_ARanges,sizeof(IsArabicPresentationForms_ARanges)/sizeof(int[2]));
  case CLS_U_IsArabicPresentationForms_B: return u_in_ranges(c,IsArabicPresentationForms_BRanges,sizeof(IsArabicPresentationForms_BRanges)/sizeof(int[2]));
  case CLS_U_IsArmenian: return u_in_ranges(c,IsArmenianRanges,sizeof(IsArmenianRanges)/sizeof(int[2]));
  case CLS_U_IsArrows: return u_in_ranges(c,IsArrowsRanges,sizeof(IsArrowsRanges)/sizeof(int[2]));
  case CLS_U_IsBasicLatin: return u_in_ranges(c,IsBasicLatinRanges,sizeof(IsBasicLatinRanges)/sizeof(int[2]));
  case CLS_U_IsBengali: return u_in_ranges(c,IsBengaliRanges,sizeof(IsBengaliRanges)/sizeof(int[2]));
  case CLS_U_IsBlockElements: return u_in_ranges(c,IsBlockElementsRanges,sizeof(IsBlockElementsRanges)/sizeof(int[2]));
  case CLS_U_IsBopomofo: return u_in_ranges(c,IsBopomofoRanges,sizeof(IsBopomofoRanges)/sizeof(int[2]));
  case CLS_U_IsBopomofoExtended: return u_in_ranges(c,IsBopomofoExtendedRanges,sizeof(IsBopomofoExtendedRanges)/sizeof(int[2]));
  case CLS_U_IsBoxDrawing: return u_in_ranges(c,IsBoxDrawingRanges,sizeof(IsBoxDrawingRanges)/sizeof(int[2]));
  case CLS_U_IsBraillePatterns: return u_in_ranges(c,IsBraillePatternsRanges,sizeof(IsBraillePatternsRanges)/sizeof(int[2]));
  case CLS_U_IsByzantineMusicalSymbols: return u_in_ranges(c,IsByzantineMusicalSymbolsRanges,sizeof(IsByzantineMusicalSymbolsRanges)/sizeof(int[2]));
  case CLS_U_IsCJKCompatibility: return u_in_ranges(c,IsCJKCompatibilityRanges,sizeof(IsCJKCompatibilityRanges)/sizeof(int[2]));
  case CLS_U_IsCJKCompatibilityForms: return u_in_ranges(c,IsCJKCompatibilityFormsRanges,sizeof(IsCJKCompatibilityFormsRanges)/sizeof(int[2]));
  case CLS_U_IsCJKCompatibilityIdeographs: return u_in_ranges(c,IsCJKCompatibilityIdeographsRanges,sizeof(IsCJKCompatibilityIdeographsRanges)/sizeof(int[2]));
  case CLS_U_IsCJKCompatibilityIdeographsSupplement: return u_in_ranges(c,IsCJKCompatibilityIdeographsSupplementRanges,sizeof(IsCJKCompatibilityIdeographsSupplementRanges)/sizeof(int[2]));
  case CLS_U_IsCJKRadicalsSupplement: return u_in_ranges(c,IsCJKRadicalsSupplementRanges,sizeof(IsCJKRadicalsSupplementRanges)/sizeof(int[2]));
  case CLS_U_IsCJKSymbolsandPunctuation: return u_in_ranges(c,IsCJKSymbolsandPunctuationRanges,sizeof(IsCJKSymbolsandPunctuationRanges)/sizeof(int[2]));
  case CLS_U_IsCJKUnifiedIdeographs: return u_in_ranges(c,IsCJKUnifiedIdeographsRanges,sizeof(IsCJKUnifiedIdeographsRanges)/sizeof(int[2]));
  case CLS_U_IsCJKUnifiedIdeographsExtensionA: return u_in_ranges(c,IsCJKUnifiedIdeographsExtensionARanges,sizeof(IsCJKUnifiedIdeographsExtensionARanges)/sizeof(int[2]));
  case CLS_U_IsCJKUnifiedIdeographsExtensionB: return u_in_ranges(c,IsCJKUnifiedIdeographsExtensionBRanges,sizeof(IsCJKUnifiedIdeographsExtensionBRanges)/sizeof(int[2]));
  case CLS_U_IsCherokee: return u_in_ranges(c,IsCherokeeRanges,sizeof(IsCherokeeRanges)/sizeof(int[2]));
  case CLS_U_IsCombiningDiacriticalMarks: return u_in_ranges(c,IsCombiningDiacriticalMarksRanges,sizeof(IsCombiningDiacriticalMarksRanges)/sizeof(int[2]));
  case CLS_U_IsCombiningHalfMarks: return u_in_ranges(c,IsCombiningHalfMarksRanges,sizeof(IsCombiningHalfMarksRanges)/sizeof(int[2]));
  case CLS_U_IsCombiningMarksforSymbols: return u_in_ranges(c,IsCombiningMarksforSymbolsRanges,sizeof(IsCombiningMarksforSymbolsRanges)/sizeof(int[2]));
  case CLS_U_IsControlPictures: return u_in_ranges(c,IsControlPicturesRanges,sizeof(IsControlPicturesRanges)/sizeof(int[2]));
  case CLS_U_IsCurrencySymbols: return u_in_ranges(c,IsCurrencySymbolsRanges,sizeof(IsCurrencySymbolsRanges)/sizeof(int[2]));
  case CLS_U_IsCyrillic: return u_in_ranges(c,IsCyrillicRanges,sizeof(IsCyrillicRanges)/sizeof(int[2]));
  case CLS_U_IsDeseret: return u_in_ranges(c,IsDeseretRanges,sizeof(IsDeseretRanges)/sizeof(int[2]));
  case CLS_U_IsDevanagari: return u_in_ranges(c,IsDevanagariRanges,sizeof(IsDevanagariRanges)/sizeof(int[2]));
  case CLS_U_IsDingbats: return u_in_ranges(c,IsDingbatsRanges,sizeof(IsDingbatsRanges)/sizeof(int[2]));
  case CLS_U_IsEnclosedAlphanumerics: return u_in_ranges(c,IsEnclosedAlphanumericsRanges,sizeof(IsEnclosedAlphanumericsRanges)/sizeof(int[2]));
  case CLS_U_IsEnclosedCJKLettersandMonths: return u_in_ranges(c,IsEnclosedCJKLettersandMonthsRanges,sizeof(IsEnclosedCJKLettersandMonthsRanges)/sizeof(int[2]));
  case CLS_U_IsEthiopic: return u_in_ranges(c,IsEthiopicRanges,sizeof(IsEthiopicRanges)/sizeof(int[2]));
  case CLS_U_IsGeneralPunctuation: return u_in_ranges(c,IsGeneralPunctuationRanges,sizeof(IsGeneralPunctuationRanges)/sizeof(int[2]));
  case CLS_U_IsGeometricShapes: return u_in_ranges(c,IsGeometricShapesRanges,sizeof(IsGeometricShapesRanges)/sizeof(int[2]));
  case CLS_U_IsGeorgian: return u_in_ranges(c,IsGeorgianRanges,sizeof(IsGeorgianRanges)/sizeof(int[2]));
  case CLS_U_IsGothic: return u_in_ranges(c,IsGothicRanges,sizeof(IsGothicRanges)/sizeof(int[2]));
  case CLS_U_IsGreek: return u_in_ranges(c,IsGreekRanges,sizeof(IsGreekRanges)/sizeof(int[2]));
  case CLS_U_IsGreekExtended: return u_in_ranges(c,IsGreekExtendedRanges,sizeof(IsGreekExtendedRanges)/sizeof(int[2]));
  case CLS_U_IsGujarati: return u_in_ranges(c,IsGujaratiRanges,sizeof(IsGujaratiRanges)/sizeof(int[2]));
  case CLS_U_IsGurmukhi: return u_in_ranges(c,IsGurmukhiRanges,sizeof(IsGurmukhiRanges)/sizeof(int[2]));
  case CLS_U_IsHalfwidthandFullwidthForms: return u_in_ranges(c,IsHalfwidthandFullwidthFormsRanges,sizeof(IsHalfwidthandFullwidthFormsRanges)/sizeof(int[2]));
  case CLS_U_IsHangulCompatibilityJamo: return u_in_ranges(c,IsHangulCompatibilityJamoRanges,sizeof(IsHangulCompatibilityJamoRanges)/sizeof(int[2]));
  case CLS_U_IsHangulJamo: return u_in_ranges(c,IsHangulJamoRanges,sizeof(IsHangulJamoRanges)/sizeof(int[2]));
  case CLS_U_IsHangulSyllables: return u_in_ranges(c,IsHangulSyllablesRanges,sizeof(IsHangulSyllablesRanges)/sizeof(int[2]));
  case CLS_U_IsHebrew: return u_in_ranges(c,IsHebrewRanges,sizeof(IsHebrewRanges)/sizeof(int[2]));
  case CLS_U_IsHiragana: return u_in_ranges(c,IsHiraganaRanges,sizeof(IsHiraganaRanges)/sizeof(int[2]));
  case CLS_U_IsIPAExtensions: return u_in_ranges(c,IsIPAExtensionsRanges,sizeof(IsIPAExtensionsRanges)/sizeof(int[2]));
  case CLS_U_IsIdeographicDescriptionCharacters: return u_in_ranges(c,IsIdeographicDescriptionCharactersRanges,sizeof(IsIdeographicDescriptionCharactersRanges)/sizeof(int[2]));
  case CLS_U_IsKanbun: return u_in_ranges(c,IsKanbunRanges,sizeof(IsKanbunRanges)/sizeof(int[2]));
  case CLS_U_IsKangxiRadicals: return u_in_ranges(c,IsKangxiRadicalsRanges,sizeof(IsKangxiRadicalsRanges)/sizeof(int[2]));
  case CLS_U_IsKannada: return u_in_ranges(c,IsKannadaRanges,sizeof(IsKannadaRanges)/sizeof(int[2]));
  case CLS_U_IsKatakana: return u_in_ranges(c,IsKatakanaRanges,sizeof(IsKatakanaRanges)/sizeof(int[2]));
  case CLS_U_IsKhmer: return u_in_ranges(c,IsKhmerRanges,sizeof(IsKhmerRanges)/sizeof(int[2]));
  case CLS_U_IsLao: return u_in_ranges(c,IsLaoRanges,sizeof(IsLaoRanges)/sizeof(int[2]));
  case CLS_U_IsLatin_1Supplement: return u_in_ranges(c,IsLatin_1SupplementRanges,sizeof(IsLatin_1SupplementRanges)/sizeof(int[2]));
  case CLS_U_IsLatinExtended_A: return u_in_ranges(c,IsLatinExtended_ARanges,sizeof(IsLatinExtended_ARanges)/sizeof(int[2]));
  case CLS_U_IsLatinExtended_B: return u_in_ranges(c,IsLatinExtended_BRanges,sizeof(IsLatinExtended_BRanges)/sizeof(int[2]));
  case CLS_U_IsLatinExtendedAdditional: return u_in_ranges(c,IsLatinExtendedAdditionalRanges,sizeof(IsLatinExtendedAdditionalRanges)/sizeof(int[2]));
  case CLS_U_IsLetterlikeSymbols: return u_in_ranges(c,IsLetterlikeSymbolsRanges,sizeof(IsLetterlikeSymbolsRanges)/sizeof(int[2]));
  case CLS_U_IsMalayalam: return u_in_ranges(c,IsMalayalamRanges,sizeof(IsMalayalamRanges)/sizeof(int[2]));
  case CLS_U_IsMathematicalAlphanumericSymbols: return u_in_ranges(c,IsMathematicalAlphanumericSymbolsRanges,sizeof(IsMathematicalAlphanumericSymbolsRanges)/sizeof(int[2]));
  case CLS_U_IsMathematicalOperators: return u_in_ranges(c,IsMathematicalOperatorsRanges,sizeof(IsMathematicalOperatorsRanges)/sizeof(int[2]));
  case CLS_U_IsMiscellaneousSymbols: return u_in_ranges(c,IsMiscellaneousSymbolsRanges,sizeof(IsMiscellaneousSymbolsRanges)/sizeof(int[2]));
  case CLS_U_IsMiscellaneousTechnical: return u_in_ranges(c,IsMiscellaneousTechnicalRanges,sizeof(IsMiscellaneousTechnicalRanges)/sizeof(int[2]));
  case CLS_U_IsMongolian: return u_in_ranges(c,IsMongolianRanges,sizeof(IsMongolianRanges)/sizeof(int[2]));
  case CLS_U_IsMusicalSymbols: return u_in_ranges(c,IsMusicalSymbolsRanges,sizeof(IsMusicalSymbolsRanges)/sizeof(int[2]));
  case CLS_U_IsMyanmar: return u_in_ranges(c,IsMyanmarRanges,sizeof(IsMyanmarRanges)/sizeof(int[2]));
  case CLS_U_IsNumberForms: return u_in_ranges(c,IsNumberFormsRanges,sizeof(IsNumberFormsRanges)/sizeof(int[2]));
  case CLS_U_IsOgham: return u_in_ranges(c,IsOghamRanges,sizeof(IsOghamRanges)/sizeof(int[2]));
  case CLS_U_IsOldItalic: return u_in_ranges(c,IsOldItalicRanges,sizeof(IsOldItalicRanges)/sizeof(int[2]));
  case CLS_U_IsOpticalCharacterRecognition: return u_in_ranges(c,IsOpticalCharacterRecognitionRanges,sizeof(IsOpticalCharacterRecognitionRanges)/sizeof(int[2]));
  case CLS_U_IsOriya: return u_in_ranges(c,IsOriyaRanges,sizeof(IsOriyaRanges)/sizeof(int[2]));
  case CLS_U_IsPrivateUse: return u_in_ranges(c,IsPrivateUseRanges,sizeof(IsPrivateUseRanges)/sizeof(int[2]));
  case CLS_U_IsRunic: return u_in_ranges(c,IsRunicRanges,sizeof(IsRunicRanges)/sizeof(int[2]));
  case CLS_U_IsSinhala: return u_in_ranges(c,IsSinhalaRanges,sizeof(IsSinhalaRanges)/sizeof(int[2]));
  case CLS_U_IsSmallFormVariants: return u_in_ranges(c,IsSmallFormVariantsRanges,sizeof(IsSmallFormVariantsRanges)/sizeof(int[2]));
  case CLS_U_IsSpacingModifierLetters: return u_in_ranges(c,IsSpacingModifierLettersRanges,sizeof(IsSpacingModifierLettersRanges)/sizeof(int[2]));
  case CLS_U_IsSpecials: return u_in_ranges(c,IsSpecialsRanges,sizeof(IsSpecialsRanges)/sizeof(int[2]));
  case CLS_U_IsSuperscriptsandSubscripts: return u_in_ranges(c,IsSuperscriptsandSubscriptsRanges,sizeof(IsSuperscriptsandSubscriptsRanges)/sizeof(int[2]));
  case CLS_U_IsSyriac: return u_in_ranges(c,IsSyriacRanges,sizeof(IsSyriacRanges)/sizeof(int[2]));
  case CLS_U_IsTags: return u_in_ranges(c,IsTagsRanges,sizeof(IsTagsRanges)/sizeof(int[2]));
  case CLS_U_IsTamil: return u_in_ranges(c,IsTamilRanges,sizeof(IsTamilRanges)/sizeof(int[2]));
  case CLS_U_IsTelugu: return u_in_ranges(c,IsTeluguRanges,sizeof(IsTeluguRanges)/sizeof(int[2]));
  case CLS_U_IsThaana: return u_in_ranges(c,IsThaanaRanges,sizeof(IsThaanaRanges)/sizeof(int[2]));
  case CLS_U_IsThai: return u_in_ranges(c,IsThaiRanges,sizeof(IsThaiRanges)/sizeof(int[2]));
  case CLS_U_IsTibetan: return u_in_ranges(c,IsTibetanRanges,sizeof(IsTibetanRanges)/sizeof(int[2]));
  case CLS_U_IsUnifiedCanadianAboriginalSyllabics: return u_in_ranges(c,IsUnifiedCanadianAboriginalSyllabicsRanges,sizeof(IsUnifiedCanadianAboriginalSyllabicsRanges)/sizeof(int[2]));
  case CLS_U_IsYiRadicals: return u_in_ranges(c,IsYiRadicalsRanges,sizeof(IsYiRadicalsRanges)/sizeof(int[2]));
  case CLS_U_IsYiSyllables: return u_in_ranges(c,IsYiSyllablesRanges,sizeof(IsYiSyllablesRanges)/sizeof(int[2]));
  case CLS_U_L: return in_class(c,CLS_U_Ll)||in_class(c,CLS_U_Lm)||in_class(c,CLS_U_Lo)||in_class(c,CLS_U_Lt)||in_class(c,CLS_U_Lu);
  case CLS_U_Ll: return u_in_ranges(c,LlRanges,sizeof(LlRanges)/sizeof(int[2]));
  case CLS_U_Lm: return u_in_ranges(c,LmRanges,sizeof(LmRanges)/sizeof(int[2]));
  case CLS_U_Lo: return u_in_ranges(c,LoRanges,sizeof(LoRanges)/sizeof(int[2]));
  case CLS_U_Lt: return u_in_ranges(c,LtRanges,sizeof(LtRanges)/sizeof(int[2]));
  case CLS_U_Lu: return u_in_ranges(c,LuRanges,sizeof(LuRanges)/sizeof(int[2]));
  case CLS_U_M: return in_class(c,CLS_U_Mc)||in_class(c,CLS_U_Me)||in_class(c,CLS_U_Mn);
  case CLS_U_Mc: return u_in_ranges(c,McRanges,sizeof(McRanges)/sizeof(int[2]));
  case CLS_U_Me: return u_in_ranges(c,MeRanges,sizeof(MeRanges)/sizeof(int[2]));
  case CLS_U_Mn: return u_in_ranges(c,MnRanges,sizeof(MnRanges)/sizeof(int[2]));
  case CLS_U_N: return in_class(c,CLS_U_Nd)||in_class(c,CLS_U_Nl)||in_class(c,CLS_U_No);
  case CLS_U_Nd: return u_in_ranges(c,NdRanges,sizeof(NdRanges)/sizeof(int[2]));
  case CLS_U_Nl: return u_in_ranges(c,NlRanges,sizeof(NlRanges)/sizeof(int[2]));
  case CLS_U_No: return u_in_ranges(c,NoRanges,sizeof(NoRanges)/sizeof(int[2]));
  case CLS_U_P: return in_class(c,CLS_U_Pc)||in_class(c,CLS_U_Pd)||in_class(c,CLS_U_Pe)||in_class(c,CLS_U_Pf)||in_class(c,CLS_U_Pi)||in_class(c,CLS_U_Po)||in_class(c,CLS_U_Ps);
  case CLS_U_Pc: return u_in_ranges(c,PcRanges,sizeof(PcRanges)/sizeof(int[2]));
  case CLS_U_Pd: return u_in_ranges(c,PdRanges,sizeof(PdRanges)/sizeof(int[2]));
  case CLS_U_Pe: return u_in_ranges(c,PeRanges,sizeof(PeRanges)/sizeof(int[2]));
  case CLS_U_Pf: return u_in_ranges(c,PfRanges,sizeof(PfRanges)/sizeof(int[2]));
  case CLS_U_Pi: return u_in_ranges(c,PiRanges,sizeof(PiRanges)/sizeof(int[2]));
  case CLS_U_Po: return u_in_ranges(c,PoRanges,sizeof(PoRanges)/sizeof(int[2]));
  case CLS_U_Ps: return u_in_ranges(c,PsRanges,sizeof(PsRanges)/sizeof(int[2]));
  case CLS_U_S: return in_class(c,CLS_U_Sc)||in_class(c,CLS_U_Sk)||in_class(c,CLS_U_Sm)||in_class(c,CLS_U_So);
  case CLS_U_Sc: return u_in_ranges(c,ScRanges,sizeof(ScRanges)/sizeof(int[2]));
  case CLS_U_Sk: return u_in_ranges(c,SkRanges,sizeof(SkRanges)/sizeof(int[2]));
  case CLS_U_Sm: return u_in_ranges(c,SmRanges,sizeof(SmRanges)/sizeof(int[2]));
  case CLS_U_So: return u_in_ranges(c,SoRanges,sizeof(SoRanges)/sizeof(int[2]));
  case CLS_U_Z: return in_class(c,CLS_U_Zl)||in_class(c,CLS_U_Zp)||in_class(c,CLS_U_Zs);
  case CLS_U_Zl: return u_in_ranges(c,ZlRanges,sizeof(ZlRanges)/sizeof(int[2]));
  case CLS_U_Zp: return u_in_ranges(c,ZpRanges,sizeof(ZpRanges)/sizeof(int[2]));
  case CLS_U_Zs: return u_in_ranges(c,ZsRanges,sizeof(ZsRanges)/sizeof(int[2]));
  case CLS_NL: return c=='\n'||c=='\r';
  case CLS_S: return xmlc_white_space(c);
  case CLS_I: return xmlc_base_char(c)||xmlc_ideographic(c)||c=='_'||c==':';
  case CLS_C: return in_class(c,CLS_I)||xmlc_digit(c)||xmlc_combining_char(c)||xmlc_extender(c)||c=='.'||c=='-';
  case CLS_W: return !(in_class(c,CLS_U_P)||in_class(c,CLS_U_Z)||in_class(c,CLS_U_C));
  default: assert(0);
  }
  return 0;
}


/* Returns the pattern that remains to be matched once character c has been
 * consumed from pattern p (the derivative of p with respect to c).
 *   p - pattern index; must not be a P_ERROR pattern (asserted)
 *   c - character code to consume
 * Results are cached per (p,c) pair through the memo helpers.
 * NOTE(review): new_memo appears to return -1 when no cached result exists
 * for (p,c); its definition is outside this section -- confirm.
 */
static int drv(int p,int c) {
  int p1,p2,cf,cl,cn,ret,m;
  assert(!P_IS(p,P_ERROR));
  /* reuse a previously stored result when there is one */
  m=new_memo(p,c);
  if(m!=-1) return M_RET(m);
  switch(P_TYP(p)) {
  /* nothing can be consumed from notAllowed or from empty */
  case P_NOT_ALLOWED: case P_EMPTY: ret=notAllowed; break;
  /* choice: derive both alternatives */
  case P_CHOICE: Choice(p,p1,p2); ret=choice(drv(p1,c),drv(p2,c)); break;
  /* group: derive the head; when the head is nullable the character may
     also be taken by the tail */
  case P_GROUP: Group(p,p1,p2); {int p11=group(drv(p1,c),p2); ret=nullable(p1)?choice(p11,drv(p2,c)):p11;} break;
  /* one-or-more: derive one occurrence, followed by zero or more of p */
  case P_ONE_OR_MORE: OneOrMore(p,p1); ret=group(drv(p1,c),choice(empty,p)); break;
  /* except: c is accepted when p1 is finished by c and p2 is not */
  case P_EXCEPT: Except(p,p1,p2); ret=nullable(drv(p1,c))&&!nullable(drv(p2,c))?empty:notAllowed; break;
  /* inclusive character range cf..cl */
  case P_RANGE: Range(p,cf,cl); ret=cf<=c&&c<=cl?empty:notAllowed; break;
  /* named character class, tested by in_class */
  case P_CLASS: Class(p,cn); ret=in_class(c,cn)?empty:notAllowed; break;
  /* any character matches */
  case P_ANY: ret=empty; break;
  /* single literal character */
  case P_CHAR: Char(p,cf); ret=c==cf?empty:notAllowed; break;
  default: ret=0; assert(0);
  }
  /* NOTE(review): new_memo is called a second time before storing --
     presumably because the recursive drv calls above reuse the pending
     memo slot; confirm against new_memo/accept_m */
  new_memo(p,c); M_SET(ret);
  accept_m();
  return ret;
}

/* Compiles rx and returns 1 when the error flag is clear afterwards,
 * 0 otherwise; the compiled pattern itself is discarded.
 */
int rx_check(char *rx) {
  (void)compile(rx);
  return errors==0;
}

/* Matches the n bytes starting at s against regular expression rx.
 * The pattern is derived once per decoded character; the string matches
 * when the pattern left at the end of input is nullable.
 * Returns 0 when rx fails to compile or the string does not match.
 */
int rx_match(char *rx,char *s,int n) {
  int pat=compile(rx);
  char *stop;
  int ch;
  if(errors) return 0;
  stop=s+n;
  while(pat!=notAllowed) {
    if(s==stop) return nullable(pat);
    s+=u_get(&ch,s); /* u_get's return value advances s past the character */
    pat=drv(pat,ch);
  }
  return 0;
}

/* Like rx_match, but every character recognised by xmlc_white_space is
 * replaced with a plain space before it is fed to the pattern.
 * Returns 0 when rx fails to compile or the string does not match.
 */
int rx_rmatch(char *rx,char *s,int n) {
  int pat=compile(rx);
  char *stop;
  int ch;
  if(errors) return 0;
  stop=s+n;
  while(pat!=notAllowed) {
    if(s==stop) return nullable(pat);
    s+=u_get(&ch,s);
    pat=drv(pat,xmlc_white_space(ch)?' ':ch);
  }
  return 0;
}

/* Matches the n bytes starting at s against regular expression rx after
 * collapsing whitespace: leading and trailing whitespace is ignored and
 * every internal run of whitespace is presented to the pattern as one
 * space character.
 * Returns non-zero on a match, 0 on a mismatch or when rx fails to compile.
 *
 * A whitespace run is only turned into a space once a non-space character
 * follows it. Deciding earlier mishandles trailing whitespace: "a " would
 * match "ab" (the pattern state before the space was never checked for
 * nullability) and would fail to match "a( b)?" (the trailing space was
 * consumed as if it were significant).
 */
int rx_cmatch(char *rx,char *s,int n) {
  int p=compile(rx);
  if(!errors) {
    char *end=s+n;
    int u;
    int seen=0; /* a non-space character has already been consumed */
    int gap=0;  /* whitespace seen since the last non-space character */
    for(;;) {
      if(p==notAllowed) return 0;
      /* a pending gap at end of input is trailing whitespace: drop it */
      if(s==end) return nullable(p);
      s+=u_get(&u,s);
      if(xmlc_white_space(u)) {
        gap=seen; /* leading whitespace never produces a gap */
        continue;
      }
      if(gap) {
        gap=0;
        p=drv(p,' ');
        if(p==notAllowed) return 0;
      }
      p=drv(p,u);
      seen=1;
    }
  } else return 0;
}


--- NEW FILE: er.c ---
/* $Id: er.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#include <stdio.h>
#include "er.h"

/* Replaceable output hooks; they initially point at the default
 * implementations defined below in this file.
 */
int (*er_printf)(char *format,...)=&er_default_printf;
int (*er_vprintf)(char *format,va_list ap)=&er_default_vprintf;

/* Default printf-style hook: packs the variable arguments into a va_list
 * and forwards them through the current er_vprintf hook, returning
 * whatever that hook returns.
 */
int er_default_printf(char *format,...) {
  va_list args;
  int n;
  va_start(args,format);
  n=(*er_vprintf)(format,args);
  va_end(args);
  return n;
}
/* Default vprintf-style hook: writes the formatted message to stderr and
 * returns the result of vfprintf.
 */
int er_default_vprintf(char *format,va_list ap) {
  int written=vfprintf(stderr,format,ap);
  return written;
}

--- NEW FILE: s.h ---
/* $Id: s.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#ifndef S_H
#define S_H 1

/* Compares two strings; s1 is null-terminated, s2 is n2 characters long.
 * NOTE(review): the return convention is defined in s.c, which is not
 * visible here -- presumably strcmp-like; confirm.
 */
extern int s_cmpn(char *s1,char *s2,int n2);

/* Compares two tokens; s1 is null-terminated, s2 is n2 characters long.
 * NOTE(review): how "tokens" differ from plain strings is decided in s.c.
 */
extern int s_tokcmpn(char *s1,char *s2,int n2);

/* Hash value for a zero-terminated string. */
extern int s_hval(char *s);

/* Replacement for strdup, which is a non-standard function. */
extern char *s_clone(char *s);

/* Computes the absolute path from a relative path r and a base path b.
 * The caller must ensure that there is enough space in r:
 *   size(r) > strlen(r)+strlen(b)
 * Returns a pointer to the string that contained the relative path
 * (NOTE(review): presumably r itself, now holding the result -- confirm
 * against s.c).
 */
extern char *s_abspath(char *r,char *b);

/* Find a string in a sorted array of size entries; return its index,
 * or size on failure.
 * NOTE(review): s_ntab presumably takes s as len characters rather than
 * null-terminated -- confirm against s.c.
 */
extern int s_tab(char *s,char *tab[],int size);
extern int s_ntab(char *s,int len,char *tab[],int size);

/* Self-test entry point; called from main() in test.c. */
extern void s_test(void);

#endif

--- NEW FILE: xsd_tm.h ---
/* $Id: xsd_tm.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#ifndef XSD_TM_H
#define XSD_TM_H 1

/* A parsed date/time value:
 *   days - day number computed from year, month and day
 *   secs - seconds within the day
 *   mics - microseconds (fractional part of the seconds)
 *   tz   - non-zero when the value carried a time zone
 */
struct xsd_tm {int days,secs,mics,tz;};

/* Parse a date/time string into *tmp.
 * fmt is a combination of the letters y, m, d, t and z naming the fields
 * expected in the value, in order.
 * xsd_mktm takes a null-terminated val; xsd_mktmn takes n characters at s.
 */
extern void xsd_mktm(struct xsd_tm *tmp,char *fmt,char *val);
extern void xsd_mktmn(struct xsd_tm *tmp,char *fmt,char *s,int n);

/* Compare two values: -1 - less, 0 - equal, 1 - greater,
 * any other value - order cannot be determined.
 */
extern int xsd_tmcmp(struct xsd_tm *tmp1, struct xsd_tm *tmp2);

#endif

--- NEW FILE: xsd_tm.c ---
/* $Id: xsd_tm.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#include <stdlib.h> /*strtol*/
#include <limits.h>
#include <string.h> /*strlen*/
#include <assert.h>
#include "xsd_tm.h"

/* Returns 1 when yr is a leap year under the Gregorian rule
 * (divisible by 4, and not by 100 unless also by 400), 0 otherwise.
 */
static int leap(int yr) {
  if(yr%4!=0) return 0;
  if(yr%100!=0) return 1;
  return yr%400==0;
}
/* Converts a count of years to a count of days: 365 per year plus one
 * for every 4th year, minus one for every 100th, plus one for every 400th.
 */
static int y2d(int yr) {
  int days=yr*365;
  days+=yr/4;
  days-=yr/100;
  days+=yr/400;
  return days;
}
/* Returns the ordinal day within year yr for month mo (1..12) and day of
 * month dy. A month outside 1..12 contributes no month offset.
 */
static int ymd2dn(int yr,int mo,int dy) {
  /* days that precede the first of each month in a non-leap year */
  static const int before[12]={0,31,59,90,120,151,181,212,243,273,304,334};
  if(mo>=1&&mo<=12) dy+=before[mo-1];
  /* past February a leap year has one extra day */
  if(mo>2&&leap(yr)) ++dy;
  return dy;
}

/* Returns an absolute day number for the given date: the days in the
 * years before yr (y2d(yr)-366 for negative years) plus the ordinal day
 * within yr.
 */
static int ymd2ds(int yr,int mo,int dy) {
  int base;
  if(yr>=0) base=y2d(yr-1);
  else base=y2d(yr)-366;
  return base+ymd2dn(yr,mo,dy);
}

/* seconds in one day (24*60*60) */
#define DAYSECS 86400
/* 14 hours in seconds; xsd_tmcmp shifts a value without a time zone by
   +/- this amount when comparing it with a value that has one */
#define TZSECS 50400

/* Adds secs (may be negative) to tmp->secs and carries at most one day
 * into tmp->days so that the seconds move back towards 0..DAYSECS-1.
 */
static void addsecs(struct xsd_tm *tmp,int secs) {
  int total=tmp->secs+secs;
  if(total<0) {
    total+=DAYSECS;
    tmp->days-=1;
  } else if(total>=DAYSECS) {
    total-=DAYSECS;
    tmp->days+=1;
  }
  tmp->secs=total;
}

/* Parses the n characters at s as a date/time value and stores it in *tmp.
 *   fmt - sequence of field letters expected in the input, in order:
 *         y year, m month, d day, t time (hh:mm:ss[.fff]), z time zone
 * Fields that do not occur keep their defaults (2000-01-01T00:00:00).
 * zh==15 is a sentinel meaning "no time zone parsed"; tmp->tz is set from it.
 * NOTE(review): strtol/strtod are not bounded by n, so the input is assumed
 * to be lexically valid and followed by a non-numeric character -- confirm
 * that callers guarantee this.
 */
void xsd_mktmn(struct xsd_tm *tmp,char *fmt,char *s,int n) {
  char *end=s+n;
  int yr=2000,mo=1,dy=1,hr=0,mi=0,zh=15,zm=0;
  double se=0.0;
  for(;;) {
    if(s==end||!*fmt) break;
    switch(*s) {
    case '-':
      /* a '-' is a year sign, a zone sign, or just a field separator */
      switch(*fmt) {
      case 'y': ++fmt; yr=strtol(s,&s,10); continue;
      /* "-hh:mm": skip the sign, so the offset is stored positive */
      case 'z': ++fmt; ++s; zh=strtol(s,&s,10); ++s; zm=strtol(s,&s,10); continue;
      }
      break;
    /* "+hh:mm": stored negated, so the addsecs call below subtracts it */
    case '+': assert(*fmt=='z'); ++fmt;
	  zh=-strtol(s,&s,10); ++s; zm=-strtol(s,&s,10); continue;
    case 'Z': assert(*fmt=='z'); ++fmt; zh=0; zm=0; ++s; continue;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      /* a digit starts the next field named by fmt */
      switch(*(fmt++)) {
      case 'y': yr=strtol(s,&s,10); continue;
      case 'm': mo=strtol(s,&s,10); continue;
      case 'd': dy=strtol(s,&s,10); continue;
      /* each ++s steps over the ':' between the time components */
      case 't': hr=strtol(s,&s,10); ++s; mi=strtol(s,&s,10); ++s; se=strtod(s,&s); continue;
      }
      break;
    }
    /* separator or unrecognised character: skip it */
    ++s;
  }
  /* fractional seconds, rounded to the nearest microsecond */
  tmp->mics=(int)((se-(int)se)*1000000+0.5);
  tmp->secs=(int)se+60*(mi+60*hr);
  tmp->days=ymd2ds(yr,mo,dy);
  /* when a zone was given, apply its (sign-adjusted) offset */
  if((tmp->tz=(zh!=15))) addsecs(tmp,60*(zm+60*zh));
}
/* Convenience wrapper around xsd_mktmn for a null-terminated value. */
void xsd_mktm(struct xsd_tm *tmp,char *fmt,char *val) {
  int len=strlen(val);
  xsd_mktmn(tmp,fmt,val,len);
}

/* Three-way comparison of two values by days, then seconds, then
 * microseconds; the tz field is not consulted.
 * Returns -1, 0 or 1.
 */
static int tmcmp(struct xsd_tm *tmp1, struct xsd_tm *tmp2) {
  int diff[3],i;
  diff[0]=tmp1->days-tmp2->days;
  diff[1]=tmp1->secs-tmp2->secs;
  diff[2]=tmp1->mics-tmp2->mics;
  /* the first non-zero difference decides */
  for(i=0;i!=3;++i) {
    if(diff[i]<0) return -1;
    if(diff[i]>0) return 1;
  }
  return 0;
}

extern int xsd_tmcmp(struct xsd_tm *tmp1, struct xsd_tm *tmp2) {
  if(tmp1->tz==tmp2->tz) {
    return tmcmp(tmp1,tmp2);
  } else if(tmp1->tz) {
    struct xsd_tm tm; tm.mics=tmp2->mics;
    tm.days=tmp2->days; tm.secs=tmp2->secs; addsecs(&tm,TZSECS);
    if(tmcmp(tmp1,&tm)==1) return 1;
    tm.days=tmp2->days; tm.secs=tmp2->secs; addsecs(&tm,-TZSECS);
    if(tmcmp(tmp1,&tm)==-1) return -1;
    return 2;
  } else return -xsd_tmcmp(tmp2,tmp1);
}


--- NEW FILE: readme.txt ---

RNV -- Relax NG Compact Syntax Validator in C

Version 1.7 

   Table of Contents

   News since 1.6
   New since 1.5 
   Acknowledgements
   Package Contents 
   Installation 
   Invocation 
   Limitations 
   Applications

        ARX 
        RVP 

   User-Defined Datatype Libraries

        Datatype Library Plug-in
        Scheme Datatypes

   New versions 

   Abstract

   RNV is an implementation of Relax NG Compact Syntax,
   http://relaxng.org/compact-20021121.html. It is written in ANSI C,
   the command-line utility uses Expat,
   http://www.jclark.com/xml/expat.html. It is distributed under BSD
   license, see license.txt for details.

   RNV is a part of an on-going work, and the current code can have bugs
   and shortcomings; however, it validates documents against a number of
   grammars. I use it.

News since 1.6

   The format for error messages is similar to that of Jing (file name,
   line and column are colon-separated). Entities and DTD processing is
   moved out of RNV, use XX, available from the same download location,
   to expand entities.

New since 1.5

   Better reporting: required and permitted content is reported
   separately; it helps debug grammars. Several bugfixes; I relied on an
   acquired test suite and published schemata, but have found that I can
   make more bugs than they cover, thus a reworked and extended test suite
   is now used for testing. The code has also been cleaned up and
   simplified in places during porting to Plan9.

Acknowledgements

   I would like to thank those who have helped me develop RNV.

   Dave Pawson has been the first user of the program.

   Alexander Peshkov helps me with testing and I have been able to
   correct very well hidden errors with his help.

   Sebastian Rahtz encouraged me to continue working on RNV since the
   first release, and has helped me to improve it on more than one
   occasion.

Package Contents

Note

   I have put rnv.exe and arx.exe, Win32 executables statically linked
   with a current version of Expat from
   http://expat.sourceforge.net/, into a separate distribution
   archive (with name ending in -win32bin). It contains only the program
   binaries and should be available from the same location as the source
   distribution.

   The package consists of:
     * the license, license.txt;
     * the source code, *.[ch];
     * the source code map, src.txt;
     * Makefile.bsd for BSD make;
     * Makefile.gnu for GNU Make;
     * Makefile.bcc for Win32 and Borland C/C++ Compiler;
     * tools/xck, a simple shell script I am using to validate documents;
     * tools/*.rnc, sample Relax NG grammars;
     * scm/*.scm, program modules in Scheme, for Scheme Datatypes
       Library;
     * the log of changes, changes.txt;
     * this file, readme.txt.
     * Other scripts, samples and plug-ins appear in tools/ eventually.

Installation

   On Unix-like systems, run make -f Makefile.gnu or make -f
   Makefile.bsd, depending on which flavour of make you have;
   Makefile.bsd should probably work on SysV, but, unfortunately, I have
   no place to check for the last couple of years. (If you are using Expat
   1.2, define EXPAT_H as xmlparse.h instead of expat.h.)

   On Windows, use rnv.exe. To recompile from the sources, use
   Makefile.bcc with Borland C/C++ Compiler, or create a makefile or
   project for your environment.

Invocation

   The command-line syntax is

        rnv {-q|-p|-c|-s|-v|-h} grammar.rnc {document1.xml}

   If no documents are specified, RNV attempts to read the XML document
   from the standard input. The options are:

   -q
          names of files being processed are not printed; in error
          messages, expected elements and attributes are not listed;

   -n <num>
          sets the maximum number of reported expected elements and
          attributes; -q sets this to 0 and can be overridden;

   -p
          copies the input to the output;

   -c
          if the only argument is a grammar, checks the grammar and
          exits;

   -s
          uses less memory and runs slower;

   -v
          prints version number;

   -h
          displays usage summary and exits.

Limitations

     * RNV assumes that the encoding of the syntax file is UTF-8.
     * Support for XML Schema Part 2: Datatypes is partial.
          + ordering for duration is not implemented;
          + only local parts of QName values are checked for equality,
            ENTITY values are only checked for lexical validity.
     * The schema parser does not check that all restrictions are obeyed,
       in particular, restrictions 7.3 and 7.4 are not checked.
     * RNV for Win32 platforms is a Unix program compiled on Win32. It
       expects file paths to be written with normal slashes; if a schema
       is in a different directory and includes or refers external files,
       then the schema's path must be written in the Unix way for the
       relative paths to work. For example, under Windows, rnv that uses
       ..\schema\docbook.rnc to validate userguide.dbx should be invoked
       as

      rnv.exe ../schema/docbook.rnc userguide.dbx

Applications

   The distribution includes several utilities built upon RNV; they are
   listed and described in the following sections.

ARX

   ARX is a tool to automatically determine the type of a document from
   its name and contents. It is inspired by James Clark's schema location
   approach for nXML,
   http://groups.yahoo.com/group/emacs-nxml-mode/message/259, and is
   a development of the idea described in
   http://relaxng.org/pipermail/relaxng-user/2003-December/000214.html.

   ARX is a command-line utility. The invocation syntax is

        arx {-n|-v|-h} document.xml  arx.conf {arx.conf}

   ARX either prints a string corresponding to the document's type or
   nothing if the type cannot be determined. The options are:

   -n
          turns off prepending base path of the configuration file to the
          result, even if it looks like a relative path (useful when the
          configuration file and the grammars are in separate
          directories, or for association with something that is not a
          file);

   -v
          prints current version;

   -h
          displays usage summary and exits.

   The configuration file must conform to the following grammar:

      arx = grammars route*
      grammars = "grammars"  "{" type2string+ "}"
      type2string =  type "=" literal
      type = nmtoken
      route = match|nomatch|valid|invalid
      match = "=~" regexp "=>" type
      nomatch = "!~" regexp "=>" type
      valid = "valid" "{" rng "}" "=>" type
      invalid = "!valid" "{" rng "}" "=>" type

      literal=string in '"', '"' inside must be prepended by '\'
      regexp=string in '/', '/' inside must be prepended by '\'
      rng=Relax NG Compact Syntax

      Comments start with # and continue till the end of line.

   Rules are processed sequentially, the first matching rule determines
   the file's type. Relax NG templates are matched against file contents,
   regular expressions are applied to file names. The sample below
   associates documents with grammars for XSLT, DocBook or XSL FO.

      grammars {
        docbook="docbook.rnc"
        xslt="xslt.rnc"
        xslfo="fo.rnc"
      }

      valid {
        start = element (book|article|chapter|reference) {any}
        any = (element * {any}|attribute * {text}|text)*
      } => docbook

      !valid {
        default namespace xsl = "http://www.w3.org/1999/XSL/Transform"
        start = element *-xsl:* {not-xsl}
        not-xsl = (element *-xsl:* {not-xsl}|attribute * {text}|text)*
      } => xslt

      =~/.*\.xsl/ => xslt
      =~/.*\.fo/ => xslfo

   ARX can also be used to link documents to any type of information or
   processing.

RVP

   RVP is abbreviation for Relax NG Validation Pipe. It reads validation
   primitives from the standard input and reports result to the standard
   output; its main purpose is to ease embedding of a Relax NG validator
   into various languages and environments. An application would launch
   RVP as a parallel process and use a simple protocol to perform
   validation. The protocol, in BNF, is:

     query ::= (
           quit
         | start
         | start-tag-open
         | attribute
         | start-tag-close
         | text
         | end-tag) z.
       quit ::= "quit".
       start ::= "start" [gramno].
       start-tag-open ::= "start-tag-open" patno name.
       attribute ::= "attribute" patno name value.
       start-tag-close ::= "start-tag-close" patno name.
       text ::= ("text"|"mixed") patno text.
       end-tag ::= "end-tag" patno name.
     response ::= (ok | er | error) z.
       ok ::= "ok" patno.
       er ::= "er" patno erno.
       error ::= "error" patno erno error.
     z ::= "\0" .

     * RVP assumes that the last colon in a name separates the local part
       from the namespace URI (this is what one gets by specifying `:' as
       the namespace separator to Expat).
     * Error codes can be grabbed from rvp sources by grep _ER_ *.h and
       OR-ing them with corresponding masks from erbit.h. Additionally,
       error 0 is the protocol format error.
     * Either er or error responses are returned, not both; -q chooses
       between concise and verbose forms (invocation syntax described
       later).
     * start passes the index of a grammar (first grammar in the list of
       command-line arguments has number 0); if the number is omitted, 0
       is assumed.
     * quit is not opposite of start; instead, it quits RVP.

   The command-line syntax is:

        rvp {-q|-s|-v|-h} {schema.rnc}

   The options are:

   -q
          returns only error numbers, suppresses messages;

   -s
          takes less memory and runs slower;

   -v
          prints current version;

   -h
          displays usage summary and exits.

   To assist embedding RVP, samples in Perl (tools/rvp.pl) and Python
   (tools/rvp.py) are provided. The scripts use Expat wrappers for each
   of the languages to parse documents; they take a Relax NG grammar (in
   the compact syntax) as the command line argument and read the XML from
   the standard input. For example, the following commands validate
   rnv.dbx against docbook.rnc:

      perl rvp.pl docbook.rnc < rnv.dbx
      python rvp.py docbook.rnc < rnv.dbx

   The scripts are kept simple and unobscured to illustrate the
   technique, rather than being designed as general-purpose modules.
   Programmers using Perl, Python, Ruby and other languages are
   encouraged to implement and share reusable RVP-based components for
   their languages of choice.

User-Defined Datatype Libraries

   Relax NG relies on XML Schema Datatypes to check validity of data in
   an XML document. The specification allows the implementation to
   support other datatype libraries, a library is required to provide two
   services, datatypeAllows and datatypeEqual.

   A powerful and popular technique is the use of string regular
   expressions to restrict values of attributes and character data.
   However, XML Schema regular expressions must be written as single
   strings, without any parameterization; they often grow to several
   dozens of characters in length and are very hard to read or debug.

   A solution to this problem would be to allow the user to define
   custom datatypes and to specify them in a high-level programming
   language. The user can then either use regular expressions as such,
   employ lex for lexical analysis, or any other technique which is best
   suited for each particular case (for example XSL FO datatypes would
   benefit from a custom datatype library). With many datatype libraries
   eventually implemented, it is likely that a clearer picture of the
   right language for validation of data will eventually emerge.

   RNV provides two different ways to implement this solution; I believe
   that they correspond to different tastes and traditions. In both
   cases, a high-level language can be used to implement a datatype
   library, the language is not related to the implementation language of
   RNV, and RNV need not be recompiled to add a new datatype library.

Datatype Library Plug-in

   A datatype plug-in is an executable. RNV invokes it as either
  program allows type key value ... data

   or
  program equal type data1 data2

   program is the executable's name, the rest is the command line; key
   and value pairs are datatype parameters and can be repeated. The
   program is executed for each datatype in library
   http://davidashen.net/relaxng/pluggable-datatypes; the exit status
   is 0 for success, non-zero for failure.

   Both RNV and RVP can use pluggable datatypes, and must be compiled
   with DXL_EXC set to 1 (make DXL_EXC=1) to support them, in which case
   they accept an additional command-line option -d with the name of the
   plugin as the argument. An implementation of XML Schema datatypes as a
   plugin (in C) is included in the distribution, see xsdck.c. For
   example,
    rnv -d xsdck xslt-dxl.rnc $HOME/work/docbook/xsl/*/*.xsl

   will validate all DocBook XSL stylesheets on my workstation against a
   grammar for XSLT 1.0 modified to use RNV Pluggable Datatypes Library
   instead of XML Schema Datatypes.

Scheme Datatypes

   Another way to add custom datatypes to RNV is to use the built-in
   Scheme interpreter (SCM,
   http://www.swiss.ai.mit.edu/~jaffer/SCM.html) to implement the
   library in Scheme, a dialect of Lisp. This solution is more flexible
   and robust than the previous one, but requires knowledge of a
   particular programming language (or at least desire to learn it, and
   the result is definitely worth the effort).

   To support it, SCM must be installed on the computer, and RNV or RVP
   must be compiled with DSL_SCM set to 1 (make DSL_SCM=1), in which case
   they accept an additional option -e with the name of a scheme program
   as an argument. The datatype library is bound to
   http://davidashen.net/relaxng/scheme-datatypes; a sample
   implementation is in scm/dsl.scm. For example,
    rnv -e scm/dsl.scm xslt-dsl.rnc $HOME/work/docbook/xsl/*/*.xsl

   checks the stylesheets against an XSLT 1.0 grammar modified to use an
   RNV Scheme Datatypes Library implemented in scm/dsl.scm.

   A Datatype Library in Scheme must provide two functions in top-level
   environment:
(dsl-equal? string string string)

   and
(dsl-allows? string '((string . string)*) string)

   To assist development of datatype libraries, a Scheme implementation
   of XML Schema Regular Expressions is included in the distribution as
   scm/rx.scm. The Regular Expression library is not just a way to
   re-implement the built-in datatypes. Owing to flexibility of the
   language it is much easier to write and debug regular expressions in
   Scheme, even if they are to be used with built-in XML Schema Datatypes
   in the end. For example, a regular expression for e-mail address, with
   insignificant simplifications, is:
    pattern=
      "(\(([^\(\)\\]|\\.)*\) )?"
    ~ "([a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+"
    ~ "(\.[a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+)*"
    ~ """|"([^"\\]|\\.)*")"""
    ~ "@"
    ~ "([a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+"
    ~ "(\.[a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+)*"
    ~ "|\[([^\[\]\\]|\\.)*\])"
    ~ "( \(([^\(\)\\]|\\.)*\))?"

   which, even split into four lines, is ugly-looking and hard to read.
   Meanwhile, it consists of a few repeating subexpressions, which could
   easily be factored out, but the syntax does not have the means for
   that.

   Using Scheme interpreter, it is as simple as
(define addr-spec-regex
  (let* (
      (atom "[a-zA-Z0-9!#$%&'*+\\-/=?\\^_`{|}~]+")
      (person "\"([^\"\\\\]|\\\\.)*\"")
      (location "\\[([^\\[\\]\\\\]|\\\\.)*\\]")
      (domain (string-append atom "(\\." atom ")*")))
    (string-append
      "(" domain "|" person ")"
      "@"
      "(" domain "|" location ")")))

   This code is much simpler to read and debug, and then the parts can be
   joined and added to the grammar for production use. Furthermore, it is
   easy to implement the parsing of structured regular expressions
   embedded into parameters of datatypes in Relax NG itself. dsl.scm, the
   sample datatype library, can handle parameter s-pattern with regular
   expressions split into named parts, and the example above becomes:
    s-pattern="""
      comment = "\(([^\(\)\\]|\\.)*\)"
      atom = "[a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+"
      atoms = atom "(\." atom ")*"
      person = "\"([^\"\\]|\\.)*\""
      location = "\[([^\[\]\\]|\\.)*\]"
      local-part = "(" atom "|" person ")"
      domain = "(" atoms "|" location ")"
      start = "(" comment " )?" local-part "@" domain "( " comment ")?"
    """

   addr-spec-dsl.rnc is included in the distribution.

New versions

   Visit http://davidashen.net/ for news and downloads.


--- NEW FILE: sc.c ---
/* $Id: sc.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#include <assert.h> /*assert*/
#include "m.h"
#include "ll.h"
#include "sc.h"

/* Marker values stored in field 1 of the record at the base of a scope:
   BASE for an open scope (written by sc_open), LOCK once sc_lock has been
   called on it */
#define BASE -1
#define LOCK -2

/* initial number of records allocated by sc_init; SC_LEN is defined
   outside this file */
#define LEN SC_LEN

/* Resets the stack to empty and opens the outermost scope, so that the
 * record at index 0 becomes the base marker.
 */
static void windup(struct sc_stack *stp) {
  stp->top=0;
  sc_open(stp);
}

/* Allocates the record table with LEN entries and puts the stack into
 * its initial state (a single open outermost scope).
 */
void sc_init(struct sc_stack *stp) {
  stp->len=LEN;
  stp->tab=(int(*)[SC_RECSIZE])m_alloc(stp->len,sizeof(int[SC_RECSIZE]));
  windup(stp);
}

void sc_clear(struct sc_stack *stp) {
  windup(stp);
}

/* Opens a new scope: the record at the current top becomes the scope's
 * base marker (field 1 set to BASE) and the top moves past it.
 * When the table becomes full it is doubled, the same growth policy
 * sc_add uses. (Multiplying len by top*2 instead would grow the table to
 * 2*len*len records and could overflow int.)
 */
void sc_open(struct sc_stack *stp) {
  stp->base=stp->top++;
  stp->tab[stp->base][1]=BASE;
  if(stp->top==stp->len) {
    stp->len=stp->top*2;
    stp->tab=(int(*)[SC_RECSIZE])m_stretch(
      stp->tab,stp->len,stp->top,sizeof(int[SC_RECSIZE]));
  }
}

/* Returns non-zero when the current scope is the outermost one, i.e. its
 * base marker sits at index 0.
 */
int sc_void(struct sc_stack *stp) {
  return !stp->base;
}

/* Marks the current scope as locked by writing LOCK into field 1 of its
 * base record; sc_add asserts that the scope is not locked.
 */
void sc_lock(struct sc_stack *stp) {
  int *head=stp->tab[stp->base];
  head[1]=LOCK;
}

/* Returns non-zero when the current scope's base record carries the
 * LOCK marker.
 */
int sc_locked(struct sc_stack *stp) {
  int mark=stp->tab[stp->base][1];
  return mark==LOCK;
}

/* Closes the current scope: drops its records by moving top down to its
 * base, then walks downwards to the nearest record whose field 1 is a
 * marker (not greater than BASE) and makes that the current base.
 */
void sc_close(struct sc_stack *stp) {
  int i=stp->base;
  stp->top=i;
  do {
    --i;
  } while(stp->tab[i][1]>BASE);
  stp->base=i;
}

/* Looks key up among the records of the current scope, newest first.
 * The key is first planted in the scope's base record as a sentinel so
 * the downward scan always stops; stopping on the sentinel means the key
 * is absent.
 * Returns the index of the matching record, or 0 if not found.
 */
int sc_find(struct sc_stack *stp,int key) {
  int i;
  stp->tab[stp->base][0]=key;
  for(i=stp->top-1;stp->tab[i][0]!=key;--i)
    ;
  return i==stp->base?0:i;
}

/* Appends a record (key, val, aux) to the current scope, which must not
 * be locked (asserted). The table is doubled when it becomes full.
 * Returns the index of the new record.
 */
int sc_add(struct sc_stack *stp,int key,int val,int aux) {
  int slot=stp->top;
  int *rec;
  assert(!sc_locked(stp));
  rec=stp->tab[slot];
  rec[0]=key;
  rec[1]=val;
  rec[2]=aux;
  stp->top=slot+1;
  if(stp->top==stp->len) {
    stp->len=stp->top*2;
    stp->tab=(int(*)[SC_RECSIZE])m_stretch(
      stp->tab,stp->len,stp->top,sizeof(int[SC_RECSIZE]));
  }
  return slot;
}

--- NEW FILE: test.c ---
#include "s.h"
#include "xsd.h"

/* Test driver: runs the string self-test (s_test, declared in s.h) and
 * xsd_test (presumably declared in xsd.h), then exits with status 0.
 */
int main() {
  s_test();
  xsd_test();
  return 0;
}

--- NEW FILE: sc.h ---
/* $Id: sc.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#ifndef SC_H
#define SC_H 1

/* number of ints per record */
#define SC_RECSIZE 3 /* 0 - key, 1 - value, 2 - auxiliary */

/* A stack of scopes, each holding records of SC_RECSIZE ints:
 *   tab  - the record table
 *   len  - number of records allocated in tab
 *   base - index of the record that marks the start of the current scope
 *   top  - index of the first free record
 */
struct sc_stack {
  int (*tab)[SC_RECSIZE];
  int len,base,top;
};

/* allocate the table and open the outermost scope */
extern void sc_init(struct sc_stack *stp);
/* drop all records and scopes and reopen the outermost scope */
extern void sc_clear(struct sc_stack *stp);

/* open a new scope on top of the stack */
extern void sc_open(struct sc_stack *stp);
/* mark the current scope as locked; sc_add asserts it is not locked */
extern void sc_lock(struct sc_stack *stp);
/* drop the current scope and return to the enclosing one */
extern void sc_close(struct sc_stack *stp);

/* non-zero when the current scope is the outermost one */
extern int sc_void(struct sc_stack *sp);
/* non-zero when the current scope has been locked */
extern int sc_locked(struct sc_stack *stp);

/* search the current scope only */
extern int sc_find(struct sc_stack *stp,int key); /* returns 0 if not found, index in tab otherwise */
extern int sc_add(struct sc_stack *stp,int key,int val,int aux); /* returns index for the new record */

#endif

--- NEW FILE: rx_cls_u.c ---
/* $Id: rx_cls_u.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */

/* Numeric identifiers for the character-class names listed in clstab below.
 * Each CLS_U_<name> equals the index of the string <name> in clstab; a '-'
 * in the string is spelled '_' in the macro name.  NUM_CLS_U is the number
 * of entries, and CLS_U_ (0) corresponds to the empty string.
 * The constants are numbered in the same order as the table.
 */
#define CLS_U_  0
#define CLS_U_C  1
#define CLS_U_Cc  2
#define CLS_U_Cf  3
#define CLS_U_Co  4
#define CLS_U_IsAlphabeticPresentationForms  5
#define CLS_U_IsArabic  6
#define CLS_U_IsArabicPresentationForms_A  7
#define CLS_U_IsArabicPresentationForms_B  8
#define CLS_U_IsArmenian  9
#define CLS_U_IsArrows  10
#define CLS_U_IsBasicLatin  11
#define CLS_U_IsBengali  12
#define CLS_U_IsBlockElements  13
#define CLS_U_IsBopomofo  14
#define CLS_U_IsBopomofoExtended  15
#define CLS_U_IsBoxDrawing  16
#define CLS_U_IsBraillePatterns  17
#define CLS_U_IsByzantineMusicalSymbols  18
#define CLS_U_IsCJKCompatibility  19
#define CLS_U_IsCJKCompatibilityForms  20
#define CLS_U_IsCJKCompatibilityIdeographs  21
#define CLS_U_IsCJKCompatibilityIdeographsSupplement  22
#define CLS_U_IsCJKRadicalsSupplement  23
#define CLS_U_IsCJKSymbolsandPunctuation  24
#define CLS_U_IsCJKUnifiedIdeographs  25
#define CLS_U_IsCJKUnifiedIdeographsExtensionA  26
#define CLS_U_IsCJKUnifiedIdeographsExtensionB  27
#define CLS_U_IsCherokee  28
#define CLS_U_IsCombiningDiacriticalMarks  29
#define CLS_U_IsCombiningHalfMarks  30
#define CLS_U_IsCombiningMarksforSymbols  31
#define CLS_U_IsControlPictures  32
#define CLS_U_IsCurrencySymbols  33
#define CLS_U_IsCyrillic  34
#define CLS_U_IsDeseret  35
#define CLS_U_IsDevanagari  36
#define CLS_U_IsDingbats  37
#define CLS_U_IsEnclosedAlphanumerics  38
#define CLS_U_IsEnclosedCJKLettersandMonths  39
#define CLS_U_IsEthiopic  40
#define CLS_U_IsGeneralPunctuation  41
#define CLS_U_IsGeometricShapes  42
#define CLS_U_IsGeorgian  43
#define CLS_U_IsGothic  44
#define CLS_U_IsGreek  45
#define CLS_U_IsGreekExtended  46
#define CLS_U_IsGujarati  47
#define CLS_U_IsGurmukhi  48
#define CLS_U_IsHalfwidthandFullwidthForms  49
#define CLS_U_IsHangulCompatibilityJamo  50
#define CLS_U_IsHangulJamo  51
#define CLS_U_IsHangulSyllables  52
#define CLS_U_IsHebrew  53
#define CLS_U_IsHiragana  54
#define CLS_U_IsIPAExtensions  55
#define CLS_U_IsIdeographicDescriptionCharacters  56
#define CLS_U_IsKanbun  57
#define CLS_U_IsKangxiRadicals  58
#define CLS_U_IsKannada  59
#define CLS_U_IsKatakana  60
#define CLS_U_IsKhmer  61
#define CLS_U_IsLao  62
#define CLS_U_IsLatin_1Supplement  63
#define CLS_U_IsLatinExtended_A  64
#define CLS_U_IsLatinExtended_B  65
#define CLS_U_IsLatinExtendedAdditional  66
#define CLS_U_IsLetterlikeSymbols  67
#define CLS_U_IsMalayalam  68
#define CLS_U_IsMathematicalAlphanumericSymbols  69
#define CLS_U_IsMathematicalOperators  70
#define CLS_U_IsMiscellaneousSymbols  71
#define CLS_U_IsMiscellaneousTechnical  72
#define CLS_U_IsMongolian  73
#define CLS_U_IsMusicalSymbols  74
#define CLS_U_IsMyanmar  75
#define CLS_U_IsNumberForms  76
#define CLS_U_IsOgham  77
#define CLS_U_IsOldItalic  78
#define CLS_U_IsOpticalCharacterRecognition  79
#define CLS_U_IsOriya  80
#define CLS_U_IsPrivateUse  81
#define CLS_U_IsRunic  82
#define CLS_U_IsSinhala  83
#define CLS_U_IsSmallFormVariants  84
#define CLS_U_IsSpacingModifierLetters  85
#define CLS_U_IsSpecials  86
#define CLS_U_IsSuperscriptsandSubscripts  87
#define CLS_U_IsSyriac  88
#define CLS_U_IsTags  89
#define CLS_U_IsTamil  90
#define CLS_U_IsTelugu  91
#define CLS_U_IsThaana  92
#define CLS_U_IsThai  93
#define CLS_U_IsTibetan  94
#define CLS_U_IsUnifiedCanadianAboriginalSyllabics  95
#define CLS_U_IsYiRadicals  96
#define CLS_U_IsYiSyllables  97
#define CLS_U_L  98
#define CLS_U_Ll  99
#define CLS_U_Lm  100
#define CLS_U_Lo  101
#define CLS_U_Lt  102
#define CLS_U_Lu  103
#define CLS_U_M  104
#define CLS_U_Mc  105
#define CLS_U_Me  106
#define CLS_U_Mn  107
#define CLS_U_N  108
#define CLS_U_Nd  109
#define CLS_U_Nl  110
#define CLS_U_No  111
#define CLS_U_P  112
#define CLS_U_Pc  113
#define CLS_U_Pd  114
#define CLS_U_Pe  115
#define CLS_U_Pf  116
#define CLS_U_Pi  117
#define CLS_U_Po  118
#define CLS_U_Ps  119
#define CLS_U_S  120
#define CLS_U_Sc  121
#define CLS_U_Sk  122
#define CLS_U_Sm  123
#define CLS_U_So  124
#define CLS_U_Z  125
#define CLS_U_Zl  126
#define CLS_U_Zp  127
#define CLS_U_Zs  128
#define NUM_CLS_U 129
/* Character-class names, NUM_CLS_U entries.  The string at index i is the
 * name whose CLS_U_* constant has the value i; entry 0 is the empty string.
 * The entries appear in ascending byte order.
 * NOTE(review): presumably the table is searched by name with a binary
 * search, so the order must be kept -- confirm against the code that uses it.
 */
static char *clstab[NUM_CLS_U]={"",
  "C",
  "Cc",
  "Cf",
  "Co",
  "IsAlphabeticPresentationForms",
  "IsArabic",
  "IsArabicPresentationForms-A",
  "IsArabicPresentationForms-B",
  "IsArmenian",
  "IsArrows",
  "IsBasicLatin",
  "IsBengali",
  "IsBlockElements",
  "IsBopomofo",
  "IsBopomofoExtended",
  "IsBoxDrawing",
  "IsBraillePatterns",
  "IsByzantineMusicalSymbols",
  "IsCJKCompatibility",
  "IsCJKCompatibilityForms",
  "IsCJKCompatibilityIdeographs",
  "IsCJKCompatibilityIdeographsSupplement",
  "IsCJKRadicalsSupplement",
  "IsCJKSymbolsandPunctuation",
  "IsCJKUnifiedIdeographs",
  "IsCJKUnifiedIdeographsExtensionA",
  "IsCJKUnifiedIdeographsExtensionB",
  "IsCherokee",
  "IsCombiningDiacriticalMarks",
  "IsCombiningHalfMarks",
  "IsCombiningMarksforSymbols",
  "IsControlPictures",
  "IsCurrencySymbols",
  "IsCyrillic",
  "IsDeseret",
  "IsDevanagari",
  "IsDingbats",
  "IsEnclosedAlphanumerics",
  "IsEnclosedCJKLettersandMonths",
  "IsEthiopic",
  "IsGeneralPunctuation",
  "IsGeometricShapes",
  "IsGeorgian",
  "IsGothic",
  "IsGreek",
  "IsGreekExtended",
  "IsGujarati",
  "IsGurmukhi",
  "IsHalfwidthandFullwidthForms",
  "IsHangulCompatibilityJamo",
  "IsHangulJamo",
  "IsHangulSyllables",
  "IsHebrew",
  "IsHiragana",
  "IsIPAExtensions",
  "IsIdeographicDescriptionCharacters",
  "IsKanbun",
  "IsKangxiRadicals",
  "IsKannada",
  "IsKatakana",
  "IsKhmer",
  "IsLao",
  "IsLatin-1Supplement",
  "IsLatinExtended-A",
  "IsLatinExtended-B",
  "IsLatinExtendedAdditional",
  "IsLetterlikeSymbols",
  "IsMalayalam",
  "IsMathematicalAlphanumericSymbols",
  "IsMathematicalOperators",
  "IsMiscellaneousSymbols",
  "IsMiscellaneousTechnical",
  "IsMongolian",
  "IsMusicalSymbols",
  "IsMyanmar",
  "IsNumberForms",
  "IsOgham",
  "IsOldItalic",
  "IsOpticalCharacterRecognition",
  "IsOriya",
  "IsPrivateUse",
  "IsRunic",
  "IsSinhala",
  "IsSmallFormVariants",
  "IsSpacingModifierLetters",
  "IsSpecials",
  "IsSuperscriptsandSubscripts",
  "IsSyriac",
  "IsTags",
  "IsTamil",
  "IsTelugu",
  "IsThaana",
  "IsThai",
  "IsTibetan",
  "IsUnifiedCanadianAboriginalSyllabics",
  "IsYiRadicals",
  "IsYiSyllables",
  "L",
  "Ll",
  "Lm",
  "Lo",
  "Lt",
  "Lu",
  "M",
  "Mc",
  "Me",
  "Mn",
  "N",
  "Nd",
  "Nl",
  "No",
  "P",
  "Pc",
  "Pd",
  "Pe",
  "Pf",
  "Pi",
  "Po",
  "Ps",
  "S",
  "Sc",
  "Sk",
  "Sm",
  "So",
  "Z",
  "Zl",
  "Zp",
  "Zs"
  };

--- NEW FILE: ht.h ---
/* $Id: ht.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#ifndef HT_H
#define HT_H 1

/* Hash table whose keys and stored items are plain ints; hashing and
 * equality are supplied by the client as callbacks.
 * NOTE(review): the ints are presumably indices into a client-owned array
 * (see the remark on ht_deli) -- confirm against the implementation.
 */
struct hashtable {
  int (*hash)(int i);          /* client callback: hash of item i */
  int (*equal)(int i1,int i2); /* client callback: compares items i1 and i2 */
  int tablen,used,limit;       /* sizing/occupancy counters; exact semantics live in the implementation */
  int *table;                  /* slot storage */
};

/* ht_init receives the table, a length and the two callbacks */
extern void ht_init(struct hashtable *ht,int len,int (*hash)(int),int (*equal)(int,int));
extern void ht_clear(struct hashtable *ht);
extern void ht_dispose(struct hashtable *ht);
/* look-up, insertion and removal of item i */
extern int ht_get(struct hashtable *ht,int i);
extern void ht_put(struct hashtable *ht,int i);
extern int ht_del(struct hashtable *ht,int i);
extern int ht_deli(struct hashtable *ht,int i); /* delete only if i refers to itself */

#endif

--- NEW FILE: u.c ---
/* $Id: u.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#include "u.h"

/* Decoding helpers.
 * ux(u,c) shifts u left by six bits and appends the low six bits of c.
 * uN(t) assembles a value from the N bytes starting at t: the payload bits
 * of the lead byte t[0] followed by six bits from each following byte.
 * The top two bits of the following bytes are masked off, not checked.
 */
#define ux(u,c) (((u)<<6)|(c&0x3F))
#define u1(t) t[0]
#define u2(t) ux(t[0]&0x1F,t[1])
#define u3(t) ux(ux(t[0]&0xF,t[1]),t[2])
#define u4(t) ux(ux(ux(t[0]&0x7,t[1]),t[2]),t[3])
#define u5(t) ux(ux(ux(ux(t[0]&0x3,t[1]),t[2]),t[3]),t[4])
#define u6(t) ux(ux(ux(ux(ux(t[0]&0x1,t[1]),t[2]),t[3]),t[4]),t[5])

/* Encoding helpers.
 * vx(c,u) stores into c a byte made of 0x80 and the low six bits of u.
 * vN(t,u) writes u as N bytes starting at t.  v2..v6 expand to several
 * statements separated by ';', so they must be used inside a braced block.
 */
#define vx(c,u) c=0x80|((u)&0x3F)
#define v1(t,u) t[0]=u
#define v2(t,u) t[0]=0xC0|(u>>6);vx(t[1],u)
#define v3(t,u) t[0]=0xE0|(u>>12);vx(t[1],u>>6);vx(t[2],u)
#define v4(t,u) t[0]=0xF0|(u>>18);vx(t[1],u>>12);vx(t[2],u>>6);vx(t[3],u)
#define v5(t,u) t[0]=0xF8|(u>>24);vx(t[1],u>>18);vx(t[2],u>>12);vx(t[3],u>>6);vx(t[4],u)
#define v6(t,u) t[0]=0xFC|(u>>30);vx(t[1],u>>24);vx(t[2],u>>18);vx(t[3],u>>12);vx(t[4],u>>6);vx(t[5],u)

/* Bit masks: BN covers the bits a value must not use to fit in N bytes
 * (u_put picks the first N with (u&BN)==0).  u_get uses B(N-1) to reject an
 * N-byte sequence whose value would also fit in fewer bytes.
 */
#define B1 0xFFFFFF80
#define B2 0xFFFFF800
#define B3 0xFFFF0000
#define B4 0xFFE00000
#define B5 0xFC000000
#define B6 0x80000000

/* the three-byte mark tested by u_bom, and its length */
#define BOM "\xEF\xBB\xBF"
#define BOMLEN 3

/* Returns BOMLEN when the n-byte buffer s starts with the bytes of BOM,
 * and 0 otherwise (including when n is shorter than BOMLEN).
 */
int u_bom(char *s,int n) {
  int i;
  if(n<BOMLEN) return 0;
  /* compare from the last byte of the mark towards the first */
  for(i=BOMLEN-1;i>=0;--i) {
    if(s[i]!=BOM[i]) return 0;
  }
  return BOMLEN;
}

/* Decodes one character starting at s into *up.
 * Returns the number of bytes consumed (1..6), or 0 when the lead byte is
 * not a valid lead (0x80..0xBF, 0xFE, 0xFF) or when the decoded value would
 * fit in a shorter sequence.  In the latter case *up has still been written.
 * The bytes after the lead are read without checking that they are present
 * or that they carry the 10xxxxxx marker.
 */
int u_get(int *up,char *s) {
  unsigned char *t=(unsigned char*)s;
  unsigned char lead=*t;
  unsigned int mask; /* bits of which at least one must be set for this length */
  int len;
  if(lead<0x80) {*up=u1(t); return 1;}
  if(lead<0xC0||lead>=0xFE) return 0;
  if(lead<0xE0) {*up=u2(t); mask=B1; len=2;}
  else if(lead<0xF0) {*up=u3(t); mask=B2; len=3;}
  else if(lead<0xF8) {*up=u4(t); mask=B3; len=4;}
  else if(lead<0xFC) {*up=u5(t); mask=B4; len=5;}
  else {*up=u6(t); mask=B5; len=6;}
  return (*up&mask)?len:0;
}

/* Encodes the value u at s using the shortest form that holds it.
 * Returns the number of bytes written (1..6), or 0 -- writing nothing --
 * when u has bit 31 set (u&B6).
 */
int u_put(char *s,int u) {
  unsigned char *t=(unsigned char*)s;
  int len;
  /* the first mask that u does not touch gives the length */
  if(!(u&B1)) len=1;
  else if(!(u&B2)) len=2;
  else if(!(u&B3)) len=3;
  else if(!(u&B4)) len=4;
  else if(!(u&B5)) len=5;
  else if(!(u&B6)) len=6;
  else return 0;
  switch(len) {
  case 1: v1(t,u); break;
  case 2: v2(t,u); break;
  case 3: v3(t,u); break;
  case 4: v4(t,u); break;
  case 5: v5(t,u); break;
  default: v6(t,u); break;
  }
  return len;
}

/* Length in characters of the NUL-terminated string s: finds the byte
 * length and hands over to u_strnlen (so the result is -1 on a malformed
 * sequence, as there).
 */
int u_strlen(char *s) {
  char *p=s;
  while(*p) ++p;
  return u_strnlen(s,(int)(p-s));
}
/* Counts the characters in the n bytes starting at s.
 * Returns the count, or -1 when u_get rejects a sequence (returns 0) or
 * when a sequence extends beyond the n-th byte.
 */
int u_strnlen(char *s,int n) {
  char *end=s+n;
  int count=0,step,u;
  while(s!=end) {
    step=u_get(&u,s);
    if(step==0) return -1;
    s+=step;
    if(s>end) return -1;
    ++count;
  }
  return count;
}

/* Binary search for u in r, an array of len inclusive ranges {low,high}
 * which the search requires to be sorted in ascending order and
 * non-overlapping.  Returns 1 when u lies inside one of the ranges,
 * 0 otherwise.
 */
int u_in_ranges(int u,int r[][2],int len) {
  int lo=0,hi=len-1;
  while(lo<=hi) {
    int mid=(lo+hi)/2;
    if(u<r[mid][0]) hi=mid-1;
    else if(u>r[mid][1]) lo=mid+1;
    else return 1;
  }
  return 0;
}

--- NEW FILE: xsd.c ---
/* $Id: xsd.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#include <limits.h> /*INT_MAX*/
#include <stdlib.h> /*atof,atol,strtol*/
#include <string.h> /*strlen*/
#include <math.h> /*HUGE_VAL*/
#include <assert.h>
#include "u.h"
#include "xmlc.h"
#include "s.h"
#include "erbit.h"
#include "rx.h"
#include "xsd_tm.h"
#include "er.h"
#include "xsd.h"

/* err(msg): print msg followed by a newline through er_vprintf, taking the
 * arguments from the va_list named `ap` in the enclosing scope */
#define err(msg) (*er_vprintf)(msg"\n",ap)

/* Default reporter for datatype errors: prints a fixed prefix, then either
 * delegates to the rx reporter (when ERBIT_RX is set in erno, with that bit
 * cleared) or prints the message that belongs to the XSD_ER_* code.
 * An unknown code trips assert(0).
 */
void xsd_default_verror_handler(int erno,va_list ap) {
  (*er_printf)("XML Schema datatypes: ");
  if(erno&ERBIT_RX) {
    rx_default_verror_handler(erno&~ERBIT_RX,ap);
    return;
  }
  switch(erno) {
  case XSD_ER_TYP:
    err("unknown type %s");
    break;
  case XSD_ER_PAR:
    err("unknown parameter %s");
    break;
  case XSD_ER_PARVAL:
    err("invalid parameter value %s=\"%s\"");
    break;
  case XSD_ER_VAL:
    err("invalid typed value \"%s\" for type %s");
    break;
  case XSD_ER_NPAT:
    err("no more than 16 patterns per type are supported");
    break;
  case XSD_ER_WS:
    err("the builtin derived datatype that specifies the desired value for the whiteSpace facet should be used instead of 'whiteSpace'");
    break;
  case XSD_ER_ENUM:
    err("'value' should be used instead of 'enumeration'");
    break;
  default:
    assert(0);
  }
}

/* replaceable error reporter; starts out pointing at the default one */
void (*xsd_verror_handler)(int erno,va_list ap)=&xsd_default_verror_handler;

/* variadic front end: packs its arguments into a va_list and forwards
 * them to the current xsd_verror_handler */
static void error_handler(int erno,...) {
  va_list args;
  va_start(args,erno);
  (*xsd_verror_handler)(erno,args);
  va_end(args);
}

/* reporter installed into rx (see xsd_init): tags the code with ERBIT_RX
 * and forwards it to the current xsd_verror_handler */
static void verror_handler_rx(int erno,va_list ap) {
  (*xsd_verror_handler)(erno|ERBIT_RX,ap);
}

static void windup(void);
static int initialized=0;

/* One-time set-up: on the first call initialises rx, routes rx errors
 * through verror_handler_rx and calls windup(); later calls do nothing.
 */
void xsd_init(void) {
  if(initialized) return;
  initialized=1;
  rx_init();
  rx_verror_handler=&verror_handler_rx;
  windup();
}

/* reset: only calls windup() */
void xsd_clear(void) {
  windup();
}

/* currently has no state to reset */
static void windup(void) {
}

/* Facet (datatype parameter) identifiers.  FCT_x is the index of the
 * facet's name in fcttab, and 1<<FCT_x is its bit in struct facets.set.
 * NFCT is the number of facets; xsd_allows treats it as the result of
 * s_tab for a name that is not in the table.
 */
#define FCT_ENUMERATION 0
#define FCT_FRACTION_DIGITS 1
#define FCT_LENGTH 2
#define FCT_MAX_EXCLUSIVE 3
#define FCT_MAX_INCLUSIVE 4
#define FCT_MAX_LENGTH 5
#define FCT_MIN_EXCLUSIVE 6
#define FCT_MIN_INCLUSIVE 7
#define FCT_MIN_LENGTH 8
#define FCT_PATTERN 9
#define FCT_TOTAL_DIGITS 10
#define FCT_WHITE_SPACE 11
#define NFCT 12
/* facet names, in the same order as the FCT_* constants.
 * NOTE(review): the names are in ascending order, presumably because s_tab
 * searches the table -- keep it sorted unless s_tab says otherwise. */
static char *fcttab[NFCT]={
  "enumeration", "fractionDigits", "length", "maxExclusive", "maxInclusive", "maxLength",
  "minExclusive", "minInclusive", "minLength", "pattern", "totalDigits", "whiteSpace"};

/* bit sets over struct facets.set: inclusive bounds, exclusive bounds, any bound */
#define FCT_IBOUNDS (1<<FCT_MIN_INCLUSIVE|1<<FCT_MAX_INCLUSIVE)
#define FCT_EBOUNDS (1<<FCT_MIN_EXCLUSIVE|1<<FCT_MAX_EXCLUSIVE)
#define FCT_BOUNDS (FCT_IBOUNDS|FCT_EBOUNDS)

/* white space handling modes; the value doubles as an index into match[] */
#define WS_PRESERVE 0
#define WS_REPLACE 1
#define WS_COLLAPSE 2

/* pattern matcher for each WS_* mode, called as match[mode](pattern,s,n) by xsd_allows */
static int (*match[])(char *r,char *s,int n)={&rx_match,&rx_rmatch,&rx_cmatch};

/* Datatype identifiers.  TYP_x is the index of the type's name in typtab.
 * NTYP is the number of types; xsd_allows and xsd_equal treat it as the
 * result of s_tab for a name that is not in the table.
 */
#define TYP_ENTITIES 0
#define TYP_ENTITY 1
#define TYP_ID 2
#define TYP_IDREF 3
#define TYP_IDREFS 4
#define TYP_NCNAME 5
#define TYP_NMTOKEN 6
#define TYP_NMTOKENS 7
#define TYP_NOTATION 8
#define TYP_NAME 9
#define TYP_QNAME 10
#define TYP_ANY_URI 11
#define TYP_BASE64_BINARY 12
#define TYP_BOOLEAN 13
#define TYP_BYTE 14
#define TYP_DATE 15
#define TYP_DATE_TIME 16
#define TYP_DECIMAL 17
#define TYP_DOUBLE 18
#define TYP_DURATION 19
#define TYP_FLOAT 20
#define TYP_G_DAY 21
#define TYP_G_MONTH 22
#define TYP_G_MONTH_DAY 23
#define TYP_G_YEAR 24
#define TYP_G_YEAR_MONTH 25
#define TYP_HEX_BINARY 26
#define TYP_INT 27
#define TYP_INTEGER 28
#define TYP_LANGUAGE 29
#define TYP_LONG 30
#define TYP_NEGATIVE_INTEGER 31
#define TYP_NON_NEGATIVE_INTEGER 32
#define TYP_NON_POSITIVE_INTEGER 33
#define TYP_NORMALIZED_STRING 34
#define TYP_POSITIVE_INTEGER 35
#define TYP_SHORT 36
#define TYP_STRING 37
#define TYP_TIME 38
#define TYP_TOKEN 39
#define TYP_UNSIGNED_BYTE 40
#define TYP_UNSIGNED_INT 41
#define TYP_UNSIGNED_LONG 42
#define TYP_UNSIGNED_SHORT 43
#define NTYP 44
/* type names, in the same order as the TYP_* constants (upper-case names
 * first, then lower-case ones, each group ascending).
 * NOTE(review): presumably s_tab relies on this order -- confirm before reordering. */
static char *typtab[NTYP]={
"ENTITIES", "ENTITY", "ID", "IDREF", "IDREFS", "NCName", "NMTOKEN", "NMTOKENS",
"NOTATION", "Name", "QName", "anyURI", "base64Binary", "boolean", "byte", "date",
"dateTime", "decimal", "double", "duration", "float", "gDay", "gMonth",
"gMonthDay", "gYear", "gYearMonth", "hexBinary", "int", "integer", "language",
"long", "negativeInteger", "nonNegativeInteger", "nonPositiveInteger",
"normalizedString", "positiveInteger", "short", "string", "time", "token",
"unsignedByte", "unsignedInt", "unsignedLong", "unsignedShort"};

/* Message templates for parameter, datatype and value errors.
 * NOTE(review): nothing in the visible part of this file refers to them;
 * the messages actually printed come from xsd_default_verror_handler.
 * Check for users before removing. */
#define ERR_PARAMETER "invalid XML Schema datatype parameter '%s'"
#define ERR_DATATYPE "invalid XML Schema datatype name '%s'"
#define ERR_VALUE "invalid value '%s' for XML Schema datatype '%s'"

/* a duration split into its components; each field carries the sign of the whole */
struct dura {int yr,mo,dy,hr,mi;double se;};

/* sets every component to zero */
static void durainit(struct dura *d) {d->yr=d->mo=d->dy=d->hr=d->mi=0; d->se=0.0;}

/* Parses the n bytes at s as a duration (e.g. "-P1Y2M3DT4H5M6.7S") into *dp.
 * The text is expected to have been validated already; characters that are
 * neither digits, '.', '-', 'T' nor a designator letter are skipped.
 * A '-' makes all components negative; 'M' means months before 'T' and
 * minutes after it.
 *
 * np marks the FIRST character of the number in front of the next
 * designator.  It used to be moved on every digit, so only the last digit
 * of each number was converted ("P12Y" gave 2 years).
 */
static void s2dura(struct dura *dp,char *s,int n) {
  char *end=s+n,*np="0";
  int sign=1,time=0,innum=0;
  durainit(dp);
  for(;s!=end;++s) {
    if((*s>='0'&&*s<='9')||*s=='.') {
      if(!innum) {np=s; innum=1;} /* remember where the number starts */
      continue;
    }
    innum=0; /* any other character ends the current number */
    switch(*s) {
    case '-': sign=-1; break;
    case 'T': time=1; break;
    /* atoi/atof stop at the designator letter, so they never run past it;
       np is reset so that a number is not used twice */
    case 'Y': dp->yr=sign*atoi(np); np="0"; break;
    case 'M': if(time) dp->mi=sign*atoi(np); else dp->mo=sign*atoi(np); np="0"; break;
    case 'D': dp->dy=sign*atoi(np); np="0"; break;
    case 'H': dp->hr=sign*atoi(np); np="0"; break;
    case 'S': dp->se=sign*atof(np); np="0"; break;
    }
  }
}

static int duracmp(char *s1,char *s2,int n) {
  struct dura d1,d2;
  s2dura(&d1,s1,strlen(s1)); s2dura(&d2,s2,n);
  if(d1.yr!=d2.yr) return d1.yr-d2.yr;
  if(d1.mo!=d2.mo) return d1.mo-d2.mo;
  if(d1.dy!=d2.dy) return d1.dy-d2.dy;
  if(d1.hr!=d2.hr) return d1.hr-d2.hr;
  if(d1.mi!=d2.mi) return d1.mi-d2.mi;
  if(d1.se!=d2.se) return d1.se<d2.se?-1:1;
  return 0;
}

static int dtcmpn(char *s1,char *s2,int n,char *fmt) {
  struct xsd_tm tm1,tm2;
  xsd_mktm(&tm1,fmt,s1); xsd_mktmn(&tm2,fmt,s2,n);
  return xsd_tmcmp(&tm1,&tm2);
}

/* Length, in characters, of the n bytes at s after white space has been
 * collapsed: leading and trailing white space is not counted and every
 * inner run of white space counts as one character.
 *
 * Returns -1 when the text is not well formed -- u_get rejects a sequence
 * or a sequence runs past the end -- in the same way as u_strnlen.
 * Previously such input left s stuck (u_get returned 0) or stepped over
 * `end`, so the loop never finished or read outside the buffer.
 */
static int toklenn(char *s,int n) {
  char *end=s+n;
  int u,step,len=0;
  int inspace=1; /* 1 while skipping white space, 0 inside a token */
  while(s!=end) {
    step=u_get(&u,s);
    if(step==0||step>end-s) return -1;
    s+=step;
    if(inspace) {
      if(!xmlc_white_space(u)) {++len; inspace=0;}
    } else {
      ++len; /* counts the character itself, or the first space after a token */
      if(xmlc_white_space(u)) inspace=1;
    }
  }
  /* ending in white space: take back the one space that was counted */
  return inspace&&len?len-1:len;
}

/* Number of white-space separated tokens in the n bytes at s.
 *
 * Returns -1 when the text is not well formed -- u_get rejects a sequence
 * or a sequence runs past the end -- in the same way as u_strnlen.
 * Previously such input left s stuck (u_get returned 0) or stepped over
 * `end`, so the loop never finished or read outside the buffer.
 */
static int tokcntn(char *s,int n) {
  char *end=s+n;
  int u,step,cnt=0;
  int inspace=1; /* 1 while skipping white space, 0 inside a token */
  while(s!=end) {
    step=u_get(&u,s);
    if(step==0||step>end-s) return -1;
    s+=step;
    if(xmlc_white_space(u)) {
      inspace=1;
    } else if(inspace) {
      ++cnt; /* first character of a new token */
      inspace=0;
    }
  }
  return cnt;
}

/* Number of octets encoded by the base64 text in the n bytes at s.
 * Trailing white space and '=' are dropped, the remaining non-white-space
 * characters are counted, and every four of them stand for three octets.
 * A remainder of two or three characters adds one or two octets; a
 * remainder of one cannot occur in valid text and yields -1.
 */
static int b64lenn(char *s,int n) {
  char *end=s+n;
  int sig=0; /* significant (non-white-space) characters */
  while(end!=s&&(xmlc_white_space(end[-1])||end[-1]=='=')) --end;
  for(;s!=end;++s) {
    if(!xmlc_white_space(*s)) ++sig;
  }
  switch(sig%4) {
  case 1: return -1;
  case 2: return sig/4*3+1;
  case 3: return sig/4*3+2;
  default: return sig/4*3;
  }
}

/* Number of fraction digits in the n bytes at s: trailing '0' characters
 * and white space are cut off first, then the characters that follow the
 * first '.' are counted.  Returns 0 when there is no '.'.
 */
static int fdiglenn(char *s,int n) {
  char *end=s+n;
  while(end!=s&&(end[-1]=='0'||xmlc_white_space(end[-1]))) --end;
  while(s!=end) {
    if(*s++=='.') return (int)(end-s); /* s is now just past the point */
  }
  return 0;
}

/* Number of digits in the n bytes at s, as compared against totalDigits.
 * Leading white space, signs and '0' characters are skipped, the
 * characters up to the '.', a white space or the end are counted (at least
 * 1), and the fraction digits reported by fdiglenn are added.
 *
 * The '.' test is guarded by s!=end: the old code looked at *s even when
 * the scan had reached the end, i.e. at a byte outside the n-byte value.
 */
static int diglenn(char *s,int n) {
  char *end=s+n; int len=0;
  while(s!=end&&(xmlc_white_space(*s)||*s=='+'||*s=='-'||*s=='0')) ++s;
  while(s!=end&&*s!='.'&&!xmlc_white_space(*s)) {++len; ++s;}
  if(len==0) len=1;
  if(s!=end&&*s=='.') len+=fdiglenn(s,(int)(end-s));
  return len;
}

/* maximum number of 'pattern' parameters accepted per type (see XSD_ER_NPAT) */
#define NPAT 16

/* The facets collected by xsd_allows for one check.  A field is only
 * meaningful when its bit (1<<FCT_x) is set in `set`; the other fields are
 * left uninitialised.
 */
struct facets {
  int set;                         /* bit mask of the facets that are present */
  char *pattern[NPAT+1]; int npat; /* patterns to match; the extra slot leaves room for a built-in one */
  int length, minLength, maxLength, totalDigits, fractionDigits;
  char *maxExclusive, *maxInclusive, *minExclusive, *minInclusive; /* bounds, kept as text */
  int whiteSpace;                  /* one of WS_PRESERVE, WS_REPLACE, WS_COLLAPSE */
};

/* Regular expressions for the lexical forms of the built-in types; they
 * are built by string literal concatenation and matched by xsd_allows
 * through match[].
 */
/* PAT_DECIMAL is unsigned decimal, signed decimal matches PAT_FIXED */
#define PAT_ORDINAL "([0-9]+)"
#define PAT_FRACTIONAL "(\\.[0-9]+)"
#define PAT_DECIMAL "("PAT_ORDINAL"\\.?|"PAT_ORDINAL"?"PAT_FRACTIONAL")"

/* integer families; the patterns only fix the sign syntax, the value
   ranges are enforced by the min/max facets that xsd_allows sets up */
#define PAT_POSITIVE "\\+?"PAT_ORDINAL
#define PAT_NON_NEGATIVE "\\+?"PAT_ORDINAL
#define PAT_NON_POSITIVE "\\-"PAT_ORDINAL"|0+"
#define PAT_NEGATIVE "\\-"PAT_ORDINAL
#define PAT_INTEGER "([+\\-]?"PAT_ORDINAL")"

/* signed decimal, and floating point with optional exponent or a special value */
#define PAT_FIXED "([+\\-]?"PAT_DECIMAL")"
#define PAT_FLOATING PAT_FIXED"([Ee]"PAT_INTEGER")?|INF|-INF|NaN"

#define PAT_HEX_BINARY "[0-9a-fA-F]+"

/* base64: each character may be followed by one space; PAT_BASE64_2 and
   PAT_BASE64_1 restrict the last character in front of "==" and "=" */
#define PAT_BASE64 "([A-Za-z0-9+/] ?)"
#define PAT_BASE64_2 "([AQgw] ?)"
#define PAT_BASE64_1 "([AEIMQUYcgkosw048] ?)"
#define PAT_BASE64_BINARY \
   "("PAT_BASE64"{4})*" \
   "("PAT_BASE64 PAT_BASE64_2"= ?=" \
   "|"PAT_BASE64"{2}" PAT_BASE64_1"=)?"

#define PAT_ANY_URI "(([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?(#[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?"

/* XML names; the plural forms are lists separated by a single space */
#define PAT_NCNAME "[\\i-[:]][\\c-[:]]*"
#define PAT_QNAME "("PAT_NCNAME":)?"PAT_NCNAME
#define PAT_NMTOKEN "\\c+"
#define PAT_NAME "\\i\\c*"
#define PAT_NCNAMES PAT_NCNAME"( "PAT_NCNAME")*"
#define PAT_NMTOKENS PAT_NMTOKEN"( "PAT_NMTOKEN")*"
#define PAT_NAMES PAT_NAME"( "PAT_NAME")*"

#define PAT_LANGUAGE "([a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*)"

/* Duration pattern.  Each component is a number followed by its designator
 * letter; seconds may have a fraction (PAT_DECIMAL).
 */
#define PAT_DURAY "("PAT_ORDINAL"Y)"
#define PAT_DURAM "("PAT_ORDINAL"M)"
#define PAT_DURAD "("PAT_ORDINAL"D)"
/* date part: Y, M, D in this order, at least one of them present */
#define PAT_DURADATE \
  "(" PAT_DURAY   PAT_DURAM"?"PAT_DURAD"?" \
  "|" PAT_DURAY"?"PAT_DURAM   PAT_DURAD"?" \
  "|" PAT_DURAY"?"PAT_DURAM"?"PAT_DURAD ")"
#define PAT_DURAH "("PAT_ORDINAL"H)"
#define PAT_DURAS "("PAT_DECIMAL"S)"
/* time part: 'T', then H, M, S in this order, at least one of them present.
 * Built exactly like PAT_DURADATE.  The second and third alternatives used
 * to start with an optional M and an optional S instead of an optional H,
 * which let through texts such as "PT1M2M" and "PT1S2S".
 */
#define PAT_DURATIME \
"(T(" PAT_DURAH   PAT_DURAM"?"PAT_DURAS"?" \
  "|" PAT_DURAH"?"PAT_DURAM   PAT_DURAS"?" \
  "|" PAT_DURAH"?"PAT_DURAM"?"PAT_DURAS "))"
/* optional sign, 'P', then date part, time part, or both */
#define PAT_DURATION "-?P("PAT_DURADATE PAT_DURATIME"|"PAT_DURADATE"|"PAT_DURATIME")"

#define PAT_ZONE "(Z|[+\\-](0[0-9]|1[0-4]):[0-5][0-9])"
#define PAT_YEAR0 "[0-9]{4,}"
#define PAT_MONTH0 "(0[1-9]|1[0-2])"
#define PAT_DAY0 "([0-2][0-9]|3[01])"
#define PAT_YEAR "-?"PAT_YEAR0 PAT_ZONE"?"
#define PAT_MONTH "--"PAT_MONTH0"--"PAT_ZONE"?"
#define PAT_DAY "---"PAT_DAY0 PAT_ZONE"?"
#define PAT_YEAR_MONTH "-?"PAT_YEAR0"-"PAT_MONTH0 PAT_ZONE"?"
#define PAT_MONTH_DAY "--"PAT_MONTH0"-"PAT_DAY0 PAT_ZONE"?"
#define PAT_DATE0 PAT_YEAR0"-"PAT_MONTH0"-"PAT_DAY0
#define PAT_TIME0 "([0-1][0-9]|2[0-3]):[0-5][0-9]:([0-5][0-9]|60)"PAT_FRACTIONAL"?"
#define PAT_DATE "-?"PAT_DATE0 PAT_ZONE"?"
#define PAT_TIME PAT_TIME0 PAT_ZONE"?"
#define PAT_DATE_TIME "-?"PAT_DATE0"T"PAT_TIME0 PAT_ZONE"?"

/* Locates the significant part of the decimal number in the n bytes at s,
 * for use by deccmp.
 * Outputs:
 *   *plus  1, or 0 when a '-' is met while skipping the leading characters;
 *   *zero  1, or 0 as soon as a significant character other than '.' is seen;
 *   *beg   first character that is not a leading '0', '+', '-' or white space;
 *   *end   one past the last significant character (see below);
 *   *dp    position of the '.', or equal to *end when there is none.
 * When everything is skipped (the value is zero) *beg, *dp and *end coincide.
 */
static void anchdec(int *plus,int *zero,char **beg,char **dp,char **end,char *s,int n) {
  char *end0=s+n;
  *beg=s; *zero=1; *plus=1;
  /* end0: end of the text without trailing white space */
  for(;;) { if(end0==*beg) break;
    --end0;
    if(!xmlc_white_space(*end0)) {++end0; break;}
  }
  *end=end0;
  /* provisionally drop trailing '0' (and sign) characters, as if they were
     fraction zeros; this is undone below when no '.' is found */
  for(;;) { if(*end==*beg) break;
    --*end;
    if(!(**end=='0'||**end=='+'||**end=='-')) {
      if(**end!='.') *zero=0; 
      ++*end; 
      break;
    }
  }
  *dp=*end;
  /* skip the leading characters, noting a minus sign */
  for(;;) { if(*beg==*end) break;
    if(**beg=='-') *plus=0;
    else if(!(**beg=='0'||**beg=='+'||xmlc_white_space(**beg))) {
      if(**beg!='.') *zero=0;
      /* search backwards from the end for the '.'; without one the number
         is an integer, its trailing zeros count, and the end is restored */
      for(;;) {
	if(*dp==*beg) {*dp=*end=end0; break;}
	--*dp;
	if(**dp=='.') break;
      }
      break;
    }
    ++*beg;
  }
}

/* Compares two decimal numbers given as text (n1 bytes at s1, n2 bytes at
 * s2) without converting them, so the number of digits is not limited.
 * Returns 0 when they are equal, a negative number when the first is
 * smaller and a positive number when it is greater.
 * Both texts are expected to be valid decimals already.
 */
static int deccmp(char *s1,int n1,char *s2,int n2) {
  int p1,p2,z1,z2,cmp;
  char *d1,*e1,*d2,*e2,*c1,*c2;
  /* s1/s2 are moved to the first significant character, d* to the decimal
     point (or the end), e* to the end of the significant part */
  anchdec(&p1,&z1,&s1,&d1,&e1,s1,n1); anchdec(&p2,&z2,&s2,&d2,&e2,s2,n2);
  if(z1&&z2) return 0; /* both are zero, whatever their signs */
  if(p1!=p2) return p1-p2; /* different signs: the non-negative one is greater */
  /* same sign: the longer integer part is the greater magnitude */
  cmp=0;
  if(d1-s1!=d2-s2) cmp=d1-s1-(d2-s2);
  if(cmp!=0) return p1?cmp:-cmp; /* for negative numbers the order is reversed */
  /* integer parts of equal length: compare digit by digit */
  c1=s1; c2=s2;
  for(;;) {
    if(c1==d1) break;
    if(*c1!=*c2) {cmp=*c1-*c2; break;}
    ++c1; ++c2;
  }
  if(cmp!=0) return p1?cmp:-cmp;
  /* step over the decimal points, then compare the fractions; the one that
     runs out first is the smaller unless both end together */
  if(c1!=e1) ++c1; if(c2!=e2) ++c2;
  for(;;) {
    if(c1==e1) {cmp=-(c2!=e2); break;}
    if(c2==e2) {cmp=1; break;}
    if(*c1!=*c2) {cmp=*c1-*c2; break;}
    ++c1; ++c2;
  }
  return p1?cmp:-cmp;
}

/* Checks the decimal in the n bytes at s against the bound facets that are
 * set in *fp, comparing with deccmp.  Returns 1 when every bound that is
 * present holds, 0 as soon as one fails.
 */
static int chkdec(struct facets *fp,char *s,int n) {
  int set=fp->set;
  if((set&(1<<FCT_MIN_EXCLUSIVE))
      &&!(deccmp(s,n,fp->minExclusive,strlen(fp->minExclusive))>0)) return 0;
  if((set&(1<<FCT_MIN_INCLUSIVE))
      &&!(deccmp(s,n,fp->minInclusive,strlen(fp->minInclusive))>=0)) return 0;
  if((set&(1<<FCT_MAX_INCLUSIVE))
      &&!(deccmp(s,n,fp->maxInclusive,strlen(fp->maxInclusive))<=0)) return 0;
  if((set&(1<<FCT_MAX_EXCLUSIVE))
      &&!(deccmp(s,n,fp->maxExclusive,strlen(fp->maxExclusive))<0)) return 0;
  return 1;
}

/* Converts the floating point text in the n bytes at s: "-INF" and "INF"
 * (as compared by s_tokcmpn) give -HUGE_VAL and HUGE_VAL, anything else is
 * handed to atof.
 */
static double atodn(char *s,int n) {
  if(s_tokcmpn("-INF",s,n)==0) return -HUGE_VAL;
  if(s_tokcmpn("INF",s,n)==0) return HUGE_VAL;
  return atof(s);
}

/* same for a NUL-terminated string */
static double atod(char *s) {
  return atodn(s,strlen(s));
}

/* Checks the float/double in the n bytes at s against the bound facets
 * that are set in *fp.  Returns 1 when every bound that is present holds;
 * "NaN" fails every bound.
 */
static int chkdbl(struct facets *fp,char *s,int n) {
  int set=fp->set;
  int nan=s_tokcmpn("NaN",s,n)==0;
  double d=atodn(s,n);
  if((set&(1<<FCT_MIN_EXCLUSIVE))&&(nan||!(d>atod(fp->minExclusive)))) return 0;
  if((set&(1<<FCT_MIN_INCLUSIVE))&&(nan||!(d>=atod(fp->minInclusive)))) return 0;
  if((set&(1<<FCT_MAX_INCLUSIVE))&&(nan||!(d<=atod(fp->maxInclusive)))) return 0;
  if((set&(1<<FCT_MAX_EXCLUSIVE))&&(nan||!(d<atod(fp->maxExclusive)))) return 0;
  return 1;
}

static int chktmlim(char *typ,char *fmt,char *val,int cmpmin,int cmpmax,struct xsd_tm *tmp) {
  struct xsd_tm tmf; int cmp;
  if(!xsd_allows(typ,"",val,strlen(val))) {(*error_handler)(XSD_ER_PARVAL); return 0;}
  xsd_mktm(&tmf,fmt,val);
  cmp=xsd_tmcmp(tmp,&tmf);
  return cmpmin<=cmp&&cmp<=cmpmax;
}

/* Checks the date/time value in the n bytes at s against the bound facets
 * that are set in *fp.  The value must first be a valid literal of typ
 * (checked with an empty parameter list); it is then converted with fmt
 * and compared with each bound through chktmlim.
 * Returns 1 when every bound that is present holds, otherwise 0.
 */
static int chktm(char *typ,char *fmt,struct facets *fp,char *s,int n) {
  struct xsd_tm tms;
  int set=fp->set;
  if(!xsd_allows(typ,"",s,n)) return 0;
  xsd_mktmn(&tms,fmt,s,n);
  /* the two numbers are the accepted range of xsd_tmcmp(value, bound) */
  if((set&(1<<FCT_MIN_EXCLUSIVE))&&!chktmlim(typ,fmt,fp->minExclusive,1,1,&tms)) return 0;
  if((set&(1<<FCT_MIN_INCLUSIVE))&&!chktmlim(typ,fmt,fp->minInclusive,0,1,&tms)) return 0;
  if((set&(1<<FCT_MAX_INCLUSIVE))&&!chktmlim(typ,fmt,fp->maxInclusive,-1,0,&tms)) return 0;
  if((set&(1<<FCT_MAX_EXCLUSIVE))&&!chktmlim(typ,fmt,fp->maxExclusive,-1,-1,&tms)) return 0;
  return 1;
}

/* Tells whether the n bytes at s are a valid literal of datatype typ under
 * the parameters ps.
 *   typ  datatype name, looked up in typtab;
 *   ps   parameter list: key and value strings one after another, each
 *        NUL-terminated, ended by an empty key;
 *   s,n  the value and its length in bytes.
 * Returns non-zero when the value is allowed, 0 otherwise.  Problems with
 * the type name or the parameters are reported through error_handler.
 * Note that after an unknown type has been reported the function still
 * returns with no pattern applied.
 *
 * The work is done in four steps: integer types are mapped to decimal with
 * a pattern and bounds, the parameters are read into a struct facets, the
 * built-in pattern and the length measure of the type are selected, and
 * finally the patterns and length facets are applied.
 */
int xsd_allows(char *typ,char *ps,char *s,int n) {
  int ok=1,length;
  int dt=s_tab(typ,typtab,NTYP);
  struct facets fct; fct.set=0; fct.npat=0;
  /* step 1: types derived from decimal become TYP_DECIMAL plus an integer
     pattern and, where the type has a range, inclusive bounds */
  switch(dt) {
  case TYP_INTEGER:
    fct.pattern[fct.npat++]=PAT_INTEGER;
    dt=TYP_DECIMAL;
    break;
  case TYP_POSITIVE_INTEGER:
    fct.pattern[fct.npat++]=PAT_POSITIVE;
    dt=TYP_DECIMAL; fct.set|=1<<FCT_MIN_INCLUSIVE;
    fct.minInclusive="1";
    break;
  case TYP_NON_NEGATIVE_INTEGER:
    fct.pattern[fct.npat++]=PAT_NON_NEGATIVE;
    dt=TYP_DECIMAL; fct.set|=1<<FCT_MIN_INCLUSIVE;
    fct.minInclusive="0";
    break;
  case TYP_NON_POSITIVE_INTEGER:
    fct.pattern[fct.npat++]=PAT_NON_POSITIVE;
    dt=TYP_DECIMAL; fct.set|=1<<FCT_MAX_INCLUSIVE;
    fct.maxInclusive="0";
    break;
  case TYP_NEGATIVE_INTEGER:
    fct.pattern[fct.npat++]=PAT_NEGATIVE;
    dt=TYP_DECIMAL; fct.set|=1<<FCT_MAX_INCLUSIVE;
    fct.maxInclusive="-1";
    break;
  case TYP_BYTE:
    fct.pattern[fct.npat++]=PAT_INTEGER;
    dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS;
    fct.minInclusive="-128"; fct.maxInclusive="127";
    break;
  case TYP_UNSIGNED_BYTE:
    fct.pattern[fct.npat++]=PAT_NON_NEGATIVE;
    dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS;
    fct.minInclusive="0"; fct.maxInclusive="255";
    break;
  case TYP_SHORT:
    fct.pattern[fct.npat++]=PAT_INTEGER;
    dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS;
    fct.minInclusive="-32768"; fct.maxInclusive="32767";
    break;
  case TYP_UNSIGNED_SHORT:
    fct.pattern[fct.npat++]=PAT_NON_NEGATIVE;
    dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS;
    fct.minInclusive="0"; fct.maxInclusive="65535";
    break;
  case TYP_INT:
    fct.pattern[fct.npat++]=PAT_INTEGER;
    dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS;
    fct.minInclusive="-2147483648"; fct.maxInclusive="2147483647";
    break;
  case TYP_UNSIGNED_INT:
    fct.pattern[fct.npat++]=PAT_NON_NEGATIVE;
    dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS;
    fct.minInclusive="0"; fct.maxInclusive="4294967295";
    break;
  case TYP_LONG:
    fct.pattern[fct.npat++]=PAT_INTEGER;
    dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS;
    fct.minInclusive="-9223372036854775808"; fct.maxInclusive="9223372036854775807";
    break;
  case TYP_UNSIGNED_LONG:
    fct.pattern[fct.npat++]=PAT_NON_NEGATIVE;
    dt=TYP_DECIMAL; fct.set|=FCT_IBOUNDS;
    fct.minInclusive="0"; fct.maxInclusive="18446744073709551615";
    break;
  }

  /* step 2: read the parameters; this n (length of the current key)
     shadows the function parameter inside the block.  A bound given here
     replaces the built-in one set in step 1. */
  { int n;
    while((n=strlen(ps))) {
      char *key=ps,*val=key+n+1,*end,i;
      switch(i=s_tab(key,fcttab,NFCT)) {
      /* numeric facets: the value must be a complete base-10 integer */
      case FCT_LENGTH: fct.length=(int)strtol(val,&end,10); if(!*val||*end) (*error_handler)(XSD_ER_PARVAL,key,val); break;
      case FCT_MAX_LENGTH: fct.maxLength=(int)strtol(val,&end,10); if(!*val||*end) (*error_handler)(XSD_ER_PARVAL,key,val); break;
      case FCT_MIN_LENGTH: fct.minLength=(int)strtol(val,&end,10); if(!*val||*end) (*error_handler)(XSD_ER_PARVAL,key,val); break;
      case FCT_FRACTION_DIGITS: fct.fractionDigits=(int)strtol(val,&end,10); if(!*val||*end) (*error_handler)(XSD_ER_PARVAL,key,val); break;
      case FCT_TOTAL_DIGITS: fct.totalDigits=(int)strtol(val,&end,10); if(!*val||*end) (*error_handler)(XSD_ER_PARVAL,key,val); break;
      /* at most NPAT patterns; one slot of the array stays free for the
         built-in pattern added in step 3 */
      case FCT_PATTERN:
	if(fct.npat==NPAT) (*error_handler)(XSD_ER_NPAT); else {
	  fct.pattern[fct.npat++]=val;
	} break;
      /* bounds are kept as text and interpreted per type in step 3 */
      case FCT_MAX_EXCLUSIVE: fct.maxExclusive=val; break;
      case FCT_MAX_INCLUSIVE: fct.maxInclusive=val; break;
      case FCT_MIN_EXCLUSIVE: fct.minExclusive=val; break;
      case FCT_MIN_INCLUSIVE: fct.minInclusive=val; break;
      case FCT_WHITE_SPACE: (*error_handler)(XSD_ER_WS); break;
      case FCT_ENUMERATION: (*error_handler)(XSD_ER_ENUM); break;
      case NFCT: (*error_handler)(XSD_ER_PAR,key); break; /* name not in fcttab */
      default: assert(0);
      }
      fct.set|=1<<i;
      ps=val+strlen(val)+1; /* on to the next key */
    }
  }

  /* step 3: per type, add the built-in pattern, check digits and bounds,
     and compute `length` in the unit the length facets use for the type.
     Types without a length measure keep INT_MAX. */
  fct.whiteSpace=WS_COLLAPSE;
  length=INT_MAX;
  switch(dt) {
 /*primitive*/
  case TYP_STRING: fct.whiteSpace=WS_PRESERVE;
    length=u_strnlen(s,n);
    break;
  case TYP_BOOLEAN:
    fct.pattern[fct.npat++]="true|false|1|0";
    break;
  case TYP_DECIMAL:
    fct.pattern[fct.npat++]=PAT_FIXED;
    if(fct.set&(1<<FCT_FRACTION_DIGITS)) ok=ok&&fdiglenn(s,n)<=fct.fractionDigits;
    if(fct.set&(1<<FCT_TOTAL_DIGITS)) ok=ok&&diglenn(s,n)<=fct.totalDigits;
    if(fct.set&FCT_BOUNDS) ok=ok&chkdec(&fct,s,n);
    break;
  case TYP_FLOAT: case TYP_DOUBLE: /* float and double is the same type */
    fct.pattern[fct.npat++]=PAT_FLOATING;
    if(fct.set&FCT_BOUNDS) ok=ok&chkdbl(&fct,s,n);
    break;
  case TYP_DURATION:
    fct.pattern[fct.npat++]=PAT_DURATION;
    break;
  /* date/time types: the string names the fields handed to chktm */
  case TYP_DATE_TIME:
    fct.pattern[fct.npat++]=PAT_DATE_TIME;
    if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"ymdtz",&fct,s,n);
    break;
  case TYP_DATE:
    fct.pattern[fct.npat++]=PAT_DATE;
    if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"ymdz",&fct,s,n);
    break;
  case TYP_TIME:
    fct.pattern[fct.npat++]=PAT_TIME;
    if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"tz",&fct,s,n);
    break;
  case TYP_G_YEAR_MONTH:
    fct.pattern[fct.npat++]=PAT_YEAR_MONTH;
    if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"ymz",&fct,s,n);
    break;
  case TYP_G_YEAR:
    fct.pattern[fct.npat++]=PAT_YEAR;
    if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"yz",&fct,s,n);
    break;
  case TYP_G_MONTH_DAY:
    fct.pattern[fct.npat++]=PAT_MONTH_DAY;
    if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"mdz",&fct,s,n);
    break;
  case TYP_G_DAY:
    fct.pattern[fct.npat++]=PAT_DAY;
    if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"dz",&fct,s,n);
    break;
  case TYP_G_MONTH:
    fct.pattern[fct.npat++]=PAT_MONTH;
    if(fct.set&FCT_BOUNDS) ok=ok&chktm(typ,"mz",&fct,s,n);
    break;
  case TYP_HEX_BINARY:
    fct.pattern[fct.npat++]=PAT_HEX_BINARY;
    length=(toklenn(s,n)+1)/2; /* two hex digits per octet */
    break;
  case TYP_BASE64_BINARY:
    fct.pattern[fct.npat++]=PAT_BASE64_BINARY;
    length=b64lenn(s,n);
    break;
  case TYP_ANY_URI:
    fct.pattern[fct.npat++]=PAT_ANY_URI;
    length=toklenn(s,n);
    break;
  case TYP_QNAME: case TYP_NOTATION:
    fct.pattern[fct.npat++]=PAT_QNAME;
    fct.set&=~(1<<FCT_LENGTH|1<<FCT_MIN_LENGTH|1<<FCT_MAX_LENGTH); /* the errata states that any value is valid */
    break;
 /*derived*/
  case TYP_NORMALIZED_STRING: fct.whiteSpace=WS_REPLACE;
    length=u_strnlen(s,n);
    break;
  case TYP_TOKEN:
    length=toklenn(s,n);
    break;
  case TYP_LANGUAGE:
    fct.pattern[fct.npat++]=PAT_LANGUAGE;
    length=toklenn(s,n);
    break;
  case TYP_NMTOKEN:
    fct.pattern[fct.npat++]=PAT_NMTOKEN;
    length=toklenn(s,n);
    break;
  /* list types: length is the number of items */
  case TYP_NMTOKENS:
    fct.pattern[fct.npat++]=PAT_NMTOKENS;
    length=tokcntn(s,n);
    break;
  case TYP_NAME:
    fct.pattern[fct.npat++]=PAT_NAME;
    length=toklenn(s,n);
    break;
  case TYP_NCNAME:
    fct.pattern[fct.npat++]=PAT_NCNAME;
    length=toklenn(s,n);
    break;
  case TYP_ID:
    fct.pattern[fct.npat++]=PAT_NCNAME;
    length=toklenn(s,n);
    break;
  case TYP_IDREF:
    fct.pattern[fct.npat++]=PAT_NCNAME;
    length=toklenn(s,n);
    break;
  case TYP_IDREFS:
    fct.pattern[fct.npat++]=PAT_NCNAMES;
    length=tokcntn(s,n);
    break;
  case TYP_ENTITY:
    fct.pattern[fct.npat++]=PAT_NCNAME;
    length=toklenn(s,n);
    break;
  case TYP_ENTITIES:
    fct.pattern[fct.npat++]=PAT_NCNAMES;
    length=tokcntn(s,n);
    break;
  case NTYP: (*error_handler)(XSD_ER_TYP,typ); break; /* name not in typtab */
  default: assert(0);
  }

  /* step 4: every collected pattern must match, using the matcher that
     goes with the white space mode of the type */
  while(fct.npat--) ok=ok&&match[fct.whiteSpace](fct.pattern[fct.npat],s,n);

  if(fct.set&(1<<FCT_LENGTH)) ok=ok&&length==fct.length;
  if(fct.set&(1<<FCT_MAX_LENGTH)) ok=ok&&length<=fct.maxLength;
  if(fct.set&(1<<FCT_MIN_LENGTH)) ok=ok&&length>=fct.minLength;

  return ok;
}

/* Compares the float/double in the NUL-terminated val with the one in the
 * n bytes at s.  Returns 0 when the two texts are the same token or the
 * numbers are equal, -1 or 1 when the first is smaller or greater.
 * If only one side is "NaN" the result is 1 (unequal).
 *
 * n is an int like every other length in this file; it was declared char,
 * which cut off lengths above CHAR_MAX.
 */
static int dblcmpn(char *val,char *s,int n) {
  double d1,d2;
  if(s_tokcmpn(val,s,n)==0) return 0;
  if(s_tokcmpn(val,"NaN",3)==0||s_tokcmpn("NaN",s,n)==0) return 1;
  d1=atod(val); d2=atodn(s,n);
  return d1<d2?-1:d1>d2?1:0;
}

/* Compares the hexBinary text in the NUL-terminated s1 with the one in the
 * n bytes at s2, ignoring white space and the case of the digits A-F.
 * Returns 0 when they are equal and a non-zero value otherwise.
 *
 * The letters used to be handled by a switch whose cases fell through into
 * each other, so that e.g. 'a' in s1 was taken as equal to any of 'b'..'f'
 * in s2.  Both characters are now folded to lower case and compared.
 */
static int hexcmpn(char *s1,char *s2,int n) {
  char *end=s2+n;
  for(;;++s1,++s2) {
    int c1,c2;
    while(*s1&&xmlc_white_space(*s1)) ++s1;
    while(s2!=end&&xmlc_white_space(*s2)) ++s2;
    if(s2==end) return *s1; /* equal only if s1 has ended as well */
    if(!*s1) return -*s2;
    c1=*s1; c2=*s2;
    if(c1>='A'&&c1<='F') c1+='a'-'A';
    if(c2>='A'&&c2<='F') c2+='a'-'A';
    if(c1!=c2) return c1-c2;
  }
}

/* Compares the base64 text in the NUL-terminated s1 with the one in the n
 * bytes at s2, ignoring white space.  Returns 0 when they are equal and a
 * non-zero value otherwise.
 */
static int b64cmpn(char *s1,char *s2,int n) {
  char *end=s2+n;
  for(;;) {
    /* move both sides to their next significant character */
    while(*s1&&xmlc_white_space(*s1)) ++s1;
    while(s2!=end&&xmlc_white_space(*s2)) ++s2;
    if(s2==end) return *s1; /* equal only if s1 has ended as well */
    if(!*s1) return -*s2;
    if(*s1!=*s2) return *s1-*s2;
    ++s1;
    ++s2;
  }
}

/* Compares the NUL-terminated s1 with the n bytes at s2 character by
 * character, treating any two white space characters as equal.
 * Returns 0 when they are equal and a non-zero value otherwise.
 */
static int nrmcmpn(char *s1,char *s2,int n) {
  char *end=s2+n;
  for(;s2!=end;++s1,++s2) {
    if(!*s1) return -*s2;
    if(*s1!=*s2&&!(xmlc_white_space(*s1)&&xmlc_white_space(*s2)))
      return *s1-*s2;
  }
  return *s1; /* s2 is used up: equal only if s1 has ended as well */
}

/* Compares two qualified names by their local parts only (the namespace
 * context is not available here): in each name the text after the first
 * ':' is used, or the whole name when there is no ':'.
 * s1 is NUL-terminated, s2 is n2 bytes long.  Returns the result of
 * s_tokcmpn on the two local parts.
 */
static int qncmpn(char *s1,char *s2,int n2) {
  char *colon1=s1,*colon2=s2;
  int rest=n2; /* bytes of s2 from colon2 to the end */
  while(*colon1&&*colon1!=':') ++colon1;
  while(rest!=0&&*colon2!=':') {++colon2; --rest;}
  if(*colon1) s1=colon1+1; /* s1 has a prefix: skip it */
  if(rest) return s_tokcmpn(s1,colon2+1,rest-1); /* s2 has a prefix */
  return s_tokcmpn(s1,s2,n2);
}

int xsd_equal(char *typ,char *val,char *s,int n) {
  if(!xsd_allows(typ,"",val,strlen(val))) {
    (*error_handler)(XSD_ER_VAL,val);
    return 0;
  }
  if(!xsd_allows(typ,"",s,n)) return 0;
  switch(s_tab(typ,typtab,NTYP)) {
 /*primitive*/
  case TYP_STRING: return s_cmpn(val,s,n)==0;
  case TYP_BOOLEAN: return (s_tokcmpn("true",val,strlen(val))==0||s_tokcmpn("1",val,strlen(val))==0)==(s_tokcmpn("true",s,n)==0||s_tokcmpn("1",s,n)==0);
  case TYP_DECIMAL: return deccmp(val,strlen(val),s,n)==0;
  case TYP_FLOAT: case TYP_DOUBLE: return dblcmpn(val,s,n)==0;
  case TYP_DURATION: return duracmp(val,s,n)==0;
  case TYP_DATE_TIME: return dtcmpn(val,s,n,"ymdtz")==0;
  case TYP_DATE: return dtcmpn(val,s,n,"ymdz")==0;
  case TYP_TIME: return dtcmpn(val,s,n,"tz")==0;
  case TYP_G_YEAR_MONTH: return dtcmpn(val,s,n,"ymz")==0;
  case TYP_G_YEAR: return dtcmpn(val,s,n,"yz")==0;
  case TYP_G_MONTH_DAY: return dtcmpn(val,s,n,"mdz")==0;
  case TYP_G_DAY: return dtcmpn(val,s,n,"dz")==0;
  case TYP_G_MONTH: return dtcmpn(val,s,n,"mz")==0;
  case TYP_HEX_BINARY: return hexcmpn(val,s,n)==0;
  case TYP_BASE64_BINARY: return b64cmpn(val,s,n)==0;
  case TYP_ANY_URI: return s_tokcmpn(val,s,n)==0;
  case TYP_QNAME: case TYP_NOTATION:
    return qncmpn(val,s,n)==0;
 /*derived*/
  case TYP_NORMALIZED_STRING: return nrmcmpn(val,s,n)==0;
  case TYP_TOKEN:
  case TYP_LANGUAGE:
  case TYP_NMTOKEN:
  case TYP_NMTOKENS:
  case TYP_NAME:
  case TYP_NCNAME:
  case TYP_ID:
  case TYP_IDREF:
  case TYP_IDREFS:
  case TYP_ENTITY:
  case TYP_ENTITIES: return s_tokcmpn(val,s,n)==0;
  case TYP_INTEGER:
  case TYP_POSITIVE_INTEGER:
  case TYP_NON_NEGATIVE_INTEGER:
  case TYP_NON_POSITIVE_INTEGER:
  case TYP_NEGATIVE_INTEGER:
  case TYP_BYTE:
  case TYP_UNSIGNED_BYTE:
  case TYP_SHORT:
  case TYP_UNSIGNED_SHORT:
  case TYP_INT:
  case TYP_UNSIGNED_INT:
  case TYP_LONG:
  case TYP_UNSIGNED_LONG: return deccmp(val,strlen(val),s,n)==0;
  case NTYP: (*error_handler)(XSD_ER_TYP,typ); return 0;
  default: assert(0);
  }
  return 0;
}

/* Self-test for the helpers in this file; every check is an assert, so the
   function returns normally only when all of them hold (and checks nothing
   when assertions are compiled out). */
void xsd_test() {
  rx_init();

  /* toklenn: the expected values equal the length of the text once leading
     and trailing spaces are dropped and inner runs count as one character */
  assert(toklenn("",0)==0);
  assert(toklenn("A",1)==1);
  assert(toklenn(" A  ",4)==1);
  assert(toklenn(" A  B  ",7)==3);

  /* tokcntn: the expected values equal the number of space-separated tokens */
  assert(tokcntn("",0)==0);
  assert(tokcntn("A",1)==1);
  assert(tokcntn("AB CD",5)==2);
  assert(tokcntn("   AB  C ",9)==2);

  /* diglenn: digit count of a decimal; sign and redundant zeros not counted */
  assert(diglenn(" +14.25",7)==4);
  assert(diglenn("1",1)==1);
  assert(diglenn("0",1)==1);
  assert(diglenn("+00.0",5)==1);

  /* fdiglenn: count of fraction digits; trailing zeros not counted */
  assert(fdiglenn(".1",2)==1);
  assert(fdiglenn("+0.0140",7)==3);
  assert(fdiglenn("0",1)==0);

  /* deccmp: numeric comparison of decimals regardless of spelling */
  assert(deccmp("0",1,"0.0",3)==0);
  assert(deccmp("1 ",2," 1",2)==0);
  assert(deccmp("0.",2,".0",2)==0);
  assert(deccmp("1",1,"1.0",3)==0);
  assert(deccmp("01.1",4,"1.10",4)==0);
  assert(deccmp("+1",2,"1.0",3)==0);
  assert(deccmp("+0.",3,"-0",2)==0);
  assert(deccmp("0",1,"0.1",3)<0);
  assert(deccmp("1.",2,".0",2)>0);
  assert(deccmp("+1",2,"-1",2)>0);

  /* hexcmpn: case of hex letters and white space are ignored */
  assert(hexcmpn("","",0)==0);
  assert(hexcmpn("ABC123","ABC123",6)==0);
  assert(hexcmpn("aBCd","AbCd",4)==0);
  assert(hexcmpn("ABC 123"," ABC123",7)==0);
  assert(hexcmpn("ABC124","ABC123",6)>0);

  /* base64Binary pattern: inputs that must be accepted */
  assert(rx_match(PAT_BASE64_BINARY,"",0));
  assert(rx_match(PAT_BASE64_BINARY,"YmFz",4));
  assert(rx_match(PAT_BASE64_BINARY,"YA==",4));
  assert(rx_match(PAT_BASE64_BINARY,"Y w = =",7));
  assert(rx_match(PAT_BASE64_BINARY,"YF8=",4));

  /* base64Binary pattern: inputs that must be rejected */
  assert(!rx_match(PAT_BASE64_BINARY,"YmF@",4));
  assert(!rx_match(PAT_BASE64_BINARY,"YmFgH",5));
  assert(!rx_match(PAT_BASE64_BINARY,"Y===",4));
  assert(!rx_match(PAT_BASE64_BINARY,"YF=O",4));
  assert(!rx_match(PAT_BASE64_BINARY,"YFZ=",4));
 
  /* b64cmpn: white space is ignored, case is significant */
  assert(b64cmpn("","",0)==0);
  assert(b64cmpn("ABC123","ABC123",6)==0);
  assert(b64cmpn("ABC 123"," ABC123",7)==0);
  assert(b64cmpn("ABC124","ABC123",6)>0);
  assert(b64cmpn("ABC123","abc123",6)<0);

  /* nrmcmpn: white space characters are interchangeable but not skipped */
  assert(nrmcmpn("A B","A B",3)==0);
  assert(nrmcmpn("A B","A C",3)<0);
  assert(nrmcmpn("A B","A\nB",3)==0);
  assert(nrmcmpn(" A","A ",2)<0);
}

--- NEW FILE: changes.txt ---
/* $Id: changes.txt,v 1.1 2009/08/03 05:32:46 mike Exp $ */

1.7.8 Thu Nov  2 12:52:18 AMT 2006
dvd: build script for OpenVMS by Jim Duff, http://eight-cubed.com/

1.7.7 Sun Jan 29 11:13:23 PST 2006
dvd: rnv exits with non-zero return code if validation fails.

1.7.6 Fri Jan  7 01:07:37 AMT 2005
dvd: base64: I hope I got it right this time.

Thu Jan  6 15:46:55 AMT 2005
dvd: base64 regular expression checks full syntax of RFC2045.

1.7.5 Thu Jan  6 02:18:51 AMT 2005
dvd: build error fixed

1.7.4 Wed Jan  5 13:55:41 AMT 2005
dvd: unit tests for s.c and xsd.c. 

1.7.3 Wed Jan  5 05:57:55 AMT 2005
dvd: bug in xsd.c:b64cmpn fixed (thanks to Sridhar Guthula), 
     tiny code cleanup in comparison routines. 

1.7.2 Tue Nov  9 15:28:23 AMT 2004
dvd: fixed a bug in rvp.py, didn't update the current pattern after
     text or mixed

1.7.1 Thu Aug 19 00:58:58 AMST 2004
dvd: discovered and fixed a bug with parsing of builtin datatypes,
     no one seems to use them explicitly.

1.7.0 Sat Mar 27 19:26:39 AMT 2004
dvd: entities are not expanded, use XX if you need to.
     error messages have the same syntax as Jing's.

1.6.6 Sun Mar 21 22:41:10 AMT 2004
dvd: turned on parsing of external parameter entities;
     with the next next release rnv will be split in two;
     the core utility will just validate an XML stream, an optional
     preprocessor will handle entities, dtds and xinclude

1.6.5 Sat Mar 13 21:24:31 AMT 2004
dvd: continued overhaul of rn.c and other places

1.6.4 Fri Mar 12 13:15:33 AMT 2004
dvd: looked again at the code in rn.c and rnl.c, streamlined and simplified
     many things

Fri Mar 12 04:44:17 AMT 2004
dvd: better logic in mark_p

1.6.3 Thu Mar 11 22:47:17 AMT 2004
dvd: Bug fix in mark_p -- no more dangling references to unused patterns.

1.6.2 Mon Mar  8 13:22:52 AMT 2004
dvd: Added processing of davidashen-net-xg-file and -pos PIs to
     work with xg.

1.6.1 Mon Mar  1 12:55:48 AMT 2004
dvd: Added ranlib to Makefile to make rnv build smoothly on Mac OS X.

1.6.0 Thu Feb 26 21:14:39 AMT 2004
dvd: fixed mixed to be interleave, not choice. The test for this
     was wrong.

Wed Feb 25 03:59:45 AMT 2004
dvd: got rid of ##; prone to name conflicts

1.5.8 Wed Feb 18 16:53:05 AMT 2004
dvd: enhanced diagnostics: first required elements and attributes
     are reported, then optional

1.5.7 Mon Feb 16 20:03:13 AMT 2004
dvd: rewrote the pattern space compression algorithm -- it should
     be correct now, and if it is still not, debugging should be
     much easier

1.5.6 Tue Feb 10 19:37:07 AMT 2004
dvd: actually turned path checks on

1.5.5 Fri Jan 30 11:07:01 AMT 2004
dvd: added check for recursion in structured regexps

1.5.4 Fri Jan 30 00:10:32 AMT 2004
dvd: implemented structured regexps for scheme-datatypes

1.5.3 Thu Jan 29 03:19:29 AMT 2004
dvd: made it compile on SunOS 5.8 (name clashes)

1.5.2 Wed Jan 28 15:35:38 AMT 2004
dvd: added handler for external system entities
     Makefile.bcc is back; people still need win32, and cygwin is bulky.

Wed Jan 28 01:43:42 AMT 2004
dvd: removed static pointers from rnv and qualified name passed
     from Expat is modified instead. This is safe.

Mon Jan 26 21:13:30 AMT 2004
dvd: ++ moved out of assert in xsd_tm.c

1.5.1 Mon Jan 26 01:25:42 AMT 2004
dvd: Internal release, regexp in Scheme implemented

Sat Jan 24 22:42:58 AMT 2004
dvd: fixed rx.c: upper bound in quantifier

Fri Jan 23 21:51:34 AMT 2004
dvd: newRef+1 is initialized to 0 (bugfix, integrity check in ht)

Wed Jan 21 02:38:09 AMT 2004
dvd: SCM is now working as embedded engine; it was not obvious
     that the initialization call had to be lower on the stack
     than all other calls to SCM;
     static mode added to m, if M_STATIC is non-zero it is the
     size of available memory in bytes -- good for debugging.
     
Tue Jan 20 22:24:50 AMT 2004
dvd: changed rx to conform to XSD errata (- is not a valid range)
     added -d and -e to rnv; I was not sure for a long time whether
     I should do it.

Tue Jan 20 04:45:39 AMT 2004
dvd: rearranged a lot of things; put error reporting through a single
     function er_vprintf, useful for interfacing to scripting languages;
     some success with dsl -- dsl-equal? works

Tue Jan 20 00:24:56 AMT 2004
dvd: renamed strops to s, memops to m, functions have s_ and m_ prefixes

Mon Jan 19 13:43:57 AMT 2004
dvd: fixed typo in diagnostics when literal is missing after ~

Thu Jan 15 18:10:02 AMT 2004
dvd: added -d command to rvp to load external type checker;
     dxl.c dxl.h implement
     http://davidashen.net/relaxng/pluggable-datatypes datatype;
     xsdck makes xsd: pluggable.

1.4.1 Tue Jan 13 14:29:08 AMT 2004
dvd: added sys/types.h where appropriate; fixed Makefile.bcc
     to include rnl.{c,h,obj}

1.4.0 Mon Jan 12 17:39:58 AMT 2004
dvd: changed rvp.py to use os.read|os.write for performance

Mon Jan 12 03:53:56 AMT 2004
dvd: rvp is implemented; reads validation events from input, sends diagnostics
     to output; embedding examples in perl and python

Sat Jan 10 03:18:12 AMT 2004
dvd: factored out loading of rnc into rnl; now, instead of calling rnc
     and rnd separately, rnl_(fn|fd|s) is called.

1.3.5 Tue Jan  6 11:56:06 AMT 2004
dvd: learned that UTF-8 BOM is reality, added processing; fixed check for name
     characters in arx, made it >0x7f||nmtoken  (no good reason to process unicode);
     added end-of-line to error messages from Expat

1.3.4 Mon Jan  5 22:43:05 AMT 2004
dvd: grammar samples added to the distribution, entries for other grammars
     commented out in tools/arx.conf

Mon Jan  5 17:00:57 AMT 2004
dvd: isany is now ary_isany (ary is helpers for arx)

Mon Jan  5 00:56:25 AMT 2004
dvd: rnx_isany is added. It is used by arx and checks for a pattern that matches any element;
     should be written as
	      any = (element * {any}|attribute * {text}|text)*
     (triple choice, elements in any order)

Sun Jan  4 03:38:10 AMT 2004
dvd: Renamings

1.3.3 Sat Jan  3 18:58:36 AMT 2004
dvd: arx, a grammar (and not just grammar) association utility has been added;
     performance improvements, Makefile.bcc to compile the binaries under
     win32+Borland C/C++ builder

1.3.2 Fri Jan  2 04:30:30 AMT 2004
dvd: multiple cleanups and speedups; ht_deli added

Tue Dec 30 23:40:59 AMT 2003
dvd: uri2rnc.pl and rnv.vim as a proof-of-concept; learned a lot of things
     about vim

Tue Dec 30 17:24:05 AMT 2003
dvd: added warning about include in includeContent

Tue Dec 30 04:19:22 AMT 2003
dvd: expected now correctly returns attributes behind elements

Mon Dec 29 20:26:01 AMT 2003
dvd: error handling refactored, validation logic separated from command-line interface

1.3.1 Mon Dec 29 01:58:38 AMT 2003
dvd: invalid elements are correctly skipped (they were supposed to before), but the code
     contained a fallout that prevented them from doing so

1.3.0 Mon Dec 29 01:57:46 AMT 2003
dvd: pattern pools are now one-dimensional, pointer arithmetics is a little more
     complex, but performance and memory use are better; besides, this makes pattern
     datatype extensible, which is a good thing

1.2.1 Fri Dec 26 15:50:49 AMT 2003
dvd: error reporting redone; errors for xsd and rx are routed through validation
     errors' handler

Fri Dec 26 01:55:37 AMT 2003
dvd: debugged on NIST tests; the only tests which don't pass when they should
     are those with doubles due to overflow and underflow.

Thu Dec 25 19:30:55 AMT 2003
dvd: added puorg_nr to implement attribute_open correctly;
     empty elements are correctly validated against data and values;
     equality and ordering for dateTime (and its partial variants) implemented;
     rx,xsd use overloadable error handlers with variable lists.

1.2.0 Wed Dec 24 04:40:25 AMT 2003
dvd: XML Schema Part 2: regular expressions and datatypes. many things are
     implemented, with exception of context-dependent checks and a calendar
     library; optimizations and cleanups.

Fri Dec 19 22:19:05 AMT 2003
dvd: rn_params joined with rn_string

1.1.0 Fri Dec 19 03:56:46 AMT 2003
dvd: drv_attribute_open|drv_attribute_close and memoization implemented;
     hash values are multiplied by large primes - helps with current hashing
     algorithms; garbage collection added to rn (rn_compress,
     rn_compress_last); compact mode implemented for validation, the buffer for
     memoized patterns does not exceed a limit when the validator runs in
     compact mode.

1.0.9 Wed Dec 17 23:36:05 AMT 2003
dvd: drv_mixed_text added, results memoized; code cleanups to compile easier;
     added targets for static and shared libraries to the Makefile, many small
     touch-ups

1.0.8; Tue Dec 16 14:05:16 AMT 2003
dvd: memory for error message in rnv.c is allocated properly

1.0.7; Tue Dec 16 00:12:04 AMT 2003
dvd: added reporting of attribute value for invalid attributes

--- NEW FILE: dxl.h ---
/* $Id: dxl.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */

/* Interface of the datatype library identified by DXL_URL. */
#ifndef DXL_H
#define DXL_H 1

/* Build-time switch; defaults to 0 unless defined by the including code.
   NOTE(review): its exact effect is decided in dxl.c -- confirm there. */
#ifndef DXL_EXC
#define DXL_EXC 0
#endif

/* URI naming this datatype library */
#define DXL_URL "http://davidashen.net/relaxng/pluggable-datatypes"

/* NOTE(review): presumably the command line of the external type checker --
   confirm against dxl.c */
extern char *dxl_cmd;

/* typ is the datatype name, ps the parameters, s/n the string and its length;
   NOTE(review): assumed to mirror xsd_allows -- non-zero when s is allowed */
extern int dxl_allows(char *typ,char *ps,char *s,int n);
/* typ is the datatype name, val the reference value, s/n the string and its
   length; NOTE(review): assumed to mirror xsd_equal -- non-zero when equal */
extern int dxl_equal(char *typ,char *val,char *s,int n);

#endif

--- NEW FILE: u.h ---
/* $Id: u.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */

/* UTF-8 helpers: BOM detection, decoding, encoding, length, range lookup. */
#ifndef U_H
#define U_H 1

/* NOTE(review): presumably the maximum number of octets u_put can write for
   one character -- confirm against u.c */
#define U_MAXLEN 6

/* returns BOM length if the string (s, n octets long) starts with BOM */
extern int u_bom(char *s,int n);

/* computes a unicode character off the head of s and stores it in *up;
 returns number of bytes read. 0 means error.
 */
extern int u_get(int *up,char *s);

/* encodes u in utf-8 into s, returns number of octets taken */
extern int u_put(char *s,int u);

/* number of unicode characters in the string; -1 means error;
 u_strnlen takes the length n explicitly
 (NOTE(review): n is presumably in octets -- confirm against u.c) */
extern int u_strlen(char *s);
extern int u_strnlen(char *s,int n);

/* checks whether a character u falls within one of the len sorted ranges r;
 each range is a {first,last} pair */
extern int u_in_ranges(int u,int r[][2],int len);

#endif

--- NEW FILE: ht.c ---
/* $Id: ht.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#include <stdlib.h> /*NULL*/
#include <assert.h> /*assert*/
#include "m.h"
#include "ht.h"

#define LOAD_FACTOR 2

/* Prepare ht for about len entries.
   The table length becomes the smallest power of two that is not less than
   len*LOAD_FACTOR, and limit (the fill level at which ht_put grows the table)
   is that length divided by LOAD_FACTOR. The array is allocated at twice the
   table length: the first half holds entries, the second half their hash
   values. hash and equal are the callbacks used to place and compare
   entries. The table is cleared before returning. */
void ht_init(struct hashtable *ht,int len,int (*hash)(int),int (*equal)(int,int)) {
  assert(len>0);
  ht->tablen=1;
  for(len*=LOAD_FACTOR;ht->tablen<len;ht->tablen<<=1) ;
  ht->limit=ht->tablen/LOAD_FACTOR;
  /* entries in [0,tablen), hash values in [tablen,2*tablen) */
  ht->table=(int*)m_alloc(ht->tablen<<1,sizeof(int));
  ht->hash=hash;
  ht->equal=equal;
  ht_clear(ht);
}

/* Empty the table: reset the entry count and mark every entry slot free (-1).
   Only the entry half of the array is touched; the stored hash values are
   left as they are. */
void ht_clear(struct hashtable *ht) {
  int *slot=ht->table,*stop=ht->table+ht->tablen;
  ht->used=0;
  while(slot!=stop) *slot++=-1;
}

/* Release the table's storage and leave a NULL pointer behind. */
void ht_dispose(struct hashtable *ht) {
  int *old=ht->table;
  m_free(old);
  ht->table=NULL;
}

/* first: home slot for hash value hv (tablen is a power of two, so the mask
   keeps the low bits).
   next: slot probed after i; probing walks downwards and wraps from 0 to
   tablen-1.
   Every parameter is parenthesized so that arguments containing operators of
   lower precedence (such as a|b) expand correctly. */
#define first(ht,hv) ((hv)&((ht)->tablen-1))
#define next(ht,i) ((i)==0?(ht)->tablen-1:(i)-1)

/* Look up an entry equal to i (according to the table's equal callback).
   Probing starts at the home slot of i's hash value and stops at the first
   free slot. Returns the stored entry, or -1 when there is none. */
int ht_get(struct hashtable *ht,int i) {
  int hv=ht->hash(i);
  int slot=first(ht,hv);
  int found;
  while((found=ht->table[slot])!=-1) {
    if(ht->equal(i,found)) return found;
    slot=next(ht,slot);
  }
  return -1;
}

/* Insert entry i; an equal entry must not be present (checked by assert).
   When the table has reached its limit it is first doubled: a new array is
   allocated, all entry slots are marked free, and every old entry is
   re-inserted using its stored hash value, so the hash callback is not
   called again for existing entries. */
void ht_put(struct hashtable *ht,int i) {
  int hv=ht->hash(i),pos;
  if(ht->used==ht->limit) {
    int *old=ht->table;
    int oldlen=ht->tablen;
    int src;
    ht->tablen<<=1; ht->limit<<=1;
    ht->table=(int*)m_alloc(ht->tablen<<1,sizeof(int));
    for(pos=0;pos!=ht->tablen;++pos) ht->table[pos]=-1;
    for(src=0;src!=oldlen;++src) {
      int dst,ohv;
      if(old[src]==-1) continue;
      /* the hash value sits oldlen slots after its entry */
      ohv=old[src|oldlen];
      dst=first(ht,ohv);
      while(ht->table[dst]!=-1) dst=next(ht,dst);
      ht->table[dst]=old[src];
      ht->table[dst|ht->tablen]=ohv;
    }
    m_free(old);
  }
  /* walk the probe sequence to the first free slot */
  pos=first(ht,hv);
  while(ht->table[pos]!=-1) {
    assert(!ht->equal(i,ht->table[pos]));
    pos=next(ht,pos);
  }
  ht->table[pos]=i;
  ht->table[ht->tablen|pos]=hv;
  ++ht->used;
}

/* Remove an entry matching i and return the stored entry, or -1 when the
   table is empty or no match is found.
   eq selects the match rule: non-zero compares the stored entry with i
   directly (i==tj), zero uses the table's equal callback.
   After the entry is removed, later entries of the probe sequence are moved
   back into the hole so that the sequence has no gap in front of them. */
static int del(struct hashtable *ht,int i,int eq) {
  if(ht->used!=0) {
    int hv=ht->hash(i),j;
    for(j=first(ht,hv);;j=next(ht,j)) {
      int tj=ht->table[j];
      if(tj==-1) break; /* free slot: end of probe sequence, not found */
      if(eq?i==tj:ht->equal(i,tj)) {
	/* each pass frees slot j, remembers it as hole k, then scans on for
	   an entry to move into the hole */
	do {
	  int k=j,j0;
	  ht->table[j]=-1;
	  for(;;) {
	    j=next(ht,j);
	    if(ht->table[j]==-1) break; /* nothing more to move */
	    j0=first(ht,ht->table[j|ht->tablen]); /* home slot of entry at j */
	    /* stop at the entry that is to be moved into hole k.
	       NOTE(review): this looks like the cyclic-order test on k, j0, j
	       from the classic linear-probing deletion algorithm -- confirm */
	    if((k<=j0||j0<j)&&(j0<j||j<=k)&&(j<=k||k<=j0)) break;
	  }
	  /* fill the hole with the entry at j and its hash value; when j is a
	     free slot this copies -1 as the entry and the loop ends */
	  ht->table[k]=ht->table[j]; ht->table[k|ht->tablen]=ht->table[j|ht->tablen];
	} while(ht->table[j]!=-1);
	--ht->used;
	return tj;
      }
    }
  }
  return -1;
}
/* Delete the entry that the table's equal callback matches with i;
   returns the deleted entry or -1. */
int ht_del(struct hashtable *ht,int i) {
  return del(ht,i,0);
}
/* Delete the entry whose stored value is exactly i (no equal callback);
   returns the deleted entry or -1. */
int ht_deli(struct hashtable *ht,int i) {
  return del(ht,i,1);
}

--- NEW FILE: rx_cls_ranges.c ---
/* $Id: rx_cls_ranges.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */

/* blocks: each table lists inclusive {first,last} code point ranges */
static int IsBasicLatinRanges[][2]={{0x0000,0x007F}};
static int IsLatin_1SupplementRanges[][2]={{0x0080,0x00FF}};
static int IsLatinExtended_ARanges[][2]={{0x0100,0x017F}};
static int IsLatinExtended_BRanges[][2]={{0x0180,0x024F}};
static int IsIPAExtensionsRanges[][2]={{0x0250,0x02AF}};
static int IsSpacingModifierLettersRanges[][2]={{0x02B0,0x02FF}};
static int IsCombiningDiacriticalMarksRanges[][2]={{0x0300,0x036F}};
static int IsGreekRanges[][2]={{0x0370,0x03FF}};
static int IsCyrillicRanges[][2]={{0x0400,0x04FF}};
static int IsArmenianRanges[][2]={{0x0530,0x058F}};
static int IsHebrewRanges[][2]={{0x0590,0x05FF}};
static int IsArabicRanges[][2]={{0x0600,0x06FF}};
static int IsSyriacRanges[][2]={{0x0700,0x074F}};
static int IsThaanaRanges[][2]={{0x0780,0x07BF}};
static int IsDevanagariRanges[][2]={{0x0900,0x097F}};
static int IsBengaliRanges[][2]={{0x0980,0x09FF}};
static int IsGurmukhiRanges[][2]={{0x0A00,0x0A7F}};
static int IsGujaratiRanges[][2]={{0x0A80,0x0AFF}};
static int IsOriyaRanges[][2]={{0x0B00,0x0B7F}};
static int IsTamilRanges[][2]={{0x0B80,0x0BFF}};
static int IsTeluguRanges[][2]={{0x0C00,0x0C7F}};
static int IsKannadaRanges[][2]={{0x0C80,0x0CFF}};
static int IsMalayalamRanges[][2]={{0x0D00,0x0D7F}};
static int IsSinhalaRanges[][2]={{0x0D80,0x0DFF}};
static int IsThaiRanges[][2]={{0x0E00,0x0E7F}};
static int IsLaoRanges[][2]={{0x0E80,0x0EFF}};
static int IsTibetanRanges[][2]={{0x0F00,0x0FFF}};
static int IsMyanmarRanges[][2]={{0x1000,0x109F}};
static int IsGeorgianRanges[][2]={{0x10A0,0x10FF}};
static int IsHangulJamoRanges[][2]={{0x1100,0x11FF}};
static int IsEthiopicRanges[][2]={{0x1200,0x137F}};
static int IsCherokeeRanges[][2]={{0x13A0,0x13FF}};
static int IsUnifiedCanadianAboriginalSyllabicsRanges[][2]={{0x1400,0x167F}};
static int IsOghamRanges[][2]={{0x1680,0x169F}};
static int IsRunicRanges[][2]={{0x16A0,0x16FF}};
static int IsKhmerRanges[][2]={{0x1780,0x17FF}};
static int IsMongolianRanges[][2]={{0x1800,0x18AF}};
static int IsLatinExtendedAdditionalRanges[][2]={{0x1E00,0x1EFF}};
static int IsGreekExtendedRanges[][2]={{0x1F00,0x1FFF}};
static int IsGeneralPunctuationRanges[][2]={{0x2000,0x206F}};
static int IsSuperscriptsandSubscriptsRanges[][2]={{0x2070,0x209F}};
static int IsCurrencySymbolsRanges[][2]={{0x20A0,0x20CF}};
static int IsCombiningMarksforSymbolsRanges[][2]={{0x20D0,0x20FF}};
static int IsLetterlikeSymbolsRanges[][2]={{0x2100,0x214F}};
static int IsNumberFormsRanges[][2]={{0x2150,0x218F}};
static int IsArrowsRanges[][2]={{0x2190,0x21FF}};
static int IsMathematicalOperatorsRanges[][2]={{0x2200,0x22FF}};
static int IsMiscellaneousTechnicalRanges[][2]={{0x2300,0x23FF}};
static int IsControlPicturesRanges[][2]={{0x2400,0x243F}};
static int IsOpticalCharacterRecognitionRanges[][2]={{0x2440,0x245F}};
static int IsEnclosedAlphanumericsRanges[][2]={{0x2460,0x24FF}};
static int IsBoxDrawingRanges[][2]={{0x2500,0x257F}};
static int IsBlockElementsRanges[][2]={{0x2580,0x259F}};
static int IsGeometricShapesRanges[][2]={{0x25A0,0x25FF}};
static int IsMiscellaneousSymbolsRanges[][2]={{0x2600,0x26FF}};
static int IsDingbatsRanges[][2]={{0x2700,0x27BF}};
static int IsBraillePatternsRanges[][2]={{0x2800,0x28FF}};
static int IsCJKRadicalsSupplementRanges[][2]={{0x2E80,0x2EFF}};
static int IsKangxiRadicalsRanges[][2]={{0x2F00,0x2FDF}};
static int IsIdeographicDescriptionCharactersRanges[][2]={{0x2FF0,0x2FFF}};
static int IsCJKSymbolsandPunctuationRanges[][2]={{0x3000,0x303F}};
static int IsHiraganaRanges[][2]={{0x3040,0x309F}};
static int IsKatakanaRanges[][2]={{0x30A0,0x30FF}};
static int IsBopomofoRanges[][2]={{0x3100,0x312F}};
static int IsHangulCompatibilityJamoRanges[][2]={{0x3130,0x318F}};
static int IsKanbunRanges[][2]={{0x3190,0x319F}};
static int IsBopomofoExtendedRanges[][2]={{0x31A0,0x31BF}};
static int IsEnclosedCJKLettersandMonthsRanges[][2]={{0x3200,0x32FF}};
static int IsCJKCompatibilityRanges[][2]={{0x3300,0x33FF}};
static int IsCJKUnifiedIdeographsExtensionARanges[][2]={{0x3400,0x4DB5}};
static int IsCJKUnifiedIdeographsRanges[][2]={{0x4E00,0x9FFF}};
static int IsYiSyllablesRanges[][2]={{0xA000,0xA48F}};
static int IsYiRadicalsRanges[][2]={{0xA490,0xA4CF}};
static int IsHangulSyllablesRanges[][2]={{0xAC00,0xD7A3}};
static int IsCJKCompatibilityIdeographsRanges[][2]={{0xF900,0xFAFF}};
static int IsAlphabeticPresentationFormsRanges[][2]={{0xFB00,0xFB4F}};
static int IsArabicPresentationForms_ARanges[][2]={{0xFB50,0xFDFF}};
static int IsCombiningHalfMarksRanges[][2]={{0xFE20,0xFE2F}};
static int IsCJKCompatibilityFormsRanges[][2]={{0xFE30,0xFE4F}};
static int IsSmallFormVariantsRanges[][2]={{0xFE50,0xFE6F}};
static int IsArabicPresentationForms_BRanges[][2]={{0xFE70,0xFEFE}};
static int IsSpecialsRanges[][2]={{0xFEFF,0xFEFF},{0xFFF0,0xFFFD}};
static int IsHalfwidthandFullwidthFormsRanges[][2]={{0xFF00,0xFFEF}};
static int IsOldItalicRanges[][2]={{0x10300,0x1032F}};
static int IsGothicRanges[][2]={{0x10330,0x1034F}};
static int IsDeseretRanges[][2]={{0x10400,0x1044F}};
static int IsByzantineMusicalSymbolsRanges[][2]={{0x1D000,0x1D0FF}};
static int IsMusicalSymbolsRanges[][2]={{0x1D100,0x1D1FF}};
static int IsMathematicalAlphanumericSymbolsRanges[][2]={{0x1D400,0x1D7FF}};
static int IsCJKUnifiedIdeographsExtensionBRanges[][2]={{0x20000,0x2A6D6}};
static int IsCJKCompatibilityIdeographsSupplementRanges[][2]={{0x2F800,0x2FA1F}};
static int IsTagsRanges[][2]={{0xE0000,0xE007F}};
static int IsPrivateUseRanges[][2]={{0xE000,0xF8FF},{0xF0000,0xFFFFD},{0x100000,0x10FFFD}};

/* classes: inclusive {first,last} code point ranges in ascending order */
static int LuRanges[][2]={{0x41,0x5A},{0xC0,0xD6},{0xD8,0xDE},{0x100,0x100},{0x102,0x102},{0x104,0x104},{0x106,0x106},{0x108,0x108},{0x10A,0x10A},{0x10C,0x10C},{0x10E,0x10E},{0x110,0x110},{0x112,0x112},{0x114,0x114},{0x116,0x116},{0x118,0x118},{0x11A,0x11A},{0x11C,0x11C},{0x11E,0x11E},{0x120,0x120},{0x122,0x122},{0x124,0x124},{0x126,0x126},{0x128,0x128},{0x12A,0x12A},{0x12C,0x12C},{0x12E,0x12E},{0x130,0x130},{0x132,0x132},{0x134,0x134},{0x136,0x136},{0x139,0x139},{0x13B,0x13B},{0x13D,0x13D},{0x13F,0x13F},{0x141,0x141},{0x143,0x143},{0x145,0x145},{0x147,0x147},{0x14A,0x14A},{0x14C,0x14C},{0x14E,0x14E},{0x150,0x150},{0x152,0x152},{0x154,0x154},{0x156,0x156},{0x158,0x158},{0x15A,0x15A},{0x15C,0x15C},{0x15E,0x15E},{0x160,0x160},{0x162,0x162},{0x164,0x164},{0x166,0x166},{0x168,0x168},{0x16A,0x16A},{0x16C,0x16C},{0x16E,0x16E},{0x170,0x170},{0x172,0x172},{0x174,0x174},{0x176,0x176},{0x178,0x179},{0x17B,0x17B},{0x17D,0x17D},{0x181,0x182},{0x184,0x184},{0x186,0x187},{0x189,0x18B},{0x18E,0x191},{0x193,0x194},{0x196,0x198},{0x19C,0x19D},{0x19F,0x1A0},{0x1A2,0x1A2},{0x1A4,0x1A4},{0x1A6,0x1A7},{0x1A9,0x1A9},{0x1AC,0x1AC},{0x1AE,0x1AF},{0x1B1,0x1B3},{0x1B5,0x1B5},{0x1B7,0x1B8},{0x1BC,0x1BC},{0x1C4,0x1C4},{0x1C7,0x1C7},{0x1CA,0x1CA},{0x1CD,0x1CD},{0x1CF,0x1CF},{0x1D1,0x1D1},{0x1D3,0x1D3},{0x1D5,0x1D5},{0x1D7,0x1D7},{0x1D9,0x1D9},{0x1DB,0x1DB},{0x1DE,0x1DE},{0x1E0,0x1E0},{0x1E2,0x1E2},{0x1E4,0x1E4},{0x1E6,0x1E6},{0x1E8,0x1E8},{0x1EA,0x1EA},{0x1EC,0x1EC},{0x1EE,0x1EE},{0x1F1,0x1F1},{0x1F4,0x1F4},{0x1F6,0x1F8},{0x1FA,0x1FA},{0x1FC,0x1FC},{0x1FE,0x1FE},{0x200,0x200},{0x202,0x202},{0x204,0x204},{0x206,0x206},{0x208,0x208},{0x20A,0x20A},{0x20C,0x20C},{0x20E,0x20E},{0x210,0x210},{0x212,0x212},{0x214,0x214},{0x216,0x216},{0x218,0x218},{0x21A,0x21A},{0x21C,0x21C},{0x21E,0x21E},{0x222,0x222},{0x224,0x224},{0x226,0x226},{0x228,0x228},{0x22A,0x22A},{0x22C,0x22C},{0x22E,0x22E},{0x230,0x230},{0x232,0x232},{0x386,0x386},{0x388,0x38A},{0x38C,0x38C},{0x38E,0x38F},{0x391,0x3A1},{0x3A3,0x3AB},{0x3D2
,0x3D4},{0x3DA,0x3DA},{0x3DC,0x3DC},{0x3DE,0x3DE},{0x3E0,0x3E0},{0x3E2,0x3E2},{0x3E4,0x3E4},{0x3E6,0x3E6},{0x3E8,0x3E8},{0x3EA,0x3EA},{0x3EC,0x3EC},{0x3EE,0x3EE},{0x3F4,0x3F4},{0x400,0x42F},{0x460,0x460},{0x462,0x462},{0x464,0x464},{0x466,0x466},{0x468,0x468},{0x46A,0x46A},{0x46C,0x46C},{0x46E,0x46E},{0x470,0x470},{0x472,0x472},{0x474,0x474},{0x476,0x476},{0x478,0x478},{0x47A,0x47A},{0x47C,0x47C},{0x47E,0x47E},{0x480,0x480},{0x48C,0x48C},{0x48E,0x48E},{0x490,0x490},{0x492,0x492},{0x494,0x494},{0x496,0x496},{0x498,0x498},{0x49A,0x49A},{0x49C,0x49C},{0x49E,0x49E},{0x4A0,0x4A0},{0x4A2,0x4A2},{0x4A4,0x4A4},{0x4A6,0x4A6},{0x4A8,0x4A8},{0x4AA,0x4AA},{0x4AC,0x4AC},{0x4AE,0x4AE},{0x4B0,0x4B0},{0x4B2,0x4B2},{0x4B4,0x4B4},{0x4B6,0x4B6},{0x4B8,0x4B8},{0x4BA,0x4BA},{0x4BC,0x4BC},{0x4BE,0x4BE},{0x4C0,0x4C1},{0x4C3,0x4C3},{0x4C7,0x4C7},{0x4CB,0x4CB},{0x4D0,0x4D0},{0x4D2,0x4D2},{0x4D4,0x4D4},{0x4D6,0x4D6},{0x4D8,0x4D8},{0x4DA,0x4DA},{0x4DC,0x4DC},{0x4DE,0x4DE},{0x4E0,0x4E0},{0x4E2,0x4E2},{0x4E4,0x4E4},{0x4E6,0x4E6},{0x4E8,0x4E8},{0x4EA,0x4EA},{0x4EC,0x4EC},{0x4EE,0x4EE},{0x4F0,0x4F0},{0x4F2,0x4F2},{0x4F4,0x4F4},{0x4F8,0x4F8},{0x531,0x556},{0x10A0,0x10C5},{0x1E00,0x1E00},{0x1E02,0x1E02},{0x1E04,0x1E04},{0x1E06,0x1E06},{0x1E08,0x1E08},{0x1E0A,0x1E0A},{0x1E0C,0x1E0C},{0x1E0E,0x1E0E},{0x1E10,0x1E10},{0x1E12,0x1E12},{0x1E14,0x1E14},{0x1E16,0x1E16},{0x1E18,0x1E18},{0x1E1A,0x1E1A},{0x1E1C,0x1E1C},{0x1E1E,0x1E1E},{0x1E20,0x1E20},{0x1E22,0x1E22},{0x1E24,0x1E24},{0x1E26,0x1E26},{0x1E28,0x1E28},{0x1E2A,0x1E2A},{0x1E2C,0x1E2C},{0x1E2E,0x1E2E},{0x1E30,0x1E30},{0x1E32,0x1E32},{0x1E34,0x1E34},{0x1E36,0x1E36},{0x1E38,0x1E38},{0x1E3A,0x1E3A},{0x1E3C,0x1E3C},{0x1E3E,0x1E3E},{0x1E40,0x1E40},{0x1E42,0x1E42},{0x1E44,0x1E44},{0x1E46,0x1E46},{0x1E48,0x1E48},{0x1E4A,0x1E4A},{0x1E4C,0x1E4C},{0x1E4E,0x1E4E},{0x1E50,0x1E50},{0x1E52,0x1E52},{0x1E54,0x1E54},{0x1E56,0x1E56},{0x1E58,0x1E58},{0x1E5A,0x1E5A},{0x1E5C,0x1E5C},{0x1E5E,0x1E5E},{0x1E60,0x1E60},{0x1E62,0x1E62},{0x1E64,0x1E64},{0x1E66,0x1E66},{0x1E68,0x
1E68},{0x1E6A,0x1E6A},{0x1E6C,0x1E6C},{0x1E6E,0x1E6E},{0x1E70,0x1E70},{0x1E72,0x1E72},{0x1E74,0x1E74},{0x1E76,0x1E76},{0x1E78,0x1E78},{0x1E7A,0x1E7A},{0x1E7C,0x1E7C},{0x1E7E,0x1E7E},{0x1E80,0x1E80},{0x1E82,0x1E82},{0x1E84,0x1E84},{0x1E86,0x1E86},{0x1E88,0x1E88},{0x1E8A,0x1E8A},{0x1E8C,0x1E8C},{0x1E8E,0x1E8E},{0x1E90,0x1E90},{0x1E92,0x1E92},{0x1E94,0x1E94},{0x1EA0,0x1EA0},{0x1EA2,0x1EA2},{0x1EA4,0x1EA4},{0x1EA6,0x1EA6},{0x1EA8,0x1EA8},{0x1EAA,0x1EAA},{0x1EAC,0x1EAC},{0x1EAE,0x1EAE},{0x1EB0,0x1EB0},{0x1EB2,0x1EB2},{0x1EB4,0x1EB4},{0x1EB6,0x1EB6},{0x1EB8,0x1EB8},{0x1EBA,0x1EBA},{0x1EBC,0x1EBC},{0x1EBE,0x1EBE},{0x1EC0,0x1EC0},{0x1EC2,0x1EC2},{0x1EC4,0x1EC4},{0x1EC6,0x1EC6},{0x1EC8,0x1EC8},{0x1ECA,0x1ECA},{0x1ECC,0x1ECC},{0x1ECE,0x1ECE},{0x1ED0,0x1ED0},{0x1ED2,0x1ED2},{0x1ED4,0x1ED4},{0x1ED6,0x1ED6},{0x1ED8,0x1ED8},{0x1EDA,0x1EDA},{0x1EDC,0x1EDC},{0x1EDE,0x1EDE},{0x1EE0,0x1EE0},{0x1EE2,0x1EE2},{0x1EE4,0x1EE4},{0x1EE6,0x1EE6},{0x1EE8,0x1EE8},{0x1EEA,0x1EEA},{0x1EEC,0x1EEC},{0x1EEE,0x1EEE},{0x1EF0,0x1EF0},{0x1EF2,0x1EF2},{0x1EF4,0x1EF4},{0x1EF6,0x1EF6},{0x1EF8,0x1EF8},{0x1F08,0x1F0F},{0x1F18,0x1F1D},{0x1F28,0x1F2F},{0x1F38,0x1F3F},{0x1F48,0x1F4D},{0x1F59,0x1F59},{0x1F5B,0x1F5B},{0x1F5D,0x1F5D},{0x1F5F,0x1F5F},{0x1F68,0x1F6F},{0x1FB8,0x1FBB},{0x1FC8,0x1FCB},{0x1FD8,0x1FDB},{0x1FE8,0x1FEC},{0x1FF8,0x1FFB},{0x2102,0x2102},{0x2107,0x2107},{0x210B,0x210D},{0x2110,0x2112},{0x2115,0x2115},{0x2119,0x211D},{0x2124,0x2124},{0x2126,0x2126},{0x2128,0x2128},{0x212A,0x212D},{0x2130,0x2131},{0x2133,0x2133},{0xFF21,0xFF3A},{0x10400,0x10425},{0x1D400,0x1D419},{0x1D434,0x1D44D},{0x1D468,0x1D481},{0x1D49C,0x1D49C},{0x1D49E,0x1D49F},{0x1D4A2,0x1D4A2},{0x1D4A5,0x1D4A6},{0x1D4A9,0x1D4AC},{0x1D4AE,0x1D4B5},{0x1D4D0,0x1D4E9},{0x1D504,0x1D505},{0x1D507,0x1D50A},{0x1D50D,0x1D514},{0x1D516,0x1D51C},{0x1D538,0x1D539},{0x1D53B,0x1D53E},{0x1D540,0x1D544},{0x1D546,0x1D546},{0x1D54A,0x1D550},{0x1D56C,0x1D585},{0x1D5A0,0x1D5B9},{0x1D5D4,0x1D5ED},{0x1D608,0x1D621},{0x1D63C,0x1D655},{0x1D670,0x1D689},{0x1D6
A8,0x1D6C0},{0x1D6E2,0x1D6FA},{0x1D71C,0x1D734},{0x1D756,0x1D76E},{0x1D790,0x1D7A8}};
static int LlRanges[][2]={{0x61,0x7A},{0xAA,0xAA},{0xB5,0xB5},{0xBA,0xBA},{0xDF,0xF6},{0xF8,0xFF},{0x101,0x101},{0x103,0x103},{0x105,0x105},{0x107,0x107},{0x109,0x109},{0x10B,0x10B},{0x10D,0x10D},{0x10F,0x10F},{0x111,0x111},{0x113,0x113},{0x115,0x115},{0x117,0x117},{0x119,0x119},{0x11B,0x11B},{0x11D,0x11D},{0x11F,0x11F},{0x121,0x121},{0x123,0x123},{0x125,0x125},{0x127,0x127},{0x129,0x129},{0x12B,0x12B},{0x12D,0x12D},{0x12F,0x12F},{0x131,0x131},{0x133,0x133},{0x135,0x135},{0x137,0x138},{0x13A,0x13A},{0x13C,0x13C},{0x13E,0x13E},{0x140,0x140},{0x142,0x142},{0x144,0x144},{0x146,0x146},{0x148,0x149},{0x14B,0x14B},{0x14D,0x14D},{0x14F,0x14F},{0x151,0x151},{0x153,0x153},{0x155,0x155},{0x157,0x157},{0x159,0x159},{0x15B,0x15B},{0x15D,0x15D},{0x15F,0x15F},{0x161,0x161},{0x163,0x163},{0x165,0x165},{0x167,0x167},{0x169,0x169},{0x16B,0x16B},{0x16D,0x16D},{0x16F,0x16F},{0x171,0x171},{0x173,0x173},{0x175,0x175},{0x177,0x177},{0x17A,0x17A},{0x17C,0x17C},{0x17E,0x180},{0x183,0x183},{0x185,0x185},{0x188,0x188},{0x18C,0x18D},{0x192,0x192},{0x195,0x195},{0x199,0x19B},{0x19E,0x19E},{0x1A1,0x1A1},{0x1A3,0x1A3},{0x1A5,0x1A5},{0x1A8,0x1A8},{0x1AA,0x1AB},{0x1AD,0x1AD},{0x1B0,0x1B0},{0x1B4,0x1B4},{0x1B6,0x1B6},{0x1B9,0x1BA},{0x1BD,0x1BF},{0x1C6,0x1C6},{0x1C9,0x1C9},{0x1CC,0x1CC},{0x1CE,0x1CE},{0x1D0,0x1D0},{0x1D2,0x1D2},{0x1D4,0x1D4},{0x1D6,0x1D6},{0x1D8,0x1D8},{0x1DA,0x1DA},{0x1DC,0x1DD},{0x1DF,0x1DF},{0x1E1,0x1E1},{0x1E3,0x1E3},{0x1E5,0x1E5},{0x1E7,0x1E7},{0x1E9,0x1E9},{0x1EB,0x1EB},{0x1ED,0x1ED},{0x1EF,0x1F0},{0x1F3,0x1F3},{0x1F5,0x1F5},{0x1F9,0x1F9},{0x1FB,0x1FB},{0x1FD,0x1FD},{0x1FF,0x1FF},{0x201,0x201},{0x203,0x203},{0x205,0x205},{0x207,0x207},{0x209,0x209},{0x20B,0x20B},{0x20D,0x20D},{0x20F,0x20F},{0x211,0x211},{0x213,0x213},{0x215,0x215},{0x217,0x217},{0x219,0x219},{0x21B,0x21B},{0x21D,0x21D},{0x21F,0x21F},{0x223,0x223},{0x225,0x225},{0x227,0x227},{0x229,0x229},{0x22B,0x22B},{0x22D,0x22D},{0x22F,0x22F},{0x231,0x231},{0x233,0x233},{0x250,0x2AD},{0x390,0x390},{0x3AC,0x3CE},{0x3D0,0x3D1
},{0x3D5,0x3D7},{0x3DB,0x3DB},{0x3DD,0x3DD},{0x3DF,0x3DF},{0x3E1,0x3E1},{0x3E3,0x3E3},{0x3E5,0x3E5},{0x3E7,0x3E7},{0x3E9,0x3E9},{0x3EB,0x3EB},{0x3ED,0x3ED},{0x3EF,0x3F3},{0x3F5,0x3F5},{0x430,0x45F},{0x461,0x461},{0x463,0x463},{0x465,0x465},{0x467,0x467},{0x469,0x469},{0x46B,0x46B},{0x46D,0x46D},{0x46F,0x46F},{0x471,0x471},{0x473,0x473},{0x475,0x475},{0x477,0x477},{0x479,0x479},{0x47B,0x47B},{0x47D,0x47D},{0x47F,0x47F},{0x481,0x481},{0x48D,0x48D},{0x48F,0x48F},{0x491,0x491},{0x493,0x493},{0x495,0x495},{0x497,0x497},{0x499,0x499},{0x49B,0x49B},{0x49D,0x49D},{0x49F,0x49F},{0x4A1,0x4A1},{0x4A3,0x4A3},{0x4A5,0x4A5},{0x4A7,0x4A7},{0x4A9,0x4A9},{0x4AB,0x4AB},{0x4AD,0x4AD},{0x4AF,0x4AF},{0x4B1,0x4B1},{0x4B3,0x4B3},{0x4B5,0x4B5},{0x4B7,0x4B7},{0x4B9,0x4B9},{0x4BB,0x4BB},{0x4BD,0x4BD},{0x4BF,0x4BF},{0x4C2,0x4C2},{0x4C4,0x4C4},{0x4C8,0x4C8},{0x4CC,0x4CC},{0x4D1,0x4D1},{0x4D3,0x4D3},{0x4D5,0x4D5},{0x4D7,0x4D7},{0x4D9,0x4D9},{0x4DB,0x4DB},{0x4DD,0x4DD},{0x4DF,0x4DF},{0x4E1,0x4E1},{0x4E3,0x4E3},{0x4E5,0x4E5},{0x4E7,0x4E7},{0x4E9,0x4E9},{0x4EB,0x4EB},{0x4ED,0x4ED},{0x4EF,0x4EF},{0x4F1,0x4F1},{0x4F3,0x4F3},{0x4F5,0x4F5},{0x4F9,0x4F9},{0x561,0x587},{0x1E01,0x1E01},{0x1E03,0x1E03},{0x1E05,0x1E05},{0x1E07,0x1E07},{0x1E09,0x1E09},{0x1E0B,0x1E0B},{0x1E0D,0x1E0D},{0x1E0F,0x1E0F},{0x1E11,0x1E11},{0x1E13,0x1E13},{0x1E15,0x1E15},{0x1E17,0x1E17},{0x1E19,0x1E19},{0x1E1B,0x1E1B},{0x1E1D,0x1E1D},{0x1E1F,0x1E1F},{0x1E21,0x1E21},{0x1E23,0x1E23},{0x1E25,0x1E25},{0x1E27,0x1E27},{0x1E29,0x1E29},{0x1E2B,0x1E2B},{0x1E2D,0x1E2D},{0x1E2F,0x1E2F},{0x1E31,0x1E31},{0x1E33,0x1E33},{0x1E35,0x1E35},{0x1E37,0x1E37},{0x1E39,0x1E39},{0x1E3B,0x1E3B},{0x1E3D,0x1E3D},{0x1E3F,0x1E3F},{0x1E41,0x1E41},{0x1E43,0x1E43},{0x1E45,0x1E45},{0x1E47,0x1E47},{0x1E49,0x1E49},{0x1E4B,0x1E4B},{0x1E4D,0x1E4D},{0x1E4F,0x1E4F},{0x1E51,0x1E51},{0x1E53,0x1E53},{0x1E55,0x1E55},{0x1E57,0x1E57},{0x1E59,0x1E59},{0x1E5B,0x1E5B},{0x1E5D,0x1E5D},{0x1E5F,0x1E5F},{0x1E61,0x1E61},{0x1E63,0x1E63},{0x1E65,0x1E65},{0x1E67,0x1E67},{0x1E69,0x1E69},{0
x1E6B,0x1E6B},{0x1E6D,0x1E6D},{0x1E6F,0x1E6F},{0x1E71,0x1E71},{0x1E73,0x1E73},{0x1E75,0x1E75},{0x1E77,0x1E77},{0x1E79,0x1E79},{0x1E7B,0x1E7B},{0x1E7D,0x1E7D},{0x1E7F,0x1E7F},{0x1E81,0x1E81},{0x1E83,0x1E83},{0x1E85,0x1E85},{0x1E87,0x1E87},{0x1E89,0x1E89},{0x1E8B,0x1E8B},{0x1E8D,0x1E8D},{0x1E8F,0x1E8F},{0x1E91,0x1E91},{0x1E93,0x1E93},{0x1E95,0x1E9B},{0x1EA1,0x1EA1},{0x1EA3,0x1EA3},{0x1EA5,0x1EA5},{0x1EA7,0x1EA7},{0x1EA9,0x1EA9},{0x1EAB,0x1EAB},{0x1EAD,0x1EAD},{0x1EAF,0x1EAF},{0x1EB1,0x1EB1},{0x1EB3,0x1EB3},{0x1EB5,0x1EB5},{0x1EB7,0x1EB7},{0x1EB9,0x1EB9},{0x1EBB,0x1EBB},{0x1EBD,0x1EBD},{0x1EBF,0x1EBF},{0x1EC1,0x1EC1},{0x1EC3,0x1EC3},{0x1EC5,0x1EC5},{0x1EC7,0x1EC7},{0x1EC9,0x1EC9},{0x1ECB,0x1ECB},{0x1ECD,0x1ECD},{0x1ECF,0x1ECF},{0x1ED1,0x1ED1},{0x1ED3,0x1ED3},{0x1ED5,0x1ED5},{0x1ED7,0x1ED7},{0x1ED9,0x1ED9},{0x1EDB,0x1EDB},{0x1EDD,0x1EDD},{0x1EDF,0x1EDF},{0x1EE1,0x1EE1},{0x1EE3,0x1EE3},{0x1EE5,0x1EE5},{0x1EE7,0x1EE7},{0x1EE9,0x1EE9},{0x1EEB,0x1EEB},{0x1EED,0x1EED},{0x1EEF,0x1EEF},{0x1EF1,0x1EF1},{0x1EF3,0x1EF3},{0x1EF5,0x1EF5},{0x1EF7,0x1EF7},{0x1EF9,0x1EF9},{0x1F00,0x1F07},{0x1F10,0x1F15},{0x1F20,0x1F27},{0x1F30,0x1F37},{0x1F40,0x1F45},{0x1F50,0x1F57},{0x1F60,0x1F67},{0x1F70,0x1F7D},{0x1F80,0x1F87},{0x1F90,0x1F97},{0x1FA0,0x1FA7},{0x1FB0,0x1FB4},{0x1FB6,0x1FB7},{0x1FBE,0x1FBE},{0x1FC2,0x1FC4},{0x1FC6,0x1FC7},{0x1FD0,0x1FD3},{0x1FD6,0x1FD7},{0x1FE0,0x1FE7},{0x1FF2,0x1FF4},{0x1FF6,0x1FF7},{0x207F,0x207F},{0x210A,0x210A},{0x210E,0x210F},{0x2113,0x2113},{0x212F,0x212F},{0x2134,0x2134},{0x2139,0x2139},{0xFB00,0xFB06},{0xFB13,0xFB17},{0xFF41,0xFF5A},{0x10428,0x1044D},{0x1D41A,0x1D433},{0x1D44E,0x1D454},{0x1D456,0x1D467},{0x1D482,0x1D49B},{0x1D4B6,0x1D4B9},{0x1D4BB,0x1D4BB},{0x1D4BD,0x1D4C0},{0x1D4C2,0x1D4C3},{0x1D4C5,0x1D4CF},{0x1D4EA,0x1D503},{0x1D51E,0x1D537},{0x1D552,0x1D56B},{0x1D586,0x1D59F},{0x1D5BA,0x1D5D3},{0x1D5EE,0x1D607},{0x1D622,0x1D63B},{0x1D656,0x1D66F},{0x1D68A,0x1D6A3},{0x1D6C2,0x1D6DA},{0x1D6DC,0x1D6E1},{0x1D6FC,0x1D714},{0x1D716,0x1D71B},{0x1D736,0x1D74E},{0
x1D750,0x1D755},{0x1D770,0x1D788},{0x1D78A,0x1D78F},{0x1D7AA,0x1D7C2},{0x1D7C4,0x1D7C9}};
/* Code point range tables. Each entry is a {first,last} pair; single-point
   entries such as {0x1C5,0x1C5} show that both bounds are inclusive. Entries
   are listed in ascending, non-overlapping order.
   NOTE(review): the table names look like Unicode general category
   abbreviations (Lt = titlecase letter, Lm = modifier letter) -- confirm
   against the code that consumes these tables. */
static int LtRanges[][2]={{0x1C5,0x1C5},{0x1C8,0x1C8},{0x1CB,0x1CB},{0x1F2,0x1F2},{0x1F88,0x1F8F},{0x1F98,0x1F9F},{0x1FA8,0x1FAF},{0x1FBC,0x1FBC},{0x1FCC,0x1FCC},{0x1FFC,0x1FFC}};
static int LmRanges[][2]={{0x2B0,0x2B8},{0x2BB,0x2C1},{0x2D0,0x2D1},{0x2E0,0x2E4},{0x2EE,0x2EE},{0x37A,0x37A},{0x559,0x559},{0x640,0x640},{0x6E5,0x6E6},{0xE46,0xE46},{0xEC6,0xEC6},{0x1843,0x1843},{0x3005,0x3005},{0x3031,0x3035},{0x309D,0x309E},{0x30FC,0x30FE},{0xFF70,0xFF70},{0xFF9E,0xFF9F}};
/* LoRanges: inclusive {first,last} code point pairs in ascending order.
   NOTE(review): the name suggests Unicode general category Lo (other
   letter) -- confirm against the code that consumes this table.
   The initializer is wrapped at element boundaries so that no line is long
   enough to be split in the middle of a literal by mail or archive tools
   (the previous single-line form had 0x12F0 broken into "0" and "x12F0"). */
static int LoRanges[][2]={
  {0x1BB,0x1BB},{0x1C0,0x1C3},{0x5D0,0x5EA},{0x5F0,0x5F2},{0x621,0x63A},{0x641,0x64A},
  {0x671,0x6D3},{0x6D5,0x6D5},{0x6FA,0x6FC},{0x710,0x710},{0x712,0x72C},{0x780,0x7A5},
  {0x905,0x939},{0x93D,0x93D},{0x950,0x950},{0x958,0x961},{0x985,0x98C},{0x98F,0x990},
  {0x993,0x9A8},{0x9AA,0x9B0},{0x9B2,0x9B2},{0x9B6,0x9B9},{0x9DC,0x9DD},{0x9DF,0x9E1},
  {0x9F0,0x9F1},{0xA05,0xA0A},{0xA0F,0xA10},{0xA13,0xA28},{0xA2A,0xA30},{0xA32,0xA33},
  {0xA35,0xA36},{0xA38,0xA39},{0xA59,0xA5C},{0xA5E,0xA5E},{0xA72,0xA74},{0xA85,0xA8B},
  {0xA8D,0xA8D},{0xA8F,0xA91},{0xA93,0xAA8},{0xAAA,0xAB0},{0xAB2,0xAB3},{0xAB5,0xAB9},
  {0xABD,0xABD},{0xAD0,0xAD0},{0xAE0,0xAE0},{0xB05,0xB0C},{0xB0F,0xB10},{0xB13,0xB28},
  {0xB2A,0xB30},{0xB32,0xB33},{0xB36,0xB39},{0xB3D,0xB3D},{0xB5C,0xB5D},{0xB5F,0xB61},
  {0xB85,0xB8A},{0xB8E,0xB90},{0xB92,0xB95},{0xB99,0xB9A},{0xB9C,0xB9C},{0xB9E,0xB9F},
  {0xBA3,0xBA4},{0xBA8,0xBAA},{0xBAE,0xBB5},{0xBB7,0xBB9},{0xC05,0xC0C},{0xC0E,0xC10},
  {0xC12,0xC28},{0xC2A,0xC33},{0xC35,0xC39},{0xC60,0xC61},{0xC85,0xC8C},{0xC8E,0xC90},
  {0xC92,0xCA8},{0xCAA,0xCB3},{0xCB5,0xCB9},{0xCDE,0xCDE},{0xCE0,0xCE1},{0xD05,0xD0C},
  {0xD0E,0xD10},{0xD12,0xD28},{0xD2A,0xD39},{0xD60,0xD61},{0xD85,0xD96},{0xD9A,0xDB1},
  {0xDB3,0xDBB},{0xDBD,0xDBD},{0xDC0,0xDC6},{0xE01,0xE30},{0xE32,0xE33},{0xE40,0xE45},
  {0xE81,0xE82},{0xE84,0xE84},{0xE87,0xE88},{0xE8A,0xE8A},{0xE8D,0xE8D},{0xE94,0xE97},
  {0xE99,0xE9F},{0xEA1,0xEA3},{0xEA5,0xEA5},{0xEA7,0xEA7},{0xEAA,0xEAB},{0xEAD,0xEB0},
  {0xEB2,0xEB3},{0xEBD,0xEBD},{0xEC0,0xEC4},{0xEDC,0xEDD},{0xF00,0xF00},{0xF40,0xF47},
  {0xF49,0xF6A},{0xF88,0xF8B},{0x1000,0x1021},{0x1023,0x1027},{0x1029,0x102A},{0x1050,0x1055},
  {0x10D0,0x10F6},{0x1100,0x1159},{0x115F,0x11A2},{0x11A8,0x11F9},{0x1200,0x1206},{0x1208,0x1246},
  {0x1248,0x1248},{0x124A,0x124D},{0x1250,0x1256},{0x1258,0x1258},{0x125A,0x125D},{0x1260,0x1286},
  {0x1288,0x1288},{0x128A,0x128D},{0x1290,0x12AE},{0x12B0,0x12B0},{0x12B2,0x12B5},{0x12B8,0x12BE},
  {0x12C0,0x12C0},{0x12C2,0x12C5},{0x12C8,0x12CE},{0x12D0,0x12D6},{0x12D8,0x12EE},{0x12F0,0x130E},
  {0x1310,0x1310},{0x1312,0x1315},{0x1318,0x131E},{0x1320,0x1346},{0x1348,0x135A},{0x13A0,0x13F4},
  {0x1401,0x166C},{0x166F,0x1676},{0x1681,0x169A},{0x16A0,0x16EA},{0x1780,0x17B3},{0x1820,0x1842},
  {0x1844,0x1877},{0x1880,0x18A8},{0x2135,0x2138},{0x3006,0x3006},{0x3041,0x3094},{0x30A1,0x30FA},
  {0x3105,0x312C},{0x3131,0x318E},{0x31A0,0x31B7},{0x3400,0x4DB5},{0x4E00,0x9FA5},{0xA000,0xA48C},
  {0xAC00,0xD7A3},{0xF900,0xFA2D},{0xFB1D,0xFB1D},{0xFB1F,0xFB28},{0xFB2A,0xFB36},{0xFB38,0xFB3C},
  {0xFB3E,0xFB3E},{0xFB40,0xFB41},{0xFB43,0xFB44},{0xFB46,0xFBB1},{0xFBD3,0xFD3D},{0xFD50,0xFD8F},
  {0xFD92,0xFDC7},{0xFDF0,0xFDFB},{0xFE70,0xFE72},{0xFE74,0xFE74},{0xFE76,0xFEFC},{0xFF66,0xFF6F},
  {0xFF71,0xFF9D},{0xFFA0,0xFFBE},{0xFFC2,0xFFC7},{0xFFCA,0xFFCF},{0xFFD2,0xFFD7},{0xFFDA,0xFFDC},
  {0x10300,0x1031E},{0x10330,0x10349},{0x20000,0x2A6D6},{0x2F800,0x2FA1D}
};
/* Mn/Mc/Me tables: inclusive {first,last} code point pairs in ascending order.
   NOTE(review): names suggest the Unicode mark categories (Mn nonspacing,
   Mc spacing combining, Me enclosing) -- confirm against the consumer. */
static int MnRanges[][2]={{0x300,0x34E},{0x360,0x362},{0x483,0x486},{0x591,0x5A1},{0x5A3,0x5B9},{0x5BB,0x5BD},{0x5BF,0x5BF},{0x5C1,0x5C2},{0x5C4,0x5C4},{0x64B,0x655},{0x670,0x670},{0x6D6,0x6DC},{0x6DF,0x6E4},{0x6E7,0x6E8},{0x6EA,0x6ED},{0x711,0x711},{0x730,0x74A},{0x7A6,0x7B0},{0x901,0x902},{0x93C,0x93C},{0x941,0x948},{0x94D,0x94D},{0x951,0x954},{0x962,0x963},{0x981,0x981},{0x9BC,0x9BC},{0x9C1,0x9C4},{0x9CD,0x9CD},{0x9E2,0x9E3},{0xA02,0xA02},{0xA3C,0xA3C},{0xA41,0xA42},{0xA47,0xA48},{0xA4B,0xA4D},{0xA70,0xA71},{0xA81,0xA82},{0xABC,0xABC},{0xAC1,0xAC5},{0xAC7,0xAC8},{0xACD,0xACD},{0xB01,0xB01},{0xB3C,0xB3C},{0xB3F,0xB3F},{0xB41,0xB43},{0xB4D,0xB4D},{0xB56,0xB56},{0xB82,0xB82},{0xBC0,0xBC0},{0xBCD,0xBCD},{0xC3E,0xC40},{0xC46,0xC48},{0xC4A,0xC4D},{0xC55,0xC56},{0xCBF,0xCBF},{0xCC6,0xCC6},{0xCCC,0xCCD},{0xD41,0xD43},{0xD4D,0xD4D},{0xDCA,0xDCA},{0xDD2,0xDD4},{0xDD6,0xDD6},{0xE31,0xE31},{0xE34,0xE3A},{0xE47,0xE4E},{0xEB1,0xEB1},{0xEB4,0xEB9},{0xEBB,0xEBC},{0xEC8,0xECD},{0xF18,0xF19},{0xF35,0xF35},{0xF37,0xF37},{0xF39,0xF39},{0xF71,0xF7E},{0xF80,0xF84},{0xF86,0xF87},{0xF90,0xF97},{0xF99,0xFBC},{0xFC6,0xFC6},{0x102D,0x1030},{0x1032,0x1032},{0x1036,0x1037},{0x1039,0x1039},{0x1058,0x1059},{0x17B7,0x17BD},{0x17C6,0x17C6},{0x17C9,0x17D3},{0x18A9,0x18A9},{0x20D0,0x20DC},{0x20E1,0x20E1},{0x302A,0x302F},{0x3099,0x309A},{0xFB1E,0xFB1E},{0xFE20,0xFE23},{0x1D167,0x1D169},{0x1D17B,0x1D182},{0x1D185,0x1D18B},{0x1D1AA,0x1D1AD}};
static int McRanges[][2]={{0x903,0x903},{0x93E,0x940},{0x949,0x94C},{0x982,0x983},{0x9BE,0x9C0},{0x9C7,0x9C8},{0x9CB,0x9CC},{0x9D7,0x9D7},{0xA3E,0xA40},{0xA83,0xA83},{0xABE,0xAC0},{0xAC9,0xAC9},{0xACB,0xACC},{0xB02,0xB03},{0xB3E,0xB3E},{0xB40,0xB40},{0xB47,0xB48},{0xB4B,0xB4C},{0xB57,0xB57},{0xB83,0xB83},{0xBBE,0xBBF},{0xBC1,0xBC2},{0xBC6,0xBC8},{0xBCA,0xBCC},{0xBD7,0xBD7},{0xC01,0xC03},{0xC41,0xC44},{0xC82,0xC83},{0xCBE,0xCBE},{0xCC0,0xCC4},{0xCC7,0xCC8},{0xCCA,0xCCB},{0xCD5,0xCD6},{0xD02,0xD03},{0xD3E,0xD40},{0xD46,0xD48},{0xD4A,0xD4C},{0xD57,0xD57},{0xD82,0xD83},{0xDCF,0xDD1},{0xDD8,0xDDF},{0xDF2,0xDF3},{0xF3E,0xF3F},{0xF7F,0xF7F},{0x102C,0x102C},{0x1031,0x1031},{0x1038,0x1038},{0x1056,0x1057},{0x17B4,0x17B6},{0x17BE,0x17C5},{0x17C7,0x17C8},{0x1D165,0x1D166},{0x1D16D,0x1D172}};
static int MeRanges[][2]={{0x488,0x489},{0x6DD,0x6DE},{0x20DD,0x20E0},{0x20E2,0x20E3}};
/* Nd/Nl/No tables: inclusive {first,last} code point pairs in ascending order.
   NdRanges starts with {0x30,0x39}, the ASCII digits.
   NOTE(review): names suggest the Unicode number categories (Nd decimal
   digit, Nl letter number, No other number) -- confirm against the consumer. */
static int NdRanges[][2]={{0x30,0x39},{0x660,0x669},{0x6F0,0x6F9},{0x966,0x96F},{0x9E6,0x9EF},{0xA66,0xA6F},{0xAE6,0xAEF},{0xB66,0xB6F},{0xBE7,0xBEF},{0xC66,0xC6F},{0xCE6,0xCEF},{0xD66,0xD6F},{0xE50,0xE59},{0xED0,0xED9},{0xF20,0xF29},{0x1040,0x1049},{0x1369,0x1371},{0x17E0,0x17E9},{0x1810,0x1819},{0xFF10,0xFF19},{0x1D7CE,0x1D7FF}};
static int NlRanges[][2]={{0x16EE,0x16F0},{0x2160,0x2183},{0x3007,0x3007},{0x3021,0x3029},{0x3038,0x303A},{0x1034A,0x1034A}};
static int NoRanges[][2]={{0xB2,0xB3},{0xB9,0xB9},{0xBC,0xBE},{0x9F4,0x9F9},{0xBF0,0xBF2},{0xF2A,0xF33},{0x1372,0x137C},{0x2070,0x2070},{0x2074,0x2079},{0x2080,0x2089},{0x2153,0x215F},{0x2460,0x249B},{0x24EA,0x24EA},{0x2776,0x2793},{0x3192,0x3195},{0x3220,0x3229},{0x3280,0x3289},{0x10320,0x10323}};
/* Pc/Pd/Ps/Pe/Pi/Pf/Po tables: inclusive {first,last} code point pairs in
   ascending order.
   NOTE(review): names suggest the Unicode punctuation categories (connector,
   dash, open, close, initial quote, final quote, other) -- confirm against
   the consumer. */
static int PcRanges[][2]={{0x5F,0x5F},{0x203F,0x2040},{0x30FB,0x30FB},{0xFE33,0xFE34},{0xFE4D,0xFE4F},{0xFF3F,0xFF3F},{0xFF65,0xFF65}};
static int PdRanges[][2]={{0x2D,0x2D},{0xAD,0xAD},{0x58A,0x58A},{0x1806,0x1806},{0x2010,0x2015},{0x301C,0x301C},{0x3030,0x3030},{0xFE31,0xFE32},{0xFE58,0xFE58},{0xFE63,0xFE63},{0xFF0D,0xFF0D}};
static int PsRanges[][2]={{0x28,0x28},{0x5B,0x5B},{0x7B,0x7B},{0xF3A,0xF3A},{0xF3C,0xF3C},{0x169B,0x169B},{0x201A,0x201A},{0x201E,0x201E},{0x2045,0x2045},{0x207D,0x207D},{0x208D,0x208D},{0x2329,0x2329},{0x3008,0x3008},{0x300A,0x300A},{0x300C,0x300C},{0x300E,0x300E},{0x3010,0x3010},{0x3014,0x3014},{0x3016,0x3016},{0x3018,0x3018},{0x301A,0x301A},{0x301D,0x301D},{0xFD3E,0xFD3E},{0xFE35,0xFE35},{0xFE37,0xFE37},{0xFE39,0xFE39},{0xFE3B,0xFE3B},{0xFE3D,0xFE3D},{0xFE3F,0xFE3F},{0xFE41,0xFE41},{0xFE43,0xFE43},{0xFE59,0xFE59},{0xFE5B,0xFE5B},{0xFE5D,0xFE5D},{0xFF08,0xFF08},{0xFF3B,0xFF3B},{0xFF5B,0xFF5B},{0xFF62,0xFF62}};
static int PeRanges[][2]={{0x29,0x29},{0x5D,0x5D},{0x7D,0x7D},{0xF3B,0xF3B},{0xF3D,0xF3D},{0x169C,0x169C},{0x2046,0x2046},{0x207E,0x207E},{0x208E,0x208E},{0x232A,0x232A},{0x3009,0x3009},{0x300B,0x300B},{0x300D,0x300D},{0x300F,0x300F},{0x3011,0x3011},{0x3015,0x3015},{0x3017,0x3017},{0x3019,0x3019},{0x301B,0x301B},{0x301E,0x301F},{0xFD3F,0xFD3F},{0xFE36,0xFE36},{0xFE38,0xFE38},{0xFE3A,0xFE3A},{0xFE3C,0xFE3C},{0xFE3E,0xFE3E},{0xFE40,0xFE40},{0xFE42,0xFE42},{0xFE44,0xFE44},{0xFE5A,0xFE5A},{0xFE5C,0xFE5C},{0xFE5E,0xFE5E},{0xFF09,0xFF09},{0xFF3D,0xFF3D},{0xFF5D,0xFF5D},{0xFF63,0xFF63}};
static int PiRanges[][2]={{0xAB,0xAB},{0x2018,0x2018},{0x201B,0x201C},{0x201F,0x201F},{0x2039,0x2039}};
static int PfRanges[][2]={{0xBB,0xBB},{0x2019,0x2019},{0x201D,0x201D},{0x203A,0x203A}};
static int PoRanges[][2]={{0x21,0x23},{0x25,0x27},{0x2A,0x2A},{0x2C,0x2C},{0x2E,0x2F},{0x3A,0x3B},{0x3F,0x40},{0x5C,0x5C},{0xA1,0xA1},{0xB7,0xB7},{0xBF,0xBF},{0x37E,0x37E},{0x387,0x387},{0x55A,0x55F},{0x589,0x589},{0x5BE,0x5BE},{0x5C0,0x5C0},{0x5C3,0x5C3},{0x5F3,0x5F4},{0x60C,0x60C},{0x61B,0x61B},{0x61F,0x61F},{0x66A,0x66D},{0x6D4,0x6D4},{0x700,0x70D},{0x964,0x965},{0x970,0x970},{0xDF4,0xDF4},{0xE4F,0xE4F},{0xE5A,0xE5B},{0xF04,0xF12},{0xF85,0xF85},{0x104A,0x104F},{0x10FB,0x10FB},{0x1361,0x1368},{0x166D,0x166E},{0x16EB,0x16ED},{0x17D4,0x17DA},{0x17DC,0x17DC},{0x1800,0x1805},{0x1807,0x180A},{0x2016,0x2017},{0x2020,0x2027},{0x2030,0x2038},{0x203B,0x203E},{0x2041,0x2043},{0x2048,0x204D},{0x3001,0x3003},{0xFE30,0xFE30},{0xFE49,0xFE4C},{0xFE50,0xFE52},{0xFE54,0xFE57},{0xFE5F,0xFE61},{0xFE68,0xFE68},{0xFE6A,0xFE6B},{0xFF01,0xFF03},{0xFF05,0xFF07},{0xFF0A,0xFF0A},{0xFF0C,0xFF0C},{0xFF0E,0xFF0F},{0xFF1A,0xFF1B},{0xFF1F,0xFF20},{0xFF3C,0xFF3C},{0xFF61,0xFF61},{0xFF64,0xFF64}};
/* Zs/Zl/Zp tables: inclusive {first,last} code point pairs in ascending order.
   ZsRanges starts with {0x20,0x20}, the ASCII space.
   NOTE(review): names suggest the Unicode separator categories (space,
   line, paragraph) -- confirm against the consumer. */
static int ZsRanges[][2]={{0x20,0x20},{0xA0,0xA0},{0x1680,0x1680},{0x2000,0x200B},{0x202F,0x202F},{0x3000,0x3000}};
static int ZlRanges[][2]={{0x2028,0x2028}};
static int ZpRanges[][2]={{0x2029,0x2029}};
/* Sm/Sc/Sk/So tables: inclusive {first,last} code point pairs in ascending order.
   NOTE(review): names suggest the Unicode symbol categories (math, currency,
   modifier, other) -- confirm against the consumer. */
static int SmRanges[][2]={{0x2B,0x2B},{0x3C,0x3E},{0x7C,0x7C},{0x7E,0x7E},{0xAC,0xAC},{0xB1,0xB1},{0xD7,0xD7},{0xF7,0xF7},{0x2044,0x2044},{0x207A,0x207C},{0x208A,0x208C},{0x2190,0x2194},{0x219A,0x219B},{0x21A0,0x21A0},{0x21A3,0x21A3},{0x21A6,0x21A6},{0x21AE,0x21AE},{0x21CE,0x21CF},{0x21D2,0x21D2},{0x21D4,0x21D4},{0x2200,0x22F1},{0x2308,0x230B},{0x2320,0x2321},{0x25B7,0x25B7},{0x25C1,0x25C1},{0x266F,0x266F},{0xFB29,0xFB29},{0xFE62,0xFE62},{0xFE64,0xFE66},{0xFF0B,0xFF0B},{0xFF1C,0xFF1E},{0xFF5C,0xFF5C},{0xFF5E,0xFF5E},{0xFFE2,0xFFE2},{0xFFE9,0xFFEC},{0x1D6C1,0x1D6C1},{0x1D6DB,0x1D6DB},{0x1D6FB,0x1D6FB},{0x1D715,0x1D715},{0x1D735,0x1D735},{0x1D74F,0x1D74F},{0x1D76F,0x1D76F},{0x1D789,0x1D789},{0x1D7A9,0x1D7A9},{0x1D7C3,0x1D7C3}};
static int ScRanges[][2]={{0x24,0x24},{0xA2,0xA5},{0x9F2,0x9F3},{0xE3F,0xE3F},{0x17DB,0x17DB},{0x20A0,0x20AF},{0xFE69,0xFE69},{0xFF04,0xFF04},{0xFFE0,0xFFE1},{0xFFE5,0xFFE6}};
static int SkRanges[][2]={{0x5E,0x5E},{0x60,0x60},{0xA8,0xA8},{0xAF,0xAF},{0xB4,0xB4},{0xB8,0xB8},{0x2B9,0x2BA},{0x2C2,0x2CF},{0x2D2,0x2DF},{0x2E5,0x2ED},{0x374,0x375},{0x384,0x385},{0x1FBD,0x1FBD},{0x1FBF,0x1FC1},{0x1FCD,0x1FCF},{0x1FDD,0x1FDF},{0x1FED,0x1FEF},{0x1FFD,0x1FFE},{0x309B,0x309C},{0xFF3E,0xFF3E},{0xFF40,0xFF40},{0xFFE3,0xFFE3}};
static int SoRanges[][2]={{0xA6,0xA7},{0xA9,0xA9},{0xAE,0xAE},{0xB0,0xB0},{0xB6,0xB6},{0x482,0x482},{0x6E9,0x6E9},{0x6FD,0x6FE},{0x9FA,0x9FA},{0xB70,0xB70},{0xF01,0xF03},{0xF13,0xF17},{0xF1A,0xF1F},{0xF34,0xF34},{0xF36,0xF36},{0xF38,0xF38},{0xFBE,0xFC5},{0xFC7,0xFCC},{0xFCF,0xFCF},{0x2100,0x2101},{0x2103,0x2106},{0x2108,0x2109},{0x2114,0x2114},{0x2116,0x2118},{0x211E,0x2123},{0x2125,0x2125},{0x2127,0x2127},{0x2129,0x2129},{0x212E,0x212E},{0x2132,0x2132},{0x213A,0x213A},{0x2195,0x2199},{0x219C,0x219F},{0x21A1,0x21A2},{0x21A4,0x21A5},{0x21A7,0x21AD},{0x21AF,0x21CD},{0x21D0,0x21D1},{0x21D3,0x21D3},{0x21D5,0x21F3},{0x2300,0x2307},{0x230C,0x231F},{0x2322,0x2328},{0x232B,0x237B},{0x237D,0x239A},{0x2400,0x2426},{0x2440,0x244A},{0x249C,0x24E9},{0x2500,0x2595},{0x25A0,0x25B6},{0x25B8,0x25C0},{0x25C2,0x25F7},{0x2600,0x2613},{0x2619,0x266E},{0x2670,0x2671},{0x2701,0x2704},{0x2706,0x2709},{0x270C,0x2727},{0x2729,0x274B},{0x274D,0x274D},{0x274F,0x2752},{0x2756,0x2756},{0x2758,0x275E},{0x2761,0x2767},{0x2794,0x2794},{0x2798,0x27AF},{0x27B1,0x27BE},{0x2800,0x28FF},{0x2E80,0x2E99},{0x2E9B,0x2EF3},{0x2F00,0x2FD5},{0x2FF0,0x2FFB},{0x3004,0x3004},{0x3012,0x3013},{0x3020,0x3020},{0x3036,0x3037},{0x303E,0x303F},{0x3190,0x3191},{0x3196,0x319F},{0x3200,0x321C},{0x322A,0x3243},{0x3260,0x327B},{0x327F,0x327F},{0x328A,0x32B0},{0x32C0,0x32CB},{0x32D0,0x32FE},{0x3300,0x3376},{0x337B,0x33DD},{0x33E0,0x33FE},{0xA490,0xA4A1},{0xA4A4,0xA4B3},{0xA4B5,0xA4C0},{0xA4C2,0xA4C4},{0xA4C6,0xA4C6},{0xFFE4,0xFFE4},{0xFFE8,0xFFE8},{0xFFED,0xFFEE},{0xFFFC,0xFFFD},{0x1D000,0x1D0F5},{0x1D100,0x1D126},{0x1D12A,0x1D164},{0x1D16A,0x1D16C},{0x1D183,0x1D184},{0x1D18C,0x1D1A9},{0x1D1AE,0x1D1DD}};
/* Cc/Cf/Co tables: inclusive {first,last} code point pairs in ascending order.
   CcRanges covers 0x0-0x1F and 0x7F-0x9F.
   NOTE(review): names suggest the Unicode "other" categories (control,
   format, private use) -- confirm against the consumer. */
static int CcRanges[][2]={{0x0,0x1F},{0x7F,0x9F}};
static int CfRanges[][2]={{0x70F,0x70F},{0x180B,0x180E},{0x200C,0x200F},{0x202A,0x202E},{0x206A,0x206F},{0xFEFF,0xFEFF},{0xFFF9,0xFFFB},{0x1D173,0x1D17A},{0xE0001,0xE0001},{0xE0020,0xE007F}};
static int CoRanges[][2]={{0xE000,0xF8FF},{0xF0000,0xFFFFD},{0x100000,0x10FFFD}};

--- NEW FILE: rn.c ---
/* $Id: rn.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */

#include <string.h> /* strcmp,strlen,strcpy*/
#include "m.h"
#include "s.h" /* s_hval */
#include "ht.h"
#include "ll.h"
#include "rn.h"
#include "rnx.h"

/* short local aliases for the RN_* sizing parameters (defined outside this file) */
#define LEN_P RN_LEN_P
#define PRIME_P RN_PRIME_P
#define LIM_P RN_LIM_P
#define LEN_NC RN_LEN_NC
#define PRIME_NC RN_PRIME_NC
#define LEN_S RN_LEN_S

/* P_SIZE, NC_SIZE: headroom in ints that accept_p()/accept_nc() keep free
 past i_p/i_nc, so the next record can be written before it is accepted;
 *_AVG_SIZE: per-entry multipliers used by rn_init() to size the initial arrays */
#define P_SIZE 3
#define NC_SIZE 3
#define P_AVG_SIZE 2
#define NC_AVG_SIZE 2
#define S_AVG_SIZE 16

/* test / set the RN_P_FLG_ERS bit of the pattern record at index i */
#define erased(i) (rn_pattern[i]&RN_P_FLG_ERS)
#define erase(i) (rn_pattern[i]|=RN_P_FLG_ERS)

/* number of ints a record occupies, indexed by RN_P_TYP() and RN_NC_TYP() respectively */
static int p_size[]={1,1,1,1,3,3,3,2,2,3,3,3,3,3,2,3};
static int nc_size[]={1,3,2,1,3,3,3};

/* storage: pattern records, name class records, and NUL-terminated strings;
 all three are addressed by integer offsets and may be reallocated as they grow */
int *rn_pattern;
int *rn_nameclass;
char *rn_string;
/* indices of the built-in entries created by windup() */
int rn_empty,rn_text,rn_notAllowed,rn_dt_string,rn_dt_token,rn_xsd_uri;

/* hash tables used to share identical patterns, name classes and strings */
static struct hashtable ht_p, ht_nc, ht_s;

/* i_*: offset of the first free slot in the corresponding array;
 BASE_P: value of i_p right after the built-in patterns (set in windup());
 base_p: value of i_p when rn_new_schema() was last called;
 i_ref: reset to 0 by rn_new_schema(), not otherwise used in this file */
static int i_p,i_nc,i_s,BASE_P,base_p,i_ref;
/* current allocated lengths of rn_pattern, rn_nameclass and rn_string */
static int len_p,len_nc,len_s;
/* non-zero between rn_i_ps() and rn_end_ps(); rn_newString() asserts it is 0 */
static int adding_ps;

/* starts a new schema: remembers the current end of the pattern array in
 base_p (used later by rn_compress_last()) and resets i_ref */
void rn_new_schema(void) {
  i_ref=0;
  base_p=i_p;
}

/* removes pattern i from the pattern hash table */
void rn_del_p(int i) {
  ht_deli(&ht_p,i);
}

/* puts pattern i into the pattern hash table unless ht_get() already
 finds an entry for it */
void rn_add_p(int i) {
  if(ht_get(&ht_p,i)!=-1) return;
  ht_put(&ht_p,i);
}

/* returns the content type bits (mask 0x1C00) of pattern i */
int rn_contentType(int i) {
  int head=rn_pattern[i];
  return head&0x1C00;
}

/* ORs the larger of t1 and t2 into the first word of pattern i */
void rn_setContentType(int i,int t1,int t2) {
  int t=t1;
  if(t2>t1) t=t2;
  rn_pattern[i]|=t;
}
/* returns non-zero if the content types of p1 and p2 can be combined:
 either both have RN_P_FLG_CTC set, or at least one has RN_P_FLG_CTE set */
int rn_groupable(int p1,int p2) {
  int a=rn_contentType(p1);
  int b=rn_contentType(p2);
  if(a&b&RN_P_FLG_CTC) return 1;
  if((a|b)&RN_P_FLG_CTE) return 1;
  return 0;
}

static int add_s(char *s) {
  int len=strlen(s)+1;
  if(i_s+len>len_s) rn_string=(char*)m_stretch(rn_string,
    len_s=2*(i_s+len),i_s,sizeof(char));
  strcpy(rn_string+i_s,s);
  return len;
}

/* the two functions below are structurally identical;
 they used to be expanded from a macro using ##,
 but then I eliminated all occurrences of ## --
 it was an obstacle to porting; sam script to turn
 the first into the second is
s/([^a-z])p([^a-z])/\1nc\2/g
s/([^A-Z])P([^A-Z])/\1NC\2/g
s/_pattern/_nameclass/g
 */

/* interns the pattern record that has been written at offset i_p:
 if the hash table already holds an equal pattern, returns its index and
 leaves i_p alone; otherwise registers the new record, advances i_p past it,
 grows rn_pattern when fewer than P_SIZE slots would remain, and returns
 the new index */
static int accept_p(void) {
  int j=ht_get(&ht_p,i_p);
  if(j!=-1) return j;
  j=i_p;
  ht_put(&ht_p,j);
  i_p+=p_size[RN_P_TYP(i_p)];
  if(i_p+P_SIZE>len_p) {
    len_p=2*(i_p+P_SIZE);
    rn_pattern=(int *)m_stretch(rn_pattern,len_p,i_p,sizeof(int));
  }
  return j;
}

/* interns the name class record that has been written at offset i_nc:
 if the hash table already holds an equal name class, returns its index and
 leaves i_nc alone; otherwise registers the new record, advances i_nc past it,
 grows rn_nameclass when fewer than NC_SIZE slots would remain, and returns
 the new index */
static int accept_nc(void) {
  int j=ht_get(&ht_nc,i_nc);
  if(j!=-1) return j;
  j=i_nc;
  ht_put(&ht_nc,j);
  i_nc+=nc_size[RN_NC_TYP(i_nc)];
  if(i_nc+NC_SIZE>len_nc) {
    len_nc=2*(i_nc+NC_SIZE);
    rn_nameclass=(int *)m_stretch(rn_nameclass,len_nc,i_nc,sizeof(int));
  }
  return j;
}

int rn_newString(char *s) {
  int d_s,j;
  assert(!adding_ps);
  d_s=add_s(s);
  if((j=ht_get(&ht_s,i_s))==-1) {
    ht_put(&ht_s,j=i_s);
    i_s+=d_s;
  }
  return j;
}

#define P_NEW(x) rn_pattern[i_p]=x

int rn_newNotAllowed(void) { P_NEW(RN_P_NOT_ALLOWED);
  return accept_p();
}

int rn_newEmpty(void) { P_NEW(RN_P_EMPTY);
  rn_setNullable(i_p,1);
  return accept_p();
}

int rn_newText(void) { P_NEW(RN_P_TEXT);
  rn_setNullable(i_p,1);
  rn_setCdata(i_p,1);
  return accept_p();
}

/* writes choice(p1,p2) at i_p and interns it; the result is nullable if
 either operand is, and cdata if either operand is. Returns its index.
 No simplification is done here, see rn_choice(). */
int rn_newChoice(int p1,int p2) {
  int nullable,cdata;
  P_NEW(RN_P_CHOICE);
  rn_pattern[i_p+1]=p1;
  rn_pattern[i_p+2]=p2;
  nullable=rn_nullable(p1)||rn_nullable(p2);
  cdata=rn_cdata(p1)||rn_cdata(p2);
  rn_setNullable(i_p,nullable);
  rn_setCdata(i_p,cdata);
  return accept_p();
}

/* writes interleave(p1,p2) at i_p and interns it; the result is nullable
 only if both operands are, and cdata if either operand is. Returns its
 index. No simplification is done here, see rn_ileave(). */
int rn_newInterleave(int p1,int p2) {
  int nullable,cdata;
  P_NEW(RN_P_INTERLEAVE);
  rn_pattern[i_p+1]=p1;
  rn_pattern[i_p+2]=p2;
  nullable=rn_nullable(p1)&&rn_nullable(p2);
  cdata=rn_cdata(p1)||rn_cdata(p2);
  rn_setNullable(i_p,nullable);
  rn_setCdata(i_p,cdata);
  return accept_p();
}

/* writes group(p1,p2) at i_p and interns it; the result is nullable only
 if both operands are, and cdata if either operand is. Returns its index.
 No simplification is done here, see rn_group(). */
int rn_newGroup(int p1,int p2) {
  int nullable,cdata;
  P_NEW(RN_P_GROUP);
  rn_pattern[i_p+1]=p1;
  rn_pattern[i_p+2]=p2;
  nullable=rn_nullable(p1)&&rn_nullable(p2);
  cdata=rn_cdata(p1)||rn_cdata(p2);
  rn_setNullable(i_p,nullable);
  rn_setCdata(i_p,cdata);
  return accept_p();
}

/* writes oneOrMore(p1) at i_p and interns it; nullable and cdata flags are
 inherited from p1. Returns its index. */
int rn_newOneOrMore(int p1) {
  int nullable,cdata;
  P_NEW(RN_P_ONE_OR_MORE);
  rn_pattern[i_p+1]=p1;
  nullable=rn_nullable(p1);
  cdata=rn_cdata(p1);
  rn_setNullable(i_p,nullable);
  rn_setCdata(i_p,cdata);
  return accept_p();
}

/* writes list(p1) at i_p, flags it cdata and interns it; returns its index */
int rn_newList(int p1) {
  rn_pattern[i_p]=RN_P_LIST;
  rn_pattern[i_p+1]=p1;
  rn_setCdata(i_p,1);
  return accept_p();
}

/* writes data(dt,ps) at i_p, flags it cdata and interns it; dt goes to
 slot 1 and ps to slot 2 of the record. Returns its index. */
int rn_newData(int dt,int ps) {
  rn_pattern[i_p]=RN_P_DATA;
  rn_pattern[i_p+1]=dt; rn_pattern[i_p+2]=ps;
  rn_setCdata(i_p,1);
  return accept_p();
}

/* writes dataExcept(p1,p2) at i_p, flags it cdata and interns it;
 returns its index */
int rn_newDataExcept(int p1,int p2) {
  rn_pattern[i_p]=RN_P_DATA_EXCEPT;
  rn_pattern[i_p+1]=p1;
  rn_pattern[i_p+2]=p2;
  rn_setCdata(i_p,1);
  return accept_p();
}

/* writes value(dt,s) at i_p, flags it cdata and interns it; dt goes to
 slot 1 and s to slot 2 of the record. Returns its index. */
int rn_newValue(int dt,int s) {
  rn_pattern[i_p]=RN_P_VALUE;
  rn_pattern[i_p+1]=dt;
  rn_pattern[i_p+2]=s;
  rn_setCdata(i_p,1);
  return accept_p();
}

/* writes attribute(nc,p1) at i_p and interns it; note the record layout:
 the content pattern p1 is in slot 1, the name class nc in slot 2.
 Returns its index. */
int rn_newAttribute(int nc,int p1) {
  rn_pattern[i_p]=RN_P_ATTRIBUTE;
  rn_pattern[i_p+1]=p1;
  rn_pattern[i_p+2]=nc;
  return accept_p();
}

/* writes element(nc,p1) at i_p and interns it; note the record layout:
 the content pattern p1 is in slot 1, the name class nc in slot 2.
 Returns its index. */
int rn_newElement(int nc,int p1) {
  rn_pattern[i_p]=RN_P_ELEMENT;
  rn_pattern[i_p+1]=p1;
  rn_pattern[i_p+2]=nc;
  return accept_p();
}

/* writes after(p1,p2) at i_p and interns it; the cdata flag is inherited
 from p1 only. Returns its index. No simplification is done here, see
 rn_after(). */
int rn_newAfter(int p1,int p2) {
  int cdata;
  P_NEW(RN_P_AFTER);
  rn_pattern[i_p+1]=p1;
  rn_pattern[i_p+2]=p2;
  cdata=rn_cdata(p1);
  rn_setCdata(i_p,cdata);
  return accept_p();
}

/* writes a ref pattern with slot 1 cleared at i_p, accepts it, and then
 removes the accepted index from the pattern hash table again; the value
 returned by ht_deli() is the result.
 NOTE(review): presumably ht_deli() returns the index it was given, and the
 removal keeps refs from being shared with later ones -- confirm in ht.c */
int rn_newRef(void) {
  int p;
  rn_pattern[i_p]=RN_P_REF;
  rn_pattern[i_p+1]=0;
  p=accept_p();
  return ht_deli(&ht_p,p);
}

/* simplifying constructor for oneOrMore: empty, notAllowed and text are
 returned unchanged, anything else is wrapped with rn_newOneOrMore() */
int rn_one_or_more(int p) {
  if(RN_P_IS(p,RN_P_EMPTY)
      ||RN_P_IS(p,RN_P_NOT_ALLOWED)
      ||RN_P_IS(p,RN_P_TEXT)) {
    return p;
  }
  return rn_newOneOrMore(p);
}

/* simplifying constructor for group: a notAllowed operand makes the whole
 group notAllowed, an empty operand is dropped; otherwise a group pattern
 is created with rn_newGroup() */
int rn_group(int p1,int p2) {
  int r;
  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) r=p1;
  else if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) r=p2;
  else if(RN_P_IS(p1,RN_P_EMPTY)) r=p2;
  else if(RN_P_IS(p2,RN_P_EMPTY)) r=p1;
  else r=rn_newGroup(p1,p2);
  return r;
}

/* returns non-zero if p2 already occurs in p1, where p1 is read as a
 left-nested chain of choices: at each choice node the right branch is
 compared with p2 and the walk continues into the left branch; the final
 non-choice node is compared with p2 directly */
static int samechoice(int p1,int p2) {
  while(RN_P_IS(p1,RN_P_CHOICE)) {
    int left,right;
    rn_Choice(p1,left,right);
    if(right==p2) return 1;
    p1=left;
  }
  return p1==p2;
}

/* simplifying constructor for choice:
 - a notAllowed operand is dropped;
 - if p2 is itself a choice, its branches are added to p1 one at a time,
   so the result stays left-nested;
 - if p2 already occurs in p1 (see samechoice()), p1 is returned;
 - an empty operand is dropped when the other operand is nullable;
 otherwise a choice pattern is created with rn_newChoice() */
int rn_choice(int p1,int p2) {
  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) return p2;
  if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) return p1;
  if(RN_P_IS(p2,RN_P_CHOICE)) {
    int p21,p22;
    rn_Choice(p2,p21,p22);
    return rn_choice(rn_choice(p1,p21),p22);
  }
  if(samechoice(p1,p2)) return p1;
  if(RN_P_IS(p2,RN_P_EMPTY) ? rn_nullable(p1) : 0) return p1;
  if(RN_P_IS(p1,RN_P_EMPTY) ? rn_nullable(p2) : 0) return p2;
  return rn_newChoice(p1,p2);
}

/* simplifying constructor for interleave: a notAllowed operand makes the
 whole pattern notAllowed, an empty operand is dropped; otherwise an
 interleave pattern is created with rn_newInterleave() */
int rn_ileave(int p1,int p2) {
  int r;
  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) r=p1;
  else if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) r=p2;
  else if(RN_P_IS(p1,RN_P_EMPTY)) r=p2;
  else if(RN_P_IS(p2,RN_P_EMPTY)) r=p1;
  else r=rn_newInterleave(p1,p2);
  return r;
}

/* simplifying constructor for after: if either operand is notAllowed that
 operand is returned (p1 is checked first); otherwise an after pattern is
 created with rn_newAfter() */
int rn_after(int p1,int p2) {
  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) {
    return p1;
  } else if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) {
    return p2;
  } else {
    return rn_newAfter(p1,p2);
  }
}

#define NC_NEW(x) rn_nameclass[i_nc]=x

/* writes a QName name class (uri in slot 1, name in slot 2) at i_nc and
 interns it; returns its index */
int rn_newQName(int uri,int name) {
  int *rec=rn_nameclass+i_nc;
  rec[0]=RN_NC_QNAME;
  rec[1]=uri;
  rec[2]=name;
  return accept_nc();
}

/* writes an nsName name class (uri in slot 1) at i_nc and interns it;
 returns its index */
int rn_newNsName(int uri) {
  int *rec=rn_nameclass+i_nc;
  rec[0]=RN_NC_NSNAME;
  rec[1]=uri;
  return accept_nc();
}

int rn_newAnyName(void) { NC_NEW(RN_NC_ANY_NAME);
  return accept_nc();
}

/* writes an except name class (nc1 in slot 1, nc2 in slot 2) at i_nc and
 interns it; returns its index */
int rn_newNameClassExcept(int nc1,int nc2) {
  int *rec=rn_nameclass+i_nc;
  rec[0]=RN_NC_EXCEPT;
  rec[1]=nc1;
  rec[2]=nc2;
  return accept_nc();
}

/* writes a choice name class (nc1 in slot 1, nc2 in slot 2) at i_nc and
 interns it; returns its index */
int rn_newNameClassChoice(int nc1,int nc2) {
  int *rec=rn_nameclass+i_nc;
  rec[0]=RN_NC_CHOICE;
  rec[1]=nc1;
  rec[2]=nc2;
  return accept_nc();
}

/* writes a datatype record (lib in slot 1, typ in slot 2) at i_nc and
 interns it; datatypes share the name class array. Returns its index. */
int rn_newDatatype(int lib,int typ) {
  int *rec=rn_nameclass+i_nc;
  rec[0]=RN_NC_DATATYPE;
  rec[1]=lib;
  rec[2]=typ;
  return accept_nc();
}

/* parameter lists are appended to rn_string as a run of NUL-terminated
 strings closed by an empty string; they bypass the string hash table */

/* begins a parameter list; returns the offset at which it will start */
int rn_i_ps(void) {
  adding_ps=1;
  return i_s;
}

/* appends a parameter key and advances i_s past it */
void rn_add_pskey(char *s) {
  int n=add_s(s);
  i_s+=n;
}

/* appends a parameter value and advances i_s past it */
void rn_add_psval(char *s) {
  int n=add_s(s);
  i_s+=n;
}

/* closes the parameter list with an empty string */
void rn_end_ps(void) {
  int n=add_s("");
  i_s+=n;
  adding_ps=0;
}

/* hash callbacks passed to ht_init() in rn_init(); the argument is an
 offset into rn_pattern, rn_nameclass or rn_string respectively */
static int hash_p(int i);
static int hash_nc(int i);
static int hash_s(int i);

/* equality callbacks passed to ht_init() in rn_init(); both arguments are
 offsets into the corresponding array */
static int equal_p(int p1,int p2);
static int equal_nc(int nc1,int nc2);
static int equal_s(int s1,int s2);

/* resets the three arrays to just the built-in entries */
static void windup(void);

static int initialized=0;
/* one-time set-up: allocates the pattern, name class and string arrays,
 initializes the three hash tables and creates the built-in entries;
 repeated calls do nothing */
void rn_init(void) {
  if(initialized) return;
  initialized=1;

  len_p=P_AVG_SIZE*LEN_P;
  rn_pattern=(int *)m_alloc(len_p,sizeof(int));
  len_nc=NC_AVG_SIZE*LEN_NC;
  rn_nameclass=(int *)m_alloc(len_nc,sizeof(int));
  len_s=S_AVG_SIZE*LEN_S;
  rn_string=(char*)m_alloc(len_s,sizeof(char));

  ht_init(&ht_p,LEN_P,&hash_p,&equal_p);
  ht_init(&ht_nc,LEN_NC,&hash_nc,&equal_nc);
  ht_init(&ht_s,LEN_S,&hash_s,&equal_s);

  windup();
}

/* empties the three hash tables and re-creates the built-in entries;
 the arrays themselves keep their current allocation */
void rn_clear(void) {
  ht_clear(&ht_p);
  ht_clear(&ht_nc);
  ht_clear(&ht_s);
  windup();
}

/* resets all three arrays to empty and re-creates the built-in entries.
 The order matters: index 0 of the pattern and name class arrays is taken
 by an ERROR record, offset 0 of rn_string by the empty string, and BASE_P
 is taken after notAllowed, empty and text so that rn_compress() never
 touches them. */
static void windup(void) {
  i_p=i_nc=i_s=0;
  adding_ps=0;
  /* reserve index 0 in each array */
  rn_pattern[0]=RN_P_ERROR;  accept_p();
  rn_nameclass[0]=RN_NC_ERROR; accept_nc();
  rn_newString("");
  rn_notAllowed=rn_newNotAllowed(); 
  rn_empty=rn_newEmpty(); 
  rn_text=rn_newText(); 
  BASE_P=i_p;
  /* built-in datatypes use library 0 */
  rn_dt_string=rn_newDatatype(0,rn_newString("string")); 
  rn_dt_token=rn_newDatatype(0,rn_newString("token"));
  rn_xsd_uri=rn_newString("http://www.w3.org/2001/XMLSchema-datatypes");
}

static int hash_p(int p) {
  int *pp=rn_pattern+p; int h=0;
  switch(p_size[RN_P_TYP(p)]) {
  case 1: h=pp[0]&0xF; break;
  case 2: h=(pp[0]&0xF)|(pp[1]<<4); break;
  case 3: h=(pp[0]&0xF)|((pp[1]^pp[2])<<4); break;
  default: assert(0);
  }
  return h*PRIME_P;
}

static int hash_nc(int nc) {
  int *ncp=rn_nameclass+nc; int h=0;
  switch(nc_size[RN_NC_TYP(nc)]) {
  case 1: h=ncp[0]&0x7; break;
  case 2: h=(ncp[0]&0x7)|(ncp[1]<<3); break;
  case 3: h=(ncp[0]&0x7)|((ncp[1]^ncp[2])<<3); break;
  default: assert(0);
  }
  return h*PRIME_NC;
}

/* hash of the NUL-terminated string at offset i of rn_string */
static int hash_s(int i) {
  char *s=rn_string+i;
  return s_hval(s);
}

static int equal_p(int p1,int p2) {
  int *pp1=rn_pattern+p1,*pp2=rn_pattern+p2;
  if(RN_P_TYP(p1)!=RN_P_TYP(p2)) return 0;
  switch(p_size[RN_P_TYP(p1)]) {
  case 3: if(pp1[2]!=pp2[2]) return 0;
  case 2: if(pp1[1]!=pp2[1]) return 0;
  case 1: return 1;
  default: assert(0);
  }
  return 0;
}

static int equal_nc(int nc1,int nc2) {
  int *ncp1=rn_nameclass+nc1,*ncp2=rn_nameclass+nc2;
  if(RN_NC_TYP(nc1)!=RN_NC_TYP(nc2)) return 0;
  switch(nc_size[RN_NC_TYP(nc1)]) {
  case 3: if(ncp1[2]!=ncp2[2]) return 0;
  case 2: if(ncp1[1]!=ncp2[1]) return 0;
  case 1: return 1;
  default: assert(0);
  }
  return 0;
}

/* returns 1 if the strings at offsets s1 and s2 of rn_string are equal */
static int equal_s(int s1,int s2) {
  char *a=rn_string+s1,*b=rn_string+s2;
  return strcmp(a,b)==0;
}

/* marks patterns reachable from start, assumes that the references are resolved */
/* pick_p(p): if p is at or above since and not yet marked, appends it to
 the work list flat[] and marks it; patterns below since are never visited */
#define pick_p(p) do { \
  if(p>=since && !rn_marked(p)) {flat[n_f++]=p; rn_mark(p);}  \
} while(0)
static void mark_p(int start,int since) {
  int p,p1,p2,nc,i,n_f;
  /* work list; each pattern is added at most once, so i_p-since entries suffice */
  int *flat=(int*)m_alloc(i_p-since,sizeof(int));

  /* breadth-first walk: flat[0..i) is processed, flat[i..n_f) is pending */
  n_f=0; pick_p(start);
  for(i=0;i!=n_f;++i) {
    p=flat[i];
    switch(RN_P_TYP(p)) {
    /* leaves: no child patterns to follow */
    case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
    case RN_P_DATA: case RN_P_VALUE: break;

    /* two children: queue p2 here, then fall into UNARY for p1 */
    case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
    case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
    case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
    case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
    BINARY: pick_p(p2); goto UNARY;

    /* one child pattern (nc is extracted but not used here) */
    case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
    case RN_P_LIST: rn_List(p,p1); goto UNARY;
    case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY;
    case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY;
    UNARY: pick_p(p1); break;

    /* any other type (e.g. an unresolved ref) is a programming error */
    default:
      assert(0);
    }
  }
  m_free(flat);
}

/* assumes that used patterns are marked */
/* redir_p(): p is equal to q, the pattern the hash table currently holds.
 If q lies below since or is itself still live (xlat[q-since]!=-1), p is
 unmarked and redirected to q; otherwise q is dead, so it is replaced in
 the hash table by p. Uses p, q, since, xlat and changed of the caller. */
#define redir_p() do { \
  if(q<since || xlat[q-since]!=-1) { \
    rn_unmark(p); xlat[p-since]=q; \
    changed=1; \
  } else { \
    ht_deli(&ht_p,q); ht_put(&ht_p,p); \
  } \
} while(0)
/* merges duplicate patterns among those at or above since: every marked
 pattern that is equal to another live pattern is redirected to it, child
 references are rewritten accordingly, and the process repeats until no
 more redirections occur. On return each entry of starts[0..n_st) that is
 at or above since has been replaced by the pattern it was redirected to. */
static void sweep_p(int *starts,int n_st,int since) {
  int p,p1,p2,nc,q,changed,touched;
  int *xlat;
  /* xlat[p-since]: p itself if p is live, the replacement if p was
   redirected, -1 if p was not marked */
  xlat=(int*)m_alloc(i_p-since,sizeof(int));
  changed=0;
  for(p=since;p!=i_p;p+=p_size[RN_P_TYP(p)]) {
    if(rn_marked(p)) xlat[p-since]=p; else xlat[p-since]=-1;
  }
  /* live patterns that the hash table maps to a different index */
  for(p=since;p!=i_p;p+=p_size[RN_P_TYP(p)]) {
    if(xlat[p-since]==p && (q=ht_get(&ht_p,p))!=p) redir_p();
  }
  while(changed) {
    changed=0;
    for(p=since;p!=i_p;p+=p_size[RN_P_TYP(p)]) {
      if(xlat[p-since]==p) {
	touched=0;
	switch(RN_P_TYP(p)) {
	case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
	case RN_P_DATA: case RN_P_VALUE:
	  break;

	case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
	case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
	case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
	case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
	BINARY:
	  /* second child was redirected: p is taken out of the hash table
	   before its contents (and therefore its hash) change */
	  if(p2>=since && (q=xlat[p2-since])!=p2) {
	    ht_deli(&ht_p,p);
	    touched=1;
	    rn_pattern[p+2]=q;
	  }
	  goto UNARY;

	case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
	case RN_P_LIST: rn_List(p,p1); goto UNARY;
	case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY;
	case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY;
	UNARY:
	  /* same for the first child; p is removed from the table only once */
	  if(p1>=since && (q=xlat[p1-since])!=p1) {
	    if(!touched) ht_deli(&ht_p,p);
	    touched=1;
	    rn_pattern[p+1]=q;
	  }
	  break;

	default:
	  assert(0);
	}
	/* a rewritten pattern is re-inserted, or redirected if it has
	 become equal to a pattern already in the table */
	if(touched) {
	  changed=1; /* recursion through redirection */
	  if((q=ht_get(&ht_p,p))==-1) {
	    ht_put(&ht_p,p);
	  } else {
	    redir_p();
	  }
	}
      }
    }
  }
  while(n_st--!=0) {
    if(*starts>=since) *starts=xlat[*starts-since];
    ++starts;
  }
  m_free(xlat);
}

/* walks all patterns from since to i_p: marked ones just lose their mark,
 unmarked ones are removed from the hash table and flagged as erased */
static void unmark_p(int since) {
  int p=since;
  while(p!=i_p) {
    if(rn_marked(p)) {
      rn_unmark(p);
    } else {
      ht_deli(&ht_p,p);
      erase(p);
    }
    p+=p_size[RN_P_TYP(p)];
  }
}

/* compacts the pattern array from since upwards by squeezing out erased
 patterns. The first pass assigns every surviving pattern its new index and
 takes it out of the hash table; the second pass rewrites child references,
 moves each record down to its new index and puts it back into the hash
 table. Entries of starts[0..n_st) at or above since are translated to the
 new indices, i_p is lowered, and the array may be shrunk. */
static void compress_p(int *starts,int n_st,int since) {
  int p,psiz, p1,p2,nc, q,i_q, newlen_p;
  /* xlat[p-since]: new index of p, or -1 if p is erased */
  int *xlat=(int*)m_alloc(i_p-since,sizeof(int));
  p=q=since;
  while(p!=i_p) { psiz=p_size[RN_P_TYP(p)];
    if(erased(p)) {
      xlat[p-since]=-1;
    } else {
      ht_deli(&ht_p,p);
      xlat[p-since]=q;
      q+=psiz;
    }
    p+=psiz;
  }
  /* i_q: end of the compacted array */
  i_q=q; p=since;
  while(p!=i_p) { psiz=p_size[RN_P_TYP(p)]; /* rn_pattern[p] changes */
    if(xlat[p-since]!=-1) {
      switch(RN_P_TYP(p)) {
      case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
      case RN_P_DATA: case RN_P_VALUE:
	break;

      case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
      case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
      case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
      case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
      BINARY:
	if(p2>=since && (q=xlat[p2-since])!=p2) rn_pattern[p+2]=q;
	goto UNARY;

      case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
      case RN_P_LIST: rn_List(p,p1); goto UNARY;
      case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY;
      case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY;
      UNARY:
	if(p1>=since && (q=xlat[p1-since])!=p1) rn_pattern[p+1]=q;
	break;

      default:
	assert(0);
      }
      /* move the record down; the target never lies above the source */
      if((q=xlat[p-since])!=p) { int i;
	for(i=0;i!=psiz;++i) rn_pattern[q+i]=rn_pattern[p+i];
	assert(q+psiz<i_p);
      }
      ht_put(&ht_p,q);
    }
    p+=psiz;
  }
  while(n_st--!=0) {
    if(*starts>=since) *starts=xlat[*starts-since];
    ++starts;
  }
  m_free(xlat);
  
  /* shrink the array only if it is longer than P_AVG_SIZE*LIM_P and twice
   the used size is smaller than the current length; never go below the
   initial length P_AVG_SIZE*LEN_P */
  if(i_q!=i_p) { i_p=i_q; newlen_p=i_p*2;
    if(len_p>P_AVG_SIZE*LIM_P&&newlen_p<len_p) {
      rn_pattern=(int*)m_stretch(rn_pattern,
	len_p=newlen_p>P_AVG_SIZE*LEN_P?newlen_p:P_AVG_SIZE*LEN_P,
	i_p,sizeof(int));
    }
  }
}

/* garbage-collects all patterns above the built-in ones (from BASE_P):
 everything reachable from starts[0..n_st) is kept, duplicates are merged,
 the rest is removed and the array is compacted; the entries of starts are
 updated in place to the new indices */
void rn_compress(int *starts,int n_st) {
  int k=0;
  while(k!=n_st) {
    mark_p(starts[k],BASE_P);
    ++k;
  }
  sweep_p(starts,n_st,BASE_P);
  unmark_p(BASE_P);
  compress_p(starts,n_st,BASE_P);
}

/* like rn_compress(), but for a single start pattern and only for the
 patterns added since the last rn_new_schema() call (from base_p);
 returns the index of start after compaction */
int rn_compress_last(int start) {
  int root=start;
  mark_p(root,base_p);
  sweep_p(&root,1,base_p);
  unmark_p(base_p);
  compress_p(&root,1,base_p);
  return root;
}

--- NEW FILE: rvp.c ---
/* $Id: rvp.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */

/* validation pipe:
 synopsis

   rvp -qsdevh grammar.rnc

 reads from 0, writes to 1, 2 for grammar parse errors only, then redirected.
   -q switches to numerical error codes
   -s takes less space but more time
   -d plugs in an external type checker
   -e the argument is a Scheme program providing a datatype library
   -v displays version
   -h help message
 exit code: 0 on valid, non-zero on invalid

 protocol
  query ::= (start | quit | start-tag-open | attribute | start-tag-close | text | end-tag) z.
   quit ::= "quit".
   start ::= "start" [gramno].
   start-tag-open ::= "start-tag-open" patno name.
   attribute ::= "attribute" patno name value.
   start-tag-close ::= "start-tag-close" patno name.
   text ::= ("text"|"mixed") patno text.
   end-tag ::= "end-tag" patno name.
  response ::= (ok | er | error) z.
   ok ::= "ok" patno.
   er ::= "er" patno erno.
   error ::= "error" patno erno error.
  z ::= "\0" .

  conventions:
    last colon in name separates namespace uri and local part
    -q?er:error
    error==0 yields message 'protocol error' and happens when a query is not understood
    start assumes gramno=0 if the argument is omitted
*/

#include <stdlib.h>
#include <stdarg.h>
#include <stdio.h>
#include <limits.h> /*INT_MAX*/
#include <fcntl.h>  /*open,close*/
#include <sys/types.h>
#include UNISTD_H   /*open,read,close*/
#include <string.h> /*strerror*/
#include <setjmp.h>
#include <errno.h>
#include <assert.h>
#include "m.h"
#include "s.h"
#include "erbit.h"
#include "drv.h"
#include "rnl.h"
#include "rnv.h"
#include "dxl.h"
#include "dsl.h"
#include "er.h"

extern int rn_notAllowed, drv_compact, rx_compact;

/* query keyword codes; each code is the index of its keyword in kwdtab.
 NKWD is the table size and doubles as the code for an unrecognized keyword
 (see case NKWD in query()).
 NOTE(review): kwdtab is in alphabetical order; presumably s_ntab() depends
 on that -- confirm in s.c before adding or reordering keywords. */
#define ATT 0
#define ENT 1
#define MIX 2
#define QUIT 3
#define START 4
#define STC 5
#define STO 6
#define TXT 7
#define NKWD 8
char *kwdtab[NKWD]={
  "attribute",
  "end-tag",
  "mixed",
  "quit",
  "start",
  "start-tag-close",
  "start-tag-open",
  "text"
};

/* printf formats for the first field of a response; resp() picks ERROR
 when explain is set and ER otherwise */
#define OK "ok %u"
#define ER "er %u"
#define ERROR "error %u"

/* read chunk size, initial size of quebuf, and size of resp()'s buffer */
#define LEN_B 1024

static FILE *nstderr;
/* explain: non-zero selects the verbose "error" response with message text;
 lasterr: error number recorded by verror_handler();
 starts, n_st: start patterns selectable with the "start" query */
static int explain=1, lasterr, *starts, n_st;
/* quebuf: input buffer of allocated length len_q holding n_q bytes read from fd 0 */
static int len_q,n_q; char *quebuf;
static int erp[2]; /* *erp to read error messages */
/* jump target for I/O failures in writeall(), resp() and query() */
static jmp_buf IOER;

/* error callback: remembers erno as given (possibly with ERBIT_RNV set) in
 lasterr, then hands the number with ERBIT_RNV cleared to the default rnv
 handler */
static void verror_handler(int erno,va_list ap) {
  int plain=erno&~ERBIT_RNV;
  lasterr=erno;
  rnv_default_verror_handler(plain,ap);
}
/* error callback installed as rnv_verror_handler: tags erno with ERBIT_RNV
 and forwards to verror_handler() */
static void verror_handler_rnv(int erno,va_list ap) {
  int tagged=erno|ERBIT_RNV;
  verror_handler(tagged,ap);
}

static int initialized=0;
/* one-time set-up: initializes the schema loader and the validator,
 installs the error callback, registers the two datatype libraries and
 allocates the query buffer; repeated calls do nothing */
static void init(void) {
  if(initialized) return;
  initialized=1;

  rnl_init();
  rnv_init();
  rnv_verror_handler=&verror_handler_rnv;
  drv_add_dtl(DXL_URL,&dxl_equal,&dxl_allows);
  drv_add_dtl(DSL_URL,&dsl_equal,&dsl_allows);
  len_q=LEN_B;
  quebuf=(char*)m_alloc(len_q,sizeof(char));
}

/* returns the index of the first character at or after quebuf[i] that is
 not a tab, newline, carriage return or space; the terminating NUL of the
 query stops the scan */
static int tok(int i) {
  char c;
  while((c=quebuf[i])=='\t'||c=='\n'||c=='\r'||c==' ') ++i;
  return i;
}

/* returns the index of the first character at or after quebuf[i] that ends
 a token: NUL, tab, newline, carriage return or space */
static int endtok(int i) {
  for(;;++i) {
    char c=quebuf[i];
    if(c=='\0'||c=='\t'||c=='\n'||c=='\r'||c==' ') return i;
  }
}

/* writes len bytes from buf to fd, repeating write() after short writes;
 write() is called at least once, even when len is 0. A failing write()
 aborts the current operation with longjmp(IOER,1). */
static void writeall(int fd,char *buf,int len) {
  char *next=buf;
  int left=len;
  do {
    int n=write(fd,next,left);
    if(n==-1) longjmp(IOER,1);
    next+=n;
    left-=n;
  } while(left);
}

static void resp(int ok,int patno,int prevno) {
  int len;
  static char buf[LEN_B];
  char *f=(char*)(ok?OK:explain?ERROR:ER);
  len=sprintf(buf,f,patno); assert(len<LEN_B);
  writeall(1,buf,len);
  if(!ok) {
    len=sprintf(buf," %u",lasterr); assert(len<LEN_B);
    writeall(1,buf,len);
    if(explain) {buf[0]=' '; writeall(1,buf,1);}
  }
  for(;;) { /* read always, write if verbose */
    len=read(erp[0],buf,LEN_B);
    if(len<0) {if(errno==EAGAIN) break; else longjmp(IOER,1);}
    if(len==0) break;
    if(!ok&&explain&&prevno!=rn_notAllowed) writeall(1,buf,len);
  }
  buf[0]='\0'; writeall(1,buf,1);
}

/* Read one NUL-terminated query from fd 0, execute it and answer through
   resp(). Returns 0 after a "quit" query, 1 otherwise. Read errors and end of
   input jump to IOER.

   Query layout: keyword, whitespace, pattern number, then keyword-specific
   arguments (a name, and for attributes/text the remaining bytes up to the
   terminating NUL).

   Pattern numbers must consist of at most nine decimal digits; anything else
   is answered with a protocol error instead of being folded into an
   arbitrary (possibly negative or overflowed) integer. */
static int query(void) {
  /* prevno starts at 0 so that resp() never reads an indeterminate value on
     the START and protocol-error paths, where no rnv_* call fills it in.
     0 is the reserved error pattern (see the patno==0 check below), so it is
     presumably never equal to rn_notAllowed -- TODO confirm in rn.c */
  int i,j,n,dn, kwd, patno,prevno=0, ok=0;
  char *name;
  n=0;
  for(;;) {
    if(n==n_q) {
      if(len_q-n_q<LEN_B) quebuf=(char*)m_stretch(quebuf,len_q=n_q+LEN_B,n_q,sizeof(char));
      dn=read(0,quebuf+n_q,LEN_B);
      if(dn<0) longjmp(IOER,1);
      if(dn==0) {errno=EIO; longjmp(IOER,1);}
      n_q+=dn;
    }
    if(quebuf[n++]=='\0') break;
  }

  j=endtok(i=tok(0));
  if((kwd=s_ntab(quebuf+i,j-i,kwdtab,NKWD))==QUIT) {resp(1,0,0); return 0;}
  switch(kwd) {
  case START:
    j=endtok((i=tok(j)));
    if(j-i>9) goto PROTER; /* would not fit in an int */
    patno=0;
    while(i!=j) { /* an absent number selects start pattern 0 */
      if(quebuf[i]<'0'||quebuf[i]>'9') goto PROTER;
      patno=patno*10+quebuf[i++]-'0';
    }
    if(patno>=n_st) goto PROTER;
    ok=1; patno=starts[patno];
    break;
  case STO: case ATT: case STC: case TXT: case MIX: case ENT:
    j=endtok((i=tok(j))); if(i==j) goto PROTER;
    if(j-i>9) goto PROTER; /* would not fit in an int */
    patno=0;
    do {
      if(quebuf[i]<'0'||quebuf[i]>'9') goto PROTER;
      patno=patno*10+quebuf[i++]-'0';
    } while(i!=j);
    if(patno==0) goto PROTER; /* 0 is ERROR, not allowed */
    switch(kwd) {
    case STO: case ATT: case STC: case ENT:
      j=endtok((i=tok(j))); if(i==j||(kwd==ATT&&quebuf[j]=='\0')) goto PROTER;
      name=quebuf+i; quebuf[j]='\0';
      switch(kwd) {
      case STO: ok=rnv_start_tag_open(&patno,&prevno,name); break;
      case ATT: ok=rnv_attribute(&patno,&prevno,name,quebuf+j+1); break;
      case STC: ok=rnv_start_tag_close(&patno,&prevno,name); break;
      case ENT: ok=rnv_end_tag(&patno,&prevno,name); break;
      }
      break;
    case TXT: case MIX:
      /* text is everything after the single separator following the number */
      if(quebuf[j]) ++j; i=j; while(quebuf[j]) ++j;
      ok=rnv_text(&patno,&prevno,quebuf+i,j-i,kwd==MIX);
      break;
    }
    break;

  case NKWD: PROTER: (*er_printf)("protocol error\n"); lasterr=0; patno=0; ok=0; break;
  default: assert(0);
  }
  resp(ok,patno,prevno);

  /* keep any bytes already read past this query's NUL for the next call */
  i=0; while(n!=n_q) quebuf[i++]=quebuf[n++]; n_q=i;
  return 1;
}

/* Print the program version through er_printf. */
static void version(void) {
  (*er_printf)("rvp version %s\n",RVP_VERSION);
}

/* Print the command-line synopsis through er_printf; the 'd' and 'e' option
   letters appear only when DXL_EXC / DSL_SCM are enabled at compile time. */
static void usage(void) {
  (*er_printf)(
    "usage: rvp {-[qs"
#if DXL_EXC
    "d"
#endif
#if DSL_SCM
    "e"
#endif
    "vh?]} {schema.rnc}\n"
  );
}

/* rvp entry point: parse options, load every schema named on the command
   line, then serve queries from fd 0 until a "quit" query arrives.

   While serving, fd 2 is redirected into a non-blocking pipe so that resp()
   can collect the validator's error text; I/O failures are reported on a
   duplicate of the original fd 2 (nstderr).

   Returns 0 after -h/-?, 1 when no schema is given, EXIT_FAILURE when a
   schema fails to load or an I/O error occurs, EXIT_SUCCESS after "quit". */
int main(int argc,char **argv) {
  int i, ok;
  init();

  --argc;
  while(*(++argv)&&**argv=='-') {
    --argc; i=1;
    for(;;) {
      switch(*(*argv+i)) {
      case '\0': goto END_OF_OPTIONS;
      case 'h': case '?': usage(); return 0;
      case 'v': version(); break;
      case 's': drv_compact=1; rx_compact=1; break;
#if DXL_EXC
      case 'd': dxl_cmd=*(argv+1); if(*(argv+1)) ++argv; goto END_OF_OPTIONS;
#endif
#if DSL_SCM
      case 'e': dsl_ld(*(argv+1)); if(*(argv+1)) ++argv; goto END_OF_OPTIONS;
#endif
      case 'q': explain=0; break;
      default: (*er_printf)("unknown option '-%c'\n",*(*argv+i)); break;
      }
      ++i;
    }
    END_OF_OPTIONS:;
  }

  if(*argv==NULL) {usage(); return 1;}

  /* argc is now an upper bound on the number of remaining (schema) arguments */
  starts=(int*)m_alloc(argc,sizeof(int));
  ok=1; n_st=0;
  do {
    ok=(starts[n_st++]=rnl_fn(*(argv++)))&&ok;
  } while(*argv);
  if(ok) {
    int fd2;
    FILE *dupped;

    nstderr=stderr;
    if(setjmp(IOER)) {
      fprintf(nstderr,"%s\n",strerror(errno));
      return EXIT_FAILURE;
    }

    if((fd2=dup(2))==-1) longjmp(IOER,1);
    /* only replace nstderr once fdopen has succeeded, so the error path
       above never prints to a NULL stream */
    if((dupped=fdopen(fd2,"w"))==NULL) longjmp(IOER,1);
    nstderr=dupped;
    if(pipe(erp)==-1||dup2(erp[1],2)==-1) longjmp(IOER,1);
    /* resp() relies on EAGAIN to stop draining; a blocking pipe would hang it */
    if(fcntl(erp[0],F_SETFL,O_NONBLOCK)==-1) longjmp(IOER,1);
    setbuf(stderr,NULL);

    while(query());
    return EXIT_SUCCESS;
  }
  return EXIT_FAILURE;
}

--- NEW FILE: xsdck.c ---
#include <string.h>
#include <stdlib.h>
#include "m.h"
#include "er.h"
#include "xsd.h"

/* xsdck command line:
     xsdck equal  TYPE VALUE STRING
     xsdck allows TYPE {PARAM VALUE} STRING
   Exits with 0 when the xsd check succeeds, 1 when it fails, and 255 (after
   printing a message) when the arguments are malformed. */
int main(int argc,char **argv) {
  int nargs=argc-1;        /* arguments after the program name */
  char **args=argv+1;

  xsd_init();

  if(args[0]!=NULL) {
    if(strcmp(args[0],"equal")==0) {
      if(nargs==4) return !xsd_equal(args[1],args[2],args[3],strlen(args[3]));
    } else if(strcmp(args[0],"allows")==0) {
      /* command, type and string plus name/value pairs: an odd count >= 3 */
      if(nargs>=3&&(nargs&1)) {
        int last=nargs-1;
        int len=nargs-2,i;   /* one NUL per parameter word plus the final NUL */
        char *ps,*dst,*src;
        for(i=2;i<last;++i) len+=strlen(args[i]);
        ps=(char*)m_alloc(len,sizeof(char)); ps[len-1]='\0';
        /* pack the parameter words back to back, each with its NUL */
        dst=ps;
        for(i=2;i<last;++i) {
          src=args[i];
          do *dst=*src++; while(*dst++);
        }
        return !xsd_allows(args[1],ps,args[last],strlen(args[last]));
      }
    }
  }

  (*er_printf)("xsdck: invalid arguments\n");
  return 255;
}

--- NEW FILE: xsd.h ---
/* $Id: xsd.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */

/* xml schema datatypes: public interface */

#include <stdarg.h>

#ifndef XSD_H
#define XSD_H 1

/* XSD_ER_*: error numbers; presumably the erno values handed to
   xsd_verror_handler -- confirm against xsd.c */
#define XSD_ER_TYP 0
#define XSD_ER_PAR 1
#define XSD_ER_PARVAL 2
#define XSD_ER_VAL 3
#define XSD_ER_NPAT 4
#define XSD_ER_WS 5
#define XSD_ER_ENUM 6

/* replaceable error hook; drv_init() points it at its own wrapper */
extern void (*xsd_verror_handler)(int erno,va_list ap);

extern void xsd_default_verror_handler(int erno,va_list ap);

extern void xsd_init(void);
extern void xsd_clear(void);

/* datatype callbacks in the form drv_add_dtl() accepts; s is passed together
   with its length n (xsdck passes strlen of the string).
   xsdck builds ps as consecutive NUL-terminated words followed by an extra NUL. */
extern int xsd_allows(char *typ,char *ps,char *s,int n);
extern int xsd_equal(char *typ,char *val,char *s,int n);

extern void xsd_test(void);

#endif

--- NEW FILE: xmlc.c ---
/* $Id: xmlc.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#include "u.h"
#include "xmlc.h"

/* sorted range arrays */
/* Inclusive {low,high} code point ranges in ascending order; presumably the
   XML 1.0 BaseChar class -- confirm against the specification. */
int BASE_CHAR[][2]={
  {0x41,0x5a},{0x61,0x7a},{0xc0,0xd6},{0xd8,0xf6},{0xf8,0xff},{0x100,0x131},{0x134,0x13e},{0x141,0x148},
  {0x14a,0x17e},{0x180,0x1c3},{0x1cd,0x1f0},{0x1f4,0x1f5},{0x1fa,0x217},{0x250,0x2a8},{0x2bb,0x2c1},{0x386,0x386},
  {0x388,0x38a},{0x38c,0x38c},{0x38e,0x3a1},{0x3a3,0x3ce},{0x3d0,0x3d6},{0x3da,0x3da},{0x3dc,0x3dc},{0x3de,0x3de},
  {0x3e0,0x3e0},{0x3e2,0x3f3},{0x401,0x40c},{0x40e,0x44f},{0x451,0x45c},{0x45e,0x481},{0x490,0x4c4},{0x4c7,0x4c8},
  {0x4cb,0x4cc},{0x4d0,0x4eb},{0x4ee,0x4f5},{0x4f8,0x4f9},{0x531,0x556},{0x559,0x559},{0x561,0x586},{0x5d0,0x5ea},
  {0x5f0,0x5f2},{0x621,0x63a},{0x641,0x64a},{0x671,0x6b7},{0x6ba,0x6be},{0x6c0,0x6ce},{0x6d0,0x6d3},{0x6d5,0x6d5},
  {0x6e5,0x6e6},{0x905,0x939},{0x93d,0x93d},{0x958,0x961},{0x985,0x98c},{0x98f,0x990},{0x993,0x9a8},{0x9aa,0x9b0},
  {0x9b2,0x9b2},{0x9b6,0x9b9},{0x9dc,0x9dd},{0x9df,0x9e1},{0x9f0,0x9f1},{0xa05,0xa0a},{0xa0f,0xa10},{0xa13,0xa28},
  {0xa2a,0xa30},{0xa32,0xa33},{0xa35,0xa36},{0xa38,0xa39},{0xa59,0xa5c},{0xa5e,0xa5e},{0xa72,0xa74},{0xa85,0xa8b},
  {0xa8d,0xa8d},{0xa8f,0xa91},{0xa93,0xaa8},{0xaaa,0xab0},{0xab2,0xab3},{0xab5,0xab9},{0xabd,0xabd},{0xae0,0xae0},
  {0xb05,0xb0c},{0xb0f,0xb10},{0xb13,0xb28},{0xb2a,0xb30},{0xb32,0xb33},{0xb36,0xb39},{0xb3d,0xb3d},{0xb5c,0xb5d},
  {0xb5f,0xb61},{0xb85,0xb8a},{0xb8e,0xb90},{0xb92,0xb95},{0xb99,0xb9a},{0xb9c,0xb9c},{0xb9e,0xb9f},{0xba3,0xba4},
  {0xba8,0xbaa},{0xbae,0xbb5},{0xbb7,0xbb9},{0xc05,0xc0c},{0xc0e,0xc10},{0xc12,0xc28},{0xc2a,0xc33},{0xc35,0xc39},
  {0xc60,0xc61},{0xc85,0xc8c},{0xc8e,0xc90},{0xc92,0xca8},{0xcaa,0xcb3},{0xcb5,0xcb9},{0xcde,0xcde},{0xce0,0xce1},
  {0xd05,0xd0c},{0xd0e,0xd10},{0xd12,0xd28},{0xd2a,0xd39},{0xd60,0xd61},{0xe01,0xe2e},{0xe30,0xe30},{0xe32,0xe33},
  {0xe40,0xe45},{0xe81,0xe82},{0xe84,0xe84},{0xe87,0xe88},{0xe8a,0xe8a},{0xe8d,0xe8d},{0xe94,0xe97},{0xe99,0xe9f},
  {0xea1,0xea3},{0xea5,0xea5},{0xea7,0xea7},{0xeaa,0xeab},{0xead,0xeae},{0xeb0,0xeb0},{0xeb2,0xeb3},{0xebd,0xebd},
  {0xec0,0xec4},{0xf40,0xf47},{0xf49,0xf69},{0x10a0,0x10c5},{0x10d0,0x10f6},{0x1100,0x1100},{0x1102,0x1103},{0x1105,0x1107},
  {0x1109,0x1109},{0x110b,0x110c},{0x110e,0x1112},{0x113c,0x113c},{0x113e,0x113e},{0x1140,0x1140},{0x114c,0x114c},{0x114e,0x114e},
  {0x1150,0x1150},{0x1154,0x1155},{0x1159,0x1159},{0x115f,0x1161},{0x1163,0x1163},{0x1165,0x1165},{0x1167,0x1167},{0x1169,0x1169},
  {0x116d,0x116e},{0x1172,0x1173},{0x1175,0x1175},{0x119e,0x119e},{0x11a8,0x11a8},{0x11ab,0x11ab},{0x11ae,0x11af},{0x11b7,0x11b8},
  {0x11ba,0x11ba},{0x11bc,0x11c2},{0x11eb,0x11eb},{0x11f0,0x11f0},{0x11f9,0x11f9},{0x1e00,0x1e9b},{0x1ea0,0x1ef9},{0x1f00,0x1f15},
  {0x1f18,0x1f1d},{0x1f20,0x1f45},{0x1f48,0x1f4d},{0x1f50,0x1f57},{0x1f59,0x1f59},{0x1f5b,0x1f5b},{0x1f5d,0x1f5d},{0x1f5f,0x1f7d},
  {0x1f80,0x1fb4},{0x1fb6,0x1fbc},{0x1fbe,0x1fbe},{0x1fc2,0x1fc4},{0x1fc6,0x1fcc},{0x1fd0,0x1fd3},{0x1fd6,0x1fdb},{0x1fe0,0x1fec},
  {0x1ff2,0x1ff4},{0x1ff6,0x1ffc},{0x2126,0x2126},{0x212a,0x212b},{0x212e,0x212e},{0x2180,0x2182},{0x3041,0x3094},{0x30a1,0x30fa},
  {0x3105,0x312c},{0xac00,0xd7a3}
};
/* Each table below lists inclusive {low,high} code point ranges and is
   searched through isa()/u_in_ranges(). The names presumably mirror the
   XML 1.0 character classes Ideographic, CombiningChar, Digit and Extender
   -- confirm against the specification. */
int IDEOGRAPHIC[][2]={{0x3007,0x3007},{0x3021,0x3029},{0x4e00,0x9fa5}};
int COMBINING_CHAR[][2]={{0x300,0x345},{0x360,0x361},{0x483,0x486},{0x591,0x5a1},{0x5a3,0x5b9},{0x5bb,0x5bd},{0x5bf,0x5bf},{0x5c1,0x5c2},{0x5c4,0x5c4},{0x64b,0x652},{0x670,0x670},{0x6d6,0x6dc},{0x6dd,0x6df},{0x6e0,0x6e4},{0x6e7,0x6e8},{0x6ea,0x6ed},{0x901,0x903},{0x93c,0x93c},{0x93e,0x94c},{0x94d,0x94d},{0x951,0x954},{0x962,0x963},{0x981,0x983},{0x9bc,0x9bc},{0x9be,0x9be},{0x9bf,0x9bf},{0x9c0,0x9c4},{0x9c7,0x9c8},{0x9cb,0x9cd},{0x9d7,0x9d7},{0x9e2,0x9e3},{0xa02,0xa02},{0xa3c,0xa3c},{0xa3e,0xa3e},{0xa3f,0xa3f},{0xa40,0xa42},{0xa47,0xa48},{0xa4b,0xa4d},{0xa70,0xa71},{0xa81,0xa83},{0xabc,0xabc},{0xabe,0xac5},{0xac7,0xac9},{0xacb,0xacd},{0xb01,0xb03},{0xb3c,0xb3c},{0xb3e,0xb43},{0xb47,0xb48},{0xb4b,0xb4d},{0xb56,0xb57},{0xb82,0xb83},{0xbbe,0xbc2},{0xbc6,0xbc8},{0xbca,0xbcd},{0xbd7,0xbd7},{0xc01,0xc03},{0xc3e,0xc44},{0xc46,0xc48},{0xc4a,0xc4d},{0xc55,0xc56},{0xc82,0xc83},{0xcbe,0xcc4},{0xcc6,0xcc8},{0xcca,0xccd},{0xcd5,0xcd6},{0xd02,0xd03},{0xd3e,0xd43},{0xd46,0xd48},{0xd4a,0xd4d},{0xd57,0xd57},{0xe31,0xe31},{0xe34,0xe3a},{0xe47,0xe4e},{0xeb1,0xeb1},{0xeb4,0xeb9},{0xebb,0xebc},{0xec8,0xecd},{0xf18,0xf19},{0xf35,0xf35},{0xf37,0xf37},{0xf39,0xf39},{0xf3e,0xf3e},{0xf3f,0xf3f},{0xf71,0xf84},{0xf86,0xf8b},{0xf90,0xf95},{0xf97,0xf97},{0xf99,0xfad},{0xfb1,0xfb7},{0xfb9,0xfb9},{0x20d0,0x20dc},{0x20e1,0x20e1},{0x302a,0x302f},{0x3099,0x3099},{0x309a,0x309a}};
int DIGIT[][2]={{0x30,0x39},{0x660,0x669},{0x6f0,0x6f9},{0x966,0x96f},{0x9e6,0x9ef},{0xa66,0xa6f},{0xae6,0xaef},{0xb66,0xb6f},{0xbe7,0xbef},{0xc66,0xc6f},{0xce6,0xcef},{0xd66,0xd6f},{0xe50,0xe59},{0xed0,0xed9},{0xf20,0xf29}};
int EXTENDER[][2]={{0xb7,0xb7},{0x2d0,0x2d1},{0x387,0x387},{0x640,0x640},{0xe46,0xe46},{0xec6,0xec6},{0x3005,0x3005},{0x3031,0x3035},{0x309d,0x309e},{0x30fc,0x30fe}};

/* Test u against a range table: passes the table and its number of {low,high}
   rows to u_in_ranges. CHAR_CLASS must name an array (not a pointer) so that
   sizeof yields the size of the whole table. */
#define isa(u,CHAR_CLASS) u_in_ranges(u,CHAR_CLASS,sizeof(CHAR_CLASS)/sizeof(int([2])))

/* Return 1 when u is a tab, line feed, carriage return or space, else 0. */
int xmlc_white_space(int u) {
  switch(u) {
  case '\t': case '\n': case '\r': case ' ':
    return 1;
  default:
    return 0;
  }
}
/* Classifiers: each returns the result of u_in_ranges() for code point u
   against the corresponding range table, passing the table's row count. */
int xmlc_base_char(int u) {
  return u_in_ranges(u,BASE_CHAR,sizeof(BASE_CHAR)/sizeof(BASE_CHAR[0]));
}

int xmlc_ideographic(int u) {
  return u_in_ranges(u,IDEOGRAPHIC,sizeof(IDEOGRAPHIC)/sizeof(IDEOGRAPHIC[0]));
}

int xmlc_combining_char(int u) {
  return u_in_ranges(u,COMBINING_CHAR,sizeof(COMBINING_CHAR)/sizeof(COMBINING_CHAR[0]));
}

int xmlc_digit(int u) {
  return u_in_ranges(u,DIGIT,sizeof(DIGIT)/sizeof(DIGIT[0]));
}

int xmlc_extender(int u) {
  return u_in_ranges(u,EXTENDER,sizeof(EXTENDER)/sizeof(EXTENDER[0]));
}

--- NEW FILE: rnl.c ---
/* $Id: rnl.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */

#include <stdarg.h>
#include "erbit.h"
#include "rn.h"
#include "rnc.h"
#include "rnd.h"
#include "rnl.h"

/* Default loader error handler: route the error to the rnc or rnd default
   handler according to its class bit, with that bit removed. Errors carrying
   neither bit are ignored. */
void rnl_default_verror_handler(int erno,va_list ap) {
  if(erno&ERBIT_RNC) {
    rnc_default_verror_handler(erno&~ERBIT_RNC,ap);
    return;
  }
  if(erno&ERBIT_RND) {
    rnd_default_verror_handler(erno&~ERBIT_RND,ap);
  }
}

/* Replaceable hook through which all loader errors are reported. */
void (*rnl_verror_handler)(int er_no,va_list ap)=&rnl_default_verror_handler;

static void verror_handler_rnc(int erno,va_list ap) {rnl_verror_handler(erno|ERBIT_RNC,ap);}
static void verror_handler_rnd(int erno,va_list ap) {rnl_verror_handler(erno|ERBIT_RND,ap);}

static int initialized=0;

/* One-time setup of the loader: initialise rn, rnc and rnd and point the rnc
   and rnd error hooks at the tagging wrappers. Later calls do nothing. */
void rnl_init(void) {
  if(initialized) return;
  initialized=1;

  rn_init();
  rnc_init();
  rnc_verror_handler=&verror_handler_rnc;
  rnd_init();
  rnd_verror_handler=&verror_handler_rnd;
}

/* Nothing to release. */
void rnl_clear(void) {}

/* Parse an opened source, always close it, then run the second pass and
   compress the result. Returns the start pattern, or 0 when the source
   reported errors or rnd_fixup() returned 0. */
static int load(struct rnc_source *sp) {
  int start=-1;

  if(!rnc_errors(sp)) start=rnc_parse(sp);
  rnc_close(sp);

  if(rnc_errors(sp)) return 0;
  start=rnd_fixup(start);
  if(!start) return 0;
  return rn_compress_last(start);
}

/* Load the schema stored in the file named fn; returns what load() returns. */
int rnl_fn(char *fn) {
  struct rnc_source src;
  rnc_open(&src,fn);
  return load(&src);
}

/* Load a schema from the descriptor fd, bound under the name fn; returns what
   load() returns. */
int rnl_fd(char *fn,int fd) {
  struct rnc_source src;
  rnc_bind(&src,fn,fd);
  return load(&src);
}

/* Load a schema from the len-byte string s, opened under the name fn; returns
   what load() returns. */
int rnl_s(char *fn,char *s,int len) {
  struct rnc_source src;
  rnc_stropen(&src,fn,s,len);
  return load(&src);
}

--- NEW FILE: rnc.c ---
/* $Id: rnc.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */

#include <fcntl.h> /* open, close */
#include <sys/types.h>
#include UNISTD_H /* open,read,close */
#include <string.h> /* memcpy,strlen,strcpy,strcat */
#include <errno.h> /*errno*/
#include <assert.h> /*assert*/

#include "u.h"
#include "xmlc.h"
#include "m.h"
#include "s.h" /* s_clone */
#include "rn.h"
#include "sc.h"
#include "er.h"
#include "rnc.h"

#define NKWD 19
[...1156 lines suppressed...]

  rn_new_schema();

  sc_open(&nss); add_well_known_nss(0);
  open_scope(sp);

  start=topLevel(sp); if(start!=-1) sc_add(&defs,0,start,0);

  if((i=sc_find(&defs,0))) {
    start=defs.tab[i][1];
  } else {
    error(1,sp,RNC_ER_NOSTART,sp->fn,CUR(sp).line,CUR(sp).col);
    start=0;
  }

  close_scope(sp);
  sc_close(&nss);

  return start;
}

--- NEW FILE: src.txt ---
# $Id: src.txt,v 1.1 2009/08/03 05:32:48 mike Exp $

# source files

xcl.c -- Expat-based command-line interface
arx.c -- document and type association utility
ll.h -- lengths of arrays
erbit.h -- error classes
er.c er.h -- low-level error output functions
rnv.c rnv.h -- higher-level validator logic
rn.c rn.h -- RNG patterns
rnc.c rnc.h -- RNC loader
rnd.c rnd.h -- second pass for RNC loader, restrictions and traits
rnx.c rnx.h -- auxiliary functions for rn*
drv.c drv.h -- derivative of RNG patterns, validation core
xsd.c xsd.h -- xml schema datatypes
xsd_tm.c xsd_tm.h -- dateTime datatype implementation
dxl.c dxl.h -- executable datatypes
dsl.c dsl.h -- scheme datatypes
sc.c sc.h -- scope tables for rnc
ht.c ht.h -- hash table  
s.c s.h  -- common string operations
m.c m.h  -- common memory operations
xmlc.c xmlc.h -- xml character classifiers
u.c u.h  -- utf-8 
rx.c rx.h -- regular expressions engine
rx_cls_u.c rx_cls_ranges.c -- auto-generated tables; included by rx.c

# hierarchy

... ht,s,m,xmlc,u,er -- used in many places

xcl,arx,rvp
. dsl
. dxl
. rnv
... erbit.h
... ll
... rn
.... ll
... rnc
.... sc
..... ll
... rnd
... rnx
.... ll
... drv
.... ll
.... xsd
..... erbit.h
..... xsd_tm
..... rx
...... rx_cls_u
...... rx_cls_ranges
...... ll

--- NEW FILE: rnd.h ---
/* $Id: rnd.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */

/* second pass for RNC loader, restrictions and traits: public interface */

#include <stdarg.h>

#ifndef RND_H
#define RND_H 1

/* error numbers passed to rnd_verror_handler */
#define RND_ER_LOOPST 0   /* loop in start pattern */
#define RND_ER_LOOPEL 1   /* loop in pattern for an element */
#define RND_ER_CTYPE 2    /* element content has no content-type */
#define RND_ER_BADSTART 3 /* bad path in start pattern */
#define RND_ER_BADMORE 4  /* bad path before '*' or '+' */
#define RND_ER_BADEXPT 5  /* bad path after '-' */
#define RND_ER_BADLIST 6  /* bad path after 'list' */
#define RND_ER_BADATTR 7  /* bad path in an attribute */

/* replaceable error hook; initially the default handler */
extern void (*rnd_verror_handler)(int er_no,va_list ap);

/* prints the message for er_no through er_vprintf */
extern void rnd_default_verror_handler(int erno,va_list ap);

extern void rnd_init(void);
extern void rnd_clear(void);

/* resolves references from start, checks restrictions and computes traits;
   returns the resulting start pattern, or 0 if any error was reported */
extern int rnd_fixup(int start);

#endif

--- NEW FILE: rnl.h ---
/* $Id: rnl.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */

/* schema loader: public interface */

/* va_list is used below; included here so the header is self-contained */
#include <stdarg.h>

#ifndef RNL_H
#define RNL_H 1

/* replaceable error hook; initially rnl_default_verror_handler */
extern void (*rnl_verror_handler)(int er_no,va_list ap);
/* dispatches to the rnc or rnd default handler by error class bit */
extern void rnl_default_verror_handler(int erno,va_list ap);

extern void rnl_init(void);
extern void rnl_clear(void);

/* Each loader returns the start pattern of the loaded schema, or 0 on failure. */
extern int rnl_fn(char *fn);                /* from the file named fn */
extern int rnl_fd(char *fn,int fd);         /* from descriptor fd, named fn */
extern int rnl_s(char *fn,char *s,int len); /* from the len-byte string s, named fn */

#endif

--- NEW FILE: m.h ---
/* $Id: m.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */

/* common memory operations */

#ifndef M_H
#define M_H 1

/* releases a block obtained from m_alloc or m_stretch */
extern void m_free(void *p);
/* callers pass an element count and an element size, e.g.
   m_alloc(LEN_F,sizeof(int)); presumably allocates length*size bytes -- confirm in m.c */
extern void *m_alloc(int length,int size);
/* callers pass the new capacity, the number of elements currently in use and
   the element size, and use the returned pointer in place of p; presumably
   preserves the first oldlen elements -- confirm in m.c */
extern void *m_stretch(void *p,int newlen,int oldlen,int size);

#endif

--- NEW FILE: rnd.c ---
/* $Id: rnd.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */

#include <stdlib.h>
#include <assert.h>
#include "m.h"
#include "rn.h"
#include "rnx.h"
#include "ll.h"
#include "er.h"
#include "rnd.h"

/* initial capacity of flat */
#define LEN_F RND_LEN_F

static int len_f,n_f; /* flat: allocated capacity and number of entries in use */
static int *flat;     /* worklist of patterns built by deref(); flat[0] is the start pattern */
static int errors;    /* number of errors reported since rnd_fixup() reset it */

#define err(msg) (*er_vprintf)("error: "msg"\n",ap)

/* Default rnd error handler: print the message that belongs to er_no through
   er_vprintf, taking the message arguments from ap. An unknown er_no trips
   an assertion. */
void rnd_default_verror_handler(int er_no,va_list ap) {
  switch(er_no) {
  case RND_ER_LOOPST:
    err("loop in start pattern");
    return;
  case RND_ER_LOOPEL:
    err("loop in pattern for element '%s'");
    return;
  case RND_ER_CTYPE:
    err("content of element '%s' does not have a content-type");
    return;
  case RND_ER_BADSTART:
    err("bad path in start pattern");
    return;
  case RND_ER_BADMORE:
    err("bad path before '*' or '+' in element '%s'");
    return;
  case RND_ER_BADEXPT:
    err("bad path after '-' in element '%s'");
    return;
  case RND_ER_BADLIST:
    err("bad path after 'list' in element '%s'");
    return;
  case RND_ER_BADATTR:
    err("bad path in attribute '%s' of element '%s'");
    return;
  default:
    assert(0);
  }
}

/* Replaceable hook through which error() reports. */
void (*rnd_verror_handler)(int er_no,va_list ap)=&rnd_default_verror_handler;

static int initialized=0;

/* One-time setup: initialise the rn module. Later calls do nothing. */
void rnd_init(void) {
  if(initialized) return;
  rn_init();
  initialized=1;
}

/* Nothing to release. */
void rnd_clear(void) {}

/* Report error er_no with its variadic arguments through rnd_verror_handler
   and count it in errors. */
static void error(int er_no,...) {
  va_list ap;

  va_start(ap,er_no);
  (*rnd_verror_handler)(er_no,ap);
  va_end(ap);

  ++errors;
}

/* Follow a chain of references starting at p (which must be a reference) and
   return the first target that is not a reference. If the chain leads back to
   the starting reference, that reference itself is returned. */
static int de(int p) {
  int origin=p,target;
  RN_P_CHK(p,RN_P_REF);
  rn_Ref(p,target);
  while(RN_P_IS(target,RN_P_REF)&&target!=origin) {
    p=target;
    rn_Ref(p,target);
  }
  return target;
}

/* Append p to flat and mark it, unless it is already marked. The caller makes
   sure flat has room. */
static void flatten(int p) {
  if(rn_marked(p)) return;
  flat[n_f++]=p;
  rn_mark(p);
}

/* Build flat: the list of patterns reachable from start, each listed once,
   with references among their children replaced by the referenced patterns.
   flat doubles as the work queue: entries are appended by flatten() while the
   loop index i walks over them. Marks set by flatten() are cleared at the end. */
static void deref(int start) {
  int p,p1,p2,nc,i,changed;

  flat=(int*)m_alloc(len_f=LEN_F,sizeof(int)); n_f=0;
  if(RN_P_IS(start,RN_P_REF)) start=de(start);
  flatten(start);

  i=0;
  do {
    p=flat[i++];
    switch(RN_P_TYP(p)) {
    /* leaves: nothing to descend into */
    case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_DATA: case RN_P_VALUE:
      break;

    case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
    case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
    case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
    case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
    BINARY:
      changed=0;
      if(RN_P_IS(p1,RN_P_REF)) {p1=de(p1); changed=1;}
      if(RN_P_IS(p2,RN_P_REF)) {p2=de(p2); changed=1;}
      /* the pattern is removed before and re-added after its children are
         rewritten in place; presumably because the pattern table is keyed on
         the pattern's contents -- confirm in rn.c */
      if(changed) {rn_del_p(p); rn_pattern[p+1]=p1; rn_pattern[p+2]=p2; rn_add_p(p);}
      /* make room for up to two new entries before queueing the children */
      if(n_f+2>len_f) flat=(int*)m_stretch(flat,len_f=2*(n_f+2),n_f,sizeof(int));
      flatten(p1); flatten(p2);
      break;

    case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
    case RN_P_LIST: rn_List(p,p1); goto UNARY;
    case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY;
    case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY;
    UNARY:
      changed=0;
      if(RN_P_IS(p1,RN_P_REF)) {p1=de(p1); changed=1;}
      if(changed) {rn_del_p(p); rn_pattern[p+1]=p1; rn_add_p(p);}
      /* make room for one new entry before queueing the child */
      if(n_f+1>len_f) flat=(int*)m_stretch(flat,len_f=2*(n_f+1),n_f,sizeof(int));
      flatten(p1);
      break;

    case RN_P_REF: /* because of a loop, but will be handled in rnd_loops */
      break;

    default:
      assert(0);
    }
  } while(i!=n_f);
  for(i=0;i!=n_f;++i) rn_unmark(flat[i]);
}

/* Return nonzero if a loop is reachable from p without passing through an
   element: either a pattern that is currently being visited (still marked) is
   reached again, or a reference is left unresolved. p is marked on entry and
   unmarked before returning, so marks describe the current recursion path. */
static int loop(int p) {
  int nc,p1,p2,ret=1;
  if(rn_marked(p)) return 1; /* p is already on the current path */
  rn_mark(p);
  switch(RN_P_TYP(p)) {
  /* leaves and elements end the search on this branch */
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT: case RN_P_DATA: case RN_P_VALUE:
  case RN_P_ELEMENT:
    ret=0; break;

  case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
  case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
  BINARY:
    ret=loop(p1)||loop(p2); break;

  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
  case RN_P_LIST: rn_List(p,p1); goto UNARY;
  case RN_P_ATTRIBUTE:  rn_Attribute(p,nc,p1); goto UNARY;
  UNARY:
    ret=loop(p1); break;

  /* a reference that deref() could not resolve counts as a loop */
  case RN_P_REF: ret=1; break;

  default: assert(0);
  }
  rn_unmark(p);
  return ret;
}

static void loops(void) {
  int i=0,p=flat[i],nc=-1,p1;
  for(;;) {
    if(loop(p)) {
      if(i==0) error(RND_ER_LOOPST); else {
	char *s=rnx_nc2str(nc);
	error(RND_ER_LOOPEL,s);
	m_free(s);
      }
    }
    for(;;) {++i;
      if(i==n_f) return;
      p=flat[i];
      if(RN_P_IS(p,RN_P_ELEMENT)) {
	rn_Element(p,nc,p1); p=p1;
	break;
      }
    }
  }
}

/* Compute and store the content type of p, unless one is already set.
   For interleave, group and oneOrMore the type is set only when
   rn_groupable() accepts the operands; for dataExcept and attribute only when
   the examined child has a content type. A content type that is still unset
   after the call is what ctypes() reports as an error. */
static void ctype(int p) {
  int p1,p2,nc;
  if(!rn_contentType(p)) {
    switch(RN_P_TYP(p)) {
    case RN_P_NOT_ALLOWED: rn_setContentType(p,RN_P_FLG_CTE,0); break;
    case RN_P_EMPTY: rn_setContentType(p,RN_P_FLG_CTE,0); break;
    case RN_P_TEXT: rn_setContentType(p,RN_P_FLG_CTC,0); break;
    case RN_P_CHOICE: rn_Choice(p,p1,p2); ctype(p1); ctype(p2);
      rn_setContentType(p,rn_contentType(p1),rn_contentType(p2)); break;
    case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); ctype(p1); ctype(p2);
      if(rn_groupable(p1,p2)) rn_setContentType(p,rn_contentType(p1),rn_contentType(p2)); break;
    case RN_P_GROUP: rn_Group(p,p1,p2); ctype(p1); ctype(p2);
      if(rn_groupable(p1,p2)) rn_setContentType(p,rn_contentType(p1),rn_contentType(p2)); break;
    case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); ctype(p1);
      if(rn_groupable(p1,p1)) rn_setContentType(p,rn_contentType(p1),0); break;
    case RN_P_LIST: rn_setContentType(p,RN_P_FLG_CTS,0); break;
    case RN_P_DATA: rn_setContentType(p,RN_P_FLG_CTS,0); break;
    case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); ctype(p1); ctype(p2);
      if(rn_contentType(p2)) rn_setContentType(p,RN_P_FLG_CTS,0); break;
    case RN_P_VALUE: rn_setContentType(p,RN_P_FLG_CTS,0); break;
    case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); ctype(p1);
      if(rn_contentType(p1)) rn_setContentType(p,RN_P_FLG_CTE,0); break;
    /* an element's own content is not examined here; ctypes() visits it separately */
    case RN_P_ELEMENT: rn_setContentType(p,RN_P_FLG_CTC,0); break;
    default: assert(0);
    }
  }
}

/* Compute the content type of every element's content in flat and report
   RND_ER_CTYPE, with the element's name, for each content left without one. */
static void ctypes(void) {
  int i,p,p1,nc;
  for(i=0;i!=n_f;++i) {
    p=flat[i];
    if(!RN_P_IS(p,RN_P_ELEMENT)) continue;
    rn_Element(p,nc,p1);
    ctype(p1);
    if(rn_contentType(p1)) continue;
    {
      char *s=rnx_nc2str(nc);
      error(RND_ER_CTYPE,s);
      m_free(s);
    }
  }
}

/* Return 1 if p may not appear in a start pattern: only notAllowed, elements
   and choices built from them are accepted (result 0). */
static int bad_start(int p) {
  int p1,p2;
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED:
  case RN_P_ELEMENT:
    return 0;

  case RN_P_CHOICE:
    rn_Choice(p,p1,p2);
    if(bad_start(p1)) return 1;
    return bad_start(p2);

  case RN_P_EMPTY: case RN_P_TEXT:
  case RN_P_INTERLEAVE: case RN_P_GROUP: case RN_P_ONE_OR_MORE:
  case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE:
  case RN_P_ATTRIBUTE:
    return 1;

  default:
    assert(0);
  }
  return 1;
}

/* Return 1 if p may not appear in the except part of a data pattern: only
   notAllowed, value, data, and choice/dataExcept combinations of those are
   accepted (result 0). */
static int bad_data_except(int p) {
  int p1,p2;
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED:
  case RN_P_VALUE: case RN_P_DATA:
    return 0;

  case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
  /* unpack with the accessor that matches the pattern type, as every other
     function in this file does for RN_P_DATA_EXCEPT */
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
  BINARY: return bad_data_except(p1)||bad_data_except(p2);

  case RN_P_EMPTY: case RN_P_TEXT:
  case RN_P_INTERLEAVE: case RN_P_GROUP: case RN_P_ONE_OR_MORE:
  case RN_P_LIST:
  case RN_P_ATTRIBUTE: case RN_P_ELEMENT:
    return 1;
  default: assert(0);
  }
  return 1;
}

/* Return 1 if p, taken from the operand of a oneOrMore, contains an attribute
   below a group or interleave. in_group is nonzero once such a node has been
   passed on the way down; callers start with 0. Elements are not descended
   into. */
static int bad_one_or_more(int p,int in_group) {
  int nc,p1,p2;
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
  case RN_P_DATA: case RN_P_VALUE:
  case RN_P_ELEMENT:
    return 0;

  case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); in_group=1; goto BINARY;
  case RN_P_GROUP: rn_Group(p,p1,p2); in_group=1; goto BINARY;
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
  BINARY: return  bad_one_or_more(p1,in_group)||bad_one_or_more(p2,in_group);

  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
  case RN_P_LIST: rn_List(p,p1); goto UNARY;
  /* an attribute inside a group/interleave is the violation; otherwise keep
     checking the attribute's own content */
  case RN_P_ATTRIBUTE: if(in_group) return 1;
    rn_Attribute(p,nc,p1); goto UNARY;
  UNARY: return  bad_one_or_more(p1,in_group);
  default: assert(0);
  }
  return 1;
}

/* Return 1 if p, taken from the content of a list, contains text, interleave,
   an attribute or an element; 0 if it is built only from notAllowed, empty,
   data and value combined through choice, group, dataExcept, oneOrMore and
   list. */
static int bad_list(int p) {
  int p1,p2;
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY:
  case RN_P_DATA: case RN_P_VALUE:
    return 0;

  case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
  case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
  BINARY: return bad_list(p1)||bad_list(p2);

  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
  case RN_P_LIST: rn_List(p,p1); goto UNARY;
  UNARY: return bad_list(p1);

  case RN_P_TEXT:
  case RN_P_INTERLEAVE:
  case RN_P_ATTRIBUTE:
  case RN_P_ELEMENT:
    return 1;
  default: assert(0);
  }
  return 1;
}

/* Return 1 if p, taken from the content of an attribute, contains an
   attribute or an element anywhere below it; 0 otherwise. */
static int bad_attribute(int p) {
  int p1,p2;
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
  case RN_P_DATA: case RN_P_VALUE:
    return 0;

  case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
  case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
  BINARY: return bad_attribute(p1)||bad_attribute(p2);


  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
  case RN_P_LIST: rn_List(p,p1); goto UNARY;
  UNARY: return bad_attribute(p1);

  case RN_P_ATTRIBUTE: case RN_P_ELEMENT:
    return 1;
  default: assert(0);
  }
  return 1;
}

/* Walk the content p of the element whose name class is nc and report every
   prohibited nesting found in dataExcept, oneOrMore, list and attribute
   patterns. Nested elements are not entered (paths() visits each element
   itself). nc is only used to name the element in the messages. */
static void path(int p,int nc) {
  int p1,p2,nc1;
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
  case RN_P_DATA: case RN_P_VALUE:
  case RN_P_ELEMENT:
    break;

  case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
  case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2);
    /* only the second operand (the except part) is restricted */
    if(bad_data_except(p2)) {char *s=rnx_nc2str(nc); error(RND_ER_BADEXPT,s); m_free(s);}
    goto BINARY;
  BINARY: path(p1,nc); path(p2,nc); break;

  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1);
    if(bad_one_or_more(p1,0)) {char *s=rnx_nc2str(nc); error(RND_ER_BADMORE,s); m_free(s);}
    goto UNARY;
  case RN_P_LIST: rn_List(p,p1);
    if(bad_list(p1)) {char *s=rnx_nc2str(nc); error(RND_ER_BADLIST,s); m_free(s);}
    goto UNARY;
  case RN_P_ATTRIBUTE: rn_Attribute(p,nc1,p1);
    /* message arguments: attribute name first, then the enclosing element's name */
    if(bad_attribute(p1)) {char *s=rnx_nc2str(nc),*s1=rnx_nc2str(nc1); error(RND_ER_BADATTR,s1,s); m_free(s1); m_free(s);}
    goto UNARY;
  UNARY: path(p1,nc); break;

  default: assert(0);
  }
}

/* Check the start pattern (flat[0]) with bad_start(), then run path() over
   the content of every element in flat. */
static void paths(void) {
  int i,p,p1,nc;

  if(bad_start(flat[0])) error(RND_ER_BADSTART);

  for(i=0;i!=n_f;++i) {
    p=flat[i];
    if(!RN_P_IS(p,RN_P_ELEMENT)) continue;
    rn_Element(p,nc,p1);
    path(p1,nc);
  }
}

/* Run the restriction checks. Content types and paths are only checked when
   no loop was found, because loops can cause endless loops in those checks. */
static void restrictions(void) {
  loops();
  if(!errors) {
    ctypes();
    paths();
  }
}

/* Propagate the nullable flag through flat until a full pass changes nothing:
   a choice is nullable if either operand is, an interleave or group if both
   are, a oneOrMore if its operand is. Patterns already flagged are skipped. */
static void nullables(void) {
  int i,p,p1,p2,changed;
  do {
    changed=0;
    for(i=0;i!=n_f;++i) {
      p=flat[i];
      if(rn_nullable(p)) continue;
      switch(RN_P_TYP(p)) {
      case RN_P_NOT_ALLOWED:
      case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE: case RN_P_LIST:
      case RN_P_ATTRIBUTE: case RN_P_ELEMENT:
        break;

      case RN_P_CHOICE:
        rn_Choice(p,p1,p2);
        rn_setNullable(p,rn_nullable(p1)||rn_nullable(p2));
        break;
      case RN_P_INTERLEAVE:
        rn_Interleave(p,p1,p2);
        rn_setNullable(p,rn_nullable(p1)&&rn_nullable(p2));
        break;
      case RN_P_GROUP:
        rn_Group(p,p1,p2);
        rn_setNullable(p,rn_nullable(p1)&&rn_nullable(p2));
        break;

      case RN_P_ONE_OR_MORE:
        rn_OneOrMore(p,p1);
        rn_setNullable(p,rn_nullable(p1));
        break;

      default:
        assert(0);
      }
      changed=changed||rn_nullable(p);
    }
  } while(changed);
}

/* Propagate the cdata flag through flat until a full pass changes nothing:
   a choice, interleave or group gets it if either operand has it, a oneOrMore
   if its operand has it. Patterns already flagged are skipped. */
static void cdatas(void) {
  int i,p,p1,p2,changed;
  do {
    changed=0;
    for(i=0;i!=n_f;++i) {
      p=flat[i];
      if(rn_cdata(p)) continue;
      switch(RN_P_TYP(p)) {
      case RN_P_NOT_ALLOWED: case RN_P_EMPTY:
      case RN_P_ATTRIBUTE: case RN_P_ELEMENT:
        break;

      case RN_P_CHOICE:
        rn_Choice(p,p1,p2);
        rn_setCdata(p,rn_cdata(p1)||rn_cdata(p2));
        break;
      case RN_P_INTERLEAVE:
        rn_Interleave(p,p1,p2);
        rn_setCdata(p,rn_cdata(p1)||rn_cdata(p2));
        break;
      case RN_P_GROUP:
        rn_Group(p,p1,p2);
        rn_setCdata(p,rn_cdata(p1)||rn_cdata(p2));
        break;

      case RN_P_ONE_OR_MORE:
        rn_OneOrMore(p,p1);
        rn_setCdata(p,rn_cdata(p1));
        break;

      default:
        assert(0);
      }
      changed=changed||rn_cdata(p);
    }
  } while(changed);
}

/* Compute the derived flags of all patterns in flat: nullable first, then
   cdata. */
static void traits(void) {
  nullables();
  cdatas();
}

/* Free flat and return the start pattern it held in its first slot. */
static int release(void) {
  int first=flat[0];
  m_free(flat);
  flat=NULL;
  return first;
}

/* Second pass over a parsed schema: resolve references from start, check the
   restrictions and, if they hold, compute the traits. Returns the resolved
   start pattern, or 0 when any error was reported. */
int rnd_fixup(int start) {
  errors=0;
  deref(start);
  if(!errors) {
    restrictions();
    if(!errors) traits();
  }
  start=release();
  if(errors) return 0;
  return start;
}

--- NEW FILE: drv.h ---
/* $Id: drv.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */

/* derivative of RNG patterns, validation core: public interface */

#include <stdarg.h>

#ifndef DRV_H
#define DRV_H 1

/* error number: no datatype library for a URI */
#define DRV_ER_NODTL 0

/* replaceable error hook; initially drv_default_verror_handler */
extern void (*drv_verror_handler)(int erno,va_list ap);
/* when nonzero, the memo table is recycled once it reaches its limit
   instead of growing (set by the -s option of rvp) */
extern int drv_compact;

extern void drv_default_verror_handler(int erno,va_list ap);

extern void drv_init(void);
extern void drv_clear(void);

/* Expat passes character data unterminated.  Hence functions that can deal with cdata expect the length of the data */
/* registers the equal/allows callbacks of the datatype library identified by suri */
extern void drv_add_dtl(char *suri,int (*equal)(char *typ,char *val,char *s,int n),int (*allows)(char *typ,char *ps,char *s,int n));

/* One function per validation event. Each takes the current pattern p; the
   return value is presumably the pattern after the event -- confirm in drv.c.
   NOTE(review): the _recover variants are presumably used to continue after
   an error; their exact behaviour is not visible here. */
extern int drv_start_tag_open(int p,char *suri,char *sname);
extern int drv_start_tag_open_recover(int p,char *suri,char *sname);
extern int drv_attribute_open(int p,char *suri,char *s);
extern int drv_attribute_open_recover(int p,char *suri,char *s);
extern int drv_attribute_close(int p);
extern int drv_attribute_close_recover(int p);
extern int drv_start_tag_close(int p);
extern int drv_start_tag_close_recover(int p);
extern int drv_text(int p,char *s,int n);
extern int drv_text_recover(int p,char *s,int n);
extern int drv_mixed_text(int p);
extern int drv_mixed_text_recover(int p);
extern int drv_end_tag(int p);
extern int drv_end_tag_recover(int p);

#endif

--- NEW FILE: build_vms.com ---
$! Build script for OpenVMS: compiles the rnv sources into rnv.olb and links
$! rnv.exe, test, arx and rvp.
$!
$! Replace these two strings to indicate where your expat install is located.
$	expat_headers = "my_disk:[ref.c_include]"
$	expat_olb = "my_disk:[olb]expat.olb"
$
$! Generate vms.h; the cc symbol defined below includes it first in every
$! compilation, so it supplies the macros the sources expect.
$! NOTE(review): the version strings below say 1.7.7 although this tree is
$! rnv-1.7.8 -- confirm whether they should be updated.
$	create vms.h
$	deck
#ifndef VMS_H
#define VMS_H
#define UNISTD_H <unistd.h>
#define EXPAT_H "expat.h"
#define RNV_VERSION "1.7.7"
#define ARX_VERSION "1.7.7"
#define RVP_VERSION "1.7.7"
#endif /* VMS_H */
$	eod
$
$	cc := cc/first_include=vms.h/incl='expat_headers'
$	modules = "XCL,RNV,ARY,DRV,DSL,DXL,ER,HT,M,RN,RNC,RND,RNL,RNX," + -
	   	  "RX,RX_CLS_RANGES,RX_CLS_U,S,SC,U,XMLC,XSD,XSD_TM"
$
$! Compile each module in the list, insert its object into rnv.olb and delete
$! the object file; f$element returns "," once the list is exhausted.
$	library/create/object rnv.olb
$	count = 0
$loop:
$	module = f$element (count, ",", modules)
$	if module .eqs. ","
$	then
$	    goto end_loop
$	endif
$	cc 'module'
$	library/object/insert rnv.olb 'module'
$	delete/nolog 'module'.obj;*
$	count = count + 1
$	goto loop
$end_loop:
$	link/exe=rnv.exe rnv/lib/include=xcl,'expat_olb'/lib
$
$! Now for the supporting cast...
$	cc test
$	link test,rnv/lib
$	cc arx
$	link arx,rnv/lib,'expat_olb'/lib
$	cc rvp
$	link rvp,rnv/lib
$	purge/nolog *.exe
$	purge/nolog *.olb
$	delete/nolog vms.h;*
$	exit

--- NEW FILE: drv.c ---
/* $Id: drv.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#include "xmlc.h" /*xmlc_white_space*/
#include "m.h"
#include "s.h" /*s_tokcmpn*/
#include "ht.h"
#include "rn.h"
#include "xsd.h"
#include "ll.h"
#include "erbit.h"
#include "er.h"
#include "drv.h"

/* One registered datatype library (see drv_add_dtl). */
struct dtl {
  int uri; /* identifies the library's URI; presumably an offset into rn_string -- confirm in drv_add_dtl */
  int (*equal)(char *typ,char *val,char *s,int n); /* equality callback; s comes with its length n */
  int (*allows)(char *typ,char *ps,char *s,int n); /* validity callback; ps carries the parameters */
};

/* table sizes: initial dtl capacity, initial memo capacity, hash multiplier,
   and the memo index at which compact mode wraps around */
#define LEN_DTL DRV_LEN_DTL
#define LEN_M DRV_LEN_M
#define PRIME_M DRV_PRIME_M
#define LIM_M DRV_LIM_M

/* ints per memo record: type, three key fields, result */
#define M_SIZE 5

/* memo record types, stored in field 0 */
#define M_STO 0
#define M_STC 1
#define M_ATT 2
#define M_TXT 3
#define M_END 4
/* the last field of a record holds the memoized result */
#define M_SET(p) memo[i_m][M_SIZE-1]=p
#define M_RET(m) memo[m][M_SIZE-1]

int drv_compact=0;

static struct dtl *dtl;        /* registered datatype libraries */
static int len_dtl,n_dtl;      /* dtl: allocated capacity and entries in use */
static int (*memo)[M_SIZE];    /* memo records; memo[i_m] is the scratch record being filled */
static int i_m,len_m;          /* index of the scratch record, allocated capacity */
static struct hashtable ht_m;  /* hash table over memo indices, using hash_m/equal_m */

#define err(msg) (*er_vprintf)(msg"\n",ap);

/* Default drv error handler: errors carrying the XSD class bit go to the xsd
   default handler with the bit removed; DRV_ER_NODTL is printed here; any
   other number trips an assertion. */
void drv_default_verror_handler(int erno,va_list ap) {
  if(erno&ERBIT_XSD) {
    xsd_default_verror_handler(erno&~ERBIT_XSD,ap);
    return;
  }
  if(erno==DRV_ER_NODTL) {
    err("no datatype library for URI '%s'");
  } else {
    assert(0);
  }
}

/* Replaceable hook through which drv reports errors. */
void (*drv_verror_handler)(int erno,va_list ap)=&drv_default_verror_handler;

/* Report error erno with its variadic arguments through drv_verror_handler. */
static void error_handler(int erno,...) {
  va_list ap;

  va_start(ap,erno);
  (*drv_verror_handler)(erno,ap);
  va_end(ap);
}

/* Installed into xsd by drv_init(): tag the error as an XSD error and forward
   it to the current drv hook. */
static void verror_handler_xsd(int erno,va_list ap) {
  (*drv_verror_handler)(erno|ERBIT_XSD,ap);
}

/* Start filling the scratch record memo[i_m] with a record of type typ. In
   compact mode the slot's index is first removed from the hash table. */
static void new_memo(int typ) {
  if(drv_compact) {
    ht_deli(&ht_m,i_m);
  }
  memo[i_m][0]=typ;
}

/* Hash table callback: two memo records are equal when their type and three
   key fields match (the result field is not compared). */
static int equal_m(int m1,int m2) {
  int k;
  for(k=0;k!=4;++k) {
    if(memo[m1][k]!=memo[m2][k]) return 0;
  }
  return 1;
}

/* Hash table callback: hash of memo record m over its type and key fields. */
static int hash_m(int m) {
  int *me=memo[m];
  int keys=me[1]^me[2]^me[3];
  return ((me[0]&0x7)|(keys<<3))*PRIME_M;
}

static int newStartTagOpen(int p,int uri,int name) {
  int *me=memo[i_m];
  new_memo(M_STO);
  me[1]=p; me[2]=uri; me[3]=name;
  return ht_get(&ht_m,i_m);
}

static int newAttributeOpen(int p,int uri,int name) {
  int *me=memo[i_m];
  new_memo(M_ATT);
  me[1]=p; me[2]=uri; me[3]=name;
  return ht_get(&ht_m,i_m);
}

static int newStartTagClose(int p) {
  int *me=memo[i_m];
  new_memo(M_STC);
  me[1]=p; me[2]=me[3]=0;
  return ht_get(&ht_m,i_m);
}

static int newMixedText(int p) {
  int *me=memo[i_m];
  new_memo(M_TXT);
  me[1]=p; me[2]=me[3]=0;
  return ht_get(&ht_m,i_m);
}

static int newEndTag(int p) {
  int *me=memo[i_m];
  new_memo(M_END);
  me[1]=p; me[2]=me[3]=0;
  return ht_get(&ht_m,i_m);
}

/* Commit the record built in slot i_m to the hash table and advance i_m. */
static void accept_m(void) {
  if(ht_get(&ht_m,i_m)!=-1) {
    /* an equal key is already stored: replace it in compact mode, otherwise keep the old entry */
    if(drv_compact) ht_del(&ht_m,i_m); else return;
  }
  ht_put(&ht_m,i_m++);
  if(drv_compact&&i_m==LIM_M) i_m=0; /* compact mode: wrap around and reuse slots */
  if(i_m==len_m) memo=(int(*)[M_SIZE])m_stretch(memo,len_m=2*i_m,i_m,sizeof(int[M_SIZE])); /* array full: double it */
}

/* equal() callback of the guard entry at dtl[0]: accepts every value. */
static int fallback_equal(char *typ,char *val,char *s,int n) {
  return 1;
}
/* allows() callback of the guard entry at dtl[0]: accepts every value. */
static int fallback_allows(char *typ,char *ps,char *s,int n) {
  return 1;
}

/* equal() callback registered for the library whose URI is rn_string+0.
   Compares val against the first n characters of s with s_cmpn() for the
   string datatype and with s_tokcmpn() for the token datatype; any other
   datatype trips an assertion and yields 0. */
static int builtin_equal(char *typ,char *val,char *s,int n) {
  int same=0;
  int dt=rn_newDatatype(0,typ-rn_string);
  if(dt==rn_dt_string) {
    same=(s_cmpn(val,s,n)==0);
  } else if(dt==rn_dt_token) {
    same=(s_tokcmpn(val,s,n)==0);
  } else {
    assert(0);
  }
  return same;
}

/* allows() callback registered next to builtin_equal: accepts every value. */
static int builtin_allows(char *typ,char *ps,char *s,int n) {
  return 1;
}

static void windup(void);

static int initialized=0;
/* One-time module set-up; later calls are no-ops.
   Initializes rn and xsd, routes xsd errors through verror_handler_xsd,
   allocates the memo array and the datatype-library table, creates the
   memo hash table and finally registers the standard libraries (windup). */
void drv_init(void) {
  if(initialized) return;
  initialized=1;
  rn_init();
  xsd_init();
  xsd_verror_handler=&verror_handler_xsd;
  len_m=LEN_M;
  memo=(int (*)[M_SIZE])m_alloc(len_m,sizeof(int[M_SIZE]));
  len_dtl=LEN_DTL;
  dtl=(struct dtl*)m_alloc(len_dtl,sizeof(struct dtl));
  ht_init(&ht_m,LEN_M,&hash_m,&equal_m);
  windup();
}

/* Reset the memo cursor and rebuild the datatype-library table from scratch.
   Registration order matters: getdtl() searches from the end and treats a hit
   at index 0 as "library not found". */
static void windup(void) {
  i_m=0; n_dtl=0;
  drv_add_dtl(rn_string+0,&fallback_equal,&fallback_allows); /* guard at 0 */
  drv_add_dtl(rn_string+0,&builtin_equal,&builtin_allows);
  drv_add_dtl(rn_string+rn_xsd_uri,&xsd_equal,&xsd_allows);
}

/* Empty the memo hash table and return the module to its post-init state (see windup). */
void drv_clear(void) {
  ht_clear(&ht_m);
  windup();
}

/* Register a datatype library.
   suri   - library URI; stored as the value returned by rn_newString()
   equal  - callback used for value patterns
   allows - callback used for data patterns
   The table is doubled when full; the new entry is appended at the end,
   so later registrations take precedence in getdtl(). */
void drv_add_dtl(char *suri,int (*equal)(char *typ,char *val,char *s,int n),int (*allows)(char *typ,char *ps,char *s,int n)) {
  if(n_dtl==len_dtl) {
    len_dtl=n_dtl*2;
    dtl=(struct dtl *)m_stretch(dtl,len_dtl,n_dtl,sizeof(struct dtl));
  }
  dtl[n_dtl].uri=rn_newString(suri);
  dtl[n_dtl].allows=allows;
  dtl[n_dtl].equal=equal;
  n_dtl+=1;
}

/* Find the most recently registered library for uri.
   The guard entry at index 0 is overwritten with uri so the backward scan
   always terminates; stopping on the guard means no real library matched,
   which is reported as DRV_ER_NODTL. The guard entry (whose callbacks
   accept everything) is still returned in that case. */
static struct dtl *getdtl(int uri) {
  int idx=n_dtl;
  dtl[0].uri=uri;
  do {
    --idx;
  } while(dtl[idx].uri!=uri);
  if(idx==0) error_handler(DRV_ER_NODTL,rn_string+uri);
  return &dtl[idx];
}

/* Return non-zero when name class nc accepts the name (uri,name).
   Recurses through except and choice name classes; an unknown name-class
   type trips an assertion and yields 0. */
static int ncof(int nc,int uri,int name) {
  int uri2,name2,nc1,nc2;
  switch(RN_NC_TYP(nc)) {
  case RN_NC_QNAME: rn_QName(nc,uri2,name2); return uri2==uri&&name2==name;
  case RN_NC_NSNAME: rn_NsName(nc,uri2); return uri2==uri;
  case RN_NC_ANY_NAME: return 1;
  case RN_NC_EXCEPT: rn_NameClassExcept(nc,nc1,nc2); return ncof(nc1,uri,name)&&!ncof(nc2,uri,name);
  case RN_NC_CHOICE: rn_NameClassChoice(nc,nc1,nc2); return ncof(nc1,uri,name)||ncof(nc2,uri,name);
  default: assert(0);
  }
  return 0;
}

/* Rewrite p1 by replacing the second operand q of every RN_P_AFTER node
   reachable through choices with f(q,p0). Choices are rebuilt with
   rn_choice(); every other pattern type yields rn_notAllowed. */
static int apply_after(int (*f)(int q1,int q2),int p1,int p0) {
  int p11,p12;
  switch(RN_P_TYP(p1)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
  case RN_P_INTERLEAVE: case RN_P_GROUP: case RN_P_ONE_OR_MORE:
  case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE:
  case RN_P_ATTRIBUTE: case RN_P_ELEMENT:
    return rn_notAllowed;
  case RN_P_CHOICE: rn_Choice(p1,p11,p12); return rn_choice(apply_after(f,p11,p0),apply_after(f,p12,p0));
  case RN_P_AFTER: rn_After(p1,p11,p12); return rn_after(p11,(*f)(p12,p0));
  default: assert(0);
  }
  return 0;
}

/* Compute the pattern that results from pattern p when a start tag with
   name (uri,name) is opened.
   recover - when non-zero the memo table is neither consulted nor updated,
             and a group also tries its second operand even if the first
             one is not nullable.
   Returns the resulting pattern (rn_notAllowed when the tag cannot match). */
static int start_tag_open(int p,int uri,int name,int recover) {
  int nc,p1,p2,m,ret=0;
  if(!recover) {
    /* cache lookup; the key is written into the scratch slot memo[i_m] */
    m=newStartTagOpen(p,uri,name);
    if(m!=-1) return M_RET(m);
  }
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
  case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE:
  case RN_P_ATTRIBUTE:
    ret=rn_notAllowed;
    break;
  case RN_P_CHOICE: rn_Choice(p,p1,p2);
    ret=rn_choice(start_tag_open(p1,uri,name,recover),start_tag_open(p2,uri,name,recover));
    break;
  case RN_P_ELEMENT: rn_Element(p,nc,p1);
    ret=ncof(nc,uri,name)?rn_after(p1,rn_empty):rn_notAllowed;
    break;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2);
    ret=rn_choice(
      apply_after(&rn_ileave,start_tag_open(p1,uri,name,recover),p2),
      apply_after(&rn_ileave,start_tag_open(p2,uri,name,recover),p1));
    break;
  case RN_P_GROUP: rn_Group(p,p1,p2);
    { int p11=apply_after(&rn_group,start_tag_open(p1,uri,name,recover),p2);
      ret=(rn_nullable(p1)||recover)?rn_choice(p11,start_tag_open(p2,uri,name,recover)):p11;
    } break;
  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1);
    ret=apply_after(&rn_group,start_tag_open(p1,uri,name,recover),rn_choice(p,rn_empty));
    break;
  case RN_P_AFTER: rn_After(p,p1,p2);
    ret=apply_after(&rn_after,start_tag_open(p1,uri,name,recover),p2);
    break;
  default: assert(0);
  }
  if(!recover) {
    /* the recursive calls above may have reused the scratch slot, so the key is written again before storing */
    newStartTagOpen(p,uri,name); M_SET(ret);
    accept_m();
  }
  return ret;
}

/* Public entry points for start_tag_open(): both convert the URI and local name
   with rn_newString(); the _recover variant passes recover=1. */
int drv_start_tag_open(int p,char *suri,char *sname) {return start_tag_open(p,rn_newString(suri),rn_newString(sname),0);}
int drv_start_tag_open_recover(int p,char *suri,char *sname) {return start_tag_open(p,rn_newString(suri),rn_newString(sname),1);}

/* rn_group() with its operands swapped; used by attribute_open() for the second operand of a group. */
static int puorg_rn(int p2,int p1) {return rn_group(p1,p2);}

/* Compute the pattern that results from pattern p when an attribute named
   (uri,name) is opened. Results are always memoized (there is no recover
   mode here). For a group the attribute may come from either operand, with
   no nullability test on the first one. */
static int attribute_open(int p,int uri,int name) {
  int nc,p1,p2,m,ret=0;
  m=newAttributeOpen(p,uri,name);
  if(m!=-1) return M_RET(m);
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
  case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE:
  case RN_P_ELEMENT:
    ret=rn_notAllowed;
    break;
  case RN_P_CHOICE: rn_Choice(p,p1,p2);
    ret=rn_choice(attribute_open(p1,uri,name),attribute_open(p2,uri,name));
    break;
  case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1);
    ret=ncof(nc,uri,name)?rn_after(p1,rn_empty):rn_notAllowed;
    break;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2);
    ret=rn_choice(
      apply_after(&rn_ileave,attribute_open(p1,uri,name),p2),
      apply_after(&rn_ileave,attribute_open(p2,uri,name),p1));
    break;
  case RN_P_GROUP: rn_Group(p,p1,p2);
    ret=rn_choice(
      apply_after(&rn_group,attribute_open(p1,uri,name),p2),
      apply_after(&puorg_rn,attribute_open(p2,uri,name),p1));
    break;
  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1);
    ret=apply_after(&rn_group,attribute_open(p1,uri,name),rn_choice(p,rn_empty));
    break;
  case RN_P_AFTER: rn_After(p,p1,p2);
    ret=apply_after(&rn_after,attribute_open(p1,uri,name),p2);
    break;
  default: assert(0);
  }
  /* the key is written again because the recursion may have reused the scratch slot */
  newAttributeOpen(p,uri,name); M_SET(ret);
  accept_m();
  return ret;
}

/* Public entry point for attribute_open(); converts the URI and local name with rn_newString(). */
int drv_attribute_open(int p,char *suri,char *sname) {return attribute_open(p,rn_newString(suri),rn_newString(sname));}
/* Recovery variant of drv_attribute_open(): the attribute is ignored and
   the pattern is handed back unchanged. */
int drv_attribute_open_recover(int p,char *suri,char *sname) {
  (void)suri;
  (void)sname;
  return p;
}

/* Closing an attribute is handled exactly like an end tag (normal and recovery variants). */
extern int drv_attribute_close(int p) {return drv_end_tag(p);}
extern int drv_attribute_close_recover(int p) {return drv_end_tag_recover(p);}

/* Compute the pattern that results from pattern p when the start tag is
   closed, i.e. no more attributes can follow.
   An attribute pattern that is still present becomes rn_notAllowed, or
   rn_empty when recover is non-zero. Memoization is skipped in recover mode. */
static int start_tag_close(int p,int recover) {
  int p1,p2,ret=0,m;
  if(!recover) {
    m=newStartTagClose(p);
    if(m!=-1) return M_RET(m);
  }
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
  case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE:
  case RN_P_ELEMENT:
    ret=p;
    break;
  case RN_P_CHOICE: rn_Choice(p,p1,p2);
    ret=rn_choice(start_tag_close(p1,recover),start_tag_close(p2,recover));
    break;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2);
    ret=rn_ileave(start_tag_close(p1,recover),start_tag_close(p2,recover));
    break;
  case RN_P_GROUP: rn_Group(p,p1,p2);
    ret=rn_group(start_tag_close(p1,recover),start_tag_close(p2,recover));
    break;
  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1);
    ret=rn_one_or_more(start_tag_close(p1,recover));
    break;
  case RN_P_ATTRIBUTE:
    ret=recover?rn_empty:rn_notAllowed;
    break;
  case RN_P_AFTER: rn_After(p,p1,p2);
    /* only the first operand of after is rewritten */
    ret=rn_after(start_tag_close(p1,recover),p2);
    break;
  default: assert(0);
  }
  if(!recover) {
    /* the key is written again because the recursion may have reused the scratch slot */
    newStartTagClose(p); M_SET(ret);
    accept_m();
  }
  return ret;
}
/* Public entry points for start_tag_close(): normal (recover=0) and recovery (recover=1). */
int drv_start_tag_close(int p) {return start_tag_close(p,0);}
int drv_start_tag_close_recover(int p) {return start_tag_close(p,1);}

static int text(int p,char *s,int n);
/* Feed the whitespace-separated tokens found in the first n characters of
   s through text(), one after another, threading the pattern along.
   Returns the pattern left after the last token (p itself when there are
   no tokens). */
static int list(int p,char *s,int n) {
  char *stop=s+n;
  char *tok=s,*tokend;
  for(;;) {
    /* skip leading whitespace */
    while(tok!=stop&&xmlc_white_space(*tok)) ++tok;
    if(tok==stop) break;
    /* find the end of the token */
    tokend=tok;
    while(tokend!=stop&&!xmlc_white_space(*tokend)) ++tokend;
    p=text(p,tok,tokend-tok);
    tok=tokend;
  }
  return p;
}

/* Compute the pattern that results from pattern p after the n characters at s.
   Not memoized. Data and value patterns are decided by the callbacks of the
   datatype library found through getdtl(). */
static int text(int p,char *s,int n) { /* matches text, including whitespace */
  int p1,p2,dt,ps,lib,typ,val,ret=0;
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY:
  case RN_P_ATTRIBUTE: case RN_P_ELEMENT:
    ret=rn_notAllowed;
    break;
  case RN_P_TEXT:
    ret=p;
    break;
  case RN_P_AFTER: rn_After(p,p1,p2);
    ret=rn_after(text(p1,s,n),p2);
    break;
  case RN_P_CHOICE: rn_Choice(p,p1,p2);
    ret=rn_choice(text(p1,s,n),text(p2,s,n));
    break;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2);
    ret=rn_choice(rn_ileave(text(p1,s,n),p2),rn_ileave(p1,text(p2,s,n)));
    break;
  case RN_P_GROUP: rn_Group(p,p1,p2);
    { int p11=rn_group(text(p1,s,n),p2);
      /* the second operand is tried only when the first one may be empty */
      ret=rn_nullable(p1)?rn_choice(p11,text(p2,s,n)):p11;
    } break;
  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1);
    ret=rn_group(text(p1,s,n),rn_choice(p,rn_empty));
    break;
  case RN_P_LIST: rn_List(p,p1);
    /* the list matches when its item pattern is nullable after all tokens were consumed */
    ret=rn_nullable(list(p1,s,n))?rn_empty:rn_notAllowed;
    break;
  case RN_P_DATA: rn_Data(p,dt,ps); rn_Datatype(dt,lib,typ);
    ret=getdtl(lib)->allows(rn_string+typ,rn_string+ps,s,n)?rn_empty:rn_notAllowed;
    break;
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2);
    /* the data pattern must accept the text and the except pattern must not */
    ret=text(p1,s,n)==rn_empty&&!rn_nullable(text(p2,s,n))?rn_empty:rn_notAllowed;
    break;
  case RN_P_VALUE: rn_Value(p,dt,val); rn_Datatype(dt,lib,typ);
    ret=getdtl(lib)->equal(rn_string+typ,rn_string+val,s,n)?rn_empty:rn_notAllowed;
    break;
  default: assert(0);
  }
  return ret;
}

/* Like text(), but text consisting only of whitespace (including empty
   text) may also be ignored: in that case the result is the choice between
   the original pattern and the pattern produced by text(). */
static int textws(int p,char *s,int n) {
  int derived=text(p,s,n);
  char *cur,*stop=s+n;
  for(cur=s;cur!=stop;++cur) {
    if(!xmlc_white_space(*cur)) return derived;
  }
  return rn_choice(p,derived);
}
/* Public entry point for character data; delegates to textws(). */
int drv_text(int p,char *s,int n) {return textws(p,s,n);}
/* Recovery variant of drv_text(): the text is ignored and the pattern is
   handed back unchanged. */
int drv_text_recover(int p,char *s,int n) {
  (void)s;
  (void)n;
  return p;
}

/* Compute the pattern that results from pattern p after some text in mixed
   content. The text itself is not inspected: only RN_P_TEXT accepts it, and
   list/data/value patterns are rejected. Results are memoized. */
static int mixed_text(int p) { /* matches text in mixed context */
  int p1,p2,ret=0,m;
  m=newMixedText(p);
  if(m!=-1) return M_RET(m);
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY:
  case RN_P_ATTRIBUTE: case RN_P_ELEMENT:
  case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE:
    ret=rn_notAllowed;
    break;
  case RN_P_TEXT:
    ret=p;
    break;
  case RN_P_AFTER: rn_After(p,p1,p2);
    ret=rn_after(mixed_text(p1),p2);
    break;
  case RN_P_CHOICE: rn_Choice(p,p1,p2);
    ret=rn_choice(mixed_text(p1),mixed_text(p2));
    break;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2);
    ret=rn_choice(rn_ileave(mixed_text(p1),p2),rn_ileave(p1,mixed_text(p2)));
    break;
  case RN_P_GROUP: rn_Group(p,p1,p2);
    { int p11=rn_group(mixed_text(p1),p2);
      ret=rn_nullable(p1)?rn_choice(p11,mixed_text(p2)):p11;
    } break;
  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1);
    ret=rn_group(mixed_text(p1),rn_choice(p,rn_empty));
    break;
  default: assert(0);
  }
  /* the key is written again because the recursion may have reused the scratch slot */
  newMixedText(p); M_SET(ret);
  accept_m();
  return ret;
}
/* Public entry point for mixed_text(). */
int drv_mixed_text(int p) {return mixed_text(p);}
/* Recovery variant of drv_mixed_text(): the pattern is handed back unchanged. */
int drv_mixed_text_recover(int p) {
  return p;
}

/* Compute the pattern that results from pattern p at an end tag.
   Only after patterns (possibly under choices) can succeed: the result is
   the second operand when the first operand is nullable, or unconditionally
   when recover is non-zero. Memoization is skipped in recover mode. */
static int end_tag(int p,int recover) {
  int p1,p2,ret=0,m;
  if(!recover) {
    m=newEndTag(p);
    if(m!=-1) return M_RET(m);
  }
  switch(RN_P_TYP(p)) {
  case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
  case RN_P_INTERLEAVE: case RN_P_GROUP: case RN_P_ONE_OR_MORE:
  case RN_P_LIST: case RN_P_DATA: case RN_P_DATA_EXCEPT: case RN_P_VALUE:
  case RN_P_ATTRIBUTE: case RN_P_ELEMENT:
    ret=rn_notAllowed;
    break;
  case RN_P_CHOICE: rn_Choice(p,p1,p2);
    ret=rn_choice(end_tag(p1,recover),end_tag(p2,recover));
    break;
  case RN_P_AFTER: rn_After(p,p1,p2);
    ret=(rn_nullable(p1)||recover)?p2:rn_notAllowed;
    break;
  default: assert(0);
  }
  if(!recover) {
    /* the key is written again because the recursion may have reused the scratch slot */
    newEndTag(p); M_SET(ret);
    accept_m();
  }
  return ret;
}
/* Public entry points for end_tag(): normal (recover=0) and recovery (recover=1). */
int drv_end_tag(int p) {return end_tag(p,0);}
int drv_end_tag_recover(int p) {return end_tag(p,1);}

--- NEW FILE: dsl.h ---
/* $Id: dsl.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */

/* Interface of the Scheme-based datatype library (implemented in dsl.c). */

#ifndef DSL_H
#define DSL_H 1

/* Build-time switch: when 0 (the default) dsl.c compiles stub versions of the functions below. */
#ifndef DSL_SCM
#define DSL_SCM 0
#endif

/* URI of the scheme datatype library. */
#define DSL_URL "http://davidashen.net/relaxng/scheme-datatypes"

/* Load the Scheme file dl that implements the datatypes. */
extern void dsl_ld(char *dl);

/* Datatype callbacks; same signatures as the equal/allows members of struct dtl in drv.c.
   Both return non-zero on success. */
extern int dsl_allows(char *typ,char *ps,char *s,int n);
extern int dsl_equal(char *typ,char *val,char *s,int n);

#endif

--- NEW FILE: m.c ---
/* $Id: m.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#include <stdlib.h>
#include <string.h>
#include "er.h"
#include "m.h"

/* M_STATIC is the size in bytes of a fixed memory pool; 0 (the default) selects the malloc-based allocator. */
#ifndef M_STATIC
#define M_STATIC 0
#endif

#if M_STATIC

/* Byte value used to fill newly handed-out pool memory; -1 disables filling (see m_alloc). */
#ifndef M_FILL
#define M_FILL '\0'
#endif

static char memory[M_STATIC];        /* the pool */
static char *mp=memory,*pmp=memory;  /* mp: first free byte; pmp: start of the most recent allocation */

/* Pool variant: memory is reclaimed only when p is the most recent allocation;
   any other pointer is ignored. pmp is then set to an impossible address so the
   same block cannot be released twice. */
void m_free(void *p) {
  if(p==pmp) {
    mp=pmp; pmp=(char*)-1;
  }
}

/* Pool variant: hand out length*size bytes, rounded up to a multiple of sizeof(int),
   from the front of the free area. Prints a message and exits with status 1 when
   the pool is exhausted. Never returns NULL. */
void *m_alloc(int length,int size) {
  char *p=mp, *q=mp; int n=length*size;
  pmp=mp; mp+=(n+sizeof(int)-1)/sizeof(int)*sizeof(int);
  if(mp>=memory+M_STATIC) {
    (*er_printf)("failed to allocate %i bytes of memory\n",length*size);
    exit(1);
  }
  if(M_FILL!=-1) while(q!=mp) *(q++)=M_FILL; /* fill the whole rounded-up block */
  return (char*)p;
}

#else

/* malloc variant: release a block obtained from m_alloc(). */
void m_free(void *p) {
  free(p);
}

/* malloc variant: allocate room for length items of size bytes each.
   Never returns NULL: when malloc() fails a message is printed through
   er_printf and the process exits with status 1. */
void *m_alloc(int length,int size) {
  void *block=malloc(length*size);
  if(!block) {
    (*er_printf)("failed to allocate %i bytes of memory\n",length*size);
    exit(1);
  }
  return block;
}

#endif

/* Reallocate: obtain a new block of newlen items, copy the first oldlen items
   of p into it, release p and return the new block.
   size is the item size in bytes. The caller must use the returned pointer;
   p must not be used afterwards. */
void *m_stretch(void *p,int newlen,int oldlen,int size) {
  void *newp=m_alloc(newlen,size);
  memcpy(newp,p,oldlen*size);
  m_free(p);
  return newp;
}

--- NEW FILE: Makefile.gnu ---

# Release string; handed to the C code through the *_VERSION macros in DEF.
VERSION=1.7.8
CC=cc

# optional features
# M_STATIC and M_FILL are always passed to the compiler (see DEF);
# DSL_SCM and DXL_EXC are passed only when set to 1 (see the ifeq blocks below).
M_STATIC=0
M_FILL=0
DSL_SCM=0
DXL_EXC=0

# Header names, passed as macros so the sources can write '#include EXPAT_H' etc.
EXPAT_H="<expat.h>"
UNISTD_H="<unistd.h>"
SCM_H="<scm/scm.h>"

# Search paths; CPPFLAGS and LDFLAGS from the environment or command line are appended.
INC=-I/usr/local/include ${CPPFLAGS}
LBL=-L/usr/local/lib ${LDFLAGS}

# Preprocessor definitions.
DEF=\
-DM_STATIC=${M_STATIC} \
-DM_FILL=${M_FILL} \
-DEXPAT_H=${EXPAT_H} \
-DUNISTD_H=${UNISTD_H} \
-DRNV_VERSION="\"${VERSION}\"" \
-DARX_VERSION="\"${VERSION}\"" \
-DRVP_VERSION="\"${VERSION}\""
WARN=-Wall -Wstrict-prototypes  -Wmissing-prototypes -Wcast-align
OPT=-O -g

# Flags used by the compile rule and by the link rules respectively.
CFLAGS=${INC} ${DEF} ${WARN} ${OPT}
LFLAGS=${OPT} ${LBL}

LIBEXPAT=-lexpat
# Libraries for the optional Scheme datatypes; the backquoted shell command is
# expanded by the shell in the link recipes and adds -ldl / -lsocket when the
# corresponding static libraries exist in /usr/lib.
LIB_SCM=-lscm -lm \
`sh -c '[ -f /usr/lib/libdl.a ] && echo -ldl \
      ; [ -f /usr/lib/libsocket.a ] && echo -lsocket \
'` 

# Libraries every executable is linked with.
LIB=${LIBEXPAT}

# Optional feature: Scheme datatype library.
ifeq (${DSL_SCM},1)
DEF+=-DDSL_SCM=${DSL_SCM} -DSCM_H=${SCM_H}
LIB+=${LIB_SCM}
endif

# Optional feature: DXL_EXC.
ifeq (${DXL_EXC},1)
DEF+=-DDXL_EXC=${DXL_EXC}
endif

# The executables link against LIBRNV; the static archive is the default choice.
LIBRNVA=librnv.a
LIBRNVSO=librnv.so
LIBRNV=${LIBRNVA}

# All source and header files; used only as input to the 'depend' target.
SRC=\
ll.h \
erbit.h \
xcl.c \
arx.c \
rvp.c \
xsdck.c \
test.c \
ary.c ary.h \
rn.c rn.h \
rnc.c rnc.h \
rnd.c rnd.h \
rnl.c rnl.h \
rnv.c rnv.h \
rnx.c rnx.h \
drv.c drv.h \
xsd.c xsd.h \
xsd_tm.c xsd_tm.h \
dxl.c dxl.h \
dsl.c dsl.h \
sc.c sc.h \
ht.c ht.h \
er.c er.h \
u.c u.h \
xmlc.c xmlc.h \
s.c s.h \
m.c m.h \
rx.c rx.h \
rx_cls_u.c \
rx_cls_ranges.c

# Object files that make up the rnv library (the program drivers xcl, arx, rvp,
# xsdck and test are linked separately).
OBJ=\
rn.o \
rnc.o \
rnd.o \
rnl.o \
rnv.o \
rnx.o \
drv.o \
ary.o \
xsd.o \
xsd_tm.o \
dxl.o \
dsl.o \
sc.o \
u.o \
ht.o \
er.o \
xmlc.o \
s.o \
m.o \
rx.o

# Compile rule (old-style suffix rule): build each .o from the .c of the same name.
.SUFFIXES: .c .o

.c.o:
	${CC} ${CFLAGS} -c -o $@ $<

# Default goal: build every program. Declared phony so that a stray file
# named 'all' in the directory cannot make the goal look up to date.
.PHONY: all
all: rnv arx rvp xsdck test

# Link the rnv executable from its driver object and the rnv library.
rnv: xcl.o ${LIBRNV}
	${CC} ${LFLAGS} -o $@ xcl.o ${LIBRNV} ${LIB}

# Link the arx executable from its driver object and the rnv library.
arx: arx.o ${LIBRNV}
	${CC} ${LFLAGS} -o $@ arx.o ${LIBRNV} ${LIB}

# Link the rvp executable from its driver object and the rnv library.
rvp: rvp.o ${LIBRNV}
	${CC} ${LFLAGS} -o $@ rvp.o ${LIBRNV} ${LIB}

# Link the xsdck executable from its driver object and the rnv library.
xsdck: xsdck.o ${LIBRNV}
	${CC} ${LFLAGS} -o $@ xsdck.o ${LIBRNV} ${LIB}

# Link the 'test' executable (a real file target, not a phony test runner).
test: test.o ${LIBRNV}
	${CC} ${LFLAGS} -o $@ test.o ${LIBRNV} ${LIB}

# Static library: archive all library objects, then index the archive.
${LIBRNVA}: ${OBJ}
	ar rc $@ ${OBJ}
	ranlib $@

# Shared library. Linked with ${CC}, like every other rule in this file,
# so that overriding CC (e.g. 'make CC=clang') also applies here instead
# of silently falling back to a hard-coded gcc.
${LIBRNVSO}: ${OBJ}
	${CC} -shared -o $@ ${OBJ}

# Regenerate header dependencies with makedepend (appended to the makefile
# by that tool). Declared phony: the rule never creates a file named
# 'depend', and a stray file of that name must not suppress the run.
.PHONY: depend
depend: ${SRC}
	makedepend -Y ${DEF} ${SRC}

# Remove everything this makefile can build, plus profiling/core leftovers
# and generated documentation. The list now also covers rvp, xsdck and
# test, which 'all' builds but the original recipe left behind.
# Declared phony so a file named 'clean' cannot disable the target.
# The leading '-' keeps make going if rm reports an error.
.PHONY: clean
clean:
	-rm -f *.o tst/c/*.o  *.a *.so rnv arx rvp xsdck test rnd_test *_test *.core *.gmon *.gprof rnv*.zip rnv.txt rnv.pdf rnv.html rnv.xml

# Build the rnd_test program straight from its source under tst/c,
# with the top-level directory on the include path.
rnd_test: ${LIBRNV} tst/c/rnd_test.c
	${CC} ${LFLAGS} -I. -o $@ tst/c/rnd_test.c ${LIBRNV} ${LIB}


--- NEW FILE: Makefile.bcc ---
# $Id: Makefile.bcc,v 1.1 2009/08/03 05:32:45 mike Exp $
#
# Release string reported by the programs (RNV_VERSION / ARX_VERSION).
# Kept in step with the source tree and the other makefiles, which are 1.7.8.
VERSION=1.7.8
CC=bcc32

# Header names, passed as macros so the sources can write '#include EXPAT_H' etc.
EXPAT_H="<expat.h>"
UNISTD_H="<io.h>"

# Include and library search paths of the local expat and Borland installations.
INC=-I. -Ic:\\expat\\source\\lib -Ic:\\borland\\bcc55\\include
LBL=-L. -Lc:\\borland\\bcc55\\lib

DEF=-DEXPAT_H=${EXPAT_H} -DUNISTD_H=${UNISTD_H} -DRNV_VERSION="\"${VERSION}\"" -DARX_VERSION="\"${VERSION}\""
WARN=-w-pia- -w-par- -w-aus- -w-ccc-
OPT=-O2

# Flags used by the compile rule and by the link rules respectively.
CFLAGS=${INC} ${DEF} ${WARN} ${OPT}
LFLAGS=${OPT} ${LBL}

LIBEXPAT=libexpats_mtd.lib
LIB=${LIBEXPAT}

# All source and header files of the programs built by this makefile.
# (rnl.c is paired with rnl.h; the list used to name rnc.h a second time
# and never mentioned rnl.h.)
SRC=\
ll.h \
erbit.h \
xcl.c \
arx.c \
test.c \
ary.c ary.h \
rn.c rn.h \
rnc.c rnc.h \
rnd.c rnd.h \
rnl.c rnl.h \
rnv.c rnv.h \
rnx.c rnx.h \
drv.c drv.h \
xsd.c xsd.h \
dsl.c dsl.h \
dxl.c dxl.h \
xsd_tm.c xsd_tm.h \
sc.c sc.h \
ht.c ht.h \
er.c er.h \
u.c u.h \
xmlc.c xmlc.h \
s.c s.h \
m.c m.h \
rx.c rx.h \
rx_cls_u.c \
rx_cls_ranges.c

# Object files shared by every executable (linked directly; no library is built here).
OBJ=\
rn.obj \
rnc.obj \
rnd.obj \
rnl.obj \
rnv.obj \
rnx.obj \
drv.obj \
ary.obj \
xsd.obj \
dsl.obj \
dxl.obj \
xsd_tm.obj \
sc.obj \
ht.obj \
er.obj \
u.obj \
xmlc.obj \
s.obj \
m.obj \
rx.obj

# Compile rule: build each .obj from the .c of the same name.
# Note that the output name is attached to -o without a space.
.SUFFIXES: .c .obj

.c.obj:
	${CC} ${CFLAGS} -c -o$@ $<

# Default goal: build the three executables.
all: rnv.exe arx.exe test.exe

# Link rnv.exe from its driver object and the shared objects.
rnv.exe: xcl.obj ${OBJ}
	${CC} ${LFLAGS} -e$@ xcl.obj ${OBJ} ${LIB}

# Link arx.exe from its driver object and the shared objects.
arx.exe: arx.obj ${OBJ}
	${CC} ${LFLAGS} -e$@ arx.obj ${OBJ} ${LIB}

# Link test.exe from its driver object and the shared objects.
test.exe: test.obj ${OBJ}
	${CC} ${LFLAGS} -e$@ test.obj ${OBJ} ${LIB}

# Remove objects and executables. Both a Unix-style and a DOS-style command are
# tried; the leading '-' lets make continue when one of them is unavailable.
clean:
	-rm -f *.obj *.exe
	-del *.obj *.exe
	
# Hand-maintained header dependencies, one line per object file.
xcl.obj: m.h erbit.h rnl.h rnv.h rnx.h er.h ll.h
arx.obj: u.h m.h s.h xmlc.h ht.h erbit.h rnl.h rnv.h rx.h er.h ary.h
ary.obj: rn.h ary.h
rn.obj: m.h s.h ht.h ll.h rn.h
rnc.obj: u.h xmlc.h m.h s.h rn.h sc.h er.h rnc.h
rnd.obj: m.h rn.h rnx.h ll.h er.h rnd.h
rnl.obj: erbit.h rn.h rnc.h rnd.h rnl.h
rnv.obj: m.h xmlc.h erbit.h drv.h er.h rnv.h
rnx.obj: m.h s.h rn.h ll.h rnx.h
drv.obj: xmlc.h m.h s.h ht.h rn.h xsd.h ll.h erbit.h er.h drv.h
xsd.obj: u.h xmlc.h s.h erbit.h rx.h xsd_tm.h er.h xsd.h
xsd_tm.obj: xsd_tm.h
dxl.obj: m.h er.h dxl.h
dsl.obj: dsl.h
sc.obj: m.h ll.h sc.h
ht.obj: m.h ht.h
er.obj: er.h
u.obj: u.h
xmlc.obj: u.h xmlc.h
s.obj: xmlc.h m.h s.h
m.obj: er.h m.h
rx.obj: u.h xmlc.h m.h s.h ht.h ll.h er.h rx.h rx_cls_u.c rx_cls_ranges.c

--- NEW FILE: dsl.c ---
/* $Id: dsl.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#include <stdlib.h>
#include "dsl.h"

/* Name of the loaded Scheme file; NULL while no library is loaded (set by dsl_ld). */
static char *dsl_scm=NULL;

#if DSL_SCM

#include <string.h>
#include <assert.h>
#include UNISTD_H
#include SCM_H
#include "m.h"
#include "er.h"

/* Pick the Scheme initialization file: the value of SCM_INIT_PATH when it
   is set and names a readable file, otherwise the compiled-in IMPLINIT. */
static char *implpath(void) {
  char *path=getenv("SCM_INIT_PATH");
  if(path&&access(path,R_OK)!=-1) {
    return path;
  }
  return (char*)IMPLINIT;
}

/* Empty hook installed as init_user_scm by dsl_ld(). */
static void init_user_scm_dsl(void) {}
/* Top-level thunk passed to scm_top_level(): loads the file named by dsl_scm and wraps the result of scm_ldfile(). */
static SCM  toplvl(void) {return MAKINUM(scm_ldfile(dsl_scm));}

/* Initialize the Scheme interpreter and load the datatype library file dl.
   May be called only once (asserted). When loading fails a message is printed
   and dsl_scm is reset to NULL, which makes dsl_allows/dsl_equal reject everything. */
void dsl_ld(char *dl) {
  assert(dsl_scm==NULL); dsl_scm=dl;
  init_user_scm=&init_user_scm_dsl;
  { char *argv[]={NULL,NULL}; argv[0]=dsl_scm; /*Init.scm wants args*/
    scm_init_from_argv(sizeof(argv)/sizeof(char*)-1,argv,0,0,0);
  }
  if(MAKINUM(0)!=scm_top_level(implpath(),&toplvl)) {
    (*er_printf)("dsl: cannot load %s\n",dsl_scm);
    dsl_scm=NULL;
  }
}

/* Templates for the Scheme expressions to evaluate. They are expanded by hand
   with the shere macro (each %s marks an insertion point); they are never
   passed to sprintf. */
#define ALLOWS "(dsl-allows? \"%s\" '(%s) \"%s\")"
#define PARAM "(\"%s\".\"%s\")"
#define EQUAL "(dsl-equal? \"%s\" \"%s\" \"%s\")"

/* Copy the first n characters of s to d, putting a backslash in front of
   every backslash and double quote, and terminate d with '\0'.
   d must have room for 2*n+1 characters.
   Returns the number of characters written, not counting the terminator. */
static int strnesc(char *d,char *s,int n) {
  char *out=d;
  int k;
  for(k=0;k!=n;++k) {
    char c=s[k];
    if(c=='\\'||c=='"') {
      *out++='\\';
    }
    *out++=c;
  }
  *out='\0';
  return out-d;
}
/* strnesc() applied to the whole NUL-terminated string s. */
static int stresc(char *d,char *s) {return strnesc(d,s,strlen(s));}

/* Copy template text from sp to bp up to and including the next "%s", then step bp
   back over the two copied characters so the caller overwrites them with the
   inserted value. sp is left just past the "%s". The template must contain
   another "%s", otherwise the loop runs past its end. */
#define shere(bp,sp) while(!((*(bp++)=*(sp++))=='%'&&(*(bp++)=*(sp++))=='s')); bp-=2;

/* Ask the Scheme library whether the n characters at s are allowed by datatype typ.
   ps is a sequence of NUL-terminated string pairs ended by an empty string
   (this layout follows from the scanning loops below); each pair is passed to
   Scheme through the PARAM template.
   Returns non-zero when the evaluated expression is not #f; always 0 when no
   library is loaded. */
int dsl_allows(char *typ,char *ps,char *s,int n) {
  char *buf,*sp,*bp, *p;
  int np,lenp;
  SCM ret=BOOL_F;

  if(dsl_scm) {
    /* count the parameter pairs and the total length of their text */
    p=ps; np=0;
    while(*p) {++np; while(*(p++)); while(*(p++));}
    lenp=p-ps-2*np;
    /* every inserted character may need an escaping backslash, hence the factor 2 */
    buf=(char*)m_alloc(
      strlen(ALLOWS)+np*strlen(PARAM)+2*(strlen(typ)+lenp+n)+1,
      sizeof(char));
    bp=buf; sp=ALLOWS;
    shere(bp,sp); bp+=stresc(bp,typ);
    shere(bp,sp); /* parameters */
    p=ps;
    while(np--) {
      char *sp=PARAM; /* shadows the outer sp while one pair is expanded */
      shere(bp,sp); bp+=stresc(bp,p); while(*(p++));
      shere(bp,sp); bp+=stresc(bp,p); while(*(p++));
      while(*sp) *(bp++)=*(sp++);
    }
    shere(bp,sp); bp+=strnesc(bp,s,n);
    while((*(bp++)=*(sp++))); /* copy the rest of the template including its terminator */
    ret=scm_evstr(buf);
    m_free(buf);
  }
  return ret!=BOOL_F;
}

/* Ask the Scheme library whether the n characters at s equal val under datatype typ.
   Returns non-zero when the evaluated expression is not #f; always 0 when no
   library is loaded. */
int dsl_equal(char *typ,char *val,char *s,int n) {
  char *buf,*sp,*bp;
  SCM ret=BOOL_F;

  if(dsl_scm) {
    /* every inserted character may need an escaping backslash, hence the factor 2 */
    buf=(char*)m_alloc(
      strlen(EQUAL)+2*(strlen(typ)+strlen(val)+n)+1,
      sizeof(char));
    bp=buf; sp=EQUAL;
    shere(bp,sp); bp+=stresc(bp,typ);
    shere(bp,sp); bp+=stresc(bp,val);
    shere(bp,sp); bp+=strnesc(bp,s,n);
    while((*(bp++)=*(sp++))); /* copy the rest of the template including its terminator */
    ret=scm_evstr(buf);
    m_free(buf);
  }
  return ret!=BOOL_F;
}

#else

/* Stubs used when Scheme support is compiled out: loading does nothing
   and both checks reject every value. */
void dsl_ld(char *dl) {
  (void)dl;
}

int dsl_allows(char *typ,char *ps,char *s,int n) {
  (void)typ; (void)ps; (void)s; (void)n;
  return 0;
}

int dsl_equal(char *typ,char *val,char *s,int n) {
  (void)typ; (void)val; (void)s; (void)n;
  return 0;
}

#endif

--- NEW FILE: license.txt ---
Copyright (c) 2003, 2004 David Tolpin, Davidashen 
http://davidashen.net/

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in
   the documentation and/or other materials provided with the
   distribution.
3. Neither the name of Davidashen nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--- NEW FILE: arx.c ---
/* $Id: arx.c,v 1.1 2009/08/03 05:32:45 mike Exp $ */
/* Regular Associations for XML

arx grammar:

arx = grammars route*
grammars = "grammars"  "{" type2string+ "}"
type2string =  type "=" literal
type = nmtoken
route = match|nomatch|valid|invalid
match = "=~" regexp "=>" type
nomatch = "!~" regexp "=>" type
valid = "valid" "{" rng "}" "=>" type
invalid = "!valid" "{" rng "}" "=>" type

literal=string in '"', '"' inside quoted by '\'
regexp=string in '/', '/' inside quoted by '\'
rng=relax ng compact syntax

comments start with # and continue till end of line
*/

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <sys/types.h>
#include UNISTD_H
#include <fcntl.h>
#include <stdarg.h>
#include <errno.h>
#include <assert.h>
#include EXPAT_H
#include "u.h"
#include "m.h"
#include "s.h"
#include "xmlc.h"
#include "ht.h"
#include "erbit.h"
#include "rnl.h"
#include "rnv.h"
#include "rx.h"
#include "er.h"
#include "ary.h"

extern int rn_notAllowed;

/* rules */
/* rule kinds stored in rules[i][0] */
#define VALID 1
#define INVAL 2
#define MATCH 3
#define NOMAT 4

/* initial capacities: type table, rule table, string hash table; the string pool starts at LEN_S*S_AVG_SIZE bytes */
#define LEN_2 16
#define LEN_R 64
#define LEN_S 64
#define S_AVG_SIZE 64

/* initial capacity of the token buffer 'value' */
#define LEN_V 64

/* initial capacity of the character-data buffer 'text', and the size above which clear() shrinks it again */
#define LEN_T 1024
#define LIM_T 65536

/* size of the read buffers */
#define BUFSIZE 1024

static char *xml; /* document name used in error messages (see validate) */
static int len_2,len_r,len_s,i_2,i_r,i_s; /* capacities and fill levels of t2s, rules and string */
static int (*t2s)[2],(*rules)[3]; /* t2s: {type,string} pairs, slot 0 is a search guard; rules: {kind,regexp-or-grammar,target} */
static char *string; static struct hashtable ht_s; /* interned string pool and its index */
static int path2abs; /* when non-zero, grammar literals are passed through s_abspath() */

/* arx parser */
static char *arxfn; /* name of the file being parsed */
static int arxfd, i_b,len_b, cc, line,col,prevline,rnc, sym,len_v, errors;
static char buf[BUFSIZE]; /* read buffer */
static char *value; /* text of the current token; capacity len_v */

/* xml validator */
static XML_Parser expat=NULL;
static int current,previous;
static int mixed=0;
static int ok,wf,any;
static char *text; static int len_txt; /* pending character data and its capacity */
static int n_txt; /* number of pending characters */


static int add_s(char *s) {
  int len=strlen(s)+1,j;
  if(i_s+len>len_s) string=(char*)m_stretch(
    string,len_s=2*(i_s+len),i_s,sizeof(char));
  strcpy(string+i_s,s);
  if((j=ht_get(&ht_s,i_s))==-1) {
    ht_put(&ht_s,j=i_s);
    i_s+=len;
  }
  return j;
}

/* Hash-table callbacks for ht_s: keys are offsets into the string pool. */
static int hash_s(int i) {return s_hval(string+i);}
static int equal_s(int s1,int s2) {return strcmp(string+s1,string+s2)==0;}

/* rnv error handler installed by init(): only errors carrying ERBIT_DRV are reported, everything else is dropped. */
static void silent_verror_handler(int erno,va_list ap) {
  if(erno&ERBIT_DRV) rnv_default_verror_handler(erno,ap); /* low-level diagnostics */
}

static void windup(void);
static int initialized=0;
static void init(void) {
  if(!initialized) {initialized=1;
    rnl_init(); rnv_init();
    rnv_verror_handler=&silent_verror_handler;
    string=(char*)m_alloc(len_v=LEN_S*S_AVG_SIZE,sizeof(char));
    t2s=(int(*)[2])m_alloc(len_2=LEN_2,sizeof(int[2]));
    rules=(int(*)[3])m_alloc(len_r=LEN_R,sizeof(int[3]));
    ht_init(&ht_s,LEN_S,&hash_s,&equal_s);
    value=(char*)m_alloc(len_v=LEN_V,sizeof(char));
    text=(char*)m_alloc(len_txt=LEN_T,sizeof(char));
    windup();
  }
}

/* Reset the program state between runs: shrink the character-data buffer back to
   LEN_T if it has grown beyond LIM_T, empty the string index and rewind the tables. */
static void clear(void) {
  if(len_txt>LIM_T) {m_free(text); text=(char*)m_alloc(len_txt=LEN_T,sizeof(char));}
  ht_clear(&ht_s);
  windup();
}

/* Rewind all tables. The type table starts at 1 because slot 0 serves as the
   search guard in typ2str(). */
static void windup(void) {
  text[n_txt=0]='\0';
  i_2=1; i_r=i_s=0;
}

/* parser */
/* token kinds produced by getsym() */
#define SYM_EOF 0
#define SYM_GRMS 1
#define SYM_IDNT 2
#define SYM_LTRL 3
#define SYM_RGXP 4
#define SYM_RENG 5
#define SYM_MTCH 6
#define SYM_NMTC 7
#define SYM_VALD 8
#define SYM_NVAL 9
#define SYM_LCUR 10
#define SYM_RCUR 11
#define SYM_ASGN 12
#define SYM_INVL 13

/* Human-readable name of a token kind, for use in error messages.
   An unknown kind trips an assertion; NULL is returned in that case when
   assertions are disabled. */
static char *sym2str(int sym) {
  char *name=NULL;
  switch(sym) {
  case SYM_EOF:  name="end of file"; break;
  case SYM_GRMS: name="'grammars'"; break;
  case SYM_IDNT: name="identifier"; break;
  case SYM_LTRL: name="literal"; break;
  case SYM_RGXP: name="regular expression"; break;
  case SYM_RENG: name="Relax NG"; break;
  case SYM_MTCH: name="'=~'"; break;
  case SYM_NMTC: name="'!~'"; break;
  case SYM_VALD: name="'valid'"; break;
  case SYM_NVAL: name="'!valid'"; break;
  case SYM_LCUR: name="'{'"; break;
  case SYM_RCUR: name="'}'"; break;
  case SYM_ASGN: name="'='"; break;
  case SYM_INVL: name="invalid character"; break;
  default: assert(0);
  }
  return name;
}

/* error codes of the arx parser */
#define ARX_ER_IO 0
#define ARX_ER_SYN 1
#define ARX_ER_EXP 2
#define ARX_ER_REX 3
#define ARX_ER_RNG 4
#define ARX_ER_NOQ 5
#define ARX_ER_TYP 6

/* there is nothing in the grammar I need utf-8 processing for */
#define err(msg) (*er_vprintf)(msg"\n",ap)
/* Print "file:line:col: error: " followed by the message for erno, formatted
   with the arguments in ap. Unknown codes print only the prefix. */
static void verror_handler(int erno,va_list ap) {
  (*er_printf)("%s:%i:%i: error: ",arxfn,line,col);
  switch(erno) {
  case ARX_ER_IO: err("I/O error: %s"); break;
  case ARX_ER_SYN: err("syntax error"); break;
  case ARX_ER_EXP: err("%s expected, %s found"); break;
  case ARX_ER_REX: err("invalid regular expression"); break;
  case ARX_ER_RNG: err("invalid Relax NG grammar"); break;
  case ARX_ER_NOQ: err("unterminated literal or regular expression"); break;
  case ARX_ER_TYP: err("undeclared type '%s'"); break;
  }
}

/* Record a parse error. Every call is counted in 'errors', but at most
   one message is printed per source line (tracked through prevline). */
static void error(int erno,...) {
  va_list ap;
  if(line!=prevline) {
    va_start(ap,erno);
    verror_handler(erno,ap);
    va_end(ap);
    prevline=line;
  }
  ++errors;
}

/* Read the next character of the arx file into cc, refilling buf as needed.
   cc becomes -1 at end of file; if the last real character was not a newline,
   one '\n' is delivered first. A '\n' that directly follows '\r' is skipped.
   line and col are advanced according to the previous character. */
static void getcc(void) {
  for(;;) { int cc0=cc;
    if(i_b==len_b) {i_b=0; if((len_b=read(arxfd,buf,BUFSIZE))==-1) error(ARX_ER_IO,strerror(errno));}
    cc=i_b>=len_b?-1:((unsigned char*)buf)[i_b++];
    if(cc==-1) {if(cc0=='\n') break; else cc='\n';} /* make sure the input ends with a newline */
    if(cc=='\n' && cc0=='\r') continue; /* treat "\r\n" as a single line end */
    if(cc0=='\n' || cc0=='\r') {++line; col=0;} else ++col;
    break;
  }
}

/* Non-zero when cc may appear in an identifier: any code above 0x7F, a base character,
   a digit, or one of '_', '.', '-', ':'. */
static int nmtoken(int cc) {return cc>0x7F||xmlc_base_char(cc)||xmlc_digit(cc)||cc=='_'||cc=='.'||cc=='-'||cc==':';}
/* Read an identifier starting at the current character into 'value'
   (NUL-terminated, buffer grown as needed).
   Returns 1 when an identifier was read, 0 when the current character
   cannot start one; in that case nothing is consumed. */
static int getid(void) {
  int i=0;
  if(!nmtoken(cc)) return 0;
  do {
    value[i++]=cc;
    if(i==len_v) {
      len_v=2*i;
      value=(char*)m_stretch(value,len_v,i,sizeof(char));
    }
    getcc();
  } while(nmtoken(cc));
  value[i]='\0';
  return 1;
}

/* Read a quoted token into 'value'. The current character is the opening
   delimiter and also the closing one. A delimiter preceded by a backslash is
   kept and replaces the backslash. A control character (or end of file) before
   the closing delimiter is reported as ARX_ER_NOQ. */
static void getq(void) {
  int cq=cc;
  int i=0;
  for(;;) {
    getcc();
    if(cc==cq) {
      if(i!=0&&value[i-1]=='\\') --i; else {getcc(); break;}
    } else if(cc<' ') {error(ARX_ER_NOQ); break;}
    value[i++]=cc;
    if(i==len_v) value=(char*)m_stretch(value,len_v=2*i,i,sizeof(char));
  }
  value[i]='\0';
}

/* Read an embedded Relax NG grammar into 'value'. Input is consumed up to and
   including the next "=>"; the grammar text ends at the last '}' seen before
   that terminator. A missing "=>" or '}' is reported as ARX_ER_EXP. */
static void getrng(void) {
  int ircur=-1,i=0;
  int cc0;
  for(;;) {
    cc0=cc; getcc();
    if(cc=='}') ircur=i; /* remember the position of the most recent '}' */
    else if(cc=='>') {if(cc0=='=') {getcc(); break;}} /* use => as terminator */
    else if(cc==-1) {error(ARX_ER_EXP,"=>",sym2str(SYM_EOF)); break;}
    value[i++]=cc;
    if(i==len_v) value=(char*)m_stretch(value,len_v=2*i,i,sizeof(char));
  }
  if(ircur==-1) {error(ARX_ER_EXP,sym2str(SYM_RCUR),sym2str(SYM_EOF)); ircur=0;}
  value[ircur]='\0';
}

/* Scan the next token: sets sym to its kind and, for identifiers, literals,
   regular expressions and grammars, leaves its text in 'value'.
   Whitespace and '#' comments are skipped. The meaning of '{' depends on the
   previous token: after valid/!valid it starts an embedded grammar. */
static void getsym(void) {
  for(;;) {
    if(0<=cc&&cc<=' ') {getcc(); continue;}
    switch(cc) {
    case -1: sym=SYM_EOF; return;
    case '#': do getcc(); while(cc!='\n'&&cc!='\r'); getcc(); continue;
    case '{':
      if(sym==SYM_VALD||sym==SYM_NVAL) {
	getrng(); sym=SYM_RENG;
      } else {
	getcc(); sym=SYM_LCUR;
      }
      return;
    case '}': getcc(); sym=SYM_RCUR; return;
    case '!': getcc();
      if(cc=='~') {
	getcc(); sym=SYM_NMTC;
      } else {
	if(getid()) {
	  if(strcmp("valid",value)!=0) {error(ARX_ER_EXP,sym2str(SYM_NVAL),value);} sym=SYM_NVAL;
	} else {error(ARX_ER_SYN); sym=SYM_INVL;}
      }
      return;
    case '=': getcc();
      switch(cc) {
      case '~': getcc(); sym=SYM_MTCH; return;
      /* "=>" is not a token of its own: it is skipped, and is only legal right after a regular expression */
      case '>': getcc(); if(sym!=SYM_RGXP) error(ARX_ER_SYN); continue;
      default: sym=SYM_ASGN; return;
      }
    case '"': getq(); sym=SYM_LTRL; return;
    case '/': getq(); sym=SYM_RGXP; return;
    default:
      if(getid()) {
	sym=strcmp("grammars",value)==0?SYM_GRMS
	 : strcmp("valid",value)==0?SYM_VALD:SYM_IDNT;
      } else {getcc(); error(ARX_ER_SYN); sym=SYM_INVL;}
      return;
    }
  }
}

static int chksym(int x) {
  if(sym!=x) {error(ARX_ER_EXP,sym2str(x),sym2str(sym)); return 0;}
  return 1;
}

/* Check the current token against x (reporting a mismatch) and advance to the next token either way. */
static void chk_get(int x) {
  (void)chksym(x); getsym();
}

static int typ2str(void) {
  int i=i_2,typ=add_s(value);
  t2s[0][0]=typ; for(;;) if(t2s[--i][0]==typ) break;
  if(i==0) error(ARX_ER_TYP,value);
  return t2s[i][1];
}

/* Parse the arx configuration file fn: first the "grammars { type = literal ... }"
   section, which fills t2s, then any number of routing rules, which fill rules.
   Returns non-zero when the file was opened and parsed without errors. */
static int arx(char *fn) {
  if((arxfd=open(arxfn=fn,O_RDONLY))==-1) {
    (*er_printf)("error (%s): %s\n",arxfn,strerror(errno));
    return 0;
  } else {
    errors=0;
    len_b=read(arxfd,buf,BUFSIZE); i_b=u_bom(buf,len_b); /* start reading after the value returned by u_bom() */
    prevline=-1; line=1; col=0; rnc=0;
    cc=' '; getsym();
    chk_get(SYM_GRMS); chk_get(SYM_LCUR);
    /* type = "literal" pairs */
    do {
      if(i_2==len_2) t2s=(int(*)[2])m_stretch(t2s,len_2=i_2*2,i_2,sizeof(int[2]));
      if(chksym(SYM_IDNT)) t2s[i_2][0]=add_s(value);
      getsym();
      chk_get(SYM_ASGN);
      if(chksym(SYM_LTRL)) {
	if(path2abs) {
	  /* make room for the combined path before s_abspath() rewrites value in place */
	  int len=strlen(arxfn)+strlen(value)+1;
	  if(len>len_v) {value=(char*)m_stretch(value,len,len_v,sizeof(char)); len_v=len;}
	  s_abspath(value,arxfn);
	}
	t2s[i_2][1]=add_s(value);
      }
      getsym();
      ++i_2;
    } while(sym==SYM_IDNT);
    chk_get(SYM_RCUR);
    /* routing rules */
    for(;;) {
      if(i_r==len_r) rules=(int(*)[3])m_stretch(rules,len_r=i_r*2,i_r,sizeof(int[3]));
      switch(sym) {
      case SYM_MTCH: rules[i_r][0]=MATCH; goto REGEXP;
      case SYM_NMTC: rules[i_r][0]=NOMAT; goto REGEXP;
      REGEXP: getsym();
	if(chksym(SYM_RGXP)) {
	  if(!rx_check(value)) error(ARX_ER_REX);
	  rules[i_r][1]=add_s(value);
	}
	getsym();
	if(chksym(SYM_IDNT)) rules[i_r][2]=typ2str();
	goto NEXT;
      case SYM_VALD: rules[i_r][0]=VALID; goto RNG;
      case SYM_NVAL: rules[i_r][0]=INVAL; goto RNG;
      RNG: getsym();
	if(chksym(SYM_RENG)) {
	  /* the embedded grammar is loaded under a synthetic name "<file>#rnc[<n>]" */
	  char *rncfn=(char*)m_alloc(strlen(arxfn)+strlen("#rnc[]")+12,sizeof(char));
	  sprintf(rncfn,"%s#rnc[%i]",arxfn,rnc++);
	  if(!(rules[i_r][1]=rnl_s(rncfn,value,strlen(value)))) error(ARX_ER_RNG);
	  m_free(rncfn);
	}
	getsym();
	if(chksym(SYM_IDNT)) rules[i_r][2]=typ2str();
	goto NEXT;
      default: goto LAST;
      }
      NEXT: ++i_r; getsym();
    }
    LAST: chk_get(SYM_EOF);
    close(arxfd);
    return !errors;
  }
}

static void flush_text(void) {
  ok=rnv_text(&current,&previous,text,n_txt,mixed)&&ok;
  text[n_txt=0]='\0';
}

/* Expat start-tag callback: flushes pending text as mixed content, validates
 * the start tag with its attributes, and records in any whether
 * ary_isany() holds for the resulting pattern.
 * Does nothing once the current pattern is notAllowed.
 */
static void start_element(void *userData,const char *name,const char **attrs) {
  if(current==rn_notAllowed) return;
  mixed=1;
  flush_text();
  ok=rnv_start_tag(&current,&previous,(char*)name,(char**)attrs)&&ok;
  mixed=0;
  any=any||ary_isany(current);
}

/* Expat end-tag callback: flushes pending text, validates the end tag and
 * switches to mixed-content mode for what follows.
 * Does nothing once the current pattern is notAllowed.
 */
static void end_element(void *userData,const char *name) {
  if(current==rn_notAllowed) return;
  flush_text();
  ok=rnv_end_tag(&current,&previous,(char*)name)&&ok;
  mixed=1;
}

/* Expat character-data callback: appends len bytes from s to the text
 * buffer, resizing it first. Ignored once the current pattern is notAllowed.
 */
static void characters(void *userData,const char *s,int len) {
  if(current!=rn_notAllowed) {
    /* bytes needed: existing text + new data + terminating '\0' */
    int newlen_txt=n_txt+len+1;
    /* buffer is over LIM_T but the data fits in LIM_T: shrink back to LIM_T */
    if(newlen_txt<=LIM_T&&LIM_T<len_txt) newlen_txt=LIM_T;
    /* data fits in the current buffer: keep its size */
    else if(newlen_txt<len_txt) newlen_txt=len_txt;
    if(len_txt!=newlen_txt) text=(char*)m_stretch(text,len_txt=newlen_txt,n_txt,sizeof(char));
    memcpy(text+n_txt,s,len); n_txt+=len; text[n_txt]='\0'; /* '\0' guarantees that the text is bounded, and strto[ld] work for data */
  }
}

/* Validates the document readable from fd against the pattern start.
 * The result is left in the globals: ok (valid so far), wf (cleared on a
 * read or parse failure) and any (set by start_element).
 * Reading stops at end of input, on the first failure, or as soon as any
 * becomes non-zero.
 */
static void validate(int start,int fd) {
  void *chunk; int nread;

  previous=current=start;
  expat=XML_ParserCreateNS(NULL,':');
  XML_SetElementHandler(expat,&start_element,&end_element);
  XML_SetCharacterDataHandler(expat,&characters);
  ok=1; any=0;
  do {
    chunk=XML_GetBuffer(expat,BUFSIZE);
    nread=read(fd,chunk,BUFSIZE);
    if(nread<0) {
      (*er_printf)("error (%s): %s\n",xml,strerror(errno));
      wf=ok=0;
      break;
    }
    if(!XML_ParseBuffer(expat,nread,nread==0)) wf=ok=0;
  } while(ok&&!any&&nread!=0);
  XML_ParserFree(expat);
}

/* Prints the arx version banner through er_printf. */
static void version(void) {
  (*er_printf)("arx version %s\n",ARX_VERSION);
}
/* Prints the command-line synopsis through er_printf. */
static void usage(void) {
  (*er_printf)("usage: arx {-[nvh?]} document.xml arx.conf {arx.conf}\n");
}

/* arx entry point: arx {-[nvh?]} document.xml arx.conf {arx.conf}
 * Tries the rules of each configuration file in order and prints the string
 * associated with the first rule that holds for the document.
 * Exit status: EXIT_SUCCESS when a rule matched, EXIT_FAILURE when none did,
 * 1 after printing usage.
 */
int main(int argc,char **argv) {
  int fd;
  init();

  path2abs=1;
  /* options may be bundled in one argument; every character after '-' is examined */
  while(*(++argv)&&**argv=='-') {
    int i=1;
    for(;;) {
      switch(*(*argv+i)) {
      case '\0': goto END_OF_OPTIONS;
      case 'h': case '?': usage(); return 1;
      case 'n': path2abs=0; break;
      case 'v': version(); break;
      default: (*er_printf)("unknown option '-%c'\n",*(*argv+i)); break;
      }
      ++i;
    }
    END_OF_OPTIONS:;
  }

  /* need a document and at least one configuration file */
  if(!(*(argv)&&*(argv+1))) {usage(); return 1;}

  /* wf starts as "document can be opened"; validate() clears it on read/parse failure */
  xml=*(argv++); if((wf=(fd=open(xml,O_RDONLY))!=-1)) close(fd);
  do {
    if(arx(*(argv++))) {
      int i;
      for(i=0;i!=i_r;++i) {
	switch(rules[i][0]) {
	case VALID: if((ok=wf)) {validate(rules[i][1],fd=open(xml,O_RDONLY)); close(fd);} break;
	/* INVAL holds only for a well-formed document that fails validation */
	case INVAL: if((ok=wf)) {validate(rules[i][1],fd=open(xml,O_RDONLY)); close(fd); ok=wf&&!ok;} break;
	/* MATCH/NOMAT test the document file name against the rule's regular expression */
	case MATCH: ok=rx_match(string+rules[i][1],xml,strlen(xml)); break;
	case NOMAT: ok=!rx_match(string+rules[i][1],xml,strlen(xml)); break;
	default: assert(0);
	}
	if(ok) {
	  printf("%s\n",string+rules[i][2]);
	  return EXIT_SUCCESS;
	}
      }
    }
    clear();
  } while(*argv);
  return EXIT_FAILURE;
}

--- NEW FILE: dxl.c ---
/* $Id: dxl.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#include <stdlib.h>
#include "dxl.h"

char *dxl_cmd=NULL;

#if DXL_EXC

#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include "m.h"
#include "er.h"

/* Asks the external checker whether the n-byte string s is allowed by
 * datatype typ with parameters ps.
 *
 * Runs:  dxl_cmd allows typ param... s
 * ps is read as consecutive non-empty NUL-terminated strings ended by an
 * empty string; each becomes one argument.
 *
 * Returns non-zero iff the checker ran and exited normally with status 0.
 * Returns 0 when dxl_cmd is unset, fork/exec/wait fails, the checker exits
 * with a non-zero status, or it is killed by a signal.
 */
int dxl_allows(char *typ,char *ps,char *s,int n) {
  int pid,status;

  if(!dxl_cmd) return 0;
  if((pid=fork())==0) {
    char **argv; int argc;
    char *p; int arg, i;

    /* 5 fixed slots: cmd, "allows", typ, s, NULL; plus one per parameter string */
    argc=5; p=ps; arg=0;
    for(;;) {
      if(*p=='\0') {
        if(arg) {arg=0; ++argc;} else break;
      } else arg=1;
      ++p;
    }
    argv=(char**)m_alloc(argc,sizeof(char*));
    argv[--argc]=NULL;
    /* s is not NUL-terminated at n, so pass a terminated copy */
    argv[--argc]=(char*)m_alloc(n+1,sizeof(char)); argv[argc][n]='\0'; strncpy(argv[argc],s,n);
    argv[0]=dxl_cmd; argv[1]="allows"; argv[2]=typ;
    i=3; if(i<argc) {
      for(;;) {
        argv[i++]=ps;
        if(i==argc) break;
        while(*(ps++));
      }
    }
    execv(dxl_cmd,argv);
    (*er_printf)("dxl: cannot execute %s: %s\n",dxl_cmd,strerror(errno));
    /* never return into the caller's code from the child: it would keep
       running as a second copy of the validator */
    _exit(127);
  } else if(pid>0) {
    /* wait for this particular child, retrying if interrupted by a signal */
    while(waitpid(pid,&status,0)==-1) {
      if(errno!=EINTR) {
        (*er_printf)("dxl: %s\n",strerror(errno));
        return 0;
      }
    }
    /* a checker killed by a signal has no exit status and must not count as success */
    return WIFEXITED(status)&&WEXITSTATUS(status)==0;
  }
  (*er_printf)("dxl: %s\n",strerror(errno));
  return 0;
}

/* Asks the external checker whether the n-byte string s equals val under
 * datatype typ.
 *
 * Runs:  dxl_cmd equal typ val s   (dxl_cmd is looked up via execvp)
 *
 * Returns non-zero iff the checker ran and exited normally with status 0.
 * Returns 0 when dxl_cmd is unset, fork/exec/wait fails, the checker exits
 * with a non-zero status, or it is killed by a signal.
 */
int dxl_equal(char *typ,char *val,char *s,int n) {
  int pid,status;

  if(!dxl_cmd) return 0;
  if((pid=fork())==0) {
    char *argv[]={NULL,"equal",NULL,NULL,NULL,NULL};
    argv[0]=dxl_cmd; argv[2]=typ; argv[3]=val;

    /* s is not NUL-terminated at n, so pass a terminated copy */
    argv[4]=(char*)m_alloc(n+1,sizeof(char)); argv[4][n]='\0'; strncpy(argv[4],s,n);
    execvp(dxl_cmd,argv);
    (*er_printf)("dxl: cannot execute %s: %s\n",dxl_cmd,strerror(errno));
    /* never return into the caller's code from the child: it would keep
       running as a second copy of the validator */
    _exit(127);
  } else if(pid>0) {
    /* wait for this particular child, retrying if interrupted by a signal */
    while(waitpid(pid,&status,0)==-1) {
      if(errno!=EINTR) {
        (*er_printf)("dxl: %s\n",strerror(errno));
        return 0;
      }
    }
    /* a checker killed by a signal has no exit status and must not count as success */
    return WIFEXITED(status)&&WEXITSTATUS(status)==0;
  }
  (*er_printf)("dxl: %s\n",strerror(errno));
  return 0;
}

#else

/* Stub used when DXL_EXC is disabled: nothing is ever allowed. */
int dxl_allows(char *typ,char *ps,char *s,int n) {return 0;}
/* Stub used when DXL_EXC is disabled: nothing ever compares equal. */
int dxl_equal(char *typ,char *val,char *s,int n) {return 0;}

#endif

--- NEW FILE: ll.h ---
/* $Id: ll.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#ifndef LL_H
#define LL_H 1

/* all limits that can affect speed or memory consumption;
 prefixes correspond to module names
 */

/* NOTE(review): naming appears to be <MODULE>_LEN_* = initial table length,
   <MODULE>_PRIME_* = hash prime, <MODULE>_LIM_* = size limit -- confirm
   against each module. */

/* rn */
#define RN_LEN_P 1024
#define RN_PRIME_P 0x3fd
#define RN_LIM_P (4*RN_LEN_P)
#define RN_LEN_NC 256
#define RN_PRIME_NC 0xfb
#define RN_LEN_S 256

/* sc */
#define SC_LEN 64

/* rnd */
#define RND_LEN_F 1024

/* drv */
#define DRV_LEN_DTL 4
#define DRV_LEN_M 4096
#define DRV_PRIME_M 0xffd
#define DRV_LIM_M (8*DRV_LEN_M)

/* rnx: initial length of rnx_exp, and the length it is cut back to by
   rnx_expected() once it has grown past this limit */
#define RNX_LEN_EXP 16
#define RNX_LIM_EXP 64

/* xcl: initial size of the text buffer, and the size above which clear()
   reallocates it */
#define XCL_LEN_T 1024
#define XCL_LIM_T 16384

/* rx */
#define RX_LEN_P 256
#define RX_PRIME_P 0xfb
#define RX_LIM_P (4*RX_LEN_P)
#define RX_LEN_R 32
#define RX_PRIME_R 0x1f
#define RX_LEN_2 RX_PRIME_R
#define RX_PRIME_2 RX_PRIME_R
#define RX_LEN_M 1024
#define RX_PRIME_M 0x3fd
#define RX_LIM_M (8*RX_LEN_M)

#endif

--- NEW FILE: ary.h ---
/* $Id: ary.h,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#ifndef ARY_H
#define ARY_H 1

/* Returns non-zero iff pattern p is a chain of After patterns whose first
   parts all accept arbitrary mixed content (any element, any attribute,
   text) and which ends in Empty. */
extern int ary_isany(int p);

#endif

--- NEW FILE: rnv.h ---
/* $Id: rnv.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */

#include <stdarg.h>

#ifndef RNV_H
#define RNV_H 1

/* error codes passed to rnv_verror_handler */
#define RNV_ER_ELEM 0 /* element not allowed */
#define RNV_ER_AKEY 1 /* attribute not allowed */
#define RNV_ER_AVAL 2 /* attribute with invalid value */
#define RNV_ER_EMIS 3 /* incomplete content */
#define RNV_ER_AMIS 4 /* missing attributes */
#define RNV_ER_UFIN 5 /* unfinished content of element */
#define RNV_ER_TEXT 6 /* invalid data or text not allowed */
#define RNV_ER_NOTX 7 /* text not allowed */

/* error sink; initially points to rnv_default_verror_handler */
extern void (*rnv_verror_handler)(int erno,va_list ap);

/* prints the message for erno; codes with ERBIT_DRV set go to the drv handler */
extern void rnv_default_verror_handler(int erno,va_list ap);

extern void rnv_init(void);
extern void rnv_clear(void);

/* Each call below advances *curp (saving the old pattern in *prevp) and
   returns non-zero iff the input was accepted; on rejection the error is
   reported and *curp is set to a recovery pattern. */
extern int rnv_text(int *curp,int *prevp,char *text,int n_t,int mixed);
/* attrs is a NULL-terminated array of alternating names and values */
extern int rnv_start_tag(int *curp,int *prevp,char *name,char **attrs);
  /* the three steps rnv_start_tag is composed of */
  extern int rnv_start_tag_open(int *curp,int *prevp,char *name);
  extern int rnv_attribute(int *curp,int *prevp,char *name,char *val);
  extern int rnv_start_tag_close(int *curp,int *prevp,char *name);
extern int rnv_end_tag(int *curp,int *prevp,char *name);

#endif

--- NEW FILE: Makefile ---

# version string compiled into rnv, arx and rvp
VERSION=1.7.8
CC=cc

# optional features
# DSL_SCM and DXL_EXC are passed to the compiler only when set to 1 (see the
# ifeq blocks below); M_STATIC and M_FILL are always passed.
M_STATIC=0
M_FILL=0
DSL_SCM=0
DXL_EXC=0

# header names handed to the sources as macros (used as "#include EXPAT_H" etc.)
EXPAT_H="<expat.h>"
UNISTD_H="<unistd.h>"
SCM_H="<scm/scm.h>"

INC=-I/usr/local/include ${CPPFLAGS}
LBL=-L/usr/local/lib ${LDFLAGS}

# preprocessor definitions; extended below when optional features are enabled
DEF=\
-DM_STATIC=${M_STATIC} \
-DM_FILL=${M_FILL} \
-DEXPAT_H=${EXPAT_H} \
-DUNISTD_H=${UNISTD_H} \
-DRNV_VERSION="\"${VERSION}\"" \
-DARX_VERSION="\"${VERSION}\"" \
-DRVP_VERSION="\"${VERSION}\""
WARN=-Wall -Wstrict-prototypes  -Wmissing-prototypes -Wcast-align
OPT=-O -g

# recursively expanded, so later additions to DEF are picked up
CFLAGS=${INC} ${DEF} ${WARN} ${OPT}
LFLAGS=${OPT} ${LBL}

LIBEXPAT=-lexpat
# the backquoted shell snippet runs at link time and adds -ldl / -lsocket
# only when the corresponding static library exists in /usr/lib
LIB_SCM=-lscm -lm \
`sh -c '[ -f /usr/lib/libdl.a ] && echo -ldl \
      ; [ -f /usr/lib/libsocket.a ] && echo -lsocket \
'` 

LIB=${LIBEXPAT}

# GNU make conditionals
ifeq (${DSL_SCM},1)
DEF+=-DDSL_SCM=${DSL_SCM} -DSCM_H=${SCM_H}
LIB+=${LIB_SCM}
endif

ifeq (${DXL_EXC},1)
DEF+=-DDXL_EXC=${DXL_EXC}
endif

# the programs link against LIBRNV; set it to ${LIBRNVSO} to use the shared library
LIBRNVA=librnv.a
LIBRNVSO=librnv.so
LIBRNV=${LIBRNVA}

# every source and header file; used as the input of the depend target
SRC=\
ll.h \
erbit.h \
xcl.c \
arx.c \
rvp.c \
xsdck.c \
test.c \
ary.c ary.h \
rn.c rn.h \
rnc.c rnc.h \
rnd.c rnd.h \
rnl.c rnl.h \
rnv.c rnv.h \
rnx.c rnx.h \
drv.c drv.h \
xsd.c xsd.h \
xsd_tm.c xsd_tm.h \
dxl.c dxl.h \
dsl.c dsl.h \
sc.c sc.h \
ht.c ht.h \
er.c er.h \
u.c u.h \
xmlc.c xmlc.h \
s.c s.h \
m.c m.h \
rx.c rx.h \
rx_cls_u.c \
rx_cls_ranges.c

# objects collected into the rnv library
OBJ=\
rn.o \
rnc.o \
rnd.o \
rnl.o \
rnv.o \
rnx.o \
drv.o \
ary.o \
xsd.o \
xsd_tm.o \
dxl.o \
dsl.o \
sc.o \
u.o \
ht.o \
er.o \
xmlc.o \
s.o \
m.o \
rx.o

# suffix rule: compile each .c file into the .o file of the same name
.SUFFIXES: .c .o

.c.o:
	${CC} ${CFLAGS} -c -o $@ $<

# default goal: build every program
all: rnv arx rvp xsdck test

# each program is one driver object linked against the rnv library

# rnv is built from xcl.o
rnv: xcl.o ${LIBRNV}
	${CC} ${LFLAGS} -o $@ xcl.o ${LIBRNV} ${LIB}

arx: arx.o ${LIBRNV}
	${CC} ${LFLAGS} -o $@ arx.o ${LIBRNV} ${LIB}

rvp: rvp.o ${LIBRNV}
	${CC} ${LFLAGS} -o $@ rvp.o ${LIBRNV} ${LIB}

xsdck: xsdck.o ${LIBRNV}
	${CC} ${LFLAGS} -o $@ xsdck.o ${LIBRNV} ${LIB}

# "test" is a real program built from test.c, not a phony target
test: test.o ${LIBRNV}
	${CC} ${LFLAGS} -o $@ test.o ${LIBRNV} ${LIB}

# static library: archive the objects, then index the archive
${LIBRNVA}: ${OBJ}
	ar rc $@ ${OBJ}
	ranlib $@

# shared library; linked with the same compiler driver that built the
# objects, so that overriding CC affects this target as well
${LIBRNVSO}: ${OBJ}
	${CC} -shared -o $@ ${OBJ}

# regenerate header dependencies with makedepend, which by default rewrites
# the makefile in the current directory
depend: ${SRC}
	makedepend -Y ${DEF} ${SRC}

# these names are commands, not files; a stray file called "clean" or "all"
# must not stop them from running
.PHONY: all depend clean

# remove everything the build produces, including all five programs built by
# "all"; errors are ignored so a partial clean still succeeds
clean:
	-rm -f *.o tst/c/*.o  *.a *.so rnv arx rvp xsdck test rnd_test *_test *.core *.gmon *.gprof rnv*.zip rnv.txt rnv.pdf rnv.html rnv.xml

# test program compiled and linked in one step; not part of "all"
# NOTE(review): built with LFLAGS only, so none of the -D definitions in DEF
# reach tst/c/rnd_test.c -- confirm this is intended
rnd_test: ${LIBRNV} tst/c/rnd_test.c
	${CC} ${LFLAGS} -I. -o rnd_test tst/c/rnd_test.c ${LIBRNV} ${LIB}


--- NEW FILE: xmlc.h ---
/* $Id: xmlc.h,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#ifndef XMLC_H
#define XMLC_H 1

/* character classes required for parsing XML */
/* Each predicate takes a character code u; callers treat a non-zero result
   as "u belongs to the class". */
extern int xmlc_white_space(int u);
extern int xmlc_base_char(int u);
extern int xmlc_ideographic(int u);
extern int xmlc_combining_char(int u);
extern int xmlc_digit(int u);
extern int xmlc_extender(int u);

/* NOTE(review): presumably tests whether u lies inside one of the len
   [low,high] pairs in r -- confirm against the definition */
extern int u_in_ranges(int u,int r[][2],int len);

#endif

--- NEW FILE: ary.c ---
/* $Id: ary.c,v 1.1 2009/08/03 05:32:46 mike Exp $ */

#include "rn.h"
#include "ary.h"

/*
ary_isany::Pattern->Bool
ary_isany p =
  let
    isanycontent
      p@(OneOrMore
	  (Choice
	    (Choice
	      (Element AnyName p1)
	      (Attribute AnyName Text))
	    Text)) = p == p1
    isanycontent _ = False
    isanymixed (OneOrMore (Choice (Element AnyName p1) Text)) = isanycontent p1
    isanymixed _ = False
  in
     case p of
       (After p1 Empty) -> isanymixed p1
       (After p1 p2) -> isanymixed p1 && ary_isany p2
       _ -> False
*/

/* Returns non-zero iff p has the shape
 *   OneOrMore(Choice(Choice(a,b),c))
 * where a, b, c are, in any order, Element AnyName p (recursing into itself),
 * Attribute AnyName Text, and Text.
 */
static int isanycont(int p) {
  int p0,nc,p1,p2,i,res,flat[3];
  p0=p; if(!RN_P_IS(p0,RN_P_ONE_OR_MORE)) return 0;
  rn_OneOrMore(p0,p1);
  /* outer choice: its second branch is one of the three leaves */
  p0=p1; if(!RN_P_IS(p0,RN_P_CHOICE)) return 0;
  rn_Choice(p0,p1,p2); flat[0]=p2;
  /* inner choice supplies the other two leaves */
  p0=p1; if(!RN_P_IS(p0,RN_P_CHOICE)) return 0;
  rn_Choice(p0,p1,p2); flat[1]=p1; flat[2]=p2;
  /* res collects bit 1 for the element leaf and bit 2 for the attribute leaf */
  res=0;
  for(i=0;i!=3;++i) {
    p0=flat[i];
    switch(RN_P_TYP(p0)) {
    case RN_P_ELEMENT: rn_Element(p0,nc,p1);
      /* the element's content must be the whole pattern p again */
      if(!(RN_NC_IS(nc,RN_NC_ANY_NAME)&&p==p1)) return 0;
      res|=1; break;
    case RN_P_ATTRIBUTE: rn_Attribute(p0,nc,p1);
      if(!(RN_NC_IS(nc,RN_NC_ANY_NAME)&&p1==rn_text)) return 0;
      res|=2; break;
    case RN_P_TEXT: break;
    default: return 0;
    }
  }
  /* both the element and the attribute leaf must have been seen */
  return res==3;
}

/* Returns non-zero iff p has the shape OneOrMore(Choice(a,b)) where a and b
 * are, in either order, Element AnyName p1 with isanycont(p1), and Text.
 */
static int isanymix(int p) {
  int p0,nc,p1,p2,i,res,flat[2];
  p0=p; if(!RN_P_IS(p0,RN_P_ONE_OR_MORE)) return 0;
  rn_OneOrMore(p0,p1);
  p0=p1; if(!RN_P_IS(p0,RN_P_CHOICE)) return 0;
  rn_Choice(p0,p1,p2); flat[0]=p1; flat[1]=p2;
  /* res becomes 1 once the element branch has been seen */
  res=0;
  for(i=0;i!=2;++i) {
    p0=flat[i];
    switch(RN_P_TYP(p0)) {
    case RN_P_ELEMENT: rn_Element(p0,nc,p1);
      if(!(RN_NC_IS(nc,RN_NC_ANY_NAME)&& isanycont(p1))) return 0;
      res|=1; break;
    case RN_P_TEXT: break;
    default: return 0;
    }
  }
  return res==1;
}

/* Returns 1 iff p is a chain After(m1,After(m2,...After(mk,Empty))) in which
 * every mi satisfies isanymix(); returns 0 otherwise.
 * The chain is walked iteratively.
 */
int ary_isany(int p) {
  int head,rest;
  for(;;) {
    if(!RN_P_IS(p,RN_P_AFTER)) return 0;
    rn_After(p,head,rest);
    if(!isanymix(head)) return 0;
    if(rest==rn_empty) return 1;
    p=rest;
  }
}

--- NEW FILE: rnx.h ---
/* $Id: rnx.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */

#ifndef RNX_H
#define RNX_H 1

extern void rnx_init(void);
extern void rnx_clear(void);

/* rnx_expected() collects patterns that may come next after p into
   rnx_exp[0..rnx_n_exp-1]; a non-zero req restarts the list and skips
   nullable patterns, a zero req appends to the existing list */
extern int rnx_n_exp,*rnx_exp;
extern void rnx_expected(int p,int req);

/* both return a newly allocated description; release it with m_free() */
extern char *rnx_p2str(int p);
extern char *rnx_nc2str(int nc);

#endif

--- NEW FILE: rnv.c ---
/* $Id: rnv.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */

#include <string.h> /*strncpy,strrchr*/
#include <assert.h>
#include "m.h"
#include "xmlc.h" /*xmlc_white_space*/
#include "erbit.h"
#include "drv.h"
#include "er.h"
#include "rnv.h"

extern int rn_notAllowed;

/* prints msg, with a newline appended, using the va_list named ap in the
   enclosing function */
#define err(msg) (*er_vprintf)(msg"\n",ap);
/* Default error printer: codes carrying ERBIT_DRV are passed on to the drv
 * module with the bit removed; the rest are rnv's own RNV_ER_* codes.
 * The %s^%s pairs are filled by callers with namespace URI and local name.
 */
void rnv_default_verror_handler(int erno,va_list ap) {
  if(erno&ERBIT_DRV) {
    drv_default_verror_handler(erno&~ERBIT_DRV,ap);
  } else {
    switch(erno) {
    case RNV_ER_ELEM: err("element %s^%s not allowed"); break;
    case RNV_ER_AKEY: err("attribute %s^%s not allowed"); break;
    case RNV_ER_AVAL: err("attribute %s^%s with invalid value \"%s\""); break;
    case RNV_ER_EMIS: err("incomplete content"); break;
    case RNV_ER_AMIS: err("missing attributes of %s^%s"); break;
    case RNV_ER_UFIN: err("unfinished content of element %s^%s"); break;
    case RNV_ER_TEXT: err("invalid data or text not allowed"); break;
    case RNV_ER_NOTX: err("text not allowed"); break;
    default: assert(0);
    }
  }
}

void (*rnv_verror_handler)(int erno,va_list ap)=&rnv_default_verror_handler;

/* Variadic front end: packs the arguments into a va_list and forwards them
 * to the installed rnv_verror_handler.
 */
static void error_handler(int erno,...) {
  va_list ap;
  va_start(ap,erno);
  (*rnv_verror_handler)(erno,ap);
  va_end(ap);
}

/* Installed as drv's error handler: tags the code with ERBIT_DRV and
 * forwards it to rnv's handler.
 */
static void verror_handler_drv(int erno,va_list ap) {
  (*rnv_verror_handler)(erno|ERBIT_DRV,ap);
}

static void windup(void);
static int initialized=0;

/* One-time module setup: initializes drv, routes drv errors through rnv, and
 * resets local state. Later calls do nothing.
 */
void rnv_init(void) {
  if(initialized) return;
  initialized=1;
  drv_init();
  drv_verror_handler=&verror_handler_drv;
  windup();
}

/* Resets module state between documents; only calls windup(). */
void rnv_clear(void) {
  windup();
}

/* Shared reset hook for rnv_init() and rnv_clear(); there is currently no
   state to reset. */
static void windup(void) {
}

/* Splits name, in place, at its last ':' into a prefix (*surip) and a suffix
 * (*snamep) by overwriting the ':' with '\0'.
 * Without a ':' the whole name is the suffix and *surip is the empty string
 * at its end.
 * Returns the position of the overwritten ':' for qname_close(), or NULL if
 * nothing was changed.
 */
static char *qname_open(char **surip,char **snamep,char *name) {
  char *colon=strrchr(name,':');
  if(colon==NULL) {
    *snamep=name;
    *surip=name+strlen(name);
    return NULL; /* no namespace */
  }
  *colon='\0';
  *surip=name;
  *snamep=colon+1;
  return colon;
}

/* Undoes qname_open(): puts the ':' back where it was, if there was one. */
static void qname_close(char *sep) {
  if(sep!=NULL) *sep=':';
}

/* Returns 1 iff each of the first n_txt characters of text is XML white
 * space (so also for empty text), 0 at the first character that is not.
 */
static int whitespace(char *text,int n_txt) {
  int k;
  for(k=0;k!=n_txt;++k) {
    if(!xmlc_white_space(text[k])) return 0;
  }
  return 1;
}

/* Validates n_txt characters of text against *curp.
 * In mixed content, white-space-only text is accepted without touching the
 * patterns; other text is checked with drv_mixed_text(). Outside mixed
 * content the text is checked as data with drv_text().
 * *prevp receives the pattern before the step. On rejection the error is
 * reported, *curp is set to the recovery pattern and 0 is returned;
 * otherwise 1.
 */
int rnv_text(int *curp,int *prevp,char *text,int n_txt,int mixed) {
  if(mixed) {
    if(whitespace(text,n_txt)) return 1;
    *prevp=*curp;
    *curp=drv_mixed_text(*prevp);
    if(*curp!=rn_notAllowed) return 1;
    *curp=drv_mixed_text_recover(*prevp);
    error_handler(RNV_ER_NOTX);
    return 0;
  }
  *prevp=*curp;
  *curp=drv_text(*prevp,text,n_txt);
  if(*curp!=rn_notAllowed) return 1;
  *curp=drv_text_recover(*prevp,text,n_txt);
  error_handler(RNV_ER_TEXT);
  return 0;
}

/* Validates the opening of element name (written "uri:local").
 * *prevp receives the pattern before the step. On rejection *curp is set to
 * the recovery pattern and an error is reported: RNV_ER_ELEM if recovery is
 * notAllowed too, RNV_ER_EMIS otherwise.
 * Returns 1 if the element was accepted, 0 if not. name is split in place
 * for the duration of the call and restored before returning.
 */
int rnv_start_tag_open(int *curp,int *prevp,char *name) {
  char *uri,*local,*colon;
  int valid=1;

  colon=qname_open(&uri,&local,name);
  *prevp=*curp;
  *curp=drv_start_tag_open(*prevp,uri,local);
  if(*curp==rn_notAllowed) {
    int erno;
    valid=0;
    *curp=drv_start_tag_open_recover(*prevp,uri,local);
    erno=(*curp==rn_notAllowed)?RNV_ER_ELEM:RNV_ER_EMIS;
    error_handler(erno,uri,local);
  }
  qname_close(colon);
  return valid;
}

/* Validates attribute name (written "uri:local") with value val.
 * Reports RNV_ER_AKEY when the attribute itself is not allowed and
 * RNV_ER_AVAL when its value is rejected; in both cases *curp is set to a
 * recovery pattern. Returns 1 if accepted, 0 otherwise.
 * name is split in place for the duration of the call and restored before
 * returning.
 */
int rnv_attribute(int *curp,int *prevp,char *name,char *val) {
  int ok=1; char *suri,*sname,*sep;
  sep=qname_open(&suri,&sname,name);
  *curp=drv_attribute_open(*prevp=*curp,suri,sname);
  if(*curp==rn_notAllowed) { ok=0;
    *curp=drv_attribute_open_recover(*prevp,suri,sname);
    error_handler(RNV_ER_AKEY,suri,sname);
  } else {
    *curp=drv_text(*prevp=*curp,(char*)val,strlen(val));
    /* the value is bad if either the text step or closing the attribute
       fails; the close step runs only when the text step succeeded */
    if(*curp==rn_notAllowed || (*curp=drv_attribute_close(*prevp=*curp))==rn_notAllowed) { ok=0;
      *curp=drv_attribute_close_recover(*prevp);
      error_handler(RNV_ER_AVAL,suri,sname,val);
    }
  }
  qname_close(sep);
  return ok;
}

/* Validates the end of a start tag, once all its attributes have been seen.
 * On rejection *curp is set to the recovery pattern and RNV_ER_AMIS is
 * reported for element name. Returns 1 if accepted, 0 otherwise.
 */
int rnv_start_tag_close(int *curp,int *prevp,char *name) {
  char *uri,*local,*colon;

  *prevp=*curp;
  *curp=drv_start_tag_close(*prevp);
  if(*curp!=rn_notAllowed) return 1;

  *curp=drv_start_tag_close_recover(*prevp);
  colon=qname_open(&uri,&local,name);
  error_handler(RNV_ER_AMIS,uri,local);
  qname_close(colon);
  return 0;
}

/* Validates a complete start tag: the element name, then each name/value
 * pair of the NULL-terminated attrs array, then the close of the tag.
 * Processing stops as soon as *curp becomes notAllowed.
 * Returns 1 if every step was accepted, 0 otherwise.
 */
int rnv_start_tag(int *curp,int *prevp,char *name,char **attrs) {
  int valid=rnv_start_tag_open(curp,prevp,name);
  for(;*curp!=rn_notAllowed&&*attrs;attrs+=2) {
    valid=rnv_attribute(curp,prevp,attrs[0],attrs[1])&&valid;
  }
  if(*curp!=rn_notAllowed) {
    valid=rnv_start_tag_close(curp,prevp,name)&&valid;
  }
  return valid;
}

/* Validates the end tag of element name.
 * On rejection RNV_ER_UFIN is reported first, then *curp is set to the
 * recovery pattern. Returns 1 if accepted, 0 otherwise.
 */
int rnv_end_tag(int *curp,int *prevp,char *name) {
  char *uri,*local,*colon;

  *prevp=*curp;
  *curp=drv_end_tag(*prevp);
  if(*curp!=rn_notAllowed) return 1;

  colon=qname_open(&uri,&local,name);
  error_handler(RNV_ER_UFIN,uri,local);
  qname_close(colon);
  *curp=drv_end_tag_recover(*prevp);
  return 0;
}

--- NEW FILE: rnx.c ---
/* $Id: rnx.c,v 1.1 2009/08/03 05:32:47 mike Exp $ */

#include <stdlib.h> /*NULL*/
#include <string.h> /*strcat*/
#include "m.h"
#include "s.h"
#include "rn.h"
#include "ll.h"
#include "rnx.h"

#define LEN_EXP RNX_LEN_EXP
#define LIM_EXP RNX_LIM_EXP

int rnx_n_exp,*rnx_exp=NULL;
static int len_exp;

static int initialized=0;

/* One-time setup: allocates the expected-pattern list with LEN_EXP slots.
 * Later calls do nothing.
 */
void rnx_init(void) {
  if(initialized) return;
  initialized=1;
  len_exp=LEN_EXP;
  rnx_exp=(int*)m_alloc(len_exp,sizeof(int));
}

/* Counterpart of rnx_init(); currently does nothing. */
void rnx_clear(void) {}

/* Walks pattern p and appends to rnx_exp the leaf patterns (text, data,
 * value, attribute, element, or an After whose first part is nullable) that
 * can occur next.
 *   first - non-zero while p can still be the first thing matched; within a
 *           Group it stays set for the second member only if the first
 *           member is nullable. Attributes are recorded regardless of first.
 *   req   - non-zero to skip nullable patterns.
 * Patterns already in the list are not added again.
 */
static void expected(int p,int first,int req) {
  int p1,p2,px=0,i;
  if(req && rn_nullable(p)) return;
  /* px is set to the pattern to record, if any */
  switch(RN_P_TYP(p)) {
  case RN_P_ERROR: break;
  case RN_P_NOT_ALLOWED: break;
  case RN_P_EMPTY: break;
  case RN_P_TEXT: px=p; break;
  case RN_P_CHOICE: rn_Choice(p,p1,p2);
    expected(p1,first,req); expected(p2,first,req); break;
  case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2);
    expected(p1,first,req); expected(p2,first,req); break;
  case RN_P_GROUP: rn_Group(p,p1,p2);
    expected(p1,first,req); expected(p2,first&&rn_nullable(p1),req); break;
  case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); expected(p1,first,req); break;
  case RN_P_LIST: rn_List(p,p1); expected(p1,first,req); break;
  case RN_P_DATA: px=p; break;
  case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2);
    expected(p1,first,req); break;
  case RN_P_VALUE: px=p; break;
  case RN_P_ATTRIBUTE: px=p; break;
  case RN_P_ELEMENT: px=p; break;
  case RN_P_AFTER: rn_After(p,p1,p2);
    expected(p1,first,req); if(rn_nullable(p1)) px=p; break;
  case RN_P_REF: break;
  default: assert(0);
  }
  if(px&&(first||RN_P_IS(px,RN_P_ATTRIBUTE))) {
    /* skip duplicates */
    for(i=0;i!=rnx_n_exp;++i) {
      if(rnx_exp[i]==px) {px=0; break;}
    }
    if(px) {
      /* double the list when it is full */
      if(rnx_n_exp==len_exp) rnx_exp=(int*)m_stretch(rnx_exp,len_exp=2*rnx_n_exp,rnx_n_exp,sizeof(int));
      rnx_exp[rnx_n_exp++]=px;
    }
  }
}
/* Collects into rnx_exp the patterns that may follow p.
 * With req non-zero the list is restarted (and cut back to LIM_EXP slots if
 * it had grown larger); with req zero the new entries are appended to what
 * is already there.
 */
void rnx_expected(int p,int req) {
  if(!req) {
    expected(p,1,req);
    return;
  }
  if(len_exp>LIM_EXP) {
    m_free(rnx_exp);
    len_exp=LIM_EXP;
    rnx_exp=(int*)m_alloc(len_exp,sizeof(int));
  }
  rnx_n_exp=0;
  expected(p,1,req);
}

/* Returns a fresh buffer holding label followed by s1, and releases s1. */
static char *labelled(char *label,char *s1) {
  char *s=(char*)m_alloc(strlen(label)+1+strlen(s1),sizeof(char));
  strcpy(s,label);
  strcat(s,s1);
  m_free(s1);
  return s;
}

/* Returns a newly allocated, human-readable description of pattern p.
 * Data, value, attribute and element patterns include their datatype or
 * name class; every other kind is described by a fixed word.
 * The caller releases the result with m_free().
 */
char *rnx_p2str(int p) {
  char *s=NULL,*s1;
  int dt,ps,val,nc,p1;
  switch(RN_P_TYP(p)) {
  /* patterns that carry a name class or datatype */
  case RN_P_DATA:
    rn_Data(p,dt,ps);
    s=labelled("data ",rnx_nc2str(dt));
    break;
  case RN_P_ATTRIBUTE:
    rn_Attribute(p,nc,p1);
    s=labelled("attribute ",rnx_nc2str(nc));
    break;
  case RN_P_ELEMENT:
    rn_Element(p,nc,p1);
    s=labelled("element ",rnx_nc2str(nc));
    break;
  case RN_P_VALUE:
    rn_Value(p,dt,val);
    s1=rnx_nc2str(dt);
    s=(char*)m_alloc(strlen("value \"\" ")+1+strlen(s1)+strlen(rn_string+val),sizeof(char));
    strcpy(s,"value ");
    strcat(s,s1);
    strcat(s," \"");
    strcat(s,rn_string+val);
    strcat(s,"\"");
    m_free(s1);
    break;
  /* patterns described by a fixed word */
  case RN_P_ERROR: s=s_clone("error"); break;
  case RN_P_NOT_ALLOWED: s=s_clone("notAllowed"); break;
  case RN_P_EMPTY: s=s_clone("empty"); break;
  case RN_P_TEXT: s=s_clone("text"); break;
  case RN_P_CHOICE: s=s_clone("choice (|)"); break;
  case RN_P_INTERLEAVE: s=s_clone("interleave (&)"); break;
  case RN_P_GROUP: s=s_clone("group (,)"); break;
  case RN_P_ONE_OR_MORE: s=s_clone("one or more (+)"); break;
  case RN_P_LIST: s=s_clone("list"); break;
  case RN_P_DATA_EXCEPT: s=s_clone("dataExcept (-)"); break;
  case RN_P_REF: s=s_clone("ref"); break;
  case RN_P_AFTER: s=s_clone("after"); break;
  default: assert(0);
  }
  return s;
}

/* Returns a newly allocated textual form of name class nc:
 *   error "?", any name "*", namespace "uri:*", qualified name "uri^name",
 *   except "a-b", choice "a|b", datatype "lib^type".
 * The caller releases the result with m_free().
 */
char *rnx_nc2str(int nc) {
  char *s=NULL,*s1,*s2;
  int nc1,nc2,uri,name;
  switch(RN_NC_TYP(nc)) {
  case RN_NC_ERROR: s=s_clone("?"); break;
  case RN_NC_NSNAME:
    rn_NsName(nc,uri);
    /* +3: ":*" and the terminating '\0' */
    s=(char*)m_alloc(strlen(rn_string+uri)+3,sizeof(char));
    strcpy(s,rn_string+uri); strcat(s,":*");
    break;
  case RN_NC_QNAME:
    rn_QName(nc,uri,name);
    /* +2: "^" and the terminating '\0' */
    s=(char*)m_alloc(strlen(rn_string+uri)+strlen(rn_string+name)+2,sizeof(char));
    strcpy(s,rn_string+uri); strcat(s,"^"); strcat(s,rn_string+name);
    break;
  case RN_NC_ANY_NAME: s=s_clone("*"); break;
  case RN_NC_EXCEPT:
    rn_NameClassExcept(nc,nc1,nc2);
    s1=rnx_nc2str(nc1); s2=rnx_nc2str(nc2);
    s=(char*)m_alloc(strlen(s1)+strlen(s2)+2,sizeof(char));
    strcpy(s,s1); strcat(s,"-"); strcat(s,s2);
    m_free(s1); m_free(s2);
    break;
  case RN_NC_CHOICE:
    rn_NameClassChoice(nc,nc1,nc2);
    s1=rnx_nc2str(nc1); s2=rnx_nc2str(nc2);
    s=(char*)m_alloc(strlen(s1)+strlen(s2)+2,sizeof(char));
    strcpy(s,s1); strcat(s,"|"); strcat(s,s2);
    m_free(s1); m_free(s2);
    break;
  case RN_NC_DATATYPE:
    rn_Datatype(nc,uri,name);
    s=(char*)m_alloc(strlen(rn_string+uri)+strlen(rn_string+name)+2,sizeof(char));
    strcpy(s,rn_string+uri); strcat(s,"^"); strcat(s,rn_string+name);
    break;
  default: assert(0);
  }
  return s;
}

--- NEW FILE: xcl.c ---
/* $Id: xcl.c,v 1.1 2009/08/03 05:32:48 mike Exp $ */

#include <stdlib.h>
#include <stdarg.h>
#include <fcntl.h>  /*open,close*/
#include <sys/types.h>
#include UNISTD_H   /*open,read,close*/
#include <string.h> /*strerror*/
#include <errno.h>
#include <assert.h>
#include EXPAT_H
#include "m.h"
#include "s.h"
#include "erbit.h"
#include "drv.h"
#include "rnl.h"
#include "rnv.h"
#include "rnx.h"
#include "ll.h"
#include "dxl.h"
#include "dsl.h"
#include "er.h"

extern int rn_notAllowed,rx_compact,drv_compact;

#define LEN_T XCL_LEN_T
#define LIM_T XCL_LIM_T

#define BUFSIZE 1024

/* maximum number of candidates to display */
#define NEXP 16

#define XCL_ER_IO 0
#define XCL_ER_XML 1
#define XCL_ER_XENT 2

#define PIXGFILE "davidashen-net-xg-file"
#define PIXGPOS "davidashen-net-xg-pos"

static int peipe,verbose,nexp,rnck;
static char *xml;
static XML_Parser expat=NULL;
static int start,current,previous;
static int mixed=0;
static int lastline,lastcol,level;
static char *xgfile=NULL,*xgpos=NULL;
static int ok;

/* Expat does not normalize strings on input */
static char *text; static int len_txt;
static int n_txt;

/* prints msg, with a newline appended, using the va_list named ap in the
   enclosing function */
#define err(msg) (*er_vprintf)(msg"\n",ap);
/* Central error printer of rnv.
 * Codes tagged ERBIT_RNL go to the schema loader's handler. All others are
 * prefixed with the document position (or the file/position taken from the
 * xg processing instructions when present) and printed at most once per
 * line/column. Validation errors (ERBIT_RNV) are followed by up to nexp
 * required or allowed patterns.
 */
static void verror_handler(int erno,va_list ap) {
  if(erno&ERBIT_RNL) {
    rnl_default_verror_handler(erno&~ERBIT_RNL,ap);
  } else {
    int line=XML_GetCurrentLineNumber(expat),col=XML_GetCurrentColumnNumber(expat);
    /* report only the first error at a given position */
    if(line!=lastline||col!=lastcol) { lastline=line; lastcol=col;
      if(xgfile) (*er_printf)("%s:%s: error: ",xgfile,xgpos); else
      (*er_printf)("%s:%i:%i: error: ",xml,line,col);
      if(erno&ERBIT_RNV) {
	rnv_default_verror_handler(erno&~ERBIT_RNV,ap);
	/* first pass (req=1) lists required patterns, second (req=0) appends
	   the merely allowed ones; i remembers how many were already printed */
	if(nexp) { int req=2, i=0; char *s;
	  while(req--) {
	    rnx_expected(previous,req);
	    if(i==rnx_n_exp) continue;
	    /* too many candidates to be useful */
	    if(rnx_n_exp>nexp) break;
	    (*er_printf)((char*)(req?"required:\n":"allowed:\n"));
	    for(;i!=rnx_n_exp;++i) {
	      (*er_printf)("\t%s\n",s=rnx_p2str(rnx_exp[i]));
	      m_free(s);
	    }
	  }
	}
      } else {
	switch(erno) {
	case XCL_ER_IO: err("%s"); break;
	case XCL_ER_XML: err("%s"); break;
	case XCL_ER_XENT: err("pipe through xx to expand external entities"); break;
	default: assert(0);
	}
      }
    }
  }
}

/* Installed as rnl's error handler: tags the code with ERBIT_RNL. */
static void verror_handler_rnl(int erno,va_list ap) {
  verror_handler(erno|ERBIT_RNL,ap);
}
/* Installed as rnv's error handler: tags the code with ERBIT_RNV. */
static void verror_handler_rnv(int erno,va_list ap) {
  verror_handler(erno|ERBIT_RNV,ap);
}

static void windup(void);
static int initialized=0;

/* One-time program setup: initializes the loader, validator and
 * expected-pattern modules, installs this file's error handlers, registers
 * the two external datatype libraries and allocates the text buffer.
 * Later calls do nothing.
 */
static void init(void) {
  if(initialized) return;
  initialized=1;

  rnl_init();
  rnl_verror_handler=&verror_handler_rnl;
  rnv_init();
  rnv_verror_handler=&verror_handler_rnv;
  rnx_init();

  drv_add_dtl(DXL_URL,&dxl_equal,&dxl_allows);
  drv_add_dtl(DSL_URL,&dsl_equal,&dsl_allows);

  len_txt=LEN_T;
  text=(char*)m_alloc(len_txt,sizeof(char));
  windup();
}

/* Per-document reset: replaces the text buffer with one of the initial size
 * if it has grown beyond LIM_T, then resets the parse state.
 */
static void clear(void) {
  if(len_txt>LIM_T) {
    m_free(text);
    len_txt=LEN_T;
    text=(char*)m_alloc(len_txt,sizeof(char));
  }
  windup();
}

/* Resets per-document state: empty text buffer, nesting level 0, and no
 * last-reported error position.
 */
static void windup(void) {
  n_txt=0;
  text[0]='\0';
  level=0;
  lastline=-1;
  lastcol=-1;
}

/* Variadic front end: packs the arguments into a va_list and forwards them
 * to verror_handler().
 */
static void error_handler(int erno,...) {
  va_list ap;
  va_start(ap,erno);
  verror_handler(erno,ap);
  va_end(ap);
}

static void flush_text(void) {
  ok=rnv_text(&current,&previous,text,n_txt,mixed)&&ok;
  text[n_txt=0]='\0';
}

/* Expat start-tag callback. While the current pattern is notAllowed the
 * element is only counted in level; otherwise pending text is flushed as
 * mixed content and the start tag is validated.
 */
static void start_element(void *userData,const char *name,const char **attrs) {
  if(current==rn_notAllowed) {
    ++level;
    return;
  }
  mixed=1;
  flush_text();
  ok=rnv_start_tag(&current,&previous,(char*)name,(char**)attrs)&&ok;
  mixed=0;
}

/* Expat end-tag callback. While the current pattern is notAllowed, closes
 * one counted level, or, at level 0, resumes validation from the previous
 * pattern. Otherwise flushes pending text and validates the end tag.
 */
static void end_element(void *userData,const char *name) {
  if(current==rn_notAllowed) {
    if(level==0) current=previous;
    else --level;
    return;
  }
  flush_text();
  ok=rnv_end_tag(&current,&previous,(char*)name)&&ok;
  mixed=1;
}

/* Expat character-data callback: appends len bytes from s to the text
 * buffer, resizing it first. Ignored while the current pattern is
 * notAllowed.
 */
static void characters(void *userData,const char *s,int len) {
  if(current!=rn_notAllowed) {
    /* bytes needed: existing text + new data + terminating '\0' */
    int newlen_txt=n_txt+len+1;
    /* buffer is over LIM_T but the data fits in LIM_T: shrink back to LIM_T */
    if(newlen_txt<=LIM_T&&LIM_T<len_txt) newlen_txt=LIM_T;
    /* data fits in the current buffer: keep its size */
    else if(newlen_txt<len_txt) newlen_txt=len_txt;
    if(len_txt!=newlen_txt) text=(char*)m_stretch(text,len_txt=newlen_txt,n_txt,sizeof(char));
    memcpy(text+n_txt,s,len); n_txt+=len; text[n_txt]='\0'; /* '\0' guarantees that the text is bounded, and strto[ld] work for data */
  }
}

/* Expat processing-instruction callback.
 * The PIXGFILE and PIXGPOS instructions carry the file name and position
 * used in the prefix of error messages instead of the document's own.
 * In the position the first space is replaced by ':'; a position without a
 * space is kept as it is.
 */
static void processingInstruction(void *userData,
    const char *target,const char *data) {
  if(strcmp(PIXGFILE,target)==0) {
    if(xgfile) m_free(xgfile);
    xgfile=s_clone((char*)data);
  } else if(strcmp(PIXGPOS,target)==0) {
    char *sp;
    if(xgpos) m_free(xgpos);
    xgpos=s_clone((char*)data);
    /* the data comes from the document: do not assume it contains a space */
    if((sp=strchr(xgpos,' '))) *sp=':';
  }
}

/* Copies len bytes of buf to file descriptor 1, continuing after partial
 * writes. Returns 1 when everything was written; on a write error reports
 * XCL_ER_IO and returns 0.
 */
static int pipeout(void *buf,int len) {
  char *cursor=(char*)buf;
  int remaining=len,written;
  do {
    written=write(1,cursor,remaining);
    if(written==-1) {
      error_handler(XCL_ER_IO,strerror(errno));
      return 0;
    }
    cursor+=written;
    remaining-=written;
  } while(remaining!=0);
  return 1;
}

/* Feeds the document readable from fd to the parser in BUFSIZE chunks; when
 * peipe is set each chunk is also copied to standard output.
 * Returns the accumulated validity flag ok once the whole input has been
 * parsed, or 0 after a buffer, read or parse error (which is reported).
 * After a parse error the rest of the input is still copied to standard
 * output when peipe is set.
 */
static int process(int fd) {
  void *buf; int len;
  for(;;) {
    buf=XML_GetBuffer(expat,BUFSIZE);
    if(!buf) {
      /* the parser could not provide a buffer; it holds the reason */
      error_handler(XCL_ER_XML,XML_ErrorString(XML_GetErrorCode(expat)));
      goto ERROR;
    }
    len=read(fd,buf,BUFSIZE);
    if(len<0) {
      /* XCL_ER_IO prints a single %s; the file name is already in the prefix */
      error_handler(XCL_ER_IO,strerror(errno));
      goto ERROR;
    }
    if(peipe) peipe=peipe&&pipeout(buf,len);
    if(!XML_ParseBuffer(expat,len,len==0)) goto PARSE_ERROR;
    if(len==0) break;
  }
  return ok;

PARSE_ERROR:
  error_handler(XCL_ER_XML,XML_ErrorString(XML_GetErrorCode(expat)));
  /* drain the input; stop at end of file and also on a read error, whose
     -1 must not reach pipeout() as a length */
  while(peipe&&(len=read(fd,buf,BUFSIZE))>0) peipe=peipe&&pipeout(buf,len);
ERROR:
  return 0;
}

/* Expat external-entity callback: external entities are not loaded; the
 * reference is reported as XCL_ER_XENT and 1 is returned to the parser.
 */
static int externalEntityRef(XML_Parser p,const char *context,
    const char *base,const char *systemId,const char *publicId) {
  error_handler(XCL_ER_XENT);
  return 1;
}

/* Validates the document readable from fd against the loaded schema,
 * starting from pattern start, with a parser created for this one document.
 * The verdict is left in ok.
 */
static void validate(int fd) {
  current=start;
  previous=start;

  expat=XML_ParserCreateNS(NULL,':');
  XML_SetParamEntityParsing(expat,XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetElementHandler(expat,&start_element,&end_element);
  XML_SetCharacterDataHandler(expat,&characters);
  XML_SetExternalEntityRefHandler(expat,&externalEntityRef);
  XML_SetProcessingInstructionHandler(expat,&processingInstruction);

  ok=process(fd);

  XML_ParserFree(expat);
}

/* Prints the rnv version banner through er_printf. */
static void version(void) {
  (*er_printf)("rnv version %s\n",RNV_VERSION);
}
/* Prints the command-line synopsis; the option letters d and e are listed
   only when the corresponding feature is compiled in. */
static void usage(void) {(*er_printf)("usage: rnv {-[qnspc"
#if DXL_EXC
"d"
#endif
#if DSL_SCM
"e"
#endif
"vh?]} schema.rnc {document.xml}\n");}

/* rnv entry point: rnv {-options} schema.rnc {document.xml}
 * Loads the schema, then validates each named document, or standard input
 * when none is named and -c was not given.
 * Exit status: EXIT_SUCCESS when the schema loaded and nothing failed,
 * EXIT_FAILURE otherwise, 1 after printing usage.
 */
int main(int argc,char **argv) {
  init();

  peipe=0; verbose=1; nexp=NEXP; rnck=0;
  /* options may be bundled; those that take a value (n, d, e) consume the
     next argument and end the bundle */
  while(*(++argv)&&**argv=='-') {
    int i=1;
    for(;;) {
      switch(*(*argv+i)) {
      case '\0': goto END_OF_OPTIONS;
      case 'q': verbose=0; nexp=0; break;
      case 'n': if(*(argv+1)) nexp=atoi(*(++argv)); goto END_OF_OPTIONS;
      case 's': drv_compact=1; rx_compact=1; break;
      case 'p': peipe=1; break;
      case 'c': rnck=1; break;
#if DXL_EXC
      case 'd': dxl_cmd=*(argv+1); if(*(argv+1)) ++argv; goto END_OF_OPTIONS;
#endif
#if DSL_SCM
      case 'e': dsl_ld(*(argv+1)); if(*(argv+1)) ++argv; goto END_OF_OPTIONS;
#endif
      case 'v': version(); break;
      case 'h': case '?': usage(); return 1;
      default: (*er_printf)("unknown option '-%c'\n",*(*argv+i)); break;
      }
      ++i;
    }
    END_OF_OPTIONS:;
  }

  /* the schema argument is mandatory */
  if(!*(argv)) {usage(); return 1;}

  /* rnl_fn yields the start pattern, or 0 when the schema could not be loaded */
  if((ok=start=rnl_fn(*(argv++)))) {
    if(*argv) {
      do {
	int fd; xml=*argv;
	if((fd=open(xml,O_RDONLY))==-1) {
	  (*er_printf)("I/O error (%s): %s\n",xml,strerror(errno));
	  ok=0;
	  /* in a do-while, continue jumps to the loop condition, i.e. the next argument */
	  continue;
	}
	if(verbose) (*er_printf)("%s\n",xml);
	validate(fd);
	close(fd);
	clear();
      } while(*(++argv));
      if(!ok&&verbose) (*er_printf)("error: some documents are invalid\n");
    } else {
      /* no documents: validate standard input unless only the schema check was requested */
      if(!rnck) {
	xml="stdin";
	validate(0);
	clear();
	if(!ok&&verbose) (*er_printf)("error: invalid input\n");
      }
    }
  }

  return ok?EXIT_SUCCESS:EXIT_FAILURE;
}

--- NEW FILE: rn.h ---
/* $Id: rn.h,v 1.1 2009/08/03 05:32:47 mike Exp $ */

#ifndef RN_H
#define RN_H 1

#include <assert.h>

/* Patterns */
#define RN_P_ERROR 0
#define RN_P_NOT_ALLOWED 1
#define RN_P_EMPTY 2
#define RN_P_TEXT 3
#define RN_P_CHOICE 4
#define RN_P_INTERLEAVE 5
#define RN_P_GROUP 6
#define RN_P_ONE_OR_MORE 7
#define RN_P_LIST 8
#define RN_P_DATA 9
#define RN_P_DATA_EXCEPT 10
#define RN_P_VALUE 11
#define RN_P_ATTRIBUTE 12
#define RN_P_ELEMENT 13
#define RN_P_REF 14
#define RN_P_AFTER 15

/*
Patterns and nameclasses are stored in arrays of integers.
an integer is either an index in the same or another array,
or a value that denotes record type etc.

Each record has a macro that accesses its fields by assigning
them to variables in the local scope, and a creator.
*/

/* Pattern Bindings */
#define RN_P_TYP(i) (rn_pattern[i]&0xFF)
#define RN_P_IS(i,x)  (x==RN_P_TYP(i))
#define RN_P_CHK(i,x)  assert(RN_P_IS(i,x))

/* flag bits stored in rn_pattern[i] above the 8-bit pattern type */
#define RN_P_FLG_NUL 0x00000100
#define RN_P_FLG_TXT 0x00000200
#define RN_P_FLG_CTE 0x00000400
#define RN_P_FLG_CTC 0x00000800
#define RN_P_FLG_CTS 0x00001000
#define RN_P_FLG_ERS 0x40000000
#define RN_P_FLG_MRK 0x80000000

#define rn_marked(i) (rn_pattern[i]&RN_P_FLG_MRK)
#define rn_mark(i) (rn_pattern[i]|=RN_P_FLG_MRK)
#define rn_unmark(i) (rn_pattern[i]&=~RN_P_FLG_MRK)

#define rn_nullable(i) (rn_pattern[i]&RN_P_FLG_NUL)
/* statement macros: they can only set the flag, never clear it */
#define rn_setNullable(i,x) if(x) rn_pattern[i]|=RN_P_FLG_NUL

/* parenthesized like rn_nullable so that !rn_cdata(i) and rn_cdata(i)==0
   test the flag rather than binding to part of the expansion */
#define rn_cdata(i) (rn_pattern[i]&RN_P_FLG_TXT)
#define rn_setCdata(i,x) if(x) rn_pattern[i]|=RN_P_FLG_TXT

/* assert: p1 at 1, p2 at 2 */

#define rn_NotAllowed(i) RN_P_CHK(i,RN_P_NOT_ALLOWED)
#define rn_Empty(i) RN_P_CHK(i,RN_P_EMPTY)
#define rn_Text(i) RN_P_CHK(i,RN_P_TEXT)
#define rn_Choice(i,p1,p2) RN_P_CHK(i,RN_P_CHOICE); p1=rn_pattern[i+1]; p2=rn_pattern[i+2]
#define rn_Interleave(i,p1,p2) RN_P_CHK(i,RN_P_INTERLEAVE); p1=rn_pattern[i+1]; p2=rn_pattern[i+2]
#define rn_Group(i,p1,p2) RN_P_CHK(i,RN_P_GROUP); p1=rn_pattern[i+1]; p2=rn_pattern[i+2]
#define rn_OneOrMore(i,p1) RN_P_CHK(i,RN_P_ONE_OR_MORE); p1=rn_pattern[i+1]
#define rn_List(i,p1) RN_P_CHK(i,RN_P_LIST); p1=rn_pattern[i+1]
#define rn_Data(i,dt,ps) RN_P_CHK(i,RN_P_DATA); dt=rn_pattern[i+1]; ps=rn_pattern[i+2]
#define rn_DataExcept(i,p1,p2) RN_P_CHK(i,RN_P_DATA_EXCEPT); p1=rn_pattern[i+1]; p2=rn_pattern[i+2]
#define rn_Value(i,dt,s) RN_P_CHK(i,RN_P_VALUE); dt=rn_pattern[i+1]; s=rn_pattern[i+2]
#define rn_Attribute(i,nc,p1) RN_P_CHK(i,RN_P_ATTRIBUTE);  p1=rn_pattern[i+1]; nc=rn_pattern[i+2]
#define rn_Element(i,nc,p1) RN_P_CHK(i,RN_P_ELEMENT); p1=rn_pattern[i+1]; nc=rn_pattern[i+2]
#define rn_After(i,p1,p2) RN_P_CHK(i,RN_P_AFTER); p1=rn_pattern[i+1]; p2=rn_pattern[i+2]
#define rn_Ref(i,p) RN_P_CHK(i,RN_P_REF); p=rn_pattern[i+1]

/* Name Classes: kind codes kept in the low byte of rn_nameclass[i]
   (see RN_NC_TYP below). */
#define RN_NC_ERROR 0
#define RN_NC_QNAME 1
#define RN_NC_NSNAME 2
#define RN_NC_ANY_NAME 3
#define RN_NC_EXCEPT 4
#define RN_NC_CHOICE 5
#define RN_NC_DATATYPE 6

/* Name Class Bindings
   RN_NC_TYP extracts the kind code, RN_NC_IS compares it with x, and
   RN_NC_CHK asserts that the comparison holds. */
#define RN_NC_TYP(i) (rn_nameclass[i]&0xFF)
#define RN_NC_IS(i,x) ((x)==RN_NC_TYP(i))
#define RN_NC_CHK(i,x) assert(RN_NC_IS(i,x))

/* Accessors: assert the kind of the name class at index i, then copy the
   operands stored at offsets 1 and 2 into the caller's lvalues.  The ones
   that assign are wrapped in do { ... } while(0) so that each behaves as one
   statement; as a bare `a; b; c' sequence only the assertion would be
   governed by an unbraced if/for/while. */
#define rn_QName(i,uri,name) \
  do { RN_NC_CHK(i,RN_NC_QNAME); (uri)=rn_nameclass[(i)+1]; (name)=rn_nameclass[(i)+2]; } while(0)
#define rn_NsName(i,uri) \
  do { RN_NC_CHK(i,RN_NC_NSNAME); (uri)=rn_nameclass[(i)+1]; } while(0)
#define rn_AnyName(i) RN_NC_CHK(i,RN_NC_ANY_NAME)
#define rn_NameClassExcept(i,nc1,nc2) \
  do { RN_NC_CHK(i,RN_NC_EXCEPT); (nc1)=rn_nameclass[(i)+1]; (nc2)=rn_nameclass[(i)+2]; } while(0)
#define rn_NameClassChoice(i,nc1,nc2) \
  do { RN_NC_CHK(i,RN_NC_CHOICE); (nc1)=rn_nameclass[(i)+1]; (nc2)=rn_nameclass[(i)+2]; } while(0)
#define rn_Datatype(i,lib,typ) \
  do { RN_NC_CHK(i,RN_NC_DATATYPE); (lib)=rn_nameclass[(i)+1]; (typ)=rn_nameclass[(i)+2]; } while(0)

/* Objects declared here and defined in another translation unit. */
extern int rn_empty;
extern int rn_text;
extern int rn_notAllowed;
extern int rn_dt_string;
extern int rn_dt_token;
extern int rn_xsd_uri;

extern char *rn_string;

/* Tables read and written through the accessor macros of this header. */
extern int *rn_pattern;
extern int *rn_nameclass;

/* Function prototypes.  Functions have external linkage by default, so the
   `extern' keyword is not repeated on each declaration. */

void rn_new_schema(void);

int rn_contentType(int i);
void rn_setContentType(int i,int t1,int t2);
int rn_groupable(int p1,int p2);

void rn_del_p(int i);
void rn_add_p(int i);

int rn_newString(char *s);

/* rn_new<Kind>: one function per pattern kind; the parameters mirror the
   operands read back by the matching rn_<Kind> accessor macro. */
int rn_newNotAllowed(void);
int rn_newEmpty(void);
int rn_newText(void);
int rn_newChoice(int p1,int p2);
int rn_newInterleave(int p1,int p2);
int rn_newGroup(int p1,int p2);
int rn_newOneOrMore(int p1);
int rn_newList(int p1);
int rn_newData(int dt,int ps);
int rn_newDataExcept(int p1,int p2);
int rn_newValue(int dt,int s);
int rn_newAttribute(int nc,int p1);
int rn_newElement(int nc,int p1);
int rn_newAfter(int p1,int p2);
int rn_newRef(void);

int rn_one_or_more(int p);
int rn_group(int p1,int p2);
int rn_choice(int p1,int p2);
int rn_ileave(int p1,int p2);
int rn_after(int p1,int p2);

/* rn_new<NameClass>: one function per name class kind. */
int rn_newAnyName(void);
int rn_newAnyNameExcept(int nc);
int rn_newQName(int uri,int name);
int rn_newNsName(int uri);
int rn_newNameClassExcept(int nc1,int nc2);
int rn_newNameClassChoice(int nc1,int nc2);
int rn_newDatatype(int lib,int typ);

int rn_i_ps(void);
void rn_add_pskey(char *s);
void rn_add_psval(char *s);
void rn_end_ps(void);

void rn_init(void);
void rn_clear(void);

void rn_compress(int *starts,int n);
int rn_compress_last(int start);

#endif

--- NEW FILE: Makefile.bsd ---

# Release number; DEF hands it to the compiler as the RNV_VERSION,
# ARX_VERSION and RVP_VERSION string macros.
VERSION=1.7.8
# Compiler driver used by the compile rule and by the link rules.
CC=cc

# Optional features.  M_STATIC and M_FILL always reach the compiler through
# DEF; DSL_SCM and DXL_EXC are added to DEF by the .if blocks below, and only
# when they are set to a non-zero value.
M_STATIC=0
M_FILL=0
DSL_SCM=0
DXL_EXC=0

# Header names passed to the sources as -D macros.  The double quotes are
# part of the value, so the shell does not treat < and > as redirections.
EXPAT_H="<expat.h>"
UNISTD_H="<unistd.h>"
SCM_H="<scm/scm.h>"

# Header and library search paths, followed by whatever CPPFLAGS and
# LDFLAGS contain.
INC=-I/usr/local/include ${CPPFLAGS}
LBL=-L/usr/local/lib ${LDFLAGS}

# Preprocessor definitions used for every compilation.
DEF=-DM_STATIC=${M_STATIC} -DM_FILL=${M_FILL} \
    -DEXPAT_H=${EXPAT_H} -DUNISTD_H=${UNISTD_H} \
    -DRNV_VERSION="\"${VERSION}\"" \
    -DARX_VERSION="\"${VERSION}\"" \
    -DRVP_VERSION="\"${VERSION}\""
WARN=-Wall -Wstrict-prototypes -Wmissing-prototypes -Wcast-align
OPT=-O -g

# Complete flag sets: CFLAGS for the .c.o rule, LFLAGS for the link rules.
CFLAGS=${INC} ${DEF} ${WARN} ${OPT}
LFLAGS=${OPT} ${LBL}

# Library always named on the link lines.
LIBEXPAT=-lexpat
# Libraries for the SCM option.  The back-quoted command is left in the
# value for the shell to run when a recipe uses it; it prints -ldl and/or
# -lsocket when the corresponding static archive exists in /usr/lib.
LIB_SCM=-lscm -lm \
    `sh -c '[ -f /usr/lib/libdl.a ] && echo -ldl ; \
            [ -f /usr/lib/libsocket.a ] && echo -lsocket '`

LIB=${LIBEXPAT}

# DSL_SCM non-zero: define DSL_SCM and SCM_H for the compiler and add the
# SCM libraries to the link line.
.if ${DSL_SCM}
DEF+= -DDSL_SCM=${DSL_SCM} -DSCM_H=${SCM_H}
LIB+= ${LIB_SCM}
.endif

# DXL_EXC non-zero: define DXL_EXC for the compiler.
.if ${DXL_EXC}
DEF+= -DDXL_EXC=${DXL_EXC}
.endif

# Library file names; LIBRNV picks the one the programs are linked against.
LIBRNVA=librnv.a
LIBRNVSO=librnv.so
LIBRNV=${LIBRNVA}

# Files the depend target requires and passes to makedepend.
SRC=ll.h erbit.h \
    xcl.c arx.c rvp.c xsdck.c test.c \
    ary.c ary.h \
    rn.c rn.h rnc.c rnc.h rnd.c rnd.h rnl.c rnl.h \
    rnv.c rnv.h rnx.c rnx.h drv.c drv.h \
    xsd.c xsd.h xsd_tm.c xsd_tm.h \
    dxl.c dxl.h dsl.c dsl.h \
    sc.c sc.h ht.c ht.h er.c er.h u.c u.h \
    xmlc.c xmlc.h s.c s.h m.c m.h \
    rx.c rx.h rx_cls_u.c rx_cls_ranges.c

# Object files that make up the rnv library (static and shared).
OBJ=rn.o rnc.o rnd.o rnl.o rnv.o rnx.o drv.o ary.o \
    xsd.o xsd_tm.o dxl.o dsl.o \
    sc.o u.o ht.o er.o xmlc.o s.o m.o rx.o

# Register the two suffixes the inference rule below works with.
.SUFFIXES: .c .o

# Compile one C source into its object file.  ${.IMPSRC} is the .c file the
# rule was inferred from and ${.TARGET} is the .o being built.
.c.o:
	${CC} ${CFLAGS} -c -o ${.TARGET} ${.IMPSRC}

# Default goal: build every program.  Declared phony so that a file that
# happens to be named "all" cannot make the goal look up to date.
all: rnv arx rvp xsdck test
.PHONY: all

# Programs: each one is a single driver object linked against the rnv
# library and ${LIB}.  ${.TARGET} is the name of the program being linked.
# Note that rnv is built from xcl.o; the others from the object of the same
# name.
rnv: xcl.o ${LIBRNV}
	${CC} ${LFLAGS} -o ${.TARGET} xcl.o ${LIBRNV} ${LIB}

arx: arx.o ${LIBRNV}
	${CC} ${LFLAGS} -o ${.TARGET} arx.o ${LIBRNV} ${LIB}

rvp: rvp.o ${LIBRNV}
	${CC} ${LFLAGS} -o ${.TARGET} rvp.o ${LIBRNV} ${LIB}

xsdck: xsdck.o ${LIBRNV}
	${CC} ${LFLAGS} -o ${.TARGET} xsdck.o ${LIBRNV} ${LIB}

test: test.o ${LIBRNV}
	${CC} ${LFLAGS} -o ${.TARGET} test.o ${LIBRNV} ${LIB}

# Static library: insert (or replace) the objects in the archive, then
# regenerate the archive index.
${LIBRNVA}: ${OBJ}
	ar rc ${.TARGET} ${OBJ}
	ranlib ${.TARGET}

# Shared library.  Linked with ${CC}, the same driver every other rule uses,
# rather than a hard-coded gcc, so that overriding CC affects this rule too.
# NOTE(review): CFLAGS contains no -fPIC; some platforms require
# position-independent objects for -shared -- confirm before relying on it.
${LIBRNVSO}: ${OBJ}
	${CC} -shared -o $@ ${OBJ}

# Regenerate header dependencies with makedepend.  -Y is passed together
# with the -D definitions from DEF.  Declared phony so that a file named
# "depend" cannot stop the command from running.
depend: ${SRC}
	makedepend -Y ${DEF} ${SRC}
.PHONY: depend

# Remove build products: objects, libraries, every program built by "all"
# (rnv arx rvp xsdck test), the test drivers, and profiling/packaging
# leftovers.  The leading "-" keeps make going if rm reports an error.
# Declared phony so that a file named "clean" cannot disable the target.
clean:
	-rm -f *.o tst/c/*.o  *.a *.so rnv arx rvp xsdck test rnd_test *_test *.core *.gmon *.gprof rnv*.zip rnv.txt rnv.pdf rnv.html rnv.xml
.PHONY: clean

# Test driver built straight from its C source in one compile-and-link step.
# Only LFLAGS and -I. are used here, so CFLAGS (and with it DEF) does not
# apply to this compilation.  The library is listed after the source file on
# the command line.
rnd_test: ${LIBRNV} tst/c/rnd_test.c
	${CC} ${LFLAGS} -I. -o ${.TARGET} tst/c/rnd_test.c ${LIBRNV} ${LIB}
Received on Monday, 3 August 2009 05:33:09 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Monday, 3 August 2009 05:33:10 GMT