Re: more proof checking goodies... from Dan Connolly on 2000-01-20 (sw99@w3.org from January to March 2000)

From: Dan Connolly <connolly@w3.org>
Date: Thu, 20 Jan 2000 13:40:22 -0600
To: sw99@w3.org
Message-ID: <388764A6.C780C775@w3.org>

I took a look. It's so small and straightfoward that I attach it here.

I offer a 500 brownie-point bounty to anybody who converts it to Java
and converts the ()'s in the input format to <>'s.

5 points for perl. ;-)

100 points for explaining resolution theorem proving to me; 10 points
for pointers to where I can learn it myself.

Dan Connolly wrote:
> 
> "Proofs found by programs are always questionable. Our approach to this
> problem is to
> have the theorem prover construct a detailed proof object and have a
> very simple
> program (written in a high-level language) check that the proof object
> is correct. The proof
> checking program is simple enough that it can be scrutinized by humans,
> and formal
> verification is probably feasible.
[...] "
> 
> -- http://www-unix.mcs.anl.gov/~mccune/papers/robbins/
> 
> <-- http://www.cs.utexas.edu/users/boyer/
> 
> I gotta take a look at the proof checker...

-- 
Dan Connolly
tel:+1-512-310-2971
http://www.w3.org/People/Connolly/

; This code checks Otter proof objects.  (The Otter command
; "set(build_proof_object)" tells Otter to output a proof object
; whenever it finds a proof.)  Otter proof objects have 6 kinds of step:
; 
;   1. input        input clause
;   2. resolve      resolution on identical literals
;   3. merge        merge identical literals
;   4. instantiate  instantiate variables
;   5. paramod      paramodulation on identical terms
;   6. flip         flip an equality literal (positive or negative)
; 
; The steps contain detailed justifications, including substitutions and
; positions of terms/literals, so that this code doesn't have to know
; about unification and it doesn't have to look for the correct terms to
; operate on.
; 
; NOTES
; 
;   1. Otter can't build a proof object for a proof that uses
;      lex-dependent demodulation or answer literals.  All
;      other main-stream stuff should be ok.
;   2. Some symbols allowed by Otter aren't accepted by Nqthm,
;      e.g., =, *, ^, +.  A sed script sed.fix_some_symbols is used
;      to replace some of them with new symbols.
;
; TO DO:
;   1. Case matters to Otter: (a & -A).
;
;
; ------------------------------------------------ some utilities

(defn ith-member (c i)
  (if (nlistp c)
      0
      (if (equal i 1)
	  (car c)
	  (ith-member (cdr c) (sub1 i)))))

(defn remove-ith-member (c i)
  (if (nlistp c)
      c
      (if (equal i 1)
	  (cdr c)
	  (cons (car c) (remove-ith-member (cdr c) (sub1 i))))))

(defn subset (x y)
  (if (nlistp x)
      t
      (and (member (car x) y) (subset (cdr x) y))))

(defn length (x)
  (if (nlistp x) 0 (add1 (length (cdr x)))))

; ------------------------------------------------

; Proof steps have the form (id justification clause).
; A clause is a list of literals.  A variable is a nonlist term.
; The first member of the justification is the type of the step.
; Types of step: input, resolve, merge, instantiate, paramod, flip.
; The rest of the justification list depends on the type.
;
; The position of a subterm is given by a list of integers >= 1;
; Positions apply to lists of terms and point at terms.
; For example, in ((not (p x (a)))), (a) is in position (1 1 2).
; 
; Examples of proof steps:
; 
; (3 (input) ((not (p)) (r)))
; (32 (resolve 3 (1) 9 (3)) ((not (mc)) (not (sm)) (not (mc)) (sd)))
; (7 (merge 6 (2)) ((p)))
; (119 (instantiate 69 ((v0 . (f1 v64 v65)))) ((p (f1 v64 v65) (f1 v64 v65))))
; (36 (paramod 34 (1 1) 35 (1 1 3)) ((eq (f (g v66) v65 v66) v65)))
; (26 (flip 25 (1)) ((eq (f v65 v66 (f (g v66) v65 v67)) v65)))
;
; ---------------------------------------- check that proofs are well formed. 

(defn wf-sym (s)
  (and (nlistp s)
       (not (equal s t))
       (not (equal s f))
       (not (equal s nil))
       (not (equal s 0))))

(defn wf-pred-sym (s)
  (wf-sym s))

(defn wf-func-sym (s)
  (wf-sym s))

(defn wf-var-sym (s)
  (wf-sym s))

(defn wf-term-list (l)
  (if (nlistp l)
      t
      (and (if (nlistp (car l))
	       (wf-var-sym (car l))       
	       (and (wf-func-sym (caar l))
		    (wf-term-list (cdar l))))
	   (wf-term-list (cdr l)))))

(defn wf-term (x)
  (wf-term-list (list x)))

(defn wf-eq-atom (a)
  (and (equal (length a) 3)
       (equal (car a) 'equal)
       (wf-term-list (cdr a))))

(defn wf-atom (a)
  (or (wf-eq-atom a)
      (and (listp a)
	   (wf-pred-sym (car a))
	   (wf-term-list (cdr a)))))

(defn wf-literal (lit)
  (and (listp lit)
       (if (equal (car lit) 'not)
	   (and (equal (length lit) 2)
		(wf-atom (cadr lit)))
	   (wf-atom lit))))

(defn wf-clause (c)
  (if (nlistp c)
      t
      (and (wf-literal (car c))
	   (wf-clause (cdr c)))))

(defn wf-subst (subst)
  (if (nlistp subst)
      t
      (and (listp (car subst))
	   (wf-var-sym (caar subst))
	   (wf-term (cdar subst))
	   (wf-subst (cdr subst)))))

(defn wf-justification (just)
  (if (nlistp just)
      f
      (if (equal (car just) 'input)
	  (equal (length just) 1)
      (if (equal (car just) 'resolve)
	  (equal (length just) 5)
      (if (equal (car just) 'merge)
	  (equal (length just) 3)
      (if (equal (car just) 'instantiate)
          (and (equal (length just) 3)
	       (wf-subst (cadr just)))
      (if (equal (car just) 'paramod)
	  (equal (length just) 5)
      (if (equal (car just) 'flip)
	  (equal (length just) 3)
	  f))))))))
	
(defn wf-step (st)
  (if (not (equal (length st) 3))
      f
      (and (nlistp (car st))
	   (wf-clause (caddr st))
	   (wf-justification (cadr st)))))

(defn wf-proof (p)
  (if (nlistp p)
      t
      (and (wf-step (car p))
	   (wf-proof (cdr p)))))

(defn wf-proof-verbose (p)
  (if (nlistp p)
      nil
      (cons (cons (caar p) (wf-step (car p)))
	    (wf-proof-verbose (cdr p)))))

; Here are routines to get stuff out of proof steps.  Note that "rule"
; and "clause" apply to all types of step, but the others don't.

(defn rule         (step) (caadr step))
(defn clause       (step) (caddr step))
(defn par1-id      (step) (cadadr step))
(defn pos1         (step) (caddadr step))
(defn par2-id      (step) (cadddadr step))
(defn pos2         (step) (caddddadr step))
(defn substitution (step) (caddadr step))

; Just check that the steps are sound; don't check if there are extra
; literals in the results.
;
; ------------------------------------------------ resolution

(defn complementary (p q)
  (or (and (equal (car p) 'not) (equal (cadr p) q))
      (and (equal (car q) 'not) (equal (cadr q) p))))

(defn check-resolve-2 (parent1 pos1 parent2 pos2 resolvent)
  (and (complementary (ith-member parent1 (car pos1))
		      (ith-member parent2 (car pos2)))
       (subset (remove-ith-member parent1 (car pos1)) resolvent)
       (subset (remove-ith-member parent2 (car pos2)) resolvent)))

(defn check-resolve (step checked)
  (check-resolve-2 (clause (assoc (par1-id step) checked)) (pos1 step)
		   (clause (assoc (par2-id step) checked)) (pos2 step)
		   (clause step)))

; ------------------------------------------------ merge
; The position doesn't make it any easier to check.

(defn check-merge-2 (parent position result)
  (subset parent result))

(defn check-merge (step checked)
  (check-merge-2 (clause (assoc (par1-id step) checked)) (pos1 step)
		 (clause step)))

; ------------------------------------------------ instantiation

(defn apply-list (x subst)  ; x is a list of terms.
  (if (nlistp x)
      x
      (cons (if (nlistp (car x))
                (if (assoc (car x) subst)
                    (cdr (assoc (car x) subst))
                    (car x))
                (cons (caar x) (apply-list (cdar x) subst)))
            (apply-list (cdr x) subst))))

(defn check-instance-2 (parent subst result)
  (equal (apply-list parent subst) result))

(defn check-instance (step checked)
  (check-instance-2 (clause (assoc (par1-id step) checked)) (substitution step)
		    (clause step)))

; ------------------------------------------------ paramodulation

(defn subterm-at-pos (x pos)  ; x is a list of terms.
  (if (nlistp x)
      0 ; won't hapen with valid pos.
      (if (equal (car pos) 1)
	  (if (nlistp (cdr pos))
	      (car x)
	      (subterm-at-pos (cdar x) (cdr pos)))
	  (subterm-at-pos (cdr x) (cons (sub1 (car pos)) (cdr pos))))))

(defn replace-at-pos (x pos replacement)  ; x is a list of terms.
  (if (nlistp x)
      0 ; won't hapen with valid pos.
      (if (equal (car pos) 1)
	  (cons (if (nlistp (cdr pos))
		    replacement
		    (cons (caar x)
			  (replace-at-pos (cdar x) (cdr pos) replacement)))
		(cdr x))
	  (cons (car x)
		(replace-at-pos (cdr x)
				(cons (sub1 (car pos)) (cdr pos))
				replacement)))))

(defn beta (clause pos)  ; replacement term
  (subterm-at-pos clause (list (car pos) (if (equal (cadr pos) 1) 2 1))))

(defn check-paramod-2 (from-cl from-pos into-cl into-pos para)
  (and (wf-eq-atom (ith-member from-cl (car from-pos)))
       (equal (subterm-at-pos into-cl into-pos)
	      (subterm-at-pos from-cl from-pos))
       (member (cons (car (ith-member into-cl (car into-pos))) 
	             (replace-at-pos (cdr (ith-member into-cl (car into-pos)))
				     (cdr into-pos)
				     (beta from-cl from-pos)))
	       para)
       (subset (remove-ith-member from-cl (car from-pos)) para)
       (subset (remove-ith-member into-cl (car into-pos)) para)))

(defn check-paramod (step checked)
  (check-paramod-2 (clause (assoc (par1-id step) checked))
		   (pos1 step)
		   (clause (assoc (par2-id step) checked))
		   (pos2 step)
		   (clause step)))

; ------------------------------------------------ flip

(defn check-flip-2 (parent pos eq-lit result)
  (and (if (equal (car eq-lit) 'not)
	   (and (wf-eq-atom (cadr eq-lit))
		(member (list 'not (list (caadr eq-lit)
					 (caddadr eq-lit)
					 (cadadr eq-lit))) result))
	   (and (wf-eq-atom eq-lit)
		(member (list (car eq-lit)
			      (caddr eq-lit)
			      (cadr eq-lit)) result)))
       (subset (remove-ith-member parent (car pos)) result)))
	   
(defn check-flip (step checked)
  (check-flip-2 (clause (assoc (par1-id step) checked))
		(pos1 step)
		(ith-member (clause (assoc (par1-id step) checked))
			    (car (pos1 step)))
		(clause step)))

; ------------------------------------------------ 

(defn check-step (step checked)
  (if (not (wf-step step))             f
  (if (equal (rule step) 'input)       t
  (if (equal (rule step) 'resolve)     (check-resolve  step checked)
  (if (equal (rule step) 'merge)       (check-merge    step checked)
  (if (equal (rule step) 'instantiate) (check-instance step checked)
  (if (equal (rule step) 'paramod)     (check-paramod  step checked)
  (if (equal (rule step) 'flip)        (check-flip     step checked)
      f))))))))

; ------------------------------------------------ 
;
; The proof is partitioned into CHECKED and NOT-CHECKED; the
; CHECKed part happens to get constructed backwards.
;
; This version returns the result for each step.

(defn check-proof-verbose (not-checked checked)
  (if (nlistp not-checked)
      nil
      (cons (cons (caar not-checked) (check-step (car not-checked) checked))
	    (check-proof-verbose (cdr not-checked)
		       (cons (car not-checked) checked)))))

; This version returns just T or F.

(defn check-proof (not-checked checked)
  (if (nlistp not-checked)
      t
      (and (check-step (car not-checked) checked)
	   (check-proof (cdr not-checked)
			(cons (car not-checked) checked)))))

; This version returns a list of the bad steps

(defn check-proof-errors (not-checked checked)
  (if (nlistp not-checked)
      nil
      (append (if (check-step (car not-checked) checked)
		  nil
		  (list (caar not-checked)))
	      (check-proof-errors (cdr not-checked)
				  (cons (car not-checked) checked)))))

; ------------------------------------------------ 
;
; Here are the top-level routines.  Pick one.
;

(defn check (proof)             ; return T or F
  (check-proof proof nil))

(defn check-verbose (proof)     ; return T or F for each step
  (check-proof-verbose proof nil))

(defn check-errors (proof)      ; return a list of the bad steps
  (list 'the-bad-steps-are
	(check-proof-errors proof nil)))

Received on Thursday, 20 January 2000 14:41:50 UTC