- From: Dave Beckett <dave.beckett@bristol.ac.uk>
- Date: Thu, 6 Jan 2005 14:31:45 +0000
- To: "Sean B. Palmer" <sean+wa@infomesh.net>
- Cc: www-archive@w3.org
Thoughts so far as annotations to your code. I don't have python 2.4
and don't need it for 1 feature, sets that aren't really exercised
here. Redland provides triple set by default anyway.
vhash(term) is called way too many times, you need to cache the result
Dave
#!/usr/bin/env python
"""
Vanilla RDF Graph Isomorphism Tester
Author: Sean B. Palmer, inamidst.com
Uses the pyrple algorithm
Usage: ./rdfdiff-vanilla.py <ntriplesP> <ntriplesQ>
Requirements:
Python2.4+
http://inamidst.com/proj/rdf/ntriples.py
References:
http://inamidst.com/proj/rdf/rdfdiff.py
http://miscoranda.com/comments/129#id2005010405560004
"""
# Built-in function
# hash(object)
# Return the hash value of the object (if it has one). Hash values
# are integers. They are used to quickly compare dictionary keys
# during a dictionary lookup. Numeric values that compare equal
# have the same hash value (even if they are of different types,
# as is the case for 1 and 1.0).
import sys, re, urllib
import ntriples
from ntriples import bNode
ntriples.r_uriref = re.compile(r'<([^\s"<>]+)>')
class Graph(object):
def __init__(self, uri):
# FIXME 2.4 set
# self.triples = set()
# changed to 2.3 list
self.triples = []
self.parse(uri)
def parse(self, uri):
"""Read triples at uri into Graph"""
class Sink(object):
def triple(sink, s, p, o):
# FIXME 2.4 set add operation
# self.triples.add((s, p, o))
# changed to 2.3 list add
self.triples.append((s, p, o))
p = ntriples.NTriplesParser(sink=Sink())
u = urllib.urlopen(uri)
p.parse(u)
u.close()
def __hash__(self):
"""Make a graph hash based on the triples inside it"""
# builds a sorted sequence of hashes of triples
result = []
# set iteration
for triple in self.triples:
newTriple = []
# for each part of the triple, s, p, o
for term in triple:
if isinstance(term, bNode):
# bnodes get a tuple equivalent to the use of the bnode
# in the graph
newTriple.append(self.vhash(term))
else:
# named items add themselves
newTriple.append(term)
# newTriple is redundant as it is immediately turned into an
# integer hash. Should just make a content digest (say md5)
# instead of newTriple.append()
tripleHash = hash(tuple(newTriple))
result.append(tripleHash)
# sort sequence by hash integer order
# O(#triples)
result.sort()
# which immediately is used to make an integer
h=hash(tuple(result))
print "__hash__ result: '"+str(h)+"'"
return h
def vhash(self, term, done=False):
"""Return a tuple () for a blank node s,p,o term
equivalent to the use of the blank node in the graph.
This is called multiple times for the same term in every triple.
It is a very expensive operation and the result MUST BE CACHED
but isn't so far.
It could just return a content digest that was equiv to the
tuple's contents as the return value is only ever used as
to form a content digest via hash().
"""
result = []
# set iteration
for triple in self.triples:
# if term is s, p or o in triple
if term in triple:
# for each part of the triple, pos 1(s),2(p),3(o)
for pos in xrange(3):
if not isinstance(triple[pos], bNode):
# non-bnode, add the triple term
result.append(triple[pos])
elif done or (triple[pos] == term):
# no recursion or match, add the position
result.append(pos)
else:
# else recurse and add the vhash of this triple term
# which is itself a tuple
result.append(self.vhash(triple[pos], done=True))
# sort sequence by hash integer order
# O(#triples containing term)
result.sort()
v=tuple(result)
print "vhash '"+term+"' result: '"+str(v)+"'"
return v
def compare(p, q):
return hash(Graph(p)) == hash(Graph(q))
def main():
result = compare(sys.argv[1], sys.argv[2])
print ('no', 'yes')[result]
if __name__=="__main__":
main()
Received on Thursday, 6 January 2005 14:34:51 UTC