- From: Dave Beckett <dave.beckett@bristol.ac.uk>
- Date: Thu, 6 Jan 2005 14:31:45 +0000
- To: "Sean B. Palmer" <sean+wa@infomesh.net>
- Cc: www-archive@w3.org
Thoughts so far as annotations to your code. I don't have python 2.4 and don't need it for 1 feature, sets that aren't really exercised here. Redland provides triple set by default anyway. vhash(term) is called way too many times, you need to cache the result Dave #!/usr/bin/env python """ Vanilla RDF Graph Isomorphism Tester Author: Sean B. Palmer, inamidst.com Uses the pyrple algorithm Usage: ./rdfdiff-vanilla.py <ntriplesP> <ntriplesQ> Requirements: Python2.4+ http://inamidst.com/proj/rdf/ntriples.py References: http://inamidst.com/proj/rdf/rdfdiff.py http://miscoranda.com/comments/129#id2005010405560004 """ # Built-in function # hash(object) # Return the hash value of the object (if it has one). Hash values # are integers. They are used to quickly compare dictionary keys # during a dictionary lookup. Numeric values that compare equal # have the same hash value (even if they are of different types, # as is the case for 1 and 1.0). import sys, re, urllib import ntriples from ntriples import bNode ntriples.r_uriref = re.compile(r'<([^\s"<>]+)>') class Graph(object): def __init__(self, uri): # FIXME 2.4 set # self.triples = set() # changed to 2.3 list self.triples = [] self.parse(uri) def parse(self, uri): """Read triples at uri into Graph""" class Sink(object): def triple(sink, s, p, o): # FIXME 2.4 set add operation # self.triples.add((s, p, o)) # changed to 2.3 list add self.triples.append((s, p, o)) p = ntriples.NTriplesParser(sink=Sink()) u = urllib.urlopen(uri) p.parse(u) u.close() def __hash__(self): """Make a graph hash based on the triples inside it""" # builds a sorted sequence of hashes of triples result = [] # set iteration for triple in self.triples: newTriple = [] # for each part of the triple, s, p, o for term in triple: if isinstance(term, bNode): # bnodes get a tuple equivalent to the use of the bnode # in the graph newTriple.append(self.vhash(term)) else: # named items add themselves newTriple.append(term) # newTriple is redundant as it is immediately turned into an # integer hash. Should just make a content digest (say md5) # instead of newTriple.append() tripleHash = hash(tuple(newTriple)) result.append(tripleHash) # sort sequence by hash integer order # O(#triples) result.sort() # which immediately is used to make an integer h=hash(tuple(result)) print "__hash__ result: '"+str(h)+"'" return h def vhash(self, term, done=False): """Return a tuple () for a blank node s,p,o term equivalent to the use of the blank node in the graph. This is called multiple times for the same term in every triple. It is a very expensive operation and the result MUST BE CACHED but isn't so far. It could just return a content digest that was equiv to the tuple's contents as the return value is only ever used as to form a content digest via hash(). """ result = [] # set iteration for triple in self.triples: # if term is s, p or o in triple if term in triple: # for each part of the triple, pos 1(s),2(p),3(o) for pos in xrange(3): if not isinstance(triple[pos], bNode): # non-bnode, add the triple term result.append(triple[pos]) elif done or (triple[pos] == term): # no recursion or match, add the position result.append(pos) else: # else recurse and add the vhash of this triple term # which is itself a tuple result.append(self.vhash(triple[pos], done=True)) # sort sequence by hash integer order # O(#triples containing term) result.sort() v=tuple(result) print "vhash '"+term+"' result: '"+str(v)+"'" return v def compare(p, q): return hash(Graph(p)) == hash(Graph(q)) def main(): result = compare(sys.argv[1], sys.argv[2]) print ('no', 'yes')[result] if __name__=="__main__": main()
Received on Thursday, 6 January 2005 14:34:51 UTC