# Re: Redland Isomorphism Tool?

From: Dave Beckett <dave.beckett@bristol.ac.uk>
Date: Thu, 6 Jan 2005 14:31:45 +0000
To: "Sean B. Palmer" <sean+wa@infomesh.net>

Message-ID: <20050106143145.73e97ee3.dave.beckett@bristol.ac.uk>
```
Thoughts so far as annotations to your code.  I don't have python 2.4
and don't need it for 1 feature, sets that aren't really exercised
here.  Redland provides triple set by default anyway.

vhash(term) is called way too many times, you need to cache the result

Dave

#!/usr/bin/env python
"""
Vanilla RDF Graph Isomorphism Tester
Author: Sean B. Palmer, inamidst.com
Uses the pyrple algorithm
Usage: ./rdfdiff-vanilla.py <ntriplesP> <ntriplesQ>
Requirements:
Python2.4+
http://inamidst.com/proj/rdf/ntriples.py
References:
http://inamidst.com/proj/rdf/rdfdiff.py
"""

# Built-in function
# hash(object)
#    Return the hash value of the object (if it has one). Hash values
#    are integers. They are used to quickly compare dictionary keys
#    during a dictionary lookup. Numeric values that compare equal
#    have the same hash value (even if they are of different types,
#    as is the case for 1 and 1.0).

import sys, re, urllib
import ntriples
from ntriples import bNode

ntriples.r_uriref = re.compile(r'<([^\s"<>]+)>')

class Graph(object):
def __init__(self, uri):
# FIXME 2.4 set
# self.triples = set()
# changed to 2.3 list
self.triples = []
self.parse(uri)

def parse(self, uri):
"""Read triples at uri into Graph"""
class Sink(object):
def triple(sink, s, p, o):
# FIXME 2.4 set add operation
# changed to 2.3 list add
self.triples.append((s, p, o))

p = ntriples.NTriplesParser(sink=Sink())
u = urllib.urlopen(uri)
p.parse(u)
u.close()

def __hash__(self):
"""Make a graph hash based on the triples inside it"""

# builds a sorted sequence of hashes of triples
result = []

# set iteration
for triple in self.triples:
newTriple = []
# for each part of the triple, s, p, o
for term in triple:
if isinstance(term, bNode):
# bnodes get a tuple equivalent to the use of the bnode
# in the graph
newTriple.append(self.vhash(term))
else:
newTriple.append(term)

# newTriple is redundant as it is immediately turned into an
# integer hash.  Should just make a content digest (say md5)
tripleHash = hash(tuple(newTriple))
result.append(tripleHash)

# sort sequence by hash integer order
# O(#triples)
result.sort()

# which immediately is used to make an integer
h=hash(tuple(result))
print "__hash__ result: '"+str(h)+"'"
return h

def vhash(self, term, done=False):
"""Return a tuple () for a blank node s,p,o term
equivalent to the use of the blank node in the graph.

This is called multiple times for the same term in every triple.
It is a very expensive operation and the result MUST BE CACHED
but isn't so far.

It could just return a content digest that was equiv to the
tuple's contents as the return value is only ever used as
to form a content digest via hash().
"""
result = []

# set iteration
for triple in self.triples:
# if term is s, p or o in triple
if term in triple:
# for each part of the triple, pos 1(s),2(p),3(o)
for pos in xrange(3):
if not isinstance(triple[pos], bNode):
# non-bnode, add the triple term
result.append(triple[pos])
elif done or (triple[pos] == term):
# no recursion or match, add the position
result.append(pos)
else:
# else recurse and add the vhash of this triple term
# which is itself a tuple
result.append(self.vhash(triple[pos], done=True))

# sort sequence by hash integer order
# O(#triples containing term)
result.sort()

v=tuple(result)

print "vhash '"+term+"' result: '"+str(v)+"'"
return v

def compare(p, q):
return hash(Graph(p)) == hash(Graph(q))

def main():
result = compare(sys.argv[1], sys.argv[2])
print ('no', 'yes')[result]

if __name__=="__main__":
main()
```
Received on Thursday, 6 January 2005 14:34:51 GMT

This archive was generated by hypermail 2.2.0+W3C-0.50 : Wednesday, 7 November 2012 14:17:50 GMT