- From: Sean B. Palmer <sean+wa@infomesh.net>
- Date: Sat, 08 Jan 2005 23:16:35 +0000
- To: Dave Beckett <dave.beckett@bristol.ac.uk>
- CC: www-archive@w3.org
Sean B. Palmer wrote:

> tripleHash = md5.new(str(self.vhash(subj)))

This should've been vhashmemo, and necessitated a change to the memoizer as tuples cannot have weakrefs. New code:

[[[
[...]
import sys, re, md5, urllib
import ntriples
from ntriples import bNode

ntriples.r_uriref = re.compile(r'<([^\s"<>]+)>')

class Graph(object):
    def __init__(self, uri):
        self.triples = {}
        self.cache = {}
        self.parse(uri)

    def parse(self, uri):
        class Sink(object):
            def triple(sink, s, p, o):
                self.triples[(s, p, o)] = True

        p = ntriples.NTriplesParser(sink=Sink())
        u = urllib.urlopen(uri)
        p.parse(u)
        u.close()

    def __hash__(self):
        result = []
        for (subj, pred, objt) in self.triples.iterkeys():
            if isinstance(subj, bNode):
                tripleHash = md5.new(str(self.vhashmemo(subj)))
            else:
                tripleHash = md5.new(subj)

            for term in (pred, objt):
                if isinstance(term, bNode):
                    tripleHash.update(str(self.vhashmemo(term)))
                else:
                    tripleHash.update(term)

            result.append(tripleHash.digest())
        result.sort()
        return hash(tuple(result))

    def vhashmemo(self, term, done=False):
        if self.cache.has_key((term, done)):
            return self.cache[(term, done)]

        result = self.vhash(term, done=done)
        self.cache[(term, done)] = result
        return result

    def vhash(self, term, done=False):
        result = []
        for triple in self.triples:
            if term in triple:
                for pos in xrange(3):
                    if not isinstance(triple[pos], bNode):
                        result.append(triple[pos])
                    elif done or (triple[pos] == term):
                        result.append(pos)
                    else:
                        result.append(self.vhash(triple[pos], done=True))
        result.sort()
        return tuple(result)

def compare(p, q):
    return hash(Graph(p)) == hash(Graph(q))

def main():
    result = compare(sys.argv[1], sys.argv[2])
    print ('no', 'yes')[result]

if __name__=="__main__":
    main()
]]]

- http://inamidst.com/proj/rdf/rdfdiff-vanilla.py

-- 
Sean B. Palmer, http://inamidst.com/sbp/
Received on Saturday, 8 January 2005 23:17:11 UTC