#!/usr/bin/python
"""Small N-Triples-style toolkit: parse, query and serialize triples.

A document is a sequence of lines, each holding three whitespace-separated
terms followed by a ``.``.  A term is one of:

* ``<...>``   a URI,
* ``_:name``  an existential variable ("Exivar"),
* ``?name``   a universal variable ("Univar"), which acts as a wildcard
  in queries,
* ``"..."``   a literal.

Lines starting with ``#`` and blank lines are ignored.
"""

import random
import re
import string
import sys

# Indexes of subject, predicate and object inside a triple (a 3-item list).
S, P, O = 0, 1, 2

# One term: URI, existential variable, universal variable or literal.
_TERM = r'(<.*?>|_:\S+|\?\S+|"(?:\\"|[^"])*")'

# A statement line: three terms separated by a single space/tab, optional
# whitespace, then a literal '.'.  Matching is anchored at the start of
# the line only, so trailing content after the dot is tolerated.
_TRIPLE_RE = re.compile(
    r'[ \t]*' + _TERM + r'[ \t]' + _TERM + r'[ \t]' + _TERM
    + r'[ \t]*\.[ \t]*')


class Article(object):
    """One term of a triple.

    Attributes:
        r: the serialized form of the term (what ``repr``/``str`` return).
        v: the bare value with its delimiters removed.
        t: the term type: ``'Literal'``, ``'Univar'``, ``'Exivar'`` or
           ``'URI'``.
    """

    def __init__(self, s):
        """Classify *s* by its first character and store its parts.

        A string without a recognized leading character is treated as a
        bare URI and is wrapped in ``<...>`` for its serialized form.  An
        empty string therefore becomes the empty URI ``<>``.
        """
        s = str(s)
        self.r = s
        lead = s[:1]  # slice, so an empty string does not raise IndexError
        if lead == '"':
            self.v, self.t = s[1:-1], 'Literal'
        elif lead == '?':
            self.v, self.t = s[1:], 'Univar'
        elif lead == '_':
            # Skips two characters: the '_' and the one following it
            # (the ':' of a well-formed '_:name').
            self.v, self.t = s[2:], 'Exivar'
        elif lead == '<':
            self.v, self.t = s[1:-1], 'URI'
        else:
            self.v, self.t, self.r = s, 'URI', '<%s>' % s

    def __repr__(self):
        return self.r

    def set(self, s):
        """Re-initialize this term in place from the string *s*."""
        self.__init__(s)


def parse(s, t=None):
    """Parse the document *s* into a list of triples.

    Args:
        s: the document text.  ``\\r\\n`` and ``\\r`` line endings are
           normalized to ``\\n`` before splitting into lines.
        t: optional existing triples; they are copied and the parsed
           triples are appended to the copy.

    Returns:
        A new list whose parsed entries are ``[Article, Article, Article]``
        lists, in document order.

    Raises:
        ValueError: if the document is empty, or a line is neither a
            triple, a comment, nor blank.
    """
    if not s:
        raise ValueError('Document has no content')
    triples = [] if t is None else list(t)
    for line in s.replace('\r\n', '\n').replace('\r', '\n').split('\n'):
        match = _TRIPLE_RE.match(line)
        if match:
            triples.append([Article(term) for term in match.groups()])
            continue
        content = line.strip(' \t')
        # Only genuinely blank lines and comments may be skipped; anything
        # else (including indented garbage) is an error.
        if not content or content.startswith('#'):
            continue
        raise ValueError('Line is invalid: %r' % (line,))
    return triples


def query(q, triples, r=None):
    """Return the triples matching the pattern *q*.

    Args:
        q: a triple used as a pattern.  A position whose term type is
           ``'Univar'`` matches anything; any other position must have
           the same serialized form (``.r``) as the candidate's term.
        triples: the triples to search.
        r: optional initial results; they are copied, and matches are
           appended to the copy.

    Returns:
        A new list of matching triples, in input order.
    """
    def matches(triple):
        return all(q[i].t == 'Univar' or triple[i].r == q[i].r
                   for i in (S, P, O))

    result = [] if r is None else list(r)
    result.extend(triple for triple in triples if matches(triple))
    return result


def nquery(s, x):
    """Query the triples *x* with the first triple parsed from *s*."""
    return query(parse(s)[0], x)


def u(uri):
    """Return *uri* wrapped in angle brackets."""
    return '<' + uri + '>'


def b(label=''):
    """Return an existential-variable term ``_:label``.

    When *label* is empty, a random label of eight lowercase ASCII
    letters is generated.
    """
    if label == '':
        # 208,827,064,576 combinations (26 ** 8)
        label = ''.join(random.choice(string.ascii_lowercase)
                        for _ in range(8))
    return '_:' + label


def l(s):
    """Return *s* wrapped in double quotes (no escaping is applied)."""
    return '"' + s + '"'


def serialize(triples):
    """Serialize *triples* as one ``S P O .`` line per triple.

    The result has no trailing newline; an empty input gives ``''``.
    """
    return '\n'.join('%s %s %s .' % (triple[S], triple[P], triple[O])
                     for triple in triples)


def nt2xmlrdf(ntriples):
    """Parse the document *ntriples* and serialize it with serializeXML."""
    return serializeXML(parse(ntriples))


def serializeXML(triples):
    """Serialize *triples* into an XML-flavoured text form.

    For each triple the subject, predicate and object are handled in
    turn.  Terms of type ``'Univar'`` produce no output at any position.
    An ``'Exivar'`` predicate writes ``Kaboom!`` to stderr and is emitted
    under the made-up ``anon:_`` namespace.

    NOTE(review): the subject branches, the per-triple terminator and the
    final suffix emit only newlines (or nothing), and the literal-object
    branch never closes the element the predicate opened.  This looks as
    if markup was lost from these string literals; they are reproduced
    unchanged here -- confirm against the upstream source before relying
    on the output being well-formed XML.  Term values are also
    interpolated without any XML escaping.

    Raises:
        ValueError: if a predicate is a literal.
    """
    parts = ['\n']
    for t in triples:
        # Element name opened by a URI predicate of *this* triple; stays
        # empty when the predicate is not a URI, so the literal-object
        # branch never sees a value left over from an earlier triple.
        element_name = ''
        for x in (S, P, O):
            kind, term = t[x].t, t[x].v
            if '#' in term:
                pieces = term.split('#')
                txmlns, tname = pieces[0] + '#', pieces[1]
            else:
                # No fragment: split off the last character as the name.
                txmlns, tname = term[:-1], term[-1:]

            if x == S:
                if kind in ('URI', 'Exivar', 'Literal'):  # @@ encoding
                    parts.append('\n')
            elif x == P:
                if kind == 'URI':
                    parts.append(' <' + tname + ' xmlns="' + txmlns + '"')
                    element_name = tname
                elif kind == 'Exivar':
                    sys.stderr.write('Kaboom!')  # i.e. not implementable
                    # A new URI Scheme...
                    parts.append(' <' + term + ' xmlns="anon:_" \n')
                elif kind == 'Literal':
                    raise ValueError('Literal as predicate')
            else:  # x == O
                if kind == 'URI':
                    parts.append('\n rdf:resource="' + term + '"/>\n')
                elif kind == 'Exivar':
                    parts.append('\n rdf:resource="anon:_' + term + '"/>\n')
                elif kind == 'Literal':
                    # This is a hack
                    if element_name:
                        parts.append('>' + term + '\n')
                    else:
                        parts.append(
                            '\n rdf:resource="data:,' + term + '"/>\n')
        parts.append('\n')
    return ''.join(parts)


class Namespace(object):
    """Build names by attribute access: ``ns.foo`` is ``ns + sep + 'foo'``."""

    def __init__(self, ns='', sep=''):
        self.ns, self.sep = ns, sep

    def __getattr__(self, name):
        # Special-method probes (copy, pickle, ...) must fail normally
        # instead of receiving a string.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        return self.ns + self.sep + name


def test():
    """Parse a small sample document and print its serialization."""
    # NOTE(review): the original sample input was garbled (its <...> terms
    # appear to have been stripped and the literal was split across
    # lines).  The URI terms <a>, <b>, <c> and <p> are placeholders --
    # confirm against the upstream source.
    print(serialize(parse(
        '<a> <b> <c> .\n_:q <p> ?r .\n "x"\t"y" <#z> . ')))


if __name__ == "__main__":
    print(__doc__)