- From: Yosi Scharf <syosi@mit.edu>
- Date: Thu, 20 Jan 2005 18:56:20 -0500
- To: "Sean B. Palmer" <sean+cwm@infomesh.net>
- Cc: public-cwm-talk@w3.org
- Message-ID: <41F04524.10907@mit.edu>
I have been working for the last couple of days on getting cwm to use n3p.py as its n3 parser. This necessitated the following changes to n3p.py: - the regexp for whitespace was fixed so a comment could end the file, with no trailing newline - I had to create a way for an N3Parser to be created where it would just accept a string, instead of trying to open the n3 file itself - On larger files (notably http://www.w3.org/2000/10/swap/test/rdfcore-tests.n3 ), it was running into the recursion limit. I made it no longer recursive. - "has" was added to the initial list of known keywords. Attached is the diff. It is less clean than it could be, because I was running into indent spacing issues. The way to invoke N3Parser without it opening anything is an ugly hack. Strangely enough, the modifications to parse() actually work, as far as I can tell. Yosi Scharf
40c40
< r_whitespace = re.compile(r'[ \t\r\n]*(?:(?:#[^\n]*)?\r?\n)?')
---
> r_whitespace = re.compile(r'[ \t\r\n]*(?:(?:#[^\n]*)?\r?(?:$|\n))?')
51,56c51,59
< def __init__(self, uri, branches, regexps):
< if (uri != 'file:///dev/stdin'):
< u = urllib.urlopen(uri)
< self.data = u.read()
< u.close()
< else: self.data = sys.stdin.read()
---
> def __init__(self, uri, branches, regexps):
> if uri == 'nowhere':
> pass
> else:
> if (uri != 'file:///dev/stdin'):
> u = urllib.urlopen(uri)
> self.data = u.read()
> u.close()
> else: self.data = sys.stdin.read()
61c64
< self.keywords = set(("a", "is", "of", "this"))
---
> self.keywords = set(("a", "is", "of", "this", "has"))
65,102c68,116
< def parse(self, prod):
< tok = self.token()
< # Got an opened production
< self.onStart(abbr(prod))
< if not tok:
< return tok # EOF
<
< prodBranch = self.branches[prod]
< sequence = prodBranch.get(tok, None)
< if sequence is None:
< print >> sys.stderr, 'prodBranch', prodBranch
< raise Exception("Found %s when expecting a %s" % (tok, prod))
< for term in sequence:
< if isinstance(term, unicode):
< j = self.pos + len(term)
< word = self.data[self.pos:j]
< if word == term:
< self.onToken(term, word)
< self.pos = j
< elif '@' + word[:-1] == term:
< self.onToken(term, word[:-1])
< self.pos = j - 1
< else: raise Exception("Found %s; %s expected" % \
< (self.data[self.pos:self.pos+10], term))
< elif not self.regexps.has_key(term):
< self.parse(term)
< continue
< else:
< regexp = self.regexps[term]
< m = regexp.match(self.data, self.pos)
< if not m:
< raise Exception("Token: %r should match %s" % \
< (self.data[self.pos:self.pos+10], regexp.pattern))
< end = m.end()
< self.onToken(abbr(term), self.data[self.pos:end])
< self.pos = end
< self.token()
< self.onFinish()
---
> def parse(self, prod):
> todo_stack = [[prod, None]]
> while todo_stack:
> #print todo_stack
> #prod = todo_stack.pop()
> if todo_stack[-1][1] is None:
> todo_stack[-1][1] = []
> tok = self.token()
> # Got an opened production
> self.onStart(abbr(todo_stack[-1][0]))
> if not tok:
> return tok # EOF
>
> prodBranch = self.branches[todo_stack[-1][0]]
> sequence = prodBranch.get(tok, None)
> if sequence is None:
> print >> sys.stderr, 'prodBranch', prodBranch
> raise Exception("Found %s when expecting a %s . todo_stack=%s" % (tok, todo_stack[-1][0], `todo_stack`))
> for term in sequence:
> todo_stack[-1][1].append(term)
> while todo_stack[-1][1]:
> term = todo_stack[-1][1].pop(0)
> if isinstance(term, unicode):
> j = self.pos + len(term)
> word = self.data[self.pos:j]
> if word == term:
> self.onToken(term, word)
> self.pos = j
> elif '@' + word[:-1] == term:
> self.onToken(term, word[:-1])
> self.pos = j - 1
> else: raise Exception("Found %s; %s expected" % \
> (self.data[self.pos:self.pos+10], term))
> elif not self.regexps.has_key(term):
> todo_stack.append([term, None])
> continue
> else:
> regexp = self.regexps[term]
> m = regexp.match(self.data, self.pos)
> if not m:
> raise Exception("Token: %r should match %s" % \
> (self.data[self.pos:self.pos+10], regexp.pattern))
> end = m.end()
> self.onToken(abbr(term), self.data[self.pos:end])
> self.pos = end
> self.token()
> while todo_stack[-1][1] == []:
> todo_stack.pop()
> self.onFinish()
Received on Thursday, 20 January 2005 23:56:55 UTC