Approximate turtle in BNF n3 from Dave Beckett on 2004-11-03 (www-archive@w3.org from November 2004)

From: Dave Beckett <dave.beckett@bristol.ac.uk>
Date: Wed, 3 Nov 2004 20:55:11 +0000
To: www-archive@w3.org
Message-ID: <20041103205511.66640d53@hedwig.dajobe.org>
# Turtle approx in N3
# based on http://www.w3.org/2000/10/swap/grammar/n3.n3
# untested!

# BNF without tokenization
#
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix bnf: <http://www.w3.org/2000/10/swap/grammar/bnf#>.
@prefix rul: <http://www.w3.org/2000/10/swap/grammar/bnf-rules#>.
@prefix : <http://www.w3.org/2000/10/swap/grammar/n3#>.
@prefix n3: <http://www.w3.org/2000/10/swap/grammar/n3#>.
@prefix list: <http://www.w3.org/2000/10/swap/list#>.
@prefix string: <http://www.w3.org/2000/10/swap/string#>.
@keywords a, is, of.


# Issues:
# - string token regexp not right
# - tokenizing rules in general: whitespace
# - encoding really needs specifying
# - @keywords affects tokenizing
# - Use of dot for !
# - comments (tokenizer deals with)
# - We assume ASCII, in fact should use not notNameChars for i18n

# tokenizing:
# Absorb anything until end of regexp, then stil white space
#  period followed IMMEDIATELY by an opener or name char is taken as "!".
#  Except after a "." used instead of in those circumstances,
#	ws may be inserted between tokens.
#  WS MUST be inserted between tokens where ambiguity would arise.
#  (possible ending characters of one and beginning characters overlap)
#

# States that this document is the syntax description for the thing whose
# internet media type is application/n3.
<> bnf:syntaxFor
	[ bnf:internetMediaType <http://www.w3.org/2003/mediatypes#application/n3> ].

# <> rdfsem:semanticsFor ""   .....

# __________________________________________________________________
#
# The Turtle grammar (approximate), adapted from the N3 full grammar

# Top-level production: any number of @prefix declarations, then the
# (possibly empty) run of statements, then end of input.
#
# The "universal" and "existential" slots inherited from the N3 grammar
# were removed: Turtle has no @forAll / @forSome declarations and this
# grammar never defines those non-terminals, so they could not be expanded.
document a rul:Used;
	bnf:mustBeOneSequence(
		(
			[ bnf:zeroOrMore declaration ]
			statements_optional
			bnf:eof
		)
	).

# Either nothing at all, or one statement closed by "." and then further
# optional statements (right-recursive list).
statements_optional bnf:mustBeOneSequence (
		( )
		( statement "." statements_optional )
	).

# Either empty, or a statement followed by a statementtail.
statementlist bnf:mustBeOneSequence ( ( ) ( statement statementtail ) ).

# Either empty, or a "." separator followed by another statementlist.
statementtail bnf:mustBeOneSequence ( ( ) ( "." statementlist ) ).

# A prefix declaration: the "@prefix" keyword, a qname token, an
# explicituri token and the terminating ".".
declaration bnf:mustBeOneSequence ( ( "@prefix" qname explicituri "." ) ).


# A statement is a subject followed by its (possibly empty) propertylist.
statement bnf:mustBeOneSequence (
		( subject propertylist )
	).

# Either empty, or a verb with one object, any further comma-separated
# objects (objecttail) and any further ";"-separated properties
# (propertylisttail).
propertylist bnf:mustBeOneSequence (
	( )
	( verb object objecttail propertylisttail )
).

# Either empty, or ";" followed by another verb/object group and a further
# tail.  The alternative consisting of a lone ";" is left disabled, as in
# the original:
#		( ";" )
propertylisttail bnf:mustBeOneSequence (
	( )
	( ";" verb object objecttail propertylisttail )
).


# Either empty, or "," followed by one more object and a further tail.
objecttail bnf:mustBeOneSequence ( ( ) ( "," object objecttail ) ).


# A verb is either a prop or the "@a" keyword token.
verb bnf:mustBeOneSequence ( ( prop ) ( "@a" ) ).

# prop offers no integerliteral, literal or bname alternative (compare with
# the alternatives listed for object).
# NOTE(review): the "[" propertylist "]" and "(" pathlist ")" alternatives
# are still accepted in property position - confirm this is intended.
prop bnf:mustBeOneSequence (
	( explicituri )
	( qname )
	( "[" propertylist "]" )
	( "(" pathlist ")" )
).

# subject offers no integerliteral or literal alternative; unlike prop it
# does accept a bname.
subject bnf:mustBeOneSequence (
	( explicituri )
	( qname )
	( bname )
	( "[" propertylist "]" )
	( "(" pathlist ")" )
).

# object accepts every term form in this grammar: the two URI forms, a
# bname, both literal forms, a bracketed propertylist and a parenthesised
# pathlist.
object bnf:mustBeOneSequence (
	( explicituri )
	( qname )
	( bname )
	( integerliteral )
	( literal )
	( "[" propertylist "]" )
	( "(" pathlist ")" )
).


# Contents of a "(" ... ")" list: either empty, or one item followed by the
# rest of the list.
# The item was previously the non-terminal "path", which this grammar never
# defines (it is an N3 construct); Turtle list items are objects.
pathlist bnf:mustBeOneSequence (
		( )
		( object pathlist )
	).

# A symbol is either an explicituri or a qname.
symbol bnf:mustBeOneSequence ( ( explicituri ) ( qname ) ).


# A literal is a string token followed by its (possibly empty) dtlang
# suffix.
literal bnf:mustBeOneSequence (
		( string dtlang )
	).

# Optional suffix of a literal: nothing, "@" plus a language code, or "^^"
# plus a datatype symbol.
# The language alternative previously read ("@" 1), i.e. the integer 1
# rather than a non-terminal; it now refers to the langcode terminal
# defined in the TERMINALS section.
dtlang bnf:mustBeOneSequence (
		( )
		( "@" langcode )
		( "^^" symbol )
	).


#______________________________________________________________________
#
#   TERMINALS

# Terminal: one or more characters in the range 0-9.
integerliteral
	bnf:matches """[0-9]+""" ;
	bnf:canStartWith "0" .

# Terminal: "<", any run of characters other than ">", then ">".
explicituri
	bnf:matches "<[^>]*>" ;
	bnf:canStartWith "<" .

# Terminal: an optional "prefix:" part (the prefix itself may be empty)
# followed by an optional local name.  ASCII only for now.
qname
	bnf:matches "(([a-zA-Z][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?" ;
	bnf:canStartWith "a", "_", ":" .  # @@ etc unicode

# Terminal: "_:" followed by an optional name.
bname
	bnf:matches "_:([a-zA-Z_][a-zA-Z0-9_]*)?" ;
	bnf:canStartWith "_" .

# Maybe dtlang should just be part of string regexp?
# Whitespace is not allowed

# was: "[a-zA-Z][a-zA-Z0-9]*(-[a-zA-Z0-9]+)?";
# Terminal: a language code - lower-case letters, then any number of
# "-"-separated groups of lower-case letters and digits.
# See http://www.w3.org/TR/rdf-testcases/#language
# (That reference used to sit in a trailing comment that had wrapped onto
# its own line, leaving a bare URL in the middle of the statement.)
langcode	bnf:matches  	"[a-z]+(-[a-z0-9]+)*";
		bnf:canStartWith 	"a".


#               raw regexp single quoted would be   "([^"]|(\\"))*"
# See:
# 	$ PYTHONPATH=$SWAP python
# 	>>> import tokenize 
# 	>>> import notation3
# 	>>> print notation3.stringToN3(tokenize.Double3)
# 	"[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\""
# 	>>> print notation3.stringToN3(tokenize.Double)
# 	"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\""
# After that we have to prefix each pattern with one or three opening \"
# characters, because the python regexps do not include them.
#
# string3		bnf:matches	"\"\"\"[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\"".
# string1	bnf:matches		"\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"".

# Terminal: either a triple-quoted string (first alternative) or a
# single-quoted string (second alternative); both allow backslash escapes.
# The regexp literal must stay on ONE line: a "..." string in N3 cannot
# contain a raw newline, and the previous line wrap split the character
# class [^\"\\\\] in two.
string		bnf:matches	"(\"\"\"[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\")|(\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\")";
		bnf:canStartWith 	"\"".

#____________________________________________________

#  Axioms reducing the shortcut BNF terms to bnf:mustBeOneSequence.

# Rewrites "?x bnf:zeroOrMore ?y" as: ?x is either empty, or ?y followed
# by ?x again.
{ ?x bnf:zeroOrMore ?y }
=>
{
	?x bnf:mustBeOneSequence (
		( )
		( ?y ?x )
	)
}.


# Rewrites "?x bnf:commaSeparatedPeriodTerminatedListOf ?y" as: ?x is
# either a lone ".", or one ?y followed by an anonymous CSLTail of ?y.
{ ?x bnf:commaSeparatedPeriodTerminatedListOf ?y }
=>
{ ?x bnf:mustBeOneSequence ( ( "." ) ( ?y [ bnf:CSLTail ?y ] ) ) }.

# Rewrites "?x bnf:CSLTail ?y" as: ?x is either the closing ".", or ","
# followed by one ?y and then ?x again.
{ ?x bnf:CSLTail ?y }
=>
{ ?x bnf:mustBeOneSequence ( ( "." ) ( "," ?y ?x ) ) }.


#  labelling of things which do not have explicit URIs:

# A zeroOrMore node over something labelled ?y gets the label ?y + "_s".
{
	?x bnf:zeroOrMore [ bnf:label ?y ].
	( ?y "_s" ) string:concatenation ?str
}
=>
{ ?x bnf:label ?str }.

# A commaSeparatedPeriodTerminatedListOf node over something labelled ?y
# gets the label ?y + "_csl".
{
	?x bnf:commaSeparatedPeriodTerminatedListOf [ bnf:label ?y ].
	( ?y "_csl" ) string:concatenation ?str
}
=>
{ ?x bnf:label ?str }.

# A CSLTail node over something labelled ?y gets the label ?y + "_necsl".
{
	?x bnf:CSLTail [ bnf:label ?y ].
	( ?y "_necsl" ) string:concatenation ?str
}
=>
{ ?x bnf:label ?str }.


#ends
Received on Wednesday, 3 November 2004 20:55:48 UTC