- From: Dave Beckett <dave.beckett@bristol.ac.uk>
- Date: Wed, 3 Nov 2004 20:55:11 +0000
- To: www-archive@w3.org
# Turtle approx in N3
# based on http://www.w3.org/2000/10/swap/grammar/n3.n3
# untested!
# BNF without tokenization
#
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix bnf: <http://www.w3.org/2000/10/swap/grammar/bnf#>.
@prefix rul: <http://www.w3.org/2000/10/swap/grammar/bnf-rules#>.
@prefix : <http://www.w3.org/2000/10/swap/grammar/n3#>.
@prefix n3: <http://www.w3.org/2000/10/swap/grammar/n3#>.
@prefix list: <http://www.w3.org/2000/10/swap/list#>.
@prefix string: <http://www.w3.org/2000/10/swap/string#>.
@keywords a, is, of.
# Issues:
# - string token regexp not right
# - tokenizing rules in general: whitespace
# - encoding really needs specifying
# - @keywords affects tokenizing
# - Use of dot for !
# - comments (tokenizer deals with)
# - We assume ASCII, in fact should use not notNameChars for i18n
# tokenizing:
# Absorb anything until end of regexp, then stil white space
# period followed IMMEDIATELY by an opener or name char is taken as "!".
# Except after a "." used instead of in those circumstances,
# ws may be inserted between tokens.
# WS MUST be inserted between tokens where ambiguity would arise.
# (possible ending characters of one and beginning characters overlap)
#
# States that this document (<>) is bnf:syntaxFor a resource whose
# bnf:internetMediaType is the media type URI given below.
# NOTE(review): the media type is application/n3, as in the N3 grammar this
# file is based on — confirm whether a Turtle media type is intended here.
<> bnf:syntaxFor [ bnf:internetMediaType
<http://www.w3.org/2003/mediatypes#application/n3>].
# <> rdfsem:semanticsFor "" .....
# __________________________________________________________________
#
# The Turtle grammar (an approximation derived from the N3 Full Grammar)
# document: the top-level production (the only one typed rul:Used).
# A document is zero or more @prefix declarations, followed by zero or more
# "."-terminated statements, followed by end of input.
# The N3 quantifier productions (universal, existential) are no longer
# referenced: this grammar does not define them, so referring to them left
# the grammar with dangling nonterminals.
document a rul:Used;
    bnf:mustBeOneSequence (
        (
            [ bnf:zeroOrMore declaration ]
            statements_optional
            bnf:eof
        )
    ).
# statements_optional: either nothing, or one statement terminated by "."
# followed by further optional statements (right-recursive).
statements_optional bnf:mustBeOneSequence (
    ( )
    ( statement "." statements_optional )
).
# statementlist: empty, or a statement followed by a statementtail.
statementlist bnf:mustBeOneSequence ( ( ) ( statement statementtail ) ).
# statementtail: empty, or a "." followed by a statementlist.
statementtail bnf:mustBeOneSequence ( ( ) ( "." statementlist ) ).
# declaration: "@prefix", then a qname token, then an explicituri token,
# then the terminating ".".
declaration bnf:mustBeOneSequence ( ( "@prefix" qname explicituri "." ) ).
# statement: a subject followed by its (possibly empty) propertylist.
statement bnf:mustBeOneSequence (
    ( subject propertylist )
).
# propertylist: empty, or a verb with one object, then any further
# ","-separated objects (objecttail) and ";"-separated verb groups
# (propertylisttail).
propertylist bnf:mustBeOneSequence ( ( ) ( verb object objecttail propertylisttail ) ).
# propertylisttail: empty, or ";" followed by another verb/object group and
# a further tail. The bare trailing ";" alternative is left disabled.
propertylisttail bnf:mustBeOneSequence (
    ( )
    # ( ";" )
    (
        ";"
        verb object objecttail
        propertylisttail
    )
).
# objecttail: empty, or "," followed by another object and a further tail.
objecttail bnf:mustBeOneSequence ( ( ) ( "," object objecttail ) ).
# verb: either a prop or the keyword token "@a".
verb bnf:mustBeOneSequence ( ( prop ) ( "@a" ) ).
# prop cannot be an integerliteral, literal or blank
# Alternatives: URI reference, qname, bracketed propertylist, or
# parenthesised pathlist.
prop bnf:mustBeOneSequence (
    ( explicituri )  ( qname )
    ( "[" propertylist "]" )
    ( "(" pathlist ")" )
).
# subject cannot be an integerliteral or literal
# Alternatives: URI reference, qname, bname, bracketed propertylist, or
# parenthesised pathlist.
subject bnf:mustBeOneSequence (
    ( explicituri )  ( qname )  ( bname )
    ( "[" propertylist "]" )
    ( "(" pathlist ")" )
).
# object: any of the subject forms plus the two literal forms
# (integerliteral and literal).
object bnf:mustBeOneSequence (
    ( explicituri )  ( qname )  ( bname )
    ( integerliteral )  ( literal )
    ( "[" propertylist "]" )
    ( "(" pathlist ")" )
).
# pathlist: the contents of a "(" ... ")" collection — zero or more items.
# Each item is an object; the N3 nonterminal "path" that stood here is not
# defined anywhere in this grammar.
pathlist bnf:mustBeOneSequence (
    ( )
    ( object pathlist )
).
# symbol: a URI reference written either explicitly or as a qname.
symbol bnf:mustBeOneSequence ( ( explicituri ) ( qname ) ).
# literal: a string token followed by dtlang.
literal bnf:mustBeOneSequence (
    ( string dtlang )
).
# dtlang: what may follow a string — nothing, "@" plus a language code
# (langcode terminal), or "^^" plus a datatype symbol.
# The language alternative previously read ("@" 1), a typo for langcode.
dtlang bnf:mustBeOneSequence (
    ( )
    ( "@" langcode )
    ( "^^" symbol )
).
#______________________________________________________________________
#
# TERMINALS
# integerliteral: one or more decimal digits.
integerliteral bnf:matches """[0-9]+""".
integerliteral bnf:canStartWith "0".
# explicituri: "<", any run of characters other than ">", then ">".
explicituri bnf:matches "<[^>]*>".
explicituri bnf:canStartWith "<".
# qname: an optional prefix ending in ":", then an optional local name.
# Both parts are restricted to ASCII letters, digits and "_".
qname bnf:matches "(([a-zA-Z][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?".
qname bnf:canStartWith "a", "_", ":". # @@ etc unicode
# bname: "_:" followed by an optional ASCII name.
bname bnf:matches "_:([a-zA-Z_][a-zA-Z0-9_]*)?".
bname bnf:canStartWith "_".
# Maybe dtlang should just be part of string regexp?
# Whitespace is not allowed
# was: "[a-zA-Z][a-zA-Z0-9]*(-[a-zA-Z0-9]+)?";
# langcode: lower-case letters, then zero or more "-"-prefixed groups of
# lower-case letters or digits.
# See http://www.w3.org/TR/rdf-testcases/#language
# (That URL was a trailing comment that line-wrapping had pushed out of the
# comment and into the middle of the statement.)
langcode bnf:matches "[a-z]+(-[a-z0-9]+)*";
    bnf:canStartWith "a".
# raw regexp single quoted would be "([^"]|(\\"))*"
# See:
# $ PYTHONPATH=$SWAP python
# >>> import tokenize
# >>> import notation3
# >>> print notation3.stringToN3(tokenize.Double3)
# "[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\""
# >>> print notation3.stringToN3(tokenize.Double)
# "[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\""
# After that we have to prefix with one or three opening \" which
# the python regexp doesn't have them.
#
# string3 bnf:matches "\"\"\"[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\"".
# string1 bnf:matches "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"".
# string: either a triple-quoted ("""...""") or a single-quoted ("...")
# string token; the two alternatives are the patterns printed in the Python
# session above, each prefixed with its opening quote(s).
# The regexp literal must stay on one physical line: it was previously
# wrapped in the middle of a "\\\\" escape sequence, which is not a valid
# single-line string.
string bnf:matches "(\"\"\"[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\")|(\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\")";
    bnf:canStartWith "\"".
#____________________________________________________
# Axioms reducing the shortcut BNF terms to bnf:mustBeOneSequence.
# zeroOrMore: ?x is either empty, or one ?y followed by ?x again.
{
    ?x bnf:zeroOrMore ?y
} => {
    ?x bnf:mustBeOneSequence ( ( ) ( ?y ?x ) )
}.
# commaSeparatedPeriodTerminatedListOf: ?x is either a lone ".", or one ?y
# followed by an anonymous nonterminal that is the CSLTail of ?y.
{ ?x bnf:commaSeparatedPeriodTerminatedListOf ?y } => {
    ?x bnf:mustBeOneSequence ( ( "." ) ( ?y [ bnf:CSLTail ?y ] ) )
}.
# CSLTail: ?x is either the terminating ".", or "," then ?y then ?x again.
{ ?x bnf:CSLTail ?y } => {
    ?x bnf:mustBeOneSequence ( ( "." ) ( "," ?y ?x ) )
}.
# labelling of things which do not have explicit URIs:
# A zeroOrMore of something labelled ?y is itself labelled ?y + "_s".
{
    ?x bnf:zeroOrMore [ bnf:label ?y ].
    ( ?y "_s" ) string:concatenation ?str
} => {
    ?x bnf:label ?str
}.
# A commaSeparatedPeriodTerminatedListOf something labelled ?y is itself
# labelled ?y + "_csl".
{
    ?x bnf:commaSeparatedPeriodTerminatedListOf [ bnf:label ?y ].
    ( ?y "_csl" ) string:concatenation ?str
} => {
    ?x bnf:label ?str
}.
# A CSLTail of something labelled ?y is itself labelled ?y + "_necsl".
{
    ?x bnf:CSLTail [ bnf:label ?y ].
    ( ?y "_necsl" ) string:concatenation ?str
} => {
    ?x bnf:label ?str
}.
#ends
Received on Wednesday, 3 November 2004 20:55:48 UTC