- From: Dave Beckett <dave.beckett@bristol.ac.uk>
- Date: Wed, 3 Nov 2004 20:55:11 +0000
- To: www-archive@w3.org
# Turtle approx in N3
# based on http://www.w3.org/2000/10/swap/grammar/n3.n3
# untested!
# BNF without tokenization
#
# Layout note: every statement and every comment sits on its own line.
# An N3 comment runs from "#" to the end of the line, so comments and
# statements must never share a physical line (except a trailing comment).
#
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix bnf: <http://www.w3.org/2000/10/swap/grammar/bnf#>.
@prefix rul: <http://www.w3.org/2000/10/swap/grammar/bnf-rules#>.
@prefix : <http://www.w3.org/2000/10/swap/grammar/n3#>.
@prefix n3: <http://www.w3.org/2000/10/swap/grammar/n3#>.
@prefix list: <http://www.w3.org/2000/10/swap/list#>.
@prefix string: <http://www.w3.org/2000/10/swap/string#>.
@keywords a, is, of.

# Issues:
# - string token regexp not right
# - tokenizing rules in general: whitespace
# - encoding really needs specifying
# - @keywords affects tokenizing
# - Use of dot for !
# - comments (tokenizer deals with)
# - We assume ASCII, in fact should use not notNameChars for i18n

# tokenizing:
# Absorb anything until end of regexp, then stil white space
# period followed IMMEDIATELY by an opener or name char is taken as "!".
# Except after a "." used instead of in those circumstances,
# ws may be inserted between tokens.
# WS MUST be inserted between tokens where ambiguity would arise.
# (possible ending characters of one and beginning characters overlap)
#

# NOTE(review): this still names the application/n3 media type although the
# grammar below approximates Turtle -- confirm which media type is intended.
<> bnf:syntaxFor [ bnf:internetMediaType <http://www.w3.org/2003/mediatypes#application/n3>].

# <> rdfsem:semanticsFor "" .....

# __________________________________________________________________
#
# The N3 Full Grammar

# Start symbol: a run of declarations followed by "."-terminated statements.
# NOTE(review): "universal" and "existential" are referenced here but have no
# production in this part of the document -- confirm they are defined
# elsewhere or drop them for Turtle.
document a rul:Used;
    bnf:mustBeOneSequence(
        (
            [ bnf:zeroOrMore declaration ]
            [ bnf:zeroOrMore universal ]
            [ bnf:zeroOrMore existential ]
            statements_optional
            bnf:eof
        )
    ).

# Zero or more statements, each followed by a terminating ".".
statements_optional bnf:mustBeOneSequence (
    ( )
    ( statement "." statements_optional )
).

# Possibly empty list of statements separated by ".".
statementlist bnf:mustBeOneSequence (
    ( )
    ( statement statementtail )
).

statementtail bnf:mustBeOneSequence (
    ( )
    ( "." statementlist )
).

# Namespace prefix declaration.
declaration bnf:mustBeOneSequence(
    ( "@prefix" qname explicituri "." )
).
# A statement is a subject followed by a (possibly empty) property list.
statement bnf:mustBeOneSequence(
    ( subject propertylist )
).

# verb/object groups; further groups are introduced by ";".
propertylist bnf:mustBeOneSequence (
    ( )
    ( verb object objecttail propertylisttail )
).

# The commented-out alternative would permit a trailing ";".
propertylisttail bnf:mustBeOneSequence (
    ( )
#   ( ";" )
    ( ";" verb object objecttail propertylisttail )
).

# Additional objects for the same verb, separated by ",".
objecttail bnf:mustBeOneSequence (
    ( )
    ( "," object objecttail )
).

verb bnf:mustBeOneSequence (
    ( prop )
    ( "@a" )
).

# prop cannot be an integerliteral, literal or blank
prop bnf:mustBeOneSequence (
    ( explicituri )
    ( qname )
    ( "[" propertylist "]" )
    ( "(" pathlist ")" )
).

# subject cannot be an integerliteral or literal
subject bnf:mustBeOneSequence (
    ( explicituri )
    ( qname )
    ( bname )
    ( "[" propertylist "]" )
    ( "(" pathlist ")" )
).

object bnf:mustBeOneSequence (
    ( explicituri )
    ( qname )
    ( bname )
    ( integerliteral )
    ( literal )
    ( "[" propertylist "]" )
    ( "(" pathlist ")" )
).

# NOTE(review): "path" has no production in this part of the document;
# presumably the list items should be "object" for Turtle -- confirm.
pathlist bnf:mustBeOneSequence (
    ( )
    ( path pathlist )
).

symbol bnf:mustBeOneSequence (
    ( explicituri )
    ( qname )
).

literal bnf:mustBeOneSequence(
    ( string dtlang )
).

# Optional language tag ("@" langcode) or datatype ("^^" symbol) suffix.
# The language alternative references the langcode terminal defined below;
# it previously held the integer 1, which names no grammar symbol.
dtlang bnf:mustBeOneSequence(
    ( )
    ( "@" langcode )
    ( "^^" symbol )
).

#______________________________________________________________________
#
# TERMINALS

integerliteral bnf:matches """[0-9]+""";
    bnf:canStartWith "0".

explicituri bnf:matches "<[^>]*>";
    bnf:canStartWith "<".

qname bnf:matches "(([a-zA-Z][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?";
    bnf:canStartWith "a", "_", ":". # @@ etc unicode

bname bnf:matches "_:([a-zA-Z_][a-zA-Z0-9_]*)?";
    bnf:canStartWith "_".

# Maybe dtlang should just be part of string regexp?
# Whitespace is not allowed

# was: "[a-zA-Z][a-zA-Z0-9]*(-[a-zA-Z0-9]+)?";
# http://www.w3.org/TR/rdf-testcases/#language
langcode bnf:matches "[a-z]+(-[a-z0-9]+)*";
    bnf:canStartWith "a".
# raw regexp single quoted would be "([^"]|(\\"))*"
# See:
# $ PYTHONPATH=$SWAP python
# >>> import tokenize
# >>> import notation3
# >>> print notation3.stringToN3(tokenize.Double3)
# "[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\""
# >>> print notation3.stringToN3(tokenize.Double)
# "[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\""
# After that we have to prefix with one or three opening \" which
# the python regexp doesn't have them.
#
# string3 bnf:matches "\"\"\"[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\"".
# string1 bnf:matches "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"".

# Either a triple-quoted string (first alternative) or a single-quoted
# string (second alternative). The second alternative is the string1
# pattern above; its character class must read [^\"\\\\] with no embedded
# space, otherwise the N3 string contains an invalid "\ " escape.
string bnf:matches "(\"\"\"[^\"\\\\]*(?:(?:\\\\.|\"(?!\"\"))[^\"\\\\]*)*\"\"\")|(\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\")";
    bnf:canStartWith "\"".

#____________________________________________________
# Axioms reducing the shortcut BNF terms to bnf:mustBeOneSequence.

# x = y*  becomes  x ::= <empty> | y x
{ ?x bnf:zeroOrMore ?y } =>
{
    ?x bnf:mustBeOneSequence (
        ( )
        ( ?y ?x )
    )
}.

# x = comma-separated, period-terminated list of y
{ ?x bnf:commaSeparatedPeriodTerminatedListOf ?y } =>
{
    ?x bnf:mustBeOneSequence (
        ( "." )
        ( ?y [bnf:CSLTail ?y] )
    )
}.

# Tail of such a list: either the closing "." or "," plus another item.
{ ?x bnf:CSLTail ?y } =>
{
    ?x bnf:mustBeOneSequence (
        ( "." )
        ( "," ?y ?x )
    )
}.

# labelling of things which do not have explicit URIs:

{
    ?x bnf:zeroOrMore [ bnf:label ?y].
    ( ?y "_s" ) string:concatenation ?str
} => { ?x bnf:label ?str }.

{
    ?x bnf:commaSeparatedPeriodTerminatedListOf [ bnf:label ?y].
    ( ?y "_csl" ) string:concatenation ?str
} => { ?x bnf:label ?str }.

{
    ?x bnf:CSLTail [ bnf:label ?y].
    ( ?y "_necsl" ) string:concatenation ?str
} => { ?x bnf:label ?str }.

#ends
Received on Wednesday, 3 November 2004 20:55:48 UTC