- From: Graham Klyne <GK-lists@ninebynine.org>
- Date: Tue, 22 Feb 2005 11:09:27 +0000
- To: Dave Beckett <dave.beckett@bristol.ac.uk>, public-cwm-talk@w3.org
At 17:02 21/02/05 +0000, Dave Beckett wrote:
>In terms of changes, I want to add """triple quotes""" but haven't had
>the time to get into the detail of what's allowed inside according to
>the specs, and what various implementations do. I'm prepared to leave
>that until after a first note pub.
>
>Thoughts/comments?
FWIW, here is the triple-quote string parser from the Notation3 parser in
my Swish implementation. I don't recall whether this came from any formal
spec, but I think it reasonably matches a range of actual data. Within the
string, it allows:
stringLetter (non-control, non-", non-\)
escape (\c, \uxxxx, \Uxxxxxxxx)
" followed by non-"
"" followed by non-"
newline (\n)
[[
-- Triple-quoted string literal.
--
-- Parses an opening """, any number of 'tripleQuoteSubstring' fragments and
-- a closing """, and returns the fragments joined into one String.  The
-- body may therefore contain line breaks and runs of one or two '"'.
-- The opening delimiter is wrapped in 'try' so that input beginning with
-- fewer than three quotes is not consumed by this parser.
tripleQuoteString :: N3Parser String
tripleQuoteString =
    lexeme (literal <?> "triple-quoted literal string")
  where
    openQuotes  = try (string "\"\"\"")
    closeQuotes = string "\"\"\"" <?> "end of string (\"\"\")"
    literal     = do
        pieces <- between openQuotes closeQuotes (many tripleQuoteSubstring)
        return (concat pieces)
-- One fragment of a triple-quoted string body: a run of non-quote
-- characters, optionally preceded by one or two '"' characters.
--
-- The quote-prefixed alternatives are wrapped in 'try': they consume the
-- quote(s) before discovering that no non-quote character follows (as
-- happens at the closing delimiter), and must then give the input back.
tripleQuoteSubstring :: N3Parser String
tripleQuoteSubstring =
    choice
        [ tripleQuoteSubstring1
        , try sqTripleQuoteSubstring1
        , try dqTripleQuoteSubstring1
        ]
-- Two '"' characters followed by a non-empty run of non-quote string
-- characters.  Both quotes are kept at the front of the returned text.
dqTripleQuoteSubstring1 :: N3Parser String
dqTripleQuoteSubstring1 = do
    _    <- string "\"\""
    rest <- tripleQuoteSubstring1
    return ("\"\"" ++ rest)
-- A single '"' character followed by a non-empty run of non-quote string
-- characters.  The quote is kept at the front of the returned text.
sqTripleQuoteSubstring1 :: N3Parser String
sqTripleQuoteSubstring1 = do
    _    <- char '"'
    rest <- tripleQuoteSubstring1
    return ("\"" ++ rest)
-- Match at least one non-quote character in a triple-quoted string.
--
-- Each element parser yields a 'Maybe Char'; any 'Nothing' results are
-- dropped and the remaining characters are returned in order.
tripleQuoteSubstring1 :: N3Parser String
tripleQuoteSubstring1 = do
    parsed <- many1 tripleQuoteStringChar
    return [c | Just c <- parsed]
-- A single character of a triple-quoted string body: anything accepted by
-- 'stringChar', or a literal line feed (which 'stringChar' does not accept).
--
-- NOTE(review): other raw control characters (e.g. tab, carriage return)
-- are still rejected because 'stringLetter' requires c >= '\032' --
-- confirm that this is intended for CRLF or tab-indented input.
tripleQuoteStringChar :: CharParser st (Maybe Char)
tripleQuoteStringChar = stringChar <|> lineFeed
  where
    lineFeed = string "\n" >> return (Just '\n')
-- A single string character: either an ordinary letter or a backslash
-- escape sequence.  The result is wrapped in 'Just' in both cases.
stringChar :: CharParser st (Maybe Char)
stringChar =
    (fmap Just stringLetter <|> stringEscape) <?> "string character"
-- An ordinary (unescaped) string character: anything at or above
-- '\032' (decimal 32, the space character) other than '"' and '\\'.
stringLetter :: CharParser st Char
stringLetter = satisfy ordinary
  where
    ordinary c = c >= '\032' && c /= '"' && c /= '\\'
-- A backslash followed by an escape code; yields the character that the
-- escape denotes, wrapped in 'Just'.
stringEscape :: CharParser st (Maybe Char)
stringEscape = char '\\' >> fmap Just escapeCode
-- Escape codes: the part of an escape sequence that follows the backslash.
-- Tried in order: single-character escape, \uhhhh, \Uhhhhhhhh.
escapeCode :: CharParser st Char
escapeCode = choice [charEsc, charUCS2, charUCS4] <?> "escape code"
-- \c
-- Single-character escape.  The two strings below are paired position by
-- position: the first holds the escape letters, the second the character
-- each one stands for.
charEsc :: CharParser st Char
charEsc = choice (zipWith escape "nrt\\\"\'" "\n\r\t\\\"\'")
  where
    escape letter meaning = char letter >> return meaning
-- \uhhhh
-- 'u' followed by exactly four hexadecimal digits; yields the character
-- whose code is the value of those digits.
charUCS2 :: CharParser st Char
charUCS2 = char 'u' >> fmap chr (numberFW 16 hexDigit 4 0)
-- \Uhhhhhhhh
-- 'U' followed by exactly eight hexadecimal digits; yields the character
-- whose code is the value of those digits.
--
-- Eight hex digits can express values above 0x10FFFF (and can wrap to a
-- negative number where Int is 32 bits wide).  'chr' is partial and raises
-- a runtime error for such arguments, so the value is range-checked here
-- and an out-of-range escape is reported as an ordinary parse failure.
charUCS4 :: CharParser st Char
charUCS4 = do
    _ <- char 'U'
    n <- numberFW 16 hexDigit 8 0
    if n >= 0 && n <= 0x10FFFF
        then return (chr n)
        else fail "\\U escape outside the Unicode range (maximum is 0010FFFF)"
-- Parse a fixed-width number.
--
-- Arguments, in order:
--   base       numeric base each digit is weighted by
--   baseDigit  parser for a single digit character
--   width      number of digits still to be read
--   val        value accumulated so far (callers in this listing pass 0)
--
-- Reads exactly 'width' digits, most significant first, and returns the
-- accumulated value.  Recursion stops only when 'width' reaches exactly 0,
-- so 'width' is expected to be non-negative.
numberFW :: Int -> CharParser st Char -> Int -> Int -> CharParser st Int
numberFW base baseDigit width val
    | width == 0 = return val
    | otherwise  =
        baseDigit >>= \d ->
            numberFW base baseDigit (width - 1) (val * base + digitToInt d)
]]
#g
------------
Graham Klyne
For email:
http://www.ninebynine.org/#Contact
Received on Tuesday, 22 February 2005 12:26:25 UTC