- From: Bjoern Hoehrmann <derhoermi@gmx.net>
- Date: Thu, 12 Jan 2006 14:22:53 +0100
- To: public-iri@w3.org
Hi,
I wrote the following regular expression for the IRI token as defined
in RFC 3987, but I think I made a mistake. Who can spot it? (You have to
remove all white-space, including the line breaks, to make it a legal
expression, of course.)
[A-Za-z][-+.0-9A-Za-z]*:(?:/(?:(?:%[0-9A-F]{2}|[^\x00-\x20"#%/<>?[-
^`{-}\x7F-\x9F\x{D800}-\x{F8FF}\x{FDD0}-\x{FDEF}\x{FFF0}-\x{FFFF}\x
{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FF
FF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\
x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CF
FFE}\x{CFFFF}\x{DFFFE}-\x{E0FFF}\x{EFFFE}-\x{10FFFF}])+(?:/(?:%[0-9
A-F]{2}|[^\x00-\x20"#%/<>?[-^`{-}\x7F-\x9F\x{D800}-\x{F8FF}\x{FDD0}
-\x{FDEF}\x{FFF0}-\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3F
FFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}
\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{A
FFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}-\x{E0FFF}\x{EFFF
E}-\x{10FFFF}])*)*)?|(?:%[0-9A-F]{2}|[^\x00-\x20"#%/<>?[-^`{-}\x7F-
\x9F\x{D800}-\x{F8FF}\x{FDD0}-\x{FDEF}\x{FFF0}-\x{FFFF}\x{1FFFE}\x{
1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFF
E}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x
{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFF
FF}\x{DFFFE}-\x{E0FFF}\x{EFFFE}-\x{10FFFF}])*|(?://(?:(?:%[0-9A-F]{
2}|[^\x00-\x20"#%/<>-@[-^`{-}\x7F-\x9F\x{D800}-\x{F8FF}\x{FDD0}-\x{
FDEF}\x{FFF0}-\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}
\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7
FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF
}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}-\x{E0FFF}\x{EFFFE}-\
x{10FFFF}])*@)?(?:\[(?:(?:(?:[0-9A-F]{1,4}:){,5}[0-9A-F]{1,4})?::[0
-9A-F]{1,4}|(?:(?:[0-9A-F]{1,4}:){,6}[0-9A-F]{1,4})?::|(?:(?:[0-9A-
F]{1,4}:){6}|::(?:[0-9A-F]{1,4}:){5}|(?:[0-9A-F]{1,4})?::(?:[0-9A-F
]{1,4}:){4}|(?:(?:[0-9A-F]{1,4}:)?[0-9A-F]{1,4})?::(?:[0-9A-F]{1,4}
:){3}|(?:(?:[0-9A-F]{1,4}:){,2}[0-9A-F]{1,4})?::(?:[0-9A-F]{1,4}:){
2}|(?:(?:[0-9A-F]{1,4}:){,3}[0-9A-F]{1,4})?::[0-9A-F]{1,4}:|(?:(?:[
0-9A-F]{1,4}:){,4}[0-9A-F]{1,4})?::)(?:[0-9A-F]{1,4}:[0-9A-F]{1,4}|
(?:(?:1[0-9]{2}|2(?:[0-4][0-9]|5[0-5])|(?:|[1-9])[0-9])\.){3}(?:1[0
-9]{2}|2(?:[0-4][0-9]|5[0-5])|(?:|[1-9])[0-9]))|v[0-9A-F]+\.[!$&-.0
-;=A-Z_a-z~]+)]|(?:(?:1[0-9]{2}|2(?:[0-4][0-9]|5[0-5])|(?:|[1-9])[0
-9])\.){3}(?:1[0-9]{2}|2(?:[0-4][0-9]|5[0-5])|(?:|[1-9])[0-9])|(?:%
[0-9A-F]{2}|[^\x00-\x20"#%/:<>-@[-^`{-}\x7F-\x9F\x{D800}-\x{F8FF}\x
{FDD0}-\x{FDEF}\x{FFF0}-\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF
}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{
6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFF
E}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}-\x{E0FFF}\
x{EFFFE}-\x{10FFFF}])*)(?::[0-9]*)?|(?:%[0-9A-F]{2}|[^\x00-\x20"#%/
<>?[-^`{-}\x7F-\x9F\x{D800}-\x{F8FF}\x{FDD0}-\x{FDEF}\x{FFF0}-\x{FF
FF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\
x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8F
FFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}
\x{CFFFE}\x{CFFFF}\x{DFFFE}-\x{E0FFF}\x{EFFFE}-\x{10FFFF}])+)(?:/(?
:%[0-9A-F]{2}|[^\x00-\x20"#%/<>?[-^`{-}\x7F-\x9F\x{D800}-\x{F8FF}\x
{FDD0}-\x{FDEF}\x{FFF0}-\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF
}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{
6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFF
E}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}-\x{E0FFF}\
x{EFFFE}-\x{10FFFF}])*)*)(?:\?(?:%[0-9A-F]{2}|[^\x00-\x20"#%<>[-^`{
-}\x7F-\x9F\x{D800}-\x{DFFF}\x{FDD0}-\x{FDEF}\x{FFF0}-\x{FFFF}\x{1F
FFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}
\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8
FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE
}\x{CFFFF}\x{DFFFE}-\x{E0FFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}\x
{10FFFE}\x{10FFFF}])*)?(?:#(?:%[0-9A-F]{2}|[^\x00-\x20"#%<>[-^`{-}\
x7F-\x9F\x{D800}-\x{F8FF}\x{FDD0}-\x{FDEF}\x{FFF0}-\x{FFFF}\x{1FFFE
}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{
5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFF
F}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x
{CFFFF}\x{DFFFE}-\x{E0FFF}\x{EFFFE}-\x{10FFFF}])*)?
Thanks,
--
Björn Höhrmann · mailto:bjoern@hoehrmann.de · http://bjoern.hoehrmann.de
Weinh. Str. 22 · Telefon: +49(0)621/4309674 · http://www.bjoernsworld.de
68309 Mannheim · PGP Pub. KeyID: 0xA4357E78 · http://www.websitedev.de/
Received on Thursday, 12 January 2006 13:29:57 UTC