- From: Steven Pemberton <steven.pemberton@cwi.nl>
- Date: Tue, 14 Mar 2023 14:14:32 +0000
- To: "C. M. Sperberg-McQueen" <cmsmcq@blackmesatech.com>, public-ixml@w3.org
- Message-Id: <1678802831865.1414885986.709189861@cwi.nl>
> ACTION 2022-12-13-c: Steven to propose an iXML grammar for IRIs and
> illustrate the places where it might fail.
Here is my current version. It only addresses absolute IRIs, but is otherwise complete. It is slightly lax (it does not check the number of segments in an IPv6 address). It also does not distinguish at the top level between an IPv4 address and a domain name, since
192.168.10.org
is permissible.
Check out the last example input!
Steven
ixml
1 IRIs: (absoluteIRI++nl)?, nl?. {zero or more IRIs separated by line endings, with an optional final line ending}
2 -nl: lf; cr, lf. {line ending: LF or CR LF; hidden and deleted, so it leaves no trace in the result}
3 -cr: -#D. {carriage return}
4 -lf: -#A. {line feed}
5
6 absoluteIRI: scheme, -"://", user?, host, port?, path?, query?, fragment?. {only scheme and host are required; the "://" separator is dropped from the result}
7 scheme: letter, letgit*. {must start with a letter}
8 -letter: ["A"-"Z"; "a"-"z"].
9 -letgit: letter; ["0"-"9"; "+.-"]. {a letter, a digit, or one of + . -}
10 {Example: http}
11 user: uch*, -"@". {possibly empty user information; the closing @ is dropped from the result}
12 -uch: punct; iletter; enc. {the three alternatives are disjoint, so their order is immaterial}
13 {Example: user05:pw12345@}
14 host: domain++-"." ; -"[", ipv6, -"]". {dot-separated labels, or a bracketed IPv6 literal; dots and brackets are dropped from the result}
15 domain: (iletter+)++("-"+). {a label may contain runs of hyphens, as in xn--bcher-kva, but may not begin or end with one}
16 {Example: www.w3.org}
17 { was:
18 ipv6: (h4c, (h4c, (h4c, (h4c, (h4c, h4c?)?)?)?)?)?, (h4c, h4; ipv4).
19 -h4c: h4?, ":".
20 -h4: h, (h, (h, h?)?)?.
21 -h: ["0"-"9"; "a"-"f"; "A"-"F"].
22 {Example: [2001:db8:1::8a2e:370:7334]}
23 ipv4: d3, ".", d3, ".", d3, ".", d3.
24 -d3: d, (d, d?)?.
25 -d: ["0"-"9"].
26 {Example: 192.168.0.1}
27 }
28
29 ipv6: h4++-":", (-":", ipv4)?; {uncompressed form, optionally ending in an embedded IPv4 address; the number of groups is not checked}
30 head, zeros, tail. {compressed form: groups before and after a single "::"}
31 ipv4: d3, -".", d3, -".", d3, -".", d3. {four decimal parts; the dots are dropped from the result}
32 -head: h4**-":". {zero or more groups before the "::"}
33 -tail: ipv4; {what follows the "::": just an IPv4 address,}
34 h4++-":", (-":", ipv4)?; {or groups optionally followed by an IPv4 address,}
35 . {or nothing at all, as in "1::"}
36 zeros: -"::". {serialized as an empty zeros element marking where the "::" stood}
37
38 h4: h; h, h; h, h, h; h, h, h, h. {one to four hexadecimal digits}
39 -h: ["0"-"9"; "a"-"f"; "A"-"F"].
40 d3: d; {0-9}
41 ["1"-"9"], d; {10-99; a leading zero is not allowed}
42 "1", d, d; {100-199}
43 "2", ["0"-"4"], d; {200-249}
44 "25", ["0"-"5"]. {250-255}
45 -d: ["0"-"9"].
46
47 port: -":", ["0"-"9"]*. {the digits may be absent, so a bare colon yields an empty port}
48 {Example: :80}
49 path: segment+. {one or more segments}
50 segment: -"/", pch*. {the leading slash is dropped; an empty segment is allowed}
51 -pch: uch; "@". {everything allowed in user information, plus @}
52 {Example: /2002/xforms/index.xhtml}
53 query: -"?", qfch*. {the leading ? is dropped from the result}
54 -qfch: pch; ["/?"]. {any path character, plus / and ?}
55 {Example: ?q=test}
56 fragment: -"#", qfch*. {the leading # is dropped; same character set as query}
57 {Example: #toc}
58 -iletter: ["a"-"z"; "A"-"Z"; "0"-"9"; #A0-#EFFFD]. {ASCII letters and digits plus every code point from #A0 to #EFFFD; this is laxer than RFC 3987 ucschar, which excludes some sub-ranges}
59 -enc: "%", h, h. {percent-encoded octet; hex digits are accepted in either case, so %2f and %2F both match}
60 -punct: [".!$&'()*+,;=:_~-"].
input
1 http://www.w3.org/
2 http://www.w3.org/2002/xforms
3 irc://irc.w3.org:6665/#forms
4 http://search.example.org?q=a
5 ssh://user@host.example.com:2222
6 ftp://anonymous@example.net:4916/;type=d
7 http://example/my%20file
8 http://user05:pw12345@[2001:db8:1::8a2e:370:7334]:80/2002/xforms/index.xhtml?q=test#toc
9 http://[::1]/
10 https://[::ffff:192.0.2.128]:8080/
11 http://192.168.0.1/
12 http://192.168.0.1:/
13 https://école.fr.example.org/élève.xhtml
14 https://zh.wikipedia.org/wiki/Wikipedia:关于中文维基百科/en
15 https://www.石川.日本/雅康#mimasa
16 http://search.example.org?q=☺
17 http://http://http://@http://http://?http://#http://
result
<IRIs>
<absoluteIRI>
<scheme>http</scheme>
<host>
<domain>www</domain>
<domain>w3</domain>
<domain>org</domain>
</host>
<path>
<segment/>
</path>
</absoluteIRI>
<absoluteIRI>
<scheme>http</scheme>
<host>
<domain>www</domain>
<domain>w3</domain>
<domain>org</domain>
</host>
<path>
<segment>2002</segment>
<segment>xforms</segment>
</path>
</absoluteIRI>
<absoluteIRI>
<scheme>irc</scheme>
<host>
<domain>irc</domain>
<domain>w3</domain>
<domain>org</domain>
</host>
<port>6665</port>
<path>
<segment/>
</path>
<fragment>forms</fragment>
</absoluteIRI>
<absoluteIRI>
<scheme>http</scheme>
<host>
<domain>search</domain>
<domain>example</domain>
<domain>org</domain>
</host>
<query>q=a</query>
</absoluteIRI>
<absoluteIRI>
<scheme>ssh</scheme>
<user>user</user>
<host>
<domain>host</domain>
<domain>example</domain>
<domain>com</domain>
</host>
<port>2222</port>
</absoluteIRI>
<absoluteIRI>
<scheme>ftp</scheme>
<user>anonymous</user>
<host>
<domain>example</domain>
<domain>net</domain>
</host>
<port>4916</port>
<path>
<segment>;type=d</segment>
</path>
</absoluteIRI>
<absoluteIRI>
<scheme>http</scheme>
<host>
<domain>example</domain>
</host>
<path>
<segment>my%20file</segment>
</path>
</absoluteIRI>
<absoluteIRI>
<scheme>http</scheme>
<user>user05:pw12345</user>
<host>
<ipv6>
<h4>2001</h4>
<h4>db8</h4>
<h4>1</h4>
<zeros/>
<h4>8a2e</h4>
<h4>370</h4>
<h4>7334</h4>
</ipv6>
</host>
<port>80</port>
<path>
<segment>2002</segment>
<segment>xforms</segment>
<segment>index.xhtml</segment>
</path>
<query>q=test</query>
<fragment>toc</fragment>
</absoluteIRI>
<absoluteIRI>
<scheme>http</scheme>
<host>
<ipv6>
<zeros/>
<h4>1</h4>
</ipv6>
</host>
<path>
<segment/>
</path>
</absoluteIRI>
<absoluteIRI>
<scheme>https</scheme>
<host>
<ipv6>
<zeros/>
<h4>ffff</h4>
<ipv4>
<d3>192</d3>
<d3>0</d3>
<d3>2</d3>
<d3>128</d3>
</ipv4>
</ipv6>
</host>
<port>8080</port>
<path>
<segment/>
</path>
</absoluteIRI>
<absoluteIRI>
<scheme>http</scheme>
<host>
<domain>192</domain>
<domain>168</domain>
<domain>0</domain>
<domain>1</domain>
</host>
<path>
<segment/>
</path>
</absoluteIRI>
<absoluteIRI>
<scheme>http</scheme>
<host>
<domain>192</domain>
<domain>168</domain>
<domain>0</domain>
<domain>1</domain>
</host>
<port/>
<path>
<segment/>
</path>
</absoluteIRI>
<absoluteIRI>
<scheme>https</scheme>
<host>
<domain>école</domain>
<domain>fr</domain>
<domain>example</domain>
<domain>org</domain>
</host>
<path>
<segment>élève.xhtml</segment>
</path>
</absoluteIRI>
<absoluteIRI>
<scheme>https</scheme>
<host>
<domain>zh</domain>
<domain>wikipedia</domain>
<domain>org</domain>
</host>
<path>
<segment>wiki</segment>
<segment>Wikipedia:关于中文维基百科</segment>
<segment>en</segment>
</path>
</absoluteIRI>
<absoluteIRI>
<scheme>https</scheme>
<host>
<domain>www</domain>
<domain>石川</domain>
<domain>日本</domain>
</host>
<path>
<segment>雅康</segment>
</path>
<fragment>mimasa</fragment>
</absoluteIRI>
<absoluteIRI>
<scheme>http</scheme>
<host>
<domain>search</domain>
<domain>example</domain>
<domain>org</domain>
</host>
<query>q=☺</query>
</absoluteIRI>
<absoluteIRI>
<scheme>http</scheme>
<host>
<domain>http</domain>
</host>
<port/>
<path>
<segment/>
<segment>http:</segment>
<segment/>
<segment>@http:</segment>
<segment/>
<segment>http:</segment>
<segment/>
<segment/>
</path>
<query>http://</query>
<fragment>http://</fragment>
</absoluteIRI>
</IRIs>
end
Received on Tuesday, 14 March 2023 14:14:54 UTC