ACTION 2022-12-13-c (was Re: draft agenda for invisible XML community group call of 14 March 2023)

> ACTION 2022-12-13-c: Steven to propose an iXML grammar for IRIs and

> illustrate the places where it might fail.



Here is my current version. It only addresses absolute IRIs, but is otherwise complete. It is slightly lax (it does not check the number of segments in an IPv6 address). It also does not distinguish at the top level between an IPv4 address and a domain address, since


 192.168.10.org


is permissible.


Check out the last example input!


Steven
ixml
     1 IRIs: absoluteIRI**nl, nl?. {zero or more absolute IRIs, one per line; a final line end is optional}
     2 -nl: cr?, lf. {line end: LF or CR LF; hidden, so it leaves no element in the output}
     3 -cr: -#d. {carriage return, deleted from the output}
     4 -lf: -#a. {line feed, deleted from the output}
     5 
     6 absoluteIRI: scheme, -"://", user?, host, port?, path?, query?, fragment?. {only scheme and host are required; the "://" is matched but not serialized}
     7 scheme: letter, letgit*. {must start with a letter}
     8 -letter: ["a"-"z"; "A"-"Z"].
     9 -letgit: ["a"-"z"; "A"-"Z"; "0"-"9"; "+.-"]. {letter, digit, or one of + . -}
    10                          {Example: http}
    11 user:   uch*, -"@". {everything up to the "@"; the "@" itself is dropped}
    12 -uch:    enc ; iletter ; punct. {punct includes ":", so a user:password pair is kept as one string}
    13                          {Example: user05:pw12345@}
    14 host:   domain++-"." ; -"[", ipv6, -"]". {dot-separated domains, or a bracketed IPv6 literal; dots and brackets are dropped. A dotted IPv4 address also matches the first alternative, as four domains}
    15 domain: (iletter+)++"-". {runs of iletter joined by single hyphens, so no leading, trailing or doubled "-"; the hyphens are kept in the content}
    16                          {Example: www.w3.org}
    17 { was:
    18 ipv6: (h4c, (h4c, (h4c, (h4c, (h4c, h4c?)?)?)?)?)?, (h4c, h4; ipv4).
    19 -h4c: h4?, ":".
    20 -h4:     h, (h, (h, h?)?)?.
    21 -h:      ["0"-"9"; "a"-"f"; "A"-"F"].
    22                          {Example: [2001:db8:1::8a2e:370:7334]}
    23 ipv4:   d3, ".", d3, ".", d3, ".", d3.
    24 -d3:     d, (d, d?)?.
    25 -d:      ["0"-"9"].
    26                          {Example: 192.168.0.1}
    27 }
    28 
    29 ipv6: h4++-":", (-":", ipv4)?; {uncompressed form: colon-separated groups, optionally ending in an IPv4 address; the number of groups is not checked}
    30       head, zeros, tail. {compressed form: groups on either side of one "::"}
    31 ipv4: d3, -".", d3, -".", d3, -".", d3. {four d3 fields; the separating dots are dropped}
    32 -head: h4**-":". {zero or more groups before the "::"}
    33 -tail: ipv4; {after the "::": an IPv4 address on its own,}
    34        h4++-":", (-":", ipv4)?; {or groups with an optional trailing IPv4 address,}
    35        . {or nothing at all}
    36 zeros: -"::". {serialized as an empty zeros element marking where the "::" stood}
    37 
    38 h4: h; h, h; h, h, h; h, h, h, h. {one group of an IPv6 address: one to four hex digits}
    39 -h: ["0"-"9"; "a"-"f"; "A"-"F"]. {a hex digit in either case; hidden}
    40 d3: d; {one field of an IPv4 address, 0-255 with no leading zeros (RFC 3986 dec-octet): 0-9}
    41     ["1"-"9"], d; {10-99}
    42     "1", d, d; {100-199}
    43     "2", ["01234"], d; {200-249}
    44     "25", ["012345"]. {250-255}
    45 -d: ["0"-"9"]. {a decimal digit; hidden}
    46 
    47 port:   -":", d*. {the digits may be absent, so a bare ":" yields an empty port element}
    48                          {Example: :80}
    49 path:   segment+. {one or more segments, each introduced by "/"}
    50 segment: -"/", pch*. {the "/" is dropped; a segment may be empty, e.g. for a trailing "/"}
    51 -pch:    enc ; iletter ; punct ; "@". {characters allowed in a path segment}
    52                          {Example: /2002/xforms/index.xhtml}
    53 query:  -"?", qfch*. {the leading "?" is dropped}
    54 -qfch:   enc ; iletter ; punct ; ["/?@"]. {as pch, plus "/" and "?"; shared by query and fragment}
    55                          {Example: ?q=test}
    56 fragment: -"#", qfch*. {the leading "#" is dropped}
    57                          {Example: #toc}
    58 -iletter: ["a"-"z"; "A"-"Z"; "0"-"9"; #A0-#EFFFD]. {ASCII letters and digits, plus every code point from #A0 to #EFFFD}
    59 -enc:    "%", h, h. {percent-encoded octet: "%" and two hex digits; h accepts either case, since RFC 3986 section 2.1 treats "a"-"f" and "A"-"F" as equivalent}
    60 -punct:  [".!$&'()*+,;=:_~-"]. {punctuation accepted unencoded in user, path segment, query and fragment}
input
     1 http://www.w3.org/
     2 http://www.w3.org/2002/xforms
     3 irc://irc.w3.org:6665/#forms
     4 http://search.example.org?q=a
     5 ssh://user@host.example.com:2222
     6 ftp://anonymous@example.net:4916/;type=d
     7 http://example/my%20file
     8 http://user05:pw12345@[2001:db8:1::8a2e:370:7334]:80/2002/xforms/index.xhtml?q=test#toc
     9 http://[::1]/
    10 https://[::ffff:192.0.2.128]:8080/
    11 http://192.168.0.1/
    12 http://192.168.0.1:/
    13 https://école.fr.example.org/élève.xhtml
    14 https://zh.wikipedia.org/wiki/Wikipedia:关于中文维基百科/en
    15 https://www.石川.日本/雅康#mimasa
    16 http://search.example.org?q=☺
    17 http://http://http://@http://http://?http://#http://

result
<IRIs>
   <absoluteIRI>
      <scheme>http</scheme>
      <host>
         <domain>www</domain>
         <domain>w3</domain>
         <domain>org</domain>
      </host>
      <path>
         <segment/>
      </path>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>http</scheme>
      <host>
         <domain>www</domain>
         <domain>w3</domain>
         <domain>org</domain>
      </host>
      <path>
         <segment>2002</segment>
         <segment>xforms</segment>
      </path>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>irc</scheme>
      <host>
         <domain>irc</domain>
         <domain>w3</domain>
         <domain>org</domain>
      </host>
      <port>6665</port>
      <path>
         <segment/>
      </path>
      <fragment>forms</fragment>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>http</scheme>
      <host>
         <domain>search</domain>
         <domain>example</domain>
         <domain>org</domain>
      </host>
      <query>q=a</query>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>ssh</scheme>
      <user>user</user>
      <host>
         <domain>host</domain>
         <domain>example</domain>
         <domain>com</domain>
      </host>
      <port>2222</port>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>ftp</scheme>
      <user>anonymous</user>
      <host>
         <domain>example</domain>
         <domain>net</domain>
      </host>
      <port>4916</port>
      <path>
         <segment>;type=d</segment>
      </path>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>http</scheme>
      <host>
         <domain>example</domain>
      </host>
      <path>
         <segment>my%20file</segment>
      </path>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>http</scheme>
      <user>user05:pw12345</user>
      <host>
         <ipv6>
            <h4>2001</h4>
            <h4>db8</h4>
            <h4>1</h4>
            <zeros/>
            <h4>8a2e</h4>
            <h4>370</h4>
            <h4>7334</h4>
         </ipv6>
      </host>
      <port>80</port>
      <path>
         <segment>2002</segment>
         <segment>xforms</segment>
         <segment>index.xhtml</segment>
      </path>
      <query>q=test</query>
      <fragment>toc</fragment>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>http</scheme>
      <host>
         <ipv6>
            <zeros/>
            <h4>1</h4>
         </ipv6>
      </host>
      <path>
         <segment/>
      </path>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>https</scheme>
      <host>
         <ipv6>
            <zeros/>
            <h4>ffff</h4>
            <ipv4>
               <d3>192</d3>
               <d3>0</d3>
               <d3>2</d3>
               <d3>128</d3>
            </ipv4>
         </ipv6>
      </host>
      <port>8080</port>
      <path>
         <segment/>
      </path>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>http</scheme>
      <host>
         <domain>192</domain>
         <domain>168</domain>
         <domain>0</domain>
         <domain>1</domain>
      </host>
      <path>
         <segment/>
      </path>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>http</scheme>
      <host>
         <domain>192</domain>
         <domain>168</domain>
         <domain>0</domain>
         <domain>1</domain>
      </host>
      <port/>
      <path>
         <segment/>
      </path>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>https</scheme>
      <host>
         <domain>école</domain>
         <domain>fr</domain>
         <domain>example</domain>
         <domain>org</domain>
      </host>
      <path>
         <segment>élève.xhtml</segment>
      </path>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>https</scheme>
      <host>
         <domain>zh</domain>
         <domain>wikipedia</domain>
         <domain>org</domain>
      </host>
      <path>
         <segment>wiki</segment>
         <segment>Wikipedia:关于中文维基百科</segment>
         <segment>en</segment>
      </path>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>https</scheme>
      <host>
         <domain>www</domain>
         <domain>石川</domain>
         <domain>日本</domain>
      </host>
      <path>
         <segment>雅康</segment>
      </path>
      <fragment>mimasa</fragment>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>http</scheme>
      <host>
         <domain>search</domain>
         <domain>example</domain>
         <domain>org</domain>
      </host>
      <query>q=☺</query>
   </absoluteIRI>
   <absoluteIRI>
      <scheme>http</scheme>
      <host>
         <domain>http</domain>
      </host>
      <port/>
      <path>
         <segment/>
         <segment>http:</segment>
         <segment/>
         <segment>@http:</segment>
         <segment/>
         <segment>http:</segment>
         <segment/>
         <segment/>
      </path>
      <query>http://</query>
      <fragment>http://</fragment>
   </absoluteIRI>
</IRIs>

end

Received on Tuesday, 14 March 2023 14:14:54 UTC