# CWM rules for HTTP protocol: What can be inferred from a GET?
# This is a DRAFT and surely will change without notice.
#
# Author: David Booth
# Date: 26-Feb-2008
# License: GPLv3: http://www.gnu.org/licenses/gpl-3.0.html
#
# See associated test data.
#
# NOTE(review): In the copy of this file under review, every <...> IRI term
# is missing: the namespace IRI after each @prefix below, and a few subjects
# and triples inside rules (each marked with its own NOTE(review)).  They
# must be restored from the upstream file before cwm can parse this.
#
# Annotations use rdfs:label and rdfs:comment: these terms are defined in
# the RDFS vocabulary, not the RDF one.

######################## Prefixes ###########################
# I see python code for parsing URIs here:
# http://www.w3.org/2000/10/swap/uripath.py
# but I haven't yet found an ontology for parsing URIs,
# though maybe the POWDER working group will eventually make one.
# @@@@ TODO: Find URI parsing ontology
@prefix uri: .
@prefix rdf: .
@prefix rdfs: .
@prefix log: .
# These two HTTP ontologies by David Sheets are not currently used,
# but relationships between my http: ontology and his
# are shown in comments:
# @prefix dshttp: .
# @prefix dshttph: .
@prefix http: .
@prefix sumo: .
@prefix owl: .
@prefix xsd: .
@prefix decl: .
@prefix n3: .
@prefix string: .
@prefix awww: .

########################## AWWW #############################
# Concepts from the Architecture of the World Wide Web:
# http://www.w3.org/TR/webarch/
#
awww:Resource a rdfs:Class ;
    rdfs:comment "A resource, as defined in http://www.w3.org/TR/webarch/#def-resource : 'We do not limit the scope of what might be a resource. The term ''resource'' is used in a general sense for whatever might be identified by a URI. It is conventional on the hypertext Web to describe Web pages, images, product catalogs, etc. as ''resources''." .

awww:InformationResource a rdfs:Class ;
    rdfs:label "InformationResource" ;
    rdfs:comment "An information resource, roughly as defined in http://www.w3.org/TR/webarch/#def-information-resource though that definition is flawed. According to http://www.w3.org/TR/webarch/#p43 'Other things, such as cars and dogs (and, if you've printed this document on physical sheets of paper, the artifact that you are holding in your hand), are resources too. They are not information resources, however . . . .', so I will take this to mean that an information resource is an abstract entity. This property may either be asserted explicitly or inferred by the httpRange-14 rule." ;
    rdfs:subClassOf sumo:AbstractEntity .

######################### URI Parsing ##############################

uri:hasRacine a rdf:Property ;
    rdfs:label "hasRacine" ;
    rdfs:comment "Parse a URI with optional fragment identifier to extract the racine (the part before the #). The URI is NOT required to contain a fragment identifier. A URI with no fragID will map to itself. Compare hasProperRacine. This property should NOT be asserted explicitly -- it will be inferred from the URI." ;
    rdfs:domain xsd:anyURI ;
    rdfs:range xsd:anyURI .  # But no fragID

uri:hasProperRacine a rdf:Property ;
    rdfs:label "hasProperRacine" ;
    rdfs:comment "Parse a URI with fragment identifier to extract the racine (the part before the #). The URI must have a fragment identifier for this property to hold. Compare hasRacine. This property should NOT be asserted explicitly -- it will be inferred from the URI." ;
    rdfs:domain xsd:anyURI ;
    rdfs:range xsd:anyURI .  # But no fragID

# Rule for hasRacine.
# Test with:
#   "http://example/people#fred"^^xsd:anyURI a xsd:anyURI .
#   "http://example/people#"^^xsd:anyURI a xsd:anyURI .
#   "http://example/people"^^xsd:anyURI a xsd:anyURI .
{
    ?u a xsd:anyURI .
    # FragID would be:
    # (?u "\\A[^\\#]*\\#(.+)\\Z") string:scrape ?fragid .
    # Racine as a simple string:
    (?u "\\A([^\\#]+)") string:scrape ?stringRacine .
    # Turn ?stringRacine into type xsd:anyURI:
    (?stringRacine xsd:anyURI) log:dtlit ?racine .
    # ("FIRED: " ?u " uri:hasRacine " ?racine "\n") string:concatenation ?fired . # Debug
} => {
    ?u uri:hasRacine ?racine .
    # "a" log:outputString ?fired . # Debug
} .

# Rule for hasProperRacine.
# Test with:
#   "http://example/people#fred"^^xsd:anyURI a xsd:anyURI .
#   "http://example/people#"^^xsd:anyURI a xsd:anyURI .
# This last one should fail to match, because it has no #:
#   "http://example/people"^^xsd:anyURI a xsd:anyURI .
{
    ?u a xsd:anyURI .
    # FragID would be:
    # (?u "\\A[^\\#]*\\#(.+)\\Z") string:scrape ?fragid .
    # Proper racine as a simple string (the trailing \\# makes the # mandatory):
    (?u "\\A([^\\#]+)\\#") string:scrape ?stringRacine .
    # Turn ?stringRacine into type xsd:anyURI:
    (?stringRacine xsd:anyURI) log:dtlit ?racine .
    # ("FIRED: " ?u " uri:hasProperRacine " ?racine "\n") string:concatenation ?fired . # Debug
} => {
    ?u uri:hasProperRacine ?racine .
    # "a" log:outputString ?fired . # Debug
} .

uri:hasURI a rdf:Property ;
    rdfs:label "hasURI" ;
    rdfs:comment "The subject resource is denoted by the object URI. It is basically the same as log:uri, but has a range of xsd:anyURI, so that a simple assertion like {r hasURI u} will cause u to be recognized as type xsd:anyURI without having to assert it explicitly. This property should be asserted explicitly -- it is NOT inferred." ;
    rdfs:subPropertyOf log:uri ;
    # rdfs:domain rdfs:Resource ;
    rdfs:range xsd:anyURI .


########################## HTTP #############################
# HTTP 1.1

########## Classes
http:Reply a rdfs:Class ;
    # Maybe: owl:sameClassAs dshttp:ResponseMessage ;
    rdfs:comment "An HTTP 1.1 reply, as defined in http://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html#sec6 ." .

## Deleted class http:StatusCode, as it wasn't used.

########### Properties
http:hasLocation a rdf:Property ;
    rdfs:comment "The Reply has an HTTP 1.1 Location response-header field, as defined in http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.30 ." ;
    # Would probably be: rdfs:subPropertyOf dshttph:location ;
    # except that dshttph: doesn't currently define that property.
    rdfs:domain http:Reply ;
    rdfs:range xsd:anyURI .

http:hasStatusCode a rdf:Property ;
    rdfs:comment "The Reply has an HTTP 1.1 Status-Code, as defined in http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10 ." ;
    rdfs:domain http:Reply ;
    rdfs:range rdfs:Literal .  # Was rdfs:Literal

http:hasContentType a rdf:Property ;
    rdfs:comment "The Reply has an HTTP 1.1 Content-Type entity-header field, as defined in http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17 ." ;
    # Would probably be: rdfs:subPropertyOf dshttp:status ;
    rdfs:domain http:Reply ;
    rdfs:range rdfs:Literal .

http:hasEntityBody a rdf:Property ;
    rdfs:comment "The Reply has an HTTP 1.1 Entity Body, as defined in http://www.w3.org/Protocols/rfc2616/rfc2616-sec7.html#sec7.2 . The Entity Body 'is obtained from the message-body by decoding any Transfer-Encoding that might have been applied to ensure safe and proper transfer of the message'. I did not bother to model the message-body, as it was not needed. Also, the spec defines an Entity Body as a sequence of octets. I don't know if this is the same data type as xsd:hexBinary, but for simplicity I assumed that an Entity Body can be modeled as an rdfs:Literal." ;
    # Would probably be: rdfs:subPropertyOf dshttp:entity-body ;
    rdfs:domain http:Reply ;
    rdfs:range rdfs:Literal .

http:hasDirectGetReply a rdf:Property ;
    rdfs:comment "An HTTP 1.1 GET on the URI directly yielded a Reply. By the HTTP 1.1 spec, http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 the URI must not contain a fragment identifier. GET is defined in http://www.w3.org/Protocols/rfc2616/rfc2616-sec9.html#sec9.3 . Compare hasGetReply. Test data should assert hasDirectGetReply, from which hasGetReply will be inferred." ;
    rdfs:domain xsd:anyURI ;
    rdfs:range http:Reply .

http:hasGetReply a rdf:Property ;
    rdfs:comment "An HTTP 1.1 GET on the URI directly or indirectly (through 301, 302 or 307 redirects) yielded a Reply. Compare hasDirectGetReply. Among other things, hasGetReply is used to delegate authority for URI declaration: if ?u hasGetReply ?r then ?r is treated as authoritative in the sense described in http://dbooth.org/2007/uri-decl/20070817.htm#precise-def-uri-decl , such that the act of serving the reply via the original URI satisfies the requirement for a performative speech act. Test data should NOT assert hasGetReply, but should instead assert hasDirectGetReply and let hasGetReply be inferred." ;
    rdfs:domain xsd:anyURI ;
    rdfs:range http:Reply .

########### Rules

##### OBSOLETE RULE.  The decl:hasDeclaration rule for an
##### awww:InformationResource makes this rule obsolete.
# httpRange-14 rule: 200 response => InformationResource
# http://lists.w3.org/Archives/Public/www-tag/2005Jun/0039.html
{
    ?r uri:hasURI ?u .
    ?u http:hasGetReply ?reply .
    ?reply http:hasStatusCode 200 .
    ("FIRED: " ?r " a awww:InformationResource \n") string:concatenation ?fired . # Debug
    # @@ TODO: WTF????  Somehow test6 fails if this rule is commented out,
    # but succeeds if this rule is here, even when it
    # cannot fire because of this antecedent:
    # NOTE(review): the triple that preceded the '.' below is missing from
    # the copy under review -- restore it from the upstream file.
    .
} => {
    # ?r a awww:InformationResource .
    "a" log:outputString ?fired . # Debug
} .

# Definition of hasGetReply (base case).
# hasDirectGetReply => hasGetReply.
{
    ?u http:hasDirectGetReply ?reply .      # IF direct deref ?u yields ?reply
    # ("FIRED base: " ?u " http:hasGetReply " ?reply "\n") string:concatenation ?fired . # Debug
} => {
    ?u http:hasGetReply ?reply .            # THEN it derefs to ?reply.
    # "a" log:outputString ?fired . # Debug
} .

# Definition of hasGetReply (recursive case).
# For the purpose of hasGetReply, 301, 302 and 307 are treated the same:
# they are all deemed to delegate URI declaration authority to the new URI.
# (Only the 301 case is implemented so far -- see the TODO after the next rule.)
{
    ?u1 http:hasGetReply ?reply1 .          # IF ?u1 derefs to ?reply1
    ?reply1 http:hasStatusCode 301 .        # ... with 301 status
    ?reply1 http:hasLocation ?u2 .          # ... and new URI ?u2
    ?u2 http:hasGetReply ?reply2 .          # ... which derefs to ?reply2
    # ("FIRED recursive: " ?u1 " http:hasGetReply " ?reply2 "\n") string:concatenation ?fired . # Debug
} => {                                      # THEN
    ?u1 http:hasGetReply ?reply2 .          # ... ?u1 derefs to ?reply2
    # "a" log:outputString ?fired . # Debug
} .

# Furthermore, 301, 302 and 307 (but NOT 303) redirects are
# treated as saying that the resources denoted by the old and
# new URIs are the same resource.  This is significant
# because it highlights the difference between talking about
# the resource denoted by a URI, versus the use of the URI
# in an HTTP GET: two URIs may denote the same resource, but
# dereferencing them may yield *different* results.  For
# example, dereferencing a URI may yield a 301 redirect,
# but dereferencing the new URI may yield a 200 response.
# This helps explain why these HTTP rules are written in
# terms of URIs rather than awww:InformationResources.
{
    ?u1 a xsd:anyURI .                      # Old URI
    # ?r1 a rdfs:Resource .
    ?r1 uri:hasURI ?u1 .
    ?u2 a xsd:anyURI .                      # New URI
    # ?r2 a rdfs:Resource .
    ?r2 uri:hasURI ?u2 .
    # @@ TODO: I suspect this should use http:hasDirectGetReply
    # instead of http:hasGetReply, and then let the 301
    # rule make a transitive inference about the URI declaration,
    # but I haven't fully thought it through yet.
    ?u1 http:hasGetReply ?reply1 .          # IF ?u1 derefs to ?reply1
    ?reply1 http:hasStatusCode 301 .        # ... with 301 status
    ?reply1 http:hasLocation ?u2 .          # ... and new URI ?u2
    # ("FIRED 301: " ?r1 " = " ?r2 "\n") string:concatenation ?fired . # Debug
} => {                                      # THEN they denote
    ?r1 = ?r2 .                             # ... the same thing.
    # "a" log:outputString ?fired . # Debug
} .

#
# @@@@ TODO: Implement 302 and 307 cases the same as 301.


########################## URI Declaration ############################
# This section defines concepts involved in URI declaration, as
# described in http://dbooth.org/2007/uri-decl/
# Rules that assert these predicates are defined by each media type.

decl:parsesTo a rdf:Property ;
    rdfs:label "parsesTo" ;
    rdfs:comment "The given EntityBody parses to an N3 formula according to the given media type. The subject is a list: the first element is the entity body to be parsed; the second element is the media type. The resulting object formula is a set of RDF statements as described in http://dbooth.org/2007/uri-decl/ . This property should NOT normally be asserted explicitly, but should be inferred by media-type-specific parsing rules." ;
    # @@@@ TODO: Change the entity body type to octet stream.
    # rdfs:domain ( rdfs:Literal rdfs:Literal ) ; # ( EntityBody, MediaType )
    rdfs:range log:Formula .

decl:hasDeclaration a rdf:Property ;
    rdfs:label "hasDeclaration" ;
    rdfs:comment "The subject URI has a URI declaration consisting of the object formula -- a set of RDF statements as described in http://dbooth.org/2007/uri-decl/ . This property will normally be inferred from a successful http response, either from the URI's racine or via a 303 redirect using media-type-specific rules." ;
    rdfs:domain xsd:anyURI ;  # May have a fragID
    rdfs:range log:Formula .

# URI declaration rule for hash URIs.  The rules for each media
# type must specify how a body is parsed to a formula.
{
    ?u a xsd:anyURI .
    ?u uri:hasProperRacine ?racine .
    ?racine http:hasGetReply ?reply .
    ?reply http:hasStatusCode 200 .
    ?reply http:hasContentType ?mediaType .
    ?reply http:hasEntityBody ?body .
    ( ?body ?mediaType ) decl:parsesTo ?formula .
    # ("FIRED hash: " ?u " decl:hasDeclaration " ?formula "\n") string:concatenation ?fired . # Debug
} => {
    ?u decl:hasDeclaration ?formula .
    # "a" log:outputString ?fired . # Debug
} .

# URI declaration rule for 303 redirect URIs (which may also contain
# a fragment identifier).
# Note that this rule may involve any number of 301, 302 or 307
# redirects before or after the 303, but only a single 303.
# Thus, 303 is viewed as a less transferable delegation of authority
# for URI declaration than 301, 302 and 307.  The idea is that if we
# have u1 --303--> u2 --303--> u3, then u2 is treated as authoritative
# for URI declaration of u1, and u3 is treated as authoritative for
# URI declaration of u2, but u3 is NOT treated as authoritative for
# URI declaration of u1.  I am not certain that this is the right
# choice -- perhaps it should be fully transitive -- but given that
# 303 is a weaker relationship than 301, 302 or 307 I think it may
# be a good choice.  Comments on this question are invited.
{
    ?u a xsd:anyURI .
    ?u uri:hasRacine ?racine .
    ?racine http:hasGetReply ?reply1 .
    ?reply1 http:hasStatusCode 303 .
    ?reply1 http:hasLocation ?u2 .          # ... forwarding to ?u2
    ?u2 http:hasGetReply ?reply2 .          # ... which derefs to reply2
    ?reply2 http:hasStatusCode 200 .
    ?reply2 http:hasContentType ?mediaType .
    ?reply2 http:hasEntityBody ?body .
    ( ?body ?mediaType ) decl:parsesTo ?formula .
    # ("FIRED 303: " ?u " decl:hasDeclaration " ?formula "\n") string:concatenation ?fired . # Debug
} => {
    ?u decl:hasDeclaration ?formula .
    # "a" log:outputString ?fired . # Debug
} .

# URI declaration rule for awww:InformationResource (using
# httpRange-14 rule): 200 response => InformationResource
# and declares the URI.
# http://lists.w3.org/Archives/Public/www-tag/2005Jun/0039.html
# Notice that assertions contained in the information resource's
# representation (i.e., the HTTP Response) are *not*
# a part of the resulting URI declaration.
# This is intentional, to permit
# users to make assertions about the information resource that
# such a URI denotes without accepting the assertions served by
# that information resource.  For example, if dereferencing
# http://example/foo yields a 200 response with RDF/N3 content
# that parses to an n3 formula (i.e., a set of RDF assertions),
# then the rules for URI declaration will not automatically
# require everyone who writes that URI to accept those assertions.
{
    ?u a xsd:anyURI .
    ?r uri:hasURI ?u .
    ?u http:hasDirectGetReply ?reply .
    ?reply http:hasStatusCode 200 .
    ?formula = {
        ?r a awww:InformationResource .
        ?r uri:hasURI ?u .
    } .
    ("FIRED 200: " ?u " decl:hasDeclaration " ?formula "\n") string:concatenation ?fired . # Debug
} => {
    ?u decl:hasDeclaration ?formula .
    # "a" log:outputString ?fired . # Debug
} .

# URI declaration rule for rdfs:isDefinedBy, which is viewed
# as providing core assertions for a URI declaration, whereas
# the rdfs:seeAlso relationship is viewed as providing
# ancillary assertions, as described in
# http://dbooth.org/2007/uri-decl/#ancillary .
{
    ?r uri:hasURI ?u .
    ?rdef uri:hasURI ?udef .
    ?r rdfs:isDefinedBy ?rdef .
    ?udef http:hasGetReply ?reply .
    ?reply http:hasStatusCode 200 .
    ?reply http:hasContentType ?mediaType .
    ?reply http:hasEntityBody ?body .
    ( ?body ?mediaType ) decl:parsesTo ?formula .
    # ("FIRED isDefinedBy: " ?u " decl:hasDeclaration " ?formula "\n") string:concatenation ?fired . # Debug
} => {
    ?u decl:hasDeclaration ?formula .
    # "a" log:outputString ?fired . # Debug
} .

######################### N3 Media Type ##############################
# Properties and rule for media type: text/n3.
# Not sure if this media type is registered yet, but
# TimBL suggests text/rdf+n3 here:
# http://www.nabble.com/N-Triples-MIME-type-should-not-be-text-plain----comment-on-RDF-Test-Cases.-td13220788.html
# but here mentions that there was strong push for text/n3 instead:
# http://lists.w3.org/Archives/Public/public-awwsw/2008Feb/0027.html .
# Either way, this is good enough for demonstrating the concepts.

# @@@@ TODO: Define/find rule for parsing octet stream as RDF/n3.
# Should be able to use log:parsedAsN3
# In the meantime, this will do for test1:
# NOTE(review): in the copy under review the <...> IRIs inside the entity-body
# string and the subject of the triple in the formula are missing, both here
# and in the test2 rule below -- restore them from the upstream file.
{
    ?mediaType = "text/n3" .
    ?s = "@prefix sumo: . a sumo:Human . " .
    ?f = { a sumo:Human . } .
    # ("FIRED: ( " ?s " " ?mediaType " ) decl:parsesTo " ?f "\n") string:concatenation ?fired . # Debug
} => {
    ( ?s ?mediaType ) decl:parsesTo ?f .
    # "a" log:outputString ?fired . # Debug
} .
# And this will do for test2:
{
    ?mediaType = "text/n3" .
    ?s = "@prefix sumo: . a sumo:Human . " .
    ?f = { a sumo:Human . } .
    # ("FIRED: ( " ?s " " ?mediaType " ) decl:parsesTo " ?f "\n") string:concatenation ?fired . # Debug
} => {
    ( ?s ?mediaType ) decl:parsesTo ?f .
    # "a" log:outputString ?fired . # Debug
} .

########################## SUMO #############################
# Concepts from Suggested Upper Merged Ontology (SUMO):
# http://www.ontologyportal.org/
# This is used only:
#  - to assert a simple, interesting fact, i.e.,
#    that <...#dan> is a sumo:Human; and
#  - to demonstrate how one could detect when the same URI is
#    used to denote both an awww:InformationResource and
#    a sumo:Human, which the AWWW says is contradictory:
#    http://www.w3.org/TR/webarch/#def-information-resource
#    "Other things, such as cars and dogs (and, if you've printed
#    this document on physical sheets of paper, the artifact that
#    you are holding in your hand), are resources too. They are
#    not information resources, however, . . . ."

sumo:AbstractEntity a rdfs:Class ;
    rdfs:label "sumo:AbstractEntity" ;
    rdfs:comment "An abstract entity, as defined in http://sigma.ontologyportal.org:4010/sigma/Browse.jsp?lang=EnglishLanguage&kb=SUMO&term=Abstract . 'Entity is exhaustively partitioned into physical and abstract.'" ;
    owl:disjointWith sumo:PhysicalEntity .

sumo:PhysicalEntity a rdfs:Class ;
    rdfs:label "sumo:PhysicalEntity" ;
    rdfs:comment "A physical entity, as defined in http://sigma.ontologyportal.org:4010/sigma/Browse.jsp?lang=EnglishLanguage&kb=SUMO&term=Physical ." .

sumo:Human a rdfs:Class ;
    rdfs:label "sumo:Human" ;
    rdfs:comment "A human, as defined in http://sigma.ontologyportal.org:4010/sigma/Browse.jsp?kb=SUMO&term=Human . The SUMO ontology has a long chain of superclassing to get from human to physical entity: human, hominid, primate, mammal, warm blooded vertebrate, vertebrate, animal, organism, organic object, corpuscular object, self connected object, object, physical entity. Hence, I have abbreviated this chain for simplicity." ;
    rdfs:subClassOf sumo:PhysicalEntity .

########################## Standard RDF and OWL Rules ##########################
# I haven't yet found out where to get these standard rules for cwm,
# so I hand coded them for the moment.

# rdfs:subClassOf: instances of a subclass are instances of the superclass.
{ ?a a ?ca . ?ca rdfs:subClassOf ?cb . }
    => { ?a a ?cb . } .

# owl:sameAs: symmetry, then substitution in subject and object position.
{ ?a = ?b } => { ?b = ?a } .
{ ?a = ?b . ?a ?p ?c } => { ?b ?p ?c } .
{ ?a = ?b . ?c ?p ?a } => { ?c ?p ?b } .

# rdfs:domain
{ ?p rdfs:domain ?d . ?a ?p ?b } => { ?a a ?d } .

# rdfs:range
{ ?p rdfs:range ?r . ?a ?p ?b } => { ?b a ?r } .