Re: [HCLSIG] Uniprot RDF data set and benchmarks

I've included below the SQL queries that were used in the VLDB paper for testing the performance of the Oracle RDF Data Model.

Please note that we simply took some of the examples given on Eric Jain's Uniprot RDF website and rewrote them to work with our system. It seems the document with example queries is no longer available via Eric's site, but is available via archive.org:
http://web.archive.org/web/20041011125430/http://www.isb-sib.ch/~ejain/rdf/store.html#queries

Susie



-- Q1
-- Transmembrane annotation ranges: matches proteins that have an annotation
-- of type :Transmembrane_Annotation, together with the begin/end of that
-- annotation's range.
-- The ROWNUM filter keeps at most 15000 matched rows; the averages of the
-- string lengths of the bound values are computed over those rows only.
-- The empty alias '' expands to 'urn:lsid:uniprot.org:ontology:'.
-- NOTE(review): the NULL third argument is presumably the rulebase list
-- (i.e. no inference) -- confirm against the RDF_MATCH signature.
SELECT
    AVG(LENGTH(protein)),
    AVG(LENGTH(begin)),
    AVG(LENGTH(end))
FROM TABLE(RDF_MATCH('(?protein     rdf:type     :Protein)
                      (?protein     :annotation  ?annotation)
                      (?annotation  rdf:type     :Transmembrane_Annotation)
                      (?annotation  :range       ?range)
                      (?range       :begin       ?begin)
                      (?range       :end         ?end)',
           RDFModels('UniProt'),
           NULL,
           RDFAliases(
               RDFAlias('', 'urn:lsid:uniprot.org:ontology:')
           )))
WHERE ROWNUM <= 15000;

-- Q2
-- Citations by author prefix: matches proteins that have a :modified value
-- and a citation carrying both an author and a title, then keeps rows whose
-- author value starts with 'Bairoch ' (note the trailing space before the
-- wildcard).
-- At most 16 rows that pass the author filter are kept; the averages of the
-- string lengths of protein, author and title are computed over those rows.
-- ?modified is bound by the pattern but not selected; it only constrains the
-- match to proteins that have a :modified value.
-- The empty alias '' expands to 'urn:lsid:uniprot.org:ontology:'.
SELECT AVG(LENGTH(protein)),
       AVG(LENGTH(author)),
       AVG(LENGTH(title))
FROM TABLE(RDF_MATCH('(?protein   rdf:type    :Protein)
                      (?protein   :modified   ?modified)
                      (?protein   :citation   ?citation)
                      (?citation  :author     ?author)
                      (?citation  :title      ?title)',
           RDFModels('UniProt'), NULL,
           RDFAliases(RDFAlias('', 'urn:lsid:uniprot.org:ontology:'))))
WHERE author LIKE 'Bairoch %' AND rownum <= 16;

-- Q3
-- Exact author lookup: matches proteins that have a citation whose :author
-- is the literal "Bairoch A.".
-- The ROWNUM filter is applied before the aggregate, so the reported count
-- is capped at 32.
-- The empty alias '' expands to 'urn:lsid:uniprot.org:ontology:'.
SELECT
    COUNT(*)
FROM TABLE(RDF_MATCH('(?protein   rdf:type   :Protein)
                      (?protein   :citation  ?citation)
                      (?citation  :author    "Bairoch A.")',
           RDFModels('UniProt'),
           NULL,
           RDFAliases(
               RDFAlias('', 'urn:lsid:uniprot.org:ontology:')
           )))
WHERE ROWNUM <= 32;

-- Q4
-- Cross-references by keyword: matches proteins tagged with the keyword
-- resource <urn:lsid:uniprot.org:keywords:48> and returns their
-- rdfs:seeAlso targets.
-- At most 3000 matched rows are kept; the average string length of the
-- ?related binding is computed over those rows.
-- The empty alias '' expands to 'urn:lsid:uniprot.org:ontology:'.
-- Each triple pattern is kept on a single line: the :keyword triple must not
-- be wrapped, so the pattern literal carries no stray line break or trailing
-- whitespace inside a triple.
SELECT AVG(LENGTH(related))
FROM TABLE(RDF_MATCH('(?protein  rdf:type      :Protein)
                      (?protein  :keyword      <urn:lsid:uniprot.org:keywords:48>)
                      (?protein  rdfs:seeAlso  ?related)',
           RDFModels('UniProt'), NULL,
           RDFAliases(RDFAlias('', 'urn:lsid:uniprot.org:ontology:'))))
WHERE rownum <= 3000;

-- Q5
-- Disease annotations for one organism: matches proteins whose :organism is
-- <urn:lsid:uniprot.org:taxonomy:9606>, that have a gene with a name, and
-- that carry an annotation of type :Disease_Annotation with an rdfs:comment.
-- At most 750 matched rows are kept; the averages of the string lengths of
-- gene, name and text are computed over those rows.
-- The empty alias '' expands to 'urn:lsid:uniprot.org:ontology:'.
-- Each triple pattern is kept on a single line and the statement contains no
-- blank or whitespace-only lines: SQL*Plus (with its default SQLBLANKLINES
-- setting) can treat such a line as the end of statement entry.
SELECT AVG(LENGTH(gene)),
       AVG(LENGTH(name)),
       AVG(LENGTH(text))
FROM TABLE(RDF_MATCH('(?protein     rdf:type     :Protein)
                      (?protein     :gene        ?gene)
                      (?gene        :name        ?name)
                      (?protein     :organism    <urn:lsid:uniprot.org:taxonomy:9606>)
                      (?protein     :annotation  ?annotation)
                      (?annotation  rdf:type     :Disease_Annotation)
                      (?annotation  rdfs:comment ?text)',
           RDFModels('UniProt'), NULL,
           RDFAliases(RDFAlias('', 'urn:lsid:uniprot.org:ontology:'))))
WHERE rownum <= 750;

-- Q6
-- Recently modified proteins: matches every protein together with its
-- :modified value and keeps rows whose value compares greater than the
-- string '2004-08'.
-- At most 8000 rows that pass the date filter are kept; the averages of the
-- string lengths of protein and mod_date are computed over those rows.
-- The empty alias '' expands to 'urn:lsid:uniprot.org:ontology:'.
-- NOTE(review): the filter compares against a string literal, so it
-- presumably relies on mod_date being returned as ISO-style 'YYYY-MM-DD'
-- text that sorts chronologically -- confirm.
SELECT
    AVG(LENGTH(protein)),
    AVG(LENGTH(mod_date))
FROM TABLE(RDF_MATCH('(?protein  rdf:type   :Protein)
                      (?protein  :modified  ?mod_date)',
           RDFModels('UniProt'),
           NULL,
           RDFAliases(
               RDFAlias('', 'urn:lsid:uniprot.org:ontology:')
           )))
WHERE mod_date > '2004-08'
  AND ROWNUM <= 8000;

Received on Friday, 17 February 2006 19:04:31 UTC