wiki:vSPARQLCleanupQueries
Last modified on 07/10/09 12:42:08 (8 years ago)

vSPARQL Cleanup Queries

Manipulating ontologies at the RDF triple level can produce RDF graphs that are malformed with respect to higher-level semantics, such as RDF lists and OWL constructs. On this page, we list several vSPARQL queries that can be used to clean up some of these problems.

In each of these examples, the clause FROM <file:///.../culprit4.xml> names the input source; replace it with the location of your own input dataset.


RDF Lists with empty elements

# Find all of the RDF list nodes that do not have an element;
# recursively follow the list until it either hits a node with
# an element or it hits rdf:nil. Track the node that points
# to this errant list and the entire set of reachable nodes
# before the errant list has an element.
#
# The graph <identify_rdfListEmptyNodes> is the UNION of three subqueries:
# two base cases that locate the first empty node of a run, and one
# recursive case that reads the graph being built (FROM NAMED
# <identify_rdfListEmptyNodes>) to extend the run one node at a time.
#
# Triples written to the graph:
#   ?a rdf:rest ?b / ?a ?pred ?b  the original edge into the first empty node
#   ?x temp:rest ?y               an rdf:rest edge leaving an empty node
#   ?a temp:starts ?b             ?b is the first empty node reached from ?a
#   ?a temp:reaches ?y            ?y is reachable from ?a through empty nodes
FROM NAMED <identify_rdfListEmptyNodes> [
    # Base case 1: ?a is a list node that has an element (rdf:first) and
    # whose successor ?b has no rdf:first.
    CONSTRUCT {
          ?a rdf:rest ?b .      # to be changed
          ?b temp:rest ?c .     # to be deleted
          ?a temp:starts ?b .   # first node in rest list
          ?a temp:reaches ?c .  # reachability list
    }
    FROM <file:///.../culprit4.xml>
    WHERE {
          ?a rdf:first ?noop .
          ?a rdf:rest ?b .
          # OPTIONAL + !bound is the negation idiom: keep ?b only when it
          # has no rdf:first triple.
          OPTIONAL { ?b rdf:first ?a_bogus } . FILTER(!bound(?a_bogus)) .
          ?b rdf:rest ?c .
    }

    UNION

    # Base case 2: ?a points at ?b through any predicate other than
    # rdf:rest, and ?b has an rdf:rest but no rdf:first (the list starts
    # with an empty node).
    CONSTRUCT {
              ?a ?pred ?b .     # to be changed
              ?b temp:rest ?c . # to be deleted
              ?a temp:starts ?b . # first node in rest list
              ?a temp:reaches ?c . # reachability list
    }
    FROM <file:///.../culprit4.xml>
    WHERE {
          ?a ?pred ?b . FILTER(?pred != rdf:rest) .
          ?b rdf:rest ?c .
          OPTIONAL { ?b rdf:first ?b_bogus } . FILTER(!bound(?b_bogus)) .
    }

    UNION

    # Recursive case: for every node ?c already recorded as reachable from
    # ?a, if ?c itself has no rdf:first, step over it to its successor ?d.
    # The walk stops at a node that has an rdf:first or has no rdf:rest
    # (presumably the case for rdf:nil; confirm against the input data).
    CONSTRUCT {
              ?c temp:rest ?d . # to be deleted
              ?a temp:reaches ?d . # reachability list
    }
    FROM NAMED <identify_rdfListEmptyNodes>
    FROM <file:///.../culprit4.xml>
    WHERE {
          GRAPH <identify_rdfListEmptyNodes> { ?a temp:reaches ?c . } .
          OPTIONAL { ?c rdf:first ?noop } . FILTER(!bound(?noop)) .
          ?c rdf:rest ?d .
    }
]

# Copy the input graph while leaving out every rdf:rest edge that is
# recorded as a temp:rest edge in <identify_rdfListEmptyNodes>, i.e. the
# rdf:rest edges that leave RDF list nodes with no value.
FROM NAMED <remove_rdfListEmptyNodes> [
     CONSTRUCT { ?node ?edge ?target . }
     FROM NAMED <identify_rdfListEmptyNodes>
     FROM <file:///.../culprit4.xml>
     WHERE {
           ?node ?edge ?target .
           # Look for a marker between the same two nodes in the
           # identification graph; it only counts when the marker is
           # temp:rest and the input edge is rdf:rest.
           OPTIONAL {
               GRAPH <identify_rdfListEmptyNodes> { ?node ?marker ?target } .
               FILTER(?marker=temp:rest && ?edge=rdf:rest)
           }
           # Keep the input triple only when no such marker was found.
           FILTER(!bound(?marker)) .
     }
]

# Drop the edges that lead into the start of an RDF list run of empty
# nodes: every triple of <remove_rdfListEmptyNodes> that also occurs, with
# the same subject, predicate and object, in <identify_rdfListEmptyNodes>.
# Replacement edges are produced in <add_startEdgeRdfListEmptyNodes>.
FROM NAMED <remove_startEdgeRdfListEmptyNodes> [
     CONSTRUCT { ?subj ?pred ?obj . }
     FROM NAMED <identify_rdfListEmptyNodes>
     FROM NAMED <remove_rdfListEmptyNodes>
     WHERE {
           GRAPH <remove_rdfListEmptyNodes> { ?subj ?pred ?obj . } .
           # ?flagged is bound only when the identification graph holds a
           # triple with the same subject, predicate and object.
           OPTIONAL {
               GRAPH <identify_rdfListEmptyNodes> { ?subj ?pred ?flagged } .
               FILTER(?obj = ?flagged)
           } .
           # Negation idiom: keep the triple when no identical one was flagged.
           FILTER(!bound(?flagged)) .
     }
]

# Add in the new edges to the RDF lists containing empty nodes
#
# The result is the union of (1) every triple already kept in
# <remove_startEdgeRdfListEmptyNodes> and (2) one replacement edge per
# start node ?a2, re-pointing its original predicate at the first
# reachable node that is either rdf:nil or carries an rdf:first.
FROM NAMED <add_startEdgeRdfListEmptyNodes> [
     CONSTRUCT {
         ?t ?u ?v .
         ?a2 ?pred2 ?d2 .
     }
     FROM NAMED <identify_rdfListEmptyNodes>
     FROM NAMED <remove_startEdgeRdfListEmptyNodes>
     FROM <file:///.../culprit4.xml>
     WHERE {
           {
             # Branch 1: pass through all triples that survived the removals.
             GRAPH <remove_startEdgeRdfListEmptyNodes> { ?t ?u ?v  }
           } UNION {
             # Branch 2: build the replacement edges.
             GRAPH <identify_rdfListEmptyNodes> {
                   ?a2 temp:starts ?b2 .
                   ?a2 ?pred2 ?gone2 .
                   # Skip predicates whose IRI string matches the pattern below
                   # (presumably the temp: namespace; the prefix declarations are
                   # not shown here, so confirm). The pattern is unanchored and
                   # its dots are regex wildcards.
                   FILTER(!REGEX(str(?pred2),"http://sig.biostr.washington.edu/temp#") ) .
                   ?a2 temp:reaches ?d2 . }
             # Evaluated outside the GRAPH clause, i.e. against the input file:
             # does the reachable node ?d2 carry an element?
             OPTIONAL {  ?d2 rdf:first ?nxt2 } .
             # Accept ?d2 only if it ends the list or holds an element.
             FILTER( (?d2 = rdf:nil) || bound(?nxt2) ) .
           }
     }
]


owl:unionOf statements with 0 or 1 elements

# Find owl:unionOf statements that have RDF lists of 0 or 1 elements.
# Replace the owl:unionOf statements with either the element or rdf:nil.
#
# The result graph <removed_shortUnionOfs> contains every input triple that
# is not recorded in <identify_shortUnionOfs>, plus one replacement triple
# for each reference to a short union class.
FROM NAMED <removed_shortUnionOfs> [
     CONSTRUCT {
               ?q ?r ?s .                 # input triples that are kept
               ?before1 ?pred1 ?c1 .      # 1-element union: point at the element
               ?before2 ?pred2 rdf:nil .  # 0-element union: point at rdf:nil
     }
     FROM <file:///.../culprit4.xml>
     # Find owl:unionOfs that have 0 or 1 element.
     FROM NAMED <identify_shortUnionOfs> [
          CONSTRUCT {
              # Triples describing a union class with exactly one element.
              ?before1 ?pred1 ?a1 .
              ?a1 owl:unionOf ?b1 .
              ?a1 rdf:type owl:Class .
              ?b1 rdf:first ?c1 .
              ?b1 rdf:rest rdf:nil .

              # Triples describing a union class whose list node has no element.
              ?before2 ?pred2 ?a2 .
              ?a2 owl:unionOf ?b2 .
              ?a2 rdf:type owl:Class .
              ?b2 rdf:rest rdf:nil .
          }
          FROM <file:///.../culprit4.xml>
          WHERE {
                  {
                         ?before1 ?pred1 ?a1 .
                         ?a1 owl:unionOf ?b1 .
                         ?a1 rdf:type owl:Class .
                         ?b1 rdf:first ?c1 .
                         ?b1 rdf:rest rdf:nil .
                  } UNION {
                         ?before2 ?pred2 ?a2 .
                         ?a2 owl:unionOf ?b2 .
                         ?a2 rdf:type owl:Class .
                         # Negation idiom: the list node must have no rdf:first.
                         OPTIONAL { ?b2 rdf:first ?c2 . } . FILTER(!bound(?c2)) .
                         ?b2 rdf:rest rdf:nil .
                  }
          }
     ]
     WHERE {
             {
                # Keep every input triple that is not part of a short union.
                ?q ?r ?s .
                # The equality FILTER sits directly in the OPTIONAL group (not
                # inside GRAPH) so that ?s from the outer pattern is in scope,
                # matching the form used by the RDF list cleanup queries.
                OPTIONAL { GRAPH <identify_shortUnionOfs> { ?q ?r ?t } . FILTER(?s = ?t) }
                FILTER(!bound(?t)) .
             } UNION {
                # 1-element unions: bind the referring triple and the element.
                GRAPH <identify_shortUnionOfs> {
                   ?before1 ?pred1 ?a1 .
                   ?a1 owl:unionOf ?b1 .
                   ?b1 rdf:first ?c1 .
                   ?b1 rdf:rest rdf:nil .
                }
             } UNION {
                # 0-element unions: all patterns are matched inside the named
                # graph, consistent with the 1-element branch above.
                GRAPH <identify_shortUnionOfs> {
                   ?before2 ?pred2 ?a2 .
                   ?a2 owl:unionOf ?b2 .
                   OPTIONAL { ?b2 rdf:first ?c2 . } . FILTER(!bound(?c2)) .
                   ?b2 rdf:rest rdf:nil .
                }
             }
     }
]


owl:allValuesFrom with empty owl:unionOf lists

# Remove all owl:allValuesFrom that have empty owl:unionOf lists.
#
# The result graph <remove_emptyAllValuesFrom> is the input graph minus the
# triples recorded in <identify_emptyAllValuesFrom>.
FROM NAMED <remove_emptyAllValuesFrom> [
     CONSTRUCT {
               ?a ?b ?c .
     }
     FROM <file:///.../culprit4.xml>
     # identify allValuesFrom with empty RDF lists
     FROM NAMED <identify_emptyAllValuesFrom> [
         CONSTRUCT {
                   ?x owl:allValuesFrom ?allB .
                   ?allB rdf:type owl:Class .
                   ?allB owl:unionOf rdf:nil .
         }
         FROM <file:///.../culprit4.xml>
         WHERE {
               # ?allB is an owl:Class whose owl:unionOf is the empty list.
               ?x owl:allValuesFrom ?allB .
               ?allB rdf:type owl:Class .
               ?allB owl:unionOf rdf:nil .
         }
     ]
     WHERE {
           ?a ?b ?c .
           # ?cnot is bound only when the identification graph holds the same
           # triple. The equality FILTER sits directly in the OPTIONAL group
           # (not inside GRAPH) so that ?c from the outer pattern is in scope.
           OPTIONAL { GRAPH <identify_emptyAllValuesFrom> { ?a ?b ?cnot } . FILTER(?c = ?cnot) } .
           # Negation idiom: keep only the triples that were not identified.
           FILTER(!bound(?cnot)) .
     }
]


owl:Restrictions that have no values associated with owl:onProperty restrictions

# remove the set of owl:Restrictions that do not have values associated with
# owl:onProperty restrictions
#
# The result graph <remove_emptyOwlOnPropertyRestrictions> is the input
# graph minus the triples recorded in
# <identify_emptyOwlOnPropertyRestrictions>.
FROM NAMED <remove_emptyOwlOnPropertyRestrictions> [
     CONSTRUCT {
               ?a ?b ?c .
     }
     FROM <file:///.../culprit4.xml>
     # identify the set of owl:Restrictions that do not have values associated with
     # owl:onProperty restrictions
     FROM NAMED <identify_emptyOwlOnPropertyRestrictions> [
         CONSTRUCT {
              ?x rdfs:subClassOf ?owlrestrict .
              ?owlrestrict rdf:type owl:Restriction .
              ?owlrestrict owl:onProperty ?onprop .
         }
         FROM <file:///.../culprit4.xml>
         WHERE {
                       ?x rdfs:subClassOf ?owlrestrict .
                       ?owlrestrict rdf:type owl:Restriction .
                       ?owlrestrict owl:onProperty ?onprop .
                       # Look for any property on the restriction other than
                       # rdf:type and owl:onProperty ...
                       OPTIONAL { ?owlrestrict ?anyp ?anyo .
                                  FILTER((?anyp!=rdf:type)&&(?anyp!=owl:onProperty)) . }
                       # ... and keep the restriction only when none exists.
                       FILTER(!bound(?anyp)) .
         }
     ]
     WHERE {
           ?a ?b ?c .
           # ?cnot is bound only when the identification graph holds the same
           # triple. The equality FILTER sits directly in the OPTIONAL group
           # (not inside GRAPH) so that ?c from the outer pattern is in scope.
           OPTIONAL {
               GRAPH <identify_emptyOwlOnPropertyRestrictions> { ?a ?b ?cnot } .
               FILTER(?c = ?cnot)
           } .
           # Negation idiom: keep only the triples that were not identified.
           FILTER(!bound(?cnot)) .
     }
]