Source code

Revision control

Copy as Markdown

Other Tools

# Author: Rob Sanderson (azaroth42@gmail.com)
# License: Apache2
# Last Modified: 2016-09-02
import json
from rdflib import ConjunctiveGraph, URIRef
from pyld import jsonld
from pyld.jsonld import compact, expand, frame, from_rdf, to_rdf, JsonLdProcessor
import urllib
# Stop code from looking up the contexts online for every operation.
# Maps a URL to the document dict that load_document_and_cache built for it.
docCache = {}
def fetch(url):
    """Return the raw contents read from *url*.

    The handle is always closed, even when reading fails.  ``urlopen`` is
    imported locally so the function works with both the Python 2
    (``urllib.urlopen``) and Python 3 (``urllib.request.urlopen``) layouts.

    :param url: location to open; handed to ``urlopen`` unchanged.
    :returns: whatever ``read()`` on the opened handle returns.
    :raises: any error raised by ``urlopen`` or ``read`` propagates.
    """
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    from contextlib import closing

    # closing() is used because the Python 2 handle is not a context manager.
    with closing(urlopen(url)) as fh:
        return fh.read()
def load_document_and_cache(url):
    """Return the remote-document dict for *url*, fetching it at most once.

    On a cache miss the body is retrieved with ``fetch`` and wrapped in a dict
    with ``contextUrl``, ``documentUrl`` (both ``None``) and ``document``
    keys; that dict is stored in ``docCache`` and returned.  Later calls for
    the same URL return the stored dict itself.  Nothing is cached when
    ``fetch`` raises.

    :param url: URL of the document to load.
    :returns: the cached or newly built document dict.
    """
    if url in docCache:
        return docCache[url]
    doc = {
        'contextUrl': None,
        'documentUrl': None,
        'document': fetch(url),
    }
    docCache[url] = doc
    return doc
# Install load_document_and_cache as pyld's document loader.
jsonld.set_document_loader(load_document_and_cache)
class Validator(object):
    """Convert annotation data between JSON-LD dicts and rdflib graphs.

    ``rdf_to_jsonld`` and ``compact_and_clean`` read the module-level names
    ``context_js``, ``frame_js`` and ``context`` at call time, so those must
    be assigned before the methods are used.
    """

    def __init__(self):
        # Short type name as used in the JSON -> prefixed class name.
        # Consulted by _mk_rdflib_jsonld when rewriting ``type`` values.
        self.rdflib_class_map = {
            "Annotation": "oa:Annotation",
            "Dataset": "dctypes:Dataset",
            "Image": "dctypes:StillImage",
            "Video": "dctypes:MovingImage",
            "Audio": "dctypes:Sound",
            "Text": "dctypes:Text",
            "TextualBody": "oa:TextualBody",
            "ResourceSelection": "oa:ResourceSelection",
            "SpecificResource": "oa:SpecificResource",
            "FragmentSelector": "oa:FragmentSelector",
            "CssSelector": "oa:CssSelector",
            "XPathSelector": "oa:XPathSelector",
            "TextQuoteSelector": "oa:TextQuoteSelector",
            "TextPositionSelector": "oa:TextPositionSelector",
            "DataPositionSelector": "oa:DataPositionSelector",
            "SvgSelector": "oa:SvgSelector",
            "RangeSelector": "oa:RangeSelector",
            "TimeState": "oa:TimeState",
            "HttpState": "oa:HttpRequestState",
            "CssStylesheet": "oa:CssStyle",
            "Choice": "oa:Choice",
            "Composite": "oa:Composite",
            "List": "oa:List",
            "Independents": "oa:Independents",
            "Person": "foaf:Person",
            "Software": "as:Application",
            "Organization": "foaf:Organization",
            "AnnotationCollection": "as:OrderedCollection",
            "AnnotationPage": "as:OrderedCollectionPage",
            "Audience": "schema:Audience"
        }

    def _clean_bnode_ids(self, js):
        """Return a copy of *js* with blank-node ``id`` entries removed.

        An ``id`` entry whose value starts with ``_:`` is dropped.  Values
        that are dicts are processed recursively; every other value (lists
        included) is copied across unchanged.

        :param js: dict to clean; it is not modified.
        :returns: a new dict.
        """
        new = {}
        for k, v in js.items():
            if k == 'id' and v.startswith("_:"):
                continue
            if isinstance(v, dict):
                new[k] = self._clean_bnode_ids(v)
            else:
                new[k] = v
        return new

    def _mk_rdflib_jsonld(self, js):
        """Return a copy of *js* with ``type`` values rewritten for rdflib.

        Pre-processing step so that rdflib's JSON-LD parser can handle the
        data: every ``type`` value found in ``rdflib_class_map`` is replaced
        by its mapped name.  Type names missing from the map are dropped (a
        single unmapped ``type`` removes the key altogether).  Nested dicts
        are processed recursively.

        NOTE(review): dicts held inside lists are copied unchanged, so their
        ``type`` values are not rewritten -- confirm this is intended.

        :param js: dict to convert; it is not modified.
        :returns: a new dict.
        """
        class_map = self.rdflib_class_map
        new = {}
        for k, v in js.items():
            if k == 'type':
                if isinstance(v, list):
                    new['type'] = [class_map[i] for i in v if i in class_map]
                elif v in class_map:
                    new['type'] = class_map[v]
            elif isinstance(v, dict):
                new[k] = self._mk_rdflib_jsonld(v)
            else:
                new[k] = v
        return new

    def json_to_rdf(self, js, fmt=None):
        """Parse the JSON-LD dict *js* into an rdflib graph.

        :param js: JSON-LD data as a dict; pre-processed with
            ``_mk_rdflib_jsonld`` before parsing.
        :param fmt: optional rdflib serialisation format name.
        :returns: the serialised graph when *fmt* is truthy, otherwise the
            ``ConjunctiveGraph`` itself.
        """
        prepared = json.dumps(self._mk_rdflib_jsonld(js))
        g = ConjunctiveGraph()
        g.parse(data=prepared, format='json-ld')
        if fmt:
            return g.serialize(format=fmt)
        return g

    def rdf_to_jsonld(self, rdf, fmt):
        """Convert serialised RDF into framed, compacted JSON-LD.

        The data is parsed with rdflib, serialised as JSON-LD, wrapped in a
        ``@graph`` under the module-level ``context_js``, framed with the
        module-level ``frame_js`` and finally compacted against
        ``context_js``.

        :param rdf: RDF data as text.
        :param fmt: rdflib format name describing *rdf*.
        :returns: the compacted JSON-LD structure.
        """
        g = ConjunctiveGraph()
        g.parse(data=rdf, format=fmt)
        graph_js = json.loads(g.serialize(format='json-ld'))
        wrapped = {"@context": context_js, "@graph": graph_js}
        framed = frame(wrapped, frame_js)
        # Blank node ids are left in place; _clean_bnode_ids is not applied.
        return compact(framed, context_js)

    def compact_and_clean(self, js):
        """Compact *js* and flatten a top-level ``@graph`` into the result.

        After compaction against the module-level ``context_js`` the
        ``@context`` entry is replaced by the module-level ``context`` value.
        When the result has an ``@graph`` entry, its items are copied to the
        top level and the ``@graph`` key is removed.

        :param js: JSON-LD structure to compact.
        :returns: the compacted (and possibly flattened) dict.
        """
        newjs = compact(js, context_js)
        newjs['@context'] = context
        if "@graph" in newjs:
            for k, v in newjs['@graph'].items():
                newjs[k] = v
            del newjs['@graph']
        return newjs
validator = Validator()

# NOTE(review): context, example, frameURI, ontology, asColl, skosConcept and
# the rdftype / rdfsdomain / rdfsrange / rdfsresource / rdfssco terms used
# below are not assigned in this section; presumably they are module-level
# constants defined elsewhere -- confirm.
data = fetch(context)
context_js = json.loads(data)
data = fetch(example)
example_js = json.loads(data)
data = fetch(frameURI)
frame_js = json.loads(data)

# Test1: JSON-LD context document can be parsed without errors by JSON-LD validators
# Context document is parsable if it can be loaded and used to expand the example
# NOTE(review): the second argument of pyld's expand() is an options dict;
# confirm that passing the context document there has the intended effect.
try:
    expanded = expand(example_js, context_js)
except Exception:
    print("Context is invalid, failed Test 1")

# Test2: JSON-LD context document can be used to convert JSON-LD serialized Annotations into RDF triples.
try:
    jsonld_nq = to_rdf(example_js, {"base": "http://example.org/", "format": "application/nquads"})
except Exception:
    print("Cannot use context to convert JSON-LD to NQuads")

# Test3: Graphs produced are isomorphic
# Explicit comparison instead of ``assert`` so the check survives ``python -O``.
try:
    rl_g = validator.json_to_rdf(example_js)
    js_g = ConjunctiveGraph().parse(data=jsonld_nq, format="nt")
    graphs_match = (len(rl_g.store) == len(js_g.store)
                    and rl_g.isomorphic(js_g))
except Exception:
    graphs_match = False
if not graphs_match:
    print("Different triples from two parsers, or non-isomorphic graphs")

# Test4: The graphs produced can be converted back into JSON-LD without loss of information
# A failure here is fatal: the message is printed and the error re-raised.
try:
    js = validator.rdf_to_jsonld(jsonld_nq, "nt")
    js2 = validator.compact_and_clean(js)
    if js2 != example_js:
        raise AssertionError("round-tripped JSON-LD differs from the example")
except Exception:
    print("Failed to recompact parsed data")
    raise

# Test5: ontology documents can be parsed without errors by validators
# Any parse error propagates and stops the script.
g = ConjunctiveGraph().parse(ontology, format="turtle")

# Test6: ontology is internally consistent with respect to domains, ranges, etc
# step 1: find all the classes.
otherClasses = [asColl, skosConcept]
classes = list(g.subjects(rdftype, URIRef("http://www.w3.org/2000/01/rdf-schema#Class")))
props = list(g.subjects(rdftype, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property")))
# Set copy for constant-time membership tests; ``classes`` keeps its order.
known_classes = set(classes)

# Every domain must be a declared class; the first violation aborts the run.
for p in props:
    for d in g.objects(p, rdfsdomain):
        if d not in known_classes:
            raise AssertionError("%s has unknown domain %s" % (p, d))

# Ranges may also be XML Schema datatypes or the rdfsresource term.
for p in props:
    for r in g.objects(p, rdfsrange):
        if (r not in known_classes
                and not str(r).startswith("http://www.w3.org/2001/XMLSchema#")
                and r != rdfsresource):
            print("Found inconsistent property: %s has unknown range" % p)

# Superclasses must be declared classes or one of the listed external ones.
for c in classes:
    for parent in g.objects(c, rdfssco):
        if parent not in known_classes and parent not in otherClasses:
            print("Found inconsistent class: %s has unknown superClass" % c)

print("Done.")