pelican_rdf.py
"""
pelican-rdf
===========

This plugin adds a new type of media to Pelican: the vocabulary.
Vocabularies are .rdf or .owl files, and metadata about them is
collected through SPARQL queries.
"""
import logging
from os import listdir
from os.path import isfile, join

import requests
from blinker import signal

from pelican import signals
from pelican.contents import Page, is_valid_content
from pelican.generators import CachingGenerator
from pelican.readers import BaseReader

try:
    import rdflib
    from rdflib.query import Processor
    rdflib_loaded = True
except ImportError:
    rdflib_loaded = False

logger = logging.getLogger(__name__)

voc_generator_init = signal('voc_generator_init')
voc_generator_finalized = signal('voc_generator_finalized')
voc_writer_finalized = signal('voc_writer_finalized')
voc_generator_preread = signal('voc_generator_preread')
voc_generator_context = signal('voc_generator_context')


class VocabularyGenerator(CachingGenerator):
    """Generate vocabulary descriptions."""

    # Temporary directory the vocabulary is dereferenced to
    # when collected online.
    _local_vocabulary_path = "/tmp/"

    def __init__(self, *args, **kwargs):
        logger.debug("Vocabulary generator called")
        self.vocabularies = []
        super(VocabularyGenerator, self).__init__(*args, **kwargs)

    # Called for both local and remote vocabulary context creation.
    # Performs the actual vocabulary generation.
    def generate_vocabulary_context(
            self, vocabulary_file_name, path_to_vocabulary):
        logger.debug("Generating vocabulary context for " +
                     path_to_vocabulary + "/" + vocabulary_file_name)
        voc = self.get_cached_data(vocabulary_file_name, None)
        if voc is None:
            try:
                voc = self.readers.read_file(
                    base_path=path_to_vocabulary,
                    path=vocabulary_file_name,
                    content_class=Vocabulary,
                    context=self.context,
                    preread_signal=voc_generator_preread,
                    preread_sender=self,
                    context_signal=voc_generator_context,
                    context_sender=self)
            except Exception as e:
                logger.error(
                    'Could not process %s\n%s', vocabulary_file_name, e,
                    exc_info=self.settings.get('DEBUG', False))
                self._add_failed_source_path(vocabulary_file_name)
                return
            if not is_valid_content(voc, vocabulary_file_name):
                self._add_failed_source_path(vocabulary_file_name)
                return
            self.cache_data(vocabulary_file_name, voc)
        self.vocabularies.append(voc)
        self.add_source_path(voc)

    def generate_local_context(self):
        for f in self.get_files(
                self.settings['VOC_PATHS'],
                exclude=self.settings['VOC_EXCLUDES']):
            self.generate_vocabulary_context(f, self.path)

    def dereference(self, uri, local_file):
        logger.debug("Dereferencing " + uri + " into " + local_file)
        headers = {"Accept": "application/rdf+xml"}
        r = requests.get(uri, headers=headers)
        with open(self._local_vocabulary_path + local_file, 'w') as f:
            f.write(r.text)

    def generate_remote_context(self):
        for uri in self.settings["VOC_URIS"]:
            logger.debug("Generating context for remote " + uri)
            local_name = uri.split("/")[-1] + ".rdf"
            self.dereference(uri, local_name)
            self.generate_vocabulary_context(
                local_name,
                self._local_vocabulary_path)

    def generate_context(self):
        self.generate_local_context()
        self.generate_remote_context()
        self._update_context(('vocabularies',))
        self.save_cache()
        self.readers.save_cache()

    def generate_output(self, writer):
        for voc in self.vocabularies:
            writer.write_file(
                voc.save_as, self.get_template(voc.template),
                self.context, voc=voc,
                relative_urls=self.settings['RELATIVE_URLS'],
                override_output=hasattr(voc, 'override_save_as'))
        voc_writer_finalized.send(self, writer=writer)


class RdfReader(BaseReader):

    file_extensions = ['rdf', 'owl']
    enabled = bool(rdflib_loaded)

    def __init__(self, *args, **kwargs):
        super(RdfReader, self).__init__(*args, **kwargs)

    def read(self, source_path):
        """Parse content and metadata of an RDF file."""
        logger.debug("Loading graph described in " + source_path)
        graph = rdflib.Graph()
        graph.parse(source_path)  # Graph.load() was removed in rdflib 6
        meta = {}
        queries = [
            f for f in listdir(self.settings["VOC_QUERIES_PATH"])
            if (isfile(join(self.settings["VOC_QUERIES_PATH"], f))
                and f.endswith(".sparql"))]
        for query_path in queries:
            query_file_path = join(
                self.settings["VOC_QUERIES_PATH"], query_path)
            with open(query_file_path, "r") as query_file:
                query = query_file.read()
            # The name of the query identifies the elements in the context.
            query_key = query_path.split(".")[0]
            result_set = graph.query(query)
            # Each query result is stored as a dictionary in the
            # vocabulary context, referenced by the query name as its key.
            # Multiple results are stored in a list.
            for result in result_set:
                if query_key not in meta:
                    meta[query_key] = result.asdict()
                elif isinstance(meta[query_key], list):
                    meta[query_key].append(result.asdict())
                else:
                    meta[query_key] = [meta[query_key], result.asdict()]
        # A query named lov_metadata.sparql must bind the mandatory
        # properties of a Vocabulary page.
        meta["iri"] = meta["lov_metadata"]["iri"]
        meta["description"] = meta["lov_metadata"]["description"]
        meta["version"] = meta["lov_metadata"]["version"]
        meta["title"] = meta["lov_metadata"]["title"]
        return "", meta
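

# A minimal sketch of a query file, e.g. voc_queries/lov_metadata.sparql,
# that would bind the mandatory properties expected above. The variable
# names must match the keys read from meta["lov_metadata"]; the property
# choices (dcterms, owl:versionInfo) are illustrative assumptions:
#
#     PREFIX dcterms: <http://purl.org/dc/terms/>
#     PREFIX owl: <http://www.w3.org/2002/07/owl#>
#     SELECT ?iri ?title ?description ?version WHERE {
#         ?iri a owl:Ontology ;
#              dcterms:title ?title ;
#              dcterms:description ?description ;
#              owl:versionInfo ?version .
#     }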


class Vocabulary(Page):
    mandatory_properties = ('iri', 'description', 'version', 'title')
    default_template = 'vocabulary'


def add_reader(readers):
    for ext in RdfReader.file_extensions:
        readers.reader_classes[ext] = RdfReader


def add_generator(pelican_object):
    logger.debug("Adding the vocabulary generator")
    return VocabularyGenerator


def register():
    signals.get_generators.connect(add_generator)
    signals.readers_init.connect(add_reader)
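
# A sketch of enabling the plugin in pelicanconf.py, assuming it lives in
# a local plugins/ directory (the directory layout is an assumption):
#
#     PLUGIN_PATHS = ['plugins']
#     PLUGINS = ['pelican_rdf']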