1 | ;;+----------------------------------------------------------------------------- |
---|
2 | ;;+ Isidorus |
---|
3 | ;;+ (c) 2008-2010 Marc Kuester, Christoph Ludwig, Lukas Georgieff |
---|
4 | ;;+ |
---|
5 | ;;+ Isidorus is freely distributable under the LLGPL license. |
---|
6 | ;;+ You can find a detailed description in trunk/docs/LLGPL-LICENSE.txt and |
---|
7 | ;;+ trunk/docs/LGPL-LICENSE.txt. |
---|
8 | ;;+----------------------------------------------------------------------------- |
---|
9 | |
---|
10 | (defpackage :rdf-importer |
---|
11 | (:use :cl :cxml :elephant :datamodel :isidorus-threading :datamodel |
---|
12 | :base-tools :constants :xml-constants :xml-tools |
---|
13 | :xtm-importer :isidorus-threading :exceptions) |
---|
14 | (:export :setup-rdf-module |
---|
15 | :import-from-rdf |
---|
16 | :init-rdf-module |
---|
17 | :*rdf-core-xtm* |
---|
18 | :*document-id*)) |
---|
19 | |
---|
20 | (in-package :rdf-importer) |
---|
21 | |
---|
22 | (defvar *rdf-types* (list "Description" "List" "Alt" "Bag" "Seq" |
---|
23 | "Statement" "Property" "XMLLiteral" "nil")) |
---|
24 | |
---|
25 | (defvar *rdf-properties* (list "type" "first" "rest" "subject" "predicate" |
---|
26 | "object" "li" "first" "rest")) |
---|
27 | |
---|
28 | (defvar *rdfs-types* (list "Resource" "Literal" "Class" "Datatype" |
---|
29 | "Container" "ContainerMembershipProperty")) |
---|
30 | |
---|
31 | (defvar *rdfs-properties* (list "subClassOf" "subPropertyOf" "domain" |
---|
32 | "range" "range" "label" "comment" |
---|
33 | "member" "seeAlso" "isDefinedBy")) |
---|
34 | |
---|
35 | (defvar *rdf-core-xtm* "rdf_core.xtm") |
---|
36 | |
---|
37 | (defvar *_n-map* nil) |
---|
38 | |
---|
39 | (defvar *document-id* "isidorus-rdf-document") |
---|
40 | |
---|
41 | |
---|
42 | (defun _n-p (node) |
---|
43 | "Returns t if the given value is of the form _[0-9]+" |
---|
44 | (let ((node-name (get-node-name node))) |
---|
45 | (when (and node-name |
---|
46 | (> (length node-name) 0) |
---|
47 | (eql (elt node-name 0) #\_)) |
---|
48 | (let ((rest |
---|
49 | (subseq node-name 1 (length node-name)))) |
---|
50 | (declare (string node-name)) |
---|
51 | (handler-case (let ((int |
---|
52 | (parse-integer rest))) |
---|
53 | int) |
---|
54 | (condition () nil)))))) |
---|
55 | |
---|
56 | |
---|
57 | |
---|
58 | (defun find-_n-name-of-property (property) |
---|
59 | "Returns the properties name of the form rdf:_n or nil." |
---|
60 | (let ((owner |
---|
61 | (find-if |
---|
62 | #'(lambda(x) |
---|
63 | (find-if |
---|
64 | #'(lambda(y) |
---|
65 | (eql (getf y :elem) property)) |
---|
66 | (getf x :props))) |
---|
67 | *_n-map*))) |
---|
68 | (let ((elem (find-if #'(lambda(x) |
---|
69 | (eql (getf x :elem) property)) |
---|
70 | (getf owner :props)))) |
---|
71 | (when elem |
---|
72 | (getf elem :name))))) |
---|
73 | |
---|
74 | |
---|
75 | |
---|
76 | (defun find-_n-name (owner-identifier property) |
---|
77 | "Returns a name of the form rdf:_n of the property element |
---|
78 | when it owns the tagname rdf:li and exists in the *_n-map* list. |
---|
79 | Otherwise the return value is nil." |
---|
80 | (let ((owner (find-if #'(lambda(x) |
---|
81 | (string= (getf x :owner) owner-identifier)) |
---|
82 | *_n-map*))) |
---|
83 | (when owner |
---|
84 | (let ((prop (find-if #'(lambda(x) |
---|
85 | (eql (getf x :elem) property)) |
---|
86 | (getf owner :props)))) |
---|
87 | (getf prop :name))))) |
---|
88 | |
---|
89 | |
---|
90 | (defun set-_n-name (owner-identifier property) |
---|
91 | "Sets a new name of the form _n for the passed property element and |
---|
92 | adds it to the list *_n-map*. If the property already exists in the |
---|
93 | *_n-map* list, there won't be created a new entry but returned the |
---|
94 | stored value name." |
---|
95 | (let ((name (find-_n-name owner-identifier property))) |
---|
96 | (if name |
---|
97 | name |
---|
98 | (let ((owner (find-if #'(lambda(x) |
---|
99 | (string= (getf x :owner) owner-identifier)) |
---|
100 | *_n-map*))) |
---|
101 | (if owner |
---|
102 | (let ((new-name |
---|
103 | (concat |
---|
104 | *rdf-ns* "_" |
---|
105 | (write-to-string (+ (length (getf owner :props)) 1))))) |
---|
106 | (push (list :elem property |
---|
107 | :name new-name) |
---|
108 | (getf owner :props)) |
---|
109 | new-name) |
---|
110 | (progn |
---|
111 | (push |
---|
112 | (list :owner owner-identifier |
---|
113 | :props (list |
---|
114 | (list :elem property |
---|
115 | :name (concat *rdf-ns* "_1")))) |
---|
116 | *_n-map*) |
---|
117 | "_1")))))) |
---|
118 | |
---|
119 | |
---|
120 | (defun get-type-of-node-name (node) |
---|
121 | (let ((map-item (find-_n-name-of-property node))) |
---|
122 | (if map-item |
---|
123 | map-item |
---|
124 | (let ((node-name (get-node-name node)) |
---|
125 | (node-ns (dom:namespace-uri node))) |
---|
126 | (concatenate-uri node-ns node-name))))) |
---|
127 | |
---|
128 | |
---|
129 | (defun parse-node-name (node) |
---|
130 | "Parses the given node's name to the known rdf/rdfs nodes and arcs. |
---|
131 | If the given name es equal to a property an error is thrown otherwise |
---|
132 | there is displayed a warning when the rdf ord rdfs namespace is used." |
---|
133 | (declare (dom:element node)) |
---|
134 | (let ((node-name (get-node-name node)) |
---|
135 | (node-ns (dom:namespace-uri node)) |
---|
136 | (err-pref "From parse-node-name(): ")) |
---|
137 | (when (string= node-ns *rdf-ns*) |
---|
138 | (when (find node-name *rdf-properties* :test #'string=) |
---|
139 | (error "~ardf:~a is a property and not allowed here!" |
---|
140 | err-pref node-name)) |
---|
141 | (when (string= node-name "RDF") |
---|
142 | (error "~ardf:RDF not allowed here!" |
---|
143 | err-pref)) |
---|
144 | (unless (find node-name *rdf-types* :test #'string=) |
---|
145 | (format t "~aWarning: ~a is not a known RDF type!~%" |
---|
146 | err-pref node-name))) |
---|
147 | (when (string= node-ns *rdfs-ns*) |
---|
148 | (when (find node-name *rdfs-properties* :test #'string=) |
---|
149 | (error "~ardfs:~a is a property and not allowed here!" |
---|
150 | err-pref node-name)) |
---|
151 | (unless (find node-name *rdfs-types* :test #'string=) |
---|
152 | (format t "~aWarning: rdfs:~a is not a known rdfs:type!~%" |
---|
153 | err-pref node-name)))) |
---|
154 | t) |
---|
155 | |
---|
156 | |
---|
157 | (defun parse-node(node) |
---|
158 | "Parses a node that represents a rdf-resource." |
---|
159 | (declare (dom:element node)) |
---|
160 | (parse-node-name node) |
---|
161 | (let ((ID (get-ns-attribute node "ID")) |
---|
162 | (nodeID (get-ns-attribute node "nodeID")) |
---|
163 | (about (get-ns-attribute node "about")) |
---|
164 | (err-pref "From parse-node(): ") |
---|
165 | (resource (get-ns-attribute node "resource")) |
---|
166 | (datatype (get-ns-attribute node "datatype")) |
---|
167 | (parseType (get-ns-attribute node "parseType")) |
---|
168 | (class (get-ns-attribute node "Class" :ns-uri *rdfs-ns*)) |
---|
169 | (subClassOf (get-ns-attribute node "subClassOf" :ns-uri *rdfs-ns*))) |
---|
170 | (when (and about nodeID) |
---|
171 | (error "~ardf:about and rdf:nodeID are not allowed in parallel use: (~a) (~a)!" |
---|
172 | err-pref about nodeID)) |
---|
173 | (when (and ID |
---|
174 | (or about nodeID)) |
---|
175 | (error "~awhen rdf:ID is set the attributes rdf:~a is not allowed: ~a!" |
---|
176 | err-pref (if about "about" "nodeID") (or about nodeID))) |
---|
177 | (unless (or ID nodeID about (dom:has-attribute-ns node *rdf2tm-ns* "UUID")) |
---|
178 | (dom:set-attribute-ns node *rdf2tm-ns* "UUID" (get-uuid))) |
---|
179 | (handler-case (let ((content (child-nodes-or-text node :trim t))) |
---|
180 | (when (stringp content) |
---|
181 | (error "text-content not allowed here!"))) |
---|
182 | (condition (err) (error "~a~a" err-pref err))) |
---|
183 | (when (or resource datatype parseType class subClassOf) |
---|
184 | (error "~a~a is not allowed here (~a)!" |
---|
185 | err-pref (cond |
---|
186 | (resource (concat "resource(" resource ")")) |
---|
187 | (datatype (concat "datatype(" datatype ")")) |
---|
188 | (parseType (concat "parseType(" parseType ")")) |
---|
189 | (class (concat "Class(" class ")")) |
---|
190 | (subClassOf (concat "subClassOf(" subClassOf ")"))) |
---|
191 | (dom:node-name node))) |
---|
192 | (dolist (item *rdf-types*) |
---|
193 | (when (get-ns-attribute node item) |
---|
194 | (error "~ardf:~a is a type and not allowed here!" |
---|
195 | err-pref item))) |
---|
196 | (dolist (item *rdfs-types*) |
---|
197 | (when (get-ns-attribute node item :ns-uri *rdfs-ns*) |
---|
198 | (error "~ardfs:~a is a type and not allowed here!" |
---|
199 | err-pref item)))) |
---|
200 | t) |
---|
201 | |
---|
202 | |
---|
203 | (defun get-node-refs (nodes tm-id parent-xml-base) |
---|
204 | "Returns a list of node references that can be used as topic IDs." |
---|
205 | (when (and nodes |
---|
206 | (> (length nodes) 0)) |
---|
207 | (loop for node across nodes |
---|
208 | collect (let ((xml-base (get-xml-base node :old-base parent-xml-base))) |
---|
209 | (parse-node node) |
---|
210 | (let ((ID (when (get-ns-attribute node "ID") |
---|
211 | (absolutize-id (get-ns-attribute node "ID") |
---|
212 | xml-base tm-id))) |
---|
213 | (nodeID (get-ns-attribute node "nodeID")) |
---|
214 | (about (when (get-ns-attribute node "about") |
---|
215 | (absolutize-value |
---|
216 | (get-ns-attribute node "about") |
---|
217 | xml-base tm-id))) |
---|
218 | (UUID (get-ns-attribute node "UUID" :ns-uri *rdf2tm-ns*))) |
---|
219 | (list :topicid (or ID about nodeID UUID) |
---|
220 | :psi (or ID about))))))) |
---|
221 | |
---|
222 | |
---|
223 | (defun parse-property-name (property owner-identifier) |
---|
224 | "Parses the given property's name to the known rdf/rdfs nodes and arcs. |
---|
225 | If the given name es equal to an node an error is thrown otherwise |
---|
226 | there is displayed a warning when the rdf ord rdfs namespace is used." |
---|
227 | (declare (dom:element property)) |
---|
228 | (let ((property-name (get-node-name property)) |
---|
229 | (property-ns (dom:namespace-uri property)) |
---|
230 | (err-pref "From parse-property-name(): ")) |
---|
231 | (when (string= property-ns *rdf-ns*) |
---|
232 | (when (find property-name *rdf-types* :test #'string=) |
---|
233 | (error "~ardf:~a is a node and not allowed here!" |
---|
234 | err-pref property-name)) |
---|
235 | (when (string= property-name "RDF") |
---|
236 | (error "~ardf:RDF not allowed here!" |
---|
237 | err-pref)) |
---|
238 | (unless (or (find property-name *rdf-properties* :test #'string=) |
---|
239 | (_n-p property)) |
---|
240 | (format t "~aWarning: rdf:~a is not a known RDF property!~%" |
---|
241 | err-pref property-name))) |
---|
242 | (when (string= property-ns *rdfs-ns*) |
---|
243 | (when (find property-name *rdfs-types* :test #'string=) |
---|
244 | (error "~ardfs:~a is a type and not allowed here!" |
---|
245 | err-pref property-name)) |
---|
246 | (unless (find property-name *rdfs-properties* :test #'string=) |
---|
247 | (format t "~aWarning: rdfs:~a is not a known rdfs:type!~%" |
---|
248 | err-pref property-name))) |
---|
249 | (when (and (string= property-ns *rdf-ns*) |
---|
250 | (string= property-name "li")) |
---|
251 | (set-_n-name owner-identifier property))) |
---|
252 | t) |
---|
253 | |
---|
254 | |
---|
255 | (defun parse-property (property owner-identifier) |
---|
256 | "Parses a property that represents a rdf-arc." |
---|
257 | (declare (dom:element property)) |
---|
258 | (let ((err-pref "From parse-property(): ") |
---|
259 | (node-name (get-node-name property)) |
---|
260 | (node-ns (dom:namespace-uri property)) |
---|
261 | (nodeID (get-ns-attribute property "nodeID")) |
---|
262 | (resource (get-ns-attribute property "resource")) |
---|
263 | (datatype (get-ns-attribute property "datatype")) |
---|
264 | (type (get-ns-attribute property "type")) |
---|
265 | (parseType (get-ns-attribute property "parseType")) |
---|
266 | (about (get-ns-attribute property "about")) |
---|
267 | (subClassOf (get-ns-attribute property "subClassOf" :ns-uri *rdfs-ns*)) |
---|
268 | (literals (get-literals-of-property property nil)) |
---|
269 | (content (child-nodes-or-text property :trim t))) |
---|
270 | (parse-property-name property owner-identifier) |
---|
271 | (when (and parseType |
---|
272 | (or nodeID resource datatype type literals)) |
---|
273 | (error "~awhen rdf:parseType is set the attributes: ~a => ~a are not allowed!" |
---|
274 | err-pref |
---|
275 | (append (list (cond (nodeID "rdf:nodeID") |
---|
276 | (resource "rdf:resource") |
---|
277 | (datatype "rdf:datatype") |
---|
278 | (type "rdf:type"))) |
---|
279 | (map 'list #'(lambda(x)(getf x :type)) literals)) |
---|
280 | (append (list (or nodeID resource datatype type)) |
---|
281 | (map 'list #'(lambda(x)(getf x :value)) literals)))) |
---|
282 | (when (and parseType |
---|
283 | (not (or (string= parseType "Resource") |
---|
284 | (string= parseType "Literal") |
---|
285 | (string= parseType "Collection")))) |
---|
286 | (error "~aunknown rdf:parseType: ~a" |
---|
287 | err-pref parseType)) |
---|
288 | (when (and parseType |
---|
289 | (or (string= parseType "Resource") |
---|
290 | (string= parseType "Collection"))) |
---|
291 | (unless (dom:has-attribute-ns property *rdf2tm-ns* "UUID") |
---|
292 | (dom:set-attribute-ns property *rdf2tm-ns* "UUID" (get-uuid)))) |
---|
293 | (when (and parseType (string= parseType "Resource") (stringp content)) |
---|
294 | (error "~ardf:parseType is set to 'Resource' expecting xml content: ~a!" |
---|
295 | err-pref content)) |
---|
296 | (when (and parseType |
---|
297 | (string= parseType "Collection") |
---|
298 | (stringp content)) |
---|
299 | (error "~ardf:parseType is set to 'Collection' expecting resource content: ~a" |
---|
300 | err-pref content)) |
---|
301 | (when (and nodeID resource) |
---|
302 | (error "~aondly one of rdf:nodeID and rdf:resource is allowed: (~a) (~a)!" |
---|
303 | err-pref nodeID resource)) |
---|
304 | (when (and (or nodeID resource type literals) |
---|
305 | datatype) |
---|
306 | (error "~aonly one of ~a and rdf:datatype (~a) is allowed!" |
---|
307 | err-pref |
---|
308 | (cond |
---|
309 | (nodeID (concat "rdf:nodeID (" nodeID ")")) |
---|
310 | (resource (concat "rdf:resource (" resource ")")) |
---|
311 | (type (concat "rdf:type (" type ")")) |
---|
312 | (literals literals)) |
---|
313 | datatype)) |
---|
314 | (when (and (or nodeID resource) |
---|
315 | (> (length content) 0)) |
---|
316 | (error "~awhen ~a is set no content is allowed: ~a!" |
---|
317 | err-pref |
---|
318 | (cond |
---|
319 | (nodeID (concat "rdf:nodeID (" nodeID ")")) |
---|
320 | (resource (concat "rdf:resource (" resource ")"))) |
---|
321 | content)) |
---|
322 | (when (and type |
---|
323 | (stringp content) |
---|
324 | (> (length content) 0)) |
---|
325 | (error "~awhen rdf:type is set no literal content is allowed: ~a!" |
---|
326 | err-pref content)) |
---|
327 | (when (and (or type |
---|
328 | (and (string= node-name "type") |
---|
329 | (string= node-ns *rdf-ns*)) |
---|
330 | (> (length literals) 0)) |
---|
331 | (not (or nodeID resource)) |
---|
332 | (not content)) |
---|
333 | (unless (dom:has-attribute-ns property *rdf2tm-ns* "UUID") |
---|
334 | (dom:set-attribute-ns property *rdf2tm-ns* "UUID" (get-uuid)))) |
---|
335 | (when (or about subClassOf) |
---|
336 | (error "~a~a not allowed here!" |
---|
337 | err-pref |
---|
338 | (if about |
---|
339 | (concat "rdf:about (" about ")") |
---|
340 | (concat "rdfs:subClassOf (" subClassOf ")")))) |
---|
341 | (when (and (string= node-name "subClassOf") |
---|
342 | (string= node-ns *rdfs-ns*) |
---|
343 | (not (or nodeID resource content))) |
---|
344 | (unless (dom:has-attribute-ns property *rdf2tm-ns* "UUID") |
---|
345 | (dom:set-attribute-ns property *rdf2tm-ns* "UUID" (get-uuid)))) |
---|
346 | (when (and (or (and (string= node-name "type") |
---|
347 | (string= node-ns *rdf-ns*)) |
---|
348 | (and (string= node-name "subClassOf") |
---|
349 | (string= node-ns *rdfs-ns*))) |
---|
350 | (and (> (length content) 0) |
---|
351 | (stringp content))) |
---|
352 | (error "~awhen property is ~a literal content is not allowed: ~a!" |
---|
353 | err-pref (if (string= node-name "type") |
---|
354 | "rdf:type" |
---|
355 | "rdfs:subClassOf") |
---|
356 | content)) |
---|
357 | (dolist (item *rdf-types*) |
---|
358 | (when (get-ns-attribute property item) |
---|
359 | (error "~ardf:~a is a type and not allowed here!" |
---|
360 | err-pref item))) |
---|
361 | (dolist (item *rdfs-types*) |
---|
362 | (when (get-ns-attribute property item :ns-uri *rdfs-ns*) |
---|
363 | (error "~ardfs:~a is a type and not allowed here!" |
---|
364 | err-pref item)))) |
---|
365 | t) |
---|
366 | |
---|
367 | |
---|
368 | (defun parse-properties-of-node (node owner-identifier) |
---|
369 | "Parses all node's properties by calling the parse-propery |
---|
370 | function and sets all rdf:li properties as a tupple to the |
---|
371 | *_n-map* list." |
---|
372 | (let ((child-nodes (child-nodes-or-text node :trim t))) |
---|
373 | (when (get-ns-attribute node "li") |
---|
374 | (dom:map-node-map |
---|
375 | #'(lambda(attr) |
---|
376 | (when (and (string= (get-node-name attr) "li") |
---|
377 | (string= (dom:namespace-uri attr) *rdf-ns*)) |
---|
378 | (set-_n-name owner-identifier attr))) |
---|
379 | (dom:attributes node))) |
---|
380 | (when child-nodes |
---|
381 | (loop for property across child-nodes |
---|
382 | do (parse-property property owner-identifier)))) |
---|
383 | t) |
---|
384 | |
---|
385 | |
---|
386 | (defun get-absolute-attribute (elem tm-id parent-xml-base attr-name |
---|
387 | &key (ns-uri *rdf-ns*)) |
---|
388 | "Returns an absolute 'attribute' or nil." |
---|
389 | (declare (dom:element elem)) |
---|
390 | (declare (string attr-name)) |
---|
391 | (tm-id-p tm-id "get-ID") |
---|
392 | (let ((attr (get-ns-attribute elem attr-name :ns-uri ns-uri)) |
---|
393 | (xml-base (get-xml-base elem :old-base parent-xml-base))) |
---|
394 | (when attr |
---|
395 | (if (and (string= ns-uri *rdf-ns*) |
---|
396 | (string= attr-name "ID")) |
---|
397 | (absolutize-id attr xml-base tm-id) |
---|
398 | (absolutize-value attr xml-base tm-id))))) |
---|
399 | |
---|
400 | |
---|
401 | (defun get-datatype (elem tm-id parent-xml-base) |
---|
402 | "Returns a datatype value. The default is xml:string." |
---|
403 | (let ((datatype |
---|
404 | (get-absolute-attribute elem tm-id parent-xml-base "datatype"))) |
---|
405 | (if datatype |
---|
406 | datatype |
---|
407 | *xml-string*))) |
---|
408 | |
---|
409 | |
---|
410 | (defun tm-id-p (tm-id fun-name) |
---|
411 | "Checks the validity of the passed tm-id." |
---|
412 | (unless (absolute-uri-p tm-id) |
---|
413 | (error "From ~a(): you must provide a stable identifier (PSI-style) for this TM: ~a!" |
---|
414 | fun-name tm-id))) |
---|
415 | |
---|
416 | |
---|
417 | (defun get-types-of-node (elem tm-id &key (parent-xml-base nil)) |
---|
418 | "Returns a plist of all node's types of the form |
---|
419 | (:topicid <string> :psi <string> :ID <string>)." |
---|
420 | (remove-if |
---|
421 | #'null |
---|
422 | (append (unless (string= (get-type-of-node-name elem) |
---|
423 | (concat *rdf-ns* "Description")) |
---|
424 | (list |
---|
425 | (list :topicid (get-type-of-node-name elem) |
---|
426 | :psi (get-type-of-node-name elem) |
---|
427 | :ID nil))) |
---|
428 | (get-types-of-node-content elem tm-id parent-xml-base)))) |
---|