1 | ;;+----------------------------------------------------------------------------- |
---|
2 | ;;+ Isidorus |
---|
3 | ;;+ (c) 2008-2009 Marc Kuester, Christoph Ludwig, Lukas Giessmann |
---|
4 | ;;+ |
---|
5 | ;;+ Isidorus is freely distributable under the LGPL license. |
---|
6 | ;;+ You can find a detailed description in trunk/docs/LGPL-LICENSE.txt. |
---|
7 | ;;+----------------------------------------------------------------------------- |
---|
8 | |
---|
9 | |
---|
10 | (defpackage :xml-tools |
---|
11 | (:use :cl :cxml) |
---|
12 | (:import-from :constants |
---|
13 | *xml-ns* |
---|
14 | *xmlns-ns* |
---|
15 | *rdf-ns*) |
---|
16 | (:export :get-attribute |
---|
17 | :xpath-fn-string |
---|
18 | :xpath-child-elems-by-qname |
---|
19 | :xpath-single-child-elem-by-qname |
---|
20 | :xpath-select-location-path |
---|
21 | :xpath-select-single-location-path |
---|
22 | :get-ns-attribute |
---|
23 | :clear-child-nodes |
---|
24 | :absolute-uri-p |
---|
25 | :get-node-name |
---|
26 | :child-nodes-or-text |
---|
27 | :get-xml-lang |
---|
28 | :get-xml-base |
---|
29 | :absolutize-value |
---|
30 | :absolutize-id |
---|
31 | :concatenate-uri |
---|
32 | :push-string |
---|
33 | :node-to-string)) |
---|
34 | |
---|
35 | (in-package :xml-tools) |
---|
36 | |
---|
37 | (defmacro push-string (obj place) |
---|
38 | "Imitates the push macro but instead of pushing object in a list, |
---|
39 | there will be appended the given string to the main string object." |
---|
40 | `(setf ,place (concatenate 'string ,place ,obj))) |
---|
41 | |
---|
42 | |
---|
43 | (defun concatenate-uri (absolute-ns value) |
---|
44 | "Returns a string conctenated of the absolut namespace an the given value |
---|
45 | separated by either '#' or '/'." |
---|
46 | (declare (string absolute-ns value)) |
---|
47 | (unless (and (> (length absolute-ns) 0) |
---|
48 | (> (length value) 0)) |
---|
49 | (error "From concatenate-uri(): absolute-ns and value must be of length > 0")) |
---|
50 | (unless (absolute-uri-p absolute-ns) |
---|
51 | (error "From concatenate-uri(): absolute-ns has to be an absolute URI: ~a" absolute-ns)) |
---|
52 | (let ((last-char |
---|
53 | (elt absolute-ns (- (length absolute-ns) 1))) |
---|
54 | (first-char |
---|
55 | (elt value 0))) |
---|
56 | (let ((separator |
---|
57 | (cond |
---|
58 | ((or (eql first-char #\#) |
---|
59 | (eql first-char #\/)) |
---|
60 | "") |
---|
61 | ((or (eql last-char #\#) |
---|
62 | (eql last-char #\/)) |
---|
63 | "") |
---|
64 | (t |
---|
65 | "/")))) |
---|
66 | (let ((prep-ns |
---|
67 | (if (and (eql last-char first-char) |
---|
68 | (or (eql last-char #\#) |
---|
69 | (eql last-char #\/))) |
---|
70 | (subseq absolute-ns 0 (- (length absolute-ns) 1)) |
---|
71 | (if (and (eql last-char #\#) |
---|
72 | (find #\/ value)) |
---|
73 | (progn |
---|
74 | (when (not (eql first-char #\/)) |
---|
75 | (setf separator "/")) |
---|
76 | (subseq absolute-ns 0 (- (length absolute-ns) 1))) |
---|
77 | absolute-ns)))) |
---|
78 | (concatenate 'string prep-ns separator value))))) |
---|
79 | |
---|
80 | |
---|
81 | (defun absolutize-id (id xml-base tm-id) |
---|
82 | "Returns the passed id as an absolute uri computed |
---|
83 | with the given base and tm-id." |
---|
84 | (declare (string id tm-id)) |
---|
85 | (when (= (length id) 0) |
---|
86 | (error "From absolutize-id(): id must be set to a string with length > 0!")) |
---|
87 | (let ((prep-id (if (and (> (length id) 0) |
---|
88 | (eql (elt id 0) #\#)) |
---|
89 | id |
---|
90 | (concatenate 'string "#" (string-left-trim "/" id))))) |
---|
91 | (absolutize-value prep-id xml-base tm-id))) |
---|
92 | |
---|
93 | |
---|
94 | (defun absolutize-value(value xml-base tm-id) |
---|
95 | "Returns the passed value as an absolute uri computed |
---|
96 | with the given xml-base and tm-id." |
---|
97 | (declare (string value tm-id)) |
---|
98 | (unless (absolute-uri-p tm-id) |
---|
99 | (error "From absolutize-value(): you must provide a stable identifier (PSI-style) for this TM: ~a" tm-id)) |
---|
100 | (when (> (count #\# value) 1) |
---|
101 | (error "From absolutize-value(): value is allowed to have only one \"#\": ~a" value)) |
---|
102 | (if (absolute-uri-p value) |
---|
103 | value |
---|
104 | (let ((prep-value |
---|
105 | (if (> (length value) 0) |
---|
106 | (string-left-trim "/" value) |
---|
107 | "")) |
---|
108 | (prep-base |
---|
109 | (if (> (length xml-base) 0) |
---|
110 | (string-right-trim "/" xml-base) |
---|
111 | ""))) |
---|
112 | (let ((fragment |
---|
113 | (if (and (> (length prep-value) 0) |
---|
114 | (eql (elt prep-value 0) #\#)) |
---|
115 | (concatenate 'string prep-base prep-value) |
---|
116 | (concatenate 'string prep-base "/" prep-value)))) |
---|
117 | (if (absolute-uri-p fragment) |
---|
118 | fragment |
---|
119 | (let ((prep-fragment |
---|
120 | (when (> (length fragment) 0) |
---|
121 | (string-left-trim "/" fragment))) |
---|
122 | (prep-tm-id |
---|
123 | (when (> (length tm-id) 0) |
---|
124 | (string-right-trim "/" tm-id)))) |
---|
125 | (let ((separator |
---|
126 | (if (eql (elt prep-fragment 0) #\#) |
---|
127 | "" |
---|
128 | "/"))) |
---|
129 | (concatenate 'string prep-tm-id separator prep-fragment)))))))) |
---|
130 | |
---|
131 | |
---|
132 | (defun get-xml-lang(elem &key (old-lang nil)) |
---|
133 | "Computes the current xml-lang attribute and returns |
---|
134 | its value as a string." |
---|
135 | (declare (dom:element elem)) |
---|
136 | (let ((new-lang |
---|
137 | (let ((val |
---|
138 | (get-ns-attribute elem "lang" :ns-uri *xml-ns*))) |
---|
139 | (when val |
---|
140 | (string-trim '(#\Space #\Tab #\Newline) val))))) |
---|
141 | (if (dom:has-attribute-ns elem *xml-ns* "lang") |
---|
142 | (if (= (length new-lang) 0) |
---|
143 | nil |
---|
144 | new-lang) |
---|
145 | (if (= (length old-lang) 0) |
---|
146 | nil |
---|
147 | old-lang)))) |
---|
148 | |
---|
149 | |
---|
150 | (defun get-xml-base(elem &key (old-base nil)) |
---|
151 | "Computes the current xml-base attribute and returns |
---|
152 | its value as a string." |
---|
153 | (declare (dom:element elem)) |
---|
154 | (let ((new-base |
---|
155 | (let ((inner-base |
---|
156 | (if (> (count #\# (get-ns-attribute elem "base" |
---|
157 | :ns-uri *xml-ns*)) |
---|
158 | 1) |
---|
159 | (error "From get-xml-base(): the base-uri ~a is not valid" |
---|
160 | (get-ns-attribute elem "base" :ns-uri *xml-ns*)) |
---|
161 | (when (get-ns-attribute elem "base" :ns-uri *xml-ns*) |
---|
162 | (string-trim '(#\Space #\Tab #\Newline) |
---|
163 | (get-ns-attribute elem "base" :ns-uri *xml-ns*)))))) |
---|
164 | (if (and (> (length inner-base) 0) |
---|
165 | (eql (elt inner-base 0) #\/)) |
---|
166 | (subseq inner-base 1 (length inner-base)) |
---|
167 | inner-base)))) |
---|
168 | (if (or (absolute-uri-p new-base) |
---|
169 | (not old-base)) |
---|
170 | new-base |
---|
171 | (if (not new-base) |
---|
172 | old-base |
---|
173 | (concatenate 'string (string-right-trim "/" old-base) |
---|
174 | "/" (string-left-trim "/" new-base)))))) |
---|
175 | |
---|
176 | |
---|
177 | (defun child-nodes-or-text (elem &key (trim nil)) |
---|
178 | "Returns a list of dom-elements or a string. |
---|
179 | Is there only one child which is not a text node it will be |
---|
180 | returned as a list. Are only text nodes available their |
---|
181 | results are concatenated and returned as a string. |
---|
182 | comment nodes are removed anyway." |
---|
183 | (declare (dom:element elem)) |
---|
184 | (let ((children |
---|
185 | (remove-if #'(lambda(node) |
---|
186 | (when (dom:comment-p node) |
---|
187 | t)) |
---|
188 | (dom:child-nodes elem))) |
---|
189 | (trim-fun (lambda(str) |
---|
190 | (if trim |
---|
191 | (string-trim '(#\Space #\Tab #\Newline) str) |
---|
192 | str)))) |
---|
193 | (if (find-if #'(lambda(node) |
---|
194 | (unless (dom:text-node-p node) |
---|
195 | t)) |
---|
196 | children) |
---|
197 | (remove-if #'(lambda(node) |
---|
198 | (when (dom:text-node-p node) |
---|
199 | (when (> (length |
---|
200 | (string-trim '(#\Space #\Tab #\Newline) |
---|
201 | (dom:node-value node))) |
---|
202 | 0) |
---|
203 | (error "Found literal content and xml-content in one node: ~a" |
---|
204 | (dom:node-value node))) |
---|
205 | t)) |
---|
206 | children) |
---|
207 | (let ((entire-string "")) |
---|
208 | (map 'list #'(lambda(text-node) |
---|
209 | (push-string (dom:node-value text-node) entire-string)) |
---|
210 | children) |
---|
211 | (if (> (length (apply trim-fun (list entire-string))) 0) |
---|
212 | (apply trim-fun (list entire-string)) |
---|
213 | nil))))) ;there were no text nodes available |
---|
214 | |
---|
215 | |
---|
216 | (defun absolute-uri-p (uri) |
---|
217 | "Returns t if the passed uri is an absolute one. This |
---|
218 | is indicated by a ':' with no leadgin '/'." |
---|
219 | (when uri |
---|
220 | (let ((position-of-colon |
---|
221 | (position #\: uri))) |
---|
222 | (declare (string uri)) |
---|
223 | (and position-of-colon (> position-of-colon 0) |
---|
224 | (not (find #\/ (subseq uri 0 position-of-colon))))))) |
---|
225 | |
---|
226 | |
---|
227 | (defun get-node-name (elem) |
---|
228 | "Returns the node's name without a prefix." |
---|
229 | (if (find #\: (dom:node-name elem)) |
---|
230 | (subseq (dom:node-name elem) |
---|
231 | (length (concatenate 'string (dom:prefix elem) ":"))) |
---|
232 | (dom:node-name elem))) |
---|
233 | |
---|
234 | |
---|
235 | (defun conditional-fn (fn b) |
---|
236 | (if b |
---|
237 | fn |
---|
238 | (lambda(x) (list x)) |
---|
239 | ) |
---|
240 | ) |
---|
241 | |
---|
242 | (defun handle-whitespace (strip-whitespace s) |
---|
243 | (cond |
---|
244 | (strip-whitespace |
---|
245 | (string-trim '(#\Space #\Tab #\Newline) s)) |
---|
246 | (t s))) |
---|
247 | |
---|
248 | (defun xpath-fn-string (elem &optional (strip-whitespace t)) |
---|
249 | "Extract the string value of an XML DOM element (with subelements)" |
---|
250 | (declare (dom:element elem)) |
---|
251 | ;; ((conditional-fn #'(lambda(s) (string-trim " #\t#\n" s)) strip-whitespace ; |
---|
252 | (handle-whitespace strip-whitespace |
---|
253 | (apply #'concatenate 'string |
---|
254 | (map 'list |
---|
255 | (lambda (s) |
---|
256 | (cond |
---|
257 | ((dom:text-node-p s) |
---|
258 | (dom:node-value s)) |
---|
259 | ((dom:element-p s) |
---|
260 | (xpath-fn-string s)))) |
---|
261 | (dom:child-nodes elem))))) |
---|
262 | |
---|
263 | (defun attr-value (attr) |
---|
264 | (dom:node-value attr)) |
---|
265 | |
---|
266 | |
---|
267 | (defun has-qname (elem namespace-uri local-name) |
---|
268 | (declare (dom:node elem)) |
---|
269 | (and |
---|
270 | (dom:element-p elem) |
---|
271 | (equal (dom:local-name elem) local-name) |
---|
272 | (equal (dom:namespace-uri elem) namespace-uri)) |
---|
273 | ) |
---|
274 | |
---|
275 | |
---|
276 | (defun xpath-child-elems-by-qname (elem namespace-uri local-name) |
---|
277 | "Returns a vector(!) with all the child elements of elem that have the |
---|
278 | qname '(namespace-uri local-name)" |
---|
279 | (declare (dom:element elem)) |
---|
280 | (remove-if-not (lambda (el) (has-qname el namespace-uri local-name)) (dom:child-nodes elem)) |
---|
281 | ) |
---|
282 | |
---|
283 | |
---|
284 | (defun xpath-single-child-elem-by-qname (elem namespace-uri local-name) |
---|
285 | "Returns some child of elem that has qname (namespace-uri local-name) or |
---|
286 | nil if no such child exists." |
---|
287 | (declare (dom:element elem)) |
---|
288 | (find-if (lambda (el) (has-qname el namespace-uri local-name)) (dom:child-nodes elem)) |
---|
289 | ) |
---|
290 | |
---|
291 | |
---|
292 | (defun string-starts-with (begin str) |
---|
293 | (equal (char str 0) begin)) |
---|
294 | |
---|
295 | |
---|
296 | (defun xpath-select-location-path (elem list-of-qnames) |
---|
297 | "Takes a list of qnames (expressed as pairs of namespace-uris and |
---|
298 | local names) and evaluates this as a location path" |
---|
299 | (declare (dom:element elem)) |
---|
300 | (let |
---|
301 | ((namespace-uri (eval (first (first list-of-qnames)))) |
---|
302 | (local-name (eval (second (first list-of-qnames)))) |
---|
303 | (rest-of-qnames (rest list-of-qnames))) |
---|
304 | (cond |
---|
305 | (list-of-qnames |
---|
306 | (cond |
---|
307 | ((string-starts-with #\@ local-name) |
---|
308 | (list (dom:get-attribute-node-ns elem namespace-uri (string-left-trim "@" local-name)))) |
---|
309 | (t |
---|
310 | (apply #'append |
---|
311 | (map 'list |
---|
312 | (lambda (child) |
---|
313 | (xpath-select-location-path child rest-of-qnames)) |
---|
314 | (xpath-child-elems-by-qname elem namespace-uri local-name)) |
---|
315 | )))) |
---|
316 | (t (list elem))))) |
---|
317 | |
---|
318 | |
---|
319 | (defun xpath-select-single-location-path (elem list-of-qnames) |
---|
320 | "Takes a list of qnames (expressed as pairs of namespace-uris and |
---|
321 | local names) and evaluates this as a location path; returns one element that |
---|
322 | satisfies this location path or nil if there is no such element." |
---|
323 | (first (xpath-select-location-path elem list-of-qnames)) |
---|
324 | ;; TODO: Optimize - it is inefficient to first construct the full list and then |
---|
325 | ;; to throw away all but the first element! |
---|
326 | ) |
---|
327 | |
---|
328 | |
---|
329 | (defun attributes-to-strings (attrs) |
---|
330 | (map 'list #'attr-value attrs) |
---|
331 | ) |
---|
332 | |
---|
333 | (defun get-attribute (elem attrname) |
---|
334 | "Returns the value of the attribute attrname of element elem. If |
---|
335 | elem is nil or does not have the attribut attrname, the function |
---|
336 | returns nil" |
---|
337 | (when elem |
---|
338 | (let |
---|
339 | ((attr-node |
---|
340 | (dom:get-attribute-node elem attrname))) |
---|
341 | (when attr-node |
---|
342 | (dom:node-value attr-node))))) |
---|
343 | |
---|
344 | ;;(defvar top (elt *topic-list* 501)) |
---|
345 | ;;(defvar scopes (xpath-select-location-path top '((*xtm-ns* "baseName") (*xtm-ns* "scope")))) |
---|
346 | |
---|
347 | (defun get-ns-attribute (elem name &key (ns-uri *rdf-ns*)) |
---|
348 | "Returns athe attributes value. If the value is |
---|
349 | a string of the length 0, the return value is nil" |
---|
350 | (declare (dom:element elem)) |
---|
351 | (declare (string ns-uri name)) |
---|
352 | (let ((attr |
---|
353 | (dom:get-attribute-ns elem ns-uri name))) |
---|
354 | (if (= (length attr) 0) |
---|
355 | nil |
---|
356 | attr))) |
---|
357 | |
---|
358 | |
---|
359 | (defun clear-child-nodes (elem) |
---|
360 | "Returns a list of child nodes, where all text-nodes and |
---|
361 | all comment nodes are removed." |
---|
362 | (declare (dom:element elem)) |
---|
363 | (loop for child-node across (dom:child-nodes elem) |
---|
364 | unless (or (dom:text-node-p child-node) |
---|
365 | (dom:comment-p child-node)) |
---|
366 | collect child-node)) |
---|
367 | |
---|
368 | |
---|
369 | (defun node-to-string (elem) |
---|
370 | "Transforms the passed node element recursively to a string." |
---|
371 | (if (dom:text-node-p elem) |
---|
372 | (dom:node-value elem) |
---|
373 | (let ((node-name (dom:node-name elem)) |
---|
374 | (attributes (dom:attributes elem)) |
---|
375 | (child-nodes (dom:child-nodes elem)) |
---|
376 | (elem-string "")) |
---|
377 | (push-string (concatenate 'string "<" node-name) elem-string) |
---|
378 | (dom:map-node-map |
---|
379 | #'(lambda(attr) |
---|
380 | (let ((attr-name (dom:node-name attr)) |
---|
381 | (attr-value (dom:node-value attr))) |
---|
382 | (push-string (concatenate 'string " " attr-name "=\"" |
---|
383 | attr-value "\"") |
---|
384 | elem-string))) |
---|
385 | attributes) |
---|
386 | (push-string ">" elem-string) |
---|
387 | (loop for child-node across child-nodes |
---|
388 | do (push-string (node-to-string child-node) elem-string)) |
---|
389 | (push-string (concatenate 'string "</" node-name ">") elem-string)))) |
---|