1 | ;;+----------------------------------------------------------------------------- |
---|
2 | ;;+ Isidorus |
---|
3 | ;;+ (c) 2008-2010 Marc Kuester, Christoph Ludwig, Lukas Georgieff |
---|
4 | ;;+ |
---|
5 | ;;+ Isidorus is freely distributable under the LLGPL license. |
---|
6 | ;;+ You can find a detailed description in trunk/docs/LLGPL-LICENSE.txt and |
---|
7 | ;;+ trunk/docs/LGPL-LICENSE.txt. |
---|
8 | ;;+----------------------------------------------------------------------------- |
---|
9 | |
---|
10 | (defpackage :xml-tools |
---|
11 | (:use :cl :cxml :base-tools) |
---|
12 | (:import-from :constants |
---|
13 | *xml-ns* |
---|
14 | *xmlns-ns* |
---|
15 | *rdf-ns*) |
---|
16 | (:export :get-attribute |
---|
17 | :xpath-fn-string |
---|
18 | :xpath-child-elems-by-qname |
---|
19 | :xpath-single-child-elem-by-qname |
---|
20 | :xpath-select-location-path |
---|
21 | :xpath-select-single-location-path |
---|
22 | :get-ns-attribute |
---|
23 | :clear-child-nodes |
---|
24 | :get-node-name |
---|
25 | :child-nodes-or-text |
---|
26 | :get-xml-lang |
---|
27 | :get-xml-base |
---|
28 | :absolutize-value |
---|
29 | :absolutize-id |
---|
30 | :node-to-string)) |
---|
31 | |
---|
32 | (in-package :xml-tools) |
---|
33 | |
---|
34 | (defun absolutize-id (id xml-base tm-id) |
---|
35 | "Returns the passed id as an absolute uri computed |
---|
36 | with the given base and tm-id." |
---|
37 | (declare (string id tm-id)) |
---|
38 | (when (= (length id) 0) |
---|
39 | (error "From absolutize-id(): id must be set to a string with length > 0!")) |
---|
40 | (let ((prep-id (if (and (> (length id) 0) |
---|
41 | (eql (elt id 0) #\#)) |
---|
42 | id |
---|
43 | (concat "#" (string-left-trim "/" id))))) |
---|
44 | (absolutize-value prep-id xml-base tm-id))) |
---|
45 | |
---|
46 | |
---|
47 | (defun absolutize-value(value xml-base tm-id) |
---|
48 | "Returns the passed value as an absolute uri computed |
---|
49 | with the given xml-base and tm-id." |
---|
50 | (declare (string value tm-id)) |
---|
51 | (unless (absolute-uri-p tm-id) |
---|
52 | (error "From absolutize-value(): you must provide a stable identifier (PSI-style) for this TM: ~a" tm-id)) |
---|
53 | (when (> (count #\# value) 1) |
---|
54 | (error "From absolutize-value(): value is allowed to have only one \"#\": ~a" value)) |
---|
55 | (if (absolute-uri-p value) |
---|
56 | value |
---|
57 | (let ((prep-value |
---|
58 | (if (> (length value) 0) |
---|
59 | (string-left-trim "/" value) |
---|
60 | "")) |
---|
61 | (prep-base |
---|
62 | (if (> (length xml-base) 0) |
---|
63 | (string-right-trim "/" xml-base) |
---|
64 | ""))) |
---|
65 | (let ((fragment |
---|
66 | (if (and (> (length prep-value) 0) |
---|
67 | (eql (elt prep-value 0) #\#)) |
---|
68 | (concat prep-base prep-value) |
---|
69 | (concat prep-base "/" prep-value)))) |
---|
70 | (if (absolute-uri-p fragment) |
---|
71 | fragment |
---|
72 | (let ((prep-fragment |
---|
73 | (when (> (length fragment) 0) |
---|
74 | (string-left-trim "/" fragment))) |
---|
75 | (prep-tm-id |
---|
76 | (when (> (length tm-id) 0) |
---|
77 | (string-right-trim "/" tm-id)))) |
---|
78 | (let ((separator |
---|
79 | (if (eql (elt prep-fragment 0) #\#) |
---|
80 | "" |
---|
81 | "/"))) |
---|
82 | (concat prep-tm-id separator prep-fragment)))))))) |
---|
83 | |
---|
84 | |
---|
85 | (defun get-xml-lang(elem &key (old-lang nil)) |
---|
86 | "Computes the current xml-lang attribute and returns |
---|
87 | its value as a string." |
---|
88 | (declare (dom:element elem)) |
---|
89 | (let ((new-lang |
---|
90 | (let ((val |
---|
91 | (get-ns-attribute elem "lang" :ns-uri *xml-ns*))) |
---|
92 | (when val |
---|
93 | (string-trim '(#\Space #\Tab #\Newline) val))))) |
---|
94 | (if (dom:has-attribute-ns elem *xml-ns* "lang") |
---|
95 | (if (= (length new-lang) 0) |
---|
96 | nil |
---|
97 | new-lang) |
---|
98 | (if (= (length old-lang) 0) |
---|
99 | nil |
---|
100 | old-lang)))) |
---|
101 | |
---|
102 | |
---|
103 | (defun get-xml-base(elem &key (old-base nil)) |
---|
104 | "Computes the current xml-base attribute and returns |
---|
105 | its value as a string." |
---|
106 | (declare (dom:element elem)) |
---|
107 | (let ((new-base |
---|
108 | (let ((inner-base |
---|
109 | (if (> (count #\# (get-ns-attribute elem "base" |
---|
110 | :ns-uri *xml-ns*)) |
---|
111 | 1) |
---|
112 | (error "From get-xml-base(): the base-uri ~a is not valid" |
---|
113 | (get-ns-attribute elem "base" :ns-uri *xml-ns*)) |
---|
114 | (when (get-ns-attribute elem "base" :ns-uri *xml-ns*) |
---|
115 | (string-trim '(#\Space #\Tab #\Newline) |
---|
116 | (get-ns-attribute elem "base" :ns-uri *xml-ns*)))))) |
---|
117 | (if (and (> (length inner-base) 0) |
---|
118 | (eql (elt inner-base 0) #\/)) |
---|
119 | (subseq inner-base 1 (length inner-base)) |
---|
120 | inner-base)))) |
---|
121 | (if (or (absolute-uri-p new-base) |
---|
122 | (not old-base)) |
---|
123 | new-base |
---|
124 | (if (not new-base) |
---|
125 | old-base |
---|
126 | (concat (string-right-trim "/" old-base) |
---|
127 | "/" (string-left-trim "/" new-base)))))) |
---|
128 | |
---|
129 | |
---|
130 | (defun child-nodes-or-text (elem &key (trim nil)) |
---|
131 | "Returns a list of dom-elements or a string. |
---|
132 | Is there only one child which is not a text node it will be |
---|
133 | returned as a list. Are only text nodes available their |
---|
134 | results are concatenated and returned as a string. |
---|
135 | comment nodes are removed anyway." |
---|
136 | (declare (dom:element elem)) |
---|
137 | (let ((children |
---|
138 | (remove-if #'(lambda(node) |
---|
139 | (when (dom:comment-p node) |
---|
140 | t)) |
---|
141 | (dom:child-nodes elem))) |
---|
142 | (trim-fun (lambda(str) |
---|
143 | (if trim |
---|
144 | (string-trim '(#\Space #\Tab #\Newline) str) |
---|
145 | str)))) |
---|
146 | (if (find-if #'(lambda(node) |
---|
147 | (unless (dom:text-node-p node) |
---|
148 | t)) |
---|
149 | children) |
---|
150 | (remove-if #'(lambda(node) |
---|
151 | (when (dom:text-node-p node) |
---|
152 | (when (> (length |
---|
153 | (string-trim '(#\Space #\Tab #\Newline) |
---|
154 | (dom:node-value node))) |
---|
155 | 0) |
---|
156 | (error "Found literal content and xml-content in one node: ~a" |
---|
157 | (dom:node-value node))) |
---|
158 | t)) |
---|
159 | children) |
---|
160 | (let ((entire-string "")) |
---|
161 | (map 'list #'(lambda(text-node) |
---|
162 | (push-string (dom:node-value text-node) entire-string)) |
---|
163 | children) |
---|
164 | (if (> (length (apply trim-fun (list entire-string))) 0) |
---|
165 | (apply trim-fun (list entire-string)) |
---|
166 | nil))))) ;there were no text nodes available |
---|
167 | |
---|
168 | |
---|
169 | (defun get-node-name (elem) |
---|
170 | "Returns the node's name without a prefix." |
---|
171 | (if (find #\: (dom:node-name elem)) |
---|
172 | (subseq (dom:node-name elem) |
---|
173 | (length (concat (dom:prefix elem) ":"))) |
---|
174 | (dom:node-name elem))) |
---|
175 | |
---|
176 | |
---|
177 | (defun conditional-fn (fn b) |
---|
178 | (if b |
---|
179 | fn |
---|
180 | (lambda(x) (list x)) |
---|
181 | ) |
---|
182 | ) |
---|
183 | |
---|
184 | (defun handle-whitespace (strip-whitespace s) |
---|
185 | (cond |
---|
186 | (strip-whitespace |
---|
187 | (string-trim '(#\Space #\Tab #\Newline) s)) |
---|
188 | (t s))) |
---|
189 | |
---|
190 | (defun xpath-fn-string (elem &optional (strip-whitespace t)) |
---|
191 | "Extract the string value of an XML DOM element (with subelements)" |
---|
192 | (declare (dom:element elem)) |
---|
193 | (handle-whitespace strip-whitespace |
---|
194 | (apply #'concatenate 'string |
---|
195 | (map 'list |
---|
196 | (lambda (s) |
---|
197 | (cond |
---|
198 | ((dom:text-node-p s) |
---|
199 | (dom:node-value s)) |
---|
200 | ((dom:element-p s) |
---|
201 | (xpath-fn-string s)))) |
---|
202 | (dom:child-nodes elem))))) |
---|
203 | |
---|
204 | (defun attr-value (attr) |
---|
205 | (dom:node-value attr)) |
---|
206 | |
---|
207 | |
---|
208 | (defun has-qname (elem namespace-uri local-name) |
---|
209 | (declare (dom:node elem)) |
---|
210 | (and |
---|
211 | (dom:element-p elem) |
---|
212 | (equal (dom:local-name elem) local-name) |
---|
213 | (equal (dom:namespace-uri elem) namespace-uri)) |
---|
214 | ) |
---|
215 | |
---|
216 | |
---|
217 | (defun xpath-child-elems-by-qname (elem namespace-uri local-name) |
---|
218 | "Returns a vector(!) with all the child elements of elem that have the |
---|
219 | qname '(namespace-uri local-name)" |
---|
220 | (declare (dom:element elem)) |
---|
221 | (remove-if-not (lambda (el) (has-qname el namespace-uri local-name)) (dom:child-nodes elem)) |
---|
222 | ) |
---|
223 | |
---|
224 | |
---|
225 | (defun xpath-single-child-elem-by-qname (elem namespace-uri local-name) |
---|
226 | "Returns some child of elem that has qname (namespace-uri local-name) |
---|
227 | or nil if no such child exists." |
---|
228 | (declare (dom:element elem)) |
---|
229 | (find-if (lambda (el) (has-qname el namespace-uri local-name)) |
---|
230 | (dom:child-nodes elem))) |
---|
231 | |
---|
232 | |
---|
233 | (defun xpath-select-location-path (elem list-of-qnames) |
---|
234 | "Takes a list of qnames (expressed as pairs of namespace-uris and |
---|
235 | local names) and evaluates this as a location path" |
---|
236 | (declare (dom:element elem)) |
---|
237 | (let |
---|
238 | ((namespace-uri (eval (first (first list-of-qnames)))) |
---|
239 | (local-name (eval (second (first list-of-qnames)))) |
---|
240 | (rest-of-qnames (rest list-of-qnames))) |
---|
241 | (cond |
---|
242 | (list-of-qnames |
---|
243 | (cond |
---|
244 | ((string-starts-with-char #\@ local-name) |
---|
245 | (list (dom:get-attribute-node-ns elem namespace-uri (string-left-trim "@" local-name)))) |
---|
246 | (t |
---|
247 | (apply #'append |
---|
248 | (map 'list |
---|
249 | (lambda (child) |
---|
250 | (xpath-select-location-path child rest-of-qnames)) |
---|
251 | (xpath-child-elems-by-qname elem namespace-uri local-name)) |
---|
252 | )))) |
---|
253 | (t (list elem))))) |
---|
254 | |
---|
255 | |
---|
256 | (defun xpath-select-single-location-path (elem list-of-qnames) |
---|
257 | "Takes a list of qnames (expressed as pairs of namespace-uris and |
---|
258 | local names) and evaluates this as a location path; returns one element that |
---|
259 | satisfies this location path or nil if there is no such element." |
---|
260 | (first (xpath-select-location-path elem list-of-qnames)) |
---|
261 | ;; TODO: Optimize - it is inefficient to first construct the full list and then |
---|
262 | ;; to throw away all but the first element! |
---|
263 | ) |
---|
264 | |
---|
265 | |
---|
266 | (defun attributes-to-strings (attrs) |
---|
267 | (map 'list #'attr-value attrs) |
---|
268 | ) |
---|
269 | |
---|
270 | (defun get-attribute (elem attrname) |
---|
271 | "Returns the value of the attribute attrname of element elem. If |
---|
272 | elem is nil or does not have the attribut attrname, the function |
---|
273 | returns nil" |
---|
274 | (when elem |
---|
275 | (let |
---|
276 | ((attr-node |
---|
277 | (dom:get-attribute-node elem attrname))) |
---|
278 | (when attr-node |
---|
279 | (dom:node-value attr-node))))) |
---|
280 | |
---|
281 | ;;(defvar top (elt *topic-list* 501)) |
---|
282 | ;;(defvar scopes (xpath-select-location-path top '((*xtm-ns* "baseName") (*xtm-ns* "scope")))) |
---|
283 | |
---|
284 | (defun get-ns-attribute (elem name &key (ns-uri *rdf-ns*)) |
---|
285 | "Returns athe attributes value. If the value is |
---|
286 | a string of the length 0, the return value is nil" |
---|
287 | (declare (dom:element elem)) |
---|
288 | (declare (string ns-uri name)) |
---|
289 | (let ((attr |
---|
290 | (dom:get-attribute-ns elem ns-uri name))) |
---|
291 | (if (= (length attr) 0) |
---|
292 | nil |
---|
293 | attr))) |
---|
294 | |
---|
295 | |
---|
296 | (defun clear-child-nodes (elem) |
---|
297 | "Returns a list of child nodes, where all text-nodes and |
---|
298 | all comment nodes are removed." |
---|
299 | (declare (dom:element elem)) |
---|
300 | (loop for child-node across (dom:child-nodes elem) |
---|
301 | unless (or (dom:text-node-p child-node) |
---|
302 | (dom:comment-p child-node)) |
---|
303 | collect child-node)) |
---|
304 | |
---|
305 | |
---|
306 | (defun node-to-string (elem) |
---|
307 | "Transforms the passed node element recursively to a string." |
---|
308 | (if (dom:text-node-p elem) |
---|
309 | (dom:node-value elem) |
---|
310 | (let ((node-name (dom:node-name elem)) |
---|
311 | (attributes (dom:attributes elem)) |
---|
312 | (child-nodes (dom:child-nodes elem)) |
---|
313 | (elem-string "")) |
---|
314 | (push-string (concat "<" node-name) elem-string) |
---|
315 | (dom:map-node-map |
---|
316 | #'(lambda(attr) |
---|
317 | (let ((attr-name (dom:node-name attr)) |
---|
318 | (attr-value (dom:node-value attr))) |
---|
319 | (push-string (concat " " attr-name "=\"" attr-value "\"") |
---|
320 | elem-string))) |
---|
321 | attributes) |
---|
322 | (push-string ">" elem-string) |
---|
323 | (loop for child-node across child-nodes |
---|
324 | do (push-string (node-to-string child-node) elem-string)) |
---|
325 | (push-string (concat "</" node-name ">") elem-string)))) |
---|