1 | ;;+----------------------------------------------------------------------------- |
---|
2 | ;;+ Isidorus |
---|
3 | ;;+ (c) 2008-2010 Marc Kuester, Christoph Ludwig, Lukas Georgieff |
---|
4 | ;;+ |
---|
5 | ;;+ Isidorus is freely distributable under the LLGPL license. |
---|
6 | ;;+ You can find a detailed description in trunk/docs/LLGPL-LICENSE.txt and |
---|
7 | ;;+ trunk/docs/LGPL-LICENSE.txt. |
---|
8 | ;;+----------------------------------------------------------------------------- |
---|
9 | |
---|
10 | (in-package :rest-interface) |
---|
11 | |
---|
12 | ;in the midterm write a reader thread |
---|
13 | ;(make-thread (lambda () (write-line "Hello, world"))) in sbcl |
---|
14 | ;http://www.sbcl.org/manual/Threading-basics.html#Threading-basics |
---|
15 | |
---|
16 | (defparameter *read-frequency* 3600) ;read frequency in seconds |
---|
17 | |
---|
18 | |
---|
19 | (setf drakma:*drakma-default-external-format* :utf-8) |
---|
20 | |
---|
21 | (setf drakma:*text-content-types* |
---|
22 | (append drakma:*text-content-types* |
---|
23 | (list (cons "application" "x-tm+xml")) |
---|
24 | (list (cons "application" "xml")) |
---|
25 | (list (cons "application" "atom+xml")))) |
---|
26 | |
---|
27 | |
---|
28 | (defun read-url (url) |
---|
29 | "takes a url and returns a string with the url's contents if |
---|
30 | successful. Throws an error otherwise" |
---|
31 | (multiple-value-bind (reply status) |
---|
32 | (drakma:http-request url) |
---|
33 | (if (= status 200) |
---|
34 | reply |
---|
35 | (error "no successful connection in read-url")))) |
---|
36 | |
---|
37 | |
---|
38 | (defun read-fragments-feed (fragment-feed-url) |
---|
39 | "read a feed of TM fragments and build a fragments-feed object |
---|
40 | containing fragment-entries from it" |
---|
41 | ;from the feed we need only the source locator |
---|
42 | (let |
---|
43 | ((fragment-feed |
---|
44 | (read-url fragment-feed-url))) |
---|
45 | (parse-fragments-feed fragment-feed))) |
---|
46 | |
---|
47 | (defun read-snapshots-feed (snapshot-feed-url) |
---|
48 | "read a feed of TM snapshots and build a snapshot-feed object |
---|
49 | containing fragment-entries from it" |
---|
50 | ;from the feed we need only the source locator |
---|
51 | (let |
---|
52 | ((snapshot-feed |
---|
53 | (read-url snapshot-feed-url))) |
---|
54 | (parse-snapshots-feed snapshot-feed))) |
---|
55 | |
---|
56 | (defun import-fragments-feed (fragment-feed-url imported-snapshot-entry &key tm-id) |
---|
57 | ;a bit of a borderline case if that should be here or in the |
---|
58 | ;importer. Since it deals with the network interface, I think it |
---|
59 | ;makes sense to have it here, though |
---|
60 | (let |
---|
61 | ((feed (read-fragments-feed fragment-feed-url)) |
---|
62 | (revision (d:get-revision))) |
---|
63 | (loop for entry in (slot-value feed 'atom:entries) do |
---|
64 | (let |
---|
65 | ((top (d:get-item-by-psi (psi entry) :revision revision)) |
---|
66 | (xtm-id (atom:id entry)) |
---|
67 | (source-locator (source-locator-prefix feed))) |
---|
68 | ;check if xtm-id has already been imported or if the entry is older |
---|
69 | ;than the snapshot feed. If so, don't do it again |
---|
70 | (unless (or (string> (atom:updated entry) (atom:updated imported-snapshot-entry))) |
---|
71 | (when top |
---|
72 | (mark-as-deleted top :source-locator source-locator :revision revision)) |
---|
73 | ;(format t "Fragment feed: ~a~&" (link entry)) |
---|
74 | (importer-xtm1.0 |
---|
75 | (dom:document-element |
---|
76 | (cxml:parse-rod (read-url (link entry)) (cxml-dom:make-dom-builder))) |
---|
77 | :tm-id tm-id :xtm-id xtm-id :revision revision) |
---|
78 | ;the consequence of the algorithm is to add the source |
---|
79 | ;locator + a suitable internal id as an identifier to all |
---|
80 | ;characteristics and associations that don't already have |
---|
81 | ;one and then reuse it next time |
---|
82 | (add-source-locator |
---|
83 | (d:get-item-by-psi (psi entry) :revision revision) ;works even if the topic is only created during import |
---|
84 | :source-locator source-locator :revision revision)))))) |
---|
85 | |
---|
86 | (defun string-max (string-list &optional (max nil)) |
---|
87 | (cond |
---|
88 | ((null string-list) |
---|
89 | max) |
---|
90 | ((string> (first string-list) max) |
---|
91 | (string-max (rest string-list) (first string-list))) |
---|
92 | (t |
---|
93 | (string-max (rest string-list) max)))) |
---|
94 | |
---|
95 | (defun most-recent-entry (entry-list) |
---|
96 | (let |
---|
97 | ((most-recent-update (string-max (mapcar #'atom:updated entry-list)))) |
---|
98 | (find most-recent-update entry-list :key #'updated :test #'string=))) |
---|
99 | |
---|
100 | (defun most-recent-imported-snapshot (all-snapshot-entries) |
---|
101 | ; (let |
---|
102 | ; ((all-imported-entries |
---|
103 | ; (remove-if-not #'xtm-id-p all-snapshot-entries :key #'atom:id))) |
---|
104 | ; (most-recent-entry all-imported-entries)) |
---|
105 | (most-recent-entry all-snapshot-entries)) |
---|
106 | |
---|
107 | (defun import-snapshots-feed (snapshot-feed-url &key tm-id) |
---|
108 | "checks if we already imported any of this feed's snapshots. If not, |
---|
109 | finds the most recent snapshot and imports that. It returns the entry |
---|
110 | corresponding to the snapshot imported (now or previously)." |
---|
111 | (let* |
---|
112 | ((feed (read-snapshots-feed snapshot-feed-url)) |
---|
113 | (all-entries (slot-value feed 'atom:entries)) |
---|
114 | (most-recent-imported-entry (most-recent-entry all-entries))) |
---|
115 | (if most-recent-imported-entry |
---|
116 | most-recent-imported-entry |
---|
117 | (let* |
---|
118 | ((entry (most-recent-entry all-entries)) |
---|
119 | (snapshot-dom |
---|
120 | (dom:document-element |
---|
121 | (cxml:parse-rod (read-url (link entry)) (cxml-dom:make-dom-builder)))) |
---|
122 | (xtm-id (id entry)) |
---|
123 | (revision (get-revision))) |
---|
124 | ;;that *should* be the algorithm... |
---|
125 | ;; If a client has a local topic map that contains topic map |
---|
126 | ;; data from more than one server and wants to fetch and update |
---|
127 | ;; the latest full topic map from ONE source then it MUST do the |
---|
128 | ;; following. Apply the delete topic algorithm from below, but |
---|
129 | ;; apply it to the entire topic map. Then proceed in terms of 'A |
---|
130 | ;; Clean Start', by fetching the topic map and merging it in |
---|
131 | ;; (1b, 1.4.3.2) |
---|
132 | (importer-xtm1.0 snapshot-dom :tm-id tm-id :xtm-id xtm-id :revision revision) |
---|
133 | entry)))) |
---|
134 | |
---|
135 | (defun import-tm-feed (feed-url &optional (processed-feed-urls nil)) |
---|
136 | "takes the feed url of a collection feed, processes the dependencies, |
---|
137 | imports the first snapshot if necessary and then applies all fragments to it" |
---|
138 | ;the implementation may be a bit brutal, but relies only on |
---|
139 | ;guaranteed rel-attributes on the links |
---|
140 | (let* |
---|
141 | ((feed-string (read-url feed-url)) |
---|
142 | (feed-dom (dom:document-element |
---|
143 | (cxml:parse-rod feed-string (cxml-dom:make-dom-builder)))) |
---|
144 | (link-elems |
---|
145 | (xpath-select-location-path feed-dom |
---|
146 | '((*atom-ns* "entry") |
---|
147 | (*atom-ns* "link")))) |
---|
148 | (snapshot-feed-link-elem |
---|
149 | (find-if (lambda(elem) |
---|
150 | (string= (get-attribute elem "rel") |
---|
151 | "http://www.egovpt.org/sdshare/snapshotsfeed")) link-elems)) |
---|
152 | (fragment-feed-link-elem |
---|
153 | (find-if (lambda(elem) |
---|
154 | (string= (get-attribute elem "rel") |
---|
155 | "http://www.egovpt.org/sdshare/fragmentsfeed")) link-elems))) |
---|
156 | |
---|
157 | ;;Process dependencies |
---|
158 | (dolist (dependency-elem |
---|
159 | (xpath-select-location-path feed-dom |
---|
160 | '((*egovpt-ns* "dependency")))) |
---|
161 | (let ;;prevent circular dependencies |
---|
162 | ((dependent-feed-url |
---|
163 | (xpath-fn-string dependency-elem))) |
---|
164 | (unless (find dependent-feed-url processed-feed-urls) |
---|
165 | (format t "Recursively processing feed ~a~&" dependent-feed-url) |
---|
166 | (import-tm-feed dependent-feed-url (append processed-feed-urls feed-url))))) |
---|
167 | |
---|
168 | ;; import a snapshot (if necessary) and the process all fragments more |
---|
169 | ;; recent than the snapshot |
---|
170 | (let |
---|
171 | ((imported-snapshot-entry |
---|
172 | (import-snapshots-feed |
---|
173 | (get-attribute snapshot-feed-link-elem "href") |
---|
174 | :tm-id feed-url))) |
---|
175 | (assert imported-snapshot-entry) |
---|
176 | (with-writer-lock |
---|
177 | (import-fragments-feed |
---|
178 | (get-attribute fragment-feed-link-elem "href") |
---|
179 | imported-snapshot-entry :tm-id feed-url))))) |
---|
180 | |
---|
181 | |
---|
182 | |
---|