Skip to content

Commit 60ba605

Browse files
committed
DXML-49: pervasively use pu-map
The ticket described a failure of using the builtin prefix xml:. The reason for this commit being so large is, that in the process of fixing it, I discovered that half of the system was still using an old method of keeping track of the xmlns environment. Properly fixing this simplified the whole library a lot and removed lots of workarounds, obsoleted by pu-map.
1 parent 21a0e8d commit 60ba605

9 files changed

Lines changed: 152 additions & 78 deletions

File tree

src/main/clojure/clojure/data/xml/event.clj

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,24 @@
1111
{:author "Herwig Hochleitner"}
1212
(:require [clojure.data.xml.protocols :refer
1313
[EventGeneration gen-event next-events xml-str]]
14-
[clojure.data.xml.name :refer [merge-nss separate-xmlns]]
14+
[clojure.data.xml.name :refer [separate-xmlns]]
1515
[clojure.data.xml.node :refer [element* cdata xml-comment]]
16-
[clojure.data.xml.impl :refer [extend-protocol-fns compile-if]])
16+
[clojure.data.xml.impl :refer [extend-protocol-fns compile-if]]
17+
[clojure.data.xml.pu-map :as pu])
1718
(:import (clojure.data.xml.node Element CData Comment)
1819
(clojure.lang Sequential IPersistentMap Keyword)
1920
(java.net URI URL)
2021
(java.util Date)
2122
(javax.xml.namespace QName)))
2223

2324
(definline element-nss* [element]
24-
`(get (meta ~element) :clojure.data.xml/nss {}))
25+
`(get (meta ~element) :clojure.data.xml/nss pu/EMPTY))
2526

2627
(defn element-nss
2728
"Get xmlns environment from element"
2829
[{:keys [attrs] :as element}]
2930
(separate-xmlns
30-
attrs #(merge-nss (element-nss* element) %2)))
31+
attrs #(pu/merge-prefix-map (element-nss* element) %2)))
3132

3233
; Represents a parse event.
3334
(defrecord StartElementEvent [tag attrs nss location-info])
@@ -50,7 +51,7 @@
5051
(separate-xmlns
5152
attrs #((if (seq content)
5253
->StartElementEvent ->EmptyElementEvent)
53-
tag %1 (merge-nss (element-nss* element) %2) nil)))
54+
tag %1 (pu/merge-prefix-map (element-nss* element) %2) nil)))
5455
:next-events (fn elem-next-events [{:keys [tag content]} next-items]
5556
(if (seq content)
5657
(list* content end-element-event next-items)

src/main/clojure/clojure/data/xml/jvm/emit.clj

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,19 +80,15 @@
8080
(recur (gen-prefix))
8181
prefix))))
8282

83-
(defn- compute-pu [pu ns-attrs attr-uris tag-uri tag-local]
83+
(defn- compute-pu [pu elem-pu attr-uris tag-uri tag-local]
8484
(let [tpu (pu/transient pu)
85-
;; add xmlns, if exists
86-
tpu (if-let [uri (get ns-attrs "xmlns")]
87-
(pu/assoc! tpu "" uri)
88-
tpu)
8985
;; add namespaces from current environment
9086
tpu (reduce-kv (fn [tpu ns-attr uri]
91-
(assert (string? ns-attr) (pr-str ns-attr))
87+
(assert (string? ns-attr) (pr-str ns-attr uri))
9288
(pu/assoc! tpu
9389
(compute-prefix tpu uri ns-attr)
9490
uri))
95-
tpu ns-attrs)
91+
tpu (pu/prefix-map elem-pu))
9692
;; add implicit namespaces used by tag, attrs
9793
tpu (reduce (fn [tpu uri]
9894
(pu/assoc! tpu (compute-prefix tpu uri nil) uri))

src/main/clojure/clojure/data/xml/jvm/parse.clj

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
[clojure.data.xml.impl :refer
1515
[static-case]]
1616
[clojure.data.xml.name :refer
17-
[qname]])
17+
[qname]]
18+
[clojure.data.xml.pu-map :as pu])
1819
(:import
1920
(javax.xml.stream
2021
XMLInputFactory XMLStreamReader XMLStreamConstants)
@@ -47,13 +48,12 @@
4748
(range (.getAttributeCount sreader)))))
4849

4950
(defn- nss-hash [^XMLStreamReader sreader parent-hash]
50-
(persistent!
51+
(pu/persistent!
5152
(reduce (fn [tr ^long i]
52-
(let [ns-pf (.getNamespacePrefix sreader i)]
53-
(assoc! tr (if (str/blank? ns-pf)
54-
"xmlns" ns-pf)
55-
(.getNamespaceURI ^XMLStreamReader sreader i))))
56-
(transient parent-hash)
53+
(pu/assoc! tr
54+
(.getNamespacePrefix sreader i)
55+
(.getNamespaceURI ^XMLStreamReader sreader i)))
56+
(pu/transient parent-hash)
5757
(range (.getNamespaceCount sreader)))))
5858

5959
(defn- location-hash
@@ -77,7 +77,7 @@
7777
(.next sreader)
7878
XMLStreamConstants/START_ELEMENT
7979
(if (include-node? :element)
80-
(let [ns-env (nss-hash sreader (or (first ns-envs) {}))
80+
(let [ns-env (nss-hash sreader (or (first ns-envs) pu/EMPTY))
8181
tag (qname (.getNamespaceURI sreader)
8282
(.getLocalName sreader)
8383
(.getPrefix sreader))

src/main/clojure/clojure/data/xml/name.cljc

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -127,17 +127,6 @@
127127
(alias al xn)
128128
(recur rst))))))
129129

130-
(defn merge-nss
131-
"Merge two attribute sets, deleting assignments of empty-string"
132-
[nss1 nss2]
133-
(persistent!
134-
(reduce-kv (fn [a k v]
135-
(if (str/blank? v)
136-
(dissoc! a k)
137-
(assoc! a k v)))
138-
(transient nss1)
139-
nss2)))
140-
141130
(defn xmlns-attr?
142131
"Is this qname an xmlns declaration?"
143132
[qn]
@@ -146,6 +135,26 @@
146135
(and (str/blank? uri)
147136
(= "xmlns" (qname-local qn))))))
148137

138+
(defn xmlns-attr-prefix [qn]
139+
(let [uri (qname-uri qn)]
140+
(if (str/blank? uri)
141+
(do (when-not (= "xmlns" (qname-local qn))
142+
(throw (ex-info "Not an xmlns-attr name" {:qname qn})))
143+
"")
144+
(do (when-not (= xmlns-uri uri)
145+
(throw (ex-info "Not an xmlns-attr name" {:qname qn})))
146+
(qname-local qn)))))
147+
148+
(defn legal-xmlns-binding! [prefix uri]
149+
(when (not= (= "xml" prefix)
150+
(= xml-uri uri))
151+
(throw (ex-info (str "The xmlns binding for prefix `xml` is fixed to `" xml-uri "`")
152+
{:attempted-mapping {:prefix prefix :uri uri}})))
153+
(when (not= (= "xmlns" prefix)
154+
(= xmlns-uri uri))
155+
(throw (ex-info (str "The xmlns binding for prefix `xmlns` is fixed to `" xmlns-uri "`")
156+
{:attempted-mapping {:prefix prefix :uri uri}}))))
157+
149158
(defn separate-xmlns
150159
"Call cont with two args: attributes and xmlns attributes"
151160
[attrs cont]
@@ -155,9 +164,11 @@
155164
(if (seq attrs')
156165
(let [val (get attrs qn)]
157166
(if (xmlns-attr? qn)
158-
(recur attrs*
159-
(assoc! xmlns* (qname-local qn) val)
160-
(next attrs'))
167+
(let [prefix (xmlns-attr-prefix qn)]
168+
(legal-xmlns-binding! prefix val)
169+
(recur attrs*
170+
(assoc! xmlns* prefix val)
171+
(next attrs')))
161172
(recur (assoc! attrs* qn val)
162173
xmlns*
163174
(next attrs'))))

src/main/clojure/clojure/data/xml/process.clj

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
[clojure.data.xml.name :as name :refer [gen-prefix *gen-prefix-counter* qname-uri]]
44
[clojure.data.xml.node :refer [element] :as node]
55
[clojure.data.xml.tree :refer [flatten-elements] :as tree]
6-
[clojure.string :as str]))
6+
[clojure.string :as str]
7+
[clojure.data.xml.pu-map :as pu]))
78

89
(defn- reduce-tree
910
"Optimized reducer for in-order traversal of nodes, with reduce-like accumulator"
@@ -26,7 +27,8 @@
2627
(if (map? el)
2728
(reduce-kv
2829
(fn [s attr _] (xf s (qname-uri attr)))
29-
(xf s (qname-uri (:tag el))) (:attrs el))
30+
(xf s (qname-uri (:tag el)))
31+
(:attrs el))
3032
s)))
3133

3234
(defn find-xmlns
@@ -42,11 +44,8 @@
4244
(with-meta
4345
xml {:clojure.data.xml/nss
4446
(binding [*gen-prefix-counter* 0]
45-
(persistent!
46-
(reduce (fn [tm uri]
47-
(if (str/blank? uri)
48-
tm
49-
(assoc! tm (keyword "xmlns" (gen-prefix)) uri)))
50-
(transient {}) (find-xmlns xml))))}))
51-
52-
47+
(-> (fn [tm uri]
48+
(pu/assoc! tm (gen-prefix) uri))
49+
qname-uri-xf
50+
(reduce-tree (pu/transient pu/EMPTY) xml)
51+
pu/persistent!))}))

src/main/clojure/clojure/data/xml/pu_map.cljc

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,24 @@
1212
[clojure.core :as core])
1313
(:refer-clojure :exclude [assoc! dissoc! transient persistent! get assoc]))
1414

15-
(defn transient [{:keys [u->ps p->u]}]
16-
(core/assoc! (core/transient {})
17-
:p->u (core/transient p->u)
18-
:u->ps (core/transient u->ps)))
15+
(def prefix-map :p->u)
16+
(def uri-map :u->ps)
17+
18+
;; TODO replace this with a deftype for memory savings
19+
(def EMPTY {:u->ps {name/xml-uri ["xml"]
20+
name/xmlns-uri ["xmlns"]}
21+
:p->u {"xml" name/xml-uri
22+
"xmlns" name/xmlns-uri}})
23+
24+
;; TODO implement valid? with internal consistency check
25+
26+
(defn transient [pu]
27+
(let [{:keys [u->ps p->u] :as pu*}
28+
(or pu EMPTY)]
29+
(assert (and u->ps p->u) (str "Not a pu-map " (pr-str pu*)))
30+
(core/assoc! (core/transient {})
31+
:p->u (core/transient p->u)
32+
:u->ps (core/transient u->ps))))
1933

2034
(defn persistent! [put]
2135
(core/persistent!
@@ -35,27 +49,24 @@
3549
(core/dissoc! u->ps uri)))
3650

3751
(defn assoc! [{:as put :keys [p->u u->ps]} prefix uri]
38-
(when (or (core/get #{"xml" "xmlns"} prefix)
39-
(core/get #{name/xml-uri name/xmlns-uri} uri))
40-
(throw (ex-info "Mapping for xml: and xmlns: prefixes are fixed by the standard"
41-
{:attempted-mapping {:prefix prefix
42-
:uri uri}})))
43-
(let [prev-uri (core/get p->u prefix)]
52+
(name/legal-xmlns-binding! prefix uri)
53+
(let [prefix* (str prefix)
54+
prev-uri (core/get p->u prefix*)]
4455
(core/assoc! put
4556
:p->u (if (str/blank? uri)
46-
(core/dissoc! p->u prefix)
47-
(core/assoc! p->u prefix uri))
57+
(core/dissoc! p->u prefix*)
58+
(core/assoc! p->u prefix* uri))
4859
:u->ps (if (str/blank? uri)
49-
(dissoc-uri! u->ps prev-uri prefix)
60+
(dissoc-uri! u->ps prev-uri prefix*)
5061
(cond
5162
(= uri prev-uri) u->ps
52-
(not prev-uri) (assoc-uri! u->ps uri prefix)
63+
(not prev-uri) (assoc-uri! u->ps uri prefix*)
5364
:else (-> u->ps
54-
(dissoc-uri! prev-uri prefix)
55-
(assoc-uri! uri prefix)))))))
65+
(dissoc-uri! prev-uri prefix*)
66+
(assoc-uri! uri prefix*)))))))
5667

5768
(defn get [{:keys [p->u]} prefix]
58-
(core/get p->u prefix))
69+
(core/get p->u (str prefix)))
5970

6071
(defn get-prefixes [{:keys [u->ps]} uri]
6172
(core/get u->ps uri))
@@ -66,13 +77,6 @@
6677
(persistent!
6778
(reduce-kv assoc! (transient put) kvs)))
6879

69-
70-
;; TODO replace this with a deftype for memory savings
71-
(def EMPTY {:u->ps {name/xml-uri ["xml"]
72-
name/xmlns-uri ["xmlns"]}
73-
:p->u {"xml" name/xml-uri
74-
"xmlns" name/xmlns-uri}})
75-
7680
(defn reduce-diff
7781
"A high-performance diffing operation, that reduces f over changed and removed prefixes"
7882
[f s
@@ -87,3 +91,13 @@
8791
s (f s p u)))
8892
s pu)]
8993
s))
94+
95+
(defn merge-prefix-map
96+
"Merge a prefix map into pu-map"
97+
[pu pm]
98+
(persistent! (reduce-kv assoc! (transient pu) pm)))
99+
100+
(defn merge
101+
"Merge two pu-maps, left to right"
102+
[pu {:keys [:p->u]}]
103+
(merge-prefix-map pu p->u))

src/test/clojure/clojure/data/xml/test_emit.clj

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@
88

99
(ns ^{:doc "Tests for emit to print XML text."
1010
:author "Chris Houser"}
11-
clojure.data.xml.test-emit
11+
clojure.data.xml.test-emit
1212
(:require
1313
[clojure.test :refer :all]
1414
[clojure.data.xml :refer :all]
1515
[clojure.data.xml.test-utils :refer [test-stream lazy-parse*]]
16-
[clojure.data.xml.impl :refer [compile-if]])
16+
[clojure.data.xml.impl :refer [compile-if]]
17+
[clojure.data.xml.name :as name]
18+
[clojure.data.xml.pu-map :as pu])
1719
(:import (javax.xml.namespace QName)))
1820

1921
(def deep-tree
@@ -219,11 +221,23 @@
219221
(element (as-qname "{NS2}bar")))]
220222
(is (= (parse-str (emit-str el)) el))))
221223

224+
(alias-uri :xml name/xml-uri)
225+
222226
(deftest test-default-xmlns
223227
(let [nss-meta (comp :clojure.data.xml/nss meta)]
224-
(is (= {"xmlns" "NS"}
228+
(is (= (pu/merge-prefix-map nil {"" "NS"})
225229
(nss-meta (parse-str "<foo xmlns=\"NS\"/>"))
226-
(nss-meta (parse-str (emit-str (parse-str "<foo xmlns=\"NS\"/>"))))))))
230+
(nss-meta (parse-str (emit-str (parse-str "<foo xmlns=\"NS\"/>")))))))
231+
(is (thrown? Exception (emit-str {:tag :el :attrs {(name/qname name/xmlns-uri "xml") "foo"}})))
232+
(is (thrown? Exception (emit-str {:tag :el :attrs {(name/qname name/xmlns-uri "xmlns") "foo"}})))
233+
(is (thrown? Exception (emit-str {:tag :el :attrs {:xmlns/xml "foo"}})))
234+
(is (thrown? Exception (emit-str {:tag :el :attrs {:xmlns/xmlns "foo"}})))
235+
(is (thrown? Exception (parse-str "<element xmlns:xmlns=\"http://www.w3.org/2000/xmlns/\" />"))
236+
"TODO: find out if this is standard conforming, or a bug in StAX")
237+
(is (= (emit-str {:tag :el :attrs {:xmlns/xmlns "http://www.w3.org/2000/xmlns/"}})
238+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><el/>"))
239+
(is (= (emit-str {:tag :el :attrs {:xmlns/xml "http://www.w3.org/XML/1998/namespace" ::xml/lang "en"}})
240+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><el xml:lang=\"en\"/>")))
227241

228242
(deftest test-empty-elements
229243
(is (= (emit-str {:tag :a :content []}) "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a/>"))
@@ -233,5 +247,12 @@
233247
(is (= (emit-str (with-meta (parse-str "<foo:element xmlns:foo=\"FOO:\"/>")
234248
nil))
235249
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><a:element xmlns:a=\"FOO:\"/>"))
236-
(is (= (emit-str (parse-str "<foo:element xmlns:foo=\"FOO:\"/>"))
237-
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><foo:element xmlns:foo=\"FOO:\"/>")))
250+
(is (= (emit-str (parse-str "<foo:element xmlns:xml=\"http://www.w3.org/XML/1998/namespace\" xmlns:foo=\"FOO:\"/>"))
251+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><foo:element xmlns:foo=\"FOO:\"/>"))
252+
(is (= (emit-str (parse-str "<element xmlns=\"FOO:\"/>"))
253+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><element xmlns=\"FOO:\"/>"))
254+
; builtins
255+
(is (= (emit-str (parse-str "<element xmlns:xml=\"http://www.w3.org/XML/1998/namespace\" xml:foo=\"FOO!\"/>"))
256+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><element xml:foo=\"FOO!\"/>"))
257+
(is (thrown? Exception (parse-str "<xmlns:el/>"))
258+
"TODO: find out if this is standard conforming, or a bug in StAX"))

src/test/clojure/clojure/data/xml/test_process.cljc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
find-xmlns])]]
55
[clojure.test :refer [deftest is]]
66
[clojure.walk :as w]
7-
[clojure.string :as str]))
7+
[clojure.string :as str]
8+
[clojure.data.xml.pu-map :as pu]))
89

910
(def test-data
1011
(element
1112
:foo nil
1213
(with-meta (element :bar {:xmlns "MOO:"} "some" "content")
13-
{:clojure.data.xml/nss {:xmlns/p "PAR:"}})
14+
{:clojure.data.xml/nss (pu/merge-prefix-map nil {"p" "PAR:"})})
1415
"more content"
1516
(element (qname "GOO:" "ho") {(qname "GEE:" "hi") "ma"} "ii")
1617
"end"))
@@ -19,7 +20,8 @@
1920
(:clj
2021
(deftest process
2122
(is (= (find-xmlns test-data) #{"" "GEE:" "GOO:"}))
22-
(is (= (set (vals (element-nss (aggregate-xmlns test-data)))) #{"GEE:" "GOO:"}))))
23+
(let [nss (set (vals (:p->u (element-nss (aggregate-xmlns test-data)))))]
24+
(is (every? #(contains? nss %) ["GEE:" "GOO:"])))))
2325

2426
(deftest walk-test
2527
(is (= {:tag :FOO, :attrs {}, :content ()}

0 commit comments

Comments
 (0)