Skip to content

Commit 3e589ad

Browse files
committed
address a corner-case in XMLLiteral creation
1 parent 4752efb commit 3e589ad

File tree

2 files changed

+56
-5
lines changed

2 files changed

+56
-5
lines changed

html2rdf/src/host_language/xhtml.rs

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,19 +156,38 @@ impl super::Element for (&uppsala::Document<'_>, uppsala::NodeId) {
156156
// NOTE: I assume that ids are stable when cloning
157157
let el = doc.element_mut(self.1).unwrap();
158158

159+
// Collect inherited namespace declarations from ancestors of the current
160+
// element. We must be careful to not shadow these with a prefix that
161+
// only differs by case.
162+
let mut inherited_ns: std::collections::HashMap<&str, &str> =
163+
std::collections::HashMap::new();
164+
for ancestor_id in self.0.ancestors(self.1).into_iter().rev() {
165+
if let Some(ancestor_el) = self.0.element(ancestor_id) {
166+
for (prefix, uri) in &ancestor_el.namespace_declarations {
167+
inherited_ns.insert(prefix.as_ref(), uri.as_ref());
168+
}
169+
}
170+
}
171+
159172
// copy in all the active mappings as xmlns
160173
// this is simpler than tracking whether we need to add them as xmlns
161174
// or as prefixes, and it matches what the test suite does
162175
let mut prefixes = Vec::new();
163176
for (prefix, value) in active_mappings.mappings() {
164177
// note that there's a tricky little case where a prefix redefines an xmlns
165178
// in which case we need to ensure that _both_ are present on child nodes
166-
// so the XML side and the RDFa side remain correct
167-
if el
179+
// so the XML side and the RDFa side remain correct.
180+
// We must also check inherited ancestor namespaces — a lowercased @prefix
181+
// colliding with an inherited xmlns would corrupt element name resolution.
182+
let conflicts_with_existing = el
168183
.namespace_declarations
169184
.iter()
170185
.any(|(p, v)| p == prefix && v != value)
171-
{
186+
|| inherited_ns
187+
.get(prefix.as_str())
188+
.is_some_and(|v| *v != value);
189+
190+
if conflicts_with_existing {
172191
prefixes.push((prefix, value));
173192
} else {
174193
el.namespace_declarations
@@ -197,7 +216,7 @@ impl super::Element for (&uppsala::Document<'_>, uppsala::NodeId) {
197216
if !prefix_attr.is_empty() {
198217
prefix_attr.push(' ');
199218
}
200-
_ = write!(prefix_attr, "{prefix}: {value} ");
219+
_ = write!(prefix_attr, "{prefix}: {value}");
201220
}
202221
}
203222
if !prefix_attr.is_empty() {

html2rdf/tests/xml-tests.rs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use insta::assert_snapshot;
12
use oxrdf::{Graph, TermRef, vocab::rdf};
23

34
use crate::utils::serialize_graph;
@@ -102,7 +103,7 @@ fn element_prefix_xmlns_conflict_parent() {
102103
insta::assert_snapshot!(serialize_graph(&g, utils::base().as_str()), @r#"
103104
@base <http://example.test/> .
104105
@prefix rdf: <//www.w3.org/1999/02/22-rdf-syntax-ns#> .
105-
<> <ex:desc> "<k:p xmlns:k=\"http://another.ns/\" prefix=\"k: http://prefix.ns/ \">.<b xmlns=\"http://www.w3.org/1999/xhtml\" property=\"k:foo\">value</b>.</k:p>"^^rdf:XMLLiteral ;
106+
<> <ex:desc> "<k:p xmlns:k=\"http://another.ns/\" prefix=\"k: http://prefix.ns/\">.<b xmlns=\"http://www.w3.org/1999/xhtml\" property=\"k:foo\">value</b>.</k:p>"^^rdf:XMLLiteral ;
106107
<//prefix.ns/foo> "value" .
107108
"#);
108109
}
@@ -181,3 +182,34 @@ fn element_prefix_xmlns_conflict_child() {
181182
<//prefix.ns/foo> "value" .
182183
"#);
183184
}
185+
186+
/// When @prefix lowercases FOO→foo, and xmlns:foo already exists with a
187+
/// different URI, the lowercased prefix must not override the XML namespace.
188+
#[test]
189+
fn lowered_prefix_shadows_xmlns_in_xmlliteral() {
190+
let input = r#"<?xml version="1.0" encoding="UTF-8"?>
191+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:foo="http://original.ns/">
192+
<body prefix="FOO: http://rdfa-prefix.ns/">
193+
<div property="ex:desc" datatype="rdf:XMLLiteral">
194+
<foo:bar>content</foo:bar>
195+
</div>
196+
</body>
197+
</html>"#;
198+
let (g, _) = utils::parse_xhtml(input);
199+
200+
// The XMLLiteral should preserve the original XML namespace for foo:bar,
201+
// i.e. xmlns:foo="http://original.ns/", NOT the lowered @prefix value.
202+
let xml_literal = g
203+
.iter()
204+
.find_map(|t| match t.object {
205+
oxrdf::TermRef::Literal(l) if l.datatype() == rdf::XML_LITERAL => {
206+
Some(l.value().to_string())
207+
}
208+
_ => None,
209+
})
210+
.expect("should have XMLLiteral");
211+
212+
// foo:bar must be in namespace http://original.ns/, not http://rdfa-prefix.ns/
213+
assert_snapshot!(xml_literal,
214+
@r#"<foo:bar xmlns:foo="http://original.ns/" prefix="foo: http://rdfa-prefix.ns/">content</foo:bar>"#);
215+
}

0 commit comments

Comments
 (0)