Skip to content

Commit b1ac3a2

Browse files
committed
add some tests for @datetime vs @content
it turns out you can't tell the difference here in the output so it doesn't matter if step 5.2 or step 6 processes them
1 parent 565791c commit b1ac3a2

File tree

2 files changed

+158
-2
lines changed

2 files changed

+158
-2
lines changed

html2rdf/src/element_processor.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,11 +239,12 @@ impl<'e, 'ec, 'r, 'pg, E: Element, PG: ProcessorGraph> ElementProcessor<'e, 'ec,
239239

240240
// TODO: this is not correct for non-HTML implementations
241241
ctx.is_root_element = ctx.element.tag_name() == "html";
242-
//debug_assert_eq!(ctx.is_root_element, eval_context.parent_object.is_none(),);
243242

244243
ctx.datatype = ctx.attr_1_term_or_curie_or_absiri("datatype");
245244
ctx.in_list = ctx.attr_raw("inlist").is_some();
246-
ctx.has_content = ctx.attr_raw("content").is_some();
245+
ctx.has_content = ctx.attr_raw("content").is_some()
246+
|| ctx.attr_raw("datetime").is_some()
247+
|| ctx.element.tag_name() == "time";
247248

248249
ctx
249250
}
@@ -413,6 +414,7 @@ impl<'e, 'ec, 'r, 'pg, E: Element, PG: ProcessorGraph> ElementProcessor<'e, 'ec,
413414
} else if self.is_root_element {
414415
Self::type_resource(&mut type_of, self.ctx.subject_ref(), "root element", output);
415416
}
417+
416418
self.subject_established(output);
417419

418420
if let Some(resource) = self.get_resource() {

html2rdf/tests/edge-cases.rs

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use oxiri::Iri;
55
use oxrdf::Graph;
66

77
mod utils;
8+
use utils::{assert_ask, assert_not_ask};
89

910
fn base() -> Iri<&'static str> {
1011
Iri::parse("http://example.test/").unwrap()
@@ -96,3 +97,156 @@ fn both_rel_and_property() {
9697
foaf:knows <#bob> .
9798
"#);
9899
}
100+
101+
// ============================================================================
102+
// @datetime and <time> interaction with step 5 routing
103+
//
104+
// [html-rdfa] extensions #9/#10 say @datetime and <time> text are used at
105+
// step 11 only — they don't mention modifying step 5's routing condition.
106+
//
107+
// However, we treat @datetime and <time> the same as @content for step 5
108+
// routing: their presence routes to step 5.2 (simple property) instead of
109+
// step 5.1 (complex property / chaining).
110+
//
111+
// In practice this makes no observable difference to the output graph,
112+
// because @datetime provides a literal value at step 11 that supersedes
113+
// the typed-resource fallback either way. The tests below document this.
114+
// ============================================================================
115+
116+
/// Baseline: @content routes to step 5.2.
117+
/// @property emits the @content literal, not the typed resource.
118+
#[test]
119+
fn content_blocks_chaining_with_typeof() {
120+
let html = r#"<!DOCTYPE html>
121+
<html>
122+
<head><title>test</title></head>
123+
<body>
124+
<div property="http://example.test/prop"
125+
typeof="http://example.test/Type"
126+
content="hello">
127+
<span property="http://example.test/child">nested</span>
128+
</div>
129+
</body>
130+
</html>"#;
131+
132+
let (output, _) = parse_html(html);
133+
134+
// @property emits the @content literal (step 5.2 / step 11)
135+
assert_ask(&output, r#"
136+
ASK { ?s <http://example.test/prop> "hello" }
137+
"#);
138+
139+
// The property value is a literal, NOT the typed bnode
140+
assert_not_ask(&output, r#"
141+
ASK { ?s <http://example.test/prop> ?bnode .
142+
?bnode a <http://example.test/Type> .
143+
FILTER(isBlank(?bnode)) }
144+
"#);
145+
}
146+
147+
/// @datetime is treated like @content for step 5 routing (step 5.2).
148+
/// @typeof creates a bnode as new_subject; @property emits the datetime literal.
149+
/// Children see the bnode as their parent subject.
150+
///
151+
/// Note: the output graph is identical regardless of whether @datetime
152+
/// routes to step 5.1 or 5.2, because @datetime provides a literal value
153+
/// at step 11 that supersedes the typed-resource fallback in either path.
154+
#[test]
155+
fn datetime_with_typeof_emits_literal() {
156+
let html = r#"<!DOCTYPE html>
157+
<html>
158+
<head><title>test</title></head>
159+
<body>
160+
<div property="http://example.test/prop"
161+
typeof="http://example.test/Type"
162+
datetime="2024-01-15">
163+
<span property="http://example.test/child">nested</span>
164+
</div>
165+
</body>
166+
</html>"#;
167+
168+
let (output, _) = parse_html(html);
169+
170+
// @property value is a datetime literal
171+
assert_ask(&output, r#"
172+
ASK { ?s <http://example.test/prop> "2024-01-15"^^<http://www.w3.org/2001/XMLSchema#date> }
173+
"#);
174+
175+
// A bnode is created and typed
176+
assert_ask(&output, r#"
177+
ASK { ?bnode a <http://example.test/Type> . FILTER(isBlank(?bnode)) }
178+
"#);
179+
180+
// Child is about the typed bnode
181+
assert_ask(&output, r#"
182+
ASK { ?bnode a <http://example.test/Type> ;
183+
<http://example.test/child> "nested" .
184+
FILTER(isBlank(?bnode)) }
185+
"#);
186+
}
187+
188+
/// Verify that @datetime + @typeof produces the same output as
189+
/// @content + @datatype + @typeof.
190+
#[test]
191+
fn datetime_treated_as_content_for_step5_routing() {
192+
// Verify that @datetime + @typeof produces the same output as
193+
// @content + @datatype + @typeof (i.e., step 5.2 behavior)
194+
let html_datetime = r#"<!DOCTYPE html>
195+
<html>
196+
<head><title>test</title></head>
197+
<body>
198+
<div property="http://example.test/prop"
199+
typeof="http://example.test/Type"
200+
datetime="2024-01-15">
201+
<span property="http://example.test/child">nested</span>
202+
</div>
203+
</body>
204+
</html>"#;
205+
206+
let html_content = r#"<!DOCTYPE html>
207+
<html>
208+
<head><title>test</title></head>
209+
<body>
210+
<div property="http://example.test/prop"
211+
typeof="http://example.test/Type"
212+
content="2024-01-15"
213+
datatype="http://www.w3.org/2001/XMLSchema#date">
214+
<span property="http://example.test/child">nested</span>
215+
</div>
216+
</body>
217+
</html>"#;
218+
219+
let (output_datetime, _) = parse_html(html_datetime);
220+
let (output_content, _) = parse_html(html_content);
221+
222+
assert!(
223+
utils::check_graphs_eq(&output_datetime, &output_content),
224+
"@datetime and @content+@datatype should produce identical graphs"
225+
);
226+
}
227+
228+
/// Same as `datetime_with_typeof_emits_literal`, but for <time> elements: the implicit datetime
229+
/// also routes to step 5.2.
230+
#[test]
231+
fn time_element_with_typeof_emits_literal() {
232+
let html = r#"<!DOCTYPE html>
233+
<html>
234+
<head><title>test</title></head>
235+
<body>
236+
<time property="http://example.test/prop"
237+
typeof="http://example.test/Type">2024-01-15</time>
238+
</body>
239+
</html>"#;
240+
241+
let (output, _) = parse_html(html);
242+
243+
// @property value is the datetime literal (from <time> text → implicit @datetime)
244+
assert_ask(&output, r#"
245+
ASK { ?s <http://example.test/prop> "2024-01-15"^^<http://www.w3.org/2001/XMLSchema#date> }
246+
"#);
247+
248+
// A bnode IS created and typed (by @typeof; routing is still step 5.2, not step 5.1 chaining)
249+
assert_ask(&output, r#"
250+
ASK { ?bnode a <http://example.test/Type> . FILTER(isBlank(?bnode)) }
251+
"#);
252+
}

0 commit comments

Comments
 (0)