Skip to content

Commit 512b3e7

Browse files
committed
prep for publish: doc update & fix Linux build
1 parent aa73ce1 commit 512b3e7

File tree

9 files changed

+24
-15
lines changed

9 files changed

+24
-15
lines changed

Cargo.toml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,13 @@ authors = ["George Pollard <porges@porg.es>"]
77
edition = "2024"
88
keywords = ["html", "rdf", "rdfa"]
99
license = "Apache-2.0"
10-
publish = false
1110
readme = "README.md"
1211
repository = "https://github.com/Porges/html2rdf"
1312
rust-version = "1.88.0"
1413

1514
[workspace.dependencies]
16-
html2rdf = { path = "html2rdf" }
17-
jiff = "^0.2.23"
15+
html2rdf = { path = "html2rdf", version = "0.2" }
16+
jiff = "0.2.23"
1817
librdfa-wrapper = { path = "librdfa-wrapper" }
1918
oxiri = "0.2.11"
2019
oxrdf = "0.3.3"

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,7 @@ Currently, html2rdf supports (nearly all of?) RDFa 1.1. RDFa 1.0 is not supporte
2020

2121
Known missing features are:
2222
- XMLLiteral output in HTML5 host language is not canonicalized at all
23+
- `xml:base` attributes are not processed at all
2324

24-
Eventually, html2rdf will also implement Microdata and JSON-LD, but this has not been started at all.
25+
Eventually, html2rdf will also implement Microdata (and Microdata-to-RDF conversion)
26+
and JSON-LD, but this has not been started at all.

deny.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ ignore = [ ]
44
[licenses]
55
allow = [
66
"Apache-2.0",
7+
"BSD-2-Clause",
78
"BSD-3-Clause",
9+
"CDLA-Permissive-2.0",
810
"ISC",
911
"MIT",
1012
"MPL-2.0",

html2rdf-cli/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ authors.workspace = true
55
edition.workspace = true
66
keywords.workspace = true
77
license.workspace = true
8-
publish.workspace = true
98
readme.workspace = true
109
repository.workspace = true
11-
10+
publish = true
11+
description = "A command-line tool to extract RDF from (X)HTML documents."
1212

1313
[[bin]]
1414
name = "html2rdf"

html2rdf/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@ authors.workspace = true
55
edition.workspace = true
66
keywords.workspace = true
77
license.workspace = true
8-
publish.workspace = true
98
readme.workspace = true
109
repository.workspace = true
10+
publish = true
11+
description = "Implements RDFa 1.1 processing for HTML and XHTML documents."
1112

1213
exclude = [
1314
"tests/test-suite/test-cases/*/html4*/*",

html2rdf/src/lib.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
//! It supports full [RDFa Core 1.1][rdfa-core], as well as
77
//! [XHTML+RDFa 1.1][xhtml-rdfa] and [HTML+RDFa 1.1][html-rdfa].
88
//!
9-
//! Core functionality is well-tested but there might be some edge-cases
10-
//! which are not yet properly handled, especialyl in the X/HTML-specific
11-
//! extensions.
9+
//! RDFa 1.0 and HTML4 are not supported.
1210
//!
1311
//! [rdfa-core]: https://www.w3.org/TR/rdfa-core/
1412
//! [xhtml-rdfa]: https://www.w3.org/TR/xhtml-rdfa/
@@ -18,10 +16,12 @@
1816
//! The following features are available, all enabled by default:
1917
//! - `html`: enables HTML5 processing (via `scraper`)
2018
//! - `xhtml`: enables XHTML processing (via `uppsala`)
21-
//! - `vocab-online`: enables the use of [`algorithms::OnlineVocabularyProcessor`] (this also enables `html`)
19+
//! - `vocab-online`: enables the use of [`OnlineVocabularyResolver`](crate::rdfa::algorithms::OnlineVocabularyResolver)
20+
//! (this also implies `html`)
2221
//!
2322
//! ## Known Issues
2423
//! - `XMLLiteral` values are not yet correctly canonicalized in HTML5
24+
//! - `xml:base` attributes are not processed
2525
2626
use oxiri::Iri;
2727
use oxrdf::Graph;
@@ -62,7 +62,7 @@ impl<H> Options<H> {
6262

6363
/// Enables the RDFa vocabulary expansion feature.
6464
///
65-
/// Note that if using [`OnlineVocabularyResolver`],
65+
/// Note that if using [`OnlineVocabularyResolver`](crate::rdfa::algorithms::OnlineVocabularyResolver),
6666
/// this may reach out to untrusted HTTP(S) endpoints.
6767
#[must_use]
6868
pub fn enable_vocabulary_expansion(

html2rdf/src/rdfa/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Implements RDFa processing.
2+
13
use std::{cell::RefCell, rc::Rc};
24

35
use itertools::Itertools;

html2rdf/tests/fuzz-oracle.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#![cfg(not(target_os = "windows"))]
22
// ^ don't want to mess around with getting librdfa to run on Windows
33

4+
use html2rdf::{Options, host_language::Html5};
45
use oxrdf::Graph;
56
use pyo3::prelude::*;
67

@@ -20,13 +21,15 @@ mod utils;
2021
/// - does not implement the content-sniffing used to determine a datatype for time values
2122
/// - does not read @datetime on <time>
2223
#[test]
24+
#[ignore = "not yet working - need to loosen IRI parsing"]
2325
fn fuzz_oracle() {
2426
Python::initialize();
2527
let base = oxiri::Iri::parse("https://rdfa.test/").unwrap();
2628
bolero::check!().for_each(|input: &[u8]| {
2729
let input_str = String::from_utf8_lossy(input);
2830
let (my_output, my_processor) =
29-
html2rdf::doc_to_graphs(&input_str, base.clone(), html2rdf::Options::default());
31+
html2rdf::doc_to_graphs::<Html5, Graph>(&input_str, base.clone(), Options::default())
32+
.unwrap();
3033

3134
// try this first because it's faster
3235
let librdfa_output = run_librdfa(&input_str, base.as_str());
@@ -43,7 +46,7 @@ fn fuzz_oracle() {
4346

4447
eprintln!(
4548
"Processor graph:\n{}",
46-
utils::serialize_graph(my_processor, base.as_str())
49+
utils::serialize_graph(&my_processor, base.as_str())
4750
);
4851

4952
// neither matched, so assert against both to get a diff

librdfa-wrapper/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ authors.workspace = true
55
edition.workspace = true
66
keywords.workspace = true
77
license.workspace = true
8-
publish.workspace = true
98
readme.workspace = true
109
repository.workspace = true
1110
rust-version.workspace = true
11+
publish = false
1212

1313
[target.'cfg(not(target_os = "windows"))'.build-dependencies]
1414
bindgen = "0.72.1"

0 commit comments

Comments
 (0)