Skip to content

Commit 5ec7b96

Browse files
committed
canonicalization, tracing, mitsein
1 parent cbe00af commit 5ec7b96

File tree

6 files changed

+308
-116
lines changed

6 files changed

+308
-116
lines changed

Cargo.lock

Lines changed: 189 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

html2rdf-cli/Cargo.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@ name = "html2rdf"
1414
path = "src/main.rs"
1515

1616
[dependencies]
17-
clap = { version = "4.5.59", features = ["derive", "wrap_help"] }
17+
clap = { version = "4.5.59", features = ["cargo", "derive", "wrap_help"] }
1818
html2rdf = { workspace = true }
19+
jiff = "0.2.20"
1920
oxiri = { workspace = true }
20-
oxrdf = { workspace = true }
21+
oxrdf = { workspace = true, features = ["rdfc-10"] }
2122
oxttl = { workspace = true }
2223
reqwest = { version = "0.13.2", default-features = false, features = ["charset", "http2", "native-tls", "system-proxy"] }
2324
tokio = { version = "1.49.0", features = ["macros", "rt"] }
25+
tracing-subscriber = { version = "0.3.22" }
2426
url = "2.5.8"

html2rdf-cli/src/main.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,22 @@
1-
use std::process::ExitCode;
1+
use std::{io::Write, process::ExitCode};
22

33
use clap::Parser;
4+
use oxrdf::graph::{CanonicalizationAlgorithm, CanonicalizationHashAlgorithm};
45

56
#[derive(Parser)]
67
#[command(version, about)]
78
struct Args {
89
#[arg(value_name = "URL")]
910
target: url::Url,
11+
12+
#[arg(long, short = 'c')]
13+
canonicalize: bool,
1014
}
1115

1216
#[tokio::main(flavor = "current_thread")]
1317
async fn main() -> Result<ExitCode, Box<dyn std::error::Error>> {
18+
tracing_subscriber::fmt::init();
19+
1420
let args = Args::parse();
1521
let client = reqwest::Client::new();
1622
let base = args.target.to_string();
@@ -28,6 +34,8 @@ async fn main() -> Result<ExitCode, Box<dyn std::error::Error>> {
2834

2935
drop(client);
3036

37+
let final_url = response.url().clone();
38+
3139
let content = response.text().await?;
3240
let mut output_graph = oxrdf::Graph::new();
3341
let mut processor_graph = oxrdf::Graph::new();
@@ -58,7 +66,27 @@ async fn main() -> Result<ExitCode, Box<dyn std::error::Error>> {
5866
|serializer, (prefix, value)| serializer.with_prefix(prefix, value),
5967
)?;
6068

69+
if args.canonicalize {
70+
output_graph.canonicalize(CanonicalizationAlgorithm::Rdfc10 {
71+
hash_algorithm: CanonicalizationHashAlgorithm::Sha256,
72+
});
73+
}
74+
6175
let mut locked_out = std::io::stdout().lock();
76+
locked_out.write_all(
77+
format!(
78+
"# generated by html2rdf {} at {:.0} from: {}\n",
79+
clap::crate_version!(),
80+
jiff::Timestamp::now(),
81+
final_url,
82+
)
83+
.as_bytes(),
84+
)?;
85+
86+
if args.canonicalize {
87+
locked_out.write_all(b"# (output has been canonicalized)\n")?;
88+
}
89+
6290
let mut writer = serializer.for_writer(&mut locked_out);
6391
for triple in output_graph.iter() {
6492
writer.serialize_triple(triple)?;

0 commit comments

Comments
 (0)