Skip to content

Commit 552933d

Browse files
committed
Documentation for LoadConfig
1 parent 4acb543 commit 552933d

2 files changed

Lines changed: 169 additions & 1 deletion

File tree

webgraph/src/graphs/bvgraph/load.rs

Lines changed: 115 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,121 @@ impl LoadMode for LoadMmap {
244244
///
245245
/// A basic configuration is returned by
246246
/// [`BvGraph::with_basename`]/[`BvGraphSeq::with_basename`]. The configuration
247-
/// can then be customized using the methods of this struct.
247+
/// can then be customized using the setter methods of this struct, chained in
248+
/// builder style, and finalized by calling [`load`](LoadConfig::load).
249+
///
250+
/// # Defaults
251+
///
252+
/// The default configuration returned by `with_basename` uses:
253+
/// - big endianness ([`BE`]);
254+
/// - [dynamic dispatch](`Dynamic`);
255+
/// - [memory mapping](`Mmap`) for both the graph and the offsets.
256+
///
257+
/// # Configuration Axes
258+
///
259+
/// ## Access Mode
260+
///
261+
/// - [`BvGraph::with_basename`] returns a configuration for **random access**,
262+
/// which requires the Elias–Fano offsets file (`.ef`). The resulting graph
263+
/// supports both random access and sequential iteration.
264+
/// - [`BvGraphSeq::with_basename`] returns a configuration for **sequential
265+
/// access**, which only needs the graph file (`.graph`). The resulting graph
266+
/// supports only sequential iteration.
267+
///
268+
/// ## Endianness
269+
///
270+
/// - [`endianness`](LoadConfig::endianness): sets the endianness of the graph
271+
/// file. Use `endianness::<BE>()` for big-endian (the default and the Java
272+
/// convention) or `endianness::<LE>()` for little-endian.
273+
///
274+
/// ## Code Dispatch
275+
///
276+
/// - [`dispatch`](LoadConfig::dispatch): chooses between:
277+
/// - [`Dynamic`] (default): reads the codes from the properties file;
278+
/// slightly slower due to indirect dispatch, but works with any graph.
279+
/// - [`Static`]: the codes are fixed at compile time via const generics,
280+
/// enabling more aggressive optimization. The defaults match the Java
281+
/// defaults (γ for outdegrees, unary for references, γ for blocks, γ for
282+
/// intervals, ζ₃ for residuals). If your graph uses non-default codes,
283+
/// you must specify them explicitly.
284+
///
285+
/// ## Load Mode
286+
///
287+
/// Controls how the graph bitstream and the offsets are accessed.
288+
///
289+
/// - [`mode`](LoadConfig::mode): sets the load mode for **both** the graph
290+
/// and the offsets. You can also set them independently:
291+
/// - [`graph_mode`](LoadConfig::graph_mode): sets the mode for the graph
292+
/// only;
293+
/// - [`offsets_mode`](LoadConfig::offsets_mode): sets the mode for the
294+
/// offsets only (random access only).
295+
///
296+
/// The available modes are:
297+
///
298+
/// - [`Mmap`] (default): memory-maps the file. This is the most
299+
/// memory-efficient mode, as the OS manages paging. It is the recommended
300+
/// mode for large graphs.
301+
/// - [`LoadMem`]: reads the file into allocated memory.
302+
/// - [`LoadMmap`]: reads the file into memory obtained via `mmap`, rather than
303+
/// the standard allocator.
304+
/// - [`File`]: reads the graph from a file stream. The offsets are fully
305+
/// deserialized in memory using [ε-serde]'s
306+
/// [`load_full`](epserde::deser::Deserialize::load_full). Note that the
307+
/// graph file must be padded correctly for this mode.
308+
///
309+
/// ## Memory Flags
310+
///
311+
/// When using [`Mmap`] or [`LoadMmap`], you can set [`MemoryFlags`] to
312+
/// request transparent huge pages, etc.:
313+
///
314+
/// - [`flags`](LoadConfig::flags): sets flags for both the graph and offsets.
315+
/// - [`graph_flags`](LoadConfig::graph_flags): sets flags for the graph only.
316+
/// - [`offsets_flags`](LoadConfig::offsets_flags): sets flags for the offsets
317+
/// only (random access only).
318+
///
319+
/// # Examples
320+
///
321+
/// Load with all defaults (big-endian, dynamic dispatch, memory-mapped):
322+
/// ```ignore
323+
/// let graph = BvGraph::with_basename("BASENAME").load()?;
324+
/// ```
325+
///
326+
/// Load a little-endian graph:
327+
/// ```ignore
328+
/// let graph = BvGraph::with_basename("BASENAME")
329+
/// .endianness::<LE>()
330+
/// .load()?;
331+
/// ```
332+
///
333+
/// Load with static dispatch (using default codes):
334+
/// ```ignore
335+
/// let graph = BvGraph::with_basename("BASENAME")
336+
/// .dispatch::<Static>()
337+
/// .load()?;
338+
/// ```
339+
///
340+
/// Load into memory rather than memory-mapping:
341+
/// ```ignore
342+
/// let graph = BvGraph::with_basename("BASENAME")
343+
/// .mode::<LoadMem>()
344+
/// .load()?;
345+
/// ```
346+
///
347+
/// Load a sequential-access graph (no `.ef` file needed):
348+
/// ```ignore
349+
/// let graph = BvGraphSeq::with_basename("BASENAME").load()?;
350+
/// ```
351+
///
352+
/// Combine options:
353+
/// ```ignore
354+
/// let graph = BvGraph::with_basename("BASENAME")
355+
/// .endianness::<LE>()
356+
/// .dispatch::<Static>()
357+
/// .mode::<LoadMem>()
358+
/// .load()?;
359+
/// ```
360+
///
361+
/// [ε-serde]: <https://docs.rs/epserde/latest/epserde/>
248362
#[derive(Debug, Clone)]
249363
pub struct LoadConfig<E: Endianness, A: Access, D: Dispatch, GLM: LoadMode, OLM: LoadMode> {
250364
pub(crate) basename: PathBuf,

webgraph/tests/common/mod.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2025 Sebastiano Vigna
3+
*
4+
* SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
5+
*/
6+
7+
use anyhow::Result;
8+
use dsi_bitstream::prelude::*;
9+
use std::path::Path;
10+
use webgraph::prelude::EF;
11+
12+
/// Builds the Elias-Fano representation of offsets for a graph.
13+
///
14+
/// Replicates the core of `webgraph build ef` by reading the .offsets file.
15+
pub fn build_ef(basename: &Path) -> Result<()> {
16+
use epserde::ser::Serialize;
17+
use std::io::{BufWriter, Seek};
18+
use sux::prelude::*;
19+
20+
let graph_path = basename.with_extension("graph");
21+
let mut f = std::fs::File::open(&graph_path)?;
22+
let file_len = 8 * f.seek(std::io::SeekFrom::End(0))? as usize;
23+
24+
let properties_path = basename.with_extension("properties");
25+
let props = std::fs::read_to_string(&properties_path)?;
26+
let num_nodes: usize = props
27+
.lines()
28+
.find(|l| l.starts_with("nodes="))
29+
.unwrap()
30+
.strip_prefix("nodes=")
31+
.unwrap()
32+
.parse()?;
33+
34+
// Read from the .offsets file (gamma-coded in BE)
35+
let offsets_path = basename.with_extension("offsets");
36+
let of =
37+
webgraph::utils::MmapHelper::<u32>::mmap(&offsets_path, mmap_rs::MmapFlags::SEQUENTIAL)?;
38+
let mut reader: BufBitReader<BE, _> = BufBitReader::new(MemWordReader::new(of.as_ref()));
39+
40+
let mut efb = EliasFanoBuilder::new(num_nodes + 1, file_len);
41+
let mut offset = 0u64;
42+
for _ in 0..num_nodes + 1 {
43+
offset += reader.read_gamma()?;
44+
efb.push(offset as _);
45+
}
46+
47+
let ef = efb.build();
48+
let ef: EF = unsafe { ef.map_high_bits(SelectAdaptConst::<_, _, 12, 4>::new) };
49+
50+
let ef_path = basename.with_extension("ef");
51+
let mut ef_file = BufWriter::new(std::fs::File::create(&ef_path)?);
52+
unsafe { ef.serialize(&mut ef_file)? };
53+
Ok(())
54+
}

0 commit comments

Comments
 (0)