Skip to content

Commit 58cdf04

Browse files
committed
More documentation
1 parent 552933d commit 58cdf04

7 files changed

Lines changed: 360 additions & 70 deletions

File tree

algo/src/distances/hyperball.rs

Lines changed: 123 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,128 @@ use webgraph::utils::Granularity;
141141

142142
/// A builder for [`HyperBall`].
143143
///
144-
/// After creating a builder with [`HyperBallBuilder::new`] you can configure it
145-
/// using setters such as [`HyperBallBuilder`] its methods, then call
146-
/// [`HyperBallBuilder::build`] on it to create a [`HyperBall`] instance.
144+
/// # Creating a Builder
145+
///
146+
/// There are three constructors, depending on the type of graph and
147+
/// cardinality estimator:
148+
///
149+
/// - [`with_hyper_log_log`](Self::with_hyper_log_log): the most common entry
150+
/// point—it creates a builder using [`HyperLogLog`] counters, requiring
151+
/// only the base-2 logarithm of the number of registers per counter
152+
/// (`log2m`). Higher values of `log2m` give more precise estimates at the
153+
/// cost of more memory;
154+
/// - [`new`](Self::new): creates a builder from two pre-built estimator
155+
/// arrays and a graph (without its transpose);
156+
/// - [`with_transpose`](Self::with_transpose): same, but also accepts the
157+
/// transpose of the graph, enabling [systolic
158+
/// computation](super::hyperball#systolic-computation).
159+
///
160+
/// # Configuration
161+
///
162+
/// After creation, the builder can be configured using the following
163+
/// methods:
164+
///
165+
/// - [`sum_of_distances`](Self::sum_of_distances): enables the computation
166+
/// of the sum of distances from each node (needed for closeness, Lin, and
167+
/// Nieminen centrality);
168+
/// - [`sum_of_inverse_distances`](Self::sum_of_inverse_distances): enables
169+
/// the computation of harmonic centrality;
170+
/// - [`discount_function`](Self::discount_function): adds a custom discount
171+
/// function;
172+
/// - [`granularity`](Self::granularity): sets the arc granularity for the
173+
/// parallel iterations;
174+
/// - [`weights`](Self::weights): sets optional nonnegative integer node
175+
/// weights.
176+
///
177+
/// Finally, call [`build`](Self::build) to obtain a [`HyperBall`] instance,
178+
/// and then [`run`](HyperBall::run) or
179+
/// [`run_until_done`](HyperBall::run_until_done) to perform the actual
180+
/// computation.
181+
///
182+
/// # Examples
183+
///
184+
/// ```
185+
/// use webgraph::graphs::vec_graph::VecGraph;
186+
/// use webgraph::graphs::bvgraph::DCF;
187+
/// use webgraph::traits::{RandomAccessLabeling, SequentialLabeling};
188+
/// use webgraph_algo::distances::hyperball::*;
189+
/// use dsi_progress_logger::no_logging;
190+
/// use sux::prelude::*;
191+
/// use rand::SeedableRng;
192+
/// use lender::prelude::*;
193+
///
194+
/// // A small graph: 0 → 1 → 2 → 0, 1 → 3
195+
/// let graph = VecGraph::from_arcs([(0, 1), (1, 2), (2, 0), (1, 3)]);
196+
///
197+
/// // Build the degree cumulative function (DCF)
198+
/// let mut efb = EliasFanoBuilder::new(
199+
/// graph.num_nodes() + 1,
200+
/// graph.num_arcs() as usize,
201+
/// );
202+
/// efb.push(0);
203+
/// let mut cumul = 0;
204+
/// let mut lender = graph.iter();
205+
/// while let Some((_, succs)) = lender.next() {
206+
/// cumul += succs.into_iter().count();
207+
/// efb.push(cumul);
208+
/// }
209+
/// let dcf: DCF = unsafe {
210+
/// efb.build().map_high_bits(|high_bits| {
211+
/// SelectZeroAdaptConst::<_, _, 12, 4>::new(
212+
/// SelectAdaptConst::<_, _, 12, 4>::new(high_bits),
213+
/// )
214+
/// })
215+
/// };
216+
///
217+
/// // Build and run HyperBall (neighborhood function only)
218+
/// let rng = rand::rngs::SmallRng::seed_from_u64(0);
219+
/// let mut hyperball = HyperBallBuilder::with_hyper_log_log(
220+
/// &graph, None::<&VecGraph>, &dcf, 6, None,
221+
/// )?.build(no_logging![]);
222+
/// hyperball.run_until_done(rng, no_logging![])?;
223+
///
224+
/// let nf = hyperball.neighborhood_function()?;
225+
/// assert!(nf.len() >= 4);
226+
/// # Ok::<(), anyhow::Error>(())
227+
/// ```
228+
///
229+
/// To compute harmonic centrality, enable it on the builder:
230+
///
231+
/// ```
232+
/// # use webgraph::graphs::vec_graph::VecGraph;
233+
/// # use webgraph::graphs::bvgraph::DCF;
234+
/// # use webgraph::traits::{RandomAccessLabeling, SequentialLabeling};
235+
/// # use webgraph_algo::distances::hyperball::*;
236+
/// # use dsi_progress_logger::no_logging;
237+
/// # use sux::prelude::*;
238+
/// # use rand::SeedableRng;
239+
/// # use lender::prelude::*;
240+
/// # let graph = VecGraph::from_arcs([(0, 1), (1, 2), (2, 0), (1, 3)]);
241+
/// # let mut efb = EliasFanoBuilder::new(
242+
/// # graph.num_nodes() + 1, graph.num_arcs() as usize);
243+
/// # efb.push(0);
244+
/// # let mut cumul = 0;
245+
/// # let mut lender = graph.iter();
246+
/// # while let Some((_, succs)) = lender.next() {
247+
/// # cumul += succs.into_iter().count();
248+
/// # efb.push(cumul);
249+
/// # }
250+
/// # let dcf: DCF = unsafe {
251+
/// # efb.build().map_high_bits(|high_bits| {
252+
/// # SelectZeroAdaptConst::<_, _, 12, 4>::new(
253+
/// # SelectAdaptConst::<_, _, 12, 4>::new(high_bits))})};
254+
/// let rng = rand::rngs::SmallRng::seed_from_u64(0);
255+
/// let mut hyperball = HyperBallBuilder::with_hyper_log_log(
256+
/// &graph, None::<&VecGraph>, &dcf, 6, None,
257+
/// )?
258+
/// .sum_of_inverse_distances(true)
259+
/// .build(no_logging![]);
260+
/// hyperball.run_until_done(rng, no_logging![])?;
261+
///
262+
/// let centralities = hyperball.harmonic_centralities()?;
263+
/// assert_eq!(centralities.len(), graph.num_nodes());
264+
/// # Ok::<(), anyhow::Error>(())
265+
/// ```
147266
pub struct HyperBallBuilder<
148267
'a,
149268
G1: RandomAccessGraph + Sync,
@@ -261,7 +380,7 @@ impl<
261380
/// * `graph`: the graph to analyze.
262381
/// * `cumul_outdeg`: the outdegree cumulative function of the graph.
263382
/// * `array_0`: a first array of estimators.
264-
/// * `array_1`: A second array of estimators of the same length and with the same logic of
383+
/// * `array_1`: a second array of estimators of the same length and with the same logic as
265384
/// `array_0`.
266385
pub fn new(graph: &'a G, cumul_outdeg: &'a D, array_0: A, array_1: A) -> Self {
267386
assert!(array_0.logic() == array_1.logic(), "Incompatible logic");

algo/src/llp/mod.rs

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,62 @@
55
* SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
66
*/
77

8-
//! Layered label propagation.
8+
//! Layered Label Propagation.
99
//!
1010
//! An implementation of the _layered label propagation_ algorithm described by
1111
//! Paolo Boldi, Marco Rosa, Massimo Santini, and Sebastiano
12-
//! Vigna in Layered label propagation: A multiresolution coordinate-free
13-
//! ordering for compressing social networks”, _Proceedings of the 20th
14-
//! international conference on World Wide Web_, pages 587–596, ACM, 2011.
12+
//! Vigna in "[Layered Label Propagation: A Multiresolution Coordinate-Free
13+
//! Ordering for Compressing Social Networks][LLP paper]", _Proceedings of the
14+
//! 20th international conference on World Wide Web_, pages 587–596, ACM, 2011.
1515
//!
16-
//! The function [`layered_label_propagation`] returns node labels of the
17-
//! provided symmetric graph, and [permuting the
18-
//! graph](webgraph::transform::permute) in label order will (hopefully)
19-
//! increase locality (see the paper).
16+
//! # Requirements
2017
//!
21-
//! Note that the graph provided should be _symmetric_ and _loopless_. If this
22-
//! is not the case, please use [`simplify`](webgraph::transform::simplify) to generate a
18+
//! The graph provided should be _symmetric_ and _loopless_. If this is not the
19+
//! case, please use [`simplify`](webgraph::transform::simplify) to generate a
2320
//! suitable graph.
2421
//!
25-
//! # Memory requirements
22+
//! # Memory Requirements
2623
//!
2724
//! LLP requires three `usize` and a boolean per node, plus the memory that is
2825
//! necessary to load the graph.
2926
//!
27+
//! # Algorithm
28+
//!
29+
//! Label propagation assigns a _label_ to each node and then iteratively
30+
//! updates every label to the one that maximizes an objective function based on
31+
//! the frequency of labels among the node's neighbors and on a resolution
32+
//! parameter ɣ. Low ɣ values produce few large communities, while high ɣ
33+
//! values produce many small ones. _Layered_ label propagation runs label
34+
//! propagation for several values of ɣ and combines the resulting labelings
35+
//! into a single one that captures community structure at multiple resolutions.
36+
//!
37+
//! Nodes of the resulting labeling that share the same label are likely
38+
//! co-located in the graph, so [permuting the
39+
//! graph](webgraph::transform::permute) in label order will increase locality,
40+
//! yielding better compression.
41+
//!
42+
//! # Functions
43+
//!
44+
//! - [`layered_label_propagation`]: runs LLP and returns the final combined
45+
//! labels;
46+
//! - [`layered_label_propagation_labels_only`]: runs LLP and stores
47+
//! per-ɣ labels to disk, but does not combine them; this is useful when
48+
//! you want to combine labels in a separate step;
49+
//! - [`combine_labels`]: combines the per-ɣ labels stored on disk by a
50+
//! previous call to [`layered_label_propagation_labels_only`];
51+
//! - [`labels_to_ranks`]: converts labels to ranks by their natural order,
52+
//! yielding a permutation that can be passed to
53+
//! [`permute`](webgraph::transform::permute).
54+
//!
55+
//! # Choosing ɣ Values
56+
//!
57+
//! More values improve the resulting combined labeling, but each value needs a
58+
//! full run of the label propagation algorithm, so there is a trade-off between
59+
//! quality and running time. A common choice is a set of exponentially spaced
60+
//! values, for example ɣ ∈ {1, 1/2, 1/4, …} or ɣ ∈ {1, 1/4, 1/16, …}.
61+
//!
62+
//! [LLP paper]: <https://vigna.di.unimi.it/papers.php#BRSLLP>
63+
//!
3064
use anyhow::{Context, Result};
3165
use crossbeam_utils::CachePadded;
3266
use dsi_progress_logger::prelude::*;
@@ -100,7 +134,7 @@ pub fn layered_label_propagation<R: RandomAccessGraph + Sync>(
100134
deg_cumul: &(impl for<'a> Succ<Input = usize, Output<'a> = usize> + Send + Sync),
101135
gammas: Vec<f64>,
102136
chunk_size: Option<usize>,
103-
arc_granularity: Granularity,
137+
granularity: Granularity,
104138
seed: u64,
105139
predicate: impl Predicate<preds::PredParams>,
106140
work_dir: impl AsRef<Path>,
@@ -111,7 +145,7 @@ pub fn layered_label_propagation<R: RandomAccessGraph + Sync>(
111145
deg_cumul,
112146
gammas,
113147
chunk_size,
114-
arc_granularity,
148+
granularity,
115149
seed,
116150
predicate,
117151
&work_dir,
@@ -129,7 +163,7 @@ pub fn layered_label_propagation_labels_only<R: RandomAccessGraph + Sync>(
129163
deg_cumul: &(impl for<'a> Succ<Input = usize, Output<'a> = usize> + Send + Sync),
130164
gammas: Vec<f64>,
131165
chunk_size: Option<usize>,
132-
arc_granularity: Granularity,
166+
granularity: Granularity,
133167
seed: u64,
134168
predicate: impl Predicate<preds::PredParams>,
135169
work_dir: impl AsRef<Path>,
@@ -302,7 +336,7 @@ pub fn layered_label_propagation_labels_only<R: RandomAccessGraph + Sync>(
302336
local_obj_func
303337
},
304338
|delta_obj_func_0: f64, delta_obj_func_1| delta_obj_func_0 + delta_obj_func_1,
305-
arc_granularity,
339+
granularity,
306340
deg_cumul,
307341
&mut update_pl,
308342
);
@@ -373,7 +407,7 @@ pub fn layered_label_propagation_labels_only<R: RandomAccessGraph + Sync>(
373407
graph: &sym_graph,
374408
perm: &volumes,
375409
},
376-
arc_granularity,
410+
granularity,
377411
deg_cumul,
378412
&mut update_pl,
379413
);

webgraph/README.md

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ techniques. More precisely, it is currently made of:
3636
now in maintenance mode.
3737

3838
- This crate, providing a complete, documented implementation of the algorithms
39-
above in Rust. It is free software distributed under either the [GNU Lesser
39+
above in Rust. It is free software distributed under either the [GNU Lesser
4040
General Public License
4141
2.1+](https://www.gnu.org/licenses/old-licenses/lgpl-2.1.html) or the [Apache
4242
Software License 2.0](https://www.apache.org/licenses/LICENSE-2.0).
@@ -141,7 +141,7 @@ There are many operations available on graphs, such as [transpose],
141141
A simple way to compress a graph is to provide it as a list of arcs. The
142142
`webgraph` CLI provides a command `from` with a subcommand `arcs` that reads a
143143
TAB-separated list of arcs from standard input and writes a compressed
144-
graph in BvGraph format. For example,
144+
graph in BV graph format. For example,
145145

146146
```bash
147147
echo -e "0\t1\n1\t2\n2\t3" >3-cycle.tsv
@@ -193,27 +193,27 @@ opinions expressed are however those of the authors only and do not necessarily
193193
reflect those of the European Union or the Italian MUR. Neither the European
194194
Union nor the Italian MUR can be held responsible for them.
195195

196-
[transpose]: <https://docs.rs/webgraph/latest/webgraph/transform/fn.transpose.html>
197-
[simplify]: <https://docs.rs/webgraph/latest/webgraph/transform/fn.simplify.html>
198-
[permute]: <https://docs.rs/webgraph/latest/webgraph/transform/fn.permute.html>
199-
[`with_basename`]: <https://docs.rs/webgraph/latest/webgraph/graphs/bvgraph/random_access/struct.BvGraph.html#method.with_basename>
200-
[`BvGraphSeq`]: <https://docs.rs/webgraph/latest/webgraph/graphs/bvgraph/sequential/struct.BvGraphSeq.html>
201-
[`BvGraph`]: <https://docs.rs/webgraph/latest/webgraph/graphs/bvgraph/random_access/struct.BvGraph.html>
202-
[`LoadConfig`]: <https://docs.rs/webgraph/latest/webgraph/graphs/bvgraph/load/struct.LoadConfig.html>
203-
[iterate on the whole graph]: <https://docs.rs/webgraph/latest/webgraph/traits/labels/trait.SequentialLabeling.html#method.iter>
204-
[zipping]: <https://docs.rs/webgraph/latest/webgraph/labels/zip/struct.Zip.html>
205-
[labeling]: <https://docs.rs/webgraph/latest/webgraph/traits/labels/trait.SequentialLabeling.html>
206-
[iteration]: <https://docs.rs/webgraph/latest/webgraph/traits/labels/trait.SequentialLabeling.html#method.iter>
207-
[retrieve the successors of a node]: <https://docs.rs/webgraph/latest/webgraph/traits/graph/trait.RandomAccessGraph.html#method.successors>
208-
[LAW web site]: <http://law.di.unimi.it/>
209-
[Elias–Fano]: <sux::dict::EliasFano>
210-
[WebGraph framework]: <https://webgraph.di.unimi.it/>
211-
[ε-serde]: <https://crates.io/crates/epserde/>
212-
[`for_`]: <https://docs.rs/lender/latest/lender/macro.for_.html>
213-
[`VecGraph`]: <https://docs.rs/webgraph/latest/webgraph/graphs/vec_graph/struct.VecGraph.html>
214-
[`LabeledVecGraph`]: <https://docs.rs/webgraph/latest/webgraph/graphs/vec_graph/struct.LabeledVecGraph.html>
215-
[`BTreeGraph`]: <https://docs.rs/webgraph/latest/webgraph/graphs/btree_graph/struct.BTreeGraph.html>
216-
[`LabeledBTreeGraph`]: <https://docs.rs/webgraph/latest/webgraph/graphs/btree_graph/struct.LabeledBTreeGraph.html>
217-
[Common Crawl web site]: <https://commoncrawl.org/>
218-
[command-line interface]: <https://docs.rs/webgraph-cli/latest/index.html>
219-
[`CsrGraph`]: <https://docs.rs/webgraph/latest/webgraph/graphs/csr_graph/struct.CsrGraph.html>
196+
[transpose]: https://docs.rs/webgraph/latest/webgraph/transform/fn.transpose.html
197+
[simplify]: https://docs.rs/webgraph/latest/webgraph/transform/fn.simplify.html
198+
[permute]: https://docs.rs/webgraph/latest/webgraph/transform/fn.permute.html
199+
[`with_basename`]: https://docs.rs/webgraph/latest/webgraph/graphs/bvgraph/random_access/struct.BvGraph.html#method.with_basename
200+
[`BvGraphSeq`]: https://docs.rs/webgraph/latest/webgraph/graphs/bvgraph/sequential/struct.BvGraphSeq.html
201+
[`BvGraph`]: https://docs.rs/webgraph/latest/webgraph/graphs/bvgraph/random_access/struct.BvGraph.html
202+
[`LoadConfig`]: https://docs.rs/webgraph/latest/webgraph/graphs/bvgraph/load/struct.LoadConfig.html
203+
[iterate on the whole graph]: https://docs.rs/webgraph/latest/webgraph/traits/labels/trait.SequentialLabeling.html#method.iter
204+
[zipping]: https://docs.rs/webgraph/latest/webgraph/labels/zip/struct.Zip.html
205+
[labeling]: https://docs.rs/webgraph/latest/webgraph/traits/labels/trait.SequentialLabeling.html
206+
[iteration]: https://docs.rs/webgraph/latest/webgraph/traits/labels/trait.SequentialLabeling.html#method.iter
207+
[retrieve the successors of a node]: https://docs.rs/webgraph/latest/webgraph/traits/graph/trait.RandomAccessGraph.html#method.successors
208+
[LAW web site]: http://law.di.unimi.it/
209+
[Elias–Fano]: sux::dict::EliasFano
210+
[WebGraph framework]: https://webgraph.di.unimi.it/
211+
[ε-serde]: https://crates.io/crates/epserde/
212+
[`for_`]: https://docs.rs/lender/latest/lender/macro.for_.html
213+
[`VecGraph`]: https://docs.rs/webgraph/latest/webgraph/graphs/vec_graph/struct.VecGraph.html
214+
[`LabeledVecGraph`]: https://docs.rs/webgraph/latest/webgraph/graphs/vec_graph/struct.LabeledVecGraph.html
215+
[`BTreeGraph`]: https://docs.rs/webgraph/latest/webgraph/graphs/btree_graph/struct.BTreeGraph.html
216+
[`LabeledBTreeGraph`]: https://docs.rs/webgraph/latest/webgraph/graphs/btree_graph/struct.LabeledBTreeGraph.html
217+
[Common Crawl web site]: https://commoncrawl.org/
218+
[command-line interface]: https://docs.rs/webgraph-cli/latest/index.html
219+
[`CsrGraph`]: https://docs.rs/webgraph/latest/webgraph/graphs/csr_graph/struct.CsrGraph.html

webgraph/src/graphs/bvgraph/comp/bvcomp.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,31 @@ pub struct CompStats {
2525
pub offsets_written_bits: u64,
2626
}
2727

28-
/// A BvGraph compressor, this is used to compress a graph into a BvGraph
28+
/// Compresses a graph into the [BV graph format](super::super).
29+
///
30+
/// This is the standard compressor: for each node, it considers the
31+
/// preceding nodes in a window of configurable size and greedily selects the
32+
/// reference that minimizes the bitstream length, subject to a maximum
33+
/// reference-chain depth (`max_ref_count`). It then splits the "extra" nodes
34+
/// (those that cannot be copied from the reference list) into intervals and
35+
/// residuals, as documented in the [module-level documentation](super::super).
36+
///
37+
/// The compressor writes two bitstreams:
38+
///
39+
/// - the _graph_ bitstream, through the encoder `E`;
40+
/// - the _offsets_ bitstream, through the [`OffsetsWriter`].
41+
///
42+
/// Nodes must be pushed in order via [`push`](Self::push) (or
43+
/// [`extend`](Self::extend)) and the compressor must be finalized with
44+
/// [`flush`](Self::flush), which returns the [`CompStats`].
45+
///
46+
/// In most cases you do not need to instantiate this struct directly: use
47+
/// [`BvComp::with_basename`] to obtain a [`BvCompConfig`] with suitable
48+
/// defaults, then call [`comp_graph`](BvCompConfig::comp_graph) or
49+
/// [`par_comp_graph`](BvCompConfig::par_comp_graph) on it.
50+
///
51+
/// For a compressor that uses an alternative reference-selection strategy
52+
/// based on dynamic programming, see [`BvCompZ`](super::BvCompZ).
2953
#[derive(Debug)]
3054
pub struct BvComp<E, W: Write> {
3155
/// The ring-buffer that stores the neighbors of the last

0 commit comments

Comments
 (0)