Skip to content

Commit b7ba145

Browse files
committed
New SequentialLabeling::build_dcf method
1 parent 4827b16 commit b7ba145

15 files changed

Lines changed: 331 additions & 129 deletions

File tree

algo/src/distances/hyperball.rs

Lines changed: 9 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -182,37 +182,13 @@ use webgraph::utils::Granularity;
182182
/// # Examples
183183
///
184184
/// ```
185-
/// use webgraph::graphs::vec_graph::VecGraph;
186-
/// use webgraph::graphs::bvgraph::DCF;
187-
/// use webgraph::traits::{RandomAccessLabeling, SequentialLabeling};
188-
/// use webgraph_algo::distances::hyperball::*;
189-
/// use dsi_progress_logger::no_logging;
190-
/// use sux::prelude::*;
191-
/// use rand::SeedableRng;
192-
/// use lender::prelude::*;
193-
///
194-
/// // A small graph: 0 → 1 → 2 → 0, 1 → 3
185+
/// # use webgraph::graphs::vec_graph::VecGraph;
186+
/// # use webgraph::traits::SequentialLabeling;
187+
/// # use webgraph_algo::distances::hyperball::*;
188+
/// # use dsi_progress_logger::no_logging;
189+
/// # use rand::SeedableRng;
195190
/// let graph = VecGraph::from_arcs([(0, 1), (1, 2), (2, 0), (1, 3)]);
196-
///
197-
/// // Build the degree cumulative function (DCF)
198-
/// let mut efb = EliasFanoBuilder::new(
199-
/// graph.num_nodes() + 1,
200-
/// graph.num_arcs() as usize,
201-
/// );
202-
/// efb.push(0);
203-
/// let mut cumul = 0;
204-
/// let mut lender = graph.iter();
205-
/// while let Some((_, succs)) = lender.next() {
206-
/// cumul += succs.into_iter().count();
207-
/// efb.push(cumul);
208-
/// }
209-
/// let dcf: DCF = unsafe {
210-
/// efb.build().map_high_bits(|high_bits| {
211-
/// SelectZeroAdaptConst::<_, _, 12, 4>::new(
212-
/// SelectAdaptConst::<_, _, 12, 4>::new(high_bits),
213-
/// )
214-
/// })
215-
/// };
191+
/// let dcf = graph.build_dcf();
216192
///
217193
/// // Build and run HyperBall (neighborhood function only)
218194
/// let rng = rand::rngs::SmallRng::seed_from_u64(0);
@@ -230,28 +206,13 @@ use webgraph::utils::Granularity;
230206
///
231207
/// ```
232208
/// # use webgraph::graphs::vec_graph::VecGraph;
233-
/// # use webgraph::graphs::bvgraph::DCF;
234-
/// # use webgraph::traits::{RandomAccessLabeling, SequentialLabeling};
209+
/// # use webgraph::traits::SequentialLabeling;
235210
/// # use webgraph_algo::distances::hyperball::*;
236211
/// # use dsi_progress_logger::no_logging;
237-
/// # use sux::prelude::*;
238212
/// # use rand::SeedableRng;
239-
/// # use lender::prelude::*;
240213
/// # let graph = VecGraph::from_arcs([(0, 1), (1, 2), (2, 0), (1, 3)]);
241-
/// # let mut efb = EliasFanoBuilder::new(
242-
/// # graph.num_nodes() + 1, graph.num_arcs() as usize);
243-
/// # efb.push(0);
244-
/// # let mut cumul = 0;
245-
/// # let mut lender = graph.iter();
246-
/// # while let Some((_, succs)) = lender.next() {
247-
/// # cumul += succs.into_iter().count();
248-
/// # efb.push(cumul);
249-
/// # }
250-
/// # let dcf: DCF = unsafe {
251-
/// # efb.build().map_high_bits(|high_bits| {
252-
/// # SelectZeroAdaptConst::<_, _, 12, 4>::new(
253-
/// # SelectAdaptConst::<_, _, 12, 4>::new(high_bits))})};
254-
/// let rng = rand::rngs::SmallRng::seed_from_u64(0);
214+
/// # let dcf = graph.build_dcf();
215+
/// # let rng = rand::rngs::SmallRng::seed_from_u64(0);
255216
/// let mut hyperball = HyperBallBuilder::with_hyper_log_log(
256217
/// &graph, None::<&VecGraph>, &dcf, 6, None,
257218
/// )?

algo/tests/test_llp.rs

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,40 +5,12 @@
55
*/
66

77
use anyhow::Result;
8-
use lender::prelude::*;
98
use predicates::prelude::PredicateBooleanExt;
10-
use sux::prelude::*;
11-
use webgraph::graphs::bvgraph::DCF;
129
use webgraph::graphs::vec_graph::VecGraph;
1310
use webgraph::traits::SequentialLabeling;
1411
use webgraph_algo::llp;
1512
use webgraph_algo::llp::preds::*;
1613

17-
/// Builds a degree cumulative function (DCF) from a graph.
18-
///
19-
/// The DCF is an Elias-Fano representation of the sequence
20-
/// 0, d₀, d₀+d₁, ..., total_arcs where dᵢ is the outdegree of node i.
21-
fn build_dcf(graph: &VecGraph) -> DCF {
22-
let num_nodes = graph.num_nodes();
23-
let num_arcs = graph.num_arcs_hint().unwrap_or(0) as usize;
24-
25-
let mut efb = EliasFanoBuilder::new(num_nodes + 1, num_arcs);
26-
efb.push(0);
27-
let mut cumul = 0usize;
28-
let mut lender = graph.iter();
29-
while let Some((_node, succs)) = lender.next() {
30-
cumul += succs.into_iter().count();
31-
efb.push(cumul);
32-
}
33-
34-
let ef = efb.build();
35-
unsafe {
36-
ef.map_high_bits(|bits| {
37-
SelectZeroAdaptConst::<_, _, 12, 4>::new(SelectAdaptConst::<_, _, 12, 4>::new(bits))
38-
})
39-
}
40-
}
41-
4214
#[test]
4315
fn test_llp_small_symmetric_graph() -> Result<()> {
4416
use webgraph::utils::Granularity;
@@ -60,7 +32,7 @@ fn test_llp_small_symmetric_graph() -> Result<()> {
6032
let num_nodes = graph.num_nodes();
6133
assert_eq!(num_nodes, 4);
6234

63-
let deg_cumul = build_dcf(&graph);
35+
let deg_cumul = graph.build_dcf();
6436

6537
let dir = tempfile::tempdir()?;
6638
let gammas = vec![0.0, 1.0];
@@ -101,7 +73,7 @@ fn test_llp_labels_only_and_combine() -> Result<()> {
10173
(4, 3),
10274
]);
10375
let num_nodes = graph.num_nodes();
104-
let deg_cumul = build_dcf(&graph);
76+
let deg_cumul = graph.build_dcf();
10577

10678
let dir = tempfile::tempdir()?;
10779

@@ -136,7 +108,7 @@ fn test_llp_multiple_gammas() -> Result<()> {
136108
(0, 4),
137109
(4, 0),
138110
]);
139-
let deg_cumul = build_dcf(&graph);
111+
let deg_cumul = graph.build_dcf();
140112

141113
let dir = tempfile::tempdir()?;
142114
let gammas = vec![0.0, 0.5, 1.0, 2.0];
@@ -175,7 +147,7 @@ fn test_llp_complete_graph() -> Result<()> {
175147
(2, 3),
176148
(3, 2),
177149
]);
178-
let deg_cumul = build_dcf(&graph);
150+
let deg_cumul = graph.build_dcf();
179151

180152
let dir = tempfile::tempdir()?;
181153

webgraph/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@
1313

1414
- Support for π codes, both in the file format and in the CLI tools.
1515

16+
- New default method `SequentialLabeling::build_dcf` for building a degree
17+
cumulative function. It is overridden by more efficient methods, for example,
18+
in `BvGraphSeq`. All iterators have fast constant-time implementations of
19+
`count` whenever possible to support the method.
20+
1621
### Changed
1722

1823
- All parallel methods now use the current Rayon global thread pool rather than

webgraph/src/graphs/btree_graph.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,10 @@ impl<L: Clone + 'static> Iterator for LabeledSucc<'_, L> {
397397
fn next(&mut self) -> Option<Self::Item> {
398398
self.0.next().map(|(succ, labels)| (*succ, labels.clone()))
399399
}
400+
401+
fn count(self) -> usize {
402+
self.len()
403+
}
400404
}
401405

402406
impl<L: Clone + 'static> ExactSizeIterator for LabeledSucc<'_, L> {
@@ -418,6 +422,10 @@ impl Iterator for Succ<'_> {
418422
fn next(&mut self) -> Option<Self::Item> {
419423
self.0.next()
420424
}
425+
426+
fn count(self) -> usize {
427+
self.len()
428+
}
421429
}
422430

423431
impl ExactSizeIterator for Succ<'_> {

webgraph/src/graphs/bvgraph/masked_iter.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,4 +96,8 @@ impl<I: Iterator<Item = usize>> Iterator for MaskedIter<I> {
9696
self.blocks[self.block_idx] -= 1;
9797
result
9898
}
99+
100+
fn count(self) -> usize {
101+
self.len()
102+
}
99103
}

webgraph/src/graphs/bvgraph/mod.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,12 @@ impl<T: for<'a> DeserInner<DeserType<'a>: IndexedSeq<Input = usize, Output<'a> =
195195
{
196196
}
197197

198-
/// The default version of EliasFano we use for the cumulative function of degrees.
198+
/// The default type we use for the cumulative function of degrees.
199+
///
200+
/// It provides an [indexed dictionary](sux::traits::indexed_dict::IndexedDict) with
201+
/// [successor](sux::traits::indexed_dict::Succ) and [predecessor](sux::traits::indexed_dict::Pred) support.
202+
///
203+
/// This is the type returned by [`crate::traits::labels::SequentialLabeling::build_dcf`].
199204
pub type DCF = sux::dict::EliasFano<
200205
sux::rank_sel::SelectZeroAdaptConst<
201206
sux::rank_sel::SelectAdaptConst<sux::bits::BitVec<Box<[usize]>>, Box<[usize]>, 12, 4>,

webgraph/src/graphs/bvgraph/random_access.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,10 @@ impl<D: Decode> Iterator for Succ<D> {
479479
Some(min)
480480
}
481481

482+
fn count(self) -> usize {
483+
self.len()
484+
}
485+
482486
fn size_hint(&self) -> (usize, Option<usize>) {
483487
(self.size, Some(self.size))
484488
}

webgraph/src/graphs/bvgraph/sequential.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,33 @@ impl<F: SequentialDecoderFactory> SequentialLabeling for BvGraphSeq<F> {
100100

101101
iter
102102
}
103+
104+
fn build_dcf(&self) -> DCF {
105+
let n = self.num_nodes();
106+
let num_arcs = self
107+
.num_arcs_hint()
108+
.expect("build_dcf requires num_arcs_hint()") as usize;
109+
let mut efb = sux::dict::EliasFanoBuilder::new(n + 1, num_arcs);
110+
efb.push(0);
111+
let mut cumul_deg = 0usize;
112+
let mut iter = OffsetDegIter::new(
113+
self.factory.new_decoder().unwrap(),
114+
n,
115+
self.compression_window,
116+
self.min_interval_length,
117+
);
118+
for _ in 0..n {
119+
cumul_deg += iter.next_degree().unwrap();
120+
efb.push(cumul_deg);
121+
}
122+
unsafe {
123+
efb.build().map_high_bits(|high_bits| {
124+
sux::rank_sel::SelectZeroAdaptConst::<_, _, 12, 4>::new(
125+
sux::rank_sel::SelectAdaptConst::<_, _, 12, 4>::new(high_bits),
126+
)
127+
})
128+
}
129+
}
103130
}
104131

105132
impl<F: SequentialDecoderFactory> SequentialGraph for BvGraphSeq<F> {}

webgraph/src/graphs/csr_graph.rs

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,11 @@ use crate::traits::*;
1010
use common_traits::UnsignedInt;
1111
use epserde::Epserde;
1212
use lender::{IntoLender, Lend, Lender, Lending, check_covariance, for_};
13-
use sux::{bits::BitFieldVec, dict::EliasFanoBuilder, prelude::SelectAdaptConst};
13+
use sux::{
14+
bits::BitFieldVec,
15+
dict::EliasFanoBuilder,
16+
rank_sel::{SelectAdaptConst, SelectZeroAdaptConst},
17+
};
1418
use value_traits::{
1519
iter::{IterFrom, IterateByValueFrom},
1620
slices::SliceByValue,
@@ -324,6 +328,22 @@ where
324328
successors_iter: self.successors.iter_value_from(offset),
325329
}
326330
}
331+
332+
fn build_dcf(&self) -> crate::graphs::bvgraph::DCF {
333+
let n = self.num_nodes();
334+
let num_arcs = self.num_arcs_hint().unwrap() as usize;
335+
let mut efb = EliasFanoBuilder::new(n + 1, num_arcs);
336+
for val in self.dcf.iter_value_from(0).take(n + 1) {
337+
efb.push(val);
338+
}
339+
unsafe {
340+
efb.build().map_high_bits(|high_bits| {
341+
SelectZeroAdaptConst::<_, _, 12, 4>::new(
342+
SelectAdaptConst::<_, _, 12, 4>::new(high_bits),
343+
)
344+
})
345+
}
346+
}
327347
}
328348

329349
impl<DCF, S> SequentialLabeling for CsrSortedGraph<DCF, S>
@@ -351,18 +371,22 @@ where
351371
fn iter_from(&self, from: usize) -> Self::Lender<'_> {
352372
LenderSortedImpl(self.0.iter_from(from))
353373
}
374+
375+
fn build_dcf(&self) -> crate::graphs::bvgraph::DCF {
376+
self.0.build_dcf()
377+
}
354378
}
355379

356-
impl<DCF, S> SequentialGraph for CsrGraph<DCF, S>
380+
impl<D, S> SequentialGraph for CsrGraph<D, S>
357381
where
358-
DCF: SliceByValue + IterateByValueFrom<Item = usize>,
382+
D: SliceByValue + IterateByValueFrom<Item = usize>,
359383
S: SliceByValue + IterateByValueFrom<Item = usize>,
360384
{
361385
}
362386

363-
impl<DCF, S> SequentialGraph for CsrSortedGraph<DCF, S>
387+
impl<D, S> SequentialGraph for CsrSortedGraph<D, S>
364388
where
365-
DCF: SliceByValue + IterateByValueFrom<Item = usize>,
389+
D: SliceByValue + IterateByValueFrom<Item = usize>,
366390
S: SliceByValue + IterateByValueFrom<Item = usize>,
367391
{
368392
}
@@ -577,6 +601,11 @@ impl<D: Iterator<Item = usize>> Iterator for SeqSucc<'_, D> {
577601
self.succ_iter.next()
578602
}
579603

604+
#[inline(always)]
605+
fn count(self) -> usize {
606+
self.len()
607+
}
608+
580609
#[inline(always)]
581610
fn size_hint(&self) -> (usize, Option<usize>) {
582611
let len = self.last_offset - *self.current_offset;

webgraph/src/graphs/permuted_graph.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@ impl<I: Iterator<Item = usize>, P: SliceByValue<Value = usize>> Iterator for Suc
162162
fn next(&mut self) -> Option<Self::Item> {
163163
self.iter.next().map(|succ| self.perm.index_value(succ))
164164
}
165+
166+
fn count(self) -> usize {
167+
self.iter.count()
168+
}
165169
}
166170

167171
impl<I: ExactSizeIterator<Item = usize>, P: SliceByValue<Value = usize>> ExactSizeIterator

0 commit comments

Comments
 (0)