Skip to content

Commit 1bf75d4

Browse files
committed
Removed dangling-node distribution option; added examples; new Mode enum
1 parent 9717dee commit 1bf75d4

8 files changed

Lines changed: 202 additions & 169 deletions

File tree

algo/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Change Log
22

3+
## [unreleased]
4+
5+
### New
6+
7+
- New `PageRank` parallel implementation based on the Gauss–Seidel iterative
8+
method.
9+
310
## [0.6.0] - 2026-02-18
411

512
### Changed

algo/src/rank/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@
55
*/
66

77
pub mod pagerank;
8-
pub use pagerank::PageRank;
8+
pub use pagerank::{Mode, PageRank};

algo/src/rank/pagerank.rs

Lines changed: 116 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
//! Parallel Gauss–Seidel PageRank.
88
//!
99
//! This implementation uses two vectors of doubles (one for the current
10-
//! approximation, the order for the inverses of outdegrees) and,
10+
//! approximation, the other for the inverses of outdegrees) and,
1111
//! experimentally, converges faster than other implementations. Moreover, it
1212
//! scales linearly with the number of cores.
1313
//!
@@ -37,22 +37,25 @@
3737
//!
3838
//! to which we can apply the Gauss–Seidel method.
3939
//!
40-
//! By default, when a preference vector is set via
41-
//! [`preference`](PageRank::preference), we compute _strongly preferential_
42-
//! PageRank (**u** = **v**). To obtain _weakly preferential_ PageRank, set an
43-
//! explicit uniform
44-
//! [`dangling_distribution`](PageRank::dangling_distribution). Setting
45-
//! [`pseudo_rank`](PageRank::pseudo_rank) to `true` zeroes out the
46-
//! dangling-node contribution entirely (**u** = **0**), yielding a
47-
//! non-stochastic vector sometimes called _pseudo-rank_.
40+
//! The [`mode`](PageRank::mode) setter selects among three variants:
41+
//!
42+
//! - [`StronglyPreferential`](Mode::StronglyPreferential) (the default):
43+
//! **u** = **v**, so the preference vector doubles as the dangling-node
44+
//! distribution.
45+
//! - [`WeaklyPreferential`](Mode::WeaklyPreferential): **u** = **1**/*n*, so
46+
//! dangling nodes distribute their rank uniformly regardless of the
47+
//! preference vector.
48+
//! - [`PseudoRank`](Mode::PseudoRank): **u** = **0**, zeroing out the
49+
//! dangling-node contribution entirely and yielding a non-stochastic vector
50+
//! sometimes called _pseudorank_.
4851
//!
4952
//! # The Gauss–Seidel method
5053
//!
5154
//! The formula above can be rewritten as the linear system
5255
//!
53-
//! > **x** ( *I* − α (*P* + **u**ᵀ **d**) ) = (1 − α) **v**
56+
//! > **x** ( *I* − α (*P* + **d**ᵀ **u**) ) = (1 − α) **v**
5457
//!
55-
//! that is, **x** *M* = **b** where *M* = *I* − α (*P* + **u**ᵀ **d**) and
58+
//! that is, **x** *M* = **b** where *M* = *I* − α (*P* + **d**ᵀ **u**) and
5659
//! **b** = (1 − α) **v**. The [Gauss–Seidel method] solves this system by
5760
//! updating a _single_ vector in place:
5861
//!
@@ -259,6 +262,35 @@ pub mod preds {
259262
}
260263
}
261264

265+
/// Selects the PageRank variant to compute.
266+
///
267+
/// See the [module-level documentation](self) for the mathematical details.
268+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
269+
pub enum Mode {
270+
/// Uses the preference vector **v** as the dangling-node distribution
271+
/// (**u** = **v**). This is the default.
272+
#[default]
273+
StronglyPreferential,
274+
/// Uses a uniform dangling-node distribution (**u** = **1**/*n*) regardless
275+
/// of the preference vector.
276+
WeaklyPreferential,
277+
/// Zeroes out the dangling-node contribution (**u** = **0**). If the graph
278+
/// has dangling nodes, the result is a non-stochastic vector that, however,
279+
/// is identical to the [strongly preferential](Mode::StronglyPreferential)
280+
/// variant up to normalization.
281+
PseudoRank,
282+
}
283+
284+
impl std::fmt::Display for Mode {
285+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
286+
match self {
287+
Mode::StronglyPreferential => f.write_str("strongly preferential"),
288+
Mode::WeaklyPreferential => f.write_str("weakly preferential"),
289+
Mode::PseudoRank => f.write_str("pseudorank"),
290+
}
291+
}
292+
}
293+
262294
use dsi_progress_logger::{ConcurrentProgressLog, ProgressLog, no_logging};
263295
use kahan::KahanSum;
264296
use lender::prelude::*;
@@ -283,13 +315,56 @@ use webgraph::utils::Granularity;
283315
///
284316
/// The constructor takes the _transpose_ of the graph, because the algorithm
285317
/// needs to iterate over the predecessors of each node.
318+
///
319+
/// # Examples
320+
///
321+
/// Default PageRank (strongly preferential, α = 0.85) on a small graph:
322+
///
323+
/// ```
324+
/// use webgraph::graphs::vec_graph::VecGraph;
325+
/// use webgraph_algo::rank::pagerank::{PageRank, preds};
326+
///
327+
/// // Build the transpose of a 5-node graph:
328+
/// // 0 → 1, 0 → 2, 1 → 2, 2 → 0, 3 → 0, 4 → 3
329+
/// let mut gt = VecGraph::empty(5);
330+
/// gt.add_arcs([(1, 0), (2, 0), (2, 1), (0, 2), (0, 3), (3, 4)]);
331+
///
332+
/// let mut pr = PageRank::new(&gt);
333+
/// pr.run(preds::L1Norm::try_from(1E-9).unwrap());
334+
///
335+
/// assert_eq!(pr.rank().len(), 5);
336+
/// assert!((pr.rank().iter().sum::<f64>() - 1.0).abs() < 1E-9);
337+
/// ```
338+
///
339+
/// Weakly preferential PageRank with a custom preference vector:
340+
///
341+
/// ```
342+
/// use webgraph::graphs::vec_graph::VecGraph;
343+
/// use webgraph_algo::rank::pagerank::{Mode, PageRank, preds};
344+
///
345+
/// let mut gt = VecGraph::empty(5);
346+
/// gt.add_arcs([(1, 0), (2, 0), (2, 1), (0, 2), (0, 3), (3, 4)]);
347+
///
348+
/// // Custom preference: favor node 0
349+
/// let pref = [0.5, 0.2, 0.1, 0.1, 0.1];
350+
///
351+
/// let mut pr = PageRank::new(&gt);
352+
/// pr.alpha(0.9)
353+
/// .preference(Some(&pref))
354+
/// .mode(Mode::WeaklyPreferential);
355+
/// pr.run(preds::L1Norm::try_from(1E-9).unwrap());
356+
///
357+
/// // Node 0 has the highest rank
358+
/// assert!(pr.rank()[0] > pr.rank()[1]);
359+
/// assert_eq!(pr.rank().len(), 5);
360+
/// assert!((pr.rank().iter().sum::<f64>() - 1.0).abs() < 1E-9);
361+
/// ```
286362
pub struct PageRank<'a, G: RandomAccessGraph + Sync> {
287363
transpose: &'a G,
288364
alpha: f64,
289365
inv_outdegrees: Option<Box<[f64]>>,
290366
preference: Option<&'a [f64]>,
291-
dangling_dist: Option<&'a [f64]>,
292-
pseudo_rank: bool,
367+
mode: Mode,
293368
granularity: Granularity,
294369
norm_delta: f64,
295370

@@ -310,8 +385,7 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
310385
alpha: 0.85,
311386
inv_outdegrees: None,
312387
preference: None,
313-
dangling_dist: None,
314-
pseudo_rank: false,
388+
mode: Mode::default(),
315389
granularity: Granularity::default(),
316390
norm_delta: f64::INFINITY,
317391
rank,
@@ -336,18 +410,17 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
336410

337411
/// Sets the preference (personalization) vector.
338412
///
339-
/// When set, and no explicit
340-
/// [`dangling_distribution`](Self::dangling_distribution) is provided, the
341-
/// preference vector is also used as the dangling-node distribution
342-
/// (strongly preferential PageRank).
413+
/// When set, the preference vector is also used as the dangling-node
414+
/// distribution in [`StronglyPreferential`](Mode::StronglyPreferential)
415+
/// mode.
343416
///
344417
/// Pass `None` to revert to the uniform preference (1/*n*).
345418
///
346419
/// # Panics
347420
///
348-
/// Panics if the length of the vector does not match the number of nodes,
349-
/// In test mode, we also call
350-
/// [`assert_stochastic`](Self::assert_stochastic).
421+
/// Panics if the length of the vector does not match the number of nodes.
422+
/// In test mode, we also check for stochasticity (nonnegative entries
423+
/// summing to 1 within a tolerance of 1E-6) and panic if the check fails.
351424
pub fn preference(&mut self, preference: Option<&'a [f64]>) -> &mut Self {
352425
if let Some(v) = preference {
353426
let n = self.transpose.num_nodes();
@@ -364,41 +437,9 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
364437
self
365438
}
366439

367-
/// Sets the dangling-node distribution.
368-
///
369-
/// This overrides the default behaviour (which uses the preference vector,
370-
/// or the uniform distribution if no preference is set).
371-
///
372-
/// Pass `None` to revert to the default.
373-
///
374-
/// # Panics
375-
///
376-
/// Panics if the length of the vector does not match the number of nodes.
377-
/// In test mode, we also call
378-
/// [`assert_stochastic`](Self::assert_stochastic).
379-
pub fn dangling_distribution(&mut self, distribution: Option<&'a [f64]>) -> &mut Self {
380-
if let Some(v) = distribution {
381-
let n = self.transpose.num_nodes();
382-
assert_eq!(
383-
v.len(),
384-
n,
385-
"Dangling distribution length ({}) does not match the number of nodes ({n})",
386-
v.len()
387-
);
388-
#[cfg(test)]
389-
Self::assert_stochastic(v, "dangling distribution");
390-
}
391-
self.dangling_dist = distribution;
392-
self
393-
}
394-
395-
/// Sets pseudo-rank mode.
396-
///
397-
/// When `true`, the contribution of dangling nodes is zeroed out, yielding a
398-
/// non-stochastic vector. The resulting vector differs from strongly
399-
/// preferential PageRank only by a multiplicative factor of 1 − α.
400-
pub fn pseudo_rank(&mut self, pseudo_rank: bool) -> &mut Self {
401-
self.pseudo_rank = pseudo_rank;
440+
/// Sets the PageRank [mode](Mode).
441+
pub fn mode(&mut self, mode: Mode) -> &mut Self {
442+
self.mode = mode;
402443
self
403444
}
404445

@@ -449,7 +490,7 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
449490
/// thus making it possible to customize the logs.
450491
///
451492
/// It is possible to specify either `pl` or `cpl` as
452-
/// [`no_logging![]`](dsi_progress_logger::no_logging) if don't want to log
493+
/// [`no_logging![]`](dsi_progress_logger::no_logging) if you don't want to log
453494
/// the corresponding part of the computation, albeit having the latter one
454495
/// and not the former one will lead to confusing logs.
455496
pub fn run_with_logging(
@@ -463,14 +504,7 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
463504
return;
464505
}
465506

466-
let mode = if self.pseudo_rank {
467-
"pseudo-rank"
468-
} else if self.dangling_dist.is_some() || self.preference.is_none() {
469-
"weakly preferential"
470-
} else {
471-
"strongly preferential"
472-
};
473-
log::info!("Mode: {}", mode);
507+
log::info!("Mode: {}", self.mode);
474508
log::info!("Alpha: {}", self.alpha);
475509
log::info!(
476510
"Preference: {}",
@@ -552,13 +586,6 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
552586
self.alpha
553587
));
554588

555-
// Resolve the effective dangling distribution: explicit > preference > uniform
556-
let dangling_dist: Option<&[f64]> = if self.pseudo_rank {
557-
None // unused
558-
} else {
559-
self.dangling_dist.or(self.preference)
560-
};
561-
562589
loop {
563590
let norm_delta_accum = Mutex::new(0.0f64);
564591
let dangling_rank_accum = Mutex::new(0.0f64);
@@ -602,18 +629,28 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
602629
}
603630
}
604631

632+
// Preference and dangling distribution for node i
633+
let v_i = match self.preference {
634+
Some(v) => v[i],
635+
None => inv_n,
636+
};
637+
// u_i = v_i in strongly preferential mode,
638+
// u_i = 1/n in weakly preferential mode.
639+
let u_i = match self.mode {
640+
Mode::StronglyPreferential => v_i,
641+
Mode::WeaklyPreferential => inv_n,
642+
Mode::PseudoRank => 0.0, // unused, but avoids branching
643+
};
644+
605645
// Compute self-loop correction and self dangling rank
606646
let (self_dangling_rank, self_loop_factor) = if inv_outdegrees[i] == 0.0
607647
{
608648
// Dangling node
609649
let sdr = rank_sync[i].get();
610-
let slf = if self.pseudo_rank {
650+
let slf = if self.mode == Mode::PseudoRank {
611651
1.0
612652
} else {
613-
match dangling_dist {
614-
Some(u) => 1.0 - self.alpha * u[i],
615-
None => 1.0 - self.alpha * inv_n,
616-
}
653+
1.0 - self.alpha * u_i
617654
};
618655
(sdr, slf)
619656
} else {
@@ -627,20 +664,10 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
627664
};
628665

629666
// Add dangling rank contribution
630-
if !self.pseudo_rank {
631-
let u_i = match dangling_dist {
632-
Some(u) => u[i],
633-
None => inv_n,
634-
};
667+
if self.mode != Mode::PseudoRank {
635668
sigma += (dangling_rank - self_dangling_rank) * u_i;
636669
}
637670

638-
// Preference contribution
639-
let v_i = match self.preference {
640-
Some(v) => v[i],
641-
None => inv_n,
642-
};
643-
644671
let new_rank = ((1.0 - self.alpha) * v_i + self.alpha * sigma.sum())
645672
/ self_loop_factor;
646673

0 commit comments

Comments
 (0)