77//! Parallel Gauss–Seidel PageRank.
88//!
99//! This implementation uses two vectors of doubles (one for the current
10- //! approximation, the order for the inverses of outdegrees) and,
10+ //! approximation, the other for the inverses of outdegrees) and,
1111//! experimentally, converges faster than other implementations. Moreover, it
1212//! scales linearly with the number of cores.
1313//!
3737//!
3838//! to which we can apply the Gauss–Seidel method.
3939//!
40- //! By default, when a preference vector is set via
41- //! [`preference`](PageRank::preference), we compute _strongly preferential_
42- //! PageRank (**u** = **v**). To obtain _weakly preferential_ PageRank, set an
43- //! explicit uniform
44- //! [`dangling_distribution`](PageRank::dangling_distribution). Setting
45- //! [`pseudo_rank`](PageRank::pseudo_rank) to `true` zeroes out the
46- //! dangling-node contribution entirely (**u** = **0**), yielding a
47- //! non-stochastic vector sometimes called _pseudo-rank_.
40+ //! The [`mode`](PageRank::mode) setter selects among three variants:
41+ //!
42+ //! - [`StronglyPreferential`](Mode::StronglyPreferential) (the default):
43+ //! **u** = **v**, so the preference vector doubles as the dangling-node
44+ //! distribution.
45+ //! - [`WeaklyPreferential`](Mode::WeaklyPreferential): **u** = **1**/*n*, so
46+ //! dangling nodes distribute their rank uniformly regardless of the
47+ //! preference vector.
48+ //! - [`PseudoRank`](Mode::PseudoRank): **u** = **0**, zeroing out the
49+ //! dangling-node contribution entirely and yielding a non-stochastic vector
50+ //! sometimes called _pseudorank_.
4851//!
4952//! # The Gauss–Seidel method
5053//!
5154//! The formula above can be rewritten as the linear system
5255//!
53- //! > **x** ( *I* − α (*P* + **u **ᵀ **d **) ) = (1 − α) **v**
56+ //! > **x** ( *I* − α (*P* + **d **ᵀ **u **) ) = (1 − α) **v**
5457//!
55- //! that is, **x** *M* = **b** where *M* = *I* − α (*P* + **u **ᵀ **d **) and
58+ //! that is, **x** *M* = **b** where *M* = *I* − α (*P* + **d **ᵀ **u **) and
5659//! **b** = (1 − α) **v**. The [Gauss–Seidel method] solves this system by
5760//! updating a _single_ vector in place:
5861//!
@@ -259,6 +262,35 @@ pub mod preds {
259262 }
260263}
261264
265+ /// Selects the PageRank variant to compute.
266+ ///
267+ /// See the [module-level documentation](self) for the mathematical details.
268+ #[ derive( Debug , Clone , Copy , PartialEq , Eq , Default ) ]
269+ pub enum Mode {
270+ /// Uses the preference vector **v** as the dangling-node distribution
271+ /// (**u** = **v**). This is the default.
272+ #[ default]
273+ StronglyPreferential ,
274+ /// Uses a uniform dangling-node distribution (**u** = **1**/*n*) regardless
275+ /// of the preference vector.
276+ WeaklyPreferential ,
277+ /// Zeroes out the dangling-node contribution (**u** = **0**). If the graph
278+ /// has dangling nodes, the result is a non-stochastic vector that is
279+ /// nonetheless identical, up to normalization, to the
280+ /// [strongly preferential](Mode::StronglyPreferential) result.
281+ PseudoRank ,
282+ }
283+
284+ impl std:: fmt:: Display for Mode { // Human-readable, lowercase name of each variant.
285+ fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
286+ match self { // One arm per variant and no wildcard, so adding a variant requires naming it here.
287+ Mode :: StronglyPreferential => f. write_str ( "strongly preferential" ) ,
288+ Mode :: WeaklyPreferential => f. write_str ( "weakly preferential" ) ,
289+ Mode :: PseudoRank => f. write_str ( "pseudorank" ) ,
290+ }
291+ }
292+ }
293+
262294use dsi_progress_logger:: { ConcurrentProgressLog , ProgressLog , no_logging} ;
263295use kahan:: KahanSum ;
264296use lender:: prelude:: * ;
@@ -283,13 +315,56 @@ use webgraph::utils::Granularity;
283315///
284316/// The constructor takes the _transpose_ of the graph, because the algorithm
285317/// needs to iterate over the predecessors of each node.
318+ ///
319+ /// # Examples
320+ ///
321+ /// Default PageRank (strongly preferential, α = 0.85) on a small graph:
322+ ///
323+ /// ```
324+ /// use webgraph::graphs::vec_graph::VecGraph;
325+ /// use webgraph_algo::rank::pagerank::{PageRank, preds};
326+ ///
327+ /// // Build the transpose of a 5-node graph:
328+ /// // 0 → 1, 0 → 2, 1 → 2, 2 → 0, 3 → 0, 4 → 3
329+ /// let mut gt = VecGraph::empty(5);
330+ /// gt.add_arcs([(1, 0), (2, 0), (2, 1), (0, 2), (0, 3), (3, 4)]);
331+ ///
332+ /// let mut pr = PageRank::new(&gt);
333+ /// pr.run(preds::L1Norm::try_from(1E-9).unwrap());
334+ ///
335+ /// assert_eq!(pr.rank().len(), 5);
336+ /// assert!((pr.rank().iter().sum::<f64>() - 1.0).abs() < 1E-9);
337+ /// ```
338+ ///
339+ /// Weakly preferential PageRank with a custom preference vector:
340+ ///
341+ /// ```
342+ /// use webgraph::graphs::vec_graph::VecGraph;
343+ /// use webgraph_algo::rank::pagerank::{Mode, PageRank, preds};
344+ ///
345+ /// let mut gt = VecGraph::empty(5);
346+ /// gt.add_arcs([(1, 0), (2, 0), (2, 1), (0, 2), (0, 3), (3, 4)]);
347+ ///
348+ /// // Custom preference: favor node 0
349+ /// let pref = [0.5, 0.2, 0.1, 0.1, 0.1];
350+ ///
351+ /// let mut pr = PageRank::new(&gt);
352+ /// pr.alpha(0.9)
353+ /// .preference(Some(&pref))
354+ /// .mode(Mode::WeaklyPreferential);
355+ /// pr.run(preds::L1Norm::try_from(1E-9).unwrap());
356+ ///
357+ /// // Node 0 has the highest rank
358+ /// assert!(pr.rank()[0] > pr.rank()[1]);
359+ /// assert_eq!(pr.rank().len(), 5);
360+ /// assert!((pr.rank().iter().sum::<f64>() - 1.0).abs() < 1E-9);
361+ /// ```
286362pub struct PageRank < ' a , G : RandomAccessGraph + Sync > {
287363 transpose : & ' a G ,
288364 alpha : f64 ,
289365 inv_outdegrees : Option < Box < [ f64 ] > > ,
290366 preference : Option < & ' a [ f64 ] > ,
291- dangling_dist : Option < & ' a [ f64 ] > ,
292- pseudo_rank : bool ,
367+ mode : Mode ,
293368 granularity : Granularity ,
294369 norm_delta : f64 ,
295370
@@ -310,8 +385,7 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
310385 alpha : 0.85 ,
311386 inv_outdegrees : None ,
312387 preference : None ,
313- dangling_dist : None ,
314- pseudo_rank : false ,
388+ mode : Mode :: default ( ) ,
315389 granularity : Granularity :: default ( ) ,
316390 norm_delta : f64:: INFINITY ,
317391 rank,
@@ -336,18 +410,17 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
336410
337411 /// Sets the preference (personalization) vector.
338412 ///
339- /// When set, and no explicit
340- /// [`dangling_distribution`](Self::dangling_distribution) is provided, the
341- /// preference vector is also used as the dangling-node distribution
342- /// (strongly preferential PageRank).
413+ /// When set, the preference vector is also used as the dangling-node
414+ /// distribution in [`StronglyPreferential`](Mode::StronglyPreferential)
415+ /// mode.
343416 ///
344417 /// Pass `None` to revert to the uniform preference (1/*n*).
345418 ///
346419 /// # Panics
347420 ///
348- /// Panics if the length of the vector does not match the number of nodes,
349- /// In test mode, we also call
350- /// [`assert_stochastic`](Self::assert_stochastic) .
421+ /// Panics if the length of the vector does not match the number of nodes.
422+ /// In test mode, we also check for stochasticity (nonnegative entries
423+ /// summing to 1 within a tolerance of 1E-6) and panic if the check fails .
351424 pub fn preference ( & mut self , preference : Option < & ' a [ f64 ] > ) -> & mut Self {
352425 if let Some ( v) = preference {
353426 let n = self . transpose . num_nodes ( ) ;
@@ -364,41 +437,9 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
364437 self
365438 }
366439
367- /// Sets the dangling-node distribution.
368- ///
369- /// This overrides the default behaviour (which uses the preference vector,
370- /// or the uniform distribution if no preference is set).
371- ///
372- /// Pass `None` to revert to the default.
373- ///
374- /// # Panics
375- ///
376- /// Panics if the length of the vector does not match the number of nodes.
377- /// In test mode, we also call
378- /// [`assert_stochastic`](Self::assert_stochastic).
379- pub fn dangling_distribution ( & mut self , distribution : Option < & ' a [ f64 ] > ) -> & mut Self {
380- if let Some ( v) = distribution {
381- let n = self . transpose . num_nodes ( ) ;
382- assert_eq ! (
383- v. len( ) ,
384- n,
385- "Dangling distribution length ({}) does not match the number of nodes ({n})" ,
386- v. len( )
387- ) ;
388- #[ cfg( test) ]
389- Self :: assert_stochastic ( v, "dangling distribution" ) ;
390- }
391- self . dangling_dist = distribution;
392- self
393- }
394-
395- /// Sets pseudo-rank mode.
396- ///
397- /// When `true`, the contribution of dangling nodes is zeroed out, yielding a
398- /// non-stochastic vector. The resulting vector differs from strongly
399- /// preferential PageRank only by a multiplicative factor of 1 − α.
400- pub fn pseudo_rank ( & mut self , pseudo_rank : bool ) -> & mut Self {
401- self . pseudo_rank = pseudo_rank;
440+ /// Sets the PageRank [mode](Mode) (default: [`Mode::StronglyPreferential`]); returns `self` for chaining.
441+ pub fn mode ( & mut self , mode : Mode ) -> & mut Self {
442+ self . mode = mode;
402443 self
403444 }
404445
@@ -449,7 +490,7 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
449490 /// making thus possible to customize the logs.
450491 ///
451492 /// It is possible to specify either `pl` or `cpl` as
452- /// [`no_logging![]`](dsi_progress_logger::no_logging) if don't want to log
493+ /// [`no_logging![]`](dsi_progress_logger::no_logging) if you don't want to log
453494 /// the corresponding part of the computation; note, however, that enabling
454495 /// `cpl` but not `pl` will lead to confusing logs.
455496 pub fn run_with_logging (
@@ -463,14 +504,7 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
463504 return ;
464505 }
465506
466- let mode = if self . pseudo_rank {
467- "pseudo-rank"
468- } else if self . dangling_dist . is_some ( ) || self . preference . is_none ( ) {
469- "weakly preferential"
470- } else {
471- "strongly preferential"
472- } ;
473- log:: info!( "Mode: {}" , mode) ;
507+ log:: info!( "Mode: {}" , self . mode) ;
474508 log:: info!( "Alpha: {}" , self . alpha) ;
475509 log:: info!(
476510 "Preference: {}" ,
@@ -552,13 +586,6 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
552586 self . alpha
553587 ) ) ;
554588
555- // Resolve the effective dangling distribution: explicit > preference > uniform
556- let dangling_dist: Option < & [ f64 ] > = if self . pseudo_rank {
557- None // unused
558- } else {
559- self . dangling_dist . or ( self . preference )
560- } ;
561-
562589 loop {
563590 let norm_delta_accum = Mutex :: new ( 0.0f64 ) ;
564591 let dangling_rank_accum = Mutex :: new ( 0.0f64 ) ;
@@ -602,18 +629,28 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
602629 }
603630 }
604631
632+ // Preference and dangling distribution for node i
633+ let v_i = match self . preference {
634+ Some ( v) => v[ i] ,
635+ None => inv_n,
636+ } ;
637+ // u_i = v_i in strongly preferential mode,
638+ // u_i = 1/n in weakly preferential mode.
639+ let u_i = match self . mode {
640+ Mode :: StronglyPreferential => v_i,
641+ Mode :: WeaklyPreferential => inv_n,
642+ Mode :: PseudoRank => 0.0 , // unused, but avoids branching
643+ } ;
644+
605645 // Compute self-loop correction and self dangling rank
606646 let ( self_dangling_rank, self_loop_factor) = if inv_outdegrees[ i] == 0.0
607647 {
608648 // Dangling node
609649 let sdr = rank_sync[ i] . get( ) ;
610- let slf = if self . pseudo_rank {
650+ let slf = if self . mode == Mode :: PseudoRank {
611651 1.0
612652 } else {
613- match dangling_dist {
614- Some ( u) => 1.0 - self . alpha * u[ i] ,
615- None => 1.0 - self . alpha * inv_n,
616- }
653+ 1.0 - self . alpha * u_i
617654 } ;
618655 ( sdr, slf)
619656 } else {
@@ -627,20 +664,10 @@ impl<'a, G: RandomAccessGraph + Sync> PageRank<'a, G> {
627664 } ;
628665
629666 // Add dangling rank contribution
630- if !self . pseudo_rank {
631- let u_i = match dangling_dist {
632- Some ( u) => u[ i] ,
633- None => inv_n,
634- } ;
667+ if self . mode != Mode :: PseudoRank {
635668 sigma += ( dangling_rank - self_dangling_rank) * u_i;
636669 }
637670
638- // Preference contribution
639- let v_i = match self . preference {
640- Some ( v) => v[ i] ,
641- None => inv_n,
642- } ;
643-
644671 let new_rank = ( ( 1.0 - self . alpha) * v_i + self . alpha * sigma. sum( ) )
645672 / self_loop_factor;
646673
0 commit comments