Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/backend/libc/thread/syscalls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,28 @@ pub(crate) fn sched_getcpu() -> usize {
r as usize
}

/// Queries the kernel for the CPU and NUMA node the calling thread is
/// currently running on, returned as `(cpu, node)`.
///
/// This issues the raw `getcpu` system call through `libc::syscall`; the
/// third argument of the system call is passed as a null pointer.
#[cfg(linux_kernel)]
#[inline]
pub(crate) fn getcpu() -> (usize, usize) {
    let mut cpu_slot = core::mem::MaybeUninit::<u32>::uninit();
    let mut node_slot = core::mem::MaybeUninit::<u32>::uninit();

    // SAFETY: Both output pointers refer to live, writable `u32` slots owned
    // by this stack frame, and the remaining argument is null.
    let status = unsafe {
        libc::syscall(
            libc::SYS_getcpu,
            cpu_slot.as_mut_ptr(),
            node_slot.as_mut_ptr(),
            core::ptr::null::<libc::c_void>(),
        )
    };

    // Failure is only checked in debug builds.
    debug_assert!(status >= 0);

    // SAFETY: This relies on the system call above having succeeded and
    // having written both output slots.
    let cpu = unsafe { cpu_slot.assume_init() };
    let node = unsafe { node_slot.assume_init() };

    (cpu as usize, node as usize)
}

#[cfg(any(freebsdlike, linux_kernel, target_os = "fuchsia"))]
#[inline]
pub(crate) fn sched_getaffinity(pid: Option<Pid>, cpuset: &mut RawCpuSet) -> io::Result<()> {
Expand Down
30 changes: 28 additions & 2 deletions src/backend/linux_raw/thread/syscalls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ pub(crate) fn setgroups_thread(gids: &[crate::ugid::Gid]) -> io::Result<()> {
unsafe { ret(syscall_readonly!(__NR_setgroups, len, addr)) }
}

// `sched_getcpu` has special optimizations via the vDSO on some architectures.
// `sched_getcpu` and `getcpu` have special optimizations via the vDSO on some architectures.
#[cfg(any(
target_arch = "x86_64",
target_arch = "x86",
Expand All @@ -455,7 +455,30 @@ pub(crate) fn setgroups_thread(gids: &[crate::ugid::Gid]) -> io::Result<()> {
target_arch = "powerpc64",
target_arch = "s390x"
))]
pub(crate) use crate::backend::vdso_wrappers::sched_getcpu;
pub(crate) use crate::backend::vdso_wrappers::{getcpu, sched_getcpu};

// Fallback `getcpu` for architectures that are not in the vDSO list below;
// it always goes through the plain `getcpu` system call.
#[cfg(not(any(
    target_arch = "x86_64",
    target_arch = "x86",
    target_arch = "riscv64",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "s390x"
)))]
#[inline]
pub(crate) fn getcpu() -> (usize, usize) {
    let mut cpu_slot = MaybeUninit::<u32>::uninit();
    let mut node_slot = MaybeUninit::<u32>::uninit();

    // SAFETY: Both output arguments refer to live `u32` slots owned by this
    // stack frame. Reading them back relies on the system call having
    // succeeded, which is only checked in debug builds.
    unsafe {
        let outcome = ret(syscall!(__NR_getcpu, &mut cpu_slot, &mut node_slot, zero()));
        debug_assert!(outcome.is_ok());

        let cpu = cpu_slot.assume_init();
        let node = node_slot.assume_init();
        (cpu as usize, node as usize)
    }
}

// `sched_getcpu` on platforms without a vDSO entry for it.
#[cfg(not(any(
Expand All @@ -468,6 +491,9 @@ pub(crate) use crate::backend::vdso_wrappers::sched_getcpu;
)))]
#[inline]
pub(crate) fn sched_getcpu() -> usize {
// We should not implement this function by using the `getcpu` function defined above
// because we want to provide exactly one pointer to the system call.

let mut cpu = MaybeUninit::<u32>::uninit();
unsafe {
let r = ret(syscall!(__NR_getcpu, &mut cpu, zero(), zero()));
Expand Down
33 changes: 33 additions & 0 deletions src/backend/linux_raw/vdso_wrappers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,35 @@ pub(crate) fn clock_gettime_dynamic(id: DynamicClockId<'_>) -> io::Result<Timesp
}
}

/// Returns `(cpu, numa_node)` for the calling thread by calling the function
/// whose address is cached in `GETCPU`, initializing that cache through
/// `init_getcpu` when it is still empty.
#[cfg(feature = "thread")]
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "x86",
    target_arch = "riscv64",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "s390x",
))]
#[inline]
pub(crate) fn getcpu() -> (usize, usize) {
    let mut cpu_slot = MaybeUninit::<u32>::uninit();
    let mut node_slot = MaybeUninit::<u32>::uninit();

    // SAFETY: `GETCPU` contains either null or the address of a function with
    // an ABI like libc `getcpu`, and calling it has the side effect of writing
    // to the result buffers, and no others.
    unsafe {
        // A null cached pointer maps to `None`, which takes the cold
        // initialization path.
        let vdso_fn = match transmute(GETCPU.load(Relaxed)) {
            Some(resolved) => resolved,
            None => init_getcpu(),
        };

        let status = vdso_fn(cpu_slot.as_mut_ptr(), node_slot.as_mut_ptr(), null_mut());

        // Failure is only checked in debug builds.
        debug_assert_eq!(status, 0);

        let cpu = cpu_slot.assume_init();
        let node = node_slot.assume_init();
        (cpu as usize, node as usize)
    }
}

#[cfg(feature = "thread")]
#[cfg(any(
target_arch = "x86_64",
Expand All @@ -128,6 +157,9 @@ pub(crate) fn clock_gettime_dynamic(id: DynamicClockId<'_>) -> io::Result<Timesp
))]
#[inline]
pub(crate) fn sched_getcpu() -> usize {
// We should not implement this function by using the `getcpu` function defined above
// because we want to provide exactly one pointer to the system call.

// SAFETY: `GETCPU` contains either null or the address of a function with
// an ABI like libc `getcpu`, and calling it has the side effect of writing
// to the result buffers, and no others.
Expand Down Expand Up @@ -308,6 +340,7 @@ fn init_clock_gettime() -> ClockGettimeType {
target_arch = "s390x",
))]
#[cold]
#[inline(never)]
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason for inline(never) here? It's already marked #[cold], which should have the desired effect. If a compiler decides it really wants to inline this, even given what we've told it, that seems fine.

Copy link
Copy Markdown
Author

@Eugene-Usachev Eugene-Usachev Feb 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am still sure that non-inlining provides better performance, but I agree with both points: the difference is tiny and the compiler can do as it wants. But I want to explicitly tell the compiler to generate exactly one call for this function, which should be called only once. If you don't like this, I can roll back this change. After all, the main goal of the PR is adding getcpu.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not so sure, myself. In a compiler with hot/cold basic block partitioning, inlining and then moving the cold blocks away from the hot path achieves a similar result to not-inlining, except that the compiler can more easily use an effectively custom calling convention. I don't know how much it matters in the code in question here, but in general, I don't like prohibiting compilers from doing things unless I have specific reasons.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have removed this line.

fn init_getcpu() -> GetcpuType {
init();
// SAFETY: Load the function address from static storage that we just
Expand Down
16 changes: 8 additions & 8 deletions src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,8 @@ mod tests {
let nread = read(&input, &mut buf).unwrap();
assert_eq!(nread, buf.len());
assert_eq!(
&buf[..58],
b"//! Utilities for functions that return data via buffers.\n"
&buf[..57],
b"//! Utilities for functions that return data via buffers."
Comment thread
Eugene-Usachev marked this conversation as resolved.
);
input.seek(SeekFrom::End(-1)).unwrap();
let nread = read(&input, &mut buf).unwrap();
Expand All @@ -407,13 +407,13 @@ mod tests {
let (init, uninit) = read(&input, &mut buf).unwrap();
assert_eq!(uninit.len(), 0);
assert_eq!(
&init[..58],
b"//! Utilities for functions that return data via buffers.\n"
&init[..57],
b"//! Utilities for functions that return data via buffers."
);
assert_eq!(init.len(), buf.len());
assert_eq!(
unsafe { core::mem::transmute::<&mut [MaybeUninit<u8>], &mut [u8]>(&mut buf[..58]) },
b"//! Utilities for functions that return data via buffers.\n"
unsafe { core::mem::transmute::<&mut [MaybeUninit<u8>], &mut [u8]>(&mut buf[..57]) },
b"//! Utilities for functions that return data via buffers."
);
input.seek(SeekFrom::End(-1)).unwrap();
let (init, uninit) = read(&input, &mut buf).unwrap();
Expand All @@ -440,8 +440,8 @@ mod tests {
assert_eq!(nread, buf.capacity());
assert_eq!(nread, buf.len());
assert_eq!(
&buf[..58],
b"//! Utilities for functions that return data via buffers.\n"
&buf[..57],
b"//! Utilities for functions that return data via buffers."
);
buf.clear();
input.seek(SeekFrom::End(-1)).unwrap();
Expand Down
26 changes: 24 additions & 2 deletions src/thread/sched.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ use core::{fmt, hash};
/// - [Linux]
///
/// [Linux]: https://man7.org/linux/man-pages/man3/CPU_SET.3.html
/// [`sched_setaffinity`]: crate::thread::sched_setaffinity
/// [`sched_getaffinity`]: crate::thread::sched_getaffinity
/// [`sched_setaffinity`]: sched_setaffinity
/// [`sched_getaffinity`]: sched_getaffinity
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct CpuSet {
Expand Down Expand Up @@ -159,3 +159,25 @@ pub fn sched_getaffinity(pid: Option<Pid>) -> io::Result<CpuSet> {
pub fn sched_getcpu() -> usize {
backend::thread::syscalls::sched_getcpu()
}

/// `getcpu()`—Get the CPU and NUMA node that the current thread is currently on.
///
/// # Example
///
/// ```rust
/// use rustix::thread::getcpu;
///
/// let (core, numa_node) = getcpu();
///
/// println!("The current thread was on the {core} core and {numa_node} numa node.");
/// ```
///
/// # References
/// - [Linux]
///
/// [Linux]: https://man7.org/linux/man-pages/man2/getcpu.2.html
#[cfg(linux_kernel)]
#[inline]
pub fn getcpu() -> (usize, usize) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the Linux docs, the values written by getcpu have type unsigned int.

   int getcpu(unsigned int *_Nullable cpu, unsigned int *_Nullable node);

Would it be better to reflect them here as u32, rather than usize?

I see that sched_getcpu already returns usize, but that appears to be an error.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to do the same as sched_getcpu. To be honest, I am not sure what users prefer more here. I use numa_node: usize in my code, but I can't say everyone does it. If you want me to change it I can do it but I think it is not important.

backend::thread::syscalls::getcpu()
}
Loading