Skip to content

Commit 40ed9ed

Browse files
authored
Change epoll's timeout to Option<&Timespec>. (#1324)
This eliminates the last place in rustix's public API that exposed a time value as `c_int` milliseconds. On Linux, the syscall needed to pass a full timespec is only available on Linux >= 5.11, so add a "linux_5_11" cargo feature to enable it.
1 parent 3cbb4c3 commit 40ed9ed

10 files changed

Lines changed: 176 additions & 69 deletions

File tree

Cargo.toml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,11 +208,14 @@ use-explicitly-provided-auxv = []
208208

209209
# OS compatibility features
210210

211-
# Optimize for Linux 4.11 or later
211+
# Specialize for Linux 4.11 or later
212212
linux_4_11 = []
213213

214-
# Enable all optimizations for the latest Linux versions.
215-
linux_latest = ["linux_4_11"]
214+
# Specialize for Linux 5.11 or later
215+
linux_5_11 = ["linux_4_11"]
216+
217+
# Enable all specializations for the latest Linux versions.
218+
linux_latest = ["linux_5_11"]
216219

217220
# Enable features which depend on the Rust global allocator, such as functions
218221
# that return owned strings or `Vec`s.

src/backend/libc/event/syscalls.rs

Lines changed: 65 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -200,21 +200,7 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: Option<&Timespec>) -> io::Re
200200
{
201201
let timeout = match timeout {
202202
None => -1,
203-
Some(timeout) => {
204-
// Convert from `Timespec` to `c_int` milliseconds.
205-
let secs = timeout.tv_sec;
206-
if secs < 0 {
207-
return Err(io::Errno::INVAL);
208-
}
209-
secs.checked_mul(1000)
210-
.and_then(|millis| {
211-
// Add the nanoseconds, converted to millis, rounding
212-
// up. With Rust 1.73.0 this can use `div_ceil`.
213-
millis.checked_add((i64::from(timeout.tv_nsec) + 999_999) / 1_000_000)
214-
})
215-
.and_then(|millis| c::c_int::try_from(millis).ok())
216-
.ok_or(io::Errno::INVAL)?
217-
}
203+
Some(timeout) => timeout.as_c_int_millis().ok_or(io::Errno::INVAL)?,
218204
};
219205
ret_c_int(unsafe { c::poll(fds.as_mut_ptr().cast(), nfds, timeout) })
220206
.map(|nready| nready as usize)
@@ -531,9 +517,72 @@ pub(crate) fn epoll_del(epoll: BorrowedFd<'_>, source: BorrowedFd<'_>) -> io::Re
531517
pub(crate) fn epoll_wait(
532518
epoll: BorrowedFd<'_>,
533519
events: &mut [MaybeUninit<crate::event::epoll::Event>],
534-
timeout: c::c_int,
520+
timeout: Option<&Timespec>,
535521
) -> io::Result<usize> {
522+
// If we're on Linux >= 5.11 and a libc that has an `epoll_pwait2`
523+
// function, and it's y2038-safe, use it.
524+
#[cfg(all(
525+
linux_kernel,
526+
feature = "linux_5_11",
527+
target_env = "gnu",
528+
not(fix_y2038)
529+
))]
530+
unsafe {
531+
weak! {
532+
fn epoll_pwait2(
533+
c::c_int,
534+
*mut c::epoll_event,
535+
c::c_int,
536+
*const c::timespec,
537+
*const c::sigset_t
538+
) -> c::c_int
539+
}
540+
541+
if let Some(epoll_pwait2_func) = epoll_pwait2.get() {
542+
return ret_u32(epoll_pwait2_func(
543+
borrowed_fd(epoll),
544+
events.as_mut_ptr().cast::<c::epoll_event>(),
545+
events.len().try_into().unwrap_or(i32::MAX),
546+
crate::utils::option_as_ptr(timeout).cast(),
547+
null(),
548+
))
549+
.map(|i| i as usize);
550+
}
551+
}
552+
553+
// If we're on Linux >= 5.11, use `epoll_pwait2` via `libc::syscall`.
554+
#[cfg(all(linux_kernel, feature = "linux_5_11"))]
536555
unsafe {
556+
use linux_raw_sys::general::__kernel_timespec as timespec;
557+
558+
syscall! {
559+
fn epoll_pwait2(
560+
epfd: c::c_int,
561+
events: *mut c::epoll_event,
562+
maxevents: c::c_int,
563+
timeout: *const timespec,
564+
sigmask: *const c::sigset_t
565+
) via SYS_epoll_pwait2 -> c::c_int
566+
}
567+
568+
ret_u32(epoll_pwait2(
569+
borrowed_fd(epoll),
570+
events.as_mut_ptr().cast::<c::epoll_event>(),
571+
events.len().try_into().unwrap_or(i32::MAX),
572+
crate::utils::option_as_ptr(timeout).cast(),
573+
null(),
574+
))
575+
.map(|i| i as usize)
576+
}
577+
578+
// Otherwise just use `epoll_wait`.
579+
#[cfg(not(all(linux_kernel, feature = "linux_5_11")))]
580+
unsafe {
581+
let timeout = match timeout {
582+
None => -1,
583+
Some(timeout) => timeout.as_c_int_millis().ok_or(io::Errno::INVAL)?,
584+
};
585+
537586
ret_u32(c::epoll_wait(
538587
borrowed_fd(epoll),
539588
events.as_mut_ptr().cast::<c::epoll_event>(),

src/backend/libc/event/windows_syscalls.rs

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,7 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: Option<&Timespec>) -> io::Re
1313

1414
let timeout = match timeout {
1515
None => -1,
16-
Some(timeout) => {
17-
// Convert from `Timespec` to `c_int` milliseconds.
18-
let secs = timeout.tv_sec;
19-
if secs < 0 {
20-
return Err(io::Errno::INVAL);
21-
}
22-
secs.checked_mul(1000)
23-
.and_then(|millis| {
24-
// Add the nanoseconds, converted to millis, rounding up.
25-
// With Rust 1.73.0 this can use `div_ceil`.
26-
millis.checked_add((i64::from(timeout.tv_nsec) + 999_999) / 1_000_000)
27-
})
28-
.and_then(|millis| c::c_int::try_from(millis).ok())
29-
.ok_or(io::Errno::INVAL)?
30-
}
16+
Some(timeout) => timeout.as_c_int_millis().ok_or(io::Errno::INVAL)?,
3117
};
3218

3319
ret_c_int(unsafe { c::poll(fds.as_mut_ptr().cast(), nfds, timeout) })

src/backend/libc/fs/syscalls.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2009,7 +2009,7 @@ pub(crate) fn statx(
20092009
}
20102010
}
20112011

2012-
#[cfg(linux_kernel)]
2012+
#[cfg(all(linux_kernel, not(feature = "linux_4_11")))]
20132013
#[inline]
20142014
pub(crate) fn is_statx_available() -> bool {
20152015
unsafe {

src/backend/linux_raw/event/syscalls.rs

Lines changed: 48 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,16 @@
55
//! See the `rustix::backend` module documentation for details.
66
#![allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
77

8-
#[cfg(feature = "alloc")]
9-
use crate::backend::c;
108
use crate::backend::conv::{
119
by_ref, c_int, c_uint, opt_ref, ret, ret_c_int, ret_error, ret_owned_fd, ret_usize, size_of,
1210
slice_mut, zero,
1311
};
1412
use crate::event::{epoll, EventfdFlags, FdSetElement, PollFd, Timespec};
1513
use crate::fd::{BorrowedFd, OwnedFd};
1614
use crate::io;
17-
use crate::utils::{as_mut_ptr, option_as_ptr};
15+
use crate::utils::as_mut_ptr;
16+
#[cfg(feature = "linux_5_11")]
17+
use crate::utils::option_as_ptr;
1818
#[cfg(feature = "alloc")]
1919
use core::mem::MaybeUninit;
2020
use core::ptr::null_mut;
@@ -24,14 +24,12 @@ use linux_raw_sys::general::{kernel_sigset_t, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOL
2424
pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: Option<&Timespec>) -> io::Result<usize> {
2525
let (fds_addr_mut, fds_len) = slice_mut(fds);
2626

27-
let timeout = option_as_ptr(timeout);
28-
2927
unsafe {
3028
ret_usize(syscall!(
3129
__NR_ppoll,
3230
fds_addr_mut,
3331
fds_len,
34-
opt_ref(timeout.as_ref()),
32+
opt_ref(timeout),
3533
zero(),
3634
size_of::<kernel_sigset_t, _>()
3735
))
@@ -181,33 +179,53 @@ pub(crate) fn epoll_del(epfd: BorrowedFd<'_>, fd: BorrowedFd<'_>) -> io::Result<
181179
pub(crate) fn epoll_wait(
182180
epfd: BorrowedFd<'_>,
183181
events: &mut [MaybeUninit<crate::event::epoll::Event>],
184-
timeout: c::c_int,
182+
timeout: Option<&Timespec>,
185183
) -> io::Result<usize> {
186184
let (buf_addr_mut, buf_len) = slice_mut(events);
187-
// SAFETY: `__NR_epoll_wait` doesn't access any user memory outside of
188-
// the `events` array.
189-
#[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
190-
unsafe {
191-
ret_usize(syscall!(
192-
__NR_epoll_wait,
193-
epfd,
194-
buf_addr_mut,
195-
buf_len,
196-
c_int(timeout)
197-
))
185+
186+
// If we have Linux 5.11, use `epoll_pwait2`, which takes a `timespec`.
187+
#[cfg(feature = "linux_5_11")]
188+
{
189+
let timeout = option_as_ptr(timeout);
190+
191+
// SAFETY: `__NR_epoll_pwait2` doesn't access any user memory outside of
192+
// the `events` array, as we don't pass it a `sigmask`.
193+
unsafe {
194+
ret_usize(syscall!(
195+
__NR_epoll_pwait2,
196+
epfd,
197+
buf_addr_mut,
198+
buf_len,
199+
timeout,
200+
zero()
201+
))
202+
}
198203
}
199-
// SAFETY: `__NR_epoll_pwait` doesn't access any user memory outside of
200-
// the `events` array, as we don't pass it a `sigmask`.
201-
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
202-
unsafe {
203-
ret_usize(syscall!(
204-
__NR_epoll_pwait,
205-
epfd,
206-
buf_addr_mut,
207-
buf_len,
208-
c_int(timeout),
209-
zero()
210-
))
204+
205+
// If we don't have Linux 5.11, use `epoll_pwait`, which takes a `c_int`.
206+
//
207+
// We do this unconditionally, rather than trying `epoll_pwait2` and
208+
// falling back on `Errno::NOSYS`, because seccomp configurations will
209+
// sometimes abort the process on syscalls they don't recognize.
210+
#[cfg(not(feature = "linux_5_11"))]
211+
{
212+
let timeout = match timeout {
213+
None => -1,
214+
Some(timeout) => timeout.as_c_int_millis().ok_or(io::Errno::INVAL)?,
215+
};
216+
217+
// SAFETY: `__NR_epoll_pwait` doesn't access any user memory outside of
218+
// the `events` array, as we don't pass it a `sigmask`.
219+
unsafe {
220+
ret_usize(syscall!(
221+
__NR_epoll_pwait,
222+
epfd,
223+
buf_addr_mut,
224+
buf_len,
225+
c_int(timeout),
226+
zero()
227+
))
228+
}
211229
}
212230
}
213231

src/event/epoll.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
//! // Process events.
3939
//! let mut event_list = Vec::with_capacity(4);
4040
//! loop {
41-
//! epoll::wait(&epoll, &mut event_list, -1)?;
41+
//! epoll::wait(&epoll, &mut event_list, None)?;
4242
//! for event in &event_list {
4343
//! let target = event.data;
4444
//! if target.u64() == 1 {
@@ -78,6 +78,8 @@ use crate::backend::event::syscalls;
7878
use crate::fd::{AsFd, OwnedFd};
7979
use crate::io;
8080
#[cfg(feature = "alloc")]
81+
use crate::timespec::Timespec;
82+
#[cfg(feature = "alloc")]
8183
use alloc::vec::Vec;
8284
use core::ffi::c_void;
8385
use core::hash::{Hash, Hasher};
@@ -190,6 +192,12 @@ pub fn delete<EpollFd: AsFd, SourceFd: AsFd>(epoll: EpollFd, source: SourceFd) -
190192
/// For each event of interest, an element is written to `events`. On
191193
/// success, this returns the number of written elements.
192194
///
195+
/// Linux versions older than 5.11 (those that don't support `epoll_pwait2`)
196+
/// don't support timeouts greater than `c_int::MAX` milliseconds; if an
197+
/// unsupported timeout is passed, this function fails with
198+
/// [`io::Errno::INVAL`]. Enable the "linux_5_11" feature to enable the full
199+
/// range of timeouts.
200+
///
193201
/// # References
194202
/// - [Linux]
195203
/// - [illumos]
@@ -202,7 +210,7 @@ pub fn delete<EpollFd: AsFd, SourceFd: AsFd>(epoll: EpollFd, source: SourceFd) -
202210
pub fn wait<EpollFd: AsFd>(
203211
epoll: EpollFd,
204212
event_list: &mut Vec<Event>,
205-
timeout: crate::ffi::c_int,
213+
timeout: Option<&Timespec>,
206214
) -> io::Result<()> {
207215
// SAFETY: We're calling `epoll_wait` via FFI and we know how it
208216
// behaves.

src/timespec.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use core::num::TryFromIntError;
77
use core::ops::{Add, AddAssign, Neg, Sub, SubAssign};
88
use core::time::Duration;
99

10-
#[cfg(not(fix_y2038))]
1110
use crate::backend::c;
1211
#[allow(unused)]
1312
use crate::ffi;
@@ -128,6 +127,21 @@ impl Timespec {
128127
None
129128
}
130129
}
130+
131+
/// Convert from `Timespec` to `c::c_int` milliseconds, rounded up.
132+
pub(crate) fn as_c_int_millis(&self) -> Option<c::c_int> {
133+
let secs = self.tv_sec;
134+
if secs < 0 {
135+
return None;
136+
}
137+
secs.checked_mul(1000)
138+
.and_then(|millis| {
139+
// Add the nanoseconds, converted to millis, rounding
140+
// up. With Rust 1.73.0 this can use `div_ceil`.
141+
millis.checked_add((i64::from(self.tv_nsec) + 999_999) / 1_000_000)
142+
})
143+
.and_then(|millis| c::c_int::try_from(millis).ok())
144+
}
131145
}
132146

133147
impl TryFrom<Timespec> for Duration {

tests/event/epoll.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ fn server(ready: Arc<(Mutex<u16>, Condvar)>) {
4040

4141
let mut event_list = Vec::with_capacity(4);
4242
loop {
43-
epoll::wait(&epoll, &mut event_list, -1).unwrap();
43+
epoll::wait(&epoll, &mut event_list, None).unwrap();
4444
for event in &event_list {
4545
let target = event.data;
4646
if target.u64() == 1 {

tests/event/epoll_timeout.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
use rustix::event::{epoll, Timespec};
2+
use std::time::Instant;
3+
4+
#[test]
5+
fn epoll_timeout() {
6+
let epoll_fd = epoll::create(epoll::CreateFlags::CLOEXEC).unwrap();
7+
8+
let start = Instant::now();
9+
let mut events = Vec::with_capacity(1);
10+
epoll::wait(
11+
&epoll_fd,
12+
&mut events,
13+
Some(&Timespec {
14+
tv_sec: 0,
15+
tv_nsec: 1_000_000,
16+
}),
17+
)
18+
.unwrap();
19+
20+
let duration = start.elapsed();
21+
22+
assert!(
23+
duration.as_secs() > 0 || (duration.as_secs() == 0 && duration.subsec_nanos() >= 1_000_000)
24+
);
25+
}

tests/event/main.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
#[cfg(feature = "net")]
77
#[cfg(any(linux_kernel, target_os = "illumos", target_os = "redox"))]
88
mod epoll;
9+
#[cfg(not(feature = "rustc-dep-of-std"))] // TODO
10+
#[cfg(feature = "net")]
11+
#[cfg(any(linux_kernel, target_os = "illumos", target_os = "redox"))]
12+
mod epoll_timeout;
913
#[cfg(not(windows))]
1014
#[cfg(not(target_os = "wasi"))]
1115
mod eventfd;

0 commit comments

Comments
 (0)