Skip to content

Commit bc46b12

Browse files
authored
Couple of TLS handling changes (#120)

* Pass the module id to `current_tls_addr`
* Extract an `initialize_tls` function to share between `initialize_main` and `create`
* Extract a `calculate_tls_size` function to share between `initialize_main` and `create`

The only functional change is that for spawned threads the padding necessary to correctly align the TLS data is no longer added to the stack itself and instead becomes unused. Since TLS data is usually aligned no more strictly than the stack, little or no memory should be wasted in practice.
1 parent b391fc6 commit bc46b12

2 files changed

Lines changed: 86 additions & 94 deletions

File tree

src/thread/libc.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -213,12 +213,12 @@ pub fn errno_location() -> *mut i32 {
213213
unsafe { __errno_location() }
214214
}
215215

216-
/// Return the TLS address for the given `offset` for the current thread.
216+
/// Return the TLS address for the given `module` and `offset` for the current
217+
/// thread.
217218
#[inline]
218219
#[must_use]
219-
pub fn current_tls_addr(offset: usize) -> *mut c_void {
220-
let p = [1, offset];
221-
unsafe { __tls_get_addr(&p) }
220+
pub fn current_tls_addr(module: usize, offset: usize) -> *mut c_void {
221+
unsafe { __tls_get_addr(&[module, offset]) }
222222
}
223223

224224
/// Return the current thread's stack address (lowest address), size, and guard

src/thread/linux_raw.rs

Lines changed: 82 additions & 90 deletions
Original file line number | Diff line number | Diff line change
@@ -336,58 +336,96 @@ pub(super) unsafe fn initialize_main(mem: *mut c_void) {
336336
let canary = random_ptr.read_unaligned();
337337
__stack_chk_guard = canary;
338338

339-
let map_size = 0;
339+
let mut alloc_size = 0;
340+
let (tls_data_bottom, header) = calculate_tls_size(&mut alloc_size);
341+
342+
// Allocate the thread data. Use `mmap_anonymous` rather than `alloc` here
343+
// as the allocator may depend on thread-local data, which is what we're
344+
// initializing here.
345+
let new = mmap_anonymous(
346+
null_mut(),
347+
alloc_size,
348+
ProtFlags::READ | ProtFlags::WRITE,
349+
MapFlags::PRIVATE,
350+
)
351+
.unwrap()
352+
.cast::<u8>();
353+
354+
let metadata_align = max(unsafe { STARTUP_TLS_INFO.align }, align_of::<Metadata>());
355+
debug_assert_eq!(new.addr() % metadata_align, 0);
356+
357+
let tls_data = new.add(tls_data_bottom);
358+
let metadata: *mut Metadata = new.add(header).cast();
359+
360+
let (newtls, thread_id_ptr) = initialize_tls(
361+
tls_data,
362+
metadata,
363+
canary,
364+
stack_least,
365+
stack_size,
366+
guard_size,
367+
0,
368+
);
369+
let tid = rustix::runtime::set_tid_address(thread_id_ptr.cast());
370+
*thread_id_ptr = tid.as_raw_nonzero().get();
371+
372+
// Point the platform thread-pointer register at the new thread metadata.
373+
set_thread_pointer(newtls);
374+
}
375+
376+
fn calculate_tls_size(map_size: &mut usize) -> (usize, usize) {
377+
// SAFETY: `STARTUP_TLS_INFO` is initialized at program startup before
378+
// we come here creating new threads.
379+
let (startup_tls_align, startup_tls_mem_size) =
380+
unsafe { (STARTUP_TLS_INFO.align, STARTUP_TLS_INFO.mem_size) };
340381

341382
// Compute relevant alignments.
342-
let tls_data_align = STARTUP_TLS_INFO.align;
383+
let tls_data_align = startup_tls_align;
384+
let page_align = page_size();
343385
let header_align = align_of::<Metadata>();
344386
let metadata_align = max(tls_data_align, header_align);
387+
debug_assert!(metadata_align <= page_align);
345388

346-
// Compute the size to allocate for thread data.
347-
let mut alloc_size = 0;
389+
*map_size = round_up(*map_size, metadata_align);
348390

349391
// Variant II: TLS data goes below the TCB.
350392
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
351-
let tls_data_bottom = alloc_size;
393+
let tls_data_bottom = *map_size;
352394

353395
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
354396
{
355-
alloc_size += round_up(STARTUP_TLS_INFO.mem_size, metadata_align);
397+
*map_size += round_up(startup_tls_mem_size, tls_data_align);
356398
}
357399

358-
let header = alloc_size;
400+
let header = *map_size;
359401

360-
alloc_size += size_of::<Metadata>();
402+
*map_size += size_of::<Metadata>();
361403

362404
// Variant I: TLS data goes above the TCB.
363405
#[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
364406
{
365-
alloc_size = round_up(alloc_size, tls_data_align);
407+
*map_size = round_up(*map_size, tls_data_align);
366408
}
367409

368410
#[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
369-
let tls_data_bottom = alloc_size;
411+
let tls_data_bottom = *map_size;
370412

371413
#[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
372414
{
373-
alloc_size += round_up(STARTUP_TLS_INFO.mem_size, tls_data_align);
415+
*map_size += round_up(startup_tls_mem_size, tls_data_align);
374416
}
417+
(tls_data_bottom, header)
418+
}
375419

376-
// Allocate the thread data. Use `mmap_anonymous` rather than `alloc` here
377-
// as the allocator may depend on thread-local data, which is what we're
378-
// initializing here.
379-
let new = mmap_anonymous(
380-
null_mut(),
381-
alloc_size,
382-
ProtFlags::READ | ProtFlags::WRITE,
383-
MapFlags::PRIVATE,
384-
)
385-
.unwrap()
386-
.cast::<u8>();
387-
debug_assert_eq!(new.addr() % metadata_align, 0);
388-
389-
let tls_data = new.add(tls_data_bottom);
390-
let metadata: *mut Metadata = new.add(header).cast();
420+
unsafe fn initialize_tls(
421+
tls_data: *mut u8,
422+
metadata: *mut Metadata,
423+
canary: usize,
424+
stack_least: *mut u8,
425+
stack_size: usize,
426+
guard_size: usize,
427+
map_size: usize,
428+
) -> (*mut c_void, *mut i32) {
391429
let newtls: *mut c_void = (*metadata).abi.thread_pointee.as_mut_ptr().cast();
392430

393431
// Initialize the thread metadata.
@@ -419,11 +457,8 @@ pub(super) unsafe fn initialize_main(mem: *mut c_void) {
419457
.fill(0);
420458

421459
let thread_id_ptr = (*metadata).thread.thread_id.as_ptr();
422-
let tid = rustix::runtime::set_tid_address(thread_id_ptr.cast());
423-
*thread_id_ptr = tid.as_raw_nonzero().get();
424460

425-
// Point the platform thread-pointer register at the new thread metadata.
426-
set_thread_pointer(newtls);
461+
(newtls, thread_id_ptr)
427462
}
428463

429464
/// Creates a new thread.
@@ -442,19 +477,9 @@ pub unsafe fn create(
442477
stack_size: usize,
443478
guard_size: usize,
444479
) -> io::Result<Thread> {
445-
// SAFETY: `STARTUP_TLS_INFO` is initialized at program startup before
446-
// we come here creating new threads.
447-
let (startup_tls_align, startup_tls_mem_size) =
448-
unsafe { (STARTUP_TLS_INFO.align, STARTUP_TLS_INFO.mem_size) };
449-
450480
// Compute relevant alignments.
451-
let tls_data_align = startup_tls_align;
452481
let page_align = page_size();
453482
let stack_align = 16;
454-
let header_align = align_of::<Metadata>();
455-
let metadata_align = max(tls_data_align, header_align);
456-
let stack_metadata_align = max(stack_align, metadata_align);
457-
debug_assert!(stack_metadata_align <= page_align);
458483

459484
// Compute the `mmap` size.
460485
let mut map_size = 0;
@@ -463,36 +488,11 @@ pub unsafe fn create(
463488

464489
let stack_bottom = map_size;
465490

466-
map_size += round_up(stack_size, stack_metadata_align);
491+
map_size += round_up(stack_size, stack_align);
467492

468493
let stack_top = map_size;
469494

470-
// Variant II: TLS data goes below the TCB.
471-
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
472-
let tls_data_bottom = map_size;
473-
474-
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
475-
{
476-
map_size += round_up(startup_tls_mem_size, tls_data_align);
477-
}
478-
479-
let header = map_size;
480-
481-
map_size += size_of::<Metadata>();
482-
483-
// Variant I: TLS data goes above the TCB.
484-
#[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
485-
{
486-
map_size = round_up(map_size, tls_data_align);
487-
}
488-
489-
#[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
490-
let tls_data_bottom = map_size;
491-
492-
#[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
493-
{
494-
map_size += round_up(startup_tls_mem_size, tls_data_align);
495-
}
495+
let (tls_data_bottom, header) = calculate_tls_size(&mut map_size);
496496

497497
// Now we'll `mmap` the memory, initialize it, and create the OS thread.
498498
unsafe {
@@ -519,30 +519,18 @@ pub unsafe fn create(
519519

520520
let tls_data = map.add(tls_data_bottom);
521521
let metadata: *mut Metadata = map.add(header).cast();
522-
let newtls: *mut c_void = (*metadata).abi.thread_pointee.as_mut_ptr().cast();
523522

524523
// Copy the current thread's canary to the new thread.
525524
let canary = (*current_metadata()).abi.canary;
526525

527-
// Initialize the thread metadata.
528-
metadata.write(Metadata {
529-
abi: Abi {
530-
canary,
531-
dtv: null(),
532-
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
533-
this: newtls,
534-
_pad: Default::default(),
535-
thread_pointee: [],
536-
},
537-
thread: ThreadData::new(stack_least.cast(), stack_size, guard_size, map_size),
538-
});
539-
540-
// Initialize the TLS data with explicit initializer data.
541-
slice::from_raw_parts_mut(tls_data, STARTUP_TLS_INFO.file_size).copy_from_slice(
542-
slice::from_raw_parts(
543-
STARTUP_TLS_INFO.addr.cast::<u8>(),
544-
STARTUP_TLS_INFO.file_size,
545-
),
526+
let (newtls, thread_id_ptr) = initialize_tls(
527+
tls_data,
528+
metadata,
529+
canary,
530+
stack_least,
531+
stack_size,
532+
guard_size,
533+
map_size,
546534
);
547535

548536
// Allocate space for the thread arguments on the child's stack.
@@ -582,7 +570,6 @@ pub unsafe fn create(
582570
| CloneFlags::CHILD_CLEARTID
583571
| CloneFlags::CHILD_SETTID
584572
| CloneFlags::PARENT_SETTID;
585-
let thread_id_ptr = (*metadata).thread.thread_id.as_ptr();
586573
let clone_res = clone(
587574
flags.bits(),
588575
stack.cast(),
@@ -1005,10 +992,15 @@ pub fn errno_location() -> *mut i32 {
1005992
unsafe { core::ptr::addr_of_mut!((*current_metadata()).thread.errno_val).cast::<i32>() }
1006993
}
1007994

1008-
/// Return the TLS address for the given `offset` for the current thread.
995+
/// Return the TLS address for the given `module` and `offset` for the current
996+
/// thread.
1009997
#[inline]
1010998
#[must_use]
1011-
pub fn current_tls_addr(offset: usize) -> *mut c_void {
999+
pub fn current_tls_addr(module: usize, offset: usize) -> *mut c_void {
1000+
// Offset 0 is the generation field, and we don't support dynamic linking,
1001+
// so we should only ever see 1 here.
1002+
assert_eq!(module, 1);
1003+
10121004
// Platforms where TLS data goes after the ABI-exposed fields.
10131005
#[cfg(any(target_arch = "aarch64", target_arch = "arm", target_arch = "riscv64"))]
10141006
{

0 commit comments

Comments (0)