Skip to content

Commit 8c92870

Browse files
mpegregkh
authored and committed
powerpc/numa: Fix percpu allocations to be NUMA aware
commit ba4a648f12f4cd0a8003dd229b6ca8a53348ee4b upstream.

In commit 8c27226 ("powerpc/numa: Enable USE_PERCPU_NUMA_NODE_ID"), we switched to the generic implementation of cpu_to_node(), which uses a percpu variable to hold the NUMA node for each CPU.

Unfortunately we neglected to notice that we use cpu_to_node() in the allocation of our percpu areas, leading to a chicken-and-egg problem. In practice what happens is that when we are setting up the percpu areas, cpu_to_node() reports that all CPUs are on node 0, so we allocate all percpu areas on node 0.

This is visible in the dmesg output, as all pcpu allocs being in group 0:

  pcpu-alloc: [0] 00 01 02 03 [0] 04 05 06 07
  pcpu-alloc: [0] 08 09 10 11 [0] 12 13 14 15
  pcpu-alloc: [0] 16 17 18 19 [0] 20 21 22 23
  pcpu-alloc: [0] 24 25 26 27 [0] 28 29 30 31
  pcpu-alloc: [0] 32 33 34 35 [0] 36 37 38 39
  pcpu-alloc: [0] 40 41 42 43 [0] 44 45 46 47

To fix it we need an early_cpu_to_node() which can run prior to percpu being set up. We already have the numa_cpu_lookup_table we can use, so just plumb it in.

With the patch, dmesg output shows two groups, 0 and 1:

  pcpu-alloc: [0] 00 01 02 03 [0] 04 05 06 07
  pcpu-alloc: [0] 08 09 10 11 [0] 12 13 14 15
  pcpu-alloc: [0] 16 17 18 19 [0] 20 21 22 23
  pcpu-alloc: [1] 24 25 26 27 [1] 28 29 30 31
  pcpu-alloc: [1] 32 33 34 35 [1] 36 37 38 39
  pcpu-alloc: [1] 40 41 42 43 [1] 44 45 46 47

We can also check the data_offset in the paca of various CPUs; with the fix we see:

  CPU 0:  data_offset = 0x0ffe8b0000
  CPU 24: data_offset = 0x1ffe5b0000

And we can see from dmesg that CPU 24 has an allocation on node 1:

  node 0: [mem 0x0000000000000000-0x0000000fffffffff]
  node 1: [mem 0x0000001000000000-0x0000001fffffffff]

Fixes: 8c27226 ("powerpc/numa: Enable USE_PERCPU_NUMA_NODE_ID")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent fc7fb94 commit 8c92870

2 files changed

Lines changed: 16 additions & 2 deletions

File tree

arch/powerpc/include/asm/topology.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,22 @@ extern void __init dump_numa_cpu_topology(void);
4444
extern int sysfs_add_device_to_node(struct device *dev, int nid);
4545
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
4646

47+
/*
 * Return the NUMA node id for @cpu by reading numa_cpu_lookup_table[]
 * directly, so it can be used before the percpu areas are set up.
 * Never returns a negative value: a negative table entry maps to node 0.
 */
static inline int early_cpu_to_node(int cpu)
48+
{
49+
int nid;
50+
51+
nid = numa_cpu_lookup_table[cpu];
52+
53+
/*
54+
* Fall back to node 0 if nid is unset, i.e. negative (it should always
* be set by this point, barring bugs).
55+
* This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)).
56+
*/
57+
return (nid < 0) ? 0 : nid;
58+
}
4759
#else
4860

61+
/* Stub for the #else branch (presumably the non-NUMA build -- confirm against the enclosing #if): every CPU is reported as being on node 0. */
static inline int early_cpu_to_node(int cpu) { return 0; }
62+
4963
static inline void dump_numa_cpu_topology(void) {}
5064

5165
static inline int sysfs_add_device_to_node(struct device *dev, int nid)

arch/powerpc/kernel/setup_64.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,7 @@ void __init setup_arch(char **cmdline_p)
751751

752752
/*
 * Allocate @size bytes with alignment @align for @cpu from the bootmem
 * node data of the NUMA node that @cpu maps to, passing
 * __pa(MAX_DMA_ADDRESS) as the last argument to __alloc_bootmem_node().
 * The node is looked up with early_cpu_to_node() rather than
 * cpu_to_node(), because this runs while the percpu areas that
 * cpu_to_node() relies on are still being set up.
 */
static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
753753
{
754-
return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
754+
return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
755755
__pa(MAX_DMA_ADDRESS));
756756
}
757757

@@ -762,7 +762,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
762762

763763
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
764764
{
765-
if (cpu_to_node(from) == cpu_to_node(to))
765+
if (early_cpu_to_node(from) == early_cpu_to_node(to))
766766
return LOCAL_DISTANCE;
767767
else
768768
return REMOTE_DISTANCE;

0 commit comments

Comments
 (0)