
Commit a146f6b

mrutland-arm authored and Alex Shi committed
arm64: mm: create new fine-grained mappings at boot
At boot we may change the granularity of the tables mapping the kernel (by splitting or making sections). This may happen when we create the linear mapping (in __map_memblock), or at any point we try to apply fine-grained permissions to the kernel (e.g. fixup_executable, mark_rodata_ro, fixup_init).

Changing the active page tables in this manner may result in multiple entries for the same address being allocated into TLBs, risking problems such as TLB conflict aborts or issues derived from the amalgamation of TLB entries. Generally, a break-before-make (BBM) approach is necessary to avoid conflicts, but we cannot do this for the kernel tables as it risks unmapping text or data being used to do so.

Instead, we can create a new set of tables from scratch in the safety of the existing mappings, and subsequently migrate over to these using the new cpu_replace_ttbr1 helper, which avoids the two sets of tables being active simultaneously.

To avoid issues when we later modify permissions of the page tables (e.g. in fixup_init), we must create the page tables at a granularity such that later modification does not result in splitting of tables.

This patch applies this strategy, creating a new set of fine-grained page tables from scratch, and safely migrating to them. The existing fixmap and kasan shadow page tables are reused in the new fine-grained tables.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 068a17a5805dfbca4bbf03e664ca6b19709cc7a8)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
1 parent c2b11f6 commit a146f6b

3 files changed

Lines changed: 109 additions & 62 deletions
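For context: the cpu_replace_ttbr1 helper the message refers to is introduced by the companion patch "arm64: mm: add code to safely replace TTBR1_EL1" and is not part of this diff. A rough sketch of that helper (reconstructed from the companion commit) shows why the two sets of tables are never active simultaneously: the actual TTBR1_EL1 swap runs out of the idmap via TTBR0.

static inline void cpu_replace_ttbr1(pgd_t *pgd)
{
	typedef void (ttbr_replace_func)(phys_addr_t);
	extern ttbr_replace_func idmap_cpu_replace_ttbr1;
	ttbr_replace_func *replace_phys;

	phys_addr_t pgd_phys = virt_to_phys(pgd);

	/* Call the replacement routine via its physical address, from the idmap. */
	replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1);

	cpu_install_idmap();	/* execute out of TTBR0's idmap while TTBR1 is swapped */
	replace_phys(pgd_phys);	/* asm routine: TLB invalidation + TTBR1_EL1 write */
	cpu_uninstall_idmap();
}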


arch/arm64/include/asm/kasan.h

Lines changed: 3 additions & 0 deletions
@@ -7,6 +7,7 @@
 
 #include <linux/linkage.h>
 #include <asm/memory.h>
+#include <asm/pgtable-types.h>
 
 /*
  * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
@@ -28,10 +29,12 @@
 #define KASAN_SHADOW_OFFSET     (KASAN_SHADOW_END - (1ULL << (64 - 3)))
 
 void kasan_init(void);
+void kasan_copy_shadow(pgd_t *pgdir);
 asmlinkage void kasan_early_init(void);
 
 #else
 static inline void kasan_init(void) { }
+static inline void kasan_copy_shadow(pgd_t *pgdir) { }
 #endif
 
 #endif

arch/arm64/mm/kasan_init.c

Lines changed: 15 additions & 0 deletions
@@ -96,6 +96,21 @@ asmlinkage void __init kasan_early_init(void)
 	kasan_map_early_shadow();
 }
 
+/*
+ * Copy the current shadow region into a new pgdir.
+ */
+void __init kasan_copy_shadow(pgd_t *pgdir)
+{
+	pgd_t *pgd, *pgd_new, *pgd_end;
+
+	pgd = pgd_offset_k(KASAN_SHADOW_START);
+	pgd_end = pgd_offset_k(KASAN_SHADOW_END);
+	pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+	do {
+		set_pgd(pgd_new, *pgd);
+	} while (pgd++, pgd_new++, pgd != pgd_end);
+}
+
 static void __init clear_pgds(unsigned long start,
 			unsigned long end)
 {
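The copy loop above advances one pgd entry at a time across the shadow region. For reference, the pgd-walking helpers it relies on are defined along the following lines (paraphrased from arch/arm64/include/asm/pgtable.h as of this series; shown for context, not part of this diff):

/* Index a pgdir by virtual address (context sketch, not part of this diff). */
#define pgd_index(addr)		(((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
/* Walk an arbitrary pgdir, e.g. the new one passed to kasan_copy_shadow(). */
#define pgd_offset_raw(pgd, addr)	((pgd) + pgd_index(addr))
/* Walk the live kernel tables (init_mm). */
#define pgd_offset_k(addr)	pgd_offset(&init_mm, (addr))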

arch/arm64/mm/mmu.c

Lines changed: 91 additions & 62 deletions
@@ -33,6 +33,7 @@
 #include <asm/barrier.h>
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
+#include <asm/kasan.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -336,49 +337,42 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 			     late_pgtable_alloc);
 }
 
-#ifdef CONFIG_DEBUG_RODATA
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
+	unsigned long kernel_start = __pa(_stext);
+	unsigned long kernel_end = __pa(_end);
+
 	/*
-	 * Set up the executable regions using the existing section mappings
-	 * for now. This will get more fine grained later once all memory
-	 * is mapped
+	 * The kernel itself is mapped at page granularity. Map all other
+	 * memory, making sure we don't overwrite the existing kernel mappings.
 	 */
-	unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
-	unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
-
-	if (end < kernel_x_start) {
-		create_mapping(start, __phys_to_virt(start),
-			end - start, PAGE_KERNEL);
-	} else if (start >= kernel_x_end) {
-		create_mapping(start, __phys_to_virt(start),
-			end - start, PAGE_KERNEL);
-	} else {
-		if (start < kernel_x_start)
-			create_mapping(start, __phys_to_virt(start),
-				kernel_x_start - start,
-				PAGE_KERNEL);
-		create_mapping(kernel_x_start,
-				__phys_to_virt(kernel_x_start),
-				kernel_x_end - kernel_x_start,
-				PAGE_KERNEL_EXEC);
-		if (kernel_x_end < end)
-			create_mapping(kernel_x_end,
-				__phys_to_virt(kernel_x_end),
-				end - kernel_x_end,
-				PAGE_KERNEL);
+
+	/* No overlap with the kernel. */
+	if (end < kernel_start || start >= kernel_end) {
+		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
+				     end - start, PAGE_KERNEL,
+				     early_pgtable_alloc);
+		return;
 	}
 
+	/*
+	 * This block overlaps the kernel mapping. Map the portion(s) which
+	 * don't overlap.
+	 */
+	if (start < kernel_start)
+		__create_pgd_mapping(pgd, start,
+				     __phys_to_virt(start),
+				     kernel_start - start, PAGE_KERNEL,
+				     early_pgtable_alloc);
+	if (kernel_end < end)
+		__create_pgd_mapping(pgd, kernel_end,
+				     __phys_to_virt(kernel_end),
+				     end - kernel_end, PAGE_KERNEL,
+				     early_pgtable_alloc);
 }
-#else
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
-{
-	create_mapping(start, __phys_to_virt(start), end - start,
-			PAGE_KERNEL_EXEC);
-}
-#endif
 
-static void __init map_mem(void)
+static void __init map_mem(pgd_t *pgd)
 {
 	struct memblock_region *reg;
 
@@ -390,33 +384,10 @@ static void __init map_mem(void)
 		if (start >= end)
 			break;
 
-		__map_memblock(start, end);
+		__map_memblock(pgd, start, end);
 	}
 }
 
-static void __init fixup_executable(void)
-{
-#ifdef CONFIG_DEBUG_RODATA
-	/* now that we are actually fully mapped, make the start/end more fine grained */
-	if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
-		unsigned long aligned_start = round_down(__pa(_stext),
-							 SWAPPER_BLOCK_SIZE);
-
-		create_mapping(aligned_start, __phys_to_virt(aligned_start),
-				__pa(_stext) - aligned_start,
-				PAGE_KERNEL);
-	}
-
-	if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
-		unsigned long aligned_end = round_up(__pa(__init_end),
-							  SWAPPER_BLOCK_SIZE);
-		create_mapping(__pa(__init_end), (unsigned long)__init_end,
-				aligned_end - __pa(__init_end),
-				PAGE_KERNEL);
-	}
-#endif
-}
-
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void)
 {
@@ -434,14 +405,72 @@ void fixup_init(void)
 			PAGE_KERNEL);
 }
 
+static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
+				    pgprot_t prot)
+{
+	phys_addr_t pa_start = __pa(va_start);
+	unsigned long size = va_end - va_start;
+
+	BUG_ON(!PAGE_ALIGNED(pa_start));
+	BUG_ON(!PAGE_ALIGNED(size));
+
+	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+			     early_pgtable_alloc);
+}
+
+/*
+ * Create fine-grained mappings for the kernel.
+ */
+static void __init map_kernel(pgd_t *pgd)
+{
+	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC);
+	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC);
+	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL);
+
+	/*
+	 * The fixmap falls in a separate pgd to the kernel, and doesn't live
+	 * in the carveout for the swapper_pg_dir. We can simply re-use the
+	 * existing dir for the fixmap.
+	 */
+	set_pgd(pgd_offset_raw(pgd, FIXADDR_START), *pgd_offset_k(FIXADDR_START));
+
+	kasan_copy_shadow(pgd);
+}
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps and sets up the zero page.
  */
 void __init paging_init(void)
 {
-	map_mem();
-	fixup_executable();
+	phys_addr_t pgd_phys = early_pgtable_alloc();
+	pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+
+	map_kernel(pgd);
+	map_mem(pgd);
+
+	/*
+	 * We want to reuse the original swapper_pg_dir so we don't have to
+	 * communicate the new address to non-coherent secondaries in
+	 * secondary_entry, and so cpu_switch_mm can generate the address with
+	 * adrp+add rather than a load from some global variable.
+	 *
+	 * To do this we need to go via a temporary pgd.
+	 */
+	cpu_replace_ttbr1(__va(pgd_phys));
+	memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+	cpu_replace_ttbr1(swapper_pg_dir);
+
+	pgd_clear_fixmap();
+	memblock_free(pgd_phys, PAGE_SIZE);
+
+	/*
+	 * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
+	 * allocated with it.
+	 */
+	memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
+		      SWAPPER_DIR_SIZE - PAGE_SIZE);
 
 	bootmem_init();
 }
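A note on the final memblock_free() above: swapper_pg_dir heads a carveout of one page per swapper page-table level, and after the memcpy only the first page (the PGD itself) is still needed, so the remainder can be returned. The sizing comes from arch/arm64/include/asm/kernel-pgtable.h, roughly as follows (context sketch, not part of this diff):

/* One page per level built by the early boot code (sketch). */
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
#else
#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
#endif
#define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)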
