Merge branch 'context-id-fix' into fixes

This merges a fix for a bug in our context id handling on 64-bit hash
CPUs.

The fix was written against v5.1 to ease backporting to stable
releases. Here we are merging it up to a v5.2-rc2 base, which involves
a bit of manual resolution.

It also adds a test case for the bug.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
commit 65565a68c5
Author: Michael Ellerman
Date:   2019-06-13 15:00:34 +10:00

4 changed files with 139 additions and 10 deletions
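For background: with 64K pages the hash MMU splits the user address space into 512 TB chunks, each covered by its own context id. Slot 0 of mm_context_t's extended_id[] array is the primary id (ctx->id); the higher slots are only populated once the process maps something above 512 TB, which is why the test added below creates a mapping at exactly 512 TB. The sketch below is a simplified, self-contained illustration of that indexing; the names (mm_ctx, context_for_ea, EA_BITS_PER_CONTEXT, NR_EXTENDED_IDS) are made up for the example and are not the kernel's own helpers.

/* Simplified illustration only -- not kernel code. */
#include <stdio.h>

#define EA_BITS_PER_CONTEXT	49	/* 2^49 bytes = 512TB per context id */
#define NR_EXTENDED_IDS		8	/* enough slots for a 4PB address space */

struct mm_ctx {
	unsigned int extended_id[NR_EXTENDED_IDS];	/* slot 0 is the primary id */
};

/* Pick the context id that covers effective address 'ea'. */
static unsigned int context_for_ea(struct mm_ctx *ctx, unsigned long ea)
{
	unsigned long index = ea >> EA_BITS_PER_CONTEXT;

	return index < NR_EXTENDED_IDS ? ctx->extended_id[index] : 0;
}

int main(void)
{
	struct mm_ctx ctx = { .extended_id = { 1, 0, 7 } };

	/* 513TB falls in chunk 1, which has no id allocated yet (0). */
	printf("id above 512TB: %u\n", context_for_ea(&ctx, 513ul << 40));
	/* 1PB falls in chunk 2, which already has id 7. */
	printf("id at 1PB:      %u\n", context_for_ea(&ctx, 1ul << 50));
	return 0;
}

The bug being fixed is visible in the diff below: on fork the child's mm_context inherited the parent's extended ids, so parent and child shared translations for everything above 512 TB. The new realloc_context_ids() allocates a fresh id for every slot the parent had populated.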


@@ -55,20 +55,52 @@ EXPORT_SYMBOL_GPL(hash__alloc_context_id);
 void slb_setup_new_exec(void);
 
+static int realloc_context_ids(mm_context_t *ctx)
+{
+	int i, id;
+
+	/*
+	 * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
+	 * there wasn't one allocated previously (which happens in the exec
+	 * case where ctx is newly allocated).
+	 *
+	 * We have to be a bit careful here. We must keep the existing ids in
+	 * the array, so that we can test if they're non-zero to decide if we
+	 * need to allocate a new one. However in case of error we must free the
+	 * ids we've allocated but *not* any of the existing ones (or risk a
+	 * UAF). That's why we decrement i at the start of the error handling
+	 * loop, to skip the id that we just tested but couldn't reallocate.
+	 */
+	for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
+		if (i == 0 || ctx->extended_id[i]) {
+			id = hash__alloc_context_id();
+			if (id < 0)
+				goto error;
+
+			ctx->extended_id[i] = id;
+		}
+	}
+
+	/* The caller expects us to return id */
+	return ctx->id;
+
+error:
+	for (i--; i >= 0; i--) {
+		if (ctx->extended_id[i])
+			ida_free(&mmu_context_ida, ctx->extended_id[i]);
+	}
+
+	return id;
+}
+
 static int hash__init_new_context(struct mm_struct *mm)
 {
 	int index;
 
-	index = hash__alloc_context_id();
-	if (index < 0)
-		return index;
-
 	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
 					   GFP_KERNEL);
-	if (!mm->context.hash_context) {
-		ida_free(&mmu_context_ida, index);
+	if (!mm->context.hash_context)
 		return -ENOMEM;
-	}
 
 	/*
 	 * The old code would re-promote on fork, we don't do that when using
@@ -96,13 +128,20 @@ static int hash__init_new_context(struct mm_struct *mm)
 			mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
 								GFP_KERNEL);
 			if (!mm->context.hash_context->spt) {
-				ida_free(&mmu_context_ida, index);
 				kfree(mm->context.hash_context);
 				return -ENOMEM;
 			}
 		}
 #endif
 	}
 
+	index = realloc_context_ids(&mm->context);
+	if (index < 0) {
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+		kfree(mm->context.hash_context->spt);
+#endif
+		kfree(mm->context.hash_context);
+		return index;
+	}
+
 	pkey_mm_init(mm);


@@ -3,4 +3,5 @@ subpage_prot
 tempfile
 prot_sao
 segv_errors
-wild_bctr
+wild_bctr
+large_vm_fork_separation


@@ -2,7 +2,8 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
+		  large_vm_fork_separation
 TEST_GEN_FILES := tempfile
 
 top_srcdir = ../../../../..
@@ -13,6 +14,7 @@ $(TEST_GEN_PROGS): ../harness.c
 $(OUTPUT)/prot_sao: ../utils.c
 
 $(OUTPUT)/wild_bctr: CFLAGS += -m64
+$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
 
 $(OUTPUT)/tempfile:
 	dd if=/dev/zero of=$@ bs=64k count=1


@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019, Michael Ellerman, IBM Corp.
+//
+// Test that allocating memory beyond the memory limit and then forking is
+// handled correctly, ie. the child is able to access the mappings beyond the
+// memory limit and the child's writes are not visible to the parent.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE	MAP_FIXED	// "Should be safe" above 512TB
+#endif
+
+
+static int test(void)
+{
+	int p2c[2], c2p[2], rc, status, c, *p;
+	unsigned long page_size;
+	pid_t pid;
+
+	page_size = sysconf(_SC_PAGESIZE);
+	SKIP_IF(page_size != 65536);
+
+	// Create a mapping at 512TB to allocate an extended_id
+	p = mmap((void *)(512ul << 40), page_size, PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
+	if (p == MAP_FAILED) {
+		perror("mmap");
+		printf("Error: couldn't mmap(), confirm kernel has 4TB support?\n");
+		return 1;
+	}
+
+	printf("parent writing %p = 1\n", p);
+	*p = 1;
+
+	FAIL_IF(pipe(p2c) == -1 || pipe(c2p) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		FAIL_IF(read(p2c[0], &c, 1) != 1);
+
+		pid = getpid();
+		printf("child writing %p = %d\n", p, pid);
+		*p = pid;
+
+		FAIL_IF(write(c2p[1], &c, 1) != 1);
+		FAIL_IF(read(p2c[0], &c, 1) != 1);
+		exit(0);
+	}
+
+	c = 0;
+	FAIL_IF(write(p2c[1], &c, 1) != 1);
+	FAIL_IF(read(c2p[0], &c, 1) != 1);
+
+	// Prevent compiler optimisation
+	barrier();
+
+	rc = 0;
+	printf("parent reading %p = %d\n", p, *p);
+	if (*p != 1) {
+		printf("Error: BUG! parent saw child's write! *p = %d\n", *p);
+		rc = 1;
+	}
+
+	FAIL_IF(write(p2c[1], &c, 1) != 1);
+	FAIL_IF(waitpid(pid, &status, 0) == -1);
+	FAIL_IF(!WIFEXITED(status) || WEXITSTATUS(status));
+
+	if (rc == 0)
+		printf("success: test completed OK\n");
+
+	return rc;
+}
+
+int main(void)
+{
+	return test_harness(test, "large_vm_fork_separation");
+}
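The new test builds with the rest of the powerpc mm selftests. Assuming the usual kselftests layout (the directory paths are not spelled out in the diff), something like make -C tools/testing/selftests/powerpc/mm should produce the binary, and running ./large_vm_fork_separation directly prints the parent and child writes followed by "success: test completed OK" on a kernel with the fix; on an affected kernel it instead reports the child's write being visible to the parent.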