aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds2022-08-05 10:47:40 -0700
committerLinus Torvalds2022-08-05 10:47:40 -0700
commit9e2f40233670c70c25e0681cb66d50d1e2742829 (patch)
treec9b4e29575d4512d38cebd753d08a305fee06f57 /arch/x86
parent3bd6e5854bf9bb5436d6b533e206561839e3b284 (diff)
parente0a5915f1cca21da8ffc0563aea9fa1df5d16fb4 (diff)
Merge tag 'x86_sgx_for_v6.0-2022-08-03.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 SGX updates from Dave Hansen: "A set of x86/sgx changes focused on implementing the "SGX2" features, plus a minor cleanup: - SGX2 ISA support which makes enclave memory management much more dynamic. For instance, enclaves can now change enclave page permissions on the fly. - Removal of an unused structure member" * tag 'x86_sgx_for_v6.0-2022-08-03.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (32 commits) x86/sgx: Drop 'page_index' from sgx_backing selftests/sgx: Page removal stress test selftests/sgx: Test reclaiming of untouched page selftests/sgx: Test invalid access to removed enclave page selftests/sgx: Test faulty enclave behavior selftests/sgx: Test complete changing of page type flow selftests/sgx: Introduce TCS initialization enclave operation selftests/sgx: Introduce dynamic entry point selftests/sgx: Test two different SGX2 EAUG flows selftests/sgx: Add test for TCS page permission changes selftests/sgx: Add test for EPCM permission changes Documentation/x86: Introduce enclave runtime management section x86/sgx: Free up EPC pages directly to support large page ranges x86/sgx: Support complete page removal x86/sgx: Support modifying SGX page type x86/sgx: Tighten accessible memory range after enclave initialization x86/sgx: Support adding of pages to an initialized enclave x86/sgx: Support restricting of enclave page permissions x86/sgx: Support VA page allocation without reclaiming x86/sgx: Export sgx_encl_page_alloc() ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/sgx.h8
-rw-r--r--arch/x86/include/uapi/asm/sgx.h62
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.c330
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.h16
-rw-r--r--arch/x86/kernel/cpu/sgx/encls.h33
-rw-r--r--arch/x86/kernel/cpu/sgx/ioctl.c641
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c75
-rw-r--r--arch/x86/kernel/cpu/sgx/sgx.h3
8 files changed, 1038 insertions, 130 deletions
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 3f9334ef67cd..eae20fa52b93 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -65,17 +65,22 @@ enum sgx_encls_function {
/**
* enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
+ * %SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function.
* %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not
* been completed yet.
* %SGX_CHILD_PRESENT SECS has child pages present in the EPC.
* %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's
* public key does not match IA32_SGXLEPUBKEYHASH.
+ * %SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it
+ * is in the PENDING or MODIFIED state.
* %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received
*/
enum sgx_return_code {
+ SGX_EPC_PAGE_CONFLICT = 7,
SGX_NOT_TRACKED = 11,
SGX_CHILD_PRESENT = 13,
SGX_INVALID_EINITTOKEN = 16,
+ SGX_PAGE_NOT_MODIFIABLE = 20,
SGX_UNMASKED_EVENT = 128,
};
@@ -234,6 +239,9 @@ struct sgx_pageinfo {
* %SGX_PAGE_TYPE_REG: a regular page
* %SGX_PAGE_TYPE_VA: a VA page
* %SGX_PAGE_TYPE_TRIM: a page in trimmed state
+ *
+ * Make sure when making changes to this enum that its values can still fit
+ * in the bitfield within &struct sgx_encl_page
*/
enum sgx_page_type {
SGX_PAGE_TYPE_SECS,
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index f4b81587e90b..2dd35bbdc822 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -29,6 +29,12 @@ enum sgx_page_flags {
_IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision)
#define SGX_IOC_VEPC_REMOVE_ALL \
_IO(SGX_MAGIC, 0x04)
+#define SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS \
+ _IOWR(SGX_MAGIC, 0x05, struct sgx_enclave_restrict_permissions)
+#define SGX_IOC_ENCLAVE_MODIFY_TYPES \
+ _IOWR(SGX_MAGIC, 0x06, struct sgx_enclave_modify_types)
+#define SGX_IOC_ENCLAVE_REMOVE_PAGES \
+ _IOWR(SGX_MAGIC, 0x07, struct sgx_enclave_remove_pages)
/**
* struct sgx_enclave_create - parameter structure for the
@@ -76,6 +82,62 @@ struct sgx_enclave_provision {
__u64 fd;
};
+/**
+ * struct sgx_enclave_restrict_permissions - parameters for ioctl
+ * %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
+ * @offset: starting page offset (page aligned relative to enclave base
+ * address defined in SECS)
+ * @length: length of memory (multiple of the page size)
+ * @permissions:new permission bits for pages in range described by @offset
+ * and @length
+ * @result: (output) SGX result code of ENCLS[EMODPR] function
+ * @count: (output) bytes successfully changed (multiple of page size)
+ */
+struct sgx_enclave_restrict_permissions {
+ __u64 offset;
+ __u64 length;
+ __u64 permissions;
+ __u64 result;
+ __u64 count;
+};
+
+/**
+ * struct sgx_enclave_modify_types - parameters for ioctl
+ * %SGX_IOC_ENCLAVE_MODIFY_TYPES
+ * @offset: starting page offset (page aligned relative to enclave base
+ * address defined in SECS)
+ * @length: length of memory (multiple of the page size)
+ * @page_type: new type for pages in range described by @offset and @length
+ * @result: (output) SGX result code of ENCLS[EMODT] function
+ * @count: (output) bytes successfully changed (multiple of page size)
+ */
+struct sgx_enclave_modify_types {
+ __u64 offset;
+ __u64 length;
+ __u64 page_type;
+ __u64 result;
+ __u64 count;
+};
+
+/**
+ * struct sgx_enclave_remove_pages - %SGX_IOC_ENCLAVE_REMOVE_PAGES parameters
+ * @offset: starting page offset (page aligned relative to enclave base
+ * address defined in SECS)
+ * @length: length of memory (multiple of the page size)
+ * @count: (output) bytes successfully changed (multiple of page size)
+ *
+ * Regular (PT_REG) or TCS (PT_TCS) can be removed from an initialized
+ * enclave if the system supports SGX2. First, the %SGX_IOC_ENCLAVE_MODIFY_TYPES
+ * ioctl() should be used to change the page type to PT_TRIM. After that
+ * succeeds ENCLU[EACCEPT] should be run from within the enclave and then
+ * %SGX_IOC_ENCLAVE_REMOVE_PAGES can be used to complete the page removal.
+ */
+struct sgx_enclave_remove_pages {
+ __u64 offset;
+ __u64 length;
+ __u64 count;
+};
+
struct sgx_enclave_run;
/**
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 19876ebfb504..24c1bb8eb196 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -232,25 +232,10 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
return epc_page;
}
-static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
- unsigned long addr,
- unsigned long vm_flags)
+static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
+ struct sgx_encl_page *entry)
{
- unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
struct sgx_epc_page *epc_page;
- struct sgx_encl_page *entry;
-
- entry = xa_load(&encl->page_array, PFN_DOWN(addr));
- if (!entry)
- return ERR_PTR(-EFAULT);
-
- /*
- * Verify that the faulted page has equal or higher build time
- * permissions than the VMA permissions (i.e. the subset of {VM_READ,
- * VM_WRITE, VM_EXECUTE} in vma->vm_flags).
- */
- if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
- return ERR_PTR(-EFAULT);
/* Entry successfully located. */
if (entry->epc_page) {
@@ -276,6 +261,146 @@ static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
return entry;
}
+static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
+ unsigned long addr,
+ unsigned long vm_flags)
+{
+ unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+ struct sgx_encl_page *entry;
+
+ entry = xa_load(&encl->page_array, PFN_DOWN(addr));
+ if (!entry)
+ return ERR_PTR(-EFAULT);
+
+ /*
+ * Verify that the page has equal or higher build time
+ * permissions than the VMA permissions (i.e. the subset of {VM_READ,
+ * VM_WRITE, VM_EXECUTE} in vma->vm_flags).
+ */
+ if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
+ return ERR_PTR(-EFAULT);
+
+ return __sgx_encl_load_page(encl, entry);
+}
+
+struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
+ unsigned long addr)
+{
+ struct sgx_encl_page *entry;
+
+ entry = xa_load(&encl->page_array, PFN_DOWN(addr));
+ if (!entry)
+ return ERR_PTR(-EFAULT);
+
+ return __sgx_encl_load_page(encl, entry);
+}
+
+/**
+ * sgx_encl_eaug_page() - Dynamically add page to initialized enclave
+ * @vma: VMA obtained from fault info from where page is accessed
+ * @encl: enclave accessing the page
+ * @addr: address that triggered the page fault
+ *
+ * When an initialized enclave accesses a page with no backing EPC page
+ * on a SGX2 system then the EPC can be added dynamically via the SGX2
+ * ENCLS[EAUG] instruction.
+ *
+ * Returns: Appropriate vm_fault_t: VM_FAULT_NOPAGE when PTE was installed
+ * successfully, VM_FAULT_SIGBUS or VM_FAULT_OOM as error otherwise.
+ */
+static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
+ struct sgx_encl *encl, unsigned long addr)
+{
+ vm_fault_t vmret = VM_FAULT_SIGBUS;
+ struct sgx_pageinfo pginfo = {0};
+ struct sgx_encl_page *encl_page;
+ struct sgx_epc_page *epc_page;
+ struct sgx_va_page *va_page;
+ unsigned long phys_addr;
+ u64 secinfo_flags;
+ int ret;
+
+ if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
+ return VM_FAULT_SIGBUS;
+
+ /*
+ * Ignore internal permission checking for dynamically added pages.
+ * They matter only for data added during the pre-initialization
+ * phase. The enclave decides the permissions by the means of
+ * EACCEPT, EACCEPTCOPY and EMODPE.
+ */
+ secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
+ encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
+ if (IS_ERR(encl_page))
+ return VM_FAULT_OOM;
+
+ mutex_lock(&encl->lock);
+
+ epc_page = sgx_alloc_epc_page(encl_page, false);
+ if (IS_ERR(epc_page)) {
+ if (PTR_ERR(epc_page) == -EBUSY)
+ vmret = VM_FAULT_NOPAGE;
+ goto err_out_unlock;
+ }
+
+ va_page = sgx_encl_grow(encl, false);
+ if (IS_ERR(va_page))
+ goto err_out_epc;
+
+ if (va_page)
+ list_add(&va_page->list, &encl->va_pages);
+
+ ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
+ encl_page, GFP_KERNEL);
+ /*
+ * If ret == -EBUSY then page was created in another flow while
+ * running without encl->lock
+ */
+ if (ret)
+ goto err_out_shrink;
+
+ pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
+ pginfo.addr = encl_page->desc & PAGE_MASK;
+ pginfo.metadata = 0;
+
+ ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
+ if (ret)
+ goto err_out;
+
+ encl_page->encl = encl;
+ encl_page->epc_page = epc_page;
+ encl_page->type = SGX_PAGE_TYPE_REG;
+ encl->secs_child_cnt++;
+
+ sgx_mark_page_reclaimable(encl_page->epc_page);
+
+ phys_addr = sgx_get_epc_phys_addr(epc_page);
+ /*
+ * Do not undo everything when creating PTE entry fails - next #PF
+ * would find page ready for a PTE.
+ */
+ vmret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
+ if (vmret != VM_FAULT_NOPAGE) {
+ mutex_unlock(&encl->lock);
+ return VM_FAULT_SIGBUS;
+ }
+ mutex_unlock(&encl->lock);
+ return VM_FAULT_NOPAGE;
+
+err_out:
+ xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
+
+err_out_shrink:
+ sgx_encl_shrink(encl, va_page);
+err_out_epc:
+ sgx_encl_free_epc_page(epc_page);
+err_out_unlock:
+ mutex_unlock(&encl->lock);
+ kfree(encl_page);
+
+ return vmret;
+}
+
static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
{
unsigned long addr = (unsigned long)vmf->address;
@@ -295,9 +420,20 @@ static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
if (unlikely(!encl))
return VM_FAULT_SIGBUS;
+ /*
+ * The page_array keeps track of all enclave pages, whether they
+ * are swapped out or not. If there is no entry for this page and
+ * the system supports SGX2 then it is possible to dynamically add
+ * a new enclave page. This is only possible for an initialized
+ * enclave that will be checked for right away.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_SGX2) &&
+ (!xa_load(&encl->page_array, PFN_DOWN(addr))))
+ return sgx_encl_eaug_page(vma, encl, addr);
+
mutex_lock(&encl->lock);
- entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
+ entry = sgx_encl_load_page_in_vma(encl, addr, vma->vm_flags);
if (IS_ERR(entry)) {
mutex_unlock(&encl->lock);
@@ -367,6 +503,11 @@ int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
XA_STATE(xas, &encl->page_array, PFN_DOWN(start));
+ /* Disallow mapping outside enclave's address range. */
+ if (test_bit(SGX_ENCL_INITIALIZED, &encl->flags) &&
+ (start < encl->base || end > encl->base + encl->size))
+ return -EACCES;
+
/*
* Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
* conflict with the enclave page permissions.
@@ -445,7 +586,7 @@ static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
for ( ; ; ) {
mutex_lock(&encl->lock);
- entry = sgx_encl_load_page(encl, addr, vm_flags);
+ entry = sgx_encl_load_page_in_vma(encl, addr, vm_flags);
if (PTR_ERR(entry) != -EBUSY)
break;
@@ -687,7 +828,7 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
spin_lock(&encl->mm_lock);
list_add_rcu(&encl_mm->list, &encl->mm_list);
- /* Pairs with smp_rmb() in sgx_reclaimer_block(). */
+ /* Pairs with smp_rmb() in sgx_zap_enclave_ptes(). */
smp_wmb();
encl->mm_list_version++;
spin_unlock(&encl->mm_lock);
@@ -695,6 +836,73 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
return 0;
}
+/**
+ * sgx_encl_cpumask() - Query which CPUs might be accessing the enclave
+ * @encl: the enclave
+ *
+ * Some SGX functions require that no cached linear-to-physical address
+ * mappings are present before they can succeed. For example, ENCLS[EWB]
+ * copies a page from the enclave page cache to regular main memory but
+ * it fails if it cannot ensure that there are no cached
+ * linear-to-physical address mappings referring to the page.
+ *
+ * SGX hardware flushes all cached linear-to-physical mappings on a CPU
+ * when an enclave is exited via ENCLU[EEXIT] or an Asynchronous Enclave
+ * Exit (AEX). Exiting an enclave will thus ensure cached linear-to-physical
+ * address mappings are cleared but coordination with the tracking done within
+ * the SGX hardware is needed to support the SGX functions that depend on this
+ * cache clearing.
+ *
+ * When the ENCLS[ETRACK] function is issued on an enclave the hardware
+ * tracks threads operating inside the enclave at that time. The SGX
+ * hardware tracking require that all the identified threads must have
+ * exited the enclave in order to flush the mappings before a function such
+ * as ENCLS[EWB] will be permitted
+ *
+ * The following flow is used to support SGX functions that require that
+ * no cached linear-to-physical address mappings are present:
+ * 1) Execute ENCLS[ETRACK] to initiate hardware tracking.
+ * 2) Use this function (sgx_encl_cpumask()) to query which CPUs might be
+ * accessing the enclave.
+ * 3) Send IPI to identified CPUs, kicking them out of the enclave and
+ * thus flushing all locally cached linear-to-physical address mappings.
+ * 4) Execute SGX function.
+ *
+ * Context: It is required to call this function after ENCLS[ETRACK].
+ * This will ensure that if any new mm appears (racing with
+ * sgx_encl_mm_add()) then the new mm will enter into the
+ * enclave with fresh linear-to-physical address mappings.
+ *
+ * It is required that all IPIs are completed before a new
+ * ENCLS[ETRACK] is issued so be sure to protect steps 1 to 3
+ * of the above flow with the enclave's mutex.
+ *
+ * Return: cpumask of CPUs that might be accessing @encl
+ */
+const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl)
+{
+ cpumask_t *cpumask = &encl->cpumask;
+ struct sgx_encl_mm *encl_mm;
+ int idx;
+
+ cpumask_clear(cpumask);
+
+ idx = srcu_read_lock(&encl->srcu);
+
+ list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
+ if (!mmget_not_zero(encl_mm->mm))
+ continue;
+
+ cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));
+
+ mmput_async(encl_mm->mm);
+ }
+
+ srcu_read_unlock(&encl->srcu, idx);
+
+ return cpumask;
+}
+
static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
pgoff_t index)
{
@@ -735,7 +943,6 @@ static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
return PTR_ERR(pcmd);
}
- backing->page_index = page_index;
backing->contents = contents;
backing->pcmd = pcmd;
backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);
@@ -902,8 +1109,85 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
return ret;
}
+struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
+ unsigned long offset,
+ u64 secinfo_flags)
+{
+ struct sgx_encl_page *encl_page;
+ unsigned long prot;
+
+ encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
+ if (!encl_page)
+ return ERR_PTR(-ENOMEM);
+
+ encl_page->desc = encl->base + offset;
+ encl_page->encl = encl;
+
+ prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ) |
+ _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
+ _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
+
+ /*
+ * TCS pages must always RW set for CPU access while the SECINFO
+ * permissions are *always* zero - the CPU ignores the user provided
+ * values and silently overwrites them with zero permissions.
+ */
+ if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
+ prot |= PROT_READ | PROT_WRITE;
+
+ /* Calculate maximum of the VM flags for the page. */
+ encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
+
+ return encl_page;
+}
+
+/**
+ * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
+ * @encl: the enclave
+ * @addr: page aligned pointer to single page for which PTEs will be removed
+ *
+ * Multiple VMAs may have an enclave page mapped. Remove the PTE mapping
+ * @addr from each VMA. Ensure that page fault handler is ready to handle
+ * new mappings of @addr before calling this function.
+ */
+void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
+{
+ unsigned long mm_list_version;
+ struct sgx_encl_mm *encl_mm;
+ struct vm_area_struct *vma;
+ int idx, ret;
+
+ do {
+ mm_list_version = encl->mm_list_version;
+
+ /* Pairs with smp_wmb() in sgx_encl_mm_add(). */
+ smp_rmb();
+
+ idx = srcu_read_lock(&encl->srcu);
+
+ list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
+ if (!mmget_not_zero(encl_mm->mm))
+ continue;
+
+ mmap_read_lock(encl_mm->mm);
+
+ ret = sgx_encl_find(encl_mm->mm, addr, &vma);
+ if (!ret && encl == vma->vm_private_data)
+ zap_vma_ptes(vma, addr, PAGE_SIZE);
+
+ mmap_read_unlock(encl_mm->mm);
+
+ mmput_async(encl_mm->mm);
+ }
+
+ srcu_read_unlock(&encl->srcu, idx);
+ } while (unlikely(encl->mm_list_version != mm_list_version));
+}
+
/**
* sgx_alloc_va_page() - Allocate a Version Array (VA) page
+ * @reclaim: Reclaim EPC pages directly if none available. Enclave
+ * mutex should not be held if this is set.
*
* Allocate a free EPC page and convert it to a Version Array (VA) page.
*
@@ -911,12 +1195,12 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
* a VA page,
* -errno otherwise
*/
-struct sgx_epc_page *sgx_alloc_va_page(void)
+struct sgx_epc_page *sgx_alloc_va_page(bool reclaim)
{
struct sgx_epc_page *epc_page;
int ret;
- epc_page = sgx_alloc_epc_page(NULL, true);
+ epc_page = sgx_alloc_epc_page(NULL, reclaim);
if (IS_ERR(epc_page))
return ERR_CAST(epc_page);
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index 332ef3568267..a65a952116fd 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -27,7 +27,8 @@
struct sgx_encl_page {
unsigned long desc;
- unsigned long vm_max_prot_bits;
+ unsigned long vm_max_prot_bits:8;
+ enum sgx_page_type type:16;
struct sgx_epc_page *epc_page;
struct sgx_encl *encl;
struct sgx_va_page *va_page;
@@ -78,7 +79,6 @@ struct sgx_va_page {
};
struct sgx_backing {
- pgoff_t page_index;
struct page *contents;
struct page *pcmd;
unsigned long pcmd_offset;
@@ -106,6 +106,7 @@ int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
bool current_is_ksgxd(void);
void sgx_encl_release(struct kref *ref);
int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
+const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl);
int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
struct sgx_backing *backing);
int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
@@ -113,11 +114,18 @@ int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
void sgx_encl_put_backing(struct sgx_backing *backing);
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
struct sgx_encl_page *page);
-
-struct sgx_epc_page *sgx_alloc_va_page(void);
+struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
+ unsigned long offset,
+ u64 secinfo_flags);
+void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr);
+struct sgx_epc_page *sgx_alloc_va_page(bool reclaim);
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset);
bool sgx_va_page_full(struct sgx_va_page *va_page);
void sgx_encl_free_epc_page(struct sgx_epc_page *page);
+struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
+ unsigned long addr);
+struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim);
+void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page);
#endif /* _X86_ENCL_H */
diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h
index fa04a73daf9c..99004b02e2ed 100644
--- a/arch/x86/kernel/cpu/sgx/encls.h
+++ b/arch/x86/kernel/cpu/sgx/encls.h
@@ -136,57 +136,71 @@ static inline bool encls_failed(int ret)
ret; \
})
+/* Initialize an EPC page into an SGX Enclave Control Structure (SECS) page. */
static inline int __ecreate(struct sgx_pageinfo *pginfo, void *secs)
{
return __encls_2(ECREATE, pginfo, secs);
}
+/* Hash a 256 byte region of an enclave page to SECS:MRENCLAVE. */
static inline int __eextend(void *secs, void *addr)
{
return __encls_2(EEXTEND, secs, addr);
}
+/*
+ * Associate an EPC page to an enclave either as a REG or TCS page
+ * populated with the provided data.
+ */
static inline int __eadd(struct sgx_pageinfo *pginfo, void *addr)
{
return __encls_2(EADD, pginfo, addr);
}
+/* Finalize enclave build, initialize enclave for user code execution. */
static inline int __einit(void *sigstruct, void *token, void *secs)
{
return __encls_ret_3(EINIT, sigstruct, secs, token);
}
+/* Disassociate EPC page from its enclave and mark it as unused. */
static inline int __eremove(void *addr)
{
return __encls_ret_1(EREMOVE, addr);
}
+/* Copy data to an EPC page belonging to a debug enclave. */
static inline int __edbgwr(void *addr, unsigned long *data)
{
return __encls_2(EDGBWR, *data, addr);
}
+/* Copy data from an EPC page belonging to a debug enclave. */
static inline int __edbgrd(void *addr, unsigned long *data)
{
return __encls_1_1(EDGBRD, *data, addr);
}
+/* Track that software has completed the required TLB address clears. */
static inline int __etrack(void *addr)
{
return __encls_ret_1(ETRACK, addr);
}
+/* Load, verify, and unblock an EPC page. */
static inline int __eldu(struct sgx_pageinfo *pginfo, void *addr,
void *va)
{
return __encls_ret_3(ELDU, pginfo, addr, va);
}
+/* Make EPC page inaccessible to enclave, ready to be written to memory. */
static inline int __eblock(void *addr)
{
return __encls_ret_1(EBLOCK, addr);
}
+/* Initialize an EPC page into a Version Array (VA) page. */
static inline int __epa(void *addr)
{
unsigned long rbx = SGX_PAGE_TYPE_VA;
@@ -194,10 +208,29 @@ static inline int __epa(void *addr)
return __encls_2(EPA, rbx, addr);
}
+/* Invalidate an EPC page and write it out to main memory. */
static inline int __ewb(struct sgx_pageinfo *pginfo, void *addr,
void *va)
{
return __encls_ret_3(EWB, pginfo, addr, va);
}
+/* Restrict the EPCM permissions of an EPC page. */
+static inline int __emodpr(struct sgx_secinfo *secinfo, void *addr)
+{
+ return __encls_ret_2(EMODPR, secinfo, addr);
+}
+
+/* Change the type of an EPC page. */
+static inline int __emodt(struct sgx_secinfo *secinfo, void *addr)
+{
+ return __encls_ret_2(EMODT, secinfo, addr);
+}
+
+/* Zero a page of EPC memory and add it to an initialized enclave. */
+static inline int __eaug(struct sgx_pageinfo *pginfo, void *addr)
+{
+ return __encls_2(EAUG, pginfo, addr);
+}
+
#endif /* _X86_ENCLS_H */
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 83df20e3e633..ebe79d60619f 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -17,7 +17,7 @@
#include "encl.h"
#include "encls.h"
-static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
+struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim)
{
struct sgx_va_page *va_page = NULL;
void *err;
@@ -30,7 +30,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
if (!va_page)
return ERR_PTR(-ENOMEM);
- va_page->epc_page = sgx_alloc_va_page();
+ va_page->epc_page = sgx_alloc_va_page(reclaim);
if (IS_ERR(va_page->epc_page)) {
err = ERR_CAST(va_page->epc_page);
kfree(va_page);
@@ -43,7 +43,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
return va_page;
}
-static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
+void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
{
encl->page_cnt--;
@@ -64,7 +64,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
struct file *backing;
long ret;
- va_page = sgx_encl_grow(encl);
+ va_page = sgx_encl_grow(encl, true);
if (IS_ERR(va_page))
return PTR_ERR(va_page);
else if (va_page)
@@ -107,6 +107,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
set_bit(SGX_ENCL_DEBUG, &encl->flags);
encl->secs.encl = encl;
+ encl->secs.type = SGX_PAGE_TYPE_SECS;
encl->base = secs->base;
encl->size = secs->size;
encl->attributes = secs->attributes;
@@ -168,38 +169,6 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
return ret;
}
-static struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
- unsigned long offset,
- u64 secinfo_flags)
-{
- struct sgx_encl_page *encl_page;
- unsigned long prot;
-
- encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
- if (!encl_page)
- return ERR_PTR(-ENOMEM);
-
- encl_page->desc = encl->base + offset;
- encl_page->encl = encl;
-
- prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ) |
- _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
- _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
-
- /*
- * TCS pages must always RW set for CPU access while the SECINFO
- * permissions are *always* zero - the CPU ignores the user provided
- * values and silently overwrites them with zero permissions.
- */
- if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
- prot |= PROT_READ | PROT_WRITE;
-
- /* Calculate maximum of the VM flags for the page. */
- encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
-
- return encl_page;
-}
-
static int sgx_validate_secinfo(struct sgx_secinfo *secinfo)
{
u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK;
@@ -306,7 +275,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
return PTR_ERR(epc_page);
}
- va_page = sgx_encl_grow(encl);
+ va_page = sgx_encl_grow(encl, true);
if (IS_ERR(va_page)) {
ret = PTR_ERR(va_page);
goto err_out_free;
@@ -344,6 +313,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
*/
encl_page->encl = encl;
encl_page->epc_page = epc_page;
+ encl_page->type = (secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK) >> 8;
encl->secs_child_cnt++;
if (flags & SGX_PAGE_MEASURE) {
@@ -372,6 +342,26 @@ err_out_free:
return ret;
}
+/*
+ * Ensure user provided offset and length values are valid for
+ * an enclave.
+ */
+static int sgx_validate_offset_length(struct sgx_encl *encl,
+ unsigned long offset,
+ unsigned long length)
+{
+ if (!IS_ALIGNED(offset, PAGE_SIZE))
+ return -EINVAL;
+
+ if (!length || !IS_ALIGNED(length, PAGE_SIZE))
+ return -EINVAL;
+
+ if (offset + length - PAGE_SIZE >= encl->size)
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* sgx_ioc_enclave_add_pages() - The handler for %SGX_IOC_ENCLAVE_ADD_PAGES
* @encl: an enclave pointer
@@ -425,14 +415,10 @@ static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg)
if (copy_from_user(&add_arg, arg, sizeof(add_arg)))
return -EFAULT;
- if (!IS_ALIGNED(add_arg.offset, PAGE_SIZE) ||
- !IS_ALIGNED(add_arg.src, PAGE_SIZE))
- return -EINVAL;
-
- if (!add_arg.length || add_arg.length & (PAGE_SIZE - 1))
+ if (!IS_ALIGNED(add_arg.src, PAGE_SIZE))
return -EINVAL;
- if (add_arg.offset + add_arg.length - PAGE_SIZE >= encl->size)
+ if (sgx_validate_offset_length(encl, add_arg.offset, add_arg.length))
return -EINVAL;
if (copy_from_user(&secinfo, (void __user *)add_arg.secinfo,
@@ -674,6 +660,565 @@ static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg)
return sgx_set_attribute(&encl->attributes_mask, params.fd);
}
+/*
+ * Ensure enclave is ready for SGX2 functions. Readiness is checked
+ * by ensuring the hardware supports SGX2 and the enclave is initialized
+ * and thus able to handle requests to modify pages within it.
+ */
+static int sgx_ioc_sgx2_ready(struct sgx_encl *encl)
+{
+ if (!(cpu_feature_enabled(X86_FEATURE_SGX2)))
+ return -ENODEV;
+
+ if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * Some SGX functions require that no cached linear-to-physical address
+ * mappings are present before they can succeed. Collaborate with
+ * hardware via ENCLS[ETRACK] to ensure that all cached
+ * linear-to-physical address mappings belonging to all threads of
+ * the enclave are cleared. See sgx_encl_cpumask() for details.
+ *
+ * Must be called with enclave's mutex held from the time the
+ * SGX function requiring that no cached linear-to-physical mappings
+ * are present is executed until this ETRACK flow is complete.
+ */
+static int sgx_enclave_etrack(struct sgx_encl *encl)
+{
+ void *epc_virt;
+ int ret;
+
+ epc_virt = sgx_get_epc_virt_addr(encl->secs.epc_page);
+ ret = __etrack(epc_virt);
+ if (ret) {
+ /*
+ * ETRACK only fails when there is an OS issue. For
+ * example, two consecutive ETRACK was sent without
+ * completed IPI between.
+ */
+ pr_err_once("ETRACK returned %d (0x%x)", ret, ret);
+ /*
+ * Send IPIs to kick CPUs out of the enclave and
+ * try ETRACK again.
+ */
+ on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
+ ret = __etrack(epc_virt);
+ if (ret) {
+ pr_err_once("ETRACK repeat returned %d (0x%x)",
+ ret, ret);
+ return -EFAULT;
+ }
+ }
+ on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
+
+ return 0;
+}
+
+/**
+ * sgx_enclave_restrict_permissions() - Restrict EPCM permissions
+ * @encl: Enclave to which the pages belong.
+ * @modp: Checked parameters from user on which pages need modifying and
+ * their new permissions.
+ *
+ * Return:
+ * - 0: Success.
+ * - -errno: Otherwise.
+ */
+static long
+sgx_enclave_restrict_permissions(struct sgx_encl *encl,
+ struct sgx_enclave_restrict_permissions *modp)
+{
+ struct sgx_encl_page *entry;
+ struct sgx_secinfo secinfo;
+ unsigned long addr;
+ unsigned long c;
+ void *epc_virt;
+ int ret;
+
+ memset(&secinfo, 0, sizeof(secinfo));
+ secinfo.flags = modp->permissions & SGX_SECINFO_PERMISSION_MASK;
+
+ for (c = 0 ; c < modp->length; c += PAGE_SIZE) {
+ addr = encl->base + modp->offset + c;
+
+ sgx_reclaim_direct();
+
+ mutex_lock(&encl->lock);
+
+ entry = sgx_encl_load_page(encl, addr);
+ if (IS_ERR(entry)) {
+ ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
+ goto out_unlock;
+ }
+
+ /*
+ * Changing EPCM permissions is only supported on regular
+ * SGX pages. Attempting this change on other pages will
+ * result in #PF.
+ */
+ if (entry->type != SGX_PAGE_TYPE_REG) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * Apart from ensuring that read-access remains, do not verify
+ * the permission bits requested. Kernel has no control over
+ * how EPCM permissions can be relaxed from within the enclave.
+ * ENCLS[EMODPR] can only remove existing EPCM permissions,
+ * attempting to set new permissions will be ignored by the
+ * hardware.
+ */
+
+ /* Change EPCM permissions. */
+ epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
+ ret = __emodpr(&secinfo, epc_virt);
+ if (encls_faulted(ret)) {
+ /*
+ * All possible faults should be avoidable:
+ * parameters have been checked, will only change
+ * permissions of a regular page, and no concurrent
+ * SGX1/SGX2 ENCLS instructions since these
+ * are protected with mutex.
+ */
+ pr_err_once("EMODPR encountered exception %d\n",
+ ENCLS_TRAPNR(ret));
+ ret = -EFAULT;
+ goto out_unlock;
+ }
+ if (encls_failed(ret)) {
+ modp->result = ret;
+ ret = -EFAULT;
+ goto out_unlock;
+ }
+
+ ret = sgx_enclave_etrack(encl);
+ if (ret) {
+ ret = -EFAULT;
+ goto out_unlock;
+ }
+
+ mutex_unlock(&encl->lock);
+ }
+
+ ret = 0;
+ goto out;
+
+out_unlock:
+ mutex_unlock(&encl->lock);
+out:
+ modp->count = c;
+
+ return ret;
+}
+
+/**
+ * sgx_ioc_enclave_restrict_permissions() - handler for
+ * %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
+ * @encl: an enclave pointer
+ * @arg: userspace pointer to a &struct sgx_enclave_restrict_permissions
+ * instance
+ *
+ * SGX2 distinguishes between relaxing and restricting the enclave page
+ * permissions maintained by the hardware (EPCM permissions) of pages
+ * belonging to an initialized enclave (after SGX_IOC_ENCLAVE_INIT).
+ *
+ * EPCM permissions cannot be restricted from within the enclave, the enclave
+ * requires the kernel to run the privileged level 0 instructions ENCLS[EMODPR]
+ * and ENCLS[ETRACK]. An attempt to relax EPCM permissions with this call
+ * will be ignored by the hardware.
+ *
+ * Return:
+ * - 0: Success
+ * - -errno: Otherwise
+ */
+static long sgx_ioc_enclave_restrict_permissions(struct sgx_encl *encl,
+ void __user *arg)
+{
+ struct sgx_enclave_restrict_permissions params;
+ long ret;
+
+ ret = sgx_ioc_sgx2_ready(encl);
+ if (ret)
+ return ret;
+
+ if (copy_from_user(&params, arg, sizeof(params)))
+ return -EFAULT;
+
+ if (sgx_validate_offset_length(encl, params.offset, params.length))
+ return -EINVAL;
+
+ if (params.permissions & ~SGX_SECINFO_PERMISSION_MASK)
+ return -EINVAL;
+
+ /*
+ * Fail early if invalid permissions requested to prevent ENCLS[EMODPR]
+ * from faulting later when the CPU does the same check.
+ */
+ if ((params.permissions & SGX_SECINFO_W) &&
+ !(params.permissions & SGX_SECINFO_R))
+ return -EINVAL;
+
+ if (params.result || params.count)
+ return -EINVAL;
+
+ ret = sgx_enclave_restrict_permissions(encl, &params);
+
+ if (copy_to_user(arg, &params, sizeof(params)))
+ return -EFAULT;
+
+ return ret;
+}
+
+/**
+ * sgx_enclave_modify_types() - Modify type of SGX enclave pages
+ * @encl: Enclave to which the pages belong.
+ * @modt: Checked parameters from user about which pages need modifying
+ * and their new page type.
+ *
+ * Return:
+ * - 0: Success
+ * - -errno: Otherwise
+ */
+static long sgx_enclave_modify_types(struct sgx_encl *encl,
+ struct sgx_enclave_modify_types *modt)
+{
+ unsigned long max_prot_restore;
+ enum sgx_page_type page_type;
+ struct sgx_encl_page *entry;
+ struct sgx_secinfo secinfo;
+ unsigned long prot;
+ unsigned long addr;
+ unsigned long c;
+ void *epc_virt;
+ int ret;
+
+ page_type = modt->page_type & SGX_PAGE_TYPE_MASK;
+
+ /*
+ * The only new page types allowed by hardware are PT_TCS and PT_TRIM.
+ */
+ if (page_type != SGX_PAGE_TYPE_TCS && page_type != SGX_PAGE_TYPE_TRIM)
+ return -EINVAL;
+
+ memset(&secinfo, 0, sizeof(secinfo));
+
+ secinfo.flags = page_type << 8;
+
+ for (c = 0 ; c < modt->length; c += PAGE_SIZE) {
+ addr = encl->base + modt->offset + c;
+
+ sgx_reclaim_direct();
+
+ mutex_lock(&encl->lock);
+
+ entry = sgx_encl_load_page(encl, addr);
+ if (IS_ERR(entry)) {
+ ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
+ goto out_unlock;
+ }
+
+ /*
+ * Borrow the logic from the Intel SDM. Regular pages
+ * (SGX_PAGE_TYPE_REG) can change type to SGX_PAGE_TYPE_TCS
+ * or SGX_PAGE_TYPE_TRIM but TCS pages can only be trimmed.
+ * CET pages not supported yet.
+ */
+ if (!(entry->type == SGX_PAGE_TYPE_REG ||
+ (entry->type == SGX_PAGE_TYPE_TCS &&
+ page_type == SGX_PAGE_TYPE_TRIM))) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ max_prot_restore = entry->vm_max_prot_bits;
+
+ /*
+ * Once a regular page becomes a TCS page it cannot be
+ * changed back. So the maximum allowed protection reflects
+ * the TCS page that is always RW from kernel perspective but
+ * will be inaccessible from within enclave. Before doing
+ * so, do make sure that the new page type continues to
+ * respect the originally vetted page permissions.
+ */
+ if (entry->type == SGX_PAGE_TYPE_REG &&
+ page_type == SGX_PAGE_TYPE_TCS) {
+ if (~entry->vm_max_prot_bits & (VM_READ | VM_WRITE)) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+ prot = PROT_READ | PROT_WRITE;
+ entry->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
+
+ /*
+ * Prevent page from being reclaimed while mutex
+ * is released.
+ */
+ if (sgx_unmark_page_reclaimable(entry->epc_page)) {
+ ret = -EAGAIN;
+ goto out_entry_changed;
+ }
+
+ /*
+ * Do not keep encl->lock because of dependency on
+ * mmap_lock acquired in sgx_zap_enclave_ptes().
+ */
+ mutex_unlock(&encl->lock);
+
+ sgx_zap_enclave_ptes(encl, addr);
+
+ mutex_lock(&encl->lock);
+
+ sgx_mark_page_reclaimable(entry->epc_page);
+ }
+
+ /* Change EPC type */
+ epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
+ ret = __emodt(&secinfo, epc_virt);
+ if (encls_faulted(ret)) {
+ /*
+ * All possible faults should be avoidable:
+ * parameters have been checked, will only change
+ * valid page types, and no concurrent
+ * SGX1/SGX2 ENCLS instructions since these are
+ * protected with mutex.
+ */
+ pr_err_once("EMODT encountered exception %d\n",
+ ENCLS_TRAPNR(ret));
+ ret = -EFAULT;
+ goto out_entry_changed;
+ }
+ if (encls_failed(ret)) {
+ modt->result = ret;
+ ret = -EFAULT;
+ goto out_entry_changed;
+ }
+
+ ret = sgx_enclave_etrack(encl);
+ if (ret) {
+ ret = -EFAULT;
+ goto out_unlock;
+ }
+
+ entry->type = page_type;
+
+ mutex_unlock(&encl->lock);
+ }
+
+ ret = 0;
+ goto out;
+
+out_entry_changed:
+ entry->vm_max_prot_bits = max_prot_restore;
+out_unlock:
+ mutex_unlock(&encl->lock);
+out:
+ modt->count = c;
+
+ return ret;
+}
+
+/**
+ * sgx_ioc_enclave_modify_types() - handler for %SGX_IOC_ENCLAVE_MODIFY_TYPES
+ * @encl: an enclave pointer
+ * @arg: userspace pointer to a &struct sgx_enclave_modify_types instance
+ *
+ * Ability to change the enclave page type supports the following use cases:
+ *
+ * * It is possible to add TCS pages to an enclave by changing the type of
+ * regular pages (%SGX_PAGE_TYPE_REG) to TCS (%SGX_PAGE_TYPE_TCS) pages.
+ * With this support the number of threads supported by an initialized
+ * enclave can be increased dynamically.
+ *
+ * * Regular or TCS pages can dynamically be removed from an initialized
+ * enclave by changing the page type to %SGX_PAGE_TYPE_TRIM. Changing the
+ * page type to %SGX_PAGE_TYPE_TRIM marks the page for removal with actual
+ * removal done by handler of %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() called
+ * after ENCLU[EACCEPT] is run on %SGX_PAGE_TYPE_TRIM page from within the
+ * enclave.
+ *
+ * Return:
+ * - 0: Success
+ * - -errno: Otherwise
+ */
+static long sgx_ioc_enclave_modify_types(struct sgx_encl *encl,
+ void __user *arg)
+{
+ struct sgx_enclave_modify_types params;
+ long ret;
+
+ ret = sgx_ioc_sgx2_ready(encl);
+ if (ret)
+ return ret;
+
+ if (copy_from_user(&params, arg, sizeof(params)))
+ return -EFAULT;
+
+ if (sgx_validate_offset_length(encl, params.offset, params.length))
+ return -EINVAL;
+
+ if (params.page_type & ~SGX_PAGE_TYPE_MASK)
+ return -EINVAL;
+
+ if (params.result || params.count)
+ return -EINVAL;
+
+ ret = sgx_enclave_modify_types(encl, &params);
+
+ if (copy_to_user(arg, &params, sizeof(params)))
+ return -EFAULT;
+
+ return ret;
+}
+
+/**
+ * sgx_encl_remove_pages() - Remove trimmed pages from SGX enclave
+ * @encl: Enclave to which the pages belong
+ * @params: Checked parameters from user on which pages need to be removed
+ *
+ * Return:
+ * - 0: Success.
+ * - -errno: Otherwise.
+ */
+static long sgx_encl_remove_pages(struct sgx_encl *encl,
+ struct sgx_enclave_remove_pages *params)
+{
+ struct sgx_encl_page *entry;
+ struct sgx_secinfo secinfo;
+ unsigned long addr;
+ unsigned long c;
+ void *epc_virt;
+ int ret;
+
+ memset(&secinfo, 0, sizeof(secinfo));
+ secinfo.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
+
+ for (c = 0 ; c < params->length; c += PAGE_SIZE) {
+ addr = encl->base + params->offset + c;
+
+ sgx_reclaim_direct();
+
+ mutex_lock(&encl->lock);
+
+ entry = sgx_encl_load_page(encl, addr);
+ if (IS_ERR(entry)) {
+ ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
+ goto out_unlock;
+ }
+
+ if (entry->type != SGX_PAGE_TYPE_TRIM) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ /*
+ * ENCLS[EMODPR] is a no-op instruction used to inform if
+ * ENCLU[EACCEPT] was run from within the enclave. If
+ * ENCLS[EMODPR] is run with RWX on a trimmed page that is
+ * not yet accepted then it will return
+ * %SGX_PAGE_NOT_MODIFIABLE, after the trimmed page is
+ * accepted the instruction will encounter a page fault.
+ */
+ epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
+ ret = __emodpr(&secinfo, epc_virt);
+ if (!encls_faulted(ret) || ENCLS_TRAPNR(ret) != X86_TRAP_PF) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (sgx_unmark_page_reclaimable(entry->epc_page)) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+ /*
+ * Do not keep encl->lock because of dependency on
+ * mmap_lock acquired in sgx_zap_enclave_ptes().
+ */
+ mutex_unlock(&encl->lock);
+
+ sgx_zap_enclave_ptes(encl, addr);
+
+ mutex_lock(&encl->lock);
+
+ sgx_encl_free_epc_page(entry->epc_page);
+ encl->secs_child_cnt--;
+ entry->epc_page = NULL;
+ xa_erase(&encl->page_array, PFN_DOWN(entry->desc));
+ sgx_encl_shrink(encl, NULL);
+ kfree(entry);
+
+ mutex_unlock(&encl->lock);
+ }
+
+ ret = 0;
+ goto out;
+
+out_unlock:
+ mutex_unlock(&encl->lock);
+out:
+ params->count = c;
+
+ return ret;
+}
+
+/**
+ * sgx_ioc_enclave_remove_pages() - handler for %SGX_IOC_ENCLAVE_REMOVE_PAGES
+ * @encl: an enclave pointer
+ * @arg: userspace pointer to &struct sgx_enclave_remove_pages instance
+ *
+ * Final step of the flow removing pages from an initialized enclave. The
+ * complete flow is:
+ *
+ * 1) User changes the type of the pages to be removed to %SGX_PAGE_TYPE_TRIM
+ * using the %SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl().
+ * 2) User approves the page removal by running ENCLU[EACCEPT] from within
+ * the enclave.
+ * 3) User initiates actual page removal using the
+ * %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() that is handled here.
+ *
+ * First remove any page table entries pointing to the page and then proceed
+ * with the actual removal of the enclave page and data in support of it.
+ *
+ * VA pages are not affected by this removal. It is thus possible that the
+ * enclave may end up with more VA pages than needed to support all its
+ * pages.
+ *
+ * Return:
+ * - 0: Success
+ * - -errno: Otherwise
+ */
+static long sgx_ioc_enclave_remove_pages(struct sgx_encl *encl,
+ void __user *arg)
+{
+ struct sgx_enclave_remove_pages params;
+ long ret;
+
+ ret = sgx_ioc_sgx2_ready(encl);
+ if (ret)
+ return ret;
+
+ if (copy_from_user(&params, arg, sizeof(params)))
+ return -EFAULT;
+
+ if (sgx_validate_offset_length(encl, params.offset, params.length))
+ return -EINVAL;
+
+ if (params.count)
+ return -EINVAL;
+
+ ret = sgx_encl_remove_pages(encl, &params);
+
+ if (copy_to_user(arg, &params, sizeof(params)))
+ return -EFAULT;
+
+ return ret;
+}
+
long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct sgx_encl *encl = filep->private_data;
@@ -695,6 +1240,16 @@ long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
case SGX_IOC_ENCLAVE_PROVISION:
ret = sgx_ioc_enclave_provision(encl, (void __user *)arg);
break;
+ case SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS:
+ ret = sgx_ioc_enclave_restrict_permissions(encl,
+ (void __user *)arg);
+ break;
+ case SGX_IOC_ENCLAVE_MODIFY_TYPES:
+ ret = sgx_ioc_enclave_modify_types(encl, (void __user *)arg);
+ break;
+ case SGX_IOC_ENCLAVE_REMOVE_PAGES:
+ ret = sgx_ioc_enclave_remove_pages(encl, (void __user *)arg);
+ break;
default:
ret = -ENOIOCTLCMD;
break;
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index a78652d43e61..515e2a5f25bb 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -137,36 +137,9 @@ static void sgx_reclaimer_block(struct sgx_epc_page *epc_page)
struct sgx_encl_page *page = epc_page->owner;
unsigned long addr = page->desc & PAGE_MASK;
struct sgx_encl *encl = page->encl;
- unsigned long mm_list_version;
- struct sgx_encl_mm *encl_mm;
- struct vm_area_struct *vma;
- int idx, ret;
-
- do {
- mm_list_version = encl->mm_list_version;
-
- /* Pairs with smp_rmb() in sgx_encl_mm_add(). */
- smp_rmb();
-
- idx = srcu_read_lock(&encl->srcu);
-
- list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
- if (!mmget_not_zero(encl_mm->mm))
- continue;
-
- mmap_read_lock(encl_mm->mm);
-
- ret = sgx_encl_find(encl_mm->mm, addr, &vma);
- if (!ret && encl == vma->vm_private_data)
- zap_vma_ptes(vma, addr, PAGE_SIZE);
-
- mmap_read_unlock(encl_mm->mm);
-
- mmput_async(encl_mm->mm);
- }
+ int ret;
- srcu_read_unlock(&encl->srcu, idx);
- } while (unlikely(encl->mm_list_version != mm_list_version));
+ sgx_zap_enclave_ptes(encl, addr);
mutex_lock(&encl->lock);
@@ -201,39 +174,10 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
return ret;
}
-static void sgx_ipi_cb(void *info)
+void sgx_ipi_cb(void *info)
{
}
-static const cpumask_t *sgx_encl_ewb_cpumask(struct sgx_encl *encl)
-{
- cpumask_t *cpumask = &encl->cpumask;
- struct sgx_encl_mm *encl_mm;
- int idx;
-
- /*
- * Can race with sgx_encl_mm_add(), but ETRACK has already been
- * executed, which means that the CPUs running in the new mm will enter
- * into the enclave with a fresh epoch.
- */
- cpumask_clear(cpumask);
-
- idx = srcu_read_lock(&encl->srcu);
-
- list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
- if (!mmget_not_zero(encl_mm->mm))
- continue;
-
- cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));
-
- mmput_async(encl_mm->mm);
- }
-
- srcu_read_unlock(&encl->srcu, idx);
-
- return cpumask;
-}
-
/*
* Swap page to the regular memory transformed to the blocked state by using
* EBLOCK, which means that it can no longer be referenced (no new TLB entries).
@@ -280,7 +224,7 @@ static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
* miss cpus that entered the enclave between
* generating the mask and incrementing epoch.
*/
- on_each_cpu_mask(sgx_encl_ewb_cpumask(encl),
+ on_each_cpu_mask(sgx_encl_cpumask(encl),
sgx_ipi_cb, NULL, 1);
ret = __sgx_encl_ewb(epc_page, va_slot, backing);
}
@@ -431,6 +375,17 @@ static bool sgx_should_reclaim(unsigned long watermark)
!list_empty(&sgx_active_page_list);
}
+/*
+ * sgx_reclaim_direct() should be called (without enclave's mutex held)
+ * in locations where SGX memory resources might be low and might be
+ * needed in order to make forward progress.
+ */
+void sgx_reclaim_direct(void)
+{
+ if (sgx_should_reclaim(SGX_NR_LOW_PAGES))
+ sgx_reclaim_pages();
+}
+
static int ksgxd(void *p)
{
set_freezable();
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 0f17def9fe6f..0f2020653fba 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -86,10 +86,13 @@ static inline void *sgx_get_epc_virt_addr(struct sgx_epc_page *page)
struct sgx_epc_page *__sgx_alloc_epc_page(void);
void sgx_free_epc_page(struct sgx_epc_page *page);
+void sgx_reclaim_direct(void);
void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
int sgx_unmark_page_reclaimable(struct sgx_epc_page *page);
struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim);
+void sgx_ipi_cb(void *info);
+
#ifdef CONFIG_X86_SGX_KVM
int __init sgx_vepc_init(void);
#else