diff options
Diffstat (limited to 'drivers/gpu/drm/gma500/mmu.c')
-rw-r--r-- | drivers/gpu/drm/gma500/mmu.c | 858 |
1 files changed, 858 insertions, 0 deletions
diff --git a/drivers/gpu/drm/gma500/mmu.c b/drivers/gpu/drm/gma500/mmu.c new file mode 100644 index 000000000000..c904d73b1de3 --- /dev/null +++ b/drivers/gpu/drm/gma500/mmu.c @@ -0,0 +1,858 @@ +/************************************************************************** + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + **************************************************************************/ +#include <drm/drmP.h> +#include "psb_drv.h" +#include "psb_reg.h" + +/* + * Code for the SGX MMU: + */ + +/* + * clflush on one processor only: + * clflush should apparently flush the cache line on all processors in an + * SMP system. + */ + +/* + * kmap atomic: + * The usage of the slots must be completely encapsulated within a spinlock, and + * no other functions that may be using the locks for other purposed may be + * called from within the locked region. + * Since the slots are per processor, this will guarantee that we are the only + * user. + */ + +/* + * TODO: Inserting ptes from an interrupt handler: + * This may be desirable for some SGX functionality where the GPU can fault in + * needed pages. For that, we need to make an atomic insert_pages function, that + * may fail. + * If it fails, the caller need to insert the page using a workqueue function, + * but on average it should be fast. + */ + +struct psb_mmu_driver { + /* protects driver- and pd structures. Always take in read mode + * before taking the page table spinlock. + */ + struct rw_semaphore sem; + + /* protects page tables, directory tables and pt tables. + * and pt structures. + */ + spinlock_t lock; + + atomic_t needs_tlbflush; + + uint8_t __iomem *register_map; + struct psb_mmu_pd *default_pd; + /*uint32_t bif_ctrl;*/ + int has_clflush; + int clflush_add; + unsigned long clflush_mask; + + struct drm_psb_private *dev_priv; +}; + +struct psb_mmu_pd; + +struct psb_mmu_pt { + struct psb_mmu_pd *pd; + uint32_t index; + uint32_t count; + struct page *p; + uint32_t *v; +}; + +struct psb_mmu_pd { + struct psb_mmu_driver *driver; + int hw_context; + struct psb_mmu_pt **tables; + struct page *p; + struct page *dummy_pt; + struct page *dummy_page; + uint32_t pd_mask; + uint32_t invalid_pde; + uint32_t invalid_pte; +}; + +static inline uint32_t psb_mmu_pt_index(uint32_t offset) +{ + return (offset >> PSB_PTE_SHIFT) & 0x3FF; +} + +static inline uint32_t psb_mmu_pd_index(uint32_t offset) +{ + return offset >> PSB_PDE_SHIFT; +} + +static inline void psb_clflush(void *addr) +{ + __asm__ __volatile__("clflush (%0)\n" : : "r"(addr) : "memory"); +} + +static inline void psb_mmu_clflush(struct psb_mmu_driver *driver, + void *addr) +{ + if (!driver->has_clflush) + return; + + mb(); + psb_clflush(addr); + mb(); +} + +static void psb_page_clflush(struct psb_mmu_driver *driver, struct page* page) +{ + uint32_t clflush_add = driver->clflush_add >> PAGE_SHIFT; + uint32_t clflush_count = PAGE_SIZE / clflush_add; + int i; + uint8_t *clf; + + clf = kmap_atomic(page, KM_USER0); + mb(); + for (i = 0; i < clflush_count; ++i) { + psb_clflush(clf); + clf += clflush_add; + } + mb(); + kunmap_atomic(clf, KM_USER0); +} + +static void psb_pages_clflush(struct psb_mmu_driver *driver, + struct page *page[], unsigned long num_pages) +{ + int i; + + if (!driver->has_clflush) + return ; + + for (i = 0; i < num_pages; i++) + psb_page_clflush(driver, *page++); +} + +static void psb_mmu_flush_pd_locked(struct psb_mmu_driver *driver, + int force) +{ + atomic_set(&driver->needs_tlbflush, 0); +} + +static void psb_mmu_flush_pd(struct psb_mmu_driver *driver, int force) +{ + down_write(&driver->sem); + psb_mmu_flush_pd_locked(driver, force); + up_write(&driver->sem); +} + +void psb_mmu_flush(struct psb_mmu_driver *driver, int rc_prot) +{ + if (rc_prot) + down_write(&driver->sem); + if (rc_prot) + up_write(&driver->sem); +} + +void psb_mmu_set_pd_context(struct psb_mmu_pd *pd, int hw_context) +{ + /*ttm_tt_cache_flush(&pd->p, 1);*/ + psb_pages_clflush(pd->driver, &pd->p, 1); + down_write(&pd->driver->sem); + wmb(); + psb_mmu_flush_pd_locked(pd->driver, 1); + pd->hw_context = hw_context; + up_write(&pd->driver->sem); + +} + +static inline unsigned long psb_pd_addr_end(unsigned long addr, + unsigned long end) +{ + + addr = (addr + PSB_PDE_MASK + 1) & ~PSB_PDE_MASK; + return (addr < end) ? addr : end; +} + +static inline uint32_t psb_mmu_mask_pte(uint32_t pfn, int type) +{ + uint32_t mask = PSB_PTE_VALID; + + if (type & PSB_MMU_CACHED_MEMORY) + mask |= PSB_PTE_CACHED; + if (type & PSB_MMU_RO_MEMORY) + mask |= PSB_PTE_RO; + if (type & PSB_MMU_WO_MEMORY) + mask |= PSB_PTE_WO; + + return (pfn << PAGE_SHIFT) | mask; +} + +struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver *driver, + int trap_pagefaults, int invalid_type) +{ + struct psb_mmu_pd *pd = kmalloc(sizeof(*pd), GFP_KERNEL); + uint32_t *v; + int i; + + if (!pd) + return NULL; + + pd->p = alloc_page(GFP_DMA32); + if (!pd->p) + goto out_err1; + pd->dummy_pt = alloc_page(GFP_DMA32); + if (!pd->dummy_pt) + goto out_err2; + pd->dummy_page = alloc_page(GFP_DMA32); + if (!pd->dummy_page) + goto out_err3; + + if (!trap_pagefaults) { + pd->invalid_pde = + psb_mmu_mask_pte(page_to_pfn(pd->dummy_pt), + invalid_type); + pd->invalid_pte = + psb_mmu_mask_pte(page_to_pfn(pd->dummy_page), + invalid_type); + } else { + pd->invalid_pde = 0; + pd->invalid_pte = 0; + } + + v = kmap(pd->dummy_pt); + for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i) + v[i] = pd->invalid_pte; + + kunmap(pd->dummy_pt); + + v = kmap(pd->p); + for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i) + v[i] = pd->invalid_pde; + + kunmap(pd->p); + + clear_page(kmap(pd->dummy_page)); + kunmap(pd->dummy_page); + + pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024); + if (!pd->tables) + goto out_err4; + + pd->hw_context = -1; + pd->pd_mask = PSB_PTE_VALID; + pd->driver = driver; + + return pd; + +out_err4: + __free_page(pd->dummy_page); +out_err3: + __free_page(pd->dummy_pt); +out_err2: + __free_page(pd->p); +out_err1: + kfree(pd); + return NULL; +} + +void psb_mmu_free_pt(struct psb_mmu_pt *pt) +{ + __free_page(pt->p); + kfree(pt); +} + +void psb_mmu_free_pagedir(struct psb_mmu_pd *pd) +{ + struct psb_mmu_driver *driver = pd->driver; + struct psb_mmu_pt *pt; + int i; + + down_write(&driver->sem); + if (pd->hw_context != -1) + psb_mmu_flush_pd_locked(driver, 1); + + /* Should take the spinlock here, but we don't need to do that + since we have the semaphore in write mode. */ + + for (i = 0; i < 1024; ++i) { + pt = pd->tables[i]; + if (pt) + psb_mmu_free_pt(pt); + } + + vfree(pd->tables); + __free_page(pd->dummy_page); + __free_page(pd->dummy_pt); + __free_page(pd->p); + kfree(pd); + up_write(&driver->sem); +} + +static struct psb_mmu_pt *psb_mmu_alloc_pt(struct psb_mmu_pd *pd) +{ + struct psb_mmu_pt *pt = kmalloc(sizeof(*pt), GFP_KERNEL); + void *v; + uint32_t clflush_add = pd->driver->clflush_add >> PAGE_SHIFT; + uint32_t clflush_count = PAGE_SIZE / clflush_add; + spinlock_t *lock = &pd->driver->lock; + uint8_t *clf; + uint32_t *ptes; + int i; + + if (!pt) + return NULL; + + pt->p = alloc_page(GFP_DMA32); + if (!pt->p) { + kfree(pt); + return NULL; + } + + spin_lock(lock); + + v = kmap_atomic(pt->p, KM_USER0); + clf = (uint8_t *) v; + ptes = (uint32_t *) v; + for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i) + *ptes++ = pd->invalid_pte; + + + if (pd->driver->has_clflush && pd->hw_context != -1) { + mb(); + for (i = 0; i < clflush_count; ++i) { + psb_clflush(clf); + clf += clflush_add; + } + mb(); + } + + kunmap_atomic(v, KM_USER0); + spin_unlock(lock); + + pt->count = 0; + pt->pd = pd; + pt->index = 0; + + return pt; +} + +struct psb_mmu_pt *psb_mmu_pt_alloc_map_lock(struct psb_mmu_pd *pd, + unsigned long addr) +{ + uint32_t index = psb_mmu_pd_index(addr); + struct psb_mmu_pt *pt; + uint32_t *v; + spinlock_t *lock = &pd->driver->lock; + + spin_lock(lock); + pt = pd->tables[index]; + while (!pt) { + spin_unlock(lock); + pt = psb_mmu_alloc_pt(pd); + if (!pt) + return NULL; + spin_lock(lock); + + if (pd->tables[index]) { + spin_unlock(lock); + psb_mmu_free_pt(pt); + spin_lock(lock); + pt = pd->tables[index]; + continue; + } + + v = kmap_atomic(pd->p, KM_USER0); + pd->tables[index] = pt; + v[index] = (page_to_pfn(pt->p) << 12) | pd->pd_mask; + pt->index = index; + kunmap_atomic((void *) v, KM_USER0); + + if (pd->hw_context != -1) { + psb_mmu_clflush(pd->driver, (void *) &v[index]); + atomic_set(&pd->driver->needs_tlbflush, 1); + } + } + pt->v = kmap_atomic(pt->p, KM_USER0); + return pt; +} + +static struct psb_mmu_pt *psb_mmu_pt_map_lock(struct psb_mmu_pd *pd, + unsigned long addr) +{ + uint32_t index = psb_mmu_pd_index(addr); + struct psb_mmu_pt *pt; + spinlock_t *lock = &pd->driver->lock; + + spin_lock(lock); + pt = pd->tables[index]; + if (!pt) { + spin_unlock(lock); + return NULL; + } + pt->v = kmap_atomic(pt->p, KM_USER0); + return pt; +} + +static void psb_mmu_pt_unmap_unlock(struct psb_mmu_pt *pt) +{ + struct psb_mmu_pd *pd = pt->pd; + uint32_t *v; + + kunmap_atomic(pt->v, KM_USER0); + if (pt->count == 0) { + v = kmap_atomic(pd->p, KM_USER0); + v[pt->index] = pd->invalid_pde; + pd->tables[pt->index] = NULL; + + if (pd->hw_context != -1) { + psb_mmu_clflush(pd->driver, + (void *) &v[pt->index]); + atomic_set(&pd->driver->needs_tlbflush, 1); + } + kunmap_atomic(pt->v, KM_USER0); + spin_unlock(&pd->driver->lock); + psb_mmu_free_pt(pt); + return; + } + spin_unlock(&pd->driver->lock); +} + +static inline void psb_mmu_set_pte(struct psb_mmu_pt *pt, + unsigned long addr, uint32_t pte) +{ + pt->v[psb_mmu_pt_index(addr)] = pte; +} + +static inline void psb_mmu_invalidate_pte(struct psb_mmu_pt *pt, + unsigned long addr) +{ + pt->v[psb_mmu_pt_index(addr)] = pt->pd->invalid_pte; +} + + +void psb_mmu_mirror_gtt(struct psb_mmu_pd *pd, + uint32_t mmu_offset, uint32_t gtt_start, + uint32_t gtt_pages) +{ + uint32_t *v; + uint32_t start = psb_mmu_pd_index(mmu_offset); + struct psb_mmu_driver *driver = pd->driver; + int num_pages = gtt_pages; + + down_read(&driver->sem); + spin_lock(&driver->lock); + + v = kmap_atomic(pd->p, KM_USER0); + v += start; + + while (gtt_pages--) { + *v++ = gtt_start | pd->pd_mask; + gtt_start += PAGE_SIZE; + } + + /*ttm_tt_cache_flush(&pd->p, num_pages);*/ + psb_pages_clflush(pd->driver, &pd->p, num_pages); + kunmap_atomic(v, KM_USER0); + spin_unlock(&driver->lock); + + if (pd->hw_context != -1) + atomic_set(&pd->driver->needs_tlbflush, 1); + + up_read(&pd->driver->sem); + psb_mmu_flush_pd(pd->driver, 0); +} + +struct psb_mmu_pd *psb_mmu_get_default_pd(struct psb_mmu_driver *driver) +{ + struct psb_mmu_pd *pd; + + /* down_read(&driver->sem); */ + pd = driver->default_pd; + /* up_read(&driver->sem); */ + + return pd; +} + +/* Returns the physical address of the PD shared by sgx/msvdx */ +uint32_t psb_get_default_pd_addr(struct psb_mmu_driver *driver) +{ + struct psb_mmu_pd *pd; + + pd = psb_mmu_get_default_pd(driver); + return page_to_pfn(pd->p) << PAGE_SHIFT; +} + +void psb_mmu_driver_takedown(struct psb_mmu_driver *driver) +{ + psb_mmu_free_pagedir(driver->default_pd); + kfree(driver); +} + +struct psb_mmu_driver *psb_mmu_driver_init(uint8_t __iomem * registers, + int trap_pagefaults, + int invalid_type, + struct drm_psb_private *dev_priv) +{ + struct psb_mmu_driver *driver; + + driver = kmalloc(sizeof(*driver), GFP_KERNEL); + + if (!driver) + return NULL; + driver->dev_priv = dev_priv; + + driver->default_pd = psb_mmu_alloc_pd(driver, trap_pagefaults, + invalid_type); + if (!driver->default_pd) + goto out_err1; + + spin_lock_init(&driver->lock); + init_rwsem(&driver->sem); + down_write(&driver->sem); + driver->register_map = registers; + atomic_set(&driver->needs_tlbflush, 1); + + driver->has_clflush = 0; + + if (boot_cpu_has(X86_FEATURE_CLFLSH)) { + uint32_t tfms, misc, cap0, cap4, clflush_size; + + /* + * clflush size is determined at kernel setup for x86_64 + * but not for i386. We have to do it here. + */ + + cpuid(0x00000001, &tfms, &misc, &cap0, &cap4); + clflush_size = ((misc >> 8) & 0xff) * 8; + driver->has_clflush = 1; + driver->clflush_add = + PAGE_SIZE * clflush_size / sizeof(uint32_t); + driver->clflush_mask = driver->clflush_add - 1; + driver->clflush_mask = ~driver->clflush_mask; + } + + up_write(&driver->sem); + return driver; + +out_err1: + kfree(driver); + return NULL; +} + +static void psb_mmu_flush_ptes(struct psb_mmu_pd *pd, + unsigned long address, uint32_t num_pages, + uint32_t desired_tile_stride, + uint32_t hw_tile_stride) +{ + struct psb_mmu_pt *pt; + uint32_t rows = 1; + uint32_t i; + unsigned long addr; + unsigned long end; + unsigned long next; + unsigned long add; + unsigned long row_add; + unsigned long clflush_add = pd->driver->clflush_add; + unsigned long clflush_mask = pd->driver->clflush_mask; + + if (!pd->driver->has_clflush) { + /*ttm_tt_cache_flush(&pd->p, num_pages);*/ + psb_pages_clflush(pd->driver, &pd->p, num_pages); + return; + } + + if (hw_tile_stride) + rows = num_pages / desired_tile_stride; + else + desired_tile_stride = num_pages; + + add = desired_tile_stride << PAGE_SHIFT; + row_add = hw_tile_stride << PAGE_SHIFT; + mb(); + for (i = 0; i < rows; ++i) { + + addr = address; + end = addr + add; + + do { + next = psb_pd_addr_end(addr, end); + pt = psb_mmu_pt_map_lock(pd, addr); + if (!pt) + continue; + do { + psb_clflush(&pt->v + [psb_mmu_pt_index(addr)]); + } while (addr += + clflush_add, + (addr & clflush_mask) < next); + + psb_mmu_pt_unmap_unlock(pt); + } while (addr = next, next != end); + address += row_add; + } + mb(); +} + +void psb_mmu_remove_pfn_sequence(struct psb_mmu_pd *pd, + unsigned long address, uint32_t num_pages) +{ + struct psb_mmu_pt *pt; + unsigned long addr; + unsigned long end; + unsigned long next; + unsigned long f_address = address; + + down_read(&pd->driver->sem); + + addr = address; + end = addr + (num_pages << PAGE_SHIFT); + + do { + next = psb_pd_addr_end(addr, end); + pt = psb_mmu_pt_alloc_map_lock(pd, addr); + if (!pt) + goto out; + do { + psb_mmu_invalidate_pte(pt, addr); + --pt->count; + } while (addr += PAGE_SIZE, addr < next); + psb_mmu_pt_unmap_unlock(pt); + + } while (addr = next, next != end); + +out: + if (pd->hw_context != -1) + psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1); + + up_read(&pd->driver->sem); + + if (pd->hw_context != -1) + psb_mmu_flush(pd->driver, 0); + + return; +} + +void psb_mmu_remove_pages(struct psb_mmu_pd *pd, unsigned long address, + uint32_t num_pages, uint32_t desired_tile_stride, + uint32_t hw_tile_stride) +{ + struct psb_mmu_pt *pt; + uint32_t rows = 1; + uint32_t i; + unsigned long addr; + unsigned long end; + unsigned long next; + unsigned long add; + unsigned long row_add; + unsigned long f_address = address; + + if (hw_tile_stride) + rows = num_pages / desired_tile_stride; + else + desired_tile_stride = num_pages; + + add = desired_tile_stride << PAGE_SHIFT; + row_add = hw_tile_stride << PAGE_SHIFT; + + /* down_read(&pd->driver->sem); */ + + /* Make sure we only need to flush this processor's cache */ + + for (i = 0; i < rows; ++i) { + + addr = address; + end = addr + add; + + do { + next = psb_pd_addr_end(addr, end); + pt = psb_mmu_pt_map_lock(pd, addr); + if (!pt) + continue; + do { + psb_mmu_invalidate_pte(pt, addr); + --pt->count; + + } while (addr += PAGE_SIZE, addr < next); + psb_mmu_pt_unmap_unlock(pt); + + } while (addr = next, next != end); + address += row_add; + } + if (pd->hw_context != -1) + psb_mmu_flush_ptes(pd, f_address, num_pages, + desired_tile_stride, hw_tile_stride); + + /* up_read(&pd->driver->sem); */ + + if (pd->hw_context != -1) + psb_mmu_flush(pd->driver, 0); +} + +int psb_mmu_insert_pfn_sequence(struct psb_mmu_pd *pd, uint32_t start_pfn, + unsigned long address, uint32_t num_pages, + int type) +{ + struct psb_mmu_pt *pt; + uint32_t pte; + unsigned long addr; + unsigned long end; + unsigned long next; + unsigned long f_address = address; + int ret = 0; + + down_read(&pd->driver->sem); + + addr = address; + end = addr + (num_pages << PAGE_SHIFT); + + do { + next = psb_pd_addr_end(addr, end); + pt = psb_mmu_pt_alloc_map_lock(pd, addr); + if (!pt) { + ret = -ENOMEM; + goto out; + } + do { + pte = psb_mmu_mask_pte(start_pfn++, type); + psb_mmu_set_pte(pt, addr, pte); + pt->count++; + } while (addr += PAGE_SIZE, addr < next); + psb_mmu_pt_unmap_unlock(pt); + + } while (addr = next, next != end); + +out: + if (pd->hw_context != -1) + psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1); + + up_read(&pd->driver->sem); + + if (pd->hw_context != -1) + psb_mmu_flush(pd->driver, 1); + + return ret; +} + +int psb_mmu_insert_pages(struct psb_mmu_pd *pd, struct page **pages, + unsigned long address, uint32_t num_pages, + uint32_t desired_tile_stride, + uint32_t hw_tile_stride, int type) +{ + struct psb_mmu_pt *pt; + uint32_t rows = 1; + uint32_t i; + uint32_t pte; + unsigned long addr; + unsigned long end; + unsigned long next; + unsigned long add; + unsigned long row_add; + unsigned long f_address = address; + int ret = 0; + + if (hw_tile_stride) { + if (num_pages % desired_tile_stride != 0) + return -EINVAL; + rows = num_pages / desired_tile_stride; + } else { + desired_tile_stride = num_pages; + } + + add = desired_tile_stride << PAGE_SHIFT; + row_add = hw_tile_stride << PAGE_SHIFT; + + down_read(&pd->driver->sem); + + for (i = 0; i < rows; ++i) { + + addr = address; + end = addr + add; + + do { + next = psb_pd_addr_end(addr, end); + pt = psb_mmu_pt_alloc_map_lock(pd, addr); + if (!pt) { + ret = -ENOMEM; + goto out; + } + do { + pte = + psb_mmu_mask_pte(page_to_pfn(*pages++), + type); + psb_mmu_set_pte(pt, addr, pte); + pt->count++; + } while (addr += PAGE_SIZE, addr < next); + psb_mmu_pt_unmap_unlock(pt); + + } while (addr = next, next != end); + + address += row_add; + } +out: + if (pd->hw_context != -1) + psb_mmu_flush_ptes(pd, f_address, num_pages, + desired_tile_stride, hw_tile_stride); + + up_read(&pd->driver->sem); + + if (pd->hw_context != -1) + psb_mmu_flush(pd->driver, 1); + + return ret; +} + +int psb_mmu_virtual_to_pfn(struct psb_mmu_pd *pd, uint32_t virtual, + unsigned long *pfn) +{ + int ret; + struct psb_mmu_pt *pt; + uint32_t tmp; + spinlock_t *lock = &pd->driver->lock; + + down_read(&pd->driver->sem); + pt = psb_mmu_pt_map_lock(pd, virtual); + if (!pt) { + uint32_t *v; + + spin_lock(lock); + v = kmap_atomic(pd->p, KM_USER0); + tmp = v[psb_mmu_pd_index(virtual)]; + kunmap_atomic(v, KM_USER0); + spin_unlock(lock); + + if (tmp != pd->invalid_pde || !(tmp & PSB_PTE_VALID) || + !(pd->invalid_pte & PSB_PTE_VALID)) { + ret = -EINVAL; + goto out; + } + ret = 0; + *pfn = pd->invalid_pte >> PAGE_SHIFT; + goto out; + } + tmp = pt->v[psb_mmu_pt_index(virtual)]; + if (!(tmp & PSB_PTE_VALID)) { + ret = -EINVAL; + } else { + ret = 0; + *pfn = tmp >> PAGE_SHIFT; + } + psb_mmu_pt_unmap_unlock(pt); +out: + up_read(&pd->driver->sem); + return ret; +} |