powerpc: Merge enough to start building in arch/powerpc.

This creates the directory structure under arch/powerpc and a bunch of Kconfig files. It does a first-cut merge of arch/powerpc/mm, arch/powerpc/lib and arch/powerpc/platforms/powermac. This is enough to build a 32-bit powermac kernel with ARCH=powerpc. For now we are getting some unmerged files from arch/ppc/kernel and arch/ppc/syslib, or arch/ppc64/kernel. This makes some minor changes to files in those directories and files outside arch/powerpc. The boot directory is still not merged. That's going to be interesting. Signed-off-by: Paul Mackerras <paulus@samba.org>
author: Paul Mackerras 2005-09-26 16:04:21 +1000
committer: Paul Mackerras 2005-09-26 16:04:21 +1000
commit: 14cf11af6cf608eb8c23e989ddb17a715ddce109 (patch)
tree: 271a97ce73e265f39c569cb159c195c5b4bb3f8c /arch/powerpc/mm
parent: e5baa396af7560382d2cf3f0871d616b61fc284c (diff)
19 files changed, 4794 insertions, 0 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
new file mode 100644
index 000000000000..3d79ce281b67
--- /dev/null
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -0,0 +1,120 @@
+/*
+ * Modifications by Matt Porter (mporter@mvista.com) to support
+ * PPC44x Book E processors.
+ *
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/bootx.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+
+#include "mmu_decl.h"
+
+extern char etext[], _stext[];
+
+/* Used by the 44x TLB replacement exception handler.
+ * Just needed it declared someplace.
+ */
+unsigned int tlb_44x_index = 0;
+unsigned int tlb_44x_hwater = 62;
+
+/*
+ * "Pins" a 256MB TLB entry in AS0 for kernel lowmem
+ */
+static void __init
+ppc44x_pin_tlb(int slot, unsigned int virt, unsigned int phys)
+{
+	unsigned long attrib = 0;
+
+	__asm__ __volatile__("\
+	clrrwi	%2,%2,10\n\
+	ori	%2,%2,%4\n\
+	clrrwi	%1,%1,10\n\
+	li	%0,0\n\
+	ori	%0,%0,%5\n\
+	tlbwe	%2,%3,%6\n\
+	tlbwe	%1,%3,%7\n\
+	tlbwe	%0,%3,%8"
+	:
+	: "r" (attrib), "r" (phys), "r" (virt), "r" (slot),
+	  "i" (PPC44x_TLB_VALID | PPC44x_TLB_256M),
+	  "i" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
+	  "i" (PPC44x_TLB_PAGEID),
+	  "i" (PPC44x_TLB_XLAT),
+	  "i" (PPC44x_TLB_ATTRIB));
+}
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+	flush_instruction_cache();
+}
+
+unsigned long __init mmu_mapin_ram(void)
+{
+	unsigned int pinned_tlbs = 1;
+	int i;
+
+	/* Determine number of entries necessary to cover lowmem */
+	pinned_tlbs = (unsigned int)
+		(_ALIGN(total_lowmem, PPC44x_PIN_SIZE) >> PPC44x_PIN_SHIFT);
+
+	/* Write upper watermark to save location */
+	tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs;
+
+	/* If necessary, set additional pinned TLBs */
+	if (pinned_tlbs > 1)
+		for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) {
+			unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC44x_PIN_SIZE;
+			ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr);
+		}
+
+	return total_lowmem;
+}
diff --git a/arch/powerpc/mm/4xx_mmu.c b/arch/powerpc/mm/4xx_mmu.c
new file mode 100644
index 000000000000..b7bcbc232f39
--- /dev/null
+++ b/arch/powerpc/mm/4xx_mmu.c
@@ -0,0 +1,141 @@
+/*
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/bootx.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+#include "mmu_decl.h"
+
+extern int __map_without_ltlbs;
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+	/*
+	 * The Zone Protection Register (ZPR) defines how protection will
+	 * be applied to every page which is a member of a given zone. At
+	 * present, we utilize only two of the 4xx's zones.
+	 * The zone index bits (of ZSEL) in the PTE are used for software
+	 * indicators, except the LSB.  For user access, zone 1 is used,
+	 * for kernel access, zone 0 is used.  We set all but zone 1
+	 * to zero, allowing only kernel access as indicated in the PTE.
+	 * For zone 1, we set a 01 binary (a value of 10 will not work)
+	 * to allow user access as indicated in the PTE.  This also allows
+	 * kernel access as indicated in the PTE.
+	 */
+
+        mtspr(SPRN_ZPR, 0x10000000);
+
+	flush_instruction_cache();
+
+	/*
+	 * Set up the real-mode cache parameters for the exception vector
+	 * handlers (which are run in real-mode).
+	 */
+
+        mtspr(SPRN_DCWR, 0x00000000);	/* All caching is write-back */
+
+        /*
+	 * Cache instruction and data space where the exception
+	 * vectors and the kernel live in real-mode.
+	 */
+
+        mtspr(SPRN_DCCR, 0xF0000000);	/* 512 MB of data space at 0x0. */
+        mtspr(SPRN_ICCR, 0xF0000000);	/* 512 MB of instr. space at 0x0. */
+}
+
+#define LARGE_PAGE_SIZE_16M	(1<<24)
+#define LARGE_PAGE_SIZE_4M	(1<<22)
+
+unsigned long __init mmu_mapin_ram(void)
+{
+	unsigned long v, s;
+	phys_addr_t p;
+
+	v = KERNELBASE;
+	p = PPC_MEMSTART;
+	s = 0;
+
+	if (__map_without_ltlbs) {
+		return s;
+	}
+
+	while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) {
+		pmd_t *pmdp;
+		unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
+
+		spin_lock(&init_mm.page_table_lock);
+		pmdp = pmd_offset(pgd_offset_k(v), v);
+		pmd_val(*pmdp++) = val;
+		pmd_val(*pmdp++) = val;
+		pmd_val(*pmdp++) = val;
+		pmd_val(*pmdp++) = val;
+		spin_unlock(&init_mm.page_table_lock);
+
+		v += LARGE_PAGE_SIZE_16M;
+		p += LARGE_PAGE_SIZE_16M;
+		s += LARGE_PAGE_SIZE_16M;
+	}
+
+	while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) {
+		pmd_t *pmdp;
+		unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
+
+		spin_lock(&init_mm.page_table_lock);
+		pmdp = pmd_offset(pgd_offset_k(v), v);
+		pmd_val(*pmdp) = val;
+		spin_unlock(&init_mm.page_table_lock);
+
+		v += LARGE_PAGE_SIZE_4M;
+		p += LARGE_PAGE_SIZE_4M;
+		s += LARGE_PAGE_SIZE_4M;
+	}
+
+	return s;
+}
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
new file mode 100644
index 000000000000..9f52c26acd86
--- /dev/null
+++ b/arch/powerpc/mm/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the linux ppc-specific parts of the memory manager.
+#
+
+obj-y				:= fault.o mem.o
+obj-$(CONFIG_PPC32)		+= init.o pgtable.o mmu_context.o \
+				   mem_pieces.o tlb.o
+obj-$(CONFIG_PPC64)		+= init64.o pgtable64.o mmu_context64.o
+obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu.o hash_32.o
+obj-$(CONFIG_40x)		+= 4xx_mmu.o
+obj-$(CONFIG_44x)		+= 44x_mmu.o
+obj-$(CONFIG_FSL_BOOKE)		+= fsl_booke_mmu.o
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
new file mode 100644
index 000000000000..3df641fa789d
--- /dev/null
+++ b/arch/powerpc/mm/fault.c
@@ -0,0 +1,391 @@
+/*
+ *  arch/ppc/mm/fault.c
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Derived from "arch/i386/mm/fault.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Modified by Cort Dougan and Paul Mackerras.
+ *
+ *  Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/tlbflush.h>
+#include <asm/kdebug.h>
+#include <asm/siginfo.h>
+
+/*
+ * Check whether the instruction at regs->nip is a store using
+ * an update addressing form which will update r1.
+ */
+static int store_updates_sp(struct pt_regs *regs)
+{
+	unsigned int inst;
+
+	if (get_user(inst, (unsigned int __user *)regs->nip))
+		return 0;
+	/* check for 1 in the rA field */
+	if (((inst >> 16) & 0x1f) != 1)
+		return 0;
+	/* check major opcode */
+	switch (inst >> 26) {
+	case 37:	/* stwu */
+	case 39:	/* stbu */
+	case 45:	/* sthu */
+	case 53:	/* stfsu */
+	case 55:	/* stfdu */
+		return 1;
+	case 62:	/* std or stdu */
+		return (inst & 3) == 1;
+	case 31:
+		/* check minor opcode */
+		switch ((inst >> 1) & 0x3ff) {
+		case 181:	/* stdux */
+		case 183:	/* stwux */
+		case 247:	/* stbux */
+		case 439:	/* sthux */
+		case 695:	/* stfsux */
+		case 759:	/* stfdux */
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static void do_dabr(struct pt_regs *regs, unsigned long error_code)
+{
+	siginfo_t info;
+
+	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+			11, SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	if (debugger_dabr_match(regs))
+		return;
+
+	/* Clear the DABR */
+	set_dabr(0);
+
+	/* Deliver the signal to userspace */
+	info.si_signo = SIGTRAP;
+	info.si_errno = 0;
+	info.si_code = TRAP_HWBKPT;
+	info.si_addr = (void __user *)regs->nip;
+	force_sig_info(SIGTRAP, &info, current);
+}
+
+/*
+ * For 600- and 800-family processors, the error_code parameter is DSISR
+ * for a data fault, SRR1 for an instruction fault. For 400-family processors
+ * the error_code parameter is ESR for a data fault, 0 for an instruction
+ * fault.
+ * For 64-bit processors, the error_code parameter is
+ *  - DSISR for a non-SLB data access fault,
+ *  - SRR1 & 0x08000000 for a non-SLB instruction access fault
+ *  - 0 any SLB fault.
+ *
+ * The return value is 0 if the fault was handled, or the signal
+ * number if this is a kernel fault that can't be handled here.
+ */
+int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
+			    unsigned long error_code)
+{
+	struct vm_area_struct * vma;
+	struct mm_struct *mm = current->mm;
+	siginfo_t info;
+	int code = SEGV_MAPERR;
+	int is_write = 0;
+	int trap = TRAP(regs);
+ 	int is_exec = trap == 0x400;
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+	/*
+	 * Fortunately the bit assignments in SRR1 for an instruction
+	 * fault and DSISR for a data fault are mostly the same for the
+	 * bits we are interested in.  But there are some bits which
+	 * indicate errors in DSISR but can validly be set in SRR1.
+	 */
+	if (trap == 0x400)
+		error_code &= 0x48200000;
+	else
+		is_write = error_code & DSISR_ISSTORE;
+#else
+	is_write = error_code & ESR_DST;
+#endif /* CONFIG_4xx || CONFIG_BOOKE */
+
+	if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code,
+				11, SIGSEGV) == NOTIFY_STOP)
+		return 0;
+
+	if (trap == 0x300) {
+		if (debugger_fault_handler(regs))
+			return 0;
+	}
+
+	/* On a kernel SLB miss we can only check for a valid exception entry */
+	if (!user_mode(regs) && (address >= TASK_SIZE))
+		return SIGSEGV;
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+  	if (error_code & DSISR_DABRMATCH) {
+		/* DABR match */
+		do_dabr(regs, error_code);
+		return 0;
+	}
+#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
+
+	if (in_atomic() || mm == NULL) {
+		if (!user_mode(regs))
+			return SIGSEGV;
+		/* in_atomic() in user mode is really bad,
+		   as is current->mm == NULL. */
+		printk(KERN_EMERG "Page fault in user mode with"
+		       "in_atomic() = %d mm = %p\n", in_atomic(), mm);
+		printk(KERN_EMERG "NIP = %lx  MSR = %lx\n",
+		       regs->nip, regs->msr);
+		die("Weird page fault", regs, SIGSEGV);
+	}
+
+	/* When running in the kernel we expect faults to occur only to
+	 * addresses in user space.  All other faults represent errors in the
+	 * kernel and should generate an OOPS.  Unfortunatly, in the case of an
+	 * erroneous fault occuring in a code path which already holds mmap_sem
+	 * we will deadlock attempting to validate the fault against the
+	 * address space.  Luckily the kernel only validly references user
+	 * space from well defined areas of code, which are listed in the
+	 * exceptions table.
+	 *
+	 * As the vast majority of faults will be valid we will only perform
+	 * the source reference check when there is a possibilty of a deadlock.
+	 * Attempt to lock the address space, if we cannot we then validate the
+	 * source.  If this is invalid we can skip the address space check,
+	 * thus avoiding the deadlock.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if (!user_mode(regs) && !search_exception_tables(regs->nip))
+			goto bad_area_nosemaphore;
+
+		down_read(&mm->mmap_sem);
+	}
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+
+	/*
+	 * N.B. The POWER/Open ABI allows programs to access up to
+	 * 288 bytes below the stack pointer.
+	 * The kernel signal delivery code writes up to about 1.5kB
+	 * below the stack pointer (r1) before decrementing it.
+	 * The exec code can write slightly over 640kB to the stack
+	 * before setting the user r1.  Thus we allow the stack to
+	 * expand to 1MB without further checks.
+	 */
+	if (address + 0x100000 < vma->vm_end) {
+		/* get user regs even if this fault is in kernel mode */
+		struct pt_regs *uregs = current->thread.regs;
+		if (uregs == NULL)
+			goto bad_area;
+
+		/*
+		 * A user-mode access to an address a long way below
+		 * the stack pointer is only valid if the instruction
+		 * is one which would update the stack pointer to the
+		 * address accessed if the instruction completed,
+		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
+		 * (or the byte, halfword, float or double forms).
+		 *
+		 * If we don't check this then any write to the area
+		 * between the last mapped region and the stack will
+		 * expand the stack rather than segfaulting.
+		 */
+		if (address + 2048 < uregs->gpr[1]
+		    && (!user_mode(regs) || !store_updates_sp(regs)))
+			goto bad_area;
+	}
+	if (expand_stack(vma, address))
+		goto bad_area;
+
+good_area:
+	code = SEGV_ACCERR;
+#if defined(CONFIG_6xx)
+	if (error_code & 0x95700000)
+		/* an error such as lwarx to I/O controller space,
+		   address matching DABR, eciwx, etc. */
+		goto bad_area;
+#endif /* CONFIG_6xx */
+#if defined(CONFIG_8xx)
+        /* The MPC8xx seems to always set 0x80000000, which is
+         * "undefined".  Of those that can be set, this is the only
+         * one which seems bad.
+         */
+	if (error_code & 0x10000000)
+                /* Guarded storage error. */
+		goto bad_area;
+#endif /* CONFIG_8xx */
+
+	if (is_exec) {
+#ifdef CONFIG_PPC64
+		/* protection fault */
+		if (error_code & DSISR_PROTFAULT)
+			goto bad_area;
+		if (!(vma->vm_flags & VM_EXEC))
+			goto bad_area;
+#endif
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+		pte_t *ptep;
+
+		/* Since 4xx/Book-E supports per-page execute permission,
+		 * we lazily flush dcache to icache. */
+		ptep = NULL;
+		if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) {
+			struct page *page = pte_page(*ptep);
+
+			if (! test_bit(PG_arch_1, &page->flags)) {
+				flush_dcache_icache_page(page);
+				set_bit(PG_arch_1, &page->flags);
+			}
+			pte_update(ptep, 0, _PAGE_HWEXEC);
+			_tlbie(address);
+			pte_unmap(ptep);
+			up_read(&mm->mmap_sem);
+			return 0;
+		}
+		if (ptep != NULL)
+			pte_unmap(ptep);
+#endif
+	/* a write */
+	} else if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	/* a read */
+	} else {
+		/* protection fault */
+		if (error_code & 0x08000000)
+			goto bad_area;
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+ survive:
+	switch (handle_mm_fault(mm, vma, address, is_write)) {
+
+	case VM_FAULT_MINOR:
+		current->min_flt++;
+		break;
+	case VM_FAULT_MAJOR:
+		current->maj_flt++;
+		break;
+	case VM_FAULT_SIGBUS:
+		goto do_sigbus;
+	case VM_FAULT_OOM:
+		goto out_of_memory;
+	default:
+		BUG();
+	}
+
+	up_read(&mm->mmap_sem);
+	return 0;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	/* User mode accesses cause a SIGSEGV */
+	if (user_mode(regs)) {
+		_exception(SIGSEGV, regs, code, address);
+		return 0;
+	}
+
+	if (is_exec && (error_code & DSISR_PROTFAULT)
+	    && printk_ratelimit())
+		printk(KERN_CRIT "kernel tried to execute NX-protected"
+		       " page (%lx) - exploit attempt? (uid: %d)\n",
+		       address, current->uid);
+
+	return SIGSEGV;
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	up_read(&mm->mmap_sem);
+	if (current->pid == 1) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	printk("VM: killing process %s\n", current->comm);
+	if (user_mode(regs))
+		do_exit(SIGKILL);
+	return SIGKILL;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+	if (user_mode(regs)) {
+		info.si_signo = SIGBUS;
+		info.si_errno = 0;
+		info.si_code = BUS_ADRERR;
+		info.si_addr = (void __user *)address;
+		force_sig_info(SIGBUS, &info, current);
+		return 0;
+	}
+	return SIGBUS;
+}
+
+/*
+ * bad_page_fault is called when we have a bad access from the kernel.
+ * It is called from the DSI and ISI handlers in head.S and from some
+ * of the procedures in traps.c.
+ */
+void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
+{
+	const struct exception_table_entry *entry;
+
+	/* Are we prepared to handle this fault?  */
+	if ((entry = search_exception_tables(regs->nip)) != NULL) {
+		regs->nip = entry->fixup;
+		return;
+	}
+
+	/* kernel has accessed a bad area */
+	die("Kernel access of bad area", regs, sig);
+}
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
new file mode 100644
index 000000000000..af9ca0eb6d55
--- /dev/null
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -0,0 +1,237 @@
+/*
+ * Modifications by Kumar Gala (kumar.gala@freescale.com) to support
+ * E500 Book E processors.
+ *
+ * Copyright 2004 Freescale Semiconductor, Inc
+ *
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/bootx.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+
+extern void loadcam_entry(unsigned int index);
+unsigned int tlbcam_index;
+unsigned int num_tlbcam_entries;
+static unsigned long __cam0, __cam1, __cam2;
+extern unsigned long total_lowmem;
+extern unsigned long __max_low_memory;
+#define MAX_LOW_MEM	CONFIG_LOWMEM_SIZE
+
+#define NUM_TLBCAMS	(16)
+
+struct tlbcam {
+   	u32	MAS0;
+	u32	MAS1;
+	u32	MAS2;
+	u32	MAS3;
+	u32	MAS7;
+} TLBCAM[NUM_TLBCAMS];
+
+struct tlbcamrange {
+   	unsigned long start;
+	unsigned long limit;
+	phys_addr_t phys;
+} tlbcam_addrs[NUM_TLBCAMS];
+
+extern unsigned int tlbcam_index;
+
+/*
+ * Return PA for this VA if it is mapped by a CAM, or 0
+ */
+unsigned long v_mapped_by_tlbcam(unsigned long va)
+{
+	int b;
+	for (b = 0; b < tlbcam_index; ++b)
+		if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit)
+			return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start);
+	return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped
+ */
+unsigned long p_mapped_by_tlbcam(unsigned long pa)
+{
+	int b;
+	for (b = 0; b < tlbcam_index; ++b)
+		if (pa >= tlbcam_addrs[b].phys
+	    	    && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
+		              +tlbcam_addrs[b].phys)
+			return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
+	return 0;
+}
+
+/*
+ * Set up one of the I/D BAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 4 between 4k and 256M.
+ */
+void settlbcam(int index, unsigned long virt, phys_addr_t phys,
+		unsigned int size, int flags, unsigned int pid)
+{
+	unsigned int tsize, lz;
+
+	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size));
+	tsize = (21 - lz) / 2;
+
+#ifdef CONFIG_SMP
+	if ((flags & _PAGE_NO_CACHE) == 0)
+		flags |= _PAGE_COHERENT;
+#endif
+
+	TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1);
+	TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid);
+	TLBCAM[index].MAS2 = virt & PAGE_MASK;
+
+	TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;
+
+	TLBCAM[index].MAS3 = (phys & PAGE_MASK) | MAS3_SX | MAS3_SR;
+	TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0);
+
+#ifndef CONFIG_KGDB /* want user access for breakpoints */
+	if (flags & _PAGE_USER) {
+	   TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
+	   TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
+	}
+#else
+	TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
+	TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
+#endif
+
+	tlbcam_addrs[index].start = virt;
+	tlbcam_addrs[index].limit = virt + size - 1;
+	tlbcam_addrs[index].phys = phys;
+
+	loadcam_entry(index);
+}
+
+void invalidate_tlbcam_entry(int index)
+{
+	TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index);
+	TLBCAM[index].MAS1 = ~MAS1_VALID;
+
+	loadcam_entry(index);
+}
+
+void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1,
+		unsigned long cam2)
+{
+	settlbcam(0, KERNELBASE, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0);
+	tlbcam_index++;
+	if (cam1) {
+		tlbcam_index++;
+		settlbcam(1, KERNELBASE+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0);
+	}
+	if (cam2) {
+		tlbcam_index++;
+		settlbcam(2, KERNELBASE+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0);
+	}
+}
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+	flush_instruction_cache();
+}
+
+unsigned long __init mmu_mapin_ram(void)
+{
+	cam_mapin_ram(__cam0, __cam1, __cam2);
+
+	return __cam0 + __cam1 + __cam2;
+}
+
+
+void __init
+adjust_total_lowmem(void)
+{
+	unsigned long max_low_mem = MAX_LOW_MEM;
+	unsigned long cam_max = 0x10000000;
+	unsigned long ram;
+
+	/* adjust CAM size to max_low_mem */
+	if (max_low_mem < cam_max)
+		cam_max = max_low_mem;
+
+	/* adjust lowmem size to max_low_mem */
+	if (max_low_mem < total_lowmem)
+		ram = max_low_mem;
+	else
+		ram = total_lowmem;
+
+	/* Calculate CAM values */
+	__cam0 = 1UL << 2 * (__ilog2(ram) / 2);
+	if (__cam0 > cam_max)
+		__cam0 = cam_max;
+	ram -= __cam0;
+	if (ram) {
+		__cam1 = 1UL << 2 * (__ilog2(ram) / 2);
+		if (__cam1 > cam_max)
+			__cam1 = cam_max;
+		ram -= __cam1;
+	}
+	if (ram) {
+		__cam2 = 1UL << 2 * (__ilog2(ram) / 2);
+		if (__cam2 > cam_max)
+			__cam2 = cam_max;
+		ram -= __cam2;
+	}
+
+	printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb,"
+			" CAM2=%ldMb residual: %ldMb\n",
+			__cam0 >> 20, __cam1 >> 20, __cam2 >> 20,
+			(total_lowmem - __cam0 - __cam1 - __cam2) >> 20);
+	__max_low_memory = max_low_mem = __cam0 + __cam1 + __cam2;
+}
diff --git a/arch/powerpc/mm/hash_32.S b/arch/powerpc/mm/hash_32.S
new file mode 100644
index 000000000000..57278a8dd132
--- /dev/null
+++ b/arch/powerpc/mm/hash_32.S
@@ -0,0 +1,618 @@
+/*
+ *  arch/ppc/kernel/hashtable.S
+ *
+ *  $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Adapted for Power Macintosh by Paul Mackerras.
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *
+ *  This file contains low-level assembler routines for managing
+ *  the PowerPC MMU hash table.  (PPC 8xx processors don't use a
+ *  hash table, so this file is not used on them.)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+
+#ifdef CONFIG_SMP
+	.comm	mmu_hash_lock,4
+#endif /* CONFIG_SMP */
+
+/*
+ * Sync CPUs with hash_page taking & releasing the hash
+ * table lock
+ */
+#ifdef CONFIG_SMP
+	.text
+_GLOBAL(hash_page_sync)
+	lis	r8,mmu_hash_lock@h
+	ori	r8,r8,mmu_hash_lock@l
+	lis	r0,0x0fff
+	b	10f
+11:	lwz	r6,0(r8)
+	cmpwi	0,r6,0
+	bne	11b
+10:	lwarx	r6,0,r8
+	cmpwi	0,r6,0
+	bne-	11b
+	stwcx.	r0,0,r8
+	bne-	10b
+	isync
+	eieio
+	li	r0,0
+	stw	r0,0(r8)
+	blr	
+#endif
+
+/*
+ * Load a PTE into the hash table, if possible.
+ * The address is in r4, and r3 contains an access flag:
+ * _PAGE_RW (0x400) if a write.
+ * r9 contains the SRR1 value, from which we use the MSR_PR bit.
+ * SPRG3 contains the physical address of the current task's thread.
+ *
+ * Returns to the caller if the access is illegal or there is no
+ * mapping for the address.  Otherwise it places an appropriate PTE
+ * in the hash table and returns from the exception.
+ * Uses r0, r3 - r8, ctr, lr.
+ */
+	.text
+_GLOBAL(hash_page)
+#ifdef CONFIG_PPC64BRIDGE
+	mfmsr	r0
+	clrldi	r0,r0,1		/* make sure it's in 32-bit mode */
+	MTMSRD(r0)
+	isync
+#endif
+	tophys(r7,0)			/* gets -KERNELBASE into r7 */
+#ifdef CONFIG_SMP
+	addis	r8,r7,mmu_hash_lock@h
+	ori	r8,r8,mmu_hash_lock@l
+	lis	r0,0x0fff
+	b	10f
+11:	lwz	r6,0(r8)
+	cmpwi	0,r6,0
+	bne	11b
+10:	lwarx	r6,0,r8
+	cmpwi	0,r6,0
+	bne-	11b
+	stwcx.	r0,0,r8
+	bne-	10b
+	isync
+#endif
+	/* Get PTE (linux-style) and check access */
+	lis	r0,KERNELBASE@h		/* check if kernel address */
+	cmplw	0,r4,r0
+	mfspr	r8,SPRN_SPRG3		/* current task's THREAD (phys) */
+	ori	r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
+	lwz	r5,PGDIR(r8)		/* virt page-table root */
+	blt+	112f			/* assume user more likely */
+	lis	r5,swapper_pg_dir@ha	/* if kernel address, use */
+	addi	r5,r5,swapper_pg_dir@l	/* kernel page table */
+	rlwimi	r3,r9,32-12,29,29	/* MSR_PR -> _PAGE_USER */
+112:	add	r5,r5,r7		/* convert to phys addr */
+	rlwimi	r5,r4,12,20,29		/* insert top 10 bits of address */
+	lwz	r8,0(r5)		/* get pmd entry */
+	rlwinm.	r8,r8,0,0,19		/* extract address of pte page */
+#ifdef CONFIG_SMP
+	beq-	hash_page_out		/* return if no mapping */
+#else
+	/* XXX it seems like the 601 will give a machine fault on the
+	   rfi if its alignment is wrong (bottom 4 bits of address are
+	   8 or 0xc) and we have had a not-taken conditional branch
+	   to the address following the rfi. */
+	beqlr-
+#endif
+	rlwimi	r8,r4,22,20,29		/* insert next 10 bits of address */
+	rlwinm	r0,r3,32-3,24,24	/* _PAGE_RW access -> _PAGE_DIRTY */
+	ori	r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE
+
+	/*
+	 * Update the linux PTE atomically.  We do the lwarx up-front
+	 * because almost always, there won't be a permission violation
+	 * and there won't already be an HPTE, and thus we will have
+	 * to update the PTE to set _PAGE_HASHPTE.  -- paulus.
+	 */
+retry:
+	lwarx	r6,0,r8			/* get linux-style pte */
+	andc.	r5,r3,r6		/* check access & ~permission */
+#ifdef CONFIG_SMP
+	bne-	hash_page_out		/* return if access not permitted */
+#else
+	bnelr-
+#endif
+	or	r5,r0,r6		/* set accessed/dirty bits */
+	stwcx.	r5,0,r8			/* attempt to update PTE */
+	bne-	retry			/* retry if someone got there first */
+
+	mfsrin	r3,r4			/* get segment reg for segment */
+	mfctr	r0
+	stw	r0,_CTR(r11)
+	bl	create_hpte		/* add the hash table entry */
+
+#ifdef CONFIG_SMP
+	eieio
+	addis	r8,r7,mmu_hash_lock@ha
+	li	r0,0
+	stw	r0,mmu_hash_lock@l(r8)
+#endif
+
+	/* Return from the exception */
+	lwz	r5,_CTR(r11)
+	mtctr	r5
+	lwz	r0,GPR0(r11)
+	lwz	r7,GPR7(r11)
+	lwz	r8,GPR8(r11)
+	b	fast_exception_return
+
+#ifdef CONFIG_SMP
+hash_page_out:
+	eieio
+	addis	r8,r7,mmu_hash_lock@ha
+	li	r0,0
+	stw	r0,mmu_hash_lock@l(r8)
+	blr
+#endif /* CONFIG_SMP */
+
+/*
+ * Add an entry for a particular page to the hash table.
+ *
+ * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval)
+ *
+ * We assume any necessary modifications to the pte (e.g. setting
+ * the accessed bit) have already been done and that there is actually
+ * a hash table in use (i.e. we're not on a 603).
+ */
+_GLOBAL(add_hash_page)
+	mflr	r0
+	stw	r0,4(r1)
+
+	/* Convert context and va to VSID */
+	mulli	r3,r3,897*16		/* multiply context by context skew */
+	rlwinm	r0,r4,4,28,31		/* get ESID (top 4 bits of va) */
+	mulli	r0,r0,0x111		/* multiply by ESID skew */
+	add	r3,r3,r0		/* note create_hpte trims to 24 bits */
+
+#ifdef CONFIG_SMP
+	rlwinm	r8,r1,0,0,18		/* use cpu number to make tag */
+	lwz	r8,TI_CPU(r8)		/* to go in mmu_hash_lock */
+	oris	r8,r8,12
+#endif /* CONFIG_SMP */
+
+	/*
+	 * We disable interrupts here, even on UP, because we don't
+	 * want to race with hash_page, and because we want the
+	 * _PAGE_HASHPTE bit to be a reliable indication of whether
+	 * the HPTE exists (or at least whether one did once).
+	 * We also turn off the MMU for data accesses so that we
+	 * we can't take a hash table miss (assuming the code is
+	 * covered by a BAT).  -- paulus
+	 */
+	mfmsr	r10
+	SYNC
+	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
+	rlwinm	r0,r0,0,28,26		/* clear MSR_DR */
+	mtmsr	r0
+	SYNC_601
+	isync
+
+	tophys(r7,0)
+
+#ifdef CONFIG_SMP
+	addis	r9,r7,mmu_hash_lock@ha
+	addi	r9,r9,mmu_hash_lock@l
+10:	lwarx	r0,0,r9			/* take the mmu_hash_lock */
+	cmpi	0,r0,0
+	bne-	11f
+	stwcx.	r8,0,r9
+	beq+	12f
+11:	lwz	r0,0(r9)
+	cmpi	0,r0,0
+	beq	10b
+	b	11b
+12:	isync
+#endif
+
+	/*
+	 * Fetch the linux pte and test and set _PAGE_HASHPTE atomically.
+	 * If _PAGE_HASHPTE was already set, we don't replace the existing
+	 * HPTE, so we just unlock and return.
+	 */
+	mr	r8,r5
+	rlwimi	r8,r4,22,20,29
+1:	lwarx	r6,0,r8
+	andi.	r0,r6,_PAGE_HASHPTE
+	bne	9f			/* if HASHPTE already set, done */
+	ori	r5,r6,_PAGE_HASHPTE
+	stwcx.	r5,0,r8
+	bne-	1b
+
+	bl	create_hpte
+
+9:
+#ifdef CONFIG_SMP
+	eieio
+	li	r0,0
+	stw	r0,0(r9)		/* clear mmu_hash_lock */
+#endif
+
+	/* reenable interrupts and DR */
+	mtmsr	r10
+	SYNC_601
+	isync
+
+	lwz	r0,4(r1)
+	mtlr	r0
+	blr
+
+/*
+ * This routine adds a hardware PTE to the hash table.
+ * It is designed to be called with the MMU either on or off.
+ * r3 contains the VSID, r4 contains the virtual address,
+ * r5 contains the linux PTE, r6 contains the old value of the
+ * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the
+ * offset to be added to addresses (0 if the MMU is on,
+ * -KERNELBASE if it is off).
+ * On SMP, the caller should have the mmu_hash_lock held.
+ * We assume that the caller has (or will) set the _PAGE_HASHPTE
+ * bit in the linux PTE in memory.  The value passed in r6 should
+ * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set
+ * this routine will skip the search for an existing HPTE.
+ * This procedure modifies r0, r3 - r6, r8, cr0.
+ *  -- paulus.
+ *
+ * For speed, 4 of the instructions get patched once the size and
+ * physical address of the hash table are known.  These definitions
+ * of Hash_base and Hash_bits below are just an example.
+ */
+Hash_base = 0xc0180000
+Hash_bits = 12				/* e.g. 256kB hash table */
+Hash_msk = (((1 << Hash_bits) - 1) * 64)
+
+#ifndef CONFIG_PPC64BRIDGE
+/* defines for the PTE format for 32-bit PPCs */
+#define PTE_SIZE	8
+#define PTEG_SIZE	64
+#define LG_PTEG_SIZE	6
+#define LDPTEu		lwzu
+#define STPTE		stw
+#define CMPPTE		cmpw
+#define PTE_H		0x40
+#define PTE_V		0x80000000
+#define TST_V(r)	rlwinm. r,r,0,0,0
+#define SET_V(r)	oris r,r,PTE_V@h
+#define CLR_V(r,t)	rlwinm r,r,0,1,31
+
+#else
+/* defines for the PTE format for 64-bit PPCs */
+#define PTE_SIZE	16
+#define PTEG_SIZE	128
+#define LG_PTEG_SIZE	7
+#define LDPTEu		ldu
+#define STPTE		std
+#define CMPPTE		cmpd
+#define PTE_H		2
+#define PTE_V		1
+#define TST_V(r)	andi. r,r,PTE_V
+#define SET_V(r)	ori r,r,PTE_V
+#define CLR_V(r,t)	li t,PTE_V; andc r,r,t
+#endif /* CONFIG_PPC64BRIDGE */
+
+#define HASH_LEFT	31-(LG_PTEG_SIZE+Hash_bits-1)
+#define HASH_RIGHT	31-LG_PTEG_SIZE
+
+_GLOBAL(create_hpte)
+	/* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
+	rlwinm	r8,r5,32-10,31,31	/* _PAGE_RW -> PP lsb */
+	rlwinm	r0,r5,32-7,31,31	/* _PAGE_DIRTY -> PP lsb */
+	and	r8,r8,r0		/* writable if _RW & _DIRTY */
+	rlwimi	r5,r5,32-1,30,30	/* _PAGE_USER -> PP msb */
+	rlwimi	r5,r5,32-2,31,31	/* _PAGE_USER -> PP lsb */
+	ori	r8,r8,0xe14		/* clear out reserved bits and M */
+	andc	r8,r5,r8		/* PP = user? (rw&dirty? 2: 3): 0 */
+BEGIN_FTR_SECTION
+	ori	r8,r8,_PAGE_COHERENT	/* set M (coherence required) */
+END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT)
+
+	/* Construct the high word of the PPC-style PTE (r5) */
+#ifndef CONFIG_PPC64BRIDGE
+	rlwinm	r5,r3,7,1,24		/* put VSID in 0x7fffff80 bits */
+	rlwimi	r5,r4,10,26,31		/* put in API (abbrev page index) */
+#else /* CONFIG_PPC64BRIDGE */
+	clrlwi	r3,r3,8			/* reduce vsid to 24 bits */
+	sldi	r5,r3,12		/* shift vsid into position */
+	rlwimi	r5,r4,16,20,24		/* put in API (abbrev page index) */
+#endif /* CONFIG_PPC64BRIDGE */
+	SET_V(r5)			/* set V (valid) bit */
+
+	/* Get the address of the primary PTE group in the hash table (r3) */
+_GLOBAL(hash_page_patch_A)
+	addis	r0,r7,Hash_base@h	/* base address of hash table */
+	rlwimi	r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT    /* VSID -> hash */
+	rlwinm	r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
+	xor	r3,r3,r0		/* make primary hash */
+	li	r0,8			/* PTEs/group */
+
+	/*
+	 * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search
+	 * if it is clear, meaning that the HPTE isn't there already...
+	 */
+	andi.	r6,r6,_PAGE_HASHPTE
+	beq+	10f			/* no PTE: go look for an empty slot */
+	tlbie	r4
+
+	addis	r4,r7,htab_hash_searches@ha
+	lwz	r6,htab_hash_searches@l(r4)
+	addi	r6,r6,1			/* count how many searches we do */
+	stw	r6,htab_hash_searches@l(r4)
+
+	/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
+	mtctr	r0
+	addi	r4,r3,-PTE_SIZE
+1:	LDPTEu	r6,PTE_SIZE(r4)		/* get next PTE */
+	CMPPTE	0,r6,r5
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	found_slot
+
+	/* Search the secondary PTEG for a matching PTE */
+	ori	r5,r5,PTE_H		/* set H (secondary hash) bit */
+_GLOBAL(hash_page_patch_B)
+	xoris	r4,r3,Hash_msk>>16	/* compute secondary hash */
+	xori	r4,r4,(-PTEG_SIZE & 0xffff)
+	addi	r4,r4,-PTE_SIZE
+	mtctr	r0
+2:	LDPTEu	r6,PTE_SIZE(r4)
+	CMPPTE	0,r6,r5
+	bdnzf	2,2b
+	beq+	found_slot
+	xori	r5,r5,PTE_H		/* clear H bit again */
+
+	/* Search the primary PTEG for an empty slot */
+10:	mtctr	r0
+	addi	r4,r3,-PTE_SIZE		/* search primary PTEG */
+1:	LDPTEu	r6,PTE_SIZE(r4)		/* get next PTE */
+	TST_V(r6)			/* test valid bit */
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	found_empty
+
+	/* update counter of times that the primary PTEG is full */
+	addis	r4,r7,primary_pteg_full@ha
+	lwz	r6,primary_pteg_full@l(r4)
+	addi	r6,r6,1
+	stw	r6,primary_pteg_full@l(r4)
+
+	/* Search the secondary PTEG for an empty slot */
+	ori	r5,r5,PTE_H		/* set H (secondary hash) bit */
+_GLOBAL(hash_page_patch_C)
+	xoris	r4,r3,Hash_msk>>16	/* compute secondary hash */
+	xori	r4,r4,(-PTEG_SIZE & 0xffff)
+	addi	r4,r4,-PTE_SIZE
+	mtctr	r0
+2:	LDPTEu	r6,PTE_SIZE(r4)
+	TST_V(r6)
+	bdnzf	2,2b
+	beq+	found_empty
+	xori	r5,r5,PTE_H		/* clear H bit again */
+
+	/*
+	 * Choose an arbitrary slot in the primary PTEG to overwrite.
+	 * Since both the primary and secondary PTEGs are full, and we
+	 * have no information that the PTEs in the primary PTEG are
+	 * more important or useful than those in the secondary PTEG,
+	 * and we know there is a definite (although small) speed
+	 * advantage to putting the PTE in the primary PTEG, we always
+	 * put the PTE in the primary PTEG.
+	 */
+	addis	r4,r7,next_slot@ha
+	lwz	r6,next_slot@l(r4)
+	addi	r6,r6,PTE_SIZE
+	andi.	r6,r6,7*PTE_SIZE
+	stw	r6,next_slot@l(r4)
+	add	r4,r3,r6
+
+#ifndef CONFIG_SMP
+	/* Store PTE in PTEG */
+found_empty:
+	STPTE	r5,0(r4)
+found_slot:
+	STPTE	r8,PTE_SIZE/2(r4)
+
+#else /* CONFIG_SMP */
+/*
+ * Between the tlbie above and updating the hash table entry below,
+ * another CPU could read the hash table entry and put it in its TLB.
+ * There are 3 cases:
+ * 1. using an empty slot
+ * 2. updating an earlier entry to change permissions (i.e. enable write)
+ * 3. taking over the PTE for an unrelated address
+ *
+ * In each case it doesn't really matter if the other CPUs have the old
+ * PTE in their TLB.  So we don't need to bother with another tlbie here,
+ * which is convenient as we've overwritten the register that had the
+ * address. :-)  The tlbie above is mainly to make sure that this CPU comes
+ * and gets the new PTE from the hash table.
+ *
+ * We do however have to make sure that the PTE is never in an invalid
+ * state with the V bit set.
+ */
+found_empty:
+found_slot:
+	CLR_V(r5,r0)		/* clear V (valid) bit in PTE */
+	STPTE	r5,0(r4)
+	sync
+	TLBSYNC
+	STPTE	r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */
+	sync
+	SET_V(r5)
+	STPTE	r5,0(r4)	/* finally set V bit in PTE */
+#endif /* CONFIG_SMP */
+
+	sync		/* make sure pte updates get to memory */
+	blr
+
+	.comm	next_slot,4
+	.comm	primary_pteg_full,4
+	.comm	htab_hash_searches,4
+
+/*
+ * Flush the entry for a particular page from the hash table.
+ *
+ * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval,
+ *		    int count)
+ *
+ * We assume that there is a hash table in use (Hash != 0).
+ */
+_GLOBAL(flush_hash_pages)
+	tophys(r7,0)
+
+	/*
+	 * We disable interrupts here, even on UP, because we want
+	 * the _PAGE_HASHPTE bit to be a reliable indication of
+	 * whether the HPTE exists (or at least whether one did once).
+	 * We also turn off the MMU for data accesses so that we
+	 * we can't take a hash table miss (assuming the code is
+	 * covered by a BAT).  -- paulus
+	 */
+	mfmsr	r10
+	SYNC
+	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
+	rlwinm	r0,r0,0,28,26		/* clear MSR_DR */
+	mtmsr	r0
+	SYNC_601
+	isync
+
+	/* First find a PTE in the range that has _PAGE_HASHPTE set */
+	rlwimi	r5,r4,22,20,29
+1:	lwz	r0,0(r5)
+	cmpwi	cr1,r6,1
+	andi.	r0,r0,_PAGE_HASHPTE
+	bne	2f
+	ble	cr1,19f
+	addi	r4,r4,0x1000
+	addi	r5,r5,4
+	addi	r6,r6,-1
+	b	1b
+
+	/* Convert context and va to VSID */
+2:	mulli	r3,r3,897*16		/* multiply context by context skew */
+	rlwinm	r0,r4,4,28,31		/* get ESID (top 4 bits of va) */
+	mulli	r0,r0,0x111		/* multiply by ESID skew */
+	add	r3,r3,r0		/* note code below trims to 24 bits */
+
+	/* Construct the high word of the PPC-style PTE (r11) */
+#ifndef CONFIG_PPC64BRIDGE
+	rlwinm	r11,r3,7,1,24		/* put VSID in 0x7fffff80 bits */
+	rlwimi	r11,r4,10,26,31		/* put in API (abbrev page index) */
+#else /* CONFIG_PPC64BRIDGE */
+	clrlwi	r3,r3,8			/* reduce vsid to 24 bits */
+	sldi	r11,r3,12		/* shift vsid into position */
+	rlwimi	r11,r4,16,20,24		/* put in API (abbrev page index) */
+#endif /* CONFIG_PPC64BRIDGE */
+	SET_V(r11)			/* set V (valid) bit */
+
+#ifdef CONFIG_SMP
+	addis	r9,r7,mmu_hash_lock@ha
+	addi	r9,r9,mmu_hash_lock@l
+	rlwinm	r8,r1,0,0,18
+	add	r8,r8,r7
+	lwz	r8,TI_CPU(r8)
+	oris	r8,r8,9
+10:	lwarx	r0,0,r9
+	cmpi	0,r0,0
+	bne-	11f
+	stwcx.	r8,0,r9
+	beq+	12f
+11:	lwz	r0,0(r9)
+	cmpi	0,r0,0
+	beq	10b
+	b	11b
+12:	isync
+#endif
+
+	/*
+	 * Check the _PAGE_HASHPTE bit in the linux PTE.  If it is
+	 * already clear, we're done (for this pte).  If not,
+	 * clear it (atomically) and proceed.  -- paulus.
+	 */
+33:	lwarx	r8,0,r5			/* fetch the pte */
+	andi.	r0,r8,_PAGE_HASHPTE
+	beq	8f			/* done if HASHPTE is already clear */
+	rlwinm	r8,r8,0,31,29		/* clear HASHPTE bit */
+	stwcx.	r8,0,r5			/* update the pte */
+	bne-	33b
+
+	/* Get the address of the primary PTE group in the hash table (r3) */
+_GLOBAL(flush_hash_patch_A)
+	addis	r8,r7,Hash_base@h	/* base address of hash table */
+	rlwimi	r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT    /* VSID -> hash */
+	rlwinm	r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
+	xor	r8,r0,r8		/* make primary hash */
+
+	/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
+	li	r0,8			/* PTEs/group */
+	mtctr	r0
+	addi	r12,r8,-PTE_SIZE
+1:	LDPTEu	r0,PTE_SIZE(r12)	/* get next PTE */
+	CMPPTE	0,r0,r11
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	3f
+
+	/* Search the secondary PTEG for a matching PTE */
+	ori	r11,r11,PTE_H		/* set H (secondary hash) bit */
+	li	r0,8			/* PTEs/group */
+_GLOBAL(flush_hash_patch_B)
+	xoris	r12,r8,Hash_msk>>16	/* compute secondary hash */
+	xori	r12,r12,(-PTEG_SIZE & 0xffff)
+	addi	r12,r12,-PTE_SIZE
+	mtctr	r0
+2:	LDPTEu	r0,PTE_SIZE(r12)
+	CMPPTE	0,r0,r11
+	bdnzf	2,2b
+	xori	r11,r11,PTE_H		/* clear H again */
+	bne-	4f			/* should rarely fail to find it */
+
+3:	li	r0,0
+	STPTE	r0,0(r12)		/* invalidate entry */
+4:	sync
+	tlbie	r4			/* in hw tlb too */
+	sync
+
+8:	ble	cr1,9f			/* if all ptes checked */
+81:	addi	r6,r6,-1
+	addi	r5,r5,4			/* advance to next pte */
+	addi	r4,r4,0x1000
+	lwz	r0,0(r5)		/* check next pte */
+	cmpwi	cr1,r6,1
+	andi.	r0,r0,_PAGE_HASHPTE
+	bne	33b
+	bgt	cr1,81b
+
+9:
+#ifdef CONFIG_SMP
+	TLBSYNC
+	li	r0,0
+	stw	r0,0(r9)		/* clear mmu_hash_lock */
+#endif
+
+19:	mtmsr	r10
+	SYNC_601
+	isync
+	blr
diff --git a/arch/powerpc/mm/init.c b/arch/powerpc/mm/init.c
new file mode 100644
index 000000000000..f4d983a6e521
--- /dev/null
+++ b/arch/powerpc/mm/init.c
@@ -0,0 +1,581 @@
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *  PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/initrd.h>
+#include <linux/pagemap.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/btext.h>
+#include <asm/tlb.h>
+#include <asm/bootinfo.h>
+#include <asm/prom.h>
+
+#include "mem_pieces.h"
+#include "mmu_decl.h"
+
+#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
+/* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */
+#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE))
+#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL"
+#endif
+#endif
+#define MAX_LOW_MEM	CONFIG_LOWMEM_SIZE
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+unsigned long total_memory;
+unsigned long total_lowmem;
+
+unsigned long ppc_memstart;
+unsigned long ppc_memoffset = PAGE_OFFSET;
+
+int mem_init_done;
+int init_bootmem_done;
+int boot_mapsize;
+#ifdef CONFIG_PPC_PMAC
+unsigned long agp_special_page;
+#endif
+
+extern char _end[];
+extern char etext[], _stext[];
+extern char __init_begin, __init_end;
+
+#ifdef CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
+#endif
+
+void MMU_init(void);
+void set_phys_avail(unsigned long total_ram);
+
+/* XXX should be in current.h  -- paulus */
+extern struct task_struct *current_set[NR_CPUS];
+
+char *klimit = _end;
+struct mem_pieces phys_avail;
+struct device_node *memory_node;
+
+/*
+ * this tells the system to map all of ram with the segregs
+ * (i.e. page tables) instead of the bats.
+ * -- Cort
+ */
+int __map_without_bats;
+int __map_without_ltlbs;
+
+/* max amount of RAM to use */
+unsigned long __max_memory;
+/* max amount of low RAM to map in */
+unsigned long __max_low_memory = MAX_LOW_MEM;
+
+/*
+ * Read in a property describing some pieces of memory.
+ */
+static int __init get_mem_prop(char *name, struct mem_pieces *mp)
+{
+	struct reg_property *rp;
+	int i, s;
+	unsigned int *ip;
+	int nac = prom_n_addr_cells(memory_node);
+	int nsc = prom_n_size_cells(memory_node);
+
+	ip = (unsigned int *) get_property(memory_node, name, &s);
+	if (ip == NULL) {
+		printk(KERN_ERR "error: couldn't get %s property on /memory\n",
+		       name);
+		return 0;
+	}
+	s /= (nsc + nac) * 4;
+	rp = mp->regions;
+	for (i = 0; i < s; ++i, ip += nac+nsc) {
+		if (nac >= 2 && ip[nac-2] != 0)
+			continue;
+		rp->address = ip[nac-1];
+		if (nsc >= 2 && ip[nac+nsc-2] != 0)
+			rp->size = ~0U;
+		else
+			rp->size = ip[nac+nsc-1];
+		++rp;
+	}
+	mp->n_regions = rp - mp->regions;
+
+	/* Make sure the pieces are sorted. */
+	mem_pieces_sort(mp);
+	mem_pieces_coalesce(mp);
+	return 1;
+}
+
+/*
+ * Collect information about physical RAM and which pieces are
+ * already in use from the device tree.
+ */
+unsigned long __init find_end_of_memory(void)
+{
+	unsigned long a, total;
+	struct mem_pieces phys_mem;
+
+	/*
+	 * Find out where physical memory is, and check that it
+	 * starts at 0 and is contiguous.  It seems that RAM is
+	 * always physically contiguous on Power Macintoshes.
+	 *
+	 * Supporting discontiguous physical memory isn't hard,
+	 * it just makes the virtual <-> physical mapping functions
+	 * more complicated (or else you end up wasting space
+	 * in mem_map).
+	 */
+	memory_node = find_devices("memory");
+	if (memory_node == NULL || !get_mem_prop("reg", &phys_mem)
+	    || phys_mem.n_regions == 0)
+		panic("No RAM??");
+	a = phys_mem.regions[0].address;
+	if (a != 0)
+		panic("RAM doesn't start at physical address 0");
+	total = phys_mem.regions[0].size;
+
+	if (phys_mem.n_regions > 1) {
+		printk("RAM starting at 0x%x is not contiguous\n",
+		       phys_mem.regions[1].address);
+		printk("Using RAM from 0 to 0x%lx\n", total-1);
+	}
+
+	return total;
+}
+
+/*
+ * Check for command-line options that affect what MMU_init will do.
+ */
+void MMU_setup(void)
+{
+	/* Check for nobats option (used in mapin_ram). */
+	if (strstr(cmd_line, "nobats")) {
+		__map_without_bats = 1;
+	}
+
+	if (strstr(cmd_line, "noltlbs")) {
+		__map_without_ltlbs = 1;
+	}
+
+	/* Look for mem= option on command line */
+	if (strstr(cmd_line, "mem=")) {
+		char *p, *q;
+		unsigned long maxmem = 0;
+
+		for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) {
+			q = p + 4;
+			if (p > cmd_line && p[-1] != ' ')
+				continue;
+			maxmem = simple_strtoul(q, &q, 0);
+			if (*q == 'k' || *q == 'K') {
+				maxmem <<= 10;
+				++q;
+			} else if (*q == 'm' || *q == 'M') {
+				maxmem <<= 20;
+				++q;
+			}
+		}
+		__max_memory = maxmem;
+	}
+}
+
+/*
+ * MMU_init sets up the basic memory mappings for the kernel,
+ * including both RAM and possibly some I/O regions,
+ * and sets up the page tables and the MMU hardware ready to go.
+ */
+void __init MMU_init(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:enter", 0x111);
+
+	/* parse args from command line */
+	MMU_setup();
+
+	/*
+	 * Figure out how much memory we have, how much
+	 * is lowmem, and how much is highmem.  If we were
+	 * passed the total memory size from the bootloader,
+	 * just use it.
+	 */
+	if (boot_mem_size)
+		total_memory = boot_mem_size;
+	else
+		total_memory = ppc_md.find_end_of_memory();
+
+	if (__max_memory && total_memory > __max_memory)
+		total_memory = __max_memory;
+	total_lowmem = total_memory;
+#ifdef CONFIG_FSL_BOOKE
+	/* Freescale Book-E parts expect lowmem to be mapped by fixed TLB
+	 * entries, so we need to adjust lowmem to match the amount we can map
+	 * in the fixed entries */
+	adjust_total_lowmem();
+#endif /* CONFIG_FSL_BOOKE */
+	if (total_lowmem > __max_low_memory) {
+		total_lowmem = __max_low_memory;
+#ifndef CONFIG_HIGHMEM
+		total_memory = total_lowmem;
+#endif /* CONFIG_HIGHMEM */
+	}
+	set_phys_avail(total_lowmem);
+
+	/* Initialize the MMU hardware */
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:hw init", 0x300);
+	MMU_init_hw();
+
+	/* Map in all of RAM starting at KERNELBASE */
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:mapin", 0x301);
+	mapin_ram();
+
+#ifdef CONFIG_HIGHMEM
+	ioremap_base = PKMAP_BASE;
+#else
+	ioremap_base = 0xfe000000UL;	/* for now, could be 0xfffff000 */
+#endif /* CONFIG_HIGHMEM */
+	ioremap_bot = ioremap_base;
+
+	/* Map in I/O resources */
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:setio", 0x302);
+	if (ppc_md.setup_io_mappings)
+		ppc_md.setup_io_mappings();
+
+	/* Initialize the context management stuff */
+	mmu_context_init();
+
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:exit", 0x211);
+
+#ifdef CONFIG_BOOTX_TEXT
+	/* By default, we are no longer mapped */
+       	boot_text_mapped = 0;
+	/* Must be done last, or ppc_md.progress will die. */
+	map_boot_text();
+#endif
+}
+
+/* This is only called until mem_init is done. */
+void __init *early_get_page(void)
+{
+	void *p;
+
+	if (init_bootmem_done) {
+		p = alloc_bootmem_pages(PAGE_SIZE);
+	} else {
+		p = mem_pieces_find(PAGE_SIZE, PAGE_SIZE);
+	}
+	return p;
+}
+
+/* Free up now-unused memory */
+static void free_sec(unsigned long start, unsigned long end, const char *name)
+{
+	unsigned long cnt = 0;
+
+	while (start < end) {
+		ClearPageReserved(virt_to_page(start));
+		set_page_count(virt_to_page(start), 1);
+		free_page(start);
+		cnt++;
+		start += PAGE_SIZE;
+ 	}
+	if (cnt) {
+		printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name);
+		totalram_pages += cnt;
+	}
+}
+
+void free_initmem(void)
+{
+#define FREESEC(TYPE) \
+	free_sec((unsigned long)(&__ ## TYPE ## _begin), \
+		 (unsigned long)(&__ ## TYPE ## _end), \
+		 #TYPE);
+
+	printk ("Freeing unused kernel memory:");
+	FREESEC(init);
+ 	printk("\n");
+	ppc_md.progress = NULL;
+#undef FREESEC
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	if (start < end)
+		printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+	for (; start < end; start += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(start));
+		set_page_count(virt_to_page(start), 1);
+		free_page(start);
+		totalram_pages++;
+	}
+}
+#endif
+
+/*
+ * Initialize the bootmem system and give it all the memory we
+ * have available.
+ */
+void __init do_init_bootmem(void)
+{
+	unsigned long start, size;
+	int i;
+
+	/*
+	 * Find an area to use for the bootmem bitmap.
+	 * We look for the first area which is at least
+	 * 128kB in length (128kB is enough for a bitmap
+	 * for 4GB of memory, using 4kB pages), plus 1 page
+	 * (in case the address isn't page-aligned).
+	 */
+	start = 0;
+	size = 0;
+	for (i = 0; i < phys_avail.n_regions; ++i) {
+		unsigned long a = phys_avail.regions[i].address;
+		unsigned long s = phys_avail.regions[i].size;
+		if (s <= size)
+			continue;
+		start = a;
+		size = s;
+		if (s >= 33 * PAGE_SIZE)
+			break;
+	}
+	start = PAGE_ALIGN(start);
+
+	min_low_pfn = start >> PAGE_SHIFT;
+	max_low_pfn = (PPC_MEMSTART + total_lowmem) >> PAGE_SHIFT;
+	max_pfn = (PPC_MEMSTART + total_memory) >> PAGE_SHIFT;
+	boot_mapsize = init_bootmem_node(&contig_page_data, min_low_pfn,
+					 PPC_MEMSTART >> PAGE_SHIFT,
+					 max_low_pfn);
+
+	/* remove the bootmem bitmap from the available memory */
+	mem_pieces_remove(&phys_avail, start, boot_mapsize, 1);
+
+	/* add everything in phys_avail into the bootmem map */
+	for (i = 0; i < phys_avail.n_regions; ++i)
+		free_bootmem(phys_avail.regions[i].address,
+			     phys_avail.regions[i].size);
+
+	init_bootmem_done = 1;
+}
+
+/*
+ * paging_init() sets up the page tables - in fact we've already done this.
+ */
+void __init paging_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES], i;
+
+#ifdef CONFIG_HIGHMEM
+	map_page(PKMAP_BASE, 0, 0);	/* XXX gross */
+	pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k
+			(PKMAP_BASE), PKMAP_BASE), PKMAP_BASE);
+	map_page(KMAP_FIX_BEGIN, 0, 0);	/* XXX gross */
+	kmap_pte = pte_offset_kernel(pmd_offset(pgd_offset_k
+			(KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN);
+	kmap_prot = PAGE_KERNEL;
+#endif /* CONFIG_HIGHMEM */
+
+	/*
+	 * All pages are DMA-able so we put them all in the DMA zone.
+	 */
+	zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT;
+	for (i = 1; i < MAX_NR_ZONES; i++)
+		zones_size[i] = 0;
+
+#ifdef CONFIG_HIGHMEM
+	zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT;
+#endif /* CONFIG_HIGHMEM */
+
+	free_area_init(zones_size);
+}
+
+void __init mem_init(void)
+{
+	unsigned long addr;
+	int codepages = 0;
+	int datapages = 0;
+	int initpages = 0;
+#ifdef CONFIG_HIGHMEM
+	unsigned long highmem_mapnr;
+
+	highmem_mapnr = total_lowmem >> PAGE_SHIFT;
+#endif /* CONFIG_HIGHMEM */
+	max_mapnr = total_memory >> PAGE_SHIFT;
+
+	high_memory = (void *) __va(PPC_MEMSTART + total_lowmem);
+	num_physpages = max_mapnr;	/* RAM is assumed contiguous */
+
+	totalram_pages += free_all_bootmem();
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	/* if we are booted from BootX with an initial ramdisk,
+	   make sure the ramdisk pages aren't reserved. */
+	if (initrd_start) {
+		for (addr = initrd_start; addr < initrd_end; addr += PAGE_SIZE)
+			ClearPageReserved(virt_to_page(addr));
+	}
+#endif /* CONFIG_BLK_DEV_INITRD */
+
+#ifdef CONFIG_PPC_OF
+	/* mark the RTAS pages as reserved */
+	if ( rtas_data )
+		for (addr = (ulong)__va(rtas_data);
+		     addr < PAGE_ALIGN((ulong)__va(rtas_data)+rtas_size) ;
+		     addr += PAGE_SIZE)
+			SetPageReserved(virt_to_page(addr));
+#endif
+#ifdef CONFIG_PPC_PMAC
+	if (agp_special_page)
+		SetPageReserved(virt_to_page(agp_special_page));
+#endif
+	for (addr = PAGE_OFFSET; addr < (unsigned long)high_memory;
+	     addr += PAGE_SIZE) {
+		if (!PageReserved(virt_to_page(addr)))
+			continue;
+		if (addr < (ulong) etext)
+			codepages++;
+		else if (addr >= (unsigned long)&__init_begin
+			 && addr < (unsigned long)&__init_end)
+			initpages++;
+		else if (addr < (ulong) klimit)
+			datapages++;
+	}
+
+#ifdef CONFIG_HIGHMEM
+	{
+		unsigned long pfn;
+
+		for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
+			struct page *page = mem_map + pfn;
+
+			ClearPageReserved(page);
+			set_page_count(page, 1);
+			__free_page(page);
+			totalhigh_pages++;
+		}
+		totalram_pages += totalhigh_pages;
+	}
+#endif /* CONFIG_HIGHMEM */
+
+        printk("Memory: %luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n",
+	       (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10),
+	       codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10),
+	       initpages<< (PAGE_SHIFT-10),
+	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+
+#ifdef CONFIG_PPC_PMAC
+	if (agp_special_page)
+		printk(KERN_INFO "AGP special page: 0x%08lx\n", agp_special_page);
+#endif
+
+	mem_init_done = 1;
+}
+
+/*
+ * Set phys_avail to the amount of physical memory,
+ * less the kernel text/data/bss.
+ */
+void __init
+set_phys_avail(unsigned long total_memory)
+{
+	unsigned long kstart, ksize;
+
+	/*
+	 * Initially, available physical memory is equivalent to all
+	 * physical memory.
+	 */
+
+	phys_avail.regions[0].address = PPC_MEMSTART;
+	phys_avail.regions[0].size = total_memory;
+	phys_avail.n_regions = 1;
+
+	/*
+	 * Map out the kernel text/data/bss from the available physical
+	 * memory.
+	 */
+
+	kstart = __pa(_stext);	/* should be 0 */
+	ksize = PAGE_ALIGN(klimit - _stext);
+
+	mem_pieces_remove(&phys_avail, kstart, ksize, 0);
+	mem_pieces_remove(&phys_avail, 0, 0x4000, 0);
+
+#if defined(CONFIG_BLK_DEV_INITRD)
+	/* Remove the init RAM disk from the available memory. */
+	if (initrd_start) {
+		mem_pieces_remove(&phys_avail, __pa(initrd_start),
+				  initrd_end - initrd_start, 1);
+	}
+#endif /* CONFIG_BLK_DEV_INITRD */
+#ifdef CONFIG_PPC_OF
+	/* remove the RTAS pages from the available memory */
+	if (rtas_data)
+		mem_pieces_remove(&phys_avail, rtas_data, rtas_size, 1);
+#endif
+#ifdef CONFIG_PPC_PMAC
+	/* Because of some uninorth weirdness, we need a page of
+	 * memory as high as possible (it must be outside of the
+	 * bus address seen as the AGP aperture). It will be used
+	 * by the r128 DRM driver
+	 *
+	 * FIXME: We need to make sure that page doesn't overlap any of the\
+	 * above. This could be done by improving mem_pieces_find to be able
+	 * to do a backward search from the end of the list.
+	 */
+	if (_machine == _MACH_Pmac && find_devices("uni-north-agp")) {
+		agp_special_page = (total_memory - PAGE_SIZE);
+		mem_pieces_remove(&phys_avail, agp_special_page, PAGE_SIZE, 0);
+		agp_special_page = (unsigned long)__va(agp_special_page);
+	}
+#endif /* CONFIG_PPC_PMAC */
+}
+
+/* Mark some memory as reserved by removing it from phys_avail. */
+void __init reserve_phys_mem(unsigned long start, unsigned long size)
+{
+	mem_pieces_remove(&phys_avail, start, size, 1);
+}
diff --git a/arch/powerpc/mm/init64.c b/arch/powerpc/mm/init64.c
new file mode 100644
index 000000000000..81f6745b31ef
--- /dev/null
+++ b/arch/powerpc/mm/init64.c
@@ -0,0 +1,385 @@
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret@us.ibm.com>
+ *      Rework for PPC64 port.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/idr.h>
+#include <linux/nodemask.h>
+#include <linux/module.h>
+
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/lmb.h>
+#include <asm/rtas.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/tlb.h>
+#include <asm/eeh.h>
+#include <asm/processor.h>
+#include <asm/mmzone.h>
+#include <asm/cputable.h>
+#include <asm/ppcdebug.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/iommu.h>
+#include <asm/abs_addr.h>
+#include <asm/vdso.h>
+#include <asm/imalloc.h>
+
+#if PGTABLE_RANGE > USER_VSID_RANGE
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#warning TASK_SIZE is smaller than it needs to be.
+#endif
+
+int mem_init_done;
+unsigned long ioremap_bot = IMALLOC_BASE;
+static unsigned long phbs_io_bot = PHBS_IO_BASE;
+
+extern pgd_t swapper_pg_dir[];
+extern struct task_struct *current_set[NR_CPUS];
+
+unsigned long klimit = (unsigned long)_end;
+
+unsigned long _SDR1=0;
+unsigned long _ASR=0;
+
+/* max amount of RAM to use */
+unsigned long __max_memory;
+
+/* info on what we think the IO hole is */
+unsigned long 	io_hole_start;
+unsigned long	io_hole_size;
+
+/*
+ * Do very early mm setup.
+ */
+void __init mm_init_ppc64(void)
+{
+#ifndef CONFIG_PPC_ISERIES
+	unsigned long i;
+#endif
+
+	ppc64_boot_msg(0x100, "MM Init");
+
+	/* This is the story of the IO hole... please, keep seated,
+	 * unfortunately, we are out of oxygen masks at the moment.
+	 * So we need some rough way to tell where your big IO hole
+	 * is. On pmac, it's between 2G and 4G, on POWER3, it's around
+	 * that area as well, on POWER4 we don't have one, etc...
+	 * We need that as a "hint" when sizing the TCE table on POWER3
+	 * So far, the simplest way that seem work well enough for us it
+	 * to just assume that the first discontinuity in our physical
+	 * RAM layout is the IO hole. That may not be correct in the future
+	 * (and isn't on iSeries but then we don't care ;)
+	 */
+
+#ifndef CONFIG_PPC_ISERIES
+	for (i = 1; i < lmb.memory.cnt; i++) {
+		unsigned long base, prevbase, prevsize;
+
+		prevbase = lmb.memory.region[i-1].base;
+		prevsize = lmb.memory.region[i-1].size;
+		base = lmb.memory.region[i].base;
+		if (base > (prevbase + prevsize)) {
+			io_hole_start = prevbase + prevsize;
+			io_hole_size = base  - (prevbase + prevsize);
+			break;
+		}
+	}
+#endif /* CONFIG_PPC_ISERIES */
+	if (io_hole_start)
+		printk("IO Hole assumed to be %lx -> %lx\n",
+		       io_hole_start, io_hole_start + io_hole_size - 1);
+
+	ppc64_boot_msg(0x100, "MM Init Done");
+}
+
+void free_initmem(void)
+{
+	unsigned long addr;
+
+	addr = (unsigned long)__init_begin;
+	for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
+		memset((void *)addr, 0xcc, PAGE_SIZE);
+		ClearPageReserved(virt_to_page(addr));
+		set_page_count(virt_to_page(addr), 1);
+		free_page(addr);
+		totalram_pages++;
+	}
+	printk ("Freeing unused kernel memory: %luk freed\n",
+		((unsigned long)__init_end - (unsigned long)__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	if (start < end)
+		printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+	for (; start < end; start += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(start));
+		set_page_count(virt_to_page(start), 1);
+		free_page(start);
+		totalram_pages++;
+	}
+}
+#endif
+
+/*
+ * Initialize the bootmem system and give it all the memory we
+ * have available.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+void __init do_init_bootmem(void)
+{
+	unsigned long i;
+	unsigned long start, bootmap_pages;
+	unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	int boot_mapsize;
+
+	/*
+	 * Find an area to use for the bootmem bitmap.  Calculate the size of
+	 * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
+	 * Add 1 additional page in case the address isn't page-aligned.
+	 */
+	bootmap_pages = bootmem_bootmap_pages(total_pages);
+
+	start = lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
+	BUG_ON(!start);
+
+	boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
+
+	max_pfn = max_low_pfn;
+
+	/* Add all physical memory to the bootmem map, mark each area
+	 * present.
+	 */
+	for (i=0; i < lmb.memory.cnt; i++)
+		free_bootmem(lmb.memory.region[i].base,
+			     lmb_size_bytes(&lmb.memory, i));
+
+	/* reserve the sections we're already using */
+	for (i=0; i < lmb.reserved.cnt; i++)
+		reserve_bootmem(lmb.reserved.region[i].base,
+				lmb_size_bytes(&lmb.reserved, i));
+
+	for (i=0; i < lmb.memory.cnt; i++)
+		memory_present(0, lmb_start_pfn(&lmb.memory, i),
+			       lmb_end_pfn(&lmb.memory, i));
+}
+
+/*
+ * paging_init() sets up the page tables - in fact we've already done this.
+ */
+void __init paging_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES];
+	unsigned long zholes_size[MAX_NR_ZONES];
+	unsigned long total_ram = lmb_phys_mem_size();
+	unsigned long top_of_ram = lmb_end_of_DRAM();
+
+	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
+	       top_of_ram, total_ram);
+	printk(KERN_INFO "Memory hole size: %ldMB\n",
+	       (top_of_ram - total_ram) >> 20);
+	/*
+	 * All pages are DMA-able so we put them all in the DMA zone.
+	 */
+	memset(zones_size, 0, sizeof(zones_size));
+	memset(zholes_size, 0, sizeof(zholes_size));
+
+	zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
+	zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;
+
+	free_area_init_node(0, NODE_DATA(0), zones_size,
+			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
+}
+#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
+
+static struct kcore_list kcore_vmem;
+
+static int __init setup_kcore(void)
+{
+	int i;
+
+	for (i=0; i < lmb.memory.cnt; i++) {
+		unsigned long base, size;
+		struct kcore_list *kcore_mem;
+
+		base = lmb.memory.region[i].base;
+		size = lmb.memory.region[i].size;
+
+		/* GFP_ATOMIC to avoid might_sleep warnings during boot */
+		kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC);
+		if (!kcore_mem)
+			panic("mem_init: kmalloc failed\n");
+
+		kclist_add(kcore_mem, __va(base), size);
+	}
+
+	kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
+
+	return 0;
+}
+module_init(setup_kcore);
+
+void __init mem_init(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int nid;
+#endif
+	pg_data_t *pgdat;
+	unsigned long i;
+	struct page *page;
+	unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize;
+
+	num_physpages = max_low_pfn;	/* RAM is assumed contiguous */
+	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+        for_each_online_node(nid) {
+		if (NODE_DATA(nid)->node_spanned_pages != 0) {
+			printk("freeing bootmem node %x\n", nid);
+			totalram_pages +=
+				free_all_bootmem_node(NODE_DATA(nid));
+		}
+	}
+#else
+	max_mapnr = num_physpages;
+	totalram_pages += free_all_bootmem();
+#endif
+
+	for_each_pgdat(pgdat) {
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			page = pgdat_page_nr(pgdat, i);
+			if (PageReserved(page))
+				reservedpages++;
+		}
+	}
+
+	codesize = (unsigned long)&_etext - (unsigned long)&_stext;
+	initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
+	datasize = (unsigned long)&_edata - (unsigned long)&__init_end;
+	bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
+
+	printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
+	       "%luk reserved, %luk data, %luk bss, %luk init)\n",
+		(unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
+		num_physpages << (PAGE_SHIFT-10),
+		codesize >> 10,
+		reservedpages << (PAGE_SHIFT-10),
+		datasize >> 10,
+		bsssize >> 10,
+		initsize >> 10);
+
+	mem_init_done = 1;
+
+	/* Initialize the vDSO */
+	vdso_init();
+}
+
+void __iomem * reserve_phb_iospace(unsigned long size)
+{
+	void __iomem *virt_addr;
+		
+	if (phbs_io_bot >= IMALLOC_BASE) 
+		panic("reserve_phb_iospace(): phb io space overflow\n");
+			
+	virt_addr = (void __iomem *) phbs_io_bot;
+	phbs_io_bot += size;
+
+	return virt_addr;
+}
+
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
+{
+	memset(addr, 0, kmem_cache_size(cache));
+}
+
+static const int pgtable_cache_size[2] = {
+	PTE_TABLE_SIZE, PMD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+	"pgd_pte_cache", "pud_pmd_cache",
+};
+
+kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
+
+void pgtable_cache_init(void)
+{
+	int i;
+
+	BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
+	BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
+	BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
+	BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
+
+	for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
+		int size = pgtable_cache_size[i];
+		const char *name = pgtable_cache_name[i];
+
+		pgtable_cache[i] = kmem_cache_create(name,
+						     size, size,
+						     SLAB_HWCACHE_ALIGN
+						     | SLAB_MUST_HWCACHE_ALIGN,
+						     zero_ctor,
+						     NULL);
+		if (! pgtable_cache[i])
+			panic("pgtable_cache_init(): could not create %s!\n",
+			      name);
+	}
+}
+
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
+			      unsigned long size, pgprot_t vma_prot)
+{
+	if (ppc_md.phys_mem_access_prot)
+		return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
+
+	if (!page_is_ram(addr >> PAGE_SHIFT))
+		vma_prot = __pgprot(pgprot_val(vma_prot)
+				    | _PAGE_GUARDED | _PAGE_NO_CACHE);
+	return vma_prot;
+}
+EXPORT_SYMBOL(phys_mem_access_prot);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
new file mode 100644
index 000000000000..345db08e5d20
--- /dev/null
+++ b/arch/powerpc/mm/mem.c
@@ -0,0 +1,299 @@
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *  PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/initrd.h>
+#include <linux/pagemap.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/btext.h>
+#include <asm/tlb.h>
+#include <asm/bootinfo.h>
+#include <asm/prom.h>
+
+#include "mem_pieces.h"
+#include "mmu_decl.h"
+
+#ifndef CPU_FTR_COHERENT_ICACHE
+#define CPU_FTR_COHERENT_ICACHE	0	/* XXX for now */
+#define CPU_FTR_NOEXECUTE	0
+#endif
+
+/*
+ * This is called by /dev/mem to know if a given address has to
+ * be mapped non-cacheable or not
+ */
+int page_is_ram(unsigned long pfn)
+{
+	unsigned long paddr = (pfn << PAGE_SHIFT);
+
+#ifndef CONFIG_PPC64	/* XXX for now */
+	return paddr < __pa(high_memory);
+#else
+	int i;
+	for (i=0; i < lmb.memory.cnt; i++) {
+		unsigned long base;
+
+		base = lmb.memory.region[i].base;
+
+		if ((paddr >= base) &&
+			(paddr < (base + lmb.memory.region[i].size))) {
+			return 1;
+		}
+	}
+
+	return 0;
+#endif
+}
+EXPORT_SYMBOL(page_is_ram);
+
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
+			      unsigned long size, pgprot_t vma_prot)
+{
+	if (ppc_md.phys_mem_access_prot)
+		return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
+
+	if (!page_is_ram(addr >> PAGE_SHIFT))
+		vma_prot = __pgprot(pgprot_val(vma_prot)
+				    | _PAGE_GUARDED | _PAGE_NO_CACHE);
+	return vma_prot;
+}
+EXPORT_SYMBOL(phys_mem_access_prot);
+
+void show_mem(void)
+{
+	unsigned long total = 0, reserved = 0;
+	unsigned long shared = 0, cached = 0;
+	unsigned long highmem = 0;
+	struct page *page;
+	pg_data_t *pgdat;
+	unsigned long i;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	for_each_pgdat(pgdat) {
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			page = pgdat_page_nr(pgdat, i);
+			total++;
+			if (PageHighMem(page))
+				highmem++;
+			if (PageReserved(page))
+				reserved++;
+			else if (PageSwapCache(page))
+				cached++;
+			else if (page_count(page))
+				shared += page_count(page) - 1;
+		}
+	}
+	printk("%ld pages of RAM\n", total);
+#ifdef CONFIG_HIGHMEM
+	printk("%ld pages of HIGHMEM\n", highmem);
+#endif
+	printk("%ld reserved pages\n", reserved);
+	printk("%ld pages shared\n", shared);
+	printk("%ld pages swap cached\n", cached);
+}
+
+/*
+ * This is called when a page has been modified by the kernel.
+ * It just marks the page as not i-cache clean.  We do the i-cache
+ * flush later when the page is given to a user process, if necessary.
+ */
+void flush_dcache_page(struct page *page)
+{
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &page->flags))
+		clear_bit(PG_arch_1, &page->flags);
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+void flush_dcache_icache_page(struct page *page)
+{
+#ifdef CONFIG_BOOKE
+	void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE);
+	__flush_dcache_icache(start);
+	kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
+#elif defined(CONFIG_8xx)
+	/* On 8xx there is no need to kmap since highmem is not supported */
+	__flush_dcache_icache(page_address(page)); 
+#else
+	__flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
+#endif
+
+}
+void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
+{
+	clear_page(page);
+
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+	/*
+	 * We shouldnt have to do this, but some versions of glibc
+	 * require it (ld.so assumes zero filled pages are icache clean)
+	 * - Anton
+	 */
+
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &pg->flags))
+		clear_bit(PG_arch_1, &pg->flags);
+}
+EXPORT_SYMBOL(clear_user_page);
+
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+		    struct page *pg)
+{
+	copy_page(vto, vfrom);
+
+	/*
+	 * We should be able to use the following optimisation, however
+	 * there are two problems.
+	 * Firstly a bug in some versions of binutils meant PLT sections
+	 * were not marked executable.
+	 * Secondly the first word in the GOT section is blrl, used
+	 * to establish the GOT address. Until recently the GOT was
+	 * not marked executable.
+	 * - Anton
+	 */
+#if 0
+	if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
+		return;
+#endif
+
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &pg->flags))
+		clear_bit(PG_arch_1, &pg->flags);
+}
+
+void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+			     unsigned long addr, int len)
+{
+	unsigned long maddr;
+
+	maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
+	flush_icache_range(maddr, maddr + len);
+	kunmap(page);
+}
+EXPORT_SYMBOL(flush_icache_user_range);
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ * 
+ * This must always be called with the mm->page_table_lock held
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+		      pte_t pte)
+{
+	/* handle i-cache coherency */
+	unsigned long pfn = pte_pfn(pte);
+#ifdef CONFIG_PPC32
+	pmd_t *pmd;
+#else
+	unsigned long vsid;
+	void *pgdir;
+	pte_t *ptep;
+	int local = 0;
+	cpumask_t tmp;
+	unsigned long flags;
+#endif
+
+	/* handle i-cache coherency */
+	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
+	    !cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+	    pfn_valid(pfn)) {
+		struct page *page = pfn_to_page(pfn);
+		if (!PageReserved(page)
+		    && !test_bit(PG_arch_1, &page->flags)) {
+			if (vma->vm_mm == current->active_mm) {
+#ifdef CONFIG_8xx
+			/* On 8xx, cache control instructions (particularly 
+		 	 * "dcbst" from flush_dcache_icache) fault as write 
+			 * operation if there is an unpopulated TLB entry 
+			 * for the address in question. To workaround that, 
+			 * we invalidate the TLB here, thus avoiding dcbst 
+			 * misbehaviour.
+			 */
+				_tlbie(address);
+#endif
+				__flush_dcache_icache((void *) address);
+			} else
+				flush_dcache_icache_page(page);
+			set_bit(PG_arch_1, &page->flags);
+		}
+	}
+
+#ifdef CONFIG_PPC_STD_MMU
+	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+	if (!pte_young(pte) || address >= TASK_SIZE)
+		return;
+#ifdef CONFIG_PPC32
+	if (Hash == 0)
+		return;
+	pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
+	if (!pmd_none(*pmd))
+		add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
+#else
+	pgdir = vma->vm_mm->pgd;
+	if (pgdir == NULL)
+		return;
+
+	ptep = find_linux_pte(pgdir, ea);
+	if (!ptep)
+		return;
+
+	vsid = get_vsid(vma->vm_mm->context.id, ea);
+
+	local_irq_save(flags);
+	tmp = cpumask_of_cpu(smp_processor_id());
+	if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
+		local = 1;
+
+	__hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
+		    0x300, local);
+	local_irq_restore(flags);
+#endif
+#endif
+}
diff --git a/arch/powerpc/mm/mem64.c b/arch/powerpc/mm/mem64.c
new file mode 100644
index 000000000000..ef765a84433f
--- /dev/null
+++ b/arch/powerpc/mm/mem64.c
@@ -0,0 +1,259 @@
+/*
+ *  PowerPC version 
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret@us.ibm.com>
+ *      Rework for PPC64 port.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/idr.h>
+#include <linux/nodemask.h>
+#include <linux/module.h>
+
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/lmb.h>
+#include <asm/rtas.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/tlb.h>
+#include <asm/eeh.h>
+#include <asm/processor.h>
+#include <asm/mmzone.h>
+#include <asm/cputable.h>
+#include <asm/ppcdebug.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/iommu.h>
+#include <asm/abs_addr.h>
+#include <asm/vdso.h>
+#include <asm/imalloc.h>
+
+/*
+ * This is called by /dev/mem to know if a given address has to
+ * be mapped non-cacheable or not
+ */
+int page_is_ram(unsigned long pfn)
+{
+	int i;
+	unsigned long paddr = (pfn << PAGE_SHIFT);
+
+	for (i=0; i < lmb.memory.cnt; i++) {
+		unsigned long base;
+
+		base = lmb.memory.region[i].base;
+
+		if ((paddr >= base) &&
+			(paddr < (base + lmb.memory.region[i].size))) {
+			return 1;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(page_is_ram);
+
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
+			      unsigned long size, pgprot_t vma_prot)
+{
+	if (ppc_md.phys_mem_access_prot)
+		return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
+
+	if (!page_is_ram(addr >> PAGE_SHIFT))
+		vma_prot = __pgprot(pgprot_val(vma_prot)
+				    | _PAGE_GUARDED | _PAGE_NO_CACHE);
+	return vma_prot;
+}
+EXPORT_SYMBOL(phys_mem_access_prot);
+
+void show_mem(void)
+{
+	unsigned long total = 0, reserved = 0;
+	unsigned long shared = 0, cached = 0;
+	struct page *page;
+	pg_data_t *pgdat;
+	unsigned long i;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	for_each_pgdat(pgdat) {
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			page = pgdat_page_nr(pgdat, i);
+			total++;
+			if (PageReserved(page))
+				reserved++;
+			else if (PageSwapCache(page))
+				cached++;
+			else if (page_count(page))
+				shared += page_count(page) - 1;
+		}
+	}
+	printk("%ld pages of RAM\n", total);
+	printk("%ld reserved pages\n", reserved);
+	printk("%ld pages shared\n", shared);
+	printk("%ld pages swap cached\n", cached);
+}
+
+/*
+ * This is called when a page has been modified by the kernel.
+ * It just marks the page as not i-cache clean.  We do the i-cache
+ * flush later when the page is given to a user process, if necessary.
+ */
+void flush_dcache_page(struct page *page)
+{
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &page->flags))
+		clear_bit(PG_arch_1, &page->flags);
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
+{
+	clear_page(page);
+
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+	/*
+	 * We shouldnt have to do this, but some versions of glibc
+	 * require it (ld.so assumes zero filled pages are icache clean)
+	 * - Anton
+	 */
+
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &pg->flags))
+		clear_bit(PG_arch_1, &pg->flags);
+}
+EXPORT_SYMBOL(clear_user_page);
+
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+		    struct page *pg)
+{
+	copy_page(vto, vfrom);
+
+	/*
+	 * We should be able to use the following optimisation, however
+	 * there are two problems.
+	 * Firstly a bug in some versions of binutils meant PLT sections
+	 * were not marked executable.
+	 * Secondly the first word in the GOT section is blrl, used
+	 * to establish the GOT address. Until recently the GOT was
+	 * not marked executable.
+	 * - Anton
+	 */
+#if 0
+	if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
+		return;
+#endif
+
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &pg->flags))
+		clear_bit(PG_arch_1, &pg->flags);
+}
+
+void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+			     unsigned long addr, int len)
+{
+	unsigned long maddr;
+
+	maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK);
+	flush_icache_range(maddr, maddr + len);
+}
+EXPORT_SYMBOL(flush_icache_user_range);
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ * 
+ * This must always be called with the mm->page_table_lock held
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
+		      pte_t pte)
+{
+	unsigned long vsid;
+	void *pgdir;
+	pte_t *ptep;
+	int local = 0;
+	cpumask_t tmp;
+	unsigned long flags;
+
+	/* handle i-cache coherency */
+	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
+	    !cpu_has_feature(CPU_FTR_NOEXECUTE)) {
+		unsigned long pfn = pte_pfn(pte);
+		if (pfn_valid(pfn)) {
+			struct page *page = pfn_to_page(pfn);
+			if (!PageReserved(page)
+			    && !test_bit(PG_arch_1, &page->flags)) {
+				__flush_dcache_icache(page_address(page));
+				set_bit(PG_arch_1, &page->flags);
+			}
+		}
+	}
+
+	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+	if (!pte_young(pte))
+		return;
+
+	pgdir = vma->vm_mm->pgd;
+	if (pgdir == NULL)
+		return;
+
+	ptep = find_linux_pte(pgdir, ea);
+	if (!ptep)
+		return;
+
+	vsid = get_vsid(vma->vm_mm->context.id, ea);
+
+	local_irq_save(flags);
+	tmp = cpumask_of_cpu(smp_processor_id());
+	if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
+		local = 1;
+
+	__hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
+		    0x300, local);
+	local_irq_restore(flags);
+}
diff --git a/arch/powerpc/mm/mem_pieces.c b/arch/powerpc/mm/mem_pieces.c
new file mode 100644
index 000000000000..3d639052017e
--- /dev/null
+++ b/arch/powerpc/mm/mem_pieces.c
@@ -0,0 +1,163 @@
+/*
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Changes to accommodate Power Macintoshes.
+ *    Cort Dougan <cort@cs.nmt.edu>
+ *      Rewrites.
+ *    Grant Erickson <grant@lcse.umn.edu>
+ *      General rework and split from mm/init.c.
+ *
+ *    Module name: mem_pieces.c
+ *
+ *    Description:
+ *      Routines and data structures for manipulating and representing
+ *      phyiscal memory extents (i.e. address/length pairs).
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <asm/page.h>
+
+#include "mem_pieces.h"
+
+extern struct mem_pieces phys_avail;
+
+static void mem_pieces_print(struct mem_pieces *);
+
+/*
+ * Scan a region for a piece of a given size with the required alignment.
+ */
+void __init *
+mem_pieces_find(unsigned int size, unsigned int align)
+{
+	int i;
+	unsigned a, e;
+	struct mem_pieces *mp = &phys_avail;
+
+	for (i = 0; i < mp->n_regions; ++i) {
+		a = mp->regions[i].address;
+		e = a + mp->regions[i].size;
+		a = (a + align - 1) & -align;
+		if (a + size <= e) {
+			mem_pieces_remove(mp, a, size, 1);
+			return (void *) __va(a);
+		}
+	}
+	panic("Couldn't find %u bytes at %u alignment\n", size, align);
+
+	return NULL;
+}
+
+/*
+ * Remove some memory from an array of pieces
+ */
+void __init
+mem_pieces_remove(struct mem_pieces *mp, unsigned int start, unsigned int size,
+		  int must_exist)
+{
+	int i, j;
+	unsigned int end, rs, re;
+	struct reg_property *rp;
+
+	end = start + size;
+	for (i = 0, rp = mp->regions; i < mp->n_regions; ++i, ++rp) {
+		if (end > rp->address && start < rp->address + rp->size)
+			break;
+	}
+	if (i >= mp->n_regions) {
+		if (must_exist)
+			printk("mem_pieces_remove: [%x,%x) not in any region\n",
+			       start, end);
+		return;
+	}
+	for (; i < mp->n_regions && end > rp->address; ++i, ++rp) {
+		rs = rp->address;
+		re = rs + rp->size;
+		if (must_exist && (start < rs || end > re)) {
+			printk("mem_pieces_remove: bad overlap [%x,%x) with",
+			       start, end);
+			mem_pieces_print(mp);
+			must_exist = 0;
+		}
+		if (start > rs) {
+			rp->size = start - rs;
+			if (end < re) {
+				/* need to split this entry */
+				if (mp->n_regions >= MEM_PIECES_MAX)
+					panic("eek... mem_pieces overflow");
+				for (j = mp->n_regions; j > i + 1; --j)
+					mp->regions[j] = mp->regions[j-1];
+				++mp->n_regions;
+				rp[1].address = end;
+				rp[1].size = re - end;
+			}
+		} else {
+			if (end < re) {
+				rp->address = end;
+				rp->size = re - end;
+			} else {
+				/* need to delete this entry */
+				for (j = i; j < mp->n_regions - 1; ++j)
+					mp->regions[j] = mp->regions[j+1];
+				--mp->n_regions;
+				--i;
+				--rp;
+			}
+		}
+	}
+}
+
+static void __init
+mem_pieces_print(struct mem_pieces *mp)
+{
+	int i;
+
+	for (i = 0; i < mp->n_regions; ++i)
+		printk(" [%x, %x)", mp->regions[i].address,
+		       mp->regions[i].address + mp->regions[i].size);
+	printk("\n");
+}
+
+void __init
+mem_pieces_sort(struct mem_pieces *mp)
+{
+	unsigned long a, s;
+	int i, j;
+
+	for (i = 1; i < mp->n_regions; ++i) {
+		a = mp->regions[i].address;
+		s = mp->regions[i].size;
+		for (j = i - 1; j >= 0; --j) {
+			if (a >= mp->regions[j].address)
+				break;
+			mp->regions[j+1] = mp->regions[j];
+		}
+		mp->regions[j+1].address = a;
+		mp->regions[j+1].size = s;
+	}
+}
+
+void __init
+mem_pieces_coalesce(struct mem_pieces *mp)
+{
+	unsigned long a, s, ns;
+	int i, j, d;
+
+	d = 0;
+	for (i = 0; i < mp->n_regions; i = j) {
+		a = mp->regions[i].address;
+		s = mp->regions[i].size;
+		for (j = i + 1; j < mp->n_regions
+			     && mp->regions[j].address - a <= s; ++j) {
+			ns = mp->regions[j].address + mp->regions[j].size - a;
+			if (ns > s)
+				s = ns;
+		}
+		mp->regions[d].address = a;
+		mp->regions[d].size = s;
+		++d;
+	}
+	mp->n_regions = d;
+}
diff --git a/arch/powerpc/mm/mem_pieces.h b/arch/powerpc/mm/mem_pieces.h
new file mode 100644
index 000000000000..e2b700dc7f18
--- /dev/null
+++ b/arch/powerpc/mm/mem_pieces.h
@@ -0,0 +1,48 @@
+/*
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Changes to accommodate Power Macintoshes.
+ *    Cort Dougan <cort@cs.nmt.edu>
+ *      Rewrites.
+ *    Grant Erickson <grant@lcse.umn.edu>
+ *      General rework and split from mm/init.c.
+ *
+ *    Module name: mem_pieces.h
+ *
+ *    Description:
+ *      Routines and data structures for manipulating and representing
+ *      phyiscal memory extents (i.e. address/length pairs).
+ *
+ */
+
+#ifndef __MEM_PIECES_H__
+#define	__MEM_PIECES_H__
+
+#include <asm/prom.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Type Definitions */
+
+#define	MEM_PIECES_MAX	32
+
+struct mem_pieces {
+    int n_regions;
+    struct reg_property regions[MEM_PIECES_MAX];
+};
+
+/* Function Prototypes */
+
+extern void	*mem_pieces_find(unsigned int size, unsigned int align);
+extern void	 mem_pieces_remove(struct mem_pieces *mp, unsigned int start,
+				   unsigned int size, int must_exist);
+extern void	 mem_pieces_coalesce(struct mem_pieces *mp);
+extern void	 mem_pieces_sort(struct mem_pieces *mp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MEM_PIECES_H__ */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
new file mode 100644
index 000000000000..a8816e0f6a86
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context.c
@@ -0,0 +1,86 @@
+/*
+ * This file contains the routines for handling the MMU on those
+ * PowerPC implementations where the MMU substantially follows the
+ * architecture specification.  This includes the 6xx, 7xx, 7xxx,
+ * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+
+mm_context_t next_mmu_context;
+unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
+#ifdef FEW_CONTEXTS
+atomic_t nr_free_contexts;
+struct mm_struct *context_mm[LAST_CONTEXT+1];
+void steal_context(void);
+#endif /* FEW_CONTEXTS */
+
+/*
+ * Initialize the context management stuff.
+ */
+void __init
+mmu_context_init(void)
+{
+	/*
+	 * Some processors have too few contexts to reserve one for
+	 * init_mm, and require using context 0 for a normal task.
+	 * Other processors reserve the use of context zero for the kernel.
+	 * This code assumes FIRST_CONTEXT < 32.
+	 */
+	context_map[0] = (1 << FIRST_CONTEXT) - 1;
+	next_mmu_context = FIRST_CONTEXT;
+#ifdef FEW_CONTEXTS
+	atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1);
+#endif /* FEW_CONTEXTS */
+}
+
+#ifdef FEW_CONTEXTS
+/*
+ * Steal a context from a task that has one at the moment.
+ * This is only used on 8xx and 4xx and we presently assume that
+ * they don't do SMP.  If they do then this will have to check
+ * whether the MM we steal is in use.
+ * We also assume that this is only used on systems that don't
+ * use an MMU hash table - this is true for 8xx and 4xx.
+ * This isn't an LRU system, it just frees up each context in
+ * turn (sort-of pseudo-random replacement :).  This would be the
+ * place to implement an LRU scheme if anyone was motivated to do it.
+ *  -- paulus
+ */
+void
+steal_context(void)
+{
+	struct mm_struct *mm;
+
+	/* free up context `next_mmu_context' */
+	/* if we shouldn't free context 0, don't... */
+	if (next_mmu_context < FIRST_CONTEXT)
+		next_mmu_context = FIRST_CONTEXT;
+	mm = context_mm[next_mmu_context];
+	flush_tlb_mm(mm);
+	destroy_context(mm);
+}
+#endif /* FEW_CONTEXTS */
diff --git a/arch/powerpc/mm/mmu_context64.c b/arch/powerpc/mm/mmu_context64.c
new file mode 100644
index 000000000000..714a84dd8d5d
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context64.c
@@ -0,0 +1,63 @@
+/*
+ *  MMU context allocation for 64-bit kernels.
+ *
+ *  Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+
+#include <asm/mmu_context.h>
+
+static DEFINE_SPINLOCK(mmu_context_lock);
+static DEFINE_IDR(mmu_context_idr);
+
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	int index;
+	int err;
+
+again:
+	if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
+		return -ENOMEM;
+
+	spin_lock(&mmu_context_lock);
+	err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index);
+	spin_unlock(&mmu_context_lock);
+
+	if (err == -EAGAIN)
+		goto again;
+	else if (err)
+		return err;
+
+	if (index > MAX_CONTEXT) {
+		idr_remove(&mmu_context_idr, index);
+		return -ENOMEM;
+	}
+
+	mm->context.id = index;
+
+	return 0;
+}
+
+void destroy_context(struct mm_struct *mm)
+{
+	spin_lock(&mmu_context_lock);
+	idr_remove(&mmu_context_idr, mm->context.id);
+	spin_unlock(&mmu_context_lock);
+
+	mm->context.id = NO_CONTEXT;
+}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
new file mode 100644
index 000000000000..540f3292b229
--- /dev/null
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -0,0 +1,85 @@
+/*
+ * Declarations of procedures and variables shared between files
+ * in arch/ppc/mm/.
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+
+extern void mapin_ram(void);
+extern int map_page(unsigned long va, phys_addr_t pa, int flags);
+extern void setbat(int index, unsigned long virt, unsigned long phys,
+		   unsigned int size, int flags);
+extern void reserve_phys_mem(unsigned long start, unsigned long size);
+extern void settlbcam(int index, unsigned long virt, phys_addr_t phys,
+		      unsigned int size, int flags, unsigned int pid);
+extern void invalidate_tlbcam_entry(int index);
+
+extern int __map_without_bats;
+extern unsigned long ioremap_base;
+extern unsigned long ioremap_bot;
+extern unsigned int rtas_data, rtas_size;
+
+extern unsigned long total_memory;
+extern unsigned long total_lowmem;
+extern int mem_init_done;
+
+extern PTE *Hash, *Hash_end;
+extern unsigned long Hash_size, Hash_mask;
+
+extern unsigned int num_tlbcam_entries;
+
+/* ...and now those things that may be slightly different between processor
+ * architectures.  -- Dan
+ */
+#if defined(CONFIG_8xx)
+#define flush_HPTE(X, va, pg)	_tlbie(va)
+#define MMU_init_hw()		do { } while(0)
+#define mmu_mapin_ram()		(0UL)
+
+#elif defined(CONFIG_4xx)
+#define flush_HPTE(X, va, pg)	_tlbie(va)
+extern void MMU_init_hw(void);
+extern unsigned long mmu_mapin_ram(void);
+
+#elif defined(CONFIG_FSL_BOOKE)
+#define flush_HPTE(X, va, pg)	_tlbie(va)
+extern void MMU_init_hw(void);
+extern unsigned long mmu_mapin_ram(void);
+extern void adjust_total_lowmem(void);
+
+#else
+/* anything except 4xx or 8xx */
+extern void MMU_init_hw(void);
+extern unsigned long mmu_mapin_ram(void);
+
+/* Be careful....this needs to be updated if we ever encounter 603 SMPs,
+ * which includes all new 82xx processors.  We need tlbie/tlbsync here
+ * in that case (I think). -- Dan.
+ */
+static inline void flush_HPTE(unsigned context, unsigned long va,
+			      unsigned long pdval)
+{
+	if ((Hash != 0) &&
+	    cpu_has_feature(CPU_FTR_HPTE_TABLE))
+		flush_hash_pages(0, va, pdval, 1);
+	else
+		_tlbie(va);
+}
+#endif
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
new file mode 100644
index 000000000000..81a3d7446d37
--- /dev/null
+++ b/arch/powerpc/mm/pgtable.c
@@ -0,0 +1,470 @@
+/*
+ * This file contains the routines setting up the linux page tables.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/io.h>
+
+#include "mmu_decl.h"
+
+unsigned long ioremap_base;
+unsigned long ioremap_bot;
+int io_bat_index;
+
+#if defined(CONFIG_6xx) || defined(CONFIG_POWER3)
+#define HAVE_BATS	1
+#endif
+
+#if defined(CONFIG_FSL_BOOKE)
+#define HAVE_TLBCAM	1
+#endif
+
+extern char etext[], _stext[];
+
+#ifdef CONFIG_SMP
+extern void hash_page_sync(void);
+#endif
+
+#ifdef HAVE_BATS
+extern unsigned long v_mapped_by_bats(unsigned long va);
+extern unsigned long p_mapped_by_bats(unsigned long pa);
+void setbat(int index, unsigned long virt, unsigned long phys,
+	    unsigned int size, int flags);
+
+#else /* !HAVE_BATS */
+#define v_mapped_by_bats(x)	(0UL)
+#define p_mapped_by_bats(x)	(0UL)
+#endif /* HAVE_BATS */
+
+#ifdef HAVE_TLBCAM
+extern unsigned int tlbcam_index;
+extern unsigned long v_mapped_by_tlbcam(unsigned long va);
+extern unsigned long p_mapped_by_tlbcam(unsigned long pa);
+#else /* !HAVE_TLBCAM */
+#define v_mapped_by_tlbcam(x)	(0UL)
+#define p_mapped_by_tlbcam(x)	(0UL)
+#endif /* HAVE_TLBCAM */
+
+#ifdef CONFIG_PTE_64BIT
+/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */
+#define PGDIR_ORDER	1
+#else
+#define PGDIR_ORDER	0
+#endif
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *ret;
+
+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
+	return ret;
+}
+
+void pgd_free(pgd_t *pgd)
+{
+	free_pages((unsigned long)pgd, PGDIR_ORDER);
+}
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	pte_t *pte;
+	extern int mem_init_done;
+	extern void *early_get_page(void);
+
+	if (mem_init_done) {
+		pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	} else {
+		pte = (pte_t *)early_get_page();
+		if (pte)
+			clear_page(pte);
+	}
+	return pte;
+}
+
+struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *ptepage;
+
+#ifdef CONFIG_HIGHPTE
+	int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+#else
+	int flags = GFP_KERNEL | __GFP_REPEAT;
+#endif
+
+	ptepage = alloc_pages(flags, 0);
+	if (ptepage)
+		clear_highpage(ptepage);
+	return ptepage;
+}
+
+void pte_free_kernel(pte_t *pte)
+{
+#ifdef CONFIG_SMP
+	hash_page_sync();
+#endif
+	free_page((unsigned long)pte);
+}
+
+void pte_free(struct page *ptepage)
+{
+#ifdef CONFIG_SMP
+	hash_page_sync();
+#endif
+	__free_page(ptepage);
+}
+
+#ifndef CONFIG_PHYS_64BIT
+void __iomem *
+ioremap(phys_addr_t addr, unsigned long size)
+{
+	return __ioremap(addr, size, _PAGE_NO_CACHE);
+}
+#else /* CONFIG_PHYS_64BIT */
+void __iomem *
+ioremap64(unsigned long long addr, unsigned long size)
+{
+	return __ioremap(addr, size, _PAGE_NO_CACHE);
+}
+
+void __iomem *
+ioremap(phys_addr_t addr, unsigned long size)
+{
+	phys_addr_t addr64 = fixup_bigphys_addr(addr, size);
+
+	return ioremap64(addr64, size);
+}
+#endif /* CONFIG_PHYS_64BIT */
+
+void __iomem *
+__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
+{
+	unsigned long v, i;
+	phys_addr_t p;
+	int err;
+
+	/*
+	 * Choose an address to map it to.
+	 * Once the vmalloc system is running, we use it.
+	 * Before then, we use space going down from ioremap_base
+	 * (ioremap_bot records where we're up to).
+	 */
+	p = addr & PAGE_MASK;
+	size = PAGE_ALIGN(addr + size) - p;
+
+	/*
+	 * If the address lies within the first 16 MB, assume it's in ISA
+	 * memory space
+	 */
+	if (p < 16*1024*1024)
+		p += _ISA_MEM_BASE;
+
+	/*
+	 * Don't allow anybody to remap normal RAM that we're using.
+	 * mem_init() sets high_memory so only do the check after that.
+	 */
+	if ( mem_init_done && (p < virt_to_phys(high_memory)) )
+	{
+		printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p,
+		       __builtin_return_address(0));
+		return NULL;
+	}
+
+	if (size == 0)
+		return NULL;
+
+	/*
+	 * Is it already mapped?  Perhaps overlapped by a previous
+	 * BAT mapping.  If the whole area is mapped then we're done,
+	 * otherwise remap it since we want to keep the virt addrs for
+	 * each request contiguous.
+	 *
+	 * We make the assumption here that if the bottom and top
+	 * of the range we want are mapped then it's mapped to the
+	 * same virt address (and this is contiguous).
+	 *  -- Cort
+	 */
+	if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ )
+		goto out;
+
+	if ((v = p_mapped_by_tlbcam(p)))
+		goto out;
+
+	if (mem_init_done) {
+		struct vm_struct *area;
+		area = get_vm_area(size, VM_IOREMAP);
+		if (area == 0)
+			return NULL;
+		v = (unsigned long) area->addr;
+	} else {
+		v = (ioremap_bot -= size);
+	}
+
+	if ((flags & _PAGE_PRESENT) == 0)
+		flags |= _PAGE_KERNEL;
+	if (flags & _PAGE_NO_CACHE)
+		flags |= _PAGE_GUARDED;
+
+	/*
+	 * Should check if it is a candidate for a BAT mapping
+	 */
+
+	err = 0;
+	for (i = 0; i < size && err == 0; i += PAGE_SIZE)
+		err = map_page(v+i, p+i, flags);
+	if (err) {
+		if (mem_init_done)
+			vunmap((void *)v);
+		return NULL;
+	}
+
+out:
+	return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+	/*
+	 * If mapped by BATs then there is nothing to do.
+	 * Calling vfree() generates a benign warning.
+	 */
+	if (v_mapped_by_bats((unsigned long)addr)) return;
+
+	if (addr > high_memory && (unsigned long) addr < ioremap_bot)
+		vunmap((void *) (PAGE_MASK & (unsigned long)addr));
+}
+
+void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+	return (void __iomem *) (port + _IO_BASE);
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+	/* Nothing to do */
+}
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
+
+int
+map_page(unsigned long va, phys_addr_t pa, int flags)
+{
+	pmd_t *pd;
+	pte_t *pg;
+	int err = -ENOMEM;
+
+	spin_lock(&init_mm.page_table_lock);
+	/* Use upper 10 bits of VA to index the first level map */
+	pd = pmd_offset(pgd_offset_k(va), va);
+	/* Use middle 10 bits of VA to index the second-level map */
+	pg = pte_alloc_kernel(&init_mm, pd, va);
+	if (pg != 0) {
+		err = 0;
+		set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
+		if (mem_init_done)
+			flush_HPTE(0, va, pmd_val(*pd));
+	}
+	spin_unlock(&init_mm.page_table_lock);
+	return err;
+}
+
+/*
+ * Map in all of physical memory starting at KERNELBASE.
+ */
+void __init mapin_ram(void)
+{
+	unsigned long v, p, s, f;
+
+	s = mmu_mapin_ram();
+	v = KERNELBASE + s;
+	p = PPC_MEMSTART + s;
+	for (; s < total_lowmem; s += PAGE_SIZE) {
+		if ((char *) v >= _stext && (char *) v < etext)
+			f = _PAGE_RAM_TEXT;
+		else
+			f = _PAGE_RAM;
+		map_page(v, p, f);
+		v += PAGE_SIZE;
+		p += PAGE_SIZE;
+	}
+}
+
+/* is x a power of 2? */
+#define is_power_of_2(x)	((x) != 0 && (((x) & ((x) - 1)) == 0))
+
+/* is x a power of 4? */
+#define is_power_of_4(x)	((x) != 0 && (((x) & (x-1)) == 0) && (ffs(x) & 1))
+
+/*
+ * Set up a mapping for a block of I/O.
+ * virt, phys, size must all be page-aligned.
+ * This should only be called before ioremap is called.
+ */
+void __init io_block_mapping(unsigned long virt, phys_addr_t phys,
+			     unsigned int size, int flags)
+{
+	int i;
+
+	if (virt > KERNELBASE && virt < ioremap_bot)
+		ioremap_bot = ioremap_base = virt;
+
+#ifdef HAVE_BATS
+	/*
+	 * Use a BAT for this if possible...
+	 */
+	if (io_bat_index < 2 && is_power_of_2(size)
+	    && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
+		setbat(io_bat_index, virt, phys, size, flags);
+		++io_bat_index;
+		return;
+	}
+#endif /* HAVE_BATS */
+
+#ifdef HAVE_TLBCAM
+	/*
+	 * Use a CAM for this if possible...
+	 */
+	if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size)
+	    && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
+		settlbcam(tlbcam_index, virt, phys, size, flags, 0);
+		++tlbcam_index;
+		return;
+	}
+#endif /* HAVE_TLBCAM */
+
+	/* No BATs available, put it in the page tables. */
+	for (i = 0; i < size; i += PAGE_SIZE)
+		map_page(virt + i, phys + i, flags);
+}
+
+/* Scan the real Linux page tables and return a PTE pointer for
+ * a virtual address in a context.
+ * Returns true (1) if PTE was found, zero otherwise.  The pointer to
+ * the PTE pointer is unmodified if PTE is not found.
+ */
+int
+get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
+{
+        pgd_t	*pgd;
+        pmd_t	*pmd;
+        pte_t	*pte;
+        int     retval = 0;
+
+        pgd = pgd_offset(mm, addr & PAGE_MASK);
+        if (pgd) {
+                pmd = pmd_offset(pgd, addr & PAGE_MASK);
+                if (pmd_present(*pmd)) {
+                        pte = pte_offset_map(pmd, addr & PAGE_MASK);
+                        if (pte) {
+				retval = 1;
+				*ptep = pte;
+				/* XXX caller needs to do pte_unmap, yuck */
+                        }
+                }
+        }
+        return(retval);
+}
+
+/* Find physical address for this virtual address.  Normally used by
+ * I/O functions, but anyone can call it.
+ */
+unsigned long iopa(unsigned long addr)
+{
+	unsigned long pa;
+
+	/* I don't know why this won't work on PMacs or CHRP.  It
+	 * appears there is some bug, or there is some implicit
+	 * mapping done not properly represented by BATs or in page
+	 * tables.......I am actively working on resolving this, but
+	 * can't hold up other stuff.  -- Dan
+	 */
+	pte_t *pte;
+	struct mm_struct *mm;
+
+	/* Check the BATs */
+	pa = v_mapped_by_bats(addr);
+	if (pa)
+		return pa;
+
+	/* Allow mapping of user addresses (within the thread)
+	 * for DMA if necessary.
+	 */
+	if (addr < TASK_SIZE)
+		mm = current->mm;
+	else
+		mm = &init_mm;
+
+	pa = 0;
+	if (get_pteptr(mm, addr, &pte)) {
+		pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK);
+		pte_unmap(pte);
+	}
+
+	return(pa);
+}
+
+/* This is will find the virtual address for a physical one....
+ * Swiped from APUS, could be dangerous :-).
+ * This is only a placeholder until I really find a way to make this
+ * work.  -- Dan
+ */
+unsigned long
+mm_ptov (unsigned long paddr)
+{
+	unsigned long ret;
+#if 0
+	if (paddr < 16*1024*1024)
+		ret = ZTWO_VADDR(paddr);
+	else {
+		int i;
+
+		for (i = 0; i < kmap_chunk_count;){
+			unsigned long phys = kmap_chunks[i++];
+			unsigned long size = kmap_chunks[i++];
+			unsigned long virt = kmap_chunks[i++];
+			if (paddr >= phys
+			    && paddr < (phys + size)){
+				ret = virt + paddr - phys;
+				goto exit;
+			}
+		}
+	
+		ret = (unsigned long) __va(paddr);
+	}
+exit:
+#ifdef DEBUGPV
+	printk ("PTOV(%lx)=%lx\n", paddr, ret);
+#endif
+#else
+	ret = (unsigned long)paddr + KERNELBASE;
+#endif
+	return ret;
+}
+
diff --git a/arch/powerpc/mm/pgtable64.c b/arch/powerpc/mm/pgtable64.c
new file mode 100644
index 000000000000..724f97e5dee5
--- /dev/null
+++ b/arch/powerpc/mm/pgtable64.c
@@ -0,0 +1,357 @@
+/*
+ *  This file contains ioremap and related functions for 64-bit machines.
+ *
+ *  Derived from arch/ppc64/mm/init.c
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret@us.ibm.com>
+ *      Rework for PPC64 port.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/idr.h>
+#include <linux/nodemask.h>
+#include <linux/module.h>
+
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/lmb.h>
+#include <asm/rtas.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/tlb.h>
+#include <asm/eeh.h>
+#include <asm/processor.h>
+#include <asm/mmzone.h>
+#include <asm/cputable.h>
+#include <asm/ppcdebug.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/iommu.h>
+#include <asm/abs_addr.h>
+#include <asm/vdso.h>
+#include <asm/imalloc.h>
+
+#if PGTABLE_RANGE > USER_VSID_RANGE
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#warning TASK_SIZE is smaller than it needs to be.
+#endif
+
+int mem_init_done;
+unsigned long ioremap_bot = IMALLOC_BASE;
+static unsigned long phbs_io_bot = PHBS_IO_BASE;
+
+extern pgd_t swapper_pg_dir[];
+extern struct task_struct *current_set[NR_CPUS];
+
+unsigned long klimit = (unsigned long)_end;
+
+/* max amount of RAM to use */
+unsigned long __max_memory;
+
+/* info on what we think the IO hole is */
+unsigned long 	io_hole_start;
+unsigned long	io_hole_size;
+
+#ifdef CONFIG_PPC_ISERIES
+
+void __iomem *ioremap(unsigned long addr, unsigned long size)
+{
+	return (void __iomem *)addr;
+}
+
+extern void __iomem *__ioremap(unsigned long addr, unsigned long size,
+		       unsigned long flags)
+{
+	return (void __iomem *)addr;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+	return;
+}
+
+#else
+
+/*
+ * map_io_page currently only called by __ioremap
+ * map_io_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+static int map_io_page(unsigned long ea, unsigned long pa, int flags)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	unsigned long vsid;
+
+	if (mem_init_done) {
+		spin_lock(&init_mm.page_table_lock);
+		pgdp = pgd_offset_k(ea);
+		pudp = pud_alloc(&init_mm, pgdp, ea);
+		if (!pudp)
+			return -ENOMEM;
+		pmdp = pmd_alloc(&init_mm, pudp, ea);
+		if (!pmdp)
+			return -ENOMEM;
+		ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
+		if (!ptep)
+			return -ENOMEM;
+		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+							  __pgprot(flags)));
+		spin_unlock(&init_mm.page_table_lock);
+	} else {
+		unsigned long va, vpn, hash, hpteg;
+
+		/*
+		 * If the mm subsystem is not fully up, we cannot create a
+		 * linux page table entry for this mapping.  Simply bolt an
+		 * entry in the hardware page table.
+		 */
+		vsid = get_kernel_vsid(ea);
+		va = (vsid << 28) | (ea & 0xFFFFFFF);
+		vpn = va >> PAGE_SHIFT;
+
+		hash = hpt_hash(vpn, 0);
+
+		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+
+		/* Panic if a pte grpup is full */
+		if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
+				       HPTE_V_BOLTED,
+				       _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
+		    == -1) {
+			panic("map_io_page: could not insert mapping");
+		}
+	}
+	return 0;
+}
+
+
+static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa,
+			    unsigned long ea, unsigned long size,
+			    unsigned long flags)
+{
+	unsigned long i;
+
+	if ((flags & _PAGE_PRESENT) == 0)
+		flags |= pgprot_val(PAGE_KERNEL);
+
+	for (i = 0; i < size; i += PAGE_SIZE)
+		if (map_io_page(ea+i, pa+i, flags))
+			return NULL;
+
+	return (void __iomem *) (ea + (addr & ~PAGE_MASK));
+}
+
+
+void __iomem *
+ioremap(unsigned long addr, unsigned long size)
+{
+	return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED);
+}
+
+void __iomem * __ioremap(unsigned long addr, unsigned long size,
+			 unsigned long flags)
+{
+	unsigned long pa, ea;
+	void __iomem *ret;
+
+	/*
+	 * Choose an address to map it to.
+	 * Once the imalloc system is running, we use it.
+	 * Before that, we map using addresses going
+	 * up from ioremap_bot.  imalloc will use
+	 * the addresses from ioremap_bot through
+	 * IMALLOC_END
+	 * 
+	 */
+	pa = addr & PAGE_MASK;
+	size = PAGE_ALIGN(addr + size) - pa;
+
+	if (size == 0)
+		return NULL;
+
+	if (mem_init_done) {
+		struct vm_struct *area;
+		area = im_get_free_area(size);
+		if (area == NULL)
+			return NULL;
+		ea = (unsigned long)(area->addr);
+		ret = __ioremap_com(addr, pa, ea, size, flags);
+		if (!ret)
+			im_free(area->addr);
+	} else {
+		ea = ioremap_bot;
+		ret = __ioremap_com(addr, pa, ea, size, flags);
+		if (ret)
+			ioremap_bot += size;
+	}
+	return ret;
+}
+
+#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
+
+int __ioremap_explicit(unsigned long pa, unsigned long ea,
+		       unsigned long size, unsigned long flags)
+{
+	struct vm_struct *area;
+	void __iomem *ret;
+	
+	/* For now, require page-aligned values for pa, ea, and size */
+	if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
+	    !IS_PAGE_ALIGNED(size)) {
+		printk(KERN_ERR	"unaligned value in %s\n", __FUNCTION__);
+		return 1;
+	}
+	
+	if (!mem_init_done) {
+		/* Two things to consider in this case:
+		 * 1) No records will be kept (imalloc, etc) that the region
+		 *    has been remapped
+		 * 2) It won't be easy to iounmap() the region later (because
+		 *    of 1)
+		 */
+		;
+	} else {
+		area = im_get_area(ea, size,
+			IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
+		if (area == NULL) {
+			/* Expected when PHB-dlpar is in play */
+			return 1;
+		}
+		if (ea != (unsigned long) area->addr) {
+			printk(KERN_ERR "unexpected addr return from "
+			       "im_get_area\n");
+			return 1;
+		}
+	}
+	
+	ret = __ioremap_com(pa, pa, ea, size, flags);
+	if (ret == NULL) {
+		printk(KERN_ERR "ioremap_explicit() allocation failure !\n");
+		return 1;
+	}
+	if (ret != (void *) ea) {
+		printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*  
+ * Unmap an IO region and remove it from imalloc'd list.
+ * Access to IO memory should be serialized by driver.
+ * This code is modeled after vmalloc code - unmap_vm_area()
+ *
+ * XXX	what about calls before mem_init_done (ie python_countermeasures())
+ */
+void iounmap(volatile void __iomem *token)
+{
+	void *addr;
+
+	if (!mem_init_done)
+		return;
+	
+	addr = (void *) ((unsigned long __force) token & PAGE_MASK);
+
+	im_free(addr);
+}
+
+static int iounmap_subset_regions(unsigned long addr, unsigned long size)
+{
+	struct vm_struct *area;
+
+	/* Check whether subsets of this region exist */
+	area = im_get_area(addr, size, IM_REGION_SUPERSET);
+	if (area == NULL)
+		return 1;
+
+	while (area) {
+		iounmap((void __iomem *) area->addr);
+		area = im_get_area(addr, size,
+				IM_REGION_SUPERSET);
+	}
+
+	return 0;
+}
+
+int iounmap_explicit(volatile void __iomem *start, unsigned long size)
+{
+	struct vm_struct *area;
+	unsigned long addr;
+	int rc;
+	
+	addr = (unsigned long __force) start & PAGE_MASK;
+
+	/* Verify that the region either exists or is a subset of an existing
+	 * region.  In the latter case, split the parent region to create 
+	 * the exact region 
+	 */
+	area = im_get_area(addr, size, 
+			    IM_REGION_EXISTS | IM_REGION_SUBSET);
+	if (area == NULL) {
+		/* Determine whether subset regions exist.  If so, unmap */
+		rc = iounmap_subset_regions(addr, size);
+		if (rc) {
+			printk(KERN_ERR
+			       "%s() cannot unmap nonexistent range 0x%lx\n",
+ 				__FUNCTION__, addr);
+			return 1;
+		}
+	} else {
+		iounmap((void __iomem *) area->addr);
+	}
+	/*
+	 * FIXME! This can't be right:
+	iounmap(area->addr);
+	 * Maybe it should be "iounmap(area);"
+	 */
+	return 0;
+}
+
+#endif
+
+EXPORT_SYMBOL(ioremap);
+EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/ppc_mmu.c b/arch/powerpc/mm/ppc_mmu.c
new file mode 100644
index 000000000000..9a381ed5eb21
--- /dev/null
+++ b/arch/powerpc/mm/ppc_mmu.c
@@ -0,0 +1,296 @@
+/*
+ * This file contains the routines for handling the MMU on those
+ * PowerPC implementations where the MMU substantially follows the
+ * architecture specification.  This includes the 6xx, 7xx, 7xxx,
+ * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+#include <asm/prom.h>
+#include <asm/mmu.h>
+#include <asm/machdep.h>
+
+#include "mmu_decl.h"
+#include "mem_pieces.h"
+
+PTE *Hash, *Hash_end;
+unsigned long Hash_size, Hash_mask;
+unsigned long _SDR1;
+
+union ubat {			/* BAT register values to be loaded */
+	BAT	bat;
+#ifdef CONFIG_PPC64BRIDGE
+	u64	word[2];
+#else
+	u32	word[2];
+#endif
+} BATS[4][2];			/* 4 pairs of IBAT, DBAT */
+
+struct batrange {		/* stores address ranges mapped by BATs */
+	unsigned long start;
+	unsigned long limit;
+	unsigned long phys;
+} bat_addrs[4];
+
+/*
+ * Return PA for this VA if it is mapped by a BAT, or 0
+ */
+unsigned long v_mapped_by_bats(unsigned long va)
+{
+	int b;
+	for (b = 0; b < 4; ++b)
+		if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
+			return bat_addrs[b].phys + (va - bat_addrs[b].start);
+	return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped
+ */
+unsigned long p_mapped_by_bats(unsigned long pa)
+{
+	int b;
+	for (b = 0; b < 4; ++b)
+		if (pa >= bat_addrs[b].phys
+	    	    && pa < (bat_addrs[b].limit-bat_addrs[b].start)
+		              +bat_addrs[b].phys)
+			return bat_addrs[b].start+(pa-bat_addrs[b].phys);
+	return 0;
+}
+
+unsigned long __init mmu_mapin_ram(void)
+{
+#ifdef CONFIG_POWER4
+	return 0;
+#else
+	unsigned long tot, bl, done;
+	unsigned long max_size = (256<<20);
+	unsigned long align;
+
+	if (__map_without_bats)
+		return 0;
+
+	/* Set up BAT2 and if necessary BAT3 to cover RAM. */
+
+	/* Make sure we don't map a block larger than the
+	   smallest alignment of the physical address. */
+	/* alignment of PPC_MEMSTART */
+	align = ~(PPC_MEMSTART-1) & PPC_MEMSTART;
+	/* set BAT block size to MIN(max_size, align) */
+	if (align && align < max_size)
+		max_size = align;
+
+	tot = total_lowmem;
+	for (bl = 128<<10; bl < max_size; bl <<= 1) {
+		if (bl * 2 > tot)
+			break;
+	}
+
+	setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM);
+	done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1;
+	if ((done < tot) && !bat_addrs[3].limit) {
+		/* use BAT3 to cover a bit more */
+		tot -= done;
+		for (bl = 128<<10; bl < max_size; bl <<= 1)
+			if (bl * 2 > tot)
+				break;
+		setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM);
+		done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1;
+	}
+
+	return done;
+#endif
+}
+
+/*
+ * Set up one of the I/D BAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 2 between 128k and 256M.
+ */
+void __init setbat(int index, unsigned long virt, unsigned long phys,
+		   unsigned int size, int flags)
+{
+	unsigned int bl;
+	int wimgxpp;
+	union ubat *bat = BATS[index];
+
+	if (((flags & _PAGE_NO_CACHE) == 0) &&
+	    cpu_has_feature(CPU_FTR_NEED_COHERENT))
+		flags |= _PAGE_COHERENT;
+
+	bl = (size >> 17) - 1;
+	if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
+		/* 603, 604, etc. */
+		/* Do DBAT first */
+		wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+				   | _PAGE_COHERENT | _PAGE_GUARDED);
+		wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
+		bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+		bat[1].word[1] = phys | wimgxpp;
+#ifndef CONFIG_KGDB /* want user access for breakpoints */
+		if (flags & _PAGE_USER)
+#endif
+			bat[1].bat.batu.vp = 1;
+		if (flags & _PAGE_GUARDED) {
+			/* G bit must be zero in IBATs */
+			bat[0].word[0] = bat[0].word[1] = 0;
+		} else {
+			/* make IBAT same as DBAT */
+			bat[0] = bat[1];
+		}
+	} else {
+		/* 601 cpu */
+		if (bl > BL_8M)
+			bl = BL_8M;
+		wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+				   | _PAGE_COHERENT);
+		wimgxpp |= (flags & _PAGE_RW)?
+			((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
+		bat->word[0] = virt | wimgxpp | 4;	/* Ks=0, Ku=1 */
+		bat->word[1] = phys | bl | 0x40;	/* V=1 */
+	}
+
+	bat_addrs[index].start = virt;
+	bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
+	bat_addrs[index].phys = phys;
+}
+
+/*
+ * Initialize the hash table and patch the instructions in hashtable.S.
+ */
+void __init MMU_init_hw(void)
+{
+	unsigned int hmask, mb, mb2;
+	unsigned int n_hpteg, lg_n_hpteg;
+
+	extern unsigned int hash_page_patch_A[];
+	extern unsigned int hash_page_patch_B[], hash_page_patch_C[];
+	extern unsigned int hash_page[];
+	extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[];
+
+	if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) {
+		/*
+		 * Put a blr (procedure return) instruction at the
+		 * start of hash_page, since we can still get DSI
+		 * exceptions on a 603.
+		 */
+		hash_page[0] = 0x4e800020;
+		flush_icache_range((unsigned long) &hash_page[0],
+				   (unsigned long) &hash_page[1]);
+		return;
+	}
+
+	if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
+
+#ifdef CONFIG_PPC64BRIDGE
+#define LG_HPTEG_SIZE	7		/* 128 bytes per HPTEG */
+#define SDR1_LOW_BITS	(lg_n_hpteg - 11)
+#define MIN_N_HPTEG	2048		/* min 256kB hash table */
+#else
+#define LG_HPTEG_SIZE	6		/* 64 bytes per HPTEG */
+#define SDR1_LOW_BITS	((n_hpteg - 1) >> 10)
+#define MIN_N_HPTEG	1024		/* min 64kB hash table */
+#endif
+
+#ifdef CONFIG_POWER4
+	/* The hash table has already been allocated and initialized
+	   in prom.c */
+	n_hpteg = Hash_size >> LG_HPTEG_SIZE;
+	lg_n_hpteg = __ilog2(n_hpteg);
+
+	/* Remove the hash table from the available memory */
+	if (Hash)
+		reserve_phys_mem(__pa(Hash), Hash_size);
+
+#else /* CONFIG_POWER4 */
+	/*
+	 * Allow 1 HPTE (1/8 HPTEG) for each page of memory.
+	 * This is less than the recommended amount, but then
+	 * Linux ain't AIX.
+	 */
+	n_hpteg = total_memory / (PAGE_SIZE * 8);
+	if (n_hpteg < MIN_N_HPTEG)
+		n_hpteg = MIN_N_HPTEG;
+	lg_n_hpteg = __ilog2(n_hpteg);
+	if (n_hpteg & (n_hpteg - 1)) {
+		++lg_n_hpteg;		/* round up if not power of 2 */
+		n_hpteg = 1 << lg_n_hpteg;
+	}
+	Hash_size = n_hpteg << LG_HPTEG_SIZE;
+
+	/*
+	 * Find some memory for the hash table.
+	 */
+	if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
+	Hash = mem_pieces_find(Hash_size, Hash_size);
+	cacheable_memzero(Hash, Hash_size);
+	_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
+#endif /* CONFIG_POWER4 */
+
+	Hash_end = (PTE *) ((unsigned long)Hash + Hash_size);
+
+	printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n",
+	       total_memory >> 20, Hash_size >> 10, Hash);
+
+
+	/*
+	 * Patch up the instructions in hashtable.S:create_hpte
+	 */
+	if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345);
+	Hash_mask = n_hpteg - 1;
+	hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
+	mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
+	if (lg_n_hpteg > 16)
+		mb2 = 16 - LG_HPTEG_SIZE;
+
+	hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff)
+		| ((unsigned int)(Hash) >> 16);
+	hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6);
+	hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6);
+	hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask;
+	hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask;
+
+	/*
+	 * Ensure that the locations we've patched have been written
+	 * out from the data cache and invalidated in the instruction
+	 * cache, on those machines with split caches.
+	 */
+	flush_icache_range((unsigned long) &hash_page_patch_A[0],
+			   (unsigned long) &hash_page_patch_C[1]);
+
+	/*
+	 * Patch up the instructions in hashtable.S:flush_hash_page
+	 */
+	flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff)
+		| ((unsigned int)(Hash) >> 16);
+	flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6);
+	flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6);
+	flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask;
+	flush_icache_range((unsigned long) &flush_hash_patch_A[0],
+			   (unsigned long) &flush_hash_patch_B[1]);
+
+	if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
+}
diff --git a/arch/powerpc/mm/tlb.c b/arch/powerpc/mm/tlb.c
new file mode 100644
index 000000000000..6c3dc3c44c86
--- /dev/null
+++ b/arch/powerpc/mm/tlb.c
@@ -0,0 +1,183 @@
+/*
+ * This file contains the routines for TLB flushing.
+ * On machines where the MMU uses a hash table to store virtual to
+ * physical translations, these routines flush entries from the
+ * hash table also.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+#include "mmu_decl.h"
+
+/*
+ * Called when unmapping pages to flush entries from the TLB/hash table.
+ */
+void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr)
+{
+	unsigned long ptephys;
+
+	if (Hash != 0) {
+		ptephys = __pa(ptep) & PAGE_MASK;
+		flush_hash_pages(mm->context, addr, ptephys, 1);
+	}
+}
+
+/*
+ * Called by ptep_set_access_flags, must flush on CPUs for which the
+ * DSI handler can't just "fixup" the TLB on a write fault
+ */
+void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr)
+{
+	if (Hash != 0)
+		return;
+	_tlbie(addr);
+}
+
+/*
+ * Called at the end of a mmu_gather operation to make sure the
+ * TLB flush is completely done.
+ */
+void tlb_flush(struct mmu_gather *tlb)
+{
+	if (Hash == 0) {
+		/*
+		 * 603 needs to flush the whole TLB here since
+		 * it doesn't use a hash table.
+		 */
+		_tlbia();
+	}
+}
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ * since the hardware hash table functions as an extension of the
+ * tlb as far as the linux tables are concerned, flush it too.
+ *    -- Cort
+ */
+
+/*
+ * 750 SMP is a Bad Idea because the 750 doesn't broadcast all
+ * the cache operations on the bus.  Hence we need to use an IPI
+ * to get the other CPU(s) to invalidate their TLBs.
+ */
+#ifdef CONFIG_SMP_750
+#define FINISH_FLUSH	smp_send_tlb_invalidate(0)
+#else
+#define FINISH_FLUSH	do { } while (0)
+#endif
+
+static void flush_range(struct mm_struct *mm, unsigned long start,
+			unsigned long end)
+{
+	pmd_t *pmd;
+	unsigned long pmd_end;
+	int count;
+	unsigned int ctx = mm->context;
+
+	if (Hash == 0) {
+		_tlbia();
+		return;
+	}
+	start &= PAGE_MASK;
+	if (start >= end)
+		return;
+	end = (end - 1) | ~PAGE_MASK;
+	pmd = pmd_offset(pgd_offset(mm, start), start);
+	for (;;) {
+		pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
+		if (pmd_end > end)
+			pmd_end = end;
+		if (!pmd_none(*pmd)) {
+			count = ((pmd_end - start) >> PAGE_SHIFT) + 1;
+			flush_hash_pages(ctx, start, pmd_val(*pmd), count);
+		}
+		if (pmd_end == end)
+			break;
+		start = pmd_end + 1;
+		++pmd;
+	}
+}
+
+/*
+ * Flush kernel TLB entries in the given range
+ */
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	flush_range(&init_mm, start, end);
+	FINISH_FLUSH;
+}
+
+/*
+ * Flush all the (user) entries for the address space described by mm.
+ */
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	struct vm_area_struct *mp;
+
+	if (Hash == 0) {
+		_tlbia();
+		return;
+	}
+
+	for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
+		flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
+	FINISH_FLUSH;
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	struct mm_struct *mm;
+	pmd_t *pmd;
+
+	if (Hash == 0) {
+		_tlbie(vmaddr);
+		return;
+	}
+	mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm;
+	pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr);
+	if (!pmd_none(*pmd))
+		flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1);
+	FINISH_FLUSH;
+}
+
+/*
+ * For each address in the range, find the pte for the address
+ * and check _PAGE_HASHPTE bit; if it is set, find and destroy
+ * the corresponding HPTE.
+ */
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	flush_range(vma->vm_mm, start, end);
+	FINISH_FLUSH;
+}
author	Paul Mackerras	2005-09-26 16:04:21 +1000
committer	Paul Mackerras	2005-09-26 16:04:21 +1000
commit	14cf11af6cf608eb8c23e989ddb17a715ddce109 (patch)
tree	271a97ce73e265f39c569cb159c195c5b4bb3f8c /arch/powerpc/mm
parent	e5baa396af7560382d2cf3f0871d616b61fc284c (diff)