/*
 *  linux/arch/arm/mm/fault.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Modifications for ARM processor (c) 1995-2004 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>

#include <asm/exception.h>
#include <asm/pgtable.h>
#include <asm/system_misc.h>
#include <asm/system_info.h>
#include <asm/tlbflush.h>

#include "fault.h"

#ifdef CONFIG_MMU

#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
{
	int ret = 0;

	if (!user_mode(regs)) {
		/* kprobe_running() needs smp_processor_id() */
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, fsr))
			ret = 1;
		preempt_enable();
	}

	return ret;
}
#else
static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
{
	return 0;
}
#endif
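/*
 * show_pte() below walks the same pgd -> pud -> pmd -> pte hierarchy
 * that the hardware walks when translating an address, printing each
 * level it finds.  Levels that are folded away (PTRS_PER_PUD or
 * PTRS_PER_PMD equal to 1, e.g. the pmd on the classic 2-level page
 * table format) are not printed separately.
 */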
/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;

	if (!mm)
		mm = &init_mm;

	printk(KERN_ALERT "pgd = %p\n", mm->pgd);
	pgd = pgd_offset(mm, addr);
	printk(KERN_ALERT "[%08lx] *pgd=%08llx",
			addr, (long long)pgd_val(*pgd));

	do {
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		if (pgd_none(*pgd))
			break;

		if (pgd_bad(*pgd)) {
			printk("(bad)");
			break;
		}

		pud = pud_offset(pgd, addr);
		if (PTRS_PER_PUD != 1)
			printk(", *pud=%08llx", (long long)pud_val(*pud));

		if (pud_none(*pud))
			break;

		if (pud_bad(*pud)) {
			printk("(bad)");
			break;
		}

		pmd = pmd_offset(pud, addr);
		if (PTRS_PER_PMD != 1)
			printk(", *pmd=%08llx", (long long)pmd_val(*pmd));

		if (pmd_none(*pmd))
			break;

		if (pmd_bad(*pmd)) {
			printk("(bad)");
			break;
		}

		/* We must not map this if we have highmem enabled */
		if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
			break;

		pte = pte_offset_map(pmd, addr);
		printk(", *pte=%08llx", (long long)pte_val(*pte));
#ifndef CONFIG_ARM_LPAE
		printk(", *ppte=%08llx",
		       (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
#endif
		pte_unmap(pte);
	} while (0);

	printk("\n");
}
#else					/* CONFIG_MMU */
void show_pte(struct mm_struct *mm, unsigned long addr)
{ }
#endif					/* CONFIG_MMU */

/*
 * Oops.  The kernel tried to access some page that wasn't present.
 */
static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
		  struct pt_regs *regs)
{
	/*
	 * Are we prepared to handle this kernel fault?
	 */
	if (fixup_exception(regs))
		return;

	/*
	 * No handler, we'll have to terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	printk(KERN_ALERT
		"Unable to handle kernel %s at virtual address %08lx\n",
		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
		"paging request", addr);

	show_pte(mm, addr);
	die("Oops", regs, fsr);
	bust_spinlocks(0);
	do_exit(SIGKILL);
}

/*
 * Something tried to access memory that isn't in our memory map..
 * User mode accesses just cause a SIGSEGV
 */
static void
__do_user_fault(struct task_struct *tsk, unsigned long addr,
		unsigned int fsr, unsigned int sig, int code,
		struct pt_regs *regs)
{
	struct siginfo si;

#ifdef CONFIG_DEBUG_USER
	if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
	    ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
		printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
		       tsk->comm, sig, addr, fsr);
		show_pte(tsk->mm, addr);
		show_regs(regs);
	}
#endif

	tsk->thread.address = addr;
	tsk->thread.error_code = fsr;
	tsk->thread.trap_no = 14;
	si.si_signo = sig;
	si.si_errno = 0;
	si.si_code = code;
	si.si_addr = (void __user *)addr;
	force_sig_info(sig, &si, tsk);
}

void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->active_mm;

	/*
	 * If we are in kernel mode at this point, we
	 * have no context to handle this fault with.
	 */
	if (user_mode(regs))
		__do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
	else
		__do_kernel_fault(mm, addr, fsr, regs);
}

#ifdef CONFIG_MMU
#define VM_FAULT_BADMAP		0x010000
#define VM_FAULT_BADACCESS	0x020000

/*
 * Check that the permissions on the VMA allow for the fault which
 * occurred.  If we encountered a write fault, we must have write
 * permission, otherwise we allow any permission.
 */
static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
{
	unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;

	if (fsr & FSR_WRITE)
		mask = VM_WRITE;
	if (fsr & FSR_LNX_PF)
		mask = VM_EXEC;

	return vma->vm_flags & mask ? false : true;
}
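/*
 * Worked example (illustrative): a store to a read-only mapping
 * arrives with FSR_WRITE set, so the required mask is VM_WRITE.  The
 * vma lacks VM_WRITE, access_error() returns true, and
 * __do_page_fault() below turns that into VM_FAULT_BADACCESS, which
 * do_page_fault() then reports as SIGSEGV with SEGV_ACCERR.
 */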
static int __kprobes
__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
		unsigned int flags, struct task_struct *tsk)
{
	struct vm_area_struct *vma;
	int fault;

	vma = find_vma(mm, addr);
	fault = VM_FAULT_BADMAP;
	if (unlikely(!vma))
		goto out;
	if (unlikely(vma->vm_start > addr))
		goto check_stack;

	/*
	 * Ok, we have a good vm_area for this
	 * memory access, so we can handle it.
	 */
good_area:
	if (access_error(fsr, vma)) {
		fault = VM_FAULT_BADACCESS;
		goto out;
	}

	return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags);

check_stack:
	/* Don't allow expansion below FIRST_USER_ADDRESS */
	if (vma->vm_flags & VM_GROWSDOWN &&
	    addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr))
		goto good_area;
out:
	return fault;
}
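/*
 * do_page_fault() below follows the usual architecture pattern: take
 * mmap_sem for reading, call __do_page_fault(), and if the core VM
 * asks for a retry (VM_FAULT_RETRY), repeat once with
 * FAULT_FLAG_ALLOW_RETRY cleared and FAULT_FLAG_TRIED set so that a
 * second retry cannot be requested.  Major/minor fault accounting is
 * only done on the initial attempt.
 */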
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	int fault, sig, code;
	int write = fsr & FSR_WRITE;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
				(write ? FAULT_FLAG_WRITE : 0);

	if (notify_page_fault(regs, fsr))
		return 0;

	tsk = current;
	mm  = tsk->mm;

	/* Enable interrupts if they were enabled in the parent context. */
	if (interrupts_enabled(regs))
		local_irq_enable();

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto no_context;

	/*
	 * As per x86, we may deadlock here.  However, since the kernel only
	 * validly references user space from well defined areas of the code,
	 * we can bug out early if this is from code which shouldn't.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
			goto no_context;
retry:
		down_read(&mm->mmap_sem);
	} else {
		/*
		 * The above down_read_trylock() might have succeeded in
		 * which case, we'll have missed the might_sleep() from
		 * down_read().
		 */
		might_sleep();
#ifdef CONFIG_DEBUG_VM
		if (!user_mode(regs) &&
		    !search_exception_tables(regs->ARM_pc))
			goto no_context;
#endif
	}

	fault = __do_page_fault(mm, addr, fsr, flags, tsk);

	/* If we need to retry but a fatal signal is pending, handle the
	 * signal first.  We do not need to release the mmap_sem because
	 * it would already be released in __lock_page_or_retry in
	 * mm/filemap.c. */
	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return 0;

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt.  If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
	if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
					regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
					regs, addr);
		}
		if (fault & VM_FAULT_RETRY) {
			/*
			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation.
			 */
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);

	/*
	 * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
	 */
	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
		return 0;

	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we
		 * got oom-killed).
		 */
		pagefault_out_of_memory();
		return 0;
	}

	/*
	 * If we are in kernel mode at this point, we
	 * have no context to handle this fault with.
	 */
	if (!user_mode(regs))
		goto no_context;

	if (fault & VM_FAULT_SIGBUS) {
		/*
		 * We had some memory, but were unable to
		 * successfully fix up this page fault.
		 */
		sig = SIGBUS;
		code = BUS_ADRERR;
	} else {
		/*
		 * Something tried to access memory that
		 * isn't in our memory map..
		 */
		sig = SIGSEGV;
		code = fault == VM_FAULT_BADACCESS ?
			SEGV_ACCERR : SEGV_MAPERR;
	}

	__do_user_fault(tsk, addr, fsr, sig, code, regs);
	return 0;

no_context:
	__do_kernel_fault(mm, addr, fsr, regs);
	return 0;
}
#else					/* CONFIG_MMU */
static int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	return 0;
}
#endif					/* CONFIG_MMU */

/*
 * First Level Translation Fault Handler
 *
 * We enter here because the first level page table doesn't contain
 * a valid entry for the address.
 *
 * If the address is in kernel space (>= TASK_SIZE), then we are
 * probably faulting in the vmalloc() area.
 *
 * If the init_task's first level page tables contain the relevant
 * entry, we copy it to this task.  If not, we send the process
 * a signal, fixup the exception, or oops the kernel.
 *
 * NOTE! We MUST NOT take any locks for this case. We may be in an
 * interrupt or a critical region, and should only copy the information
 * from the master page table, nothing more.
 */
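/*
 * Example (illustrative): a task whose pgd was allocated before a
 * later vmalloc() mapping was created touches that mapping for the
 * first time.  The access faults here, the matching first-level entry
 * is copied from init_mm.pgd, and the faulting instruction is simply
 * restarted - no signal is sent and no locks are taken.
 */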
#ifdef CONFIG_MMU
static int __kprobes
do_translation_fault(unsigned long addr, unsigned int fsr,
		     struct pt_regs *regs)
{
	unsigned int index;
	pgd_t *pgd, *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	if (addr < TASK_SIZE)
		return do_page_fault(addr, fsr, regs);

	if (user_mode(regs))
		goto bad_area;

	index = pgd_index(addr);

	pgd = cpu_get_pgd() + index;
	pgd_k = init_mm.pgd + index;

	if (pgd_none(*pgd_k))
		goto bad_area;
	if (!pgd_present(*pgd))
		set_pgd(pgd, *pgd_k);

	pud = pud_offset(pgd, addr);
	pud_k = pud_offset(pgd_k, addr);

	if (pud_none(*pud_k))
		goto bad_area;
	if (!pud_present(*pud))
		set_pud(pud, *pud_k);

	pmd = pmd_offset(pud, addr);
	pmd_k = pmd_offset(pud_k, addr);

#ifdef CONFIG_ARM_LPAE
	/*
	 * Only one hardware entry per PMD with LPAE.
	 */
	index = 0;
#else
	/*
	 * On ARM one Linux PGD entry contains two hardware entries (see page
	 * tables layout in pgtable.h).  We normally guarantee that we always
	 * fill both L1 entries.  But create_mapping() doesn't follow the rule.
	 * It can create individual L1 entries, so here we have to call
	 * pmd_none() on the entry that really corresponds to the address,
	 * not on the first entry of the pair.
	 */
	index = (addr >> SECTION_SHIFT) & 1;
#endif
	if (pmd_none(pmd_k[index]))
		goto bad_area;

	copy_pmd(pmd, pmd_k);
	return 0;

bad_area:
	do_bad_area(addr, fsr, regs);
	return 0;
}
#else					/* CONFIG_MMU */
static int
do_translation_fault(unsigned long addr, unsigned int fsr,
		     struct pt_regs *regs)
{
	return 0;
}
#endif					/* CONFIG_MMU */

/*
 * Some section permission faults need to be handled gracefully.
 * They can happen due to a __{get,put}_user during an oops.
 */
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	do_bad_area(addr, fsr, regs);
	return 0;
}

/*
 * This abort handler always returns "fault".
 */
static int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	return 1;
}

struct fsr_info {
	int	(*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
	int	sig;
	int	code;
	const char *name;
};

/* FSR definition */
#ifdef CONFIG_ARM_LPAE
#include "fsr-3level.c"
#else
#include "fsr-2level.c"
#endif

void __init
hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
		int sig, int code, const char *name)
{
	if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
		BUG();

	fsr_info[nr].fn   = fn;
	fsr_info[nr].sig  = sig;
	fsr_info[nr].code = code;
	fsr_info[nr].name = name;
}
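/*
 * Illustrative use of hook_fault_code() (the handler name here is
 * hypothetical): platform code that can decode an imprecise external
 * abort might install its own handler during early init:
 *
 *	hook_fault_code(16 + 6, board_l2_abort_handler, SIGBUS, 0,
 *			"imprecise external abort");
 *
 * The fault status numbers and the default handlers come from the
 * fsr_info[] table in fsr-2level.c / fsr-3level.c.
 */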
/*
 * Dispatch a data abort to the relevant handler.
 */
asmlinkage void __exception
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
	struct siginfo info;

	if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
		return;

	printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
		inf->name, fsr, addr);

	info.si_signo = inf->sig;
	info.si_errno = 0;
	info.si_code  = inf->code;
	info.si_addr  = (void __user *)addr;
	arm_notify_die("", regs, &info, fsr, 0);
}