/* * linux/arch/arm/mm/dma-mapping.c * * Copyright (C) 2000-2004 Russell King * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * DMA uncached mapping support. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mm.h" /* * The DMA API is built upon the notion of "buffer ownership". A buffer * is either exclusively owned by the CPU (and therefore may be accessed * by it) or exclusively owned by the DMA device. These helper functions * represent the transitions between these two ownership states. * * Note, however, that on later ARMs, this notion does not work due to * speculative prefetches. We model our approach on the assumption that * the CPU does do speculative prefetches, which means we clean caches * before transfers and delay cache invalidation until transfer completion. * */ static void __dma_page_cpu_to_dev(struct page *, unsigned long, size_t, enum dma_data_direction); static void __dma_page_dev_to_cpu(struct page *, unsigned long, size_t, enum dma_data_direction); /** * arm_dma_map_page - map a portion of a page for streaming DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @page: page that buffer resides in * @offset: offset into page for start of buffer * @size: size of buffer to map * @dir: DMA transfer direction * * Ensure that any data held in the cache is appropriately discarded * or written back. * * The device owns this memory once this call has completed. The CPU * can regain ownership by calling dma_unmap_page(). */ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(page, offset, size, dir); return pfn_to_dma(dev, page_to_pfn(page)) + offset; } static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { return pfn_to_dma(dev, page_to_pfn(page)) + offset; } /** * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @handle: DMA address of buffer * @size: size of buffer (same as passed to dma_map_page) * @dir: DMA transfer direction (same as passed to dma_map_page) * * Unmap a page streaming mode DMA translation. The handle and size * must match what was provided in the previous dma_map_page() call. * All other usages are undefined. * * After this call, reads by the CPU to the buffer are guaranteed to see * whatever the device wrote there. */ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), handle & ~PAGE_MASK, size, dir); } static void arm_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); __dma_page_dev_to_cpu(page, offset, size, dir); } static void arm_dma_sync_single_for_device(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); __dma_page_cpu_to_dev(page, offset, size, dir); } struct dma_map_ops arm_dma_ops = { .alloc = arm_dma_alloc, .free = arm_dma_free, .mmap = arm_dma_mmap, .get_sgtable = arm_dma_get_sgtable, .map_page = arm_dma_map_page, .unmap_page = arm_dma_unmap_page, .map_sg = arm_dma_map_sg, .unmap_sg = arm_dma_unmap_sg, .sync_single_for_cpu = arm_dma_sync_single_for_cpu, .sync_single_for_device = arm_dma_sync_single_for_device, .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, .sync_sg_for_device = arm_dma_sync_sg_for_device, .set_dma_mask = arm_dma_set_mask, }; EXPORT_SYMBOL(arm_dma_ops); static void *arm_coherent_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs); static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs); struct dma_map_ops arm_coherent_dma_ops = { .alloc = arm_coherent_dma_alloc, .free = arm_coherent_dma_free, .mmap = arm_dma_mmap, .get_sgtable = arm_dma_get_sgtable, .map_page = arm_coherent_dma_map_page, .map_sg = arm_dma_map_sg, .set_dma_mask = arm_dma_set_mask, }; EXPORT_SYMBOL(arm_coherent_dma_ops); static u64 get_coherent_dma_mask(struct device *dev) { u64 mask = (u64)arm_dma_limit; if (dev) { mask = dev->coherent_dma_mask; /* * Sanity check the DMA mask - it must be non-zero, and * must be able to be satisfied by a DMA allocation. */ if (mask == 0) { dev_warn(dev, "coherent DMA mask is unset\n"); return 0; } if ((~mask) & (u64)arm_dma_limit) { dev_warn(dev, "coherent DMA mask %#llx is smaller " "than system GFP_DMA mask %#llx\n", mask, (u64)arm_dma_limit); return 0; } } return mask; } static void __dma_clear_buffer(struct page *page, size_t size) { void *ptr; /* * Ensure that the allocated pages are zeroed, and that any data * lurking in the kernel direct-mapped region is invalidated. */ ptr = page_address(page); if (ptr) { memset(ptr, 0, size); dmac_flush_range(ptr, ptr + size); outer_flush_range(__pa(ptr), __pa(ptr) + size); } } /* * Allocate a DMA buffer for 'dev' of size 'size' using the * specified gfp mask. Note that 'size' must be page aligned. */ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp) { unsigned long order = get_order(size); struct page *page, *p, *e; page = alloc_pages(gfp, order); if (!page) return NULL; /* * Now split the huge page and free the excess pages */ split_page(page, order); for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) __free_page(p); __dma_clear_buffer(page, size); return page; } /* * Free a DMA buffer. 'size' must be page aligned. */ static void __dma_free_buffer(struct page *page, size_t size) { struct page *e = page + (size >> PAGE_SHIFT); while (page < e) { __free_page(page); page++; } } #ifdef CONFIG_MMU #ifdef CONFIG_HUGETLB_PAGE #error ARM Coherent DMA allocator does not (yet) support huge TLB #endif static void *__alloc_from_contiguous(struct device *dev, size_t size, pgprot_t prot, struct page **ret_page); static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, pgprot_t prot, struct page **ret_page, const void *caller); static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) { struct vm_struct *area; unsigned long addr; /* * DMA allocation can be mapped to user space, so lets * set VM_USERMAP flags too. */ area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, caller); if (!area) return NULL; addr = (unsigned long)area->addr; area->phys_addr = __pfn_to_phys(page_to_pfn(page)); if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { vunmap((void *)addr); return NULL; } return (void *)addr; } static void __dma_free_remap(void *cpu_addr, size_t size) { unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP; struct vm_struct *area = find_vm_area(cpu_addr); if (!area || (area->flags & flags) != flags) { WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); return; } unmap_kernel_range((unsigned long)cpu_addr, size); vunmap(cpu_addr); } #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K struct dma_pool { size_t size; spinlock_t lock; unsigned long *bitmap; unsigned long nr_pages; void *vaddr; struct page **pages; }; static struct dma_pool atomic_pool = { .size = DEFAULT_DMA_COHERENT_POOL_SIZE, }; static int __init early_coherent_pool(char *p) { atomic_pool.size = memparse(p, &p); return 0; } early_param("coherent_pool", early_coherent_pool); void __init init_dma_coherent_pool_size(unsigned long size) { /* * Catch any attempt to set the pool size too late. */ BUG_ON(atomic_pool.vaddr); /* * Set architecture specific coherent pool size only if * it has not been changed by kernel command line parameter. */ if (atomic_pool.size == DEFAULT_DMA_COHERENT_POOL_SIZE) atomic_pool.size = size; } /* * Initialise the coherent pool for atomic allocations. */ static int __init atomic_pool_init(void) { struct dma_pool *pool = &atomic_pool; pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); gfp_t gfp = GFP_KERNEL | GFP_DMA; unsigned long nr_pages = pool->size >> PAGE_SHIFT; unsigned long *bitmap; struct page *page; struct page **pages; void *ptr; int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long); bitmap = kzalloc(bitmap_size, GFP_KERNEL); if (!bitmap) goto no_bitmap; pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); if (!pages) goto no_pages; if (IS_ENABLED(CONFIG_CMA)) ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page); else ptr = __alloc_remap_buffer(NULL, pool->size, gfp, prot, &page, NULL); if (ptr) { int i; for (i = 0; i < nr_pages; i++) pages[i] = page + i; spin_lock_init(&pool->lock); pool->vaddr = ptr; pool->pages = pages; pool->bitmap = bitmap; pool->nr_pages = nr_pages; pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n", (unsigned)pool->size / 1024); return 0; } kfree(pages); no_pages: kfree(bitmap); no_bitmap: pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", (unsigned)pool->size / 1024); return -ENOMEM; } /* * CMA is activated by core_initcall, so we must be called after it. */ postcore_initcall(atomic_pool_init); struct dma_contig_early_reserve { phys_addr_t base; unsigned long size; }; static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; static int dma_mmu_remap_num __initdata; void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { dma_mmu_remap[dma_mmu_remap_num].base = base; dma_mmu_remap[dma_mmu_remap_num].size = size; dma_mmu_remap_num++; } void __init dma_contiguous_remap(void) { int i; for (i = 0; i < dma_mmu_remap_num; i++) { phys_addr_t start = dma_mmu_remap[i].base; phys_addr_t end = start + dma_mmu_remap[i].size; struct map_desc map; unsigned long addr; if (end > arm_lowmem_limit) end = arm_lowmem_limit; if (start >= end) continue; map.pfn = __phys_to_pfn(start); map.virtual = __phys_to_virt(start); map.length = end - start; map.type = MT_MEMORY_DMA_READY; /* * Clear previous low-memory mapping */ for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); iotable_init(&map, 1); } } static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, void *data) { struct page *page = virt_to_page(addr); pgprot_t prot = *(pgprot_t *)data; set_pte_ext(pte, mk_pte(page, prot), 0); return 0; } static void __dma_remap(struct page *page, size_t size, pgprot_t prot) { unsigned long start = (unsigned long) page_address(page); unsigned end = start + size; apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); dsb(); flush_tlb_kernel_range(start, end); } static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, pgprot_t prot, struct page **ret_page, const void *caller) { struct page *page; void *ptr; page = __dma_alloc_buffer(dev, size, gfp); if (!page) return NULL; ptr = __dma_alloc_remap(page, size, gfp, prot, caller); if (!ptr) { __dma_free_buffer(page, size); return NULL; } *ret_page = page; return ptr; } static void *__alloc_from_pool(size_t size, struct page **ret_page) { struct dma_pool *pool = &atomic_pool; unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned int pageno; unsigned long flags; void *ptr = NULL; unsigned long align_mask; if (!pool->vaddr) { WARN(1, "coherent pool not initialised!\n"); return NULL; } /* * Align the region allocation - allocations from pool are rather * small, so align them to their order in pages, minimum is a page * size. This helps reduce fragmentation of the DMA space. */ align_mask = (1 << get_order(size)) - 1; spin_lock_irqsave(&pool->lock, flags); pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages, 0, count, align_mask); if (pageno < pool->nr_pages) { bitmap_set(pool->bitmap, pageno, count); ptr = pool->vaddr + PAGE_SIZE * pageno; *ret_page = pool->pages[pageno]; } else { pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n" "Please increase it with coherent_pool= kernel parameter!\n", (unsigned)pool->size / 1024); } spin_unlock_irqrestore(&pool->lock, flags); return ptr; } static bool __in_atomic_pool(void *start, size_t size) { struct dma_pool *pool = &atomic_pool; void *end = start + size; void *pool_start = pool->vaddr; void *pool_end = pool->vaddr + pool->size; if (start < pool_start || start >= pool_end) return false; if (end <= pool_end) return true; WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n", start, end - 1, pool_start, pool_end - 1); return false; } static int __free_from_pool(void *start, size_t size) { struct dma_pool *pool = &atomic_pool; unsigned long pageno, count; unsigned long flags; if (!__in_atomic_pool(start, size)) return 0; pageno = (start - pool->vaddr) >> PAGE_SHIFT; count = size >> PAGE_SHIFT; spin_lock_irqsave(&pool->lock, flags); bitmap_clear(pool->bitmap, pageno, count); spin_unlock_irqrestore(&pool->lock, flags); return 1; } static void *__alloc_from_contiguous(struct device *dev, size_t size, pgprot_t prot, struct page **ret_page) { unsigned long order = get_order(size); size_t count = size >> PAGE_SHIFT; struct page *page; page = dma_alloc_from_contiguous(dev, count, order); if (!page) return NULL; __dma_clear_buffer(page, size); __dma_remap(page, size, prot); *ret_page = page; return page_address(page); } static void __free_from_contiguous(struct device *dev, struct page *page, size_t size) { __dma_remap(page, size, pgprot_kernel); dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); } static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) { prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ? pgprot_writecombine(prot) : pgprot_dmacoherent(prot); return prot; } #define nommu() 0 #else /* !CONFIG_MMU */ #define nommu() 1 #define __get_dma_pgprot(attrs, prot) __pgprot(0) #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL #define __alloc_from_pool(size, ret_page) NULL #define __alloc_from_contiguous(dev, size, prot, ret) NULL #define __free_from_pool(cpu_addr, size) 0 #define __free_from_contiguous(dev, page, size) do { } while (0) #define __dma_free_remap(cpu_addr, size) do { } while (0) #endif /* CONFIG_MMU */ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, struct page **ret_page) { struct page *page; page = __dma_alloc_buffer(dev, size, gfp); if (!page) return NULL; *ret_page = page; return page_address(page); } static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller) { u64 mask = get_coherent_dma_mask(dev); struct page *page = NULL; void *addr; #ifdef CONFIG_DMA_API_DEBUG u64 limit = (mask + 1) & ~mask; if (limit && size >= limit) { dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", size, mask); return NULL; } #endif if (!mask) return NULL; if (mask < 0xffffffffULL) gfp |= GFP_DMA; /* * Following is a work-around (a.k.a. hack) to prevent pages * with __GFP_COMP being passed to split_page() which cannot * handle them. The real problem is that this flag probably * should be 0 on ARM as it is not supported on this * platform; see CONFIG_HUGETLBFS. */ gfp &= ~(__GFP_COMP); *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); if (is_coherent || nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (!(gfp & __GFP_WAIT)) addr = __alloc_from_pool(size, &page); else if (!IS_ENABLED(CONFIG_CMA)) addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); else addr = __alloc_from_contiguous(dev, size, prot, &page); if (addr) *handle = pfn_to_dma(dev, page_to_pfn(page));