/* * linux/arch/alpha/kernel/core_cia.c * * Written by David A Rusling (david.rusling@reo.mts.dec.com). * December 1995. * * Copyright (C) 1995 David A Rusling * Copyright (C) 1997, 1998 Jay Estabrook * Copyright (C) 1998, 1999, 2000 Richard Henderson * * Code common to all CIA core logic chips. */ #define __EXTERN_INLINE inline #include #include #undef __EXTERN_INLINE #include #include #include #include #include #include #include #include "proto.h" #include "pci_impl.h" /* * NOTE: Herein lie back-to-back mb instructions. They are magic. * One plausible explanation is that the i/o controller does not properly * handle the system transaction. Another involves timing. Ho hum. */ #define DEBUG_CONFIG 0 #if DEBUG_CONFIG # define DBGC(args) printk args #else # define DBGC(args) #endif #define vip volatile int * /* * Given a bus, device, and function number, compute resulting * configuration space address. It is therefore not safe to have * concurrent invocations to configuration space access routines, but * there really shouldn't be any need for this. * * Type 0: * * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | | |D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|0| * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * 31:11 Device select bit. 
* 10:8 Function number * 7:2 Register number * * Type 1: * * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * 31:24 reserved * 23:16 bus number (8 bits = 128 possible buses) * 15:11 Device number (5 bits) * 10:8 function number * 7:2 register number * * Notes: * The function number selects which function of a multi-function device * (e.g., SCSI and Ethernet). * * The register selects a DWORD (32 bit) register offset. Hence it * doesn't get shifted by 2 bits as we want to "drop" the bottom two * bits. */ static int mk_conf_addr(struct pci_bus *bus_dev, unsigned int device_fn, int where, unsigned long *pci_addr, unsigned char *type1) { u8 bus = bus_dev->number; *type1 = (bus != 0); *pci_addr = (bus << 16) | (device_fn << 8) | where; DBGC(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x," " returning address 0x%p\n" bus, device_fn, where, *pci_addr)); return 0; } static unsigned int conf_read(unsigned long addr, unsigned char type1) { unsigned long flags; int stat0, value; int cia_cfg = 0; DBGC(("conf_read(addr=0x%lx, type1=%d) ", addr, type1)); local_irq_save(flags); /* Reset status register to avoid losing errors. */ stat0 = *(vip)CIA_IOC_CIA_ERR; *(vip)CIA_IOC_CIA_ERR = stat0; mb(); *(vip)CIA_IOC_CIA_ERR; /* re-read to force write */ /* If Type1 access, must set CIA CFG. */ if (type1) { cia_cfg = *(vip)CIA_IOC_CFG; *(vip)CIA_IOC_CFG = (cia_cfg & ~3) | 1; mb(); *(vip)CIA_IOC_CFG; } mb(); draina(); mcheck_expected(0) = 1; mcheck_taken(0) = 0; mb(); /* Access configuration space. 
*/ value = *(vip)addr; mb(); mb(); /* magic */ if (mcheck_taken(0)) { mcheck_taken(0) = 0; value = 0xffffffff; mb(); } mcheck_expected(0) = 0; mb(); /* If Type1 access, must reset IOC CFG so normal IO space ops work. */ if (type1) { *(vip)CIA_IOC_CFG = cia_cfg; mb(); *(vip)CIA_IOC_CFG; } local_irq_restore(flags); DBGC(("done\n")); return value; } static void conf_write(unsigned long addr, unsigned int value, unsigned char type1) { unsigned long flags; int stat0, cia_cfg = 0; DBGC(("conf_write(addr=0x%lx, type1=%d) ", addr, type1)); local_irq_save(flags); /* Reset status register to avoid losing errors. */ stat0 = *(vip)CIA_IOC_CIA_ERR; *(vip)CIA_IOC_CIA_ERR = stat0; mb(); *(vip)CIA_IOC_CIA_ERR; /* re-read to force write */ /* If Type1 access, must set CIA CFG. */ if (type1) { cia_cfg = *(vip)CIA_IOC_CFG; *(vip)CIA_IOC_CFG = (cia_cfg & ~3) | 1; mb(); *(vip)CIA_IOC_CFG; } mb(); draina(); mcheck_expected(0) = 1; mcheck_taken(0) = 0; mb(); /* Access configuration space. */ *(vip)addr = value; mb(); *(vip)addr; /* read back to force the write */ mcheck_expected(0) = 0; mb(); /* If Type1 access, must reset IOC CFG so normal IO space ops work. 
*/ if (type1) { *(vip)CIA_IOC_CFG = cia_cfg; mb(); *(vip)CIA_IOC_CFG; } local_irq_restore(flags); DBGC(("done\n")); } static int cia_read_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) { unsigned long addr, pci_addr; long mask; unsigned char type1; int shift; if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) return PCIBIOS_DEVICE_NOT_FOUND; mask = (size - 1) * 8; shift = (where & 3) * 8; addr = (pci_addr << 5) + mask + CIA_CONF; *value = conf_read(addr, type1) >> (shift); return PCIBIOS_SUCCESSFUL; } static int cia_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) { unsigned long addr, pci_addr; long mask; unsigned char type1; if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) return PCIBIOS_DEVICE_NOT_FOUND; mask = (size - 1) * 8; addr = (pci_addr << 5) + mask + CIA_CONF; conf_write(addr, value << ((where & 3) * 8), type1); return PCIBIOS_SUCCESSFUL; } struct pci_ops cia_pci_ops = { .read = cia_read_config, .write = cia_write_config, }; /* * CIA Pass 1 and PYXIS Pass 1 and 2 have a broken scatter-gather tlb. * It cannot be invalidated. Rather than hard code the pass numbers, * actually try the tbia to see if it works. */ void cia_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) { wmb(); *(vip)CIA_IOC_PCI_TBIA = 3; /* Flush all locked and unlocked. */ mb(); *(vip)CIA_IOC_PCI_TBIA; } /* * On PYXIS, even if the tbia works, we cannot use it. It effectively locks * the chip (as well as direct write to the tag registers) if there is a * SG DMA operation in progress. This is true at least for PYXIS rev. 1, * so always use the method below. */ /* * This is the method NT and NetBSD use. * * Allocate mappings, and put the chip into DMA loopback mode to read a * garbage page. This works by causing TLB misses, causing old entries to * be purged to make room for the new entries coming in for the garbage page. 
*/

/* PCI bus address used as the base of the workaround window.  */
#define CIA_BROKEN_TBIA_BASE	0x30000000
/* Size in bytes of the dummy PTE table built by
   cia_prepare_tbia_workaround().  */
#define CIA_BROKEN_TBIA_SIZE	1024

/*
 * Flush the scatter-gather TLB without using TBIA: enable PCI loopback
 * in CIA_IOC_CIA_CTRL, issue four reads 32KB apart through a mapping of
 * CIA_BROKEN_TBIA_BASE, then restore the saved control value.
 *
 * The hose, start and end arguments are not used by this implementation.
 */
/* Always called with interrupts disabled */
void
cia_pci_tbi_try2(struct pci_controller *hose,
		 dma_addr_t start, dma_addr_t end)
{
	void __iomem *bus_addr;
	int ctrl;

	/* Put the chip into PCI loopback mode.  */
	mb();
	ctrl = *(vip)CIA_IOC_CIA_CTRL;
	*(vip)CIA_IOC_CIA_CTRL = ctrl | CIA_CTRL_PCI_LOOP_EN;
	mb();
	*(vip)CIA_IOC_CIA_CTRL;	/* read back after the write */
	mb();

	/* Read from PCI dense memory space at TBI_ADDR, skipping 32k on
	   each read.  This forces SG TLB misses.  NetBSD claims that the
	   TLB entries are not quite LRU, meaning that we need to read more
	   times than there are actual tags.  The 2117x docs claim strict
	   round-robin.  Oh well, we've come this far...  */
	/* Even better - as seen on the PYXIS rev 1 the TLB tags 0-3 can
	   be filled by the TLB misses *only once* after being invalidated
	   (by tbia or direct write).  Next misses won't update them even
	   though the lock bits are cleared.  Tags 4-7 are "quite LRU" though,
	   so use them and read at window 3 base exactly 4 times.  Reading
	   more sometimes makes the chip crazy.  -ink */

	bus_addr = cia_ioremap(CIA_BROKEN_TBIA_BASE, 32768 * 4);

	/* Four reads, one per 32KB step; the values read are discarded.  */
	cia_readl(bus_addr + 0x00000);
	cia_readl(bus_addr + 0x08000);
	cia_readl(bus_addr + 0x10000);
	cia_readl(bus_addr + 0x18000);

	cia_iounmap(bus_addr);

	/* Restore normal PCI operation.  */
	mb();
	*(vip)CIA_IOC_CIA_CTRL = ctrl;
	mb();
	*(vip)CIA_IOC_CIA_CTRL;
	mb();
}

/*
 * Program PCI window WINDOW for the TBIA workaround: allocate a
 * CIA_BROKEN_TBIA_SIZE-byte table (alignment argument 32768), fill every
 * entry with the same PTE, which is derived from the table's own physical
 * address, and then load the window's base, mask and translated-base
 * registers.
 */
static inline void
cia_prepare_tbia_workaround(int window)
{
	unsigned long *ppte, pte;
	long i;

	/* Use minimal 1K map. */
	ppte = __alloc_bootmem(CIA_BROKEN_TBIA_SIZE, 32768, 0);
	/* Every entry maps to the page holding the table itself.  */
	pte = (virt_to_phys(ppte) >> (PAGE_SHIFT - 1)) | 1;

	for (i = 0; i < CIA_BROKEN_TBIA_SIZE / sizeof(unsigned long); ++i)
		ppte[i] = pte;

	/* NOTE(review): the low bits (| 3) are presumably the window enable
	   and scatter-gather enable flags -- confirm against the chip
	   documentation.  */
	*(vip)CIA_IOC_PCI_Wn_BASE(window) = CIA_BROKEN_TBIA_BASE | 3;
	/* With CIA_BROKEN_TBIA_SIZE == 1024 this expression evaluates to 0.  */
	*(vip)CIA_IOC_PCI_Wn_MASK(window)
	  = (CIA_BROKEN_TBIA_SIZE*1024 - 1) & 0xfff00000;
	*(vip)CIA_IOC_PCI_Tn_BASE(window) = virt_to_phys(ppte) >> 2;
}

static void __init
verify_tb_operation(void)
{
	static int page[PAGE_SIZE/4]
		__attribute__((aligned(PAGE_SIZE)))
		__initdata = { 0 };

	struct pci_iommu_arena *arena = pci_isa_hose->sg_isa;
	int ctrl, addr0, tag0, pte0, data0;
	int temp, use_tbia_try2 = 0;
	void __iomem *bus_addr;

	/* pyxis -- tbia is broken */
	if (pci_isa_hose->dense_io_base)
		use_tbia_try2 = 1;

	/* Put the chip into PCI loopback mode.  */
	mb();
	ctrl = *(vip)CIA_IOC_CIA_CTRL;
	*(vip)CIA_IOC_CIA_CTRL = ctrl | CIA_CTRL_PCI_LOOP_EN;
	mb();
	*(vip)CIA_IOC_CIA_CTRL;
	mb();

	/* Write a valid entry directly into the TLB registers.  */

	addr0 = arena->dma_base;
	tag0 = addr0 | 1;
	pte0 = (virt_to_phys(page) >> (PAGE_SHIFT - 1)) | 1;

	*(vip)CIA_IOC_TB_TAGn(0) = tag0;
	*(vip)CIA_IOC_TB_TAGn(1) = 0;
	*(vip)CIA_IOC_TB_TAGn(2) = 0;
	*(vip)CIA_IOC_TB_TAGn(3) = 0;
	*(vip)CIA_IOC_TB_TAGn(4) = 0;
	*(vip)CIA_IOC_TB_TAGn(5) = 0;
	*(vip)CIA_IOC_TB_TAGn(6) = 0;
	*(vip)CIA_IOC_TB_TAGn(7) = 0;
	*(vip)CIA_IOC_TBn_PAGEm(0,0) = pte0;
	*(vip)CIA_IOC_TBn_PAGEm(0,1) = 0;
	*(vip)CIA_IOC_TBn_PAGEm(0,2) = 0;
	*(vip)CIA_IOC_TBn_PAGEm(0,3) = 0;
	mb();

	/* Get a usable bus address */
	bus_addr = cia_ioremap(addr0, 8*PAGE_SIZE);

	/* First, verify we can read back what we've written.  If
	   this fails, we can't be sure of any of the other testing
	   we're going to do, so bail.  */
	/* ??? Actually, we could do the work with machine checks.
	   By passing this register update test, we pretty much
	   guarantee that cia_pci_tbi_try1 works.  If this test
	   fails, cia_pci_tbi_try2 might still work.
*/ temp = *(vip)CIA_IOC_TB_TAGn(0); if (temp != tag0) { printk("pci: failed tb register update test " "(tag0 %#x != %#x)\n", temp, tag0); goto failed; } temp = *(vip)CIA_IOC_TB_TAGn(1); if (temp != 0) { printk("pci: failed tb register update test " "(tag1 %#x != 0)\n", temp); goto failed; } temp = *(vip)CIA_IOC_TBn_PAGEm(0,0); if (temp != pte0) { printk("pci: failed tb register update test " "(pte0 %#x != %#x)\n", temp, pte0); goto failed; } printk("pci: passed tb register update test\n"); /* Second, verify we can actually do I/O through this entry. */ data0 = 0xdeadbeef; page[0] = data0; mcheck_expected(0) = 1; mcheck_taken(0) = 0;