Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- diff --git a/sys/arch/amd64/amd64/conf.c b/sys/arch/amd64/amd64/conf.c
- index ece073225..ad10a38a1 100644
- --- a/sys/arch/amd64/amd64/conf.c
- +++ b/sys/arch/amd64/amd64/conf.c
- @@ -103,7 +103,7 @@ int nblkdev = nitems(bdevsw);
- (dev_type_write((*))) enodev, \
- dev_init(c,n,ioctl), \
- (dev_type_stop((*))) enodev, 0, seltrue, \
- - (dev_type_mmap((*))) enodev, 0, 0, seltrue_kqfilter }
- + dev_init(c,n,mmap) }
- #define mmread mmrw
- #define mmwrite mmrw
- diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c
- index 84fcb23a5..a71765555 100644
- --- a/sys/arch/amd64/amd64/vmm.c
- +++ b/sys/arch/amd64/amd64/vmm.c
- @@ -41,10 +41,17 @@
- #include <dev/isa/isareg.h>
- #include <dev/pv/pvreg.h>
- +#include <dev/pci/pcireg.h>
- +#include <dev/pci/pcivar.h>
- +#include <dev/pci/pcidevs.h>
- +
- /* #define VMM_DEBUG */
- void *l1tf_flush_region;
- +extern void *_iommu_domain(int, int, int, int, int *);
- +extern void _iommu_map(void *, vaddr_t, bus_addr_t, bus_size_t);
- +
- #ifdef VMM_DEBUG
- #define DPRINTF(x...) do { printf(x); } while(0)
- #else
- @@ -114,6 +121,7 @@ void vmm_attach(struct device *, struct device *, void *);
- int vmmopen(dev_t, int, int, struct proc *);
- int vmmioctl(dev_t, u_long, caddr_t, int, struct proc *);
- int vmmclose(dev_t, int, int, struct proc *);
- +paddr_t vmmmmap(dev_t, off_t, int);
- int vmm_start(void);
- int vmm_stop(void);
- size_t vm_create_check_mem_ranges(struct vm_create_params *);
- @@ -303,6 +311,252 @@ extern struct gate_descriptor *idt;
- #define CR_CLTS 2
- #define CR_LMSW 3
- +/* Keep track of interrupts for PCI device */
- +struct vppt {
- + pci_chipset_tag_t pc;
- + pcitag_t tag;
- + pci_intr_handle_t ih;
- + uint32_t pending;
- + void *cookie;
- + TAILQ_ENTRY(vppt) next;
- +};
- +TAILQ_HEAD(,vppt) vppts = TAILQ_HEAD_INITIALIZER(vppts);
- +
- +void
- +vmm_mapintr(pci_chipset_tag_t pc, struct pci_attach_args *pa) {
- + int bus, dev, fun;
- + struct vppt *ppt;
- +
- + TAILQ_FOREACH(ppt, &vppts, next) {
- + if (ppt->pc == pc && ppt->tag == pa->pa_tag)
- + return;
- + }
- +
- + /* Add PCI device to list */
- + ppt = malloc(sizeof(*ppt), M_DEVBUF, M_ZERO | M_WAITOK);
- + if (!ppt)
- + return;
- + TAILQ_INSERT_TAIL(&vppts, ppt, next);
- +
- + ppt->pc = pc;
- + ppt->tag = pa->pa_tag;
- + pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
- + printf("Check Interrupt: %d/%d/%d : %d\n", bus, dev, fun, pa->pa_intrpin);
- + if (pci_intr_map_msi(pa, &ppt->ih) || pci_intr_map(pa, &ppt->ih)) {
- + printf("Couldn't map %d/%d/%d\n", bus, dev, fun);
- + return;
- + }
- + printf("Mapped %d/%d/%d intr %d/%d\n", bus, dev, fun, ppt->ih.line, ppt->ih.pin);
- +}
- +
- +/* Issue PCI Read/Write to physical device */
- +static int
- +vm_pciio(struct vm_pciio *ptd)
- +{
- + pci_chipset_tag_t pc = NULL;
- + pcitag_t tag;
- +
- + if (ptd->reg & 3)
- + return (EINVAL);
- + tag = pci_make_tag(pc, ptd->bus, ptd->dev, ptd->func);
- + if (ptd->dir == VEI_DIR_OUT) {
- + pci_conf_write(pc, tag, ptd->reg, ptd->val);
- + } else {
- + ptd->val = pci_conf_read(pc, tag, ptd->reg);
- + }
- + return 0;
- +}
- +
- +/* Probably should pre-register bus_space_map/bus_space_read_xx? */
- +static int
- +vm_pio(struct vm_pio *pio)
- +{
- + bus_space_tag_t iot;
- + bus_space_handle_t ioh;
- + int rc;
- +
- + iot = (pio->type == 1 ? X86_BUS_SPACE_IO : X86_BUS_SPACE_MEM);
- + rc = bus_space_map(iot, pio->base, pio->size, 0, &ioh);
- + if (rc != 0) {
- + printf("iomap of %x fails %x\n", pio->base, rc);
- + 		return EINVAL;
- + }
- + if (pio->dir == VEI_DIR_OUT) {
- + switch (pio->size) {
- + case 1:
- + bus_space_write_1(iot, ioh, 0, pio->data);
- + break;
- + case 2:
- + bus_space_write_2(iot, ioh, 0, pio->data);
- + break;
- + case 4:
- + bus_space_write_4(iot, ioh, 0, pio->data);
- + break;
- + default:
- + 			printf("pio:no wrsize: %d\n", pio->size);
- + return EINVAL;
- + }
- + } else {
- + switch (pio->size) {
- + case 1:
- + pio->data = bus_space_read_1(iot, ioh, 0);
- + break;
- + case 2:
- + pio->data = bus_space_read_2(iot, ioh, 0);
- + break;
- + case 4:
- + pio->data = bus_space_read_4(iot, ioh, 0);
- + break;
- + default:
- + 			printf("pio:no rdsize: %d\n", pio->size);
- + return EINVAL;
- + }
- + }
- + bus_space_unmap(iot, ioh, pio->size);
- +#if 0
- + if (pio->dir == VEI_DIR_OUT) {
- + switch (pio->size) {
- + case 1:
- + outb(pio->base, pio->data);
- + break;
- + case 2:
- + outw(pio->base, pio->data);
- + break;
- + case 4:
- + outl(pio->base, pio->data);
- + break;
- + default:
- + printf("pio:no wrsize: %d\n", pio->base);
- + return EINVAL;
- + }
- + } else {
- + switch (pio->size) {
- + case 1:
- + pio->data = inb(pio->base);
- + break;
- + case 2:
- + pio->data = inw(pio->base);
- + break;
- + case 4:
- + pio->data = inl(pio->base);
- + break;
- + default:
- + printf("pio:no rdsize: %d\n", pio->base);
- + return EINVAL;
- + }
- + }
- +#endif
- +#if 0
- + printf("%ld pio; %s(%x,%llx)\n", sizeof(*pio),
- + pio->dir == VEI_DIR_OUT ? "out" : "in", pio->base, pio->data);
- +#endif
- + return 0;
- +}
- +
- +/* Device interrupt handler. Increase pending count */
- +static int
- +vmm_intr(void *arg)
- +{
- + struct vppt *ppt = arg;
- +
- + ppt->pending++;
- + return 1;
- +}
- +
- +/* Get interrupt pending count for a device */
- +static int
- +vm_getintr(struct vm_ptdpci *ptd)
- +{
- + pci_chipset_tag_t pc = NULL;
- + pcitag_t tag;
- + struct vppt *ppt;
- +
- + tag = pci_make_tag(pc, ptd->bus, ptd->dev, ptd->func);
- + TAILQ_FOREACH(ppt, &vppts, next) {
- + if (ppt->tag == tag) {
- + ptd->pending = ppt->pending;
- + }
- + }
- + return (0);
- +}
- +
- +/* Get PCI/Bar info */
- +static int
- +vm_getbar(struct vm_ptdpci *ptd)
- +{
- + pci_chipset_tag_t pc = NULL;
- + pcitag_t tag;
- + bus_addr_t base;
- + bus_size_t size;
- + pcireg_t type = 0;
- + int i, reg, did;
- + void *dom;
- + struct vm *vm;
- + struct vppt *ppt;
- + uint32_t id_reg;
- +
- + /* Make sure this is a valid PCI device */
- + tag = pci_make_tag(pc, ptd->bus, ptd->dev, ptd->func);
- + id_reg = pci_conf_read(pc, tag, PCI_ID_REG);
- + printf("getbar: %d.%d.%d %x\n",
- + ptd->bus, ptd->dev, ptd->func, id_reg);
- + if (PCI_VENDOR(id_reg) == PCI_VENDOR_INVALID)
- + return ENODEV;
- + if (PCI_VENDOR(id_reg) == 0)
- + return ENODEV;
- +
- + /* Scan all BARs and get type/address/length */
- + memset(&ptd->barinfo, 0, sizeof(ptd->barinfo));
- + for (i = 0, reg = PCI_MAPREG_START; reg < PCI_MAPREG_END; i++, reg += 4) {
- + if (!pci_mapreg_probe(pc, tag, reg, &type))
- + continue;
- + if (pci_mapreg_info(pc, tag, reg, type, &base, &size, NULL))
- + continue;
- + printf(" %d: %x %.8lx %.16lx\n", i, type, size, base);
- + ptd->barinfo[i].type = type;
- + ptd->barinfo[i].size = size;
- + ptd->barinfo[i].addr = base;
- + /* Skip next BAR for 64-bit type */
- + if (type & PCI_MAPREG_MEM_TYPE_64BIT) {
- + reg += 4;
- + i++;
- + }
- + }
- +
- + /* don't support if mmio and no domain? */
- + did = 0xdeadcafe;
- + dom = _iommu_domain(0, ptd->bus, ptd->dev, ptd->func, &did);
- + printf("domain is: %p:%x\n", dom, did);
- + if (!dom) {
- + return (ENODEV);
- + }
- + /* Map VMM DMA to iommu */
- + vm = SLIST_FIRST(&vmm_softc->vm_list);
- + if (vm != NULL) {
- + paddr_t pa;
- +
- + for (i = 0; i < vm->vm_nmemranges; i++) {
- + 			printf("mapping va:%lx pa:%lx\n", vm->vm_memranges[i].vmr_va, vm->vm_memranges[i].vmr_gpa);
- + _iommu_map(dom,
- + vm->vm_memranges[i].vmr_va,
- + vm->vm_memranges[i].vmr_gpa,
- + vm->vm_memranges[i].vmr_size);
- + }
- + }
- + /* Setup interrupt */
- + TAILQ_FOREACH(ppt, &vppts, next) {
- + if (ppt->tag == tag) {
- + if (!ppt->cookie) {
- + ppt->cookie = pci_intr_establish(ppt->pc, ppt->ih, IPL_BIO, vmm_intr,
- + ppt, "ppt");
- + }
- + printf("Establish intr : %p\n", ppt->cookie);
- + ppt->pending = 0;
- + }
- + }
- + return 0;
- +}
- +
- /*
- * vmm_enabled
- *
- @@ -506,7 +760,18 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
- case VMM_IOC_WRITEVMPARAMS:
- ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 1);
- break;
- -
- + case VMM_IOC_BARINFO:
- + ret = vm_getbar((struct vm_ptdpci *)data);
- + break;
- + case VMM_IOC_GETINTR:
- + ret = vm_getintr((struct vm_ptdpci *)data);
- + break;
- + case VMM_IOC_PCIIO:
- + ret = vm_pciio((struct vm_pciio *)data);
- + break;
- + case VMM_IOC_PIO:
- + ret = vm_pio((struct vm_pio *)data);
- + break;
- default:
- DPRINTF("%s: unknown ioctl code 0x%lx\n", __func__, cmd);
- ret = ENOTTY;
- @@ -515,6 +780,13 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
- return (ret);
- }
- +/* MMAP any address (TODO: fixme) for mapping BAR */
- +paddr_t
- +vmmmmap(dev_t dev, off_t off, int prot)
- +{
- + return off;
- +}
- +
- /*
- * pledge_ioctl_vmm
- *
- @@ -541,6 +813,10 @@ pledge_ioctl_vmm(struct proc *p, long com)
- case VMM_IOC_MPROTECT_EPT:
- case VMM_IOC_READVMPARAMS:
- case VMM_IOC_WRITEVMPARAMS:
- + case VMM_IOC_BARINFO:
- + case VMM_IOC_PCIIO:
- + case VMM_IOC_PIO:
- + case VMM_IOC_GETINTR:
- return (0);
- }
- @@ -558,6 +834,34 @@ vmmclose(dev_t dev, int flag, int mode, struct proc *p)
- return 0;
- }
- +/*
- + * vm_find_vcpu
- + *
- + * Lookup VMM VCPU by ID number
- + *
- + * Parameters:
- + * vm: vm structure
- + * id: index id of vcpu
- + *
- + * Returns pointer to vcpu structure if successful, NULL otherwise
- + */
- +static struct vcpu *
- +vm_find_vcpu(struct vm *vm, uint32_t id)
- +{
- + struct vcpu *vcpu;
- +
- + if (vm == NULL)
- + return NULL;
- + rw_enter_read(&vm->vm_vcpu_lock);
- + SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- + if (vcpu->vc_id == id)
- + break;
- + }
- + rw_exit_read(&vm->vm_vcpu_lock);
- + return vcpu;
- +}
- +
- +
- /*
- * vm_resetcpu
- *
- @@ -591,12 +895,7 @@ vm_resetcpu(struct vm_resetcpu_params *vrp)
- return (error);
- }
- - rw_enter_read(&vm->vm_vcpu_lock);
- - SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- - if (vcpu->vc_id == vrp->vrp_vcpu_id)
- - break;
- - }
- - rw_exit_read(&vm->vm_vcpu_lock);
- + vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id);
- if (vcpu == NULL) {
- DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
- @@ -657,12 +956,7 @@ vm_intr_pending(struct vm_intr_params *vip)
- return (error);
- }
- - rw_enter_read(&vm->vm_vcpu_lock);
- - SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- - if (vcpu->vc_id == vip->vip_vcpu_id)
- - break;
- - }
- - rw_exit_read(&vm->vm_vcpu_lock);
- + vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id);
- rw_exit_read(&vmm_softc->vm_lock);
- if (vcpu == NULL)
- @@ -722,12 +1016,7 @@ vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir) {
- return (error);
- }
- - rw_enter_read(&vm->vm_vcpu_lock);
- - SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- - if (vcpu->vc_id == vpp->vpp_vcpu_id)
- - break;
- - }
- - rw_exit_read(&vm->vm_vcpu_lock);
- + vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id);
- rw_exit_read(&vmm_softc->vm_lock);
- if (vcpu == NULL)
- @@ -786,12 +1075,7 @@ vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
- return (error);
- }
- - rw_enter_read(&vm->vm_vcpu_lock);
- - SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- - if (vcpu->vc_id == vrwp->vrwp_vcpu_id)
- - break;
- - }
- - rw_exit_read(&vm->vm_vcpu_lock);
- + vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id);
- rw_exit_read(&vmm_softc->vm_lock);
- if (vcpu == NULL)
- @@ -858,12 +1142,7 @@ vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
- return (ret);
- }
- - rw_enter_read(&vm->vm_vcpu_lock);
- - SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- - if (vcpu->vc_id == vmep->vmep_vcpu_id)
- - break;
- - }
- - rw_exit_read(&vm->vm_vcpu_lock);
- + vcpu = vm_find_vcpu(vm, vmep->vmep_vcpu_id);
- if (vcpu == NULL) {
- DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
- @@ -1907,6 +2186,7 @@ vcpu_readregs_svm(struct vcpu *vcpu, uint64_t regmask,
- gprs[VCPU_REGS_R14] = vcpu->vc_gueststate.vg_r14;
- gprs[VCPU_REGS_R15] = vcpu->vc_gueststate.vg_r15;
- gprs[VCPU_REGS_RBP] = vcpu->vc_gueststate.vg_rbp;
- + gprs[VCPU_REGS_RAX] = vmcb->v_rax;
- gprs[VCPU_REGS_RIP] = vmcb->v_rip;
- gprs[VCPU_REGS_RSP] = vmcb->v_rsp;
- gprs[VCPU_REGS_RFLAGS] = vmcb->v_rflags;
- @@ -2186,6 +2466,7 @@ vcpu_writeregs_svm(struct vcpu *vcpu, uint64_t regmask,
- vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP];
- vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP];
- + vmcb->v_rax = gprs[VCPU_REGS_RAX];
- vmcb->v_rip = gprs[VCPU_REGS_RIP];
- vmcb->v_rsp = gprs[VCPU_REGS_RSP];
- vmcb->v_rflags = gprs[VCPU_REGS_RFLAGS];
- @@ -5348,7 +5629,6 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
- struct vm_mem_range *vmr;
- if (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) {
- - DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa);
- return (VMM_MEM_TYPE_REGULAR);
- }
- @@ -5457,6 +5737,11 @@ svm_fault_page(struct vcpu *vcpu, paddr_t gpa)
- fault_type = svm_get_guest_faulttype(vmcb);
- + vcpu->vc_exit.vee.vee_gpa = gpa;
- + if ((gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) || fault_type == VM_FAULT_PROTECT) {
- + vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
- + return (EAGAIN);
- + }
- ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, fault_type,
- PROT_READ | PROT_WRITE | PROT_EXEC);
- if (ret)
- @@ -5517,7 +5802,8 @@ vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
- return (EINVAL);
- }
- - if (fault_type == VM_FAULT_PROTECT) {
- + vcpu->vc_exit.vee.vee_gpa = gpa;
- + if ((gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) || fault_type == VM_FAULT_PROTECT) {
- vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
- return (EAGAIN);
- }
- diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
- index 2c49f91a1..6f4f6b74c 100644
- --- a/sys/arch/amd64/conf/GENERIC
- +++ b/sys/arch/amd64/conf/GENERIC
- @@ -45,6 +45,7 @@ acpibtn* at acpi?
- acpicpu* at acpi?
- acpicmos* at acpi?
- acpidock* at acpi?
- +acpidmar0 at acpi?
- acpiec* at acpi?
- acpipci* at acpi?
- acpiprt* at acpi?
- @@ -379,7 +380,7 @@ drm0 at radeondrm? primary 1
- drm* at radeondrm?
- wsdisplay0 at radeondrm? primary 1
- wsdisplay* at radeondrm? mux -1
- -amdgpu* at pci?
- +amdgpu* at pci? disable
- drm0 at amdgpu? primary 1
- drm* at amdgpu?
- wsdisplay0 at amdgpu? primary 1
- diff --git a/sys/arch/amd64/conf/RAMDISK_CD b/sys/arch/amd64/conf/RAMDISK_CD
- index 91022751e..c87cea352 100644
- --- a/sys/arch/amd64/conf/RAMDISK_CD
- +++ b/sys/arch/amd64/conf/RAMDISK_CD
- @@ -48,6 +48,7 @@ sdhc* at acpi?
- acpihve* at acpi?
- chvgpio* at acpi?
- glkgpio* at acpi?
- +acpidmar0 at acpi?
- mpbios0 at bios0
- diff --git a/sys/arch/amd64/include/pci_machdep.h b/sys/arch/amd64/include/pci_machdep.h
- index bc295cc22..c725bdc73 100644
- --- a/sys/arch/amd64/include/pci_machdep.h
- +++ b/sys/arch/amd64/include/pci_machdep.h
- @@ -91,7 +91,8 @@ void *pci_intr_establish_cpu(pci_chipset_tag_t, pci_intr_handle_t,
- int, struct cpu_info *,
- int (*)(void *), void *, const char *);
- void pci_intr_disestablish(pci_chipset_tag_t, void *);
- -#define pci_probe_device_hook(c, a) (0)
- +int pci_probe_device_hook(pci_chipset_tag_t,
- + struct pci_attach_args *);
- void pci_dev_postattach(struct device *, struct pci_attach_args *);
- diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
- index 4990a5c53..ffbc74528 100644
- --- a/sys/arch/amd64/include/vmmvar.h
- +++ b/sys/arch/amd64/include/vmmvar.h
- @@ -32,6 +32,7 @@
- #define VMM_MAX_VCPUS_PER_VM 64
- #define VMM_MAX_VM_MEM_SIZE 32768
- #define VMM_MAX_NICS_PER_VM 4
- +#define VMM_MAX_PCI_PTHRU 4
- #define VMM_PCI_MMIO_BAR_BASE 0xF0000000ULL
- #define VMM_PCI_MMIO_BAR_END 0xFFFFFFFFULL
- @@ -359,6 +360,7 @@ struct vm_exit_inout {
- */
- struct vm_exit_eptviolation {
- uint8_t vee_fault_type;
- + uint64_t vee_gpa;
- };
- /*
- @@ -480,6 +482,9 @@ struct vm_create_params {
- /* Output parameter from VMM_IOC_CREATE */
- uint32_t vcp_id;
- +
- + size_t vcp_npcis;
- + uint32_t vcp_pcis[VMM_MAX_PCI_PTHRU];
- };
- struct vm_run_params {
- @@ -578,6 +583,47 @@ struct vm_mprotect_ept_params {
- int vmep_prot;
- };
- +struct vm_pciio {
- + /* input */
- + uint32_t seg;
- + uint32_t bus;
- + uint32_t dev;
- + uint32_t func;
- +
- + uint32_t dir;
- + uint32_t reg;
- +
- + /* output */
- + uint32_t val;
- +};
- +
- +#define MAXBAR 6
- +struct vm_pio {
- + uint32_t type;
- + uint32_t dir;
- + uint32_t size;
- + uint32_t base;
- + uint64_t data;
- +};
- +
- +/* Passthrough PCI device structure */
- +struct vm_ptdpci {
- + uint8_t bus;
- + uint8_t dev;
- + uint8_t func;
- +
- + uint8_t id;
- + uint32_t pending;
- + uint32_t flags;
- +
- + struct {
- + uint32_t type;
- + uint32_t size;
- + uint64_t addr;
- + void *va;
- + } barinfo[MAXBAR];
- +};
- +
- /* IOCTL definitions */
- #define VMM_IOC_CREATE _IOWR('V', 1, struct vm_create_params) /* Create VM */
- #define VMM_IOC_RUN _IOWR('V', 2, struct vm_run_params) /* Run VCPU */
- @@ -594,6 +640,11 @@ struct vm_mprotect_ept_params {
- /* Control the protection of ept pages*/
- #define VMM_IOC_MPROTECT_EPT _IOW('V', 11, struct vm_mprotect_ept_params)
- +#define VMM_IOC_BARINFO _IOWR('V', 12, struct vm_ptdpci)
- +#define VMM_IOC_GETINTR _IOWR('V', 13, struct vm_ptdpci)
- +#define VMM_IOC_PCIIO _IOWR('V', 14, struct vm_pciio)
- +#define VMM_IOC_PIO _IOWR('V', 15, struct vm_pio)
- +
- /* CPUID masks */
- /*
- * clone host capabilities minus:
- diff --git a/sys/arch/amd64/pci/pci_machdep.c b/sys/arch/amd64/pci/pci_machdep.c
- index cf4e835de..e9902a231 100644
- --- a/sys/arch/amd64/pci/pci_machdep.c
- +++ b/sys/arch/amd64/pci/pci_machdep.c
- @@ -89,6 +89,11 @@
- #include <machine/mpbiosvar.h>
- #endif
- +#include "acpi.h"
- +#if NACPI > 0
- +#include <dev/acpi/acpidmar.h>
- +#endif
- +
- /*
- * Memory Mapped Configuration space access.
- *
- @@ -797,7 +802,19 @@ pci_init_extents(void)
- }
- }
- -#include "acpi.h"
- +extern void vmm_mapintr(pci_chipset_tag_t pc, struct pci_attach_args *pa);
- +
- +int
- +pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
- +{
- +#if NACPI > 0
- + if (acpidmar_sc)
- + acpidmar_pci_hook(pc, pa);
- +#endif
- + vmm_mapintr(pc, pa);
- + return 0;
- +}
- +
- #if NACPI > 0
- void acpi_pci_match(struct device *, struct pci_attach_args *);
- pcireg_t acpi_pci_min_powerstate(pci_chipset_tag_t, pcitag_t);
- diff --git a/sys/dev/acpi/acpi.c b/sys/dev/acpi/acpi.c
- index a6239198e..484f03fa1 100644
- --- a/sys/dev/acpi/acpi.c
- +++ b/sys/dev/acpi/acpi.c
- @@ -49,6 +49,7 @@
- #include <dev/acpi/amltypes.h>
- #include <dev/acpi/acpidev.h>
- #include <dev/acpi/dsdt.h>
- +#include <dev/acpi/acpidmar.h>
- #include <dev/wscons/wsdisplayvar.h>
- #include <dev/pci/pcidevs.h>
- @@ -2448,6 +2449,9 @@ acpi_sleep_pm(struct acpi_softc *sc, int state)
- sc->sc_fadt->pm2_cnt_blk && sc->sc_fadt->pm2_cnt_len)
- acpi_write_pmreg(sc, ACPIREG_PM2_CNT, 0, ACPI_PM2_ARB_DIS);
- + if (acpidmar_sc)
- + acpidmar_sw(DVACT_SUSPEND);
- +
- /* Write SLP_TYPx values */
- rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
- regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
- @@ -2483,6 +2487,9 @@ acpi_resume_pm(struct acpi_softc *sc, int fromstate)
- {
- uint16_t rega, regb, en;
- + if (acpidmar_sc)
- + acpidmar_sw(DVACT_RESUME);
- +
- /* Write SLP_TYPx values */
- rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
- regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
- diff --git a/sys/dev/acpi/acpidmar.c b/sys/dev/acpi/acpidmar.c
- new file mode 100644
- index 000000000..c384f4a15
- --- /dev/null
- +++ b/sys/dev/acpi/acpidmar.c
- @@ -0,0 +1,3051 @@
- +/*
- + * Copyright (c) 2015 Jordan Hargrave <[email protected]>
- + *
- + * Permission to use, copy, modify, and distribute this software for any
- + * purpose with or without fee is hereby granted, provided that the above
- + * copyright notice and this permission notice appear in all copies.
- + *
- + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- + */
- +
- +#include <sys/param.h>
- +#include <sys/systm.h>
- +#include <sys/kernel.h>
- +#include <sys/device.h>
- +#include <sys/malloc.h>
- +#include <sys/queue.h>
- +#include <sys/types.h>
- +#include <sys/mbuf.h>
- +#include <sys/proc.h>
- +
- +#include <uvm/uvm_extern.h>
- +
- +#include <machine/apicvar.h>
- +#include <machine/biosvar.h>
- +#include <machine/cpuvar.h>
- +#include <machine/bus.h>
- +
- +#include <dev/acpi/acpireg.h>
- +#include <dev/acpi/acpivar.h>
- +#include <dev/acpi/acpidev.h>
- +#include <dev/acpi/amltypes.h>
- +#include <dev/acpi/dsdt.h>
- +
- +#include <uvm/uvm_extern.h>
- +
- +#include <machine/i8259.h>
- +#include <machine/i82093reg.h>
- +#include <machine/i82093var.h>
- +#include <machine/i82489reg.h>
- +#include <machine/i82489var.h>
- +
- +#include <machine/mpbiosvar.h>
- +
- +#include <dev/pci/pcireg.h>
- +#include <dev/pci/pcivar.h>
- +#include <dev/pci/pcidevs.h>
- +#include <dev/pci/ppbreg.h>
- +
- +#include "ioapic.h"
- +
- +#include "acpidmar.h"
- +#include "amd_iommu.h"
- +
- +#define dprintf(x...)
- +
- +#ifdef DDB
- +int acpidmar_ddb = 0;
- +#endif
- +
- +int intel_iommu_gfx_mapped = 0;
- +int force_cm = 1;
- +
- +void showahci(void *);
- +
- +/* Page Table Entry per domain */
- +struct iommu_softc;
- +
- +static inline int
- +mksid(int b, int d, int f)
- +{
- + return (b << 8) + (d << 3) + f;
- +}
- +
- +static inline int
- +sid_devfn(int sid)
- +{
- + return sid & 0xff;
- +}
- +
- +static inline int
- +sid_bus(int sid)
- +{
- + return (sid >> 8) & 0xff;
- +}
- +
- +static inline int
- +sid_dev(int sid)
- +{
- + return (sid >> 3) & 0x1f;
- +}
- +
- +static inline int
- +sid_fun(int sid)
- +{
- + return (sid >> 0) & 0x7;
- +}
- +
- +/* Page Table Entry per domain */
- +static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
- +
- +/* Alias mapping */
- +#define ALIAS_VALID 0x10000
- +static int sid_alias[65536];
- +
- +struct domain_dev {
- + int sid;
- + int sec;
- + int sub;
- + TAILQ_ENTRY(domain_dev) link;
- +};
- +
- +struct domain {
- + struct iommu_softc *iommu;
- + int did;
- + int gaw;
- + struct pte_entry *pte;
- + paddr_t ptep;
- + struct bus_dma_tag dmat;
- + int flag;
- +
- + struct mutex exlck;
- + char exname[32];
- + struct extent *iovamap;
- + TAILQ_HEAD(,domain_dev) devices;
- + TAILQ_ENTRY(domain) link;
- +};
- +
- +#define DOM_DEBUG 0x1
- +#define DOM_NOMAP 0x2
- +
- +struct dmar_devlist {
- + int type;
- + int bus;
- + int ndp;
- + struct acpidmar_devpath *dp;
- + TAILQ_ENTRY(dmar_devlist) link;
- +};
- +
- +TAILQ_HEAD(devlist_head, dmar_devlist);
- +
- +struct ivhd_devlist {
- + int start_id;
- + int end_id;
- + int cfg;
- + TAILQ_ENTRY(ivhd_devlist) link;
- +};
- +
- +struct rmrr_softc {
- + TAILQ_ENTRY(rmrr_softc) link;
- + struct devlist_head devices;
- + int segment;
- + uint64_t start;
- + uint64_t end;
- +};
- +
- +struct atsr_softc {
- + TAILQ_ENTRY(atsr_softc) link;
- + struct devlist_head devices;
- + int segment;
- + int flags;
- +};
- +
- +struct iommu_pic {
- + struct pic pic;
- + struct iommu_softc *iommu;
- +};
- +
- +#define IOMMU_FLAGS_CATCHALL 0x1
- +#define IOMMU_FLAGS_BAD 0x2
- +#define IOMMU_FLAGS_SUSPEND 0x4
- +
- +struct iommu_softc {
- + TAILQ_ENTRY(iommu_softc)link;
- + struct devlist_head devices;
- + int id;
- + int flags;
- + int segment;
- +
- + struct mutex reg_lock;
- +
- + bus_space_tag_t iot;
- + bus_space_handle_t ioh;
- +
- + uint64_t cap;
- + uint64_t ecap;
- + uint32_t gcmd;
- +
- + int mgaw;
- + int agaw;
- + int ndoms;
- +
- + struct root_entry *root;
- + struct context_entry *ctx[256];
- +
- + void *intr;
- + struct iommu_pic pic;
- + int fedata;
- + uint64_t feaddr;
- + uint64_t rtaddr;
- +
- + // Queued Invalidation
- + int qi_head;
- + int qi_tail;
- + paddr_t qip;
- + struct qi_entry *qi;
- +
- + struct domain *unity;
- + TAILQ_HEAD(,domain) domains;
- +
- + // AMD iommu
- + struct ivhd_dte *dte;
- + void *cmd_tbl;
- + void *evt_tbl;
- +};
- +
- +static inline int iommu_bad(struct iommu_softc *sc)
- +{
- + return (sc->flags & IOMMU_FLAGS_BAD);
- +}
- +
- +static inline int iommu_enabled(struct iommu_softc *sc)
- +{
- + if (sc->dte) {
- + return 1;
- + }
- + return (sc->gcmd & GCMD_TE);
- +}
- +
- +struct acpidmar_softc {
- + struct device sc_dev;
- +
- + pci_chipset_tag_t sc_pc;
- + bus_space_tag_t sc_memt;
- + int sc_haw;
- + int sc_flags;
- +
- + TAILQ_HEAD(,iommu_softc)sc_drhds;
- + TAILQ_HEAD(,rmrr_softc) sc_rmrrs;
- + TAILQ_HEAD(,atsr_softc) sc_atsrs;
- +};
- +
- +int acpidmar_activate(struct device *, int);
- +int acpidmar_match(struct device *, void *, void *);
- +void acpidmar_attach(struct device *, struct device *, void *);
- +struct domain *acpidmar_pci_attach(struct acpidmar_softc *, int, int, int);
- +
- +struct cfattach acpidmar_ca = {
- + sizeof(struct acpidmar_softc), acpidmar_match, acpidmar_attach,
- +};
- +
- +struct cfdriver acpidmar_cd = {
- + NULL, "acpidmar", DV_DULL
- +};
- +
- +struct acpidmar_softc *acpidmar_sc;
- +int acpidmar_intr(void *);
- +
- +#define DID_UNITY 0x1
- +
- +void _dumppte(struct pte_entry *, int, vaddr_t);
- +
- +struct domain *domain_create(struct iommu_softc *, int);
- +struct domain *domain_lookup(struct acpidmar_softc *, int, int);
- +
- +void domain_unload_map(struct domain *, bus_dmamap_t);
- +void domain_load_map(struct domain *, bus_dmamap_t, int, int, const char *);
- +
- +void (*domain_map_page)(struct domain *, vaddr_t, paddr_t, uint64_t);
- +void domain_map_page_amd(struct domain *, vaddr_t, paddr_t, uint64_t);
- +void domain_map_page_intel(struct domain *, vaddr_t, paddr_t, uint64_t);
- +void domain_map_pthru(struct domain *, paddr_t, paddr_t);
- +
- +void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
- +void acpidmar_parse_devscope(union acpidmar_entry *, int, int,
- + struct devlist_head *);
- +int acpidmar_match_devscope(struct devlist_head *, pci_chipset_tag_t, int);
- +
- +void acpidmar_init(struct acpidmar_softc *, struct acpi_dmar *);
- +void acpidmar_drhd(struct acpidmar_softc *, union acpidmar_entry *);
- +void acpidmar_rmrr(struct acpidmar_softc *, union acpidmar_entry *);
- +void acpidmar_atsr(struct acpidmar_softc *, union acpidmar_entry *);
- +void acpiivrs_init(struct acpidmar_softc *, struct acpi_ivrs *);
- +
- +void *acpidmar_intr_establish(void *, int, int (*)(void *), void *,
- + const char *);
- +
- +void iommu_writel(struct iommu_softc *, int, uint32_t);
- +uint32_t iommu_readl(struct iommu_softc *, int);
- +void iommu_writeq(struct iommu_softc *, int, uint64_t);
- +uint64_t iommu_readq(struct iommu_softc *, int);
- +void iommu_showfault(struct iommu_softc *, int,
- + struct fault_entry *);
- +void iommu_showcfg(struct iommu_softc *, int);
- +
- +int iommu_init(struct acpidmar_softc *, struct iommu_softc *,
- + struct acpidmar_drhd *);
- +int iommu_enable_translation(struct iommu_softc *, int);
- +void iommu_enable_qi(struct iommu_softc *, int);
- +void iommu_flush_cache(struct iommu_softc *, void *, size_t);
- +void *iommu_alloc_page(struct iommu_softc *, paddr_t *);
- +void iommu_flush_write_buffer(struct iommu_softc *);
- +void iommu_issue_qi(struct iommu_softc *, struct qi_entry *);
- +
- +void iommu_flush_ctx(struct iommu_softc *, int, int, int, int);
- +void iommu_flush_ctx_qi(struct iommu_softc *, int, int, int, int);
- +void iommu_flush_tlb(struct iommu_softc *, int, int);
- +void iommu_flush_tlb_qi(struct iommu_softc *, int, int);
- +
- +void iommu_set_rtaddr(struct iommu_softc *, paddr_t);
- +void acpidmar_sw(int);
- +
- +const char *dmar_bdf(int);
- +
- +const char *
- +dmar_bdf(int sid)
- +{
- + static char bdf[32];
- +
- + snprintf(bdf, sizeof(bdf), "%.4x:%.2x:%.2x.%x", 0,
- + sid_bus(sid), sid_dev(sid), sid_fun(sid));
- +
- + return (bdf);
- +}
- +
- +/* busdma */
- +static int dmar_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
- + bus_size_t, int, bus_dmamap_t *);
- +static void dmar_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
- +static int dmar_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
- + struct proc *, int);
- +static int dmar_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
- + int);
- +static int dmar_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t, struct uio *, int);
- +static int dmar_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
- + bus_dma_segment_t *, int, bus_size_t, int);
- +static void dmar_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
- +static void dmar_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
- + bus_size_t, int);
- +static int dmar_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t, bus_size_t,
- + bus_dma_segment_t *, int, int *, int);
- +static void dmar_dmamem_free(bus_dma_tag_t, bus_dma_segment_t *, int);
- +static int dmar_dmamem_map(bus_dma_tag_t, bus_dma_segment_t *, int, size_t,
- + caddr_t *, int);
- +static void dmar_dmamem_unmap(bus_dma_tag_t, caddr_t, size_t);
- +static paddr_t dmar_dmamem_mmap(bus_dma_tag_t, bus_dma_segment_t *, int, off_t,
- + int, int);
- +
- +static void dmar_dumpseg(bus_dma_tag_t, int, bus_dma_segment_t *, const char *);
- +const char *dom_bdf(struct domain *dom);
- +void domain_map_check(struct domain *dom);
- +
- +struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *npte, vaddr_t va, int shift, uint64_t flags);
- +int ivhd_poll_events(struct iommu_softc *iommu);
- +void ivhd_showit(struct iommu_softc *);
- +void ivhd_showdte(void);
- +void ivhd_showcmd(struct iommu_softc *);
- +void ivhd_showevt(struct iommu_softc *);
- +
- +static inline int
- +debugme(struct domain *dom)
- +{
- + return 0;
- + return (dom->flag & DOM_DEBUG);
- +}
- +
- +void
- +domain_map_check(struct domain *dom)
- +{
- + struct iommu_softc *iommu;
- + struct domain_dev *dd;
- + struct context_entry *ctx;
- + int v;
- +
- + iommu = dom->iommu;
- + TAILQ_FOREACH(dd, &dom->devices, link) {
- + acpidmar_pci_attach(acpidmar_sc, iommu->segment, dd->sid, 1);
- +
- + if (iommu->dte)
- + continue;
- +
- + /* Check if this is the first time we are mapped */
- + ctx = &iommu->ctx[sid_bus(dd->sid)][sid_devfn(dd->sid)];
- + v = context_user(ctx);
- + if (v != 0xA) {
- + printf(" map: %.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
- + iommu->segment,
- + sid_bus(dd->sid),
- + sid_dev(dd->sid),
- + sid_fun(dd->sid),
- + iommu->id,
- + dom->did);
- + context_set_user(ctx, 0xA);
- + }
- + }
- +}
- +
- +/* Map a single page as passthrough - used for DRM */
- +void
- +dmar_ptmap(bus_dma_tag_t tag, bus_addr_t addr)
- +{
- + struct domain *dom = tag->_cookie;
- +
- + if (!acpidmar_sc)
- + return;
- + domain_map_check(dom);
- + domain_map_page(dom, addr, addr, PTE_P | PTE_R | PTE_W);
- +}
- +
- +/* Map a range of pages 1:1 */
- +void
- +domain_map_pthru(struct domain *dom, paddr_t start, paddr_t end)
- +{
- + domain_map_check(dom);
- + while (start < end) {
- + domain_map_page(dom, start, start, PTE_P | PTE_R | PTE_W);
- + start += VTD_PAGE_SIZE;
- + }
- +}
- +
/* Map a single paddr to IOMMU paddr.
 * Intel VT-d second-level page-table walk: descends from the top level
 * (derived from the adjusted guest address width, agaw) down to level 0,
 * allocating intermediate table pages on demand.
 * Note: flags == 0 clears the leaf PTE (see domain_unload_map).
 */
void
domain_map_page_intel(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
{
    paddr_t paddr;
    struct pte_entry *pte, *npte;
    int lvl, idx;
    struct iommu_softc *iommu;

    iommu = dom->iommu;
    /* Insert physical address into virtual address map
     * XXX: could we use private pmap here?
     * essentially doing a pmap_enter(map, va, pa, prot);
     */

    /* Only handle 4k pages for now */
    npte = dom->pte;
    for (lvl = iommu->agaw - VTD_STRIDE_SIZE; lvl>= VTD_LEVEL0;
        lvl -= VTD_STRIDE_SIZE) {
        /* Index of this level's entry within the current table page. */
        idx = (va >> lvl) & VTD_STRIDE_MASK;
        pte = &npte[idx];
        if (lvl == VTD_LEVEL0) {
            /* Level 1: Page Table - add physical address */
            pte->val = pa | flags;
            iommu_flush_cache(iommu, pte, sizeof(*pte));
            break;
        } else if (!(pte->val & PTE_P)) {
            /* Level N: Point to lower level table */
            iommu_alloc_page(iommu, &paddr);
            pte->val = paddr | PTE_P | PTE_R | PTE_W;
            iommu_flush_cache(iommu, pte, sizeof(*pte));
        }
        /* Follow the entry to the next-level table via the direct map. */
        npte = (void *)PMAP_DIRECT_MAP((pte->val & VTD_PTE_MASK));
    }
}
- +
/* Map a single paddr to IOMMU paddr: AMD
 * physical address breakdown into levels:
 *   xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx
 *          5.55555555.44444444.43333333,33222222.22211111.1111----.--------
 * mode:
 *  000 = none   shift
 *  001 = 1      [21].12
 *  010 = 2      [30].21
 *  011 = 3      [39].30
 *  100 = 4      [48].39
 *  101 = 5      [57]
 *  110 = 6
 *  111 = reserved
 */
/* Descend one level of the AMD IOMMU page table for `va`.
 * `shift` selects which 9-bit index of `va` applies at this level.
 * Allocates (and flushes) a new lower-level table page if the entry
 * is not present, stamping it with `flags` (next-level/permission bits).
 * Returns the direct-map VA of the next-level table.
 */
struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *pte, vaddr_t va,
    int shift, uint64_t flags)
{
    paddr_t paddr;
    int idx;

    idx = (va >> shift) & VTD_STRIDE_MASK;
    if (!(pte[idx].val & PTE_P)) {
        /* Page Table entry is not present... create a new page entry */
        iommu_alloc_page(iommu, &paddr);
        pte[idx].val = paddr | flags;
        iommu_flush_cache(iommu, &pte[idx], sizeof(pte[idx]));
    }
    return (void *)PMAP_DIRECT_MAP((pte[idx].val & PTE_PADDR_MASK));
}
- +
- +void
- +domain_map_page_amd(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
- +{
- + struct pte_entry *pte;
- + struct iommu_softc *iommu;
- + int idx;
- +
- + iommu = dom->iommu;
- + /* Insert physical address into virtual address map
- + * XXX: could we use private pmap here?
- + * essentially doing a pmap_enter(map, va, pa, prot);
- + */
- +
- + /* Always assume AMD levels=4 */
- + /* 39 30 21 12 */
- + /* ---------|---------|---------|---------|------------ */
- + pte = dom->pte;
- + //pte = pte_lvl(iommu, pte, va, 39, PTE_NXTLVL(3) | PTE_IR | PTE_IW | PTE_P);
- + pte = pte_lvl(iommu, pte, va, 30, PTE_NXTLVL(2) | PTE_IR | PTE_IW | PTE_P);
- + pte = pte_lvl(iommu, pte, va, 21, PTE_NXTLVL(1) | PTE_IR | PTE_IW | PTE_P);
- + //pte = pte_lvl(iommu, pte, va, 12, PTE_NXTLVL(7) | PTE_IR | PTE_IW | PTE_P);
- +
- + if (flags)
- + flags = PTE_P | PTE_R | PTE_W | PTE_IW | PTE_IR | PTE_NXTLVL(0);
- +
- + /* Level 1: Page Table - add physical address */
- + idx = (va >> 12) & 0x1FF;
- + pte[idx].val = pa | flags;
- +
- + iommu_flush_cache(iommu, pte, sizeof(*pte));
- +}
- +
/* Debug helper: print the DMA segment list for a domain.
 * NOTE(review): the unconditional `return` below deliberately disables
 * all output (even with debugme()); remove it to re-enable dumping.
 */
static void
dmar_dumpseg(bus_dma_tag_t tag, int nseg, bus_dma_segment_t *segs,
    const char *lbl)
{
    struct domain *dom = tag->_cookie;
    int i;

    return;    /* dumping disabled; code below is intentionally dead */
    if (!debugme(dom))
        return;
    printf("%s: %s\n", lbl, dom_bdf(dom));
    for (i = 0; i < nseg; i++) {
        printf("  %.16llx %.8x\n",
            (uint64_t)segs[i].ds_addr,
            (uint32_t)segs[i].ds_len);
    }
}
- +
/* Unload mapping: tear down the IOMMU translations for every segment of
 * a loaded DMA map. Clears the leaf PTEs page by page and, unless the
 * domain is pass-through (DOM_NOMAP), releases the DMA virtual-address
 * range back to the domain's iova extent.
 */
void
domain_unload_map(struct domain *dom, bus_dmamap_t dmam)
{
    bus_dma_segment_t *seg;
    paddr_t base, end, idx;
    psize_t alen;
    int i;

    if (iommu_bad(dom->iommu)) {
        printf("unload map no iommu\n");
        return;
    }

    //acpidmar_intr(dom->iommu);
    for (i = 0; i < dmam->dm_nsegs; i++) {
        seg = &dmam->dm_segs[i];

        /* Page-align the segment: [base, end) covers whole pages. */
        base = trunc_page(seg->ds_addr);
        end = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
        alen = end - base;

        if (debugme(dom)) {
            printf(" va:%.16llx len:%x\n",
                (uint64_t)base, (uint32_t)alen);
        }

        /* Clear PTE */
        for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE)
            domain_map_page(dom, base + idx, 0, 0);

        if (dom->flag & DOM_NOMAP) {
            /* Pass-through domain: no iova was allocated at load. */
            //printf("%s: nomap %.16llx\n", dom_bdf(dom), (uint64_t)base);
            continue;
        }

        mtx_enter(&dom->exlck);
        if (extent_free(dom->iovamap, base, alen, EX_NOWAIT)) {
            panic("domain_unload_map: extent_free");
        }
        mtx_leave(&dom->exlck);
    }
}
- +
/* map.segs[x].ds_addr is modified to IOMMU virtual PA.
 * For each segment of a freshly-loaded DMA map: allocate a DMA virtual
 * address range from the domain's iova extent (unless DOM_NOMAP, which
 * keeps the 1:1 address), install PTEs mapping iova -> host PA with
 * PTE_P|pteflag, then flush the IOTLB (or write buffers, depending on
 * caching mode) so the hardware sees the new entries.
 */
void
domain_load_map(struct domain *dom, bus_dmamap_t map, int flags, int pteflag, const char *fn)
{
    bus_dma_segment_t *seg;
    struct iommu_softc *iommu;
    paddr_t base, end, idx;
    psize_t alen;
    u_long res;
    int i;

    iommu = dom->iommu;
    if (!iommu_enabled(iommu)) {
        /* Lazy enable translation when required */
        if (iommu_enable_translation(iommu, 1)) {
            return;
        }
    }
    domain_map_check(dom);
    //acpidmar_intr(iommu);
    for (i = 0; i < map->dm_nsegs; i++) {
        seg = &map->dm_segs[i];

        /* Page-align the segment: [base, end) covers whole pages. */
        base = trunc_page(seg->ds_addr);
        end = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
        alen = end - base;
        res = base;    /* DOM_NOMAP keeps the identity address */

        if (dom->flag & DOM_NOMAP) {
            goto nomap;
        }

        /* Allocate DMA Virtual Address */
        mtx_enter(&dom->exlck);
        if (extent_alloc(dom->iovamap, alen, VTD_PAGE_SIZE, 0,
            map->_dm_boundary, EX_NOWAIT, &res)) {
            panic("domain_load_map: extent_alloc");
        }
        if (res == -1) {
            panic("got -1 address\n");
        }
        mtx_leave(&dom->exlck);

        /* Reassign DMA address: keep intra-page offset bits. */
        seg->ds_addr = res | (seg->ds_addr & VTD_PAGE_MASK);
nomap:
        if (debugme(dom)) {
            printf(" LOADMAP: %.16llx %x => %.16llx\n",
                (uint64_t)seg->ds_addr, (uint32_t)seg->ds_len,
                (uint64_t)res);
        }
        for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE) {
            domain_map_page(dom, res + idx, base + idx,
                PTE_P | pteflag);
        }
    }
    /* Caching-mode hardware requires an explicit IOTLB flush. */
    if ((iommu->cap & CAP_CM) || force_cm) {
        iommu_flush_tlb(iommu, IOTLB_DOMAIN, dom->did);
    } else {
        iommu_flush_write_buffer(iommu);
    }
}
- +
/* Format "segment:bus:dev.fn iommu:id did:xxxx" for the domain's first
 * device, for debug output.
 * NOTE(review): returns a pointer to a static buffer — not reentrant and
 * not safe for two concurrent callers; fine for serialized debug prints.
 */
const char *
dom_bdf(struct domain *dom)
{
    struct domain_dev *dd;
    static char mmm[48];

    dd = TAILQ_FIRST(&dom->devices);
    snprintf(mmm, sizeof(mmm), "%s iommu:%d did:%.4x%s",
        dmar_bdf(dd->sid), dom->iommu->id, dom->did,
        dom->did == DID_UNITY ? " [unity]" : "");
    return (mmm);
}
- +
- +/* Bus DMA Map functions */
- +static int
- +dmar_dmamap_create(bus_dma_tag_t tag, bus_size_t size, int nsegments,
- + bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
- +{
- + int rc;
- +
- + rc = _bus_dmamap_create(tag, size, nsegments, maxsegsz, boundary,
- + flags, dmamp);
- + if (!rc) {
- + dmar_dumpseg(tag, (*dmamp)->dm_nsegs, (*dmamp)->dm_segs,
- + __FUNCTION__);
- + }
- + return (rc);
- +}
- +
- +static void
- +dmar_dmamap_destroy(bus_dma_tag_t tag, bus_dmamap_t dmam)
- +{
- + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
- + _bus_dmamap_destroy(tag, dmam);
- +}
- +
- +static int
- +dmar_dmamap_load(bus_dma_tag_t tag, bus_dmamap_t dmam, void *buf,
- + bus_size_t buflen, struct proc *p, int flags)
- +{
- + struct domain *dom = tag->_cookie;
- + int rc;
- +
- + rc = _bus_dmamap_load(tag, dmam, buf, buflen, p, flags);
- + if (!rc) {
- + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
- + __FUNCTION__);
- + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
- + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
- + __FUNCTION__);
- + }
- + return (rc);
- +}
- +
- +static int
- +dmar_dmamap_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t dmam, struct mbuf *chain,
- + int flags)
- +{
- + struct domain *dom = tag->_cookie;
- + int rc;
- +
- + rc = _bus_dmamap_load_mbuf(tag, dmam, chain, flags);
- + if (!rc) {
- + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
- + __FUNCTION__);
- + domain_load_map(dom, dmam, flags, PTE_R|PTE_W,__FUNCTION__);
- + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
- + __FUNCTION__);
- + }
- + return (rc);
- +}
- +
- +static int
- +dmar_dmamap_load_uio(bus_dma_tag_t tag, bus_dmamap_t dmam, struct uio *uio,
- + int flags)
- +{
- + struct domain *dom = tag->_cookie;
- + int rc;
- +
- + rc = _bus_dmamap_load_uio(tag, dmam, uio, flags);
- + if (!rc) {
- + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
- + __FUNCTION__);
- + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
- + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
- + __FUNCTION__);
- + }
- + return (rc);
- +}
- +
- +static int
- +dmar_dmamap_load_raw(bus_dma_tag_t tag, bus_dmamap_t dmam,
- + bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
- +{
- + struct domain *dom = tag->_cookie;
- + int rc;
- +
- + rc = _bus_dmamap_load_raw(tag, dmam, segs, nsegs, size, flags);
- + if (!rc) {
- + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
- + __FUNCTION__);
- + domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
- + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
- + __FUNCTION__);
- + }
- + return (rc);
- +}
- +
- +static void
- +dmar_dmamap_unload(bus_dma_tag_t tag, bus_dmamap_t dmam)
- +{
- + struct domain *dom = tag->_cookie;
- +
- + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
- + domain_unload_map(dom, dmam);
- + _bus_dmamap_unload(tag, dmam);
- +}
- +
/* bus_dmamap_sync wrapper: currently a straight pass-through.
 * NOTE(review): the #if 0 block sketches per-direction PTE permission
 * updates but would not compile as-is — the `flag` declaration is
 * commented out; fix that before enabling.
 */
static void
dmar_dmamap_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, bus_addr_t offset,
    bus_size_t len, int ops)
{
#if 0
    struct domain *dom = tag->_cookie;
    //int flag;

    flag = PTE_P;
    //acpidmar_intr(dom->iommu);
    if (ops == BUS_DMASYNC_PREREAD) {
        /* make readable */
        flag |= PTE_R;
    }
    else if (ops == BUS_DMASYNC_PREWRITE) {
        /* make writeable */
        flag |= PTE_W;
    }
    dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
#endif
    _bus_dmamap_sync(tag, dmam, offset, len, ops);
}
- +
- +static int
- +dmar_dmamem_alloc(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment,
- + bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
- + int flags)
- +{
- + int rc;
- +
- + rc = _bus_dmamem_alloc(tag, size, alignment, boundary, segs, nsegs,
- + rsegs, flags);
- + if (!rc) {
- + dmar_dumpseg(tag, *rsegs, segs, __FUNCTION__);
- + }
- + return (rc);
- +}
- +
- +static void
- +dmar_dmamem_free(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs)
- +{
- + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
- + _bus_dmamem_free(tag, segs, nsegs);
- +}
- +
- +static int
- +dmar_dmamem_map(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
- + size_t size, caddr_t *kvap, int flags)
- +{
- + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
- + return (_bus_dmamem_map(tag, segs, nsegs, size, kvap, flags));
- +}
- +
- +static void
- +dmar_dmamem_unmap(bus_dma_tag_t tag, caddr_t kva, size_t size)
- +{
- + struct domain *dom = tag->_cookie;
- +
- + if (debugme(dom)) {
- + printf("dmamap_unmap: %s\n", dom_bdf(dom));
- + }
- + _bus_dmamem_unmap(tag, kva, size);
- +}
- +
- +static paddr_t
- +dmar_dmamem_mmap(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
- + off_t off, int prot, int flags)
- +{
- + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
- + return (_bus_dmamem_mmap(tag, segs, nsegs, off, prot, flags));
- +}
- +
- +/*===================================
- + * IOMMU code
- + *===================================*/
- +
/* Intel: Set Context Root Address.
 * Programs the root-table pointer and issues GCMD.SRTP, then polls
 * GSTS.RTPS (set by hardware when the new pointer has been latched).
 * NOTE(review): the poll loop spins 5 times with no delay between
 * reads — confirm this is always enough for the hardware to respond.
 */
void
iommu_set_rtaddr(struct iommu_softc *iommu, paddr_t paddr)
{
    int i, sts;

    mtx_enter(&iommu->reg_lock);
    iommu_writeq(iommu, DMAR_RTADDR_REG, paddr);
    iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_SRTP);
    for (i = 0; i < 5; i++) {
        sts = iommu_readl(iommu, DMAR_GSTS_REG);
        if (sts & GSTS_RTPS)
            break;
    }
    mtx_leave(&iommu->reg_lock);

    /* i == 5 means the loop timed out without seeing RTPS. */
    if (i == 5) {
        printf("set_rtaddr fails\n");
    }
}
- +
- +/* COMMON: Allocate a new memory page */
- +void *
- +iommu_alloc_page(struct iommu_softc *iommu, paddr_t *paddr)
- +{
- + void *va;
- +
- + *paddr = 0;
- + va = km_alloc(VTD_PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
- + if (va == NULL) {
- + panic("can't allocate page\n");
- + }
- + pmap_extract(pmap_kernel(), (vaddr_t)va, paddr);
- + return (va);
- +}
- +
- +
- +/* Intel: Issue command via queued invalidation */
- +void
- +iommu_issue_qi(struct iommu_softc *iommu, struct qi_entry *qi)
- +{
- +#if 0
- + struct qi_entry *pi, *pw;
- +
- + idx = iommu->qi_head;
- + pi = &iommu->qi[idx];
- + pw = &iommu->qi[(idx+1) % MAXQ];
- + iommu->qi_head = (idx+2) % MAXQ;
- +
- + memcpy(pw, &qi, sizeof(qi));
- + issue command;
- + while (pw->xxx)
- + ;
- +#endif
- +}
- +
- +/* Intel: Flush TLB entries, Queued Invalidation mode */
- +void
- +iommu_flush_tlb_qi(struct iommu_softc *iommu, int mode, int did)
- +{
- + struct qi_entry qi;
- +
- + /* Use queued invalidation */
- + qi.hi = 0;
- + switch (mode) {
- + case IOTLB_GLOBAL:
- + qi.lo = QI_IOTLB | QI_IOTLB_IG_GLOBAL;
- + break;
- + case IOTLB_DOMAIN:
- + qi.lo = QI_IOTLB | QI_IOTLB_IG_DOMAIN |
- + QI_IOTLB_DID(did);
- + break;
- + case IOTLB_PAGE:
- + qi.lo = QI_IOTLB | QI_IOTLB_IG_PAGE | QI_IOTLB_DID(did);
- + qi.hi = 0;
- + break;
- + }
- + if (iommu->cap & CAP_DRD)
- + qi.lo |= QI_IOTLB_DR;
- + if (iommu->cap & CAP_DWD)
- + qi.lo |= QI_IOTLB_DW;
- + iommu_issue_qi(iommu, &qi);
- +}
- +
- +/* Intel: Flush Context entries, Queued Invalidation mode */
- +void
- +iommu_flush_ctx_qi(struct iommu_softc *iommu, int mode, int did,
- + int sid, int fm)
- +{
- + struct qi_entry qi;
- +
- + /* Use queued invalidation */
- + qi.hi = 0;
- + switch (mode) {
- + case CTX_GLOBAL:
- + qi.lo = QI_CTX | QI_CTX_IG_GLOBAL;
- + break;
- + case CTX_DOMAIN:
- + qi.lo = QI_CTX | QI_CTX_IG_DOMAIN | QI_CTX_DID(did);
- + break;
- + case CTX_DEVICE:
- + qi.lo = QI_CTX | QI_CTX_IG_DEVICE | QI_CTX_DID(did) |
- + QI_CTX_SID(sid) | QI_CTX_FM(fm);
- + break;
- + }
- + iommu_issue_qi(iommu, &qi);
- +}
- +
- +/* Intel: Flush write buffers */
- +void
- +iommu_flush_write_buffer(struct iommu_softc *iommu)
- +{
- + int i, sts;
- +
- + if (iommu->dte)
- + return;
- + if (!(iommu->cap & CAP_RWBF))
- + return;
- + printf("writebuf\n");
- + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_WBF);
- + for (i = 0; i < 5; i++) {
- + sts = iommu_readl(iommu, DMAR_GSTS_REG);
- + if (sts & GSTS_WBFS)
- + break;
- + delay(10000);
- + }
- + if (i == 5) {
- + printf("write buffer flush fails\n");
- + }
- +}
- +
- +void
- +iommu_flush_cache(struct iommu_softc *iommu, void *addr, size_t size)
- +{
- + if (iommu->dte) {
- + pmap_flush_cache((vaddr_t)addr, size);
- + return;
- + }
- + if (!(iommu->ecap & ECAP_C))
- + pmap_flush_cache((vaddr_t)addr, size);
- +}
- +
- +/*
- + * Intel: Flush IOMMU TLB Entries
- + * Flushing can occur globally, per domain or per page
- + */
- +void
- +iommu_flush_tlb(struct iommu_softc *iommu, int mode, int did)
- +{
- + int n;
- + uint64_t val;
- +
- + /* Call AMD */
- + if (iommu->dte) {
- + ivhd_invalidate_domain(iommu, did);
- + //ivhd_poll_events(iommu);
- + return;
- + }
- + val = IOTLB_IVT;
- + switch (mode) {
- + case IOTLB_GLOBAL:
- + val |= IIG_GLOBAL;
- + break;
- + case IOTLB_DOMAIN:
- + val |= IIG_DOMAIN | IOTLB_DID(did);
- + break;
- + case IOTLB_PAGE:
- + val |= IIG_PAGE | IOTLB_DID(did);
- + break;
- + }
- +
- + /* Check for Read/Write Drain */
- + if (iommu->cap & CAP_DRD)
- + val |= IOTLB_DR;
- + if (iommu->cap & CAP_DWD)
- + val |= IOTLB_DW;
- +
- + mtx_enter(&iommu->reg_lock);
- +
- + iommu_writeq(iommu, DMAR_IOTLB_REG(iommu), val);
- + n = 0;
- + do {
- + val = iommu_readq(iommu, DMAR_IOTLB_REG(iommu));
- + } while (n++ < 5 && val & IOTLB_IVT);
- +
- + mtx_leave(&iommu->reg_lock);
- +
- +#ifdef DEBUG
- + {
- + static int rg;
- + int a, r;
- +
- + if (!rg) {
- + a = (val >> IOTLB_IAIG_SHIFT) & IOTLB_IAIG_MASK;
- + r = (val >> IOTLB_IIRG_SHIFT) & IOTLB_IIRG_MASK;
- + if (a != r) {
- + printf("TLB Requested:%d Actual:%d\n", r, a);
- + rg = 1;
- + }
- + }
- + }
- +#endif
- +}
- +
/* Intel: Flush IOMMU settings
 * Flushes can occur globally, per domain, or per device
 * (no-op on AMD units, which have no VT-d context cache).
 */
void
iommu_flush_ctx(struct iommu_softc *iommu, int mode, int did, int sid, int fm)
{
    uint64_t val;
    int n;

    if (iommu->dte)
        return;
    val = CCMD_ICC;
    switch (mode) {
    case CTX_GLOBAL:
        val |= CIG_GLOBAL;
        break;
    case CTX_DOMAIN:
        val |= CIG_DOMAIN | CCMD_DID(did);
        break;
    case CTX_DEVICE:
        val |= CIG_DEVICE | CCMD_DID(did) |
            CCMD_SID(sid) | CCMD_FM(fm);
        break;
    }

    mtx_enter(&iommu->reg_lock);

    /* Issue invalidation and poll until hardware clears CCMD_ICC. */
    n = 0;
    iommu_writeq(iommu, DMAR_CCMD_REG, val);
    do {
        val = iommu_readq(iommu, DMAR_CCMD_REG);
    } while (n++ < 5 && val & CCMD_ICC);

    mtx_leave(&iommu->reg_lock);

#ifdef DEBUG
    {
        static int rg;
        int a, r;

        /* Warn once if hardware downgraded the requested granularity. */
        if (!rg) {
            a = (val >> CCMD_CAIG_SHIFT) & CCMD_CAIG_MASK;
            r = (val >> CCMD_CIRG_SHIFT) & CCMD_CIRG_MASK;
            if (a != r) {
                printf("CTX Requested:%d Actual:%d\n", r, a);
                rg = 1;
            }
        }
    }
#endif
}
- +
/* Intel: Enable Queued Invalidation.
 * Sets or clears GCMD.QIE and polls GSTS.QIES for confirmation;
 * no-op on hardware without ECAP_QI.
 */
void
iommu_enable_qi(struct iommu_softc *iommu, int enable)
{
    int n = 0;
    int sts;

    if (!(iommu->ecap & ECAP_QI))
        return;

    if (enable) {
        iommu->gcmd |= GCMD_QIE;

        mtx_enter(&iommu->reg_lock);

        iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
        /* Wait for hardware to acknowledge QI enable. */
        do {
            sts = iommu_readl(iommu, DMAR_GSTS_REG);
        } while (n++ < 5 && !(sts & GSTS_QIES));

        mtx_leave(&iommu->reg_lock);

        printf("set.qie: %d\n", n);
    } else {
        iommu->gcmd &= ~GCMD_QIE;

        mtx_enter(&iommu->reg_lock);

        iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
        /* Wait for hardware to acknowledge QI disable. */
        do {
            sts = iommu_readl(iommu, DMAR_GSTS_REG);
        } while (n++ < 5 && sts & GSTS_QIES);

        mtx_leave(&iommu->reg_lock);

        printf("clr.qie: %d\n", n);
    }
}
- +
/* Intel: Enable IOMMU translation.
 * Sets/clears GCMD.TE and polls GSTS.TES. On a failed enable the unit
 * is marked IOMMU_FLAGS_BAD, TE is rolled back and 1 is returned;
 * returns 0 on success (and unconditionally on AMD/dte units).
 * NOTE(review): the "xxx"/"yyy" printfs are leftover debug output.
 */
int
iommu_enable_translation(struct iommu_softc *iommu, int enable)
{
    uint32_t sts;
    uint64_t reg;
    int n = 0;

    if (iommu->dte)
        return (0);
    reg = 0;
    if (enable) {
        printf("enable iommu %d\n", iommu->id);
        iommu_showcfg(iommu, -1);

        iommu->gcmd |= GCMD_TE;

        /* Enable translation */
        printf(" pre tes: ");

        mtx_enter(&iommu->reg_lock);
        iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
        printf("xxx");
        /* Poll TES with a backoff (no delay on the first pass). */
        do {
            printf("yyy");
            sts = iommu_readl(iommu, DMAR_GSTS_REG);
            delay(n * 10000);
        } while (n++ < 5 && !(sts & GSTS_TES));
        mtx_leave(&iommu->reg_lock);

        printf(" set.tes: %d\n", n);

        if (n >= 5) {
            printf("error.. unable to initialize iommu %d\n",
                iommu->id);
            iommu->flags |= IOMMU_FLAGS_BAD;

            /* Disable IOMMU */
            iommu->gcmd &= ~GCMD_TE;
            mtx_enter(&iommu->reg_lock);
            iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
            mtx_leave(&iommu->reg_lock);

            return (1);
        }

        /* Start from a clean slate: drop all cached translations. */
        iommu_flush_ctx(iommu, CTX_GLOBAL, 0, 0, 0);
        iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
    } else {
        iommu->gcmd &= ~GCMD_TE;

        mtx_enter(&iommu->reg_lock);

        iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
        do {
            sts = iommu_readl(iommu, DMAR_GSTS_REG);
        } while (n++ < 5 && sts & GSTS_TES);
        mtx_leave(&iommu->reg_lock);

        printf(" clr.tes: %d\n", n);
    }

    return (0);
}
- +
/* Intel: Initialize IOMMU.
 * Maps the unit's MMIO registers, reads capabilities, sets up the
 * fault interrupt, allocates and programs the root table, computes
 * the supported guest address widths, and caches any pre-enabled
 * GCMD state left by firmware. Returns 0 on success, -1 if the
 * register window cannot be mapped.
 */
int
iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
    struct acpidmar_drhd *dh)
{
    static int niommu;    /* running unit counter: ids start at 1 */
    int len = VTD_PAGE_SIZE;
    int i, gaw;
    uint32_t sts;
    paddr_t paddr;

    if (_bus_space_map(sc->sc_memt, dh->address, len, 0, &iommu->ioh) != 0) {
        return (-1);
    }

    TAILQ_INIT(&iommu->domains);
    iommu->id = ++niommu;
    iommu->flags = dh->flags;
    iommu->segment = dh->segment;
    iommu->iot = sc->sc_memt;

    iommu->cap = iommu_readq(iommu, DMAR_CAP_REG);
    iommu->ecap = iommu_readq(iommu, DMAR_ECAP_REG);
    iommu->ndoms = cap_nd(iommu->cap);

    /* Pretty-print capability/extended-capability bits. */
    printf("  caps: %s%s%s%s%s%s%s%s%s%s%s\n",
        iommu->cap & CAP_AFL ? "afl " : "",		// adv fault
        iommu->cap & CAP_RWBF ? "rwbf " : "",		// write-buffer flush
        iommu->cap & CAP_PLMR ? "plmr " : "",		// protected lo region
        iommu->cap & CAP_PHMR ? "phmr " : "",		// protected hi region
        iommu->cap & CAP_CM ? "cm " : "",		// caching mode
        iommu->cap & CAP_ZLR ? "zlr " : "",		// zero-length read
        iommu->cap & CAP_PSI ? "psi " : "",		// page invalidate
        iommu->cap & CAP_DWD ? "dwd " : "",		// write drain
        iommu->cap & CAP_DRD ? "drd " : "",		// read drain
        iommu->cap & CAP_FL1GP ? "Gb " : "",		// 1Gb pages
        iommu->cap & CAP_PI ? "pi " : "");		// posted interrupts
    printf("  ecap: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
        iommu->ecap & ECAP_C ? "c " : "",		// coherent
        iommu->ecap & ECAP_QI ? "qi " : "",		// queued invalidate
        iommu->ecap & ECAP_DT ? "dt " : "",		// device iotlb
        iommu->ecap & ECAP_IR ? "ir " : "",		// intr remap
        iommu->ecap & ECAP_EIM ? "eim " : "",		// x2apic
        iommu->ecap & ECAP_PT ? "pt " : "",		// passthrough
        iommu->ecap & ECAP_SC ? "sc " : "",		// snoop control
        iommu->ecap & ECAP_ECS ? "ecs " : "",		// extended context
        iommu->ecap & ECAP_MTS ? "mts " : "",		// memory type
        iommu->ecap & ECAP_NEST ? "nest " : "",		// nested translations
        iommu->ecap & ECAP_DIS ? "dis " : "",		// deferred invalidation
        iommu->ecap & ECAP_PASID ? "pas " : "",		// pasid
        iommu->ecap & ECAP_PRS ? "prs " : "",		// page request
        iommu->ecap & ECAP_ERS ? "ers " : "",		// execute request
        iommu->ecap & ECAP_SRS ? "srs " : "",		// supervisor request
        iommu->ecap & ECAP_NWFS ? "nwfs " : "",		// no write flag
        iommu->ecap & ECAP_EAFS ? "eafs " : "");	// extended accessed flag

    mtx_init(&iommu->reg_lock, IPL_HIGH);

    /* Clear Interrupt Masking */
    iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);

    iommu->intr = acpidmar_intr_establish(iommu, IPL_HIGH,
        acpidmar_intr, iommu, "dmarintr");

    /* Enable interrupts */
    sts = iommu_readl(iommu, DMAR_FECTL_REG);
    iommu_writel(iommu, DMAR_FECTL_REG, sts & ~FECTL_IM);

    /* Allocate root pointer */
    iommu->root = iommu_alloc_page(iommu, &paddr);
#ifdef DEBUG
    printf("Allocated root pointer: pa:%.16llx va:%p\n",
        (uint64_t)paddr, iommu->root);
#endif
    iommu->rtaddr = paddr;
    iommu_flush_write_buffer(iommu);
    iommu_set_rtaddr(iommu, paddr);

#if 0
    if (iommu->ecap & ECAP_QI) {
        /* Queued Invalidation support */
        iommu->qi = iommu_alloc_page(iommu, &iommu->qip);
        iommu_writeq(iommu, DMAR_IQT_REG, 0);
        iommu_writeq(iommu, DMAR_IQA_REG, iommu->qip | IQA_QS_256);
    }
    if (iommu->ecap & ECAP_IR) {
        /* Interrupt remapping support */
        iommu_writeq(iommu, DMAR_IRTA_REG, 0);
    }
#endif

    /* Calculate guest address width and supported guest widths */
    gaw = -1;
    iommu->mgaw = cap_mgaw(iommu->cap);
    printf("gaw: %d { ", iommu->mgaw);
    for (i = 0; i < 5; i++) {
        if (cap_sagaw(iommu->cap) & (1L << i)) {
            gaw = VTD_LEVELTOAW(i);
            printf("%d ", gaw);
            iommu->agaw = gaw;	/* keeps the largest supported width */
        }
    }
    printf("}\n");

    /* Cache current status register bits */
    sts = iommu_readl(iommu, DMAR_GSTS_REG);
    if (sts & GSTS_TES)
        iommu->gcmd |= GCMD_TE;
    if (sts & GSTS_QIES)
        iommu->gcmd |= GCMD_QIE;
    if (sts & GSTS_IRES)
        iommu->gcmd |= GCMD_IRE;
    if (iommu->gcmd) {
        printf("gcmd: %x preset\n", iommu->gcmd);
    }
    acpidmar_intr(iommu);
    return (0);
}
- +
- +const char *dmar_rn(int reg);
- +
- +const char *
- +dmar_rn(int reg)
- +{
- + switch (reg) {
- + case EVT_HEAD_REG: return "evthead";
- + case EVT_TAIL_REG: return "evttail";
- + case CMD_HEAD_REG: return "cmdhead";
- + case CMD_TAIL_REG: return "cmdtail";
- + case CMD_BASE_REG: return "cmdbase";
- + case EVT_BASE_REG: return "evtbase";
- + case DEV_TAB_BASE_REG: return "devtblbase";
- + case IOMMUCTL_REG: return "iommuctl";
- +#if 0
- + case DMAR_VER_REG: return "ver";
- + case DMAR_CAP_REG: return "cap";
- + case DMAR_ECAP_REG: return "ecap";
- + case DMAR_GSTS_REG: return "gsts";
- + case DMAR_GCMD_REG: return "gcmd";
- + case DMAR_FSTS_REG: return "fsts";
- + case DMAR_FECTL_REG: return "fectl";
- + case DMAR_RTADDR_REG: return "rtaddr";
- + case DMAR_FEDATA_REG: return "fedata";
- + case DMAR_FEADDR_REG: return "feaddr";
- + case DMAR_FEUADDR_REG: return "feuaddr";
- + case DMAR_PMEN_REG: return "pmen";
- + case DMAR_IEDATA_REG: return "iedata";
- + case DMAR_IEADDR_REG: return "ieaddr";
- + case DMAR_IEUADDR_REG: return "ieuaddr";
- + case DMAR_IRTA_REG: return "irta";
- + case DMAR_CCMD_REG: return "ccmd";
- + case DMAR_IQH_REG: return "iqh";
- + case DMAR_IQT_REG: return "iqt";
- + case DMAR_IQA_REG: return "iqa";
- +#endif
- + }
- + return "unknown";
- +}
- +
/* Read/Write IOMMU register */
/* Read a 32-bit IOMMU MMIO register.
 * NOTE(review): `reg < 00` is never true for valid (non-negative)
 * register offsets — the trace printf is effectively disabled.
 */
uint32_t
iommu_readl(struct iommu_softc *iommu, int reg)
{
    uint32_t v;

    v = bus_space_read_4(iommu->iot, iommu->ioh, reg);
    if (reg < 00) {
        printf("iommu%d: read %x %.8lx [%s]\n",
            iommu->id, reg, (unsigned long)v, dmar_rn(reg));
    }

    return (v);
}
- +
- +
- +#define dbprintf(x...)
- +
- +void
- +iommu_writel(struct iommu_softc *iommu, int reg, uint32_t v)
- +{
- + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
- + iommu->id, reg, (unsigned long)v, dmar_rn(reg));
- + bus_space_write_4(iommu->iot, iommu->ioh, reg, (uint32_t)v);
- +}
- +
/* Read a 64-bit IOMMU MMIO register.
 * NOTE(review): `reg < 00` is never true for valid offsets — the
 * trace printf is effectively disabled.
 */
uint64_t
iommu_readq(struct iommu_softc *iommu, int reg)
{
    uint64_t v;

    v = bus_space_read_8(iommu->iot, iommu->ioh, reg);
    if (reg < 00) {
        printf("iommu%d: read %x %.8lx [%s]\n",
            iommu->id, reg, (unsigned long)v, dmar_rn(reg));
    }

    return (v);
}
- +
/* Write a 64-bit IOMMU MMIO register. */
void
iommu_writeq(struct iommu_softc *iommu, int reg, uint64_t v)
{
    dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
        iommu->id, reg, (unsigned long)v, dmar_rn(reg));
    bus_space_write_8(iommu->iot, iommu->ioh, reg, v);
}
- +
/* Check if a device is within a device scope.
 * Walks each DMAR device-scope entry: follows the bridge path encoded
 * in the entry's device/function list to resolve the final bus, then
 * checks for an exact endpoint match or (for bridge entries) whether
 * the target's bus falls in the bridge's secondary..subordinate range.
 * Returns DMAR_ENDPOINT, DMAR_BRIDGE, or 0 for no match.
 */
int
acpidmar_match_devscope(struct devlist_head *devlist, pci_chipset_tag_t pc,
    int sid)
{
    struct dmar_devlist *ds;
    int sub, sec, i;
    int bus, dev, fun, sbus;
    pcireg_t reg;
    pcitag_t tag;

    sbus = sid_bus(sid);
    TAILQ_FOREACH(ds, devlist, link) {
        bus = ds->bus;
        dev = ds->dp[0].device;
        fun = ds->dp[0].function;
        /* Walk PCI bridges in path */
        for (i = 1; i < ds->ndp; i++) {
            tag = pci_make_tag(pc, bus, dev, fun);
            reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
            bus = PPB_BUSINFO_SECONDARY(reg);
            dev = ds->dp[i].device;
            fun = ds->dp[i].function;
        }

        /* Check for device exact match */
        if (sid == mksid(bus, dev, fun)) {
            return DMAR_ENDPOINT;
        }

        /* Check for device subtree match */
        if (ds->type == DMAR_BRIDGE) {
            tag = pci_make_tag(pc, bus, dev, fun);
            reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
            sec = PPB_BUSINFO_SECONDARY(reg);
            sub = PPB_BUSINFO_SUBORDINATE(reg);
            if (sec <= sbus && sbus <= sub) {
                return DMAR_BRIDGE;
            }
        }
    }

    return (0);
}
- +
/* Create a new translation domain with id `did` on the given IOMMU:
 * allocates the top-level page table, installs the dmar_* bus_dma
 * overrides, and creates the iova extent (16MB .. 2^gaw-1, with the
 * local-APIC interrupt window 0xFEE00000-0xFEEFFFFF reserved).
 */
struct domain *
domain_create(struct iommu_softc *iommu, int did)
{
    struct domain *dom;
    int gaw;

    printf("iommu%d: create domain: %.4x\n", iommu->id, did);
    dom = malloc(sizeof(*dom), M_DEVBUF, M_ZERO | M_WAITOK);
    dom->did = did;
    dom->iommu = iommu;
    dom->pte = iommu_alloc_page(iommu, &dom->ptep);
    TAILQ_INIT(&dom->devices);

    /* Setup DMA */
    dom->dmat._cookie = dom;
    dom->dmat._dmamap_create = dmar_dmamap_create;		// nop
    dom->dmat._dmamap_destroy = dmar_dmamap_destroy;	// nop
    dom->dmat._dmamap_load = dmar_dmamap_load;		// lm
    dom->dmat._dmamap_load_mbuf = dmar_dmamap_load_mbuf;	// lm
    dom->dmat._dmamap_load_uio = dmar_dmamap_load_uio;	// lm
    dom->dmat._dmamap_load_raw = dmar_dmamap_load_raw;	// lm
    dom->dmat._dmamap_unload = dmar_dmamap_unload;		// um
    dom->dmat._dmamap_sync = dmar_dmamap_sync;		// lm
    dom->dmat._dmamem_alloc = dmar_dmamem_alloc;		// nop
    dom->dmat._dmamem_free = dmar_dmamem_free;		// nop
    dom->dmat._dmamem_map = dmar_dmamem_map;		// nop
    dom->dmat._dmamem_unmap = dmar_dmamem_unmap;		// nop
    dom->dmat._dmamem_mmap = dmar_dmamem_mmap;

    snprintf(dom->exname, sizeof(dom->exname), "did:%x.%.4x",
        iommu->id, dom->did);

    /* Setup IOMMU address map */
    gaw = min(iommu->agaw, iommu->mgaw);
    printf("Creating Domain with %d bits\n", gaw);
    dom->iovamap = extent_create(dom->exname, 1024*1024*16,
        (1LL << gaw)-1,
        M_DEVBUF, NULL, 0,
        EX_WAITOK|EX_NOCOALESCE);

    /* Zero out Interrupt region */
    extent_alloc_region(dom->iovamap, 0xFEE00000L, 0x100000,
        EX_WAITOK);
    mtx_init(&dom->exlck, IPL_HIGH);

    TAILQ_INSERT_TAIL(&iommu->domains, dom, link);

    return dom;
}
- +
- +void domain_add_device(struct domain *dom, int sid)
- +{
- + struct domain_dev *ddev;
- +
- + printf("add %s to iommu%d.%.4x\n", dmar_bdf(sid), dom->iommu->id, dom->did);
- + ddev = malloc(sizeof(*ddev), M_DEVBUF, M_ZERO | M_WAITOK);
- + ddev->sid = sid;
- + TAILQ_INSERT_TAIL(&dom->devices, ddev, link);
- +
- + /* Should set context entry here?? */
- +}
- +
- +void domain_remove_device(struct domain *dom, int sid)
- +{
- + struct domain_dev *ddev, *tmp;
- +
- + TAILQ_FOREACH_SAFE(ddev, &dom->devices, link, tmp) {
- + if (ddev->sid == sid) {
- + TAILQ_REMOVE(&dom->devices, ddev, link);
- + free(ddev, sizeof(*ddev), M_DEVBUF);
- + }
- + }
- +}
- +
/* Lookup domain by segment & source id (bus.device.function).
 * Finds the IOMMU covering the device (devscope match or catch-all),
 * then returns the existing domain containing the device, or creates
 * one: a dedicated domain while ids remain, otherwise the shared
 * "unity" catch-all domain. Returns NULL if no IOMMU covers the
 * device or the driver is not attached.
 */
struct domain *
domain_lookup(struct acpidmar_softc *sc, int segment, int sid)
{
    struct iommu_softc *iommu;
    struct domain_dev *ddev;
    struct domain *dom;
    int rc;

    if (sc == NULL) {
        return NULL;
    }

    /* Lookup IOMMU for this device */
    TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
        if (iommu->segment != segment)
            continue;
        /* Check for devscope match or catchall iommu */
        rc = acpidmar_match_devscope(&iommu->devices, sc->sc_pc, sid);
        if (rc != 0 || iommu->flags) {
            break;
        }
    }
    /* iommu is NULL here if the loop ran to completion. */
    if (!iommu) {
        printf("%s: no iommu found\n", dmar_bdf(sid));
        return NULL;
    }

    //acpidmar_intr(iommu);

    /* Search domain devices */
    TAILQ_FOREACH(dom, &iommu->domains, link) {
        TAILQ_FOREACH(ddev, &dom->devices, link) {
            /* XXX: match all functions? */
            if (ddev->sid == sid) {
                return dom;
            }
        }
    }
    if (iommu->ndoms <= 2) {
        /* Running out of domains.. create catchall domain */
        if (!iommu->unity) {
            iommu->unity = domain_create(iommu, 1);
        }
        dom = iommu->unity;
    } else {
        /* Domain ids are handed out from the top down. */
        dom = domain_create(iommu, --iommu->ndoms);
    }
    if (!dom) {
        printf("no domain here\n");
        return NULL;
    }

    /* Add device to domain */
    domain_add_device(dom, sid);

    return dom;
}
- +
/* Map Guest Pages into IOMMU.
 * Translates each page of the current process VA range [va, va+len)
 * to its host PA and maps guest-physical gpa -> host PA in the domain.
 * NOTE(review): pmap_extract failure is ignored — hpa stays 0 and a
 * zero mapping is installed for that page; confirm this is intended.
 */
void _iommu_map(void *dom, vaddr_t va, bus_addr_t gpa, bus_size_t len)
{
    bus_size_t i;
    paddr_t hpa;

    if (dom == NULL) {
        return;
    }
    printf("Mapping dma: %lx = %lx/%lx\n", va, gpa, len);
    for (i = 0; i < len; i += PAGE_SIZE) {
        hpa = 0;
        pmap_extract(curproc->p_vmspace->vm_map.pmap, va, &hpa);
        domain_map_page(dom, gpa, hpa, PTE_P | PTE_R | PTE_W);
        gpa += PAGE_SIZE;
        va += PAGE_SIZE;
    }
}
- +
- +/* Find IOMMU for a given PCI device */
- +void *_iommu_domain(int segment, int bus, int dev, int func, int *id)
- +{
- + struct domain *dom;
- +
- + dom = domain_lookup(acpidmar_sc, segment, mksid(bus, dev, func));
- + if (dom) {
- + *id = dom->did;
- + }
- + return dom;
- +}
- +
void domain_map_device(struct domain *dom, int sid);
void ivhd_intr_map(struct iommu_softc *);

/* Wire a PCI source-id into the domain's translation structures.
 * AMD: fill in the device-table entry (DTE) for the sid.
 * Intel: allocate the per-bus context table on first use, then fill
 * in the context entry for the device's devfn; entries already valid
 * are left untouched.
 */
void
domain_map_device(struct domain *dom, int sid)
{
    struct iommu_softc *iommu;
    struct context_entry *ctx;
    paddr_t paddr;
    int bus, devfn;
    int tt, lvl;

    iommu = dom->iommu;

    bus = sid_bus(sid);
    devfn = sid_devfn(sid);
    /* AMD attach device */
    if (iommu->dte) {
        struct ivhd_dte *dte = &iommu->dte[sid];
        if (!dte->dw0) {
            /* Setup Device Table Entry: bus.devfn */
            printf("@@@ PCI Attach: %.4x[%s] %.4x\n", sid, dmar_bdf(sid), dom->did);
            dte_set_host_page_table_root_ptr(dte, dom->ptep);
            dte_set_domain(dte, dom->did);
            dte_set_mode(dte, 3);  // Set 4 level PTE
            dte_set_tv(dte);
            dte_set_valid(dte);
            ivhd_flush_devtab(iommu, dom->did);
            //ivhd_showit(iommu);
            ivhd_showdte();
        }
        //ivhd_poll_events(iommu);
        return;
    }

    /* Create Bus mapping */
    if (!root_entry_is_valid(&iommu->root[bus])) {
        iommu->ctx[bus] = iommu_alloc_page(iommu, &paddr);
        iommu->root[bus].lo = paddr | ROOT_P;
        iommu_flush_cache(iommu, &iommu->root[bus],
            sizeof(struct root_entry));
        dprintf("iommu%d: Allocate context for bus: %.2x pa:%.16llx va:%p\n",
            iommu->id, bus, (uint64_t)paddr,
            iommu->ctx[bus]);
    }

    /* Create DevFn mapping */
    ctx = iommu->ctx[bus] + devfn;
    if (!context_entry_is_valid(ctx)) {
        tt = CTX_T_MULTI;
        lvl = VTD_AWTOLEVEL(iommu->agaw);

        /* Initialize context */
        context_set_slpte(ctx, dom->ptep);
        context_set_translation_type(ctx, tt);
        context_set_domain_id(ctx, dom->did);
        context_set_address_width(ctx, lvl);
        context_set_present(ctx);

        /* Flush it */
        iommu_flush_cache(iommu, ctx, sizeof(struct context_entry));
        /* Caching-mode hardware needs explicit invalidations. */
        if ((iommu->cap & CAP_CM) || force_cm) {
            iommu_flush_ctx(iommu, CTX_DEVICE, dom->did, sid, 0);
            iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
        } else {
            iommu_flush_write_buffer(iommu);
        }
        dprintf("iommu%d: %s set context ptep:%.16llx lvl:%d did:%.4x tt:%d\n",
            iommu->id, dmar_bdf(sid), (uint64_t)dom->ptep, lvl,
            dom->did, tt);
    }
}
- +
- +/*
- + * Look up the domain for segment:sid and optionally (mapctx != 0) program
- + * the hardware device/context tables via domain_map_device().
- + * Returns NULL when no domain exists for the device.
- + * NOTE(review): 'dom' is declared static but is unconditionally assigned
- + * before use -- the qualifier appears unnecessary and is not MP-safe;
- + * confirm it can be dropped.
- + */
- +struct domain *
- +acpidmar_pci_attach(struct acpidmar_softc *sc, int segment, int sid, int mapctx)
- +{
- +	static struct domain *dom;
- +
- +	dom = domain_lookup(sc, segment, sid);
- +	if (!dom) {
- +		printf("no domain: %s\n", dmar_bdf(sid));
- +		return NULL;
- +	}
- +
- +	if (mapctx) {
- +		domain_map_device(dom, sid);
- +	}
- +
- +	return dom;
- +}
- +
- +/*
- + * Debug filter: decide whether a PCI device should be handled by the IOMMU
- + * hook.  Currently everything behind bus 0 qualifies.
- + * NOTE(review): everything after the first return is unreachable debug
- + * scaffolding (the hard-coded bus==1 and B:D.F cases) and should be
- + * deleted or re-enabled deliberately.
- + */
- +int ismap(int bus, int dev, int fun) {
- +	return (bus > 0);
- +	return (bus == 1);
- +
- +	if (bus == 3 && dev == 0 && fun == 6)
- +		return 1;
- +	if (bus == 0 && dev == 8 && fun == 1)
- +		return 1;
- +	return 0;
- +}
- +
- +/*
- + * PCI attach hook: called for every device found during PCI enumeration.
- + * Skips devices when no DMAR/IVRS table was parsed or when ismap() filters
- + * them out; otherwise attaches the device to its IOMMU domain, marks VGA
- + * devices no-map, identity-maps the first 16MB for ISA bridges, and swaps
- + * the device's DMA tag for the domain's translating tag.
- + * NOTE(review): ivhd_intr_map() is AMD-specific but is called on every
- + * path -- verify it is harmless when the IOMMU is Intel VT-d.
- + */
- +void
- +acpidmar_pci_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
- +{
- +	int bus, dev, fun;
- +	struct domain *dom;
- +	pcireg_t reg;
- +
- +	if (!acpidmar_sc) {
- +		/* No DMAR, ignore */
- +		return;
- +	}
- +
- +	/* Add device to our list */
- +	pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
- +	reg = pci_conf_read(pc, pa->pa_tag, PCI_CLASS_REG);
- +	if (!ismap(bus, dev, fun))
- +		return;
- +#if 0
- +	if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
- +	    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
- +		printf("dmar: %.4x:%.2x:%.2x.%x is VGA, ignoring\n",
- +		    pa->pa_domain, bus, dev, fun);
- +		return;
- +	}
- +#endif
- +	/* Add device to domain */
- +	dom = acpidmar_pci_attach(acpidmar_sc, pa->pa_domain,
- +	    mksid(bus, dev, fun), 0);
- +	if (dom == NULL)
- +		return;
- +
- +	if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
- +	    PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
- +		dom->flag = DOM_NOMAP;
- +	}
- +	if (PCI_CLASS(reg) == PCI_CLASS_BRIDGE &&
- +	    PCI_SUBCLASS(reg) == PCI_SUBCLASS_BRIDGE_ISA) {
- +		/* For ISA Bridges, map 0-16Mb as 1:1 */
- +		printf("dmar: %.4x:%.2x:%.2x.%x mapping ISA\n",
- +		    pa->pa_domain, bus, dev, fun);
- +		domain_map_pthru(dom, 0x00, 16*1024*1024);
- +	}
- +	ivhd_intr_map(dom->iommu);
- +
- +	/* Change DMA tag */
- +	pa->pa_dmat = &dom->dmat;
- +}
- +
- +/* Create list of device scope entries from ACPI table */
- +/*
- + * Walk the device-scope records that follow a DMAR sub-table entry 'de'
- + * starting at byte offset 'off'.  Bridge and endpoint scopes are copied
- + * (header + trailing path bytes) into freshly allocated dmar_devlist nodes
- + * appended to 'devlist'; other scope types are skipped.
- + * ndp = path bytes / 2 because each path element is a (device, function)
- + * byte pair.
- + * NOTE(review): the debug printf dereferences d->dp[0] -- assumes every
- + * bridge/endpoint scope carries at least one path element; confirm
- + * against the ACPI DMAR spec / firmware in the wild.
- + */
- +void
- +acpidmar_parse_devscope(union acpidmar_entry *de, int off, int segment,
- +    struct devlist_head *devlist)
- +{
- +	struct acpidmar_devscope *ds;
- +	struct dmar_devlist *d;
- +	int dplen, i;
- +
- +	TAILQ_INIT(devlist);
- +	while (off < de->length) {
- +		ds = (struct acpidmar_devscope *)((unsigned char *)de + off);
- +		off += ds->length;
- +
- +		/* We only care about bridges and endpoints */
- +		if (ds->type != DMAR_ENDPOINT && ds->type != DMAR_BRIDGE)
- +			continue;
- +
- +		dplen = ds->length - sizeof(*ds);
- +		d = malloc(sizeof(*d) + dplen, M_DEVBUF, M_ZERO | M_WAITOK);
- +		d->bus = ds->bus;
- +		d->type = ds->type;
- +		d->ndp = dplen / 2;
- +		d->dp = (void *)&d[1];
- +		memcpy(d->dp, &ds[1], dplen);
- +		TAILQ_INSERT_TAIL(devlist, d, link);
- +
- +		printf("  %8s  %.4x:%.2x.%.2x.%x {",
- +		    ds->type == DMAR_BRIDGE ? "bridge" : "endpoint",
- +		    segment, ds->bus,
- +		    d->dp[0].device,
- +		    d->dp[0].function);
- +
- +		for (i = 1; i < d->ndp; i++) {
- +			printf(" %2x.%x ",
- +			    d->dp[i].device,
- +			    d->dp[i].function);
- +		}
- +		printf("}\n");
- +	}
- +}
- +
- +/* DMA Remapping Hardware Unit */
- +/*
- + * Parse one DRHD sub-table: allocate an iommu_softc, collect its device
- + * scope list, initialize the hardware via iommu_init(), and queue it on
- + * sc_drhds.  A unit with the INCLUDE_PCI_ALL flag set (de->drhd.flags)
- + * is the catch-all and goes last so specific units match first.
- + */
- +void
- +acpidmar_drhd(struct acpidmar_softc *sc, union acpidmar_entry *de)
- +{
- +	struct iommu_softc *iommu;
- +
- +	printf("DRHD: segment:%.4x base:%.16llx flags:%.2x\n",
- +	    de->drhd.segment,
- +	    de->drhd.address,
- +	    de->drhd.flags);
- +	iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO | M_WAITOK);
- +	acpidmar_parse_devscope(de, sizeof(de->drhd), de->drhd.segment,
- +	    &iommu->devices);
- +	iommu_init(sc, iommu, &de->drhd);
- +
- +	if (de->drhd.flags) {
- +		/* Catchall IOMMU goes at end of list */
- +		TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
- +	} else {
- +		TAILQ_INSERT_HEAD(&sc->sc_drhds, iommu, link);
- +	}
- +}
- +
- +/* Reserved Memory Region Reporting */
- +/*
- + * Parse one RMRR sub-table: record the page-aligned reserved range and its
- + * device scope, then widen the range to the full enclosing run of adjacent
- + * E820 reserved regions (some BIOSes DMA outside the advertised RMRR).
- + * Ranges with limit <= base are rejected as firmware bugs.
- + */
- +void
- +acpidmar_rmrr(struct acpidmar_softc *sc, union acpidmar_entry *de)
- +{
- +	struct rmrr_softc *rmrr;
- +	bios_memmap_t *im, *jm;
- +	uint64_t start, end;
- +
- +	printf("RMRR: segment:%.4x range:%.16llx-%.16llx\n",
- +	    de->rmrr.segment, de->rmrr.base, de->rmrr.limit);
- +	if (de->rmrr.limit <= de->rmrr.base) {
- +		printf('  buggy BIOS\n');
- +		return;
- +	}
- +
- +	rmrr = malloc(sizeof(*rmrr), M_DEVBUF, M_ZERO | M_WAITOK);
- +	rmrr->start = trunc_page(de->rmrr.base);
- +	rmrr->end = round_page(de->rmrr.limit);
- +	rmrr->segment = de->rmrr.segment;
- +	acpidmar_parse_devscope(de, sizeof(de->rmrr), de->rmrr.segment,
- +	    &rmrr->devices);
- +
- +	for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
- +		if (im->type != BIOS_MAP_RES)
- +			continue;
- +		/* Search for adjacent reserved regions */
- +		start = im->addr;
- +		end = im->addr+im->size;
- +		for (jm = im+1; jm->type == BIOS_MAP_RES && end == jm->addr;
- +		    jm++) {
- +			end = jm->addr+jm->size;
- +		}
- +		printf("e820: %.16llx - %.16llx\n", start, end);
- +		if (start <= rmrr->start && rmrr->end <= end) {
- +			/* Bah.. some buggy BIOS stomp outside RMRR */
- +			printf("  ** inside E820 Reserved %.16llx %.16llx\n",
- +			    start, end);
- +			rmrr->start = trunc_page(start);
- +			rmrr->end = round_page(end);
- +			break;
- +		}
- +	}
- +	TAILQ_INSERT_TAIL(&sc->sc_rmrrs, rmrr, link);
- +}
- +
- +/* Root Port ATS Reporting */
- +/*
- + * Parse one ATSR sub-table: record its flags/segment and device scope and
- + * append it to sc_atsrs.  No hardware is touched here.
- + */
- +void
- +acpidmar_atsr(struct acpidmar_softc *sc, union acpidmar_entry *de)
- +{
- +	struct atsr_softc *atsr;
- +
- +	printf("ATSR: segment:%.4x flags:%x\n",
- +	    de->atsr.segment,
- +	    de->atsr.flags);
- +
- +	atsr = malloc(sizeof(*atsr), M_DEVBUF, M_ZERO | M_WAITOK);
- +	atsr->flags = de->atsr.flags;
- +	atsr->segment = de->atsr.segment;
- +	acpidmar_parse_devscope(de, sizeof(de->atsr), de->atsr.segment,
- +	    &atsr->devices);
- +
- +	TAILQ_INSERT_TAIL(&sc->sc_atsrs, atsr, link);
- +}
- +
- +/*
- + * Top-level Intel DMAR table parser: selects the Intel page-map routine,
- + * walks every sub-table (DRHD/RMRR/ATSR), pre-creates domains for devices
- + * named in the DRHD scopes, and identity-maps + reserves the RMRR ranges
- + * in each affected domain's IOVA map.
- + * NOTE(review): the return value of extent_alloc_region() is stored in
- + * 'rc' but never checked -- a failed reservation is silently ignored.
- + */
- +void
- +acpidmar_init(struct acpidmar_softc *sc, struct acpi_dmar *dmar)
- +{
- +	struct rmrr_softc *rmrr;
- +	struct iommu_softc *iommu;
- +	struct domain *dom;
- +	struct dmar_devlist *dl;
- +	union acpidmar_entry *de;
- +	int off, sid, rc;
- +
- +	domain_map_page = domain_map_page_intel;
- +	printf(": hardware width: %d, intr_remap:%d x2apic_opt_out:%d\n",
- +	    dmar->haw+1,
- +	    !!(dmar->flags & 0x1),
- +	    !!(dmar->flags & 0x2));
- +	sc->sc_haw = dmar->haw+1;
- +	sc->sc_flags = dmar->flags;
- +
- +	TAILQ_INIT(&sc->sc_drhds);
- +	TAILQ_INIT(&sc->sc_rmrrs);
- +	TAILQ_INIT(&sc->sc_atsrs);
- +
- +	off = sizeof(*dmar);
- +	while (off < dmar->hdr.length) {
- +		de = (union acpidmar_entry *)((unsigned char *)dmar + off);
- +		switch (de->type) {
- +		case DMAR_DRHD:
- +			acpidmar_drhd(sc, de);
- +			break;
- +		case DMAR_RMRR:
- +			acpidmar_rmrr(sc, de);
- +			break;
- +		case DMAR_ATSR:
- +			acpidmar_atsr(sc, de);
- +			break;
- +		default:
- +			printf("DMAR: unknown %x\n", de->type);
- +			break;
- +		}
- +		off += de->length;
- +	}
- +
- +	/* Pre-create domains for iommu devices */
- +	TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
- +		TAILQ_FOREACH(dl, &iommu->devices, link) {
- +			sid = mksid(dl->bus, dl->dp[0].device,
- +			    dl->dp[0].function);
- +			dom = acpidmar_pci_attach(sc, iommu->segment, sid, 0);
- +			if (dom != NULL) {
- +				printf("%.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
- +				    iommu->segment, dl->bus, dl->dp[0].device, dl->dp[0].function,
- +				    iommu->id, dom->did);
- +			}
- +		}
- +	}
- +	/* Map passthrough pages for RMRR */
- +	TAILQ_FOREACH(rmrr, &sc->sc_rmrrs, link) {
- +		TAILQ_FOREACH(dl, &rmrr->devices, link) {
- +			sid = mksid(dl->bus, dl->dp[0].device,
- +			    dl->dp[0].function);
- +			dom = acpidmar_pci_attach(sc, rmrr->segment, sid, 0);
- +			if (dom != NULL) {
- +				printf("%s map ident: %.16llx %.16llx\n",
- +				    dom_bdf(dom), rmrr->start, rmrr->end);
- +				domain_map_pthru(dom, rmrr->start, rmrr->end);
- +				rc = extent_alloc_region(dom->iovamap,
- +				    rmrr->start, rmrr->end, EX_WAITOK);
- +			}
- +		}
- +	}
- +}
- +
- +
- +/*=====================================================
- + * AMD Vi
- + *=====================================================*/
- +void acpiivrs_ivhd(struct acpidmar_softc *, struct acpi_ivhd *);
- +int acpiivrs_iommu_match(struct pci_attach_args *);
- +int ivhd_iommu_init(struct acpidmar_softc *, struct iommu_softc *,
- + struct acpi_ivhd *);
- +void iommu_ivhd_add(struct iommu_softc *, int, int, int);
- +int _ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd);
- +void ivhd_show_event(struct iommu_softc *, struct ivhd_event *evt, int);
- +int ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait);
- +int ivhd_invalidate_domain(struct iommu_softc *iommu, int did);
- +void acpiivrs_mkalias(struct acpi_ivhd *, int, uint16_t, uint16_t, uint16_t);
- +
- +/* Setup interrupt for AMD */
- +/*
- + * Locate the AMD IOMMU PCI function (class SYSTEM/IOMMU) and wire its
- + * interrupt (MSI preferred, INTx fallback) to acpidmar_intr().  Runs at
- + * most once per iommu: iommu->intr doubles as the "done" flag.
- + * NOTE(review): on pci_intr_establish() failure intr is set to the
- + * sentinel 0xdeadbeef so we never retry -- deliberate, but any later
- + * code must not treat that value as a usable handle.
- + */
- +void ivhd_intr_map(struct iommu_softc *iommu) {
- +	struct pci_attach_args ipa;
- +	pci_intr_handle_t ih;
- +
- +	if (iommu->intr)
- +		return;
- +	if (pci_find_device(&ipa, acpiivrs_iommu_match)) {
- +		printf("found iommu pci\n");
- +		if (pci_intr_map_msi(&ipa, &ih) && pci_intr_map(&ipa, &ih)) {
- +			printf("couldn't map interrupt\n");
- +		}
- +		else {
- +			iommu->intr = pci_intr_establish(ipa.pa_pc, ih, IPL_NET | IPL_MPSAFE,
- +			    acpidmar_intr, iommu, "amd_iommu");
- +			if (!iommu->intr) {
- +				printf("NOINTR\n");
- +				iommu->intr = (void *)0xdeadbeef;
- +			}
- +		}
- +	}
- +}
- +
- +/*
- + * Debug helper: recursively print all present entries of an IOMMU page
- + * table.  'lvl' counts down from the top level; 'va' accumulates the
- + * virtual address bits decoded so far (9 bits per level + 12-bit offset).
- + * Non-leaf entries recurse through the direct map; leaves print the final
- + * va -> pte pair.  The #if 0 block is an older variant kept for reference.
- + */
- +void _dumppte(struct pte_entry *pte, int lvl, vaddr_t va)
- +{
- +	char *pfx[] = { "    ", "   ", "  ", " ", "" };
- +	uint64_t i, sh;
- +	struct pte_entry *npte;
- +
- +	for (i = 0; i < 512; i++) {
- +		sh = (i << (((lvl-1) * 9) + 12));
- +		if (pte[i].val & PTE_P) {
- +			if (lvl > 1) {
- +				npte = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
- +				printf("%slvl%d: %.16llx nxt:%llu\n", pfx[lvl], lvl, pte[i].val, (pte[i].val >> 9) & 7);
- +				_dumppte(npte, lvl-1, va | sh);
- +			}
- +			else {
- +				printf("%slvl%d: %.16llx <- %.16llx \n", pfx[lvl], lvl, pte[i].val, va | sh);
- +			}
- +		}
- +	}
- +#if 0
- +	uint64_t i;
- +	struct pte_entry *np;
- +
- +	// lvl 48 : 39-47 -> pte[512]
- +	// lvl 39 : 30-38 -> pte[512]
- +	// lvl 30 : 21-29 -> pte[512]
- +	// lvl 21 : 12-20 -> page
- +	for (i = 0; i < 512; i++) {
- +		if (pte[i].val & PTE_P) {
- +			if (lvl > 1) {
- +				printf(" lvl%d: %.3lx:%.3lx:%.3lx:%.3lx %.16llx\n", lvl,
- +				    (va >> 39) & 0x1ff,
- +				    (va >> 30) & 0x1ff,
- +				    (va >> 21) & 0x1ff,
- +				    (va >> 12) & 0x1ff, pte[i].val);
- +				np = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
- +				_dumppte(np, lvl - 9, va | (i << (lvl-9)));
- +			}
- +			else {
- +				printf(" %.16lx %.16llx\n", va, pte[i].val);
- +			}
- +		}
- +	}
- +#endif
- +}
- +
- +/*
- + * Debug helper: dump the AMD Device Table Entry and the domain page table
- + * for source id 'sid'.  Rate-limited to the first 10 calls via a static
- + * counter so a fault storm doesn't flood the console.  'paddr' (the
- + * faulting address) is currently unused here.
- + */
- +void showpage(int sid, paddr_t paddr)
- +{
- +	struct domain *dom;
- +	static int show = 0;
- +
- +	if (show > 10)
- +		return;
- +	show++;
- +	dom = acpidmar_pci_attach(acpidmar_sc, 0, sid, 0);
- +	if (!dom)
- +		return;
- +	printf("DTE: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
- +	    hwdte[sid].dw0,
- +	    hwdte[sid].dw1,
- +	    hwdte[sid].dw2,
- +	    hwdte[sid].dw3,
- +	    hwdte[sid].dw4,
- +	    hwdte[sid].dw5,
- +	    hwdte[sid].dw6,
- +	    hwdte[sid].dw7);
- +	_dumppte(dom->pte, 3, 0);
- +}
- +
- +/* Display AMD IOMMU Error */
- +/*
- + * Decode and print one entry from the AMD IOMMU event log, then zero the
- + * entry so stale data is not re-reported.  Extracts source id, event type,
- + * domain id, flags and the 64-bit fault address, and prints a per-type
- + * human-readable breakdown; several types additionally dump the device
- + * table, page tables, or the command ring for post-mortem analysis.
- + */
- +void
- +ivhd_show_event(struct iommu_softc *iommu, struct ivhd_event *evt, int head)
- +{
- +	int type, sid, did, flag;
- +	uint64_t address;
- +
- +	/* Get Device, Domain, Address and Type of event */
- +	sid = __EXTRACT(evt->dw0, EVT_SID);
- +	type = __EXTRACT(evt->dw1, EVT_TYPE);
- +	did = __EXTRACT(evt->dw1, EVT_DID);
- +	flag = __EXTRACT(evt->dw1, EVT_FLAG);
- +	address = _get64(&evt->dw2);
- +
- +	printf("=== IOMMU Error[%.4x]: ", head);
- +	switch (type) {
- +	case ILLEGAL_DEV_TABLE_ENTRY: // ok
- +		printf("illegal dev table entry dev=%s addr=0x%.16llx %s, %s, %s, %s\n",
- +		    dmar_bdf(sid), address,
- +		    evt->dw1 & EVT_TR ? "translation" : "transaction",
- +		    evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
- +		    evt->dw1 & EVT_RW ? "write" : "read",
- +		    evt->dw1 & EVT_I ? "interrupt" : "memory");
- +		ivhd_showdte();
- +		break;
- +	case IO_PAGE_FAULT: // ok
- +		printf("io page fault dev=%s did=0x%.4x addr=0x%.16llx\n%s, %s, %s, %s, %s, %s\n",
- +		    dmar_bdf(sid), did, address,
- +		    evt->dw1 & EVT_TR ? "translation" : "transaction",
- +		    evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
- +		    evt->dw1 & EVT_PE ? "no perm" : "perm",
- +		    evt->dw1 & EVT_RW ? "write" : "read",
- +		    evt->dw1 & EVT_PR ? "present" : "not present",
- +		    evt->dw1 & EVT_I ? "interrupt" : "memory");
- +		ivhd_showdte();
- +		showpage(sid, address);
- +		break;
- +	case DEV_TAB_HARDWARE_ERROR: // ok
- +		printf("device table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
- +		    dmar_bdf(sid), address,
- +		    evt->dw1 & EVT_TR ? "translation" : "transaction",
- +		    evt->dw1 & EVT_RW ? "write" : "read",
- +		    evt->dw1 & EVT_I ? "interrupt" : "memory");
- +		ivhd_showdte();
- +		break;
- +	case PAGE_TAB_HARDWARE_ERROR:
- +		printf("page table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
- +		    dmar_bdf(sid), address,
- +		    evt->dw1 & EVT_TR ? "translation" : "transaction",
- +		    evt->dw1 & EVT_RW ? "write" : "read",
- +		    evt->dw1 & EVT_I ? "interrupt" : "memory");
- +		ivhd_showdte();
- +		break;
- +	case ILLEGAL_COMMAND_ERROR: // ok
- +		printf("illegal command addr=0x%.16llx\n", address);
- +		ivhd_showcmd(iommu);
- +		break;
- +	case COMMAND_HARDWARE_ERROR:
- +		printf("command hardware error addr=0x%.16llx flag=0x%.4x\n",
- +		    address, flag);
- +		ivhd_showcmd(iommu);
- +		break;
- +	case IOTLB_INV_TIMEOUT:
- +		printf("iotlb invalidation timeout dev=%s address=0x%.16llx\n",
- +		    dmar_bdf(sid), address);
- +		break;
- +	case INVALID_DEVICE_REQUEST:
- +		printf("invalid device request dev=%s addr=0x%.16llx flag=0x%.4x\n",
- +		    dmar_bdf(sid), address, flag);
- +		break;
- +	default:
- +		printf("unknown type=0x%.2x\n", type);
- +		break;
- +	}
- +	//ivhd_showdte();
- +	/* Clear old event */
- +	evt->dw0 = 0;
- +	evt->dw1 = 0;
- +	evt->dw2 = 0;
- +	evt->dw3 = 0;
- +}
- +
- +/* AMD: Process IOMMU error from hardware */
- +/*
- + * Drain the AMD IOMMU event log ring: read head/tail, decode every pending
- + * entry via ivhd_show_event(), then write the new head back to hardware to
- + * acknowledge consumption.  Returns 0 in all cases.
- + */
- +int
- +ivhd_poll_events(struct iommu_softc *iommu)
- +{
- +	uint32_t head, tail;
- +	int sz;
- +
- +	sz = sizeof(struct ivhd_event);
- +	head = iommu_readl(iommu, EVT_HEAD_REG);
- +	tail = iommu_readl(iommu, EVT_TAIL_REG);
- +	if (head == tail) {
- +		/* No pending events */
- +		return (0);
- +	}
- +	ivhd_showevt(iommu);
- +	while (head != tail) {
- +		ivhd_show_event(iommu, iommu->evt_tbl + head, head);
- +		head = (head + sz) % EVT_TBL_SIZE;
- +	}
- +	iommu_writel(iommu, EVT_HEAD_REG, head);
- +	return (0);
- +}
- +
- +/* AMD: Issue command to IOMMU queue */
- +/*
- + * Low-level enqueue: copy 'cmd' into the command ring at the current tail
- + * and bump the tail register, with local interrupts disabled around the
- + * tail read/update so the slot reservation is not interleaved on this CPU.
- + * Returns the ring index used, or -EBUSY when the ring is full
- + * (advancing tail would collide with head).
- + */
- +int
- +_ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd)
- +{
- +	u_long rf;
- +	uint32_t head, tail, next;
- +	int sz;
- +
- +	head = iommu_readl(iommu, CMD_HEAD_REG);
- +	sz = sizeof(*cmd);
- +	rf = intr_disable();
- +	tail = iommu_readl(iommu, CMD_TAIL_REG);
- +	next = (tail + sz) % CMD_TBL_SIZE;
- +	if (next == head) {
- +		/* Queue is full */
- +		intr_restore(rf);
- +		return -EBUSY;
- +	}
- +	memcpy(iommu->cmd_tbl + tail, cmd, sz);
- +	iommu_writel(iommu, CMD_TAIL_REG, next);
- +	intr_restore(rf);
- +	return (tail);
- +}
- +
- +/*
- + * Enqueue 'cmd' and, when 'wait' is set, follow it with a COMPLETION_WAIT
- + * command that makes the hardware DMA a marker value into the stack
- + * variable 'wv'; we then busy-poll wv (up to ~1s in 1ms steps) to confirm
- + * all prior commands finished.  Returns the ring index of 'cmd' or a
- + * negative errno from the enqueue.
- + * NOTE(review): 'wv' is written by the device behind the compiler's back
- + * but is not declared volatile -- the poll loop could legally be
- + * optimized into a single read; confirm and consider volatile here.
- + */
- +int
- +ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait)
- +{
- +	struct ivhd_command wq = { 0 };
- +	volatile uint64_t wv __aligned(16) = 0;
- +	paddr_t paddr;
- +	int rc, i;
- +
- +	rc = _ivhd_issue_command(iommu, cmd);
- +	if (rc >= 0 && wait) {
- +		/* Wait for previous commands to complete.
- +		 * Store address of completion variable to command */
- +		pmap_extract(pmap_kernel(), (vaddr_t)&wv, &paddr);
- +		wq.dw0 = (paddr & ~0xF) | 0x1;
- +		wq.dw1 = (COMPLETION_WAIT << CMD_SHIFT) | ((paddr >> 32) & 0xFFFFF);
- +		wq.dw2 = 0xDEADBEEF;
- +		wq.dw3 = 0xFEEDC0DE;
- +
- +		rc = _ivhd_issue_command(iommu, &wq);
- +		/* wv will change to value in dw2/dw3 when command is complete */
- +		for (i = 0; i < 1000 && !wv; i++) {
- +			DELAY(1000);
- +		}
- +		if (i == 1000) {
- +			printf("ivhd command timeout: %.8x %.8x %.8x %.8x wv:%llx idx:%x\n",
- +			    cmd->dw0, cmd->dw1, cmd->dw2, cmd->dw3, wv, rc);
- +			ivhd_showcmd(iommu);
- +		}
- +	}
- +	return rc;
- +
- +}
- +
- +/* AMD: Flush changes to Device Table Entry for a specific domain */
- +/* Issues INVALIDATE_DEVTAB_ENTRY for 'did' and waits for completion. */
- +int ivhd_flush_devtab(struct iommu_softc *iommu, int did)
- +{
- +	struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_DEVTAB_ENTRY << CMD_SHIFT };
- +	return ivhd_issue_command(iommu, &cmd, 1);
- +}
- +
- +/* AMD: Invalidate all IOMMU device and page tables */
- +/* Fire-and-forget INVALIDATE_IOMMU_ALL; the #if 0 block is a debug dump
- + * of non-empty DTEs kept for reference. */
- +int ivhd_invalidate_iommu_all(struct iommu_softc *iommu)
- +{
- +	struct ivhd_command cmd = { .dw1 = INVALIDATE_IOMMU_ALL << CMD_SHIFT };
- +#if 0
- +	int i;
- +
- +	for (i = 0; i < 65536; i++) {
- +		if (iommu->dte[i].dw0) {
- +			printf("dte%.4x: %.8lx %.8lx %.8lx %.8lx\n",
- +			    i, (unsigned long)iommu->dte[i].dw0,
- +			    (unsigned long)iommu->dte[i].dw1,
- +			    (unsigned long)iommu->dte[i].dw2,
- +			    (unsigned long)iommu->dte[i].dw3);
- +		}
- +	}
- +#endif
- +	return ivhd_issue_command(iommu, &cmd, 0);
- +}
- +
- +/* AMD: Invalidate interrupt remapping */
- +/* Fire-and-forget INVALIDATE_INTERRUPT_TABLE for device table index 'did'. */
- +int ivhd_invalidate_interrupt_table(struct iommu_softc *iommu, int did)
- +{
- +	struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_INTERRUPT_TABLE << CMD_SHIFT };
- +	return ivhd_issue_command(iommu, &cmd, 0);
- +}
- +
- +/* AMD: Invalidate all page tables in a domain */
- +/* INVALIDATE_IOMMU_PAGES over the whole address range (S+PDE bits set in
- + * dw2/dw3 select the all-pages form); waits for completion. */
- +int ivhd_invalidate_domain(struct iommu_softc *iommu, int did)
- +{
- +	struct ivhd_command cmd = { .dw1 = did | (INVALIDATE_IOMMU_PAGES << CMD_SHIFT) };
- +
- +	cmd.dw2 = 0xFFFFF000 | 0x3;
- +	cmd.dw3 = 0x7FFFFFFF;
- +	return ivhd_issue_command(iommu, &cmd, 1);
- +}
- +
- +/* AMD: Display Registers */
- +/* Debug dump of the base/control/status registers and both ring pointers. */
- +void ivhd_showit(struct iommu_softc *iommu)
- +{
- +	printf("---- dt:%.16llx cmd:%.16llx evt:%.16llx ctl:%.16llx sts:%.16llx\n",
- +	    iommu_readq(iommu, DEV_TAB_BASE_REG),
- +	    iommu_readq(iommu, CMD_BASE_REG),
- +	    iommu_readq(iommu, EVT_BASE_REG),
- +	    iommu_readq(iommu, IOMMUCTL_REG),
- +	    iommu_readq(iommu, IOMMUSTS_REG));
- +	printf("---- cmd queue:%.16llx %.16llx evt queue:%.16llx %.16llx\n",
- +	    iommu_readq(iommu, CMD_HEAD_REG),
- +	    iommu_readq(iommu, CMD_TAIL_REG),
- +	    iommu_readq(iommu, EVT_HEAD_REG),
- +	    iommu_readq(iommu, EVT_TAIL_REG));
- +}
- +
- +/* AMD: Generate Errors to test event handler */
- +/* Deliberately corrupts DTE 0x2303 and issues an all-ones command to force
- + * ILLEGAL_DEV_TABLE_ENTRY / ILLEGAL_COMMAND_ERROR events -- test-only,
- + * never call on a production path. */
- +void ivhd_checkerr(struct iommu_softc *iommu);
- +void ivhd_checkerr(struct iommu_softc *iommu)
- +{
- +	struct ivhd_command cmd = { -1, -1, -1, -1 };
- +
- +	/* Generate ILLEGAL DEV TAB entry? */
- +	iommu->dte[0x2303].dw0 = -1;		// invalid
- +	iommu->dte[0x2303].dw2 = 0x1234;	// domain
- +	iommu->dte[0x2303].dw7 = -1;		// reserved
- +	ivhd_flush_devtab(iommu, 0x1234);
- +	ivhd_poll_events(iommu);
- +
- +	/* Generate ILLEGAL_COMMAND_ERROR : ok */
- +	ivhd_issue_command(iommu, &cmd, 0);
- +	ivhd_poll_events(iommu);
- +
- +	/* Generate page hardware error */
- +}
- +
- +/* AMD: Show Device Table Entry */
- +/* Debug dump: print every non-empty entry of the global 64K-entry device
- + * table 'hwdte', decoding the index as bus:dev.func. */
- +void ivhd_showdte(void)
- +{
- +	int i;
- +
- +	for (i = 0; i < 65536; i++) {
- +		if (hwdte[i].dw0) {
- +			printf("%.2x:%.2x.%x: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
- +			    i >> 8, (i >> 3) & 0x1F, i & 0x7,
- +			    hwdte[i].dw0, hwdte[i].dw1,
- +			    hwdte[i].dw2, hwdte[i].dw3,
- +			    hwdte[i].dw4, hwdte[i].dw5,
- +			    hwdte[i].dw6, hwdte[i].dw7);
- +		}
- +	}
- +}
- +
- +/* AMD: Show command entries */
- +/* Debug dump of the whole 4K command ring (32 x 16-byte entries), printing
- + * each entry's physical address and four dwords. */
- +void ivhd_showcmd(struct iommu_softc *iommu)
- +{
- +	struct ivhd_command *ihd;
- +	paddr_t phd;
- +	int i;
- +
- +	ihd = iommu->cmd_tbl;
- +	phd = iommu_readq(iommu, CMD_BASE_REG) & CMD_BASE_MASK;
- +	for (i = 0; i < 4096 / 128; i++) {
- +		printf("%.2x: %.16llx %.8x %.8x %.8x %.8x\n", i,
- +		    (uint64_t)phd + i * sizeof(*ihd),
- +		    ihd[i].dw0,ihd[i].dw1,ihd[i].dw2,ihd[i].dw3);
- +	}
- +}
- +
- +/* Placeholder: event-ring dump not implemented yet (hook kept so
- + * ivhd_poll_events() has a stable call site). */
- +void ivhd_showevt(struct iommu_softc *iommu)
- +{
- +}
- +
- +#define _c(x) (int)((iommu->ecap >> x ##_SHIFT) & x ## _MASK)
- +
- +/* AMD: Initialize IOMMU */
- +/*
- + * Bring up one AMD IOMMU unit described by an IVHD entry: map its MMIO
- + * window (0x80000 bytes), read/print the extended feature register,
- + * disable translation, program the command ring, event log and the shared
- + * 64K-entry device table, then re-enable with flags mirrored from the
- + * IVHD (coherent/HtTunEn/PassPW/ISOC) plus a 1ms invalidation timeout,
- + * and finally issue INVALIDATE_IOMMU_ALL.
- + * Returns 0 on success, -1 on bad arguments or bus-space map failure.
- + */
- +int
- +ivhd_iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
- +    struct acpi_ivhd *ivhd)
- +{
- +	static int niommu;
- +	paddr_t paddr;
- +	uint64_t ov;
- +
- +	if (sc == NULL || iommu == NULL || ivhd == NULL) {
- +		printf("Bad pointer to iommu_init!\n");
- +		return -1;
- +	}
- +	if (_bus_space_map(sc->sc_memt, ivhd->address, 0x80000, 0, &iommu->ioh) != 0) {
- +		printf("Bus Space Map fails\n");
- +		return -1;
- +	}
- +	TAILQ_INIT(&iommu->domains);
- +	TAILQ_INIT(&iommu->devices);
- +
- +	/* Setup address width and number of domains */
- +	iommu->id = ++niommu;
- +	iommu->iot = sc->sc_memt;
- +	iommu->mgaw = 48;
- +	iommu->agaw = 48;
- +	iommu->flags = 1;
- +	iommu->segment = 0;
- +	iommu->ndoms = 256;
- +
- +	iommu->ecap = iommu_readq(iommu, EXTFEAT_REG);
- +	printf("ecap = %.16llx\n", iommu->ecap);
- +	printf("%s%s%s%s%s%s%s%s\n",
- +	    iommu->ecap & EFR_PREFSUP ? "pref " : "",
- +	    iommu->ecap & EFR_PPRSUP  ? "ppr " : "",
- +	    iommu->ecap & EFR_NXSUP   ? "nx " : "",
- +	    iommu->ecap & EFR_GTSUP   ? "gt " : "",
- +	    iommu->ecap & EFR_IASUP   ? "ia " : "",
- +	    iommu->ecap & EFR_GASUP   ? "ga " : "",
- +	    iommu->ecap & EFR_HESUP   ? "he " : "",
- +	    iommu->ecap & EFR_PCSUP   ? "pc " : "");
- +	printf("hats:%x gats:%x glxsup:%x smif:%x smifrc:%x gam:%x\n",
- +	    _c(EFR_HATS), _c(EFR_GATS), _c(EFR_GLXSUP), _c(EFR_SMIFSUP),
- +	    _c(EFR_SMIFRC), _c(EFR_GAMSUP));
- +
- +	/* Turn off iommu */
- +	ov = iommu_readq(iommu, IOMMUCTL_REG);
- +	iommu_writeq(iommu, IOMMUCTL_REG, ov & ~(CTL_IOMMUEN | CTL_COHERENT |
- +	    CTL_HTTUNEN | CTL_RESPASSPW | CTL_PASSPW | CTL_ISOC));
- +
- +	/* Setup command buffer with 4k buffer (128 entries) */
- +	iommu->cmd_tbl = iommu_alloc_page(iommu, &paddr);
- +	iommu_writeq(iommu, CMD_BASE_REG, (paddr & CMD_BASE_MASK) | CMD_TBL_LEN_4K);
- +	iommu_writel(iommu, CMD_HEAD_REG, 0x00);
- +	iommu_writel(iommu, CMD_TAIL_REG, 0x00);
- +
- +	/* Setup event log with 4k buffer (128 entries) */
- +	iommu->evt_tbl = iommu_alloc_page(iommu, &paddr);
- +	iommu_writeq(iommu, EVT_BASE_REG, (paddr & EVT_BASE_MASK) | EVT_TBL_LEN_4K);
- +	iommu_writel(iommu, EVT_HEAD_REG, 0x00);
- +	iommu_writel(iommu, EVT_TAIL_REG, 0x00);
- +
- +	/* Setup device table
- +	 * 1 entry per source ID (bus:device:function - 64k entries)
- +	 */
- +	iommu->dte = hwdte;
- +	pmap_extract(pmap_kernel(), (vaddr_t)iommu->dte, &paddr);
- +	iommu_writeq(iommu, DEV_TAB_BASE_REG, (paddr & DEV_TAB_MASK) | DEV_TAB_LEN);
- +
- +	/* Enable IOMMU */
- +	ov |= (CTL_IOMMUEN | CTL_EVENTLOGEN | CTL_CMDBUFEN | CTL_EVENTINTEN);
- +	if (ivhd->flags & IVHD_COHERENT)
- +		ov |= CTL_COHERENT;
- +	if (ivhd->flags & IVHD_HTTUNEN)
- +		ov |= CTL_HTTUNEN;
- +	if (ivhd->flags & IVHD_RESPASSPW)
- +		ov |= CTL_RESPASSPW;
- +	if (ivhd->flags & IVHD_PASSPW)
- +		ov |= CTL_PASSPW;
- +	if (ivhd->flags & IVHD_ISOC)
- +		ov |= CTL_ISOC;
- +	ov &= ~(CTL_INVTIMEOUT_MASK << CTL_INVTIMEOUT_SHIFT);
- +	ov |= (CTL_INVTIMEOUT_1MS << CTL_INVTIMEOUT_SHIFT);
- +	iommu_writeq(iommu, IOMMUCTL_REG, ov);
- +
- +	ivhd_invalidate_iommu_all(iommu);
- +	//ivhd_checkerr(iommu);
- +
- +	TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
- +	return 0;
- +}
- +
- +/*
- + * Record a device-id range [start, end] with config 'cfg' for an IOMMU.
- + * NOTE(review): BUG -- the allocated ivhd_devlist node is filled in but
- + * never inserted into any list (and 'iommu' is unused), so the record is
- + * leaked and the range is effectively ignored.  It should presumably be
- + * TAILQ_INSERT'ed onto iommu->devices or similar.
- + */
- +void
- +iommu_ivhd_add(struct iommu_softc *iommu, int start, int end, int cfg)
- +{
- +	struct ivhd_devlist *idev;
- +
- +	idev = malloc(sizeof(*idev), M_DEVBUF, M_ZERO | M_WAITOK);
- +	idev->start_id = start;
- +	idev->end_id = end;
- +	idev->cfg = cfg;
- +}
- +
- +/* pci_find_device() match callback: true for the AMD IOMMU PCI function,
- + * identified by class SYSTEM / subclass IOMMU. */
- +int acpiivrs_iommu_match(struct pci_attach_args *pa)
- +{
- +	int b,d,f;
- +
- +	pci_decompose_tag(pa->pa_pc, pa->pa_tag, &b, &d, &f);
- +	printf(" matchdev: %d.%d.%d\n", b, d, f);
- +	if (PCI_CLASS(pa->pa_class) == PCI_CLASS_SYSTEM &&
- +	    PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_SYSTEM_IOMMU) {
- +		printf("iziommu\n");
- +		return (1);
- +	}
- +	return (0);
- +}
- +
- +/* Setup alias mapping, either 1:1 or a->b */
- +/*
- + * Called after a SOR/ALIAS_SOR entry: the next IVHD entry at 'off' must be
- + * the matching EOR, which gives the end of the device-id range.  Fills
- + * sid_alias[start..end) with 'alias' (stepped by 'step': 1 for identity,
- + * 0 for many-to-one) tagged ALIAS_VALID.  Out-of-bounds offsets or a
- + * non-EOR entry silently abort.
- + */
- +void
- +acpiivrs_mkalias(struct acpi_ivhd *ivhd, int off, uint16_t start, uint16_t alias, uint16_t step)
- +{
- +	union acpi_ivhd_entry *ie = NULL;
- +	int i;
- +
- +	if (off+sizeof(ie->eor) >= ivhd->length)
- +		return;
- +	ie = (void *)ivhd + off;
- +	if (ie->type != IVHD_EOR)
- +		return;
- +	printf("Set Alias: %.4x %.4x : %.4x/%x\n", start, ie->eor.devid, alias, step);
- +	for (i = start; i < ie->eor.devid; i++) {
- +		sid_alias[i] = alias | ALIAS_VALID;
- +		alias += step;
- +	}
- +}
- +
- +/*
- + * Parse one IVHD (type 10h) or extended IVHD (type 11h/40h) block: print
- + * its header and feature flags, initialize the hardware unit for the
- + * extended form, then walk the variable-length device entries, decoding
- + * select/range/alias/extended/special records and building the sid alias
- + * table for range entries.  Unknown entry types abort the walk.
- + * NOTE(review): the IVHD_ALL case prints 'dte', which is uninitialized at
- + * that point -- it should print 'all_dte'.  Also note the extended branch
- + * passes 'ivhd' (not 'ext') to ivhd_iommu_init(); the leading fields
- + * coincide, but confirm intentional.
- + */
- +void
- +acpiivrs_ivhd(struct acpidmar_softc *sc, struct acpi_ivhd *ivhd)
- +{
- +	struct iommu_softc *iommu;
- +	struct acpi_ivhd_ext *ext;
- +	union acpi_ivhd_entry *ie;
- +	int off, dte, all_dte = 0;
- +	int alias, start;
- +
- +	if (ivhd->type == IVRS_IVHD_EXT) {
- +		ext = (struct acpi_ivhd_ext *)ivhd;
- +		printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x %.16llx\n",
- +		    ext->type, ext->flags, ext->length,
- +		    ext->segment, dmar_bdf(ext->devid), ext->cap,
- +		    ext->address, ext->info,
- +		    ext->attrib, ext->efr);
- +		if (ext->flags & IVHD_PPRSUP)
- +			printf(" PPRSup");
- +		if (ext->flags & IVHD_PREFSUP)
- +			printf(" PreFSup");
- +		if (ext->flags & IVHD_COHERENT)
- +			printf(" Coherent");
- +		if (ext->flags & IVHD_IOTLB)
- +			printf(" Iotlb");
- +		if (ext->flags & IVHD_ISOC)
- +			printf(" ISoc");
- +		if (ext->flags & IVHD_RESPASSPW)
- +			printf(" ResPassPW");
- +		if (ext->flags & IVHD_PASSPW)
- +			printf(" PassPW");
- +		if (ext->flags & IVHD_HTTUNEN)
- +			printf( " HtTunEn");
- +		if (ext->flags)
- +			printf("\n");
- +		off = sizeof(*ext);
- +		iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO|M_WAITOK);
- +		ivhd_iommu_init(sc, iommu, ivhd);
- +	} else {
- +		printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x\n",
- +		    ivhd->type, ivhd->flags, ivhd->length,
- +		    ivhd->segment, dmar_bdf(ivhd->devid), ivhd->cap,
- +		    ivhd->address, ivhd->info,
- +		    ivhd->feature);
- +		if (ivhd->flags & IVHD_PPRSUP)
- +			printf(" PPRSup");
- +		if (ivhd->flags & IVHD_PREFSUP)
- +			printf(" PreFSup");
- +		if (ivhd->flags & IVHD_COHERENT)
- +			printf(" Coherent");
- +		if (ivhd->flags & IVHD_IOTLB)
- +			printf(" Iotlb");
- +		if (ivhd->flags & IVHD_ISOC)
- +			printf(" ISoc");
- +		if (ivhd->flags & IVHD_RESPASSPW)
- +			printf(" ResPassPW");
- +		if (ivhd->flags & IVHD_PASSPW)
- +			printf(" PassPW");
- +		if (ivhd->flags & IVHD_HTTUNEN)
- +			printf( " HtTunEn");
- +		if (ivhd->flags)
- +			printf("\n");
- +		off = sizeof(*ivhd);
- +	}
- +	while (off < ivhd->length) {
- +		ie = (void *)ivhd + off;
- +		switch (ie->type) {
- +		case IVHD_ALL:
- +			all_dte = ie->all.data;
- +			printf(" ALL %.4x\n", dte);
- +			off += sizeof(ie->all);
- +			break;
- +		case IVHD_SEL:
- +			dte = ie->sel.data;
- +			printf(" SELECT: %s %.4x\n", dmar_bdf(ie->sel.devid), dte);
- +			off += sizeof(ie->sel);
- +			break;
- +		case IVHD_SOR:
- +			dte = ie->sor.data;
- +			start = ie->sor.devid;
- +			printf(" SOR: %s %.4x\n", dmar_bdf(start), dte);
- +			off += sizeof(ie->sor);
- +			/* Setup 1:1 alias mapping */
- +			acpiivrs_mkalias(ivhd, off, start, start, 1);
- +			break;
- +		case IVHD_EOR:
- +			printf(" EOR: %s\n", dmar_bdf(ie->eor.devid));
- +			off += sizeof(ie->eor);
- +			break;
- +		case IVHD_ALIAS_SEL:
- +			dte = ie->alias.data;
- +			printf(" ALIAS: src=%s: ", dmar_bdf(ie->alias.srcid));
- +			printf(" %s %.4x\n", dmar_bdf(ie->alias.devid), dte);
- +			off += sizeof(ie->alias);
- +			break;
- +		case IVHD_ALIAS_SOR:
- +			dte = ie->alias.data;
- +			start = ie->alias.srcid;
- +			alias = ie->alias.devid;
- +			printf(" ALIAS_SOR: %s %.4x ", dmar_bdf(ie->alias.devid), dte);
- +			printf(" src=%s\n", dmar_bdf(ie->alias.srcid));
- +			off += sizeof(ie->alias);
- +			/* Setup alias mapping */
- +			acpiivrs_mkalias(ivhd, off, start, alias, 0);
- +			break;
- +		case IVHD_EXT_SEL:
- +			dte = ie->ext.data;
- +			printf(" EXT SEL: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
- +			    dte, ie->ext.extdata);
- +			off += sizeof(ie->ext);
- +			break;
- +		case IVHD_EXT_SOR:
- +			dte = ie->ext.data;
- +			printf(" EXT SOR: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
- +			    dte, ie->ext.extdata);
- +			off += sizeof(ie->ext);
- +			break;
- +		case IVHD_SPECIAL:
- +			printf(" SPECIAL\n");
- +			off += sizeof(ie->special);
- +			break;
- +		default:
- +			printf(" 2:unknown %x\n", ie->type);
- +			off = ivhd->length;
- +			break;
- +		}
- +	}
- +}
- +
- +/*
- + * Top-level AMD IVRS table parser: selects the AMD page-map routine,
- + * prints the VA/PA widths from ivinfo, initializes the softc lists and
- + * dispatches each sub-table (IVHD variants to acpiivrs_ivhd(); IVMD
- + * memory ranges are currently only logged, not honored).
- + */
- +void
- +acpiivrs_init(struct acpidmar_softc *sc, struct acpi_ivrs *ivrs)
- +{
- +	union acpi_ivrs_entry *ie;
- +	int off;
- +
- +	domain_map_page = domain_map_page_amd;
- +	printf("IVRS Version: %d\n", ivrs->hdr.revision);
- +	printf(" VA Size: %d\n", (ivrs->ivinfo >> IVRS_VASIZE_SHIFT) & IVRS_VASIZE_MASK);
- +	printf(" PA Size: %d\n", (ivrs->ivinfo >> IVRS_PASIZE_SHIFT) & IVRS_PASIZE_MASK);
- +
- +	TAILQ_INIT(&sc->sc_drhds);
- +	TAILQ_INIT(&sc->sc_rmrrs);
- +	TAILQ_INIT(&sc->sc_atsrs);
- +
- +	printf("======== IVRS\n");
- +	off = sizeof(*ivrs);
- +	while (off < ivrs->hdr.length) {
- +		ie = (void *)ivrs + off;
- +		switch (ie->type) {
- +		case IVRS_IVHD:
- +		case IVRS_IVHD_EXT:
- +			acpiivrs_ivhd(sc, &ie->ivhd);
- +			break;
- +		case IVRS_IVMD_ALL:
- +		case IVRS_IVMD_SPECIFIED:
- +		case IVRS_IVMD_RANGE:
- +			printf("ivmd\n");
- +			break;
- +		default:
- +			printf("1:unknown: %x\n", ie->type);
- +			break;
- +		}
- +		off += ie->length;
- +	}
- +	printf("======== End IVRS\n");
- +}
- +
- +/* AMD suspend/resume hook: on suspend just mark the unit; resume is a
- + * no-op here (hardware re-init is handled elsewhere).  Always returns 0. */
- +static int
- +acpiivhd_activate(struct iommu_softc *iommu, int act)
- +{
- +	switch (act) {
- +	case DVACT_SUSPEND:
- +		iommu->flags |= IOMMU_FLAGS_SUSPEND;
- +		break;
- +	case DVACT_RESUME:
- +		break;
- +	}
- +	return (0);
- +}
- +
- +/*
- + * Power-state hook for the whole acpidmar device.  AMD units (dte != NULL)
- + * delegate to acpiivhd_activate().  Intel units: on resume, restore the
- + * root table address and fault-event MSI registers and re-enable
- + * translation if the unit was cleanly suspended (SUSPEND set, BAD clear);
- + * on suspend, mark the unit and disable translation.  Always returns 0.
- + */
- +int
- +acpidmar_activate(struct device *self, int act)
- +{
- +	struct acpidmar_softc *sc = (struct acpidmar_softc *)self;
- +	struct iommu_softc *iommu;
- +
- +	printf("called acpidmar_activate %d %p\n", act, sc);
- +
- +	if (sc == NULL) {
- +		return (0);
- +	}
- +
- +	switch (act) {
- +	case DVACT_RESUME:
- +		TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
- +			printf("iommu%d resume\n", iommu->id);
- +			if (iommu->dte) {
- +				acpiivhd_activate(iommu, act);
- +				continue;
- +			}
- +			iommu_flush_write_buffer(iommu);
- +			iommu_set_rtaddr(iommu, iommu->rtaddr);
- +			iommu_writel(iommu, DMAR_FEDATA_REG, iommu->fedata);
- +			iommu_writel(iommu, DMAR_FEADDR_REG, iommu->feaddr);
- +			iommu_writel(iommu, DMAR_FEUADDR_REG,
- +			    iommu->feaddr >> 32);
- +			if ((iommu->flags & (IOMMU_FLAGS_BAD|IOMMU_FLAGS_SUSPEND)) ==
- +			    IOMMU_FLAGS_SUSPEND) {
- +				printf("enable wakeup translation\n");
- +				iommu_enable_translation(iommu, 1);
- +			}
- +			iommu_showcfg(iommu, -1);
- +		}
- +		break;
- +	case DVACT_SUSPEND:
- +		TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
- +			printf("iommu%d suspend\n", iommu->id);
- +			if (iommu->flags & IOMMU_FLAGS_BAD)
- +				continue;
- +			if (iommu->dte) {
- +				acpiivhd_activate(iommu, act);
- +				continue;
- +			}
- +			iommu->flags |= IOMMU_FLAGS_SUSPEND;
- +			iommu_enable_translation(iommu, 0);
- +			iommu_showcfg(iommu, -1);
- +		}
- +		break;
- +	}
- +	return (0);
- +}
- +
- +/* Global entry point for suspend/resume: forward 'act' to the singleton
- + * acpidmar softc if one attached. */
- +void
- +acpidmar_sw(int act)
- +{
- +	if (acpidmar_sc)
- +		acpidmar_activate((void*)acpidmar_sc, act);
- +}
- +
- +/* Autoconf match: attach when the ACPI attach args carry a DMAR (Intel)
- + * or IVRS (AMD) table; signature compare excludes the NUL terminator. */
- +int
- +acpidmar_match(struct device *parent, void *match, void *aux)
- +{
- +	struct acpi_attach_args *aaa = aux;
- +	struct acpi_table_header *hdr;
- +
- +	/* If we do not have a table, it is not us */
- +	if (aaa->aaa_table == NULL)
- +		return (0);
- +
- +	/* If it is an DMAR table, we can attach */
- +	hdr = (struct acpi_table_header *)aaa->aaa_table;
- +	if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0)
- +		return (1);
- +	if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0)
- +		return (1);
- +
- +	return (0);
- +}
- +
- +/* Autoconf attach: stash the global softc pointer and dispatch to the
- + * Intel (DMAR) or AMD (IVRS) parser based on the table signature. */
- +void
- +acpidmar_attach(struct device *parent, struct device *self, void *aux)
- +{
- +	struct acpidmar_softc *sc = (void *)self;
- +	struct acpi_attach_args *aaa = aux;
- +	struct acpi_dmar *dmar = (struct acpi_dmar *)aaa->aaa_table;
- +	struct acpi_ivrs *ivrs = (struct acpi_ivrs *)aaa->aaa_table;
- +	struct acpi_table_header *hdr;
- +
- +	hdr = (struct acpi_table_header *)aaa->aaa_table;
- +	sc->sc_memt = aaa->aaa_memt;
- +	if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0) {
- +		acpidmar_sc = sc;
- +		acpidmar_init(sc, dmar);
- +	}
- +	if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0) {
- +		acpidmar_sc = sc;
- +		acpiivrs_init(sc, ivrs);
- +	}
- +}
- +
- +/* Fault-event (MSI) interrupt handling */
- +void acpidmar_msi_hwmask(struct pic *, int);
- +void acpidmar_msi_hwunmask(struct pic *, int);
- +void acpidmar_msi_addroute(struct pic *, struct cpu_info *, int, int, int);
- +void acpidmar_msi_delroute(struct pic *, struct cpu_info *, int, int, int);
- +
- +void
- +acpidmar_msi_hwmask(struct pic *pic, int pin)
- +{
- + struct iommu_pic *ip = (void *)pic;
- + struct iommu_softc *iommu = ip->iommu;
- +
- + printf("msi_hwmask\n");
- +
- + mtx_enter(&iommu->reg_lock);
- +
- + iommu_writel(iommu, DMAR_FECTL_REG, FECTL_IM);
- + iommu_readl(iommu, DMAR_FECTL_REG);
- +
- + mtx_leave(&iommu->reg_lock);
- +}
- +
- +void
- +acpidmar_msi_hwunmask(struct pic *pic, int pin)
- +{
- + struct iommu_pic *ip = (void *)pic;
- + struct iommu_softc *iommu = ip->iommu;
- +
- + printf("msi_hwunmask\n");
- +
- + mtx_enter(&iommu->reg_lock);
- +
- + iommu_writel(iommu, DMAR_FECTL_REG, 0);
- + iommu_readl(iommu, DMAR_FECTL_REG);
- +
- + mtx_leave(&iommu->reg_lock);
- +}
- +
- +void
- +acpidmar_msi_addroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
- + int type)
- +{
- + struct iommu_pic *ip = (void *)pic;
- + struct iommu_softc *iommu = ip->iommu;
- +
- + mtx_enter(&iommu->reg_lock);
- +
- + iommu->fedata = vec;
- + iommu->feaddr = 0xfee00000L | (ci->ci_apicid << 12);
- + iommu_writel(iommu, DMAR_FEDATA_REG, vec);
- + iommu_writel(iommu, DMAR_FEADDR_REG, iommu->feaddr);
- + iommu_writel(iommu, DMAR_FEUADDR_REG, iommu->feaddr >> 32);
- +
- + mtx_leave(&iommu->reg_lock);
- +}
- +
- +void
- +acpidmar_msi_delroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
- + int type)
- +{
- + printf("msi_delroute\n");
- +}
- +
- +void *
- +acpidmar_intr_establish(void *ctx, int level, int (*func)(void *),
- + void *arg, const char *what)
- +{
- + struct iommu_softc *iommu = ctx;
- + struct pic *pic;
- +
- + pic = &iommu->pic.pic;
- + iommu->pic.iommu = iommu;
- +
- + strlcpy(pic->pic_dev.dv_xname, "dmarpic",
- + sizeof(pic->pic_dev.dv_xname));
- + pic->pic_type = PIC_MSI;
- + pic->pic_hwmask = acpidmar_msi_hwmask;
- + pic->pic_hwunmask = acpidmar_msi_hwunmask;
- + pic->pic_addroute = acpidmar_msi_addroute;
- + pic->pic_delroute = acpidmar_msi_delroute;
- + pic->pic_edge_stubs = ioapic_edge_stubs;
- +#ifdef MULTIPROCESSOR
- + mtx_init(&pic->pic_mutex, level);
- +#endif
- +
- + return intr_establish(-1, pic, 0, IST_PULSE, level, NULL, func, arg, what);
- +}
- +
- +int
- +acpidmar_intr(void *ctx)
- +{
- + struct iommu_softc *iommu = ctx;
- + struct fault_entry fe;
- + static struct fault_entry ofe;
- + int fro, nfr, fri, i;
- + uint32_t sts;
- +
- + if (iommu->dte) {
- + ivhd_poll_events(iommu);
- + return 1;
- + }
- +
- + //splassert(IPL_HIGH);
- +
- + if (!(iommu->gcmd & GCMD_TE)) {
- + return (1);
- + }
- + mtx_enter(&iommu->reg_lock);
- + sts = iommu_readl(iommu, DMAR_FECTL_REG);
- + sts = iommu_readl(iommu, DMAR_FSTS_REG);
- +
- + if (!(sts & FSTS_PPF)) {
- + mtx_leave(&iommu->reg_lock);
- + return (1);
- + }
- +
- + nfr = cap_nfr(iommu->cap);
- + fro = cap_fro(iommu->cap);
- + fri = (sts >> FSTS_FRI_SHIFT) & FSTS_FRI_MASK;
- + for (i = 0; i < nfr; i++) {
- + fe.hi = iommu_readq(iommu, fro + (fri*16) + 8);
- + if (!(fe.hi & FRCD_HI_F))
- + break;
- +
- + fe.lo = iommu_readq(iommu, fro + (fri*16));
- + if (ofe.hi != fe.hi || ofe.lo != fe.lo) {
- + iommu_showfault(iommu, fri, &fe);
- + ofe.hi = fe.hi;
- + ofe.lo = fe.lo;
- + }
- + fri = (fri + 1) % nfr;
- + }
- +
- + iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
- +
- + mtx_leave(&iommu->reg_lock);
- +
- + return (1);
- +}
- +
- +const char *vtd_faults[] = {
- + "Software",
- + "Root Entry Not Present", /* ok (rtaddr + 4096) */
- + "Context Entry Not Present", /* ok (no CTX_P) */
- + "Context Entry Invalid", /* ok (tt = 3) */
- + "Address Beyond MGAW",
- + "Write", /* ok */
- + "Read", /* ok */
- + "Paging Entry Invalid", /* ok */
- + "Root Table Invalid",
- + "Context Table Invalid",
- + "Root Entry Reserved", /* ok (root.lo |= 0x4) */
- + "Context Entry Reserved",
- + "Paging Entry Reserved",
- + "Context Entry TT",
- + "Reserved",
- +};
- +
- +void iommu_showpte(uint64_t, int, uint64_t);
- +
- +void
- +iommu_showpte(uint64_t ptep, int lvl, uint64_t base)
- +{
- + uint64_t nb, pb, i;
- + struct pte_entry *pte;
- +
- + pte = (void *)PMAP_DIRECT_MAP(ptep);
- + for (i = 0; i < 512; i++) {
- + if (!(pte[i].val & PTE_P))
- + continue;
- + nb = base + (i << lvl);
- + pb = pte[i].val & ~VTD_PAGE_MASK;
- + if(lvl == VTD_LEVEL0) {
- + printf(" %3llx %.16llx = %.16llx %c%c %s\n",
- + i, nb, pb,
- + pte[i].val == PTE_R ? 'r' : ' ',
- + pte[i].val & PTE_W ? 'w' : ' ',
- + (nb == pb) ? " ident" : "");
- + if (nb == pb)
- + return;
- + } else {
- + iommu_showpte(pb, lvl - VTD_STRIDE_SIZE, nb);
- + }
- + }
- +}
- +
- +void
- +iommu_showcfg(struct iommu_softc *iommu, int sid)
- +{
- + int i, j, sts, cmd;
- + struct context_entry *ctx;
- + pcitag_t tag;
- + pcireg_t clc;
- +
- + cmd = iommu_readl(iommu, DMAR_GCMD_REG);
- + sts = iommu_readl(iommu, DMAR_GSTS_REG);
- + printf("iommu%d: flags:%d root pa:%.16llx %s %s %s %.8x %.8x\n",
- + iommu->id, iommu->flags, iommu_readq(iommu, DMAR_RTADDR_REG),
- + sts & GSTS_TES ? "enabled" : "disabled",
- + sts & GSTS_QIES ? "qi" : "ccmd",
- + sts & GSTS_IRES ? "ir" : "",
- + cmd, sts);
- + for (i = 0; i < 256; i++) {
- + if (!root_entry_is_valid(&iommu->root[i])) {
- + continue;
- + }
- + for (j = 0; j < 256; j++) {
- + ctx = iommu->ctx[i] + j;
- + if (!context_entry_is_valid(ctx)) {
- + continue;
- + }
- + tag = pci_make_tag(NULL, i, (j >> 3), j & 0x7);
- + clc = pci_conf_read(NULL, tag, 0x08) >> 8;
- + printf(" %.2x:%.2x.%x lvl:%d did:%.4x tt:%d ptep:%.16llx flag:%x cc:%.6x\n",
- + i, (j >> 3), j & 7,
- + context_address_width(ctx),
- + context_domain_id(ctx),
- + context_translation_type(ctx),
- + context_pte(ctx),
- + context_user(ctx),
- + clc);
- +#if 0
- + /* dump pagetables */
- + iommu_showpte(ctx->lo & ~VTD_PAGE_MASK, iommu->agaw -
- + VTD_STRIDE_SIZE, 0);
- +#endif
- + }
- + }
- +}
- +
- +void
- +iommu_showfault(struct iommu_softc *iommu, int fri, struct fault_entry *fe)
- +{
- + int bus, dev, fun, type, fr, df;
- + bios_memmap_t *im;
- + const char *mapped;
- +
- + if (!(fe->hi & FRCD_HI_F))
- + return;
- + type = (fe->hi & FRCD_HI_T) ? 'r' : 'w';
- + fr = (fe->hi >> FRCD_HI_FR_SHIFT) & FRCD_HI_FR_MASK;
- + bus = (fe->hi >> FRCD_HI_BUS_SHIFT) & FRCD_HI_BUS_MASK;
- + dev = (fe->hi >> FRCD_HI_DEV_SHIFT) & FRCD_HI_DEV_MASK;
- + fun = (fe->hi >> FRCD_HI_FUN_SHIFT) & FRCD_HI_FUN_MASK;
- + df = (fe->hi >> FRCD_HI_FUN_SHIFT) & 0xFF;
- + iommu_showcfg(iommu, mksid(bus,dev,fun));
- + if (!iommu->ctx[bus]) {
- + /* Bus is not initialized */
- + mapped = "nobus";
- + } else if (!context_entry_is_valid(&iommu->ctx[bus][df])) {
- + /* DevFn not initialized */
- + mapped = "nodevfn";
- + } else if (context_user(&iommu->ctx[bus][df]) != 0xA) {
- + /* no bus_space_map */
- + mapped = "nomap";
- + } else {
- + /* bus_space_map */
- + mapped = "mapped";
- + }
- + printf("fri%d: dmar: %.2x:%.2x.%x %s error at %llx fr:%d [%s] iommu:%d [%s]\n",
- + fri, bus, dev, fun,
- + type == 'r' ? "read" : "write",
- + fe->lo,
- + fr, fr <= 13 ? vtd_faults[fr] : "unknown",
- + iommu->id,
- + mapped);
- + for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
- + if ((im->type == BIOS_MAP_RES) &&
- + (im->addr <= fe->lo) &&
- + (fe->lo <= im->addr+im->size)) {
- + printf("mem in e820.reserved\n");
- + }
- + }
- +#ifdef DDB
- + if (acpidmar_ddb)
- + db_enter();
- +#endif
- +}
- +
- +
- diff --git a/sys/dev/acpi/acpidmar.h b/sys/dev/acpi/acpidmar.h
- new file mode 100644
- index 000000000..2a0b74b10
- --- /dev/null
- +++ b/sys/dev/acpi/acpidmar.h
- @@ -0,0 +1,536 @@
- +/*
- + * Copyright (c) 2015 Jordan Hargrave <[email protected]>
- + *
- + * Permission to use, copy, modify, and distribute this software for any
- + * purpose with or without fee is hereby granted, provided that the above
- + * copyright notice and this permission notice appear in all copies.
- + *
- + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- + */
- +
- +#ifndef _DEV_ACPI_DMARREG_H_
- +#define _DEV_ACPI_DMARREG_H_
- +
- +#define VTD_STRIDE_MASK 0x1FF
- +#define VTD_STRIDE_SIZE 9
- +#define VTD_PAGE_SIZE 4096
- +#define VTD_PAGE_MASK 0xFFF
- +#define VTD_PTE_MASK 0x0000FFFFFFFFF000LL
- +
- +#define VTD_LEVEL0 12
- +#define VTD_LEVEL1 21
- +#define VTD_LEVEL2 30 /* Minimum level supported */
- +#define VTD_LEVEL3 39 /* Also supported */
- +#define VTD_LEVEL4 48
- +#define VTD_LEVEL5 57
- +
- +#define _xbit(x,y) (((x)>> (y)) & 1)
- +#define _xfld(x,y) (uint32_t)(((x)>> y##_SHIFT) & y##_MASK)
- +
- +#define VTD_AWTOLEVEL(x) (((x) - 30) / VTD_STRIDE_SIZE)
- +#define VTD_LEVELTOAW(x) (((x) * VTD_STRIDE_SIZE) + 30)
- +
- +#define DMAR_VER_REG 0x00 /* 32:Arch version supported by this IOMMU */
- +#define DMAR_RTADDR_REG 0x20 /* 64:Root entry table */
- +#define DMAR_FEDATA_REG 0x3c /* 32:Fault event interrupt data register */
- +#define DMAR_FEADDR_REG 0x40 /* 32:Fault event interrupt addr register */
- +#define DMAR_FEUADDR_REG 0x44 /* 32:Upper address register */
- +#define DMAR_AFLOG_REG 0x58 /* 64:Advanced Fault control */
- +#define DMAR_PMEN_REG 0x64 /* 32:Enable Protected Memory Region */
- +#define DMAR_PLMBASE_REG 0x68 /* 32:PMRR Low addr */
- +#define DMAR_PLMLIMIT_REG 0x6c /* 32:PMRR low limit */
- +#define DMAR_PHMBASE_REG 0x70 /* 64:pmrr high base addr */
- +#define DMAR_PHMLIMIT_REG 0x78 /* 64:pmrr high limit */
- +#define DMAR_ICS_REG 0x9C /* 32:Invalidation complete status register */
- +#define DMAR_IECTL_REG 0xa0 /* 32:Invalidation event control register */
- +#define DMAR_IEDATA_REG 0xa4 /* 32:Invalidation event data register */
- +#define DMAR_IEADDR_REG 0xa8 /* 32:Invalidation event address register */
- +#define DMAR_IEUADDR_REG 0xac /* 32:Invalidation event upper address register */
- +#define DMAR_IRTA_REG 0xb8 /* 64:Interrupt remapping table addr register */
- +#define DMAR_CAP_REG 0x08 /* 64:Hardware supported capabilities */
- +#define CAP_PI (1LL << 59)
- +#define CAP_FL1GP (1LL << 56)
- +#define CAP_DRD (1LL << 55)
- +#define CAP_DWD (1LL << 54)
- +#define CAP_MAMV_MASK 0x3F
- +#define CAP_MAMV_SHIFT 48LL
- +#define cap_mamv(x) _xfld(x,CAP_MAMV)
- +#define CAP_NFR_MASK 0xFF
- +#define CAP_NFR_SHIFT 40LL
- +#define cap_nfr(x) (_xfld(x,CAP_NFR) + 1)
- +#define CAP_PSI (1LL << 39)
- +#define CAP_SLLPS_MASK 0xF
- +#define CAP_SLLPS_SHIFT 34LL
- +#define cap_sllps(x) _xfld(x,CAP_SLLPS)
- +#define CAP_FRO_MASK 0x3FF
- +#define CAP_FRO_SHIFT 24LL
- +#define cap_fro(x) (_xfld(x,CAP_FRO) * 16)
- +#define CAP_ZLR (1LL << 22)
- +#define CAP_MGAW_MASK 0x3F
- +#define CAP_MGAW_SHIFT 16LL
- +#define cap_mgaw(x) (_xfld(x,CAP_MGAW) + 1)
- +#define CAP_SAGAW_MASK 0x1F
- +#define CAP_SAGAW_SHIFT 8LL
- +#define cap_sagaw(x) _xfld(x,CAP_SAGAW)
- +#define CAP_CM (1LL << 7)
- +#define CAP_PHMR (1LL << 6)
- +#define CAP_PLMR (1LL << 5)
- +#define CAP_RWBF (1LL << 4)
- +#define CAP_AFL (1LL << 3)
- +#define CAP_ND_MASK 0x7
- +#define CAP_ND_SHIFT 0x00
- +#define cap_nd(x) (16 << (((x) & CAP_ND_MASK) << 1))
- +
- +#define DMAR_ECAP_REG 0x10 /* 64:Extended capabilities supported */
- +#define ECAP_PSS_MASK 0x1F
- +#define ECAP_PSS_SHIFT 35
- +#define ECAP_EAFS (1LL << 34)
- +#define ECAP_NWFS (1LL << 33)
- +#define ECAP_SRS (1LL << 31)
- +#define ECAP_ERS (1LL << 30)
- +#define ECAP_PRS (1LL << 29)
- +#define ECAP_PASID (1LL << 28)
- +#define ECAP_DIS (1LL << 27)
- +#define ECAP_NEST (1LL << 26)
- +#define ECAP_MTS (1LL << 25)
- +#define ECAP_ECS (1LL << 24)
- +#define ECAP_MHMV_MASK 0xF
- +#define ECAP_MHMV_SHIFT 0x20
- +#define ecap_mhmv(x) _xfld(x,ECAP_MHMV)
- +#define ECAP_IRO_MASK 0x3FF /* IOTLB Register */
- +#define ECAP_IRO_SHIFT 0x8
- +#define ecap_iro(x) (_xfld(x,ECAP_IRO) * 16)
- +#define ECAP_SC (1LL << 7) /* Snoop Control */
- +#define ECAP_PT (1LL << 6) /* HW Passthru */
- +#define ECAP_EIM (1LL << 4)
- +#define ECAP_IR (1LL << 3) /* Interrupt remap */
- +#define ECAP_DT (1LL << 2) /* Device IOTLB */
- +#define ECAP_QI (1LL << 1) /* Queued Invalidation */
- +#define ECAP_C (1LL << 0) /* Coherent cache */
- +
- +#define DMAR_GCMD_REG 0x18 /* 32:Global command register */
- +#define GCMD_TE (1LL << 31)
- +#define GCMD_SRTP (1LL << 30)
- +#define GCMD_SFL (1LL << 29)
- +#define GCMD_EAFL (1LL << 28)
- +#define GCMD_WBF (1LL << 27)
- +#define GCMD_QIE (1LL << 26)
- +#define GCMD_IRE (1LL << 25)
- +#define GCMD_SIRTP (1LL << 24)
- +#define GCMD_CFI (1LL << 23)
- +
- +#define DMAR_GSTS_REG 0x1c /* 32:Global status register */
- +#define GSTS_TES (1LL << 31)
- +#define GSTS_RTPS (1LL << 30)
- +#define GSTS_FLS (1LL << 29)
- +#define GSTS_AFLS (1LL << 28)
- +#define GSTS_WBFS (1LL << 27)
- +#define GSTS_QIES (1LL << 26)
- +#define GSTS_IRES (1LL << 25)
- +#define GSTS_IRTPS (1LL << 24)
- +#define GSTS_CFIS (1LL << 23)
- +
- +#define DMAR_CCMD_REG 0x28 /* 64:Context command reg */
- +#define CCMD_ICC (1LL << 63)
- +#define CCMD_CIRG_MASK 0x3
- +#define CCMD_CIRG_SHIFT 61
- +#define CCMD_CIRG(x) ((uint64_t)(x) << CCMD_CIRG_SHIFT)
- +#define CCMD_CAIG_MASK 0x3
- +#define CCMD_CAIG_SHIFT 59
- +#define CCMD_FM_MASK 0x3
- +#define CCMD_FM_SHIFT 32
- +#define CCMD_FM(x) (((uint64_t)(x) << CCMD_FM_SHIFT))
- +#define CCMD_SID_MASK 0xFFFF
- +#define CCMD_SID_SHIFT 8
- +#define CCMD_SID(x) (((x) << CCMD_SID_SHIFT))
- +#define CCMD_DID_MASK 0xFFFF
- +#define CCMD_DID_SHIFT 0
- +#define CCMD_DID(x) (((x) << CCMD_DID_SHIFT))
- +
- +#define CIG_GLOBAL CCMD_CIRG(CTX_GLOBAL)
- +#define CIG_DOMAIN CCMD_CIRG(CTX_DOMAIN)
- +#define CIG_DEVICE CCMD_CIRG(CTX_DEVICE)
- +
- +
- +#define DMAR_FSTS_REG 0x34 /* 32:Fault Status register */
- +#define FSTS_FRI_MASK 0xFF
- +#define FSTS_FRI_SHIFT 8
- +#define FSTS_PRO (1LL << 7)
- +#define FSTS_ITE (1LL << 6)
- +#define FSTS_ICE (1LL << 5)
- +#define FSTS_IQE (1LL << 4)
- +#define FSTS_APF (1LL << 3)
- +#define FSTS_APO (1LL << 2)
- +#define FSTS_PPF (1LL << 1)
- +#define FSTS_PFO (1LL << 0)
- +
- +#define DMAR_FECTL_REG 0x38 /* 32:Fault control register */
- +#define FECTL_IM (1LL << 31)
- +#define FECTL_IP (1LL << 30)
- +
- +#define FRCD_HI_F (1LL << (127-64))
- +#define FRCD_HI_T (1LL << (126-64))
- +#define FRCD_HI_AT_MASK 0x3
- +#define FRCD_HI_AT_SHIFT (124-64)
- +#define FRCD_HI_PV_MASK 0xFFFFF
- +#define FRCD_HI_PV_SHIFT (104-64)
- +#define FRCD_HI_FR_MASK 0xFF
- +#define FRCD_HI_FR_SHIFT (96-64)
- +#define FRCD_HI_PP (1LL << (95-64))
- +
- +#define FRCD_HI_SID_MASK 0xFF
- +#define FRCD_HI_SID_SHIFT 0
- +#define FRCD_HI_BUS_SHIFT 8
- +#define FRCD_HI_BUS_MASK 0xFF
- +#define FRCD_HI_DEV_SHIFT 3
- +#define FRCD_HI_DEV_MASK 0x1F
- +#define FRCD_HI_FUN_SHIFT 0
- +#define FRCD_HI_FUN_MASK 0x7
- +
- +#define DMAR_IOTLB_REG(x) (ecap_iro((x)->ecap) + 8)
- +#define DMAR_IVA_REG(x) (ecap_iro((x)->ecap) + 0)
- +
- +#define DMAR_FRIH_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 8)
- +#define DMAR_FRIL_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 0)
- +
- +#define IOTLB_IVT (1LL << 63)
- +#define IOTLB_IIRG_MASK 0x3
- +#define IOTLB_IIRG_SHIFT 60
- +#define IOTLB_IIRG(x) ((uint64_t)(x) << IOTLB_IIRG_SHIFT)
- +#define IOTLB_IAIG_MASK 0x3
- +#define IOTLB_IAIG_SHIFT 57
- +#define IOTLB_DR (1LL << 49)
- +#define IOTLB_DW (1LL << 48)
- +#define IOTLB_DID_MASK 0xFFFF
- +#define IOTLB_DID_SHIFT 32
- +#define IOTLB_DID(x) ((uint64_t)(x) << IOTLB_DID_SHIFT)
- +
- +#define IIG_GLOBAL IOTLB_IIRG(IOTLB_GLOBAL)
- +#define IIG_DOMAIN IOTLB_IIRG(IOTLB_DOMAIN)
- +#define IIG_PAGE IOTLB_IIRG(IOTLB_PAGE)
- +
- +#define DMAR_IQH_REG 0x80 /* 64:Invalidation queue head register */
- +#define DMAR_IQT_REG 0x88 /* 64:Invalidation queue tail register */
- +#define DMAR_IQA_REG 0x90 /* 64:Invalidation queue addr register */
- +#define IQA_QS_256 0 /* 256 entries */
- +#define IQA_QS_512 1 /* 512 */
- +#define IQA_QS_1K 2 /* 1024 */
- +#define IQA_QS_2K 3 /* 2048 */
- +#define IQA_QS_4K 4 /* 4096 */
- +#define IQA_QS_8K 5 /* 8192 */
- +#define IQA_QS_16K 6 /* 16384 */
- +#define IQA_QS_32K 7 /* 32768 */
- +
- +/* Read-Modify-Write helpers */
- +static inline void iommu_rmw32(void *ov, uint32_t mask, uint32_t shift, uint32_t nv)
- +{
- + *(uint32_t *)ov &= ~(mask << shift);
- + *(uint32_t *)ov |= (nv & mask) << shift;
- +}
- +static inline void iommu_rmw64(void *ov, uint32_t mask, uint32_t shift, uint64_t nv)
- +{
- +	*(uint64_t *)ov &= ~((uint64_t)mask << shift);
- +	*(uint64_t *)ov |= ((uint64_t)(nv & mask) << shift);
- +}
- +
- +/*
- + * Root Entry: one per bus (256 x 128 bit = 4k)
- + * 0 = Present
- + * 1:11 = Reserved
- + * 12:HAW-1 = Context Table Pointer
- + * HAW:63 = Reserved
- + * 64:127 = Reserved
- + */
- +#define ROOT_P (1L << 0)
- +struct root_entry {
- + uint64_t lo;
- + uint64_t hi;
- +};
- +
- +/* Check if root entry is valid */
- +static inline bool
- +root_entry_is_valid(struct root_entry *re)
- +{
- + return (re->lo & ROOT_P);
- +}
- +
- +/*
- + * Context Entry: one per devfn (256 x 128 bit = 4k)
- + * 0 = Present
- + * 1 = Fault Processing Disable
- + * 2:3 = Translation Type
- + * 4:11 = Reserved
- + * 12:63 = Second Level Page Translation
- + * 64:66 = Address Width (# PTE levels)
- + * 67:70 = Ignore
- + * 71 = Reserved
- + * 72:87 = Domain ID
- + * 88:127 = Reserved
- + */
- +#define CTX_P (1L << 0)
- +#define CTX_FPD (1L << 1)
- +#define CTX_T_MASK 0x3
- +#define CTX_T_SHIFT 2
- +enum {
- + CTX_T_MULTI,
- + CTX_T_IOTLB,
- + CTX_T_PASSTHRU
- +};
- +
- +#define CTX_H_AW_MASK 0x7
- +#define CTX_H_AW_SHIFT 0
- +#define CTX_H_USER_MASK 0xF
- +#define CTX_H_USER_SHIFT 3
- +#define CTX_H_DID_MASK 0xFFFF
- +#define CTX_H_DID_SHIFT 8
- +
- +struct context_entry {
- + uint64_t lo;
- + uint64_t hi;
- +};
- +
- +/* Set fault processing enable/disable */
- +static inline void
- +context_set_fpd(struct context_entry *ce, int enable)
- +{
- + ce->lo &= ~CTX_FPD;
- + if (enable)
- + ce->lo |= CTX_FPD;
- +}
- +
- +/* Set context entry present */
- +static inline void
- +context_set_present(struct context_entry *ce)
- +{
- + ce->lo |= CTX_P;
- +}
- +
- +/* Set Second Level Page Table Entry PA */
- +static inline void
- +context_set_slpte(struct context_entry *ce, paddr_t slpte)
- +{
- + ce->lo &= VTD_PAGE_MASK;
- + ce->lo |= (slpte & ~VTD_PAGE_MASK);
- +}
- +
- +/* Set translation type */
- +static inline void
- +context_set_translation_type(struct context_entry *ce, int tt)
- +{
- + ce->lo &= ~(CTX_T_MASK << CTX_T_SHIFT);
- + ce->lo |= ((tt & CTX_T_MASK) << CTX_T_SHIFT);
- +}
- +
- +/* Set Address Width (# of Page Table levels) */
- +static inline void
- +context_set_address_width(struct context_entry *ce, int lvl)
- +{
- + ce->hi &= ~(CTX_H_AW_MASK << CTX_H_AW_SHIFT);
- + ce->hi |= ((lvl & CTX_H_AW_MASK) << CTX_H_AW_SHIFT);
- +}
- +
- +/* Set domain ID */
- +static inline void
- +context_set_domain_id(struct context_entry *ce, int did)
- +{
- + ce->hi &= ~(CTX_H_DID_MASK << CTX_H_DID_SHIFT);
- + ce->hi |= ((did & CTX_H_DID_MASK) << CTX_H_DID_SHIFT);
- +}
- +
- +/* Get Second Level Page Table PA */
- +static inline uint64_t
- +context_pte(struct context_entry *ce)
- +{
- + return (ce->lo & ~VTD_PAGE_MASK);
- +}
- +
- +/* Get translation type */
- +static inline int
- +context_translation_type(struct context_entry *ce)
- +{
- + return (ce->lo >> CTX_T_SHIFT) & CTX_T_MASK;
- +}
- +
- +/* Get domain ID */
- +static inline int
- +context_domain_id(struct context_entry *ce)
- +{
- + return (ce->hi >> CTX_H_DID_SHIFT) & CTX_H_DID_MASK;
- +}
- +
- +/* Get Address Width */
- +static inline int
- +context_address_width(struct context_entry *ce)
- +{
- + return VTD_LEVELTOAW((ce->hi >> CTX_H_AW_SHIFT) & CTX_H_AW_MASK);
- +}
- +
- +/* Check if context entry is valid */
- +static inline bool
- +context_entry_is_valid(struct context_entry *ce)
- +{
- + return (ce->lo & CTX_P);
- +}
- +
- +/* User-available bits in context entry */
- +static inline int
- +context_user(struct context_entry *ce)
- +{
- + return (ce->hi >> CTX_H_USER_SHIFT) & CTX_H_USER_MASK;
- +}
- +
- +static inline void
- +context_set_user(struct context_entry *ce, int v)
- +{
- + ce->hi &= ~(CTX_H_USER_MASK << CTX_H_USER_SHIFT);
- + ce->hi |= ((v & CTX_H_USER_MASK) << CTX_H_USER_SHIFT);
- +}
- +
- +/*
- + * Fault entry
- + * 0..HAW-1 = Fault address
- + * HAW:63 = Reserved
- + * 64:71 = Source ID
- + * 96:103 = Fault Reason
- + * 104:123 = PV
- + * 124:125 = Address Translation type
- + * 126 = Type (0 = Read, 1 = Write)
- + * 127 = Fault bit
- + */
- +struct fault_entry
- +{
- + uint64_t lo;
- + uint64_t hi;
- +};
- +
- +/* PTE Entry: 512 x 64-bit = 4k */
- +#define PTE_P (1L << 0)
- +#define PTE_R 0x00
- +#define PTE_W (1L << 1)
- +#define PTE_US (1L << 2)
- +#define PTE_PWT (1L << 3)
- +#define PTE_PCD (1L << 4)
- +#define PTE_A (1L << 5)
- +#define PTE_D (1L << 6)
- +#define PTE_PAT (1L << 7)
- +#define PTE_G (1L << 8)
- +#define PTE_EA (1L << 10)
- +#define PTE_XD (1LL << 63)
- +
- +/* PDE Level entry */
- +#define PTE_PS (1L << 7)
- +
- +/* PDPE Level entry */
- +
- +/* ----------------------------------------------------------------
- + * 5555555444444444333333333222222222111111111000000000------------
- + * [PML4 ->] PDPE.1GB
- + * [PML4 ->] PDPE.PDE -> PDE.2MB
- + * [PML4 ->] PDPE.PDE -> PDE -> PTE
- + * GAW0 = (12.20) (PTE)
- + * GAW1 = (21.29) (PDE)
- + * GAW2 = (30.38) (PDPE)
- + * GAW3 = (39.47) (PML4)
- + * GAW4 = (48.57) (n/a)
- + * GAW5 = (58.63) (n/a)
- + */
- +struct pte_entry {
- + uint64_t val;
- +};
- +
- +/*
- + * Queued Invalidation entry
- + * 0:3 = 01h
- + * 4:5 = Granularity
- + * 6:15 = Reserved
- + * 16:31 = Domain ID
- + * 32:47 = Source ID
- + * 48:49 = FM
- + */
- +
- +/* Invalidate Context Entry */
- +#define QI_CTX_DID_MASK 0xFFFF
- +#define QI_CTX_DID_SHIFT 16
- +#define QI_CTX_SID_MASK 0xFFFF
- +#define QI_CTX_SID_SHIFT 32
- +#define QI_CTX_FM_MASK 0x3
- +#define QI_CTX_FM_SHIFT 48
- +#define QI_CTX_IG_MASK 0x3
- +#define QI_CTX_IG_SHIFT 4
- +#define QI_CTX_DID(x) (((uint64_t)(x) << QI_CTX_DID_SHIFT))
- +#define QI_CTX_SID(x) (((uint64_t)(x) << QI_CTX_SID_SHIFT))
- +#define QI_CTX_FM(x) (((uint64_t)(x) << QI_CTX_FM_SHIFT))
- +
- +#define QI_CTX_IG_GLOBAL (CTX_GLOBAL << QI_CTX_IG_SHIFT)
- +#define QI_CTX_IG_DOMAIN (CTX_DOMAIN << QI_CTX_IG_SHIFT)
- +#define QI_CTX_IG_DEVICE (CTX_DEVICE << QI_CTX_IG_SHIFT)
- +
- +/* Invalidate IOTLB Entry */
- +#define QI_IOTLB_DID_MASK 0xFFFF
- +#define QI_IOTLB_DID_SHIFT 16
- +#define QI_IOTLB_IG_MASK 0x3
- +#define QI_IOTLB_IG_SHIFT 4
- +#define QI_IOTLB_DR (1LL << 6)
- +#define QI_IOTLB_DW (1LL << 5)
- +#define QI_IOTLB_DID(x) (((uint64_t)(x) << QI_IOTLB_DID_SHIFT))
- +
- +#define QI_IOTLB_IG_GLOBAL (1 << QI_IOTLB_IG_SHIFT)
- +#define QI_IOTLB_IG_DOMAIN (2 << QI_IOTLB_IG_SHIFT)
- +#define QI_IOTLB_IG_PAGE (3 << QI_IOTLB_IG_SHIFT)
- +
- +/* QI Commands */
- +#define QI_CTX 0x1
- +#define QI_IOTLB 0x2
- +#define QI_DEVTLB 0x3
- +#define QI_INTR 0x4
- +#define QI_WAIT 0x5
- +#define QI_EXTTLB 0x6
- +#define QI_PAS 0x7
- +#define QI_EXTDEV 0x8
- +
- +struct qi_entry {
- + uint64_t lo;
- + uint64_t hi;
- +};
- +
- +enum {
- + CTX_GLOBAL = 1,
- + CTX_DOMAIN,
- + CTX_DEVICE,
- +
- + IOTLB_GLOBAL = 1,
- + IOTLB_DOMAIN,
- + IOTLB_PAGE,
- +};
- +
- +enum {
- + VTD_FAULT_ROOT_P = 0x1, /* P field in root entry is 0 */
- + VTD_FAULT_CTX_P = 0x2, /* P field in context entry is 0 */
- + VTD_FAULT_CTX_INVAL = 0x3, /* context AW/TT/SLPPTR invalid */
- + VTD_FAULT_LIMIT = 0x4, /* Address is outside of MGAW */
- + VTD_FAULT_WRITE = 0x5, /* Address-translation fault, non-writable */
- + VTD_FAULT_READ = 0x6, /* Address-translation fault, non-readable */
- + VTD_FAULT_PTE_INVAL = 0x7, /* page table hw access error */
- + VTD_FAULT_ROOT_INVAL = 0x8, /* root table hw access error */
- + VTD_FAULT_CTX_TBL_INVAL = 0x9, /* context entry hw access error */
- + VTD_FAULT_ROOT_RESERVED = 0xa, /* non-zero reserved field in root entry */
- + VTD_FAULT_CTX_RESERVED = 0xb, /* non-zero reserved field in context entry */
- + VTD_FAULT_PTE_RESERVED = 0xc, /* non-zero reserved field in paging entry */
- + VTD_FAULT_CTX_TT = 0xd, /* invalid translation type */
- +};
- +
- +#endif
- +
- +void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
- +void dmar_ptmap(bus_dma_tag_t, bus_addr_t);
- +void acpidmar_sw(int);
- +
- +extern struct acpidmar_softc *acpidmar_sc;
- +
- +#define __EXTRACT(v,m) (((v) >> m##_SHIFT) & m##_MASK)
- diff --git a/sys/dev/acpi/acpireg.h b/sys/dev/acpi/acpireg.h
- index bfbb73ce2..8ba55c8ee 100644
- --- a/sys/dev/acpi/acpireg.h
- +++ b/sys/dev/acpi/acpireg.h
- @@ -623,6 +623,9 @@ struct acpi_ivmd {
- struct acpi_ivhd {
- uint8_t type;
- uint8_t flags;
- +#define IVHD_PPRSUP (1L << 7)
- +#define IVHD_PREFSUP (1L << 6)
- +#define IVHD_COHERENT (1L << 5)
- #define IVHD_IOTLB (1L << 4)
- #define IVHD_ISOC (1L << 3)
- #define IVHD_RESPASSPW (1L << 2)
- @@ -638,13 +641,28 @@ struct acpi_ivhd {
- #define IVHD_UNITID_MASK 0x1F
- #define IVHD_MSINUM_SHIFT 0
- #define IVHD_MSINUM_MASK 0x1F
- - uint32_t reserved;
- + uint32_t feature;
- } __packed;
- +struct acpi_ivhd_ext {
- + uint8_t type;
- + uint8_t flags;
- + uint16_t length;
- + uint16_t devid;
- + uint16_t cap;
- + uint64_t address;
- + uint16_t segment;
- + uint16_t info;
- + uint32_t attrib;
- + uint64_t efr;
- + uint8_t reserved[8];
- +} __packed;
- +
- union acpi_ivrs_entry {
- struct {
- uint8_t type;
- #define IVRS_IVHD 0x10
- +#define IVRS_IVHD_EXT 0x11
- #define IVRS_IVMD_ALL 0x20
- #define IVRS_IVMD_SPECIFIED 0x21
- #define IVRS_IVMD_RANGE 0x22
- @@ -652,6 +670,7 @@ union acpi_ivrs_entry {
- uint16_t length;
- } __packed;
- struct acpi_ivhd ivhd;
- + struct acpi_ivhd_ext ivhd_ext;
- struct acpi_ivmd ivmd;
- } __packed;
- diff --git a/sys/dev/acpi/amd_iommu.h b/sys/dev/acpi/amd_iommu.h
- new file mode 100644
- index 000000000..db6d371aa
- --- /dev/null
- +++ b/sys/dev/acpi/amd_iommu.h
- @@ -0,0 +1,358 @@
- +/*
- + * Copyright (c) 2019 Jordan Hargrave <[email protected]>
- + *
- + * Permission to use, copy, modify, and distribute this software for any
- + * purpose with or without fee is hereby granted, provided that the above
- + * copyright notice and this permission notice appear in all copies.
- + *
- + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- + */
- +#ifndef __amd_iommu_h__
- +#define __amd_iommu_h__
- +
- +#define DEV_TAB_BASE_REG 0x0000
- +#define CMD_BASE_REG 0x0008
- +#define EVT_BASE_REG 0x0010
- +
- +#define EXCL_BASE_REG 0x0020
- +#define EXCL_LIMIT_REG 0x0028
- +
- +/* Extended Feature Register */
- +#define EXTFEAT_REG 0x0030
- +#define EFR_PREFSUP (1L << 0)
- +#define EFR_PPRSUP (1L << 1)
- +#define EFR_NXSUP (1L << 3)
- +#define EFR_GTSUP (1L << 4)
- +#define EFR_IASUP (1L << 6)
- +#define EFR_GASUP (1L << 7)
- +#define EFR_HESUP (1L << 8)
- +#define EFR_PCSUP (1L << 9)
- +#define EFR_HATS_SHIFT 10
- +#define EFR_HATS_MASK 0x3
- +#define EFR_GATS_SHIFT 12
- +#define EFR_GATS_MASK 0x3
- +#define EFR_GLXSUP_SHIFT 14
- +#define EFR_GLXSUP_MASK 0x3
- +#define EFR_SMIFSUP_SHIFT 16
- +#define EFR_SMIFSUP_MASK 0x3
- +#define EFR_SMIFRC_SHIFT 18
- +#define EFR_SMIFRC_MASK 0x7
- +#define EFR_GAMSUP_SHIFT 21
- +#define EFR_GAMSUP_MASK 0x7
- +
- +#define CMD_HEAD_REG 0x2000
- +#define CMD_TAIL_REG 0x2008
- +#define EVT_HEAD_REG 0x2010
- +#define EVT_TAIL_REG 0x2018
- +
- +#define IOMMUSTS_REG 0x2020
- +
- +#define DEV_TAB_MASK 0x000FFFFFFFFFF000LL
- +#define DEV_TAB_LEN 0x1FF
- +
- +/* IOMMU Control */
- +#define IOMMUCTL_REG 0x0018
- +#define CTL_IOMMUEN (1L << 0)
- +#define CTL_HTTUNEN (1L << 1)
- +#define CTL_EVENTLOGEN (1L << 2)
- +#define CTL_EVENTINTEN (1L << 3)
- +#define CTL_COMWAITINTEN (1L << 4)
- +#define CTL_INVTIMEOUT_SHIFT 5
- +#define CTL_INVTIMEOUT_MASK 0x7
- +#define CTL_INVTIMEOUT_NONE 0
- +#define CTL_INVTIMEOUT_1MS 1
- +#define CTL_INVTIMEOUT_10MS 2
- +#define CTL_INVTIMEOUT_100MS 3
- +#define CTL_INVTIMEOUT_1S 4
- +#define CTL_INVTIMEOUT_10S 5
- +#define CTL_INVTIMEOUT_100S 6
- +#define CTL_PASSPW (1L << 8)
- +#define CTL_RESPASSPW (1L << 9)
- +#define CTL_COHERENT (1L << 10)
- +#define CTL_ISOC (1L << 11)
- +#define CTL_CMDBUFEN (1L << 12)
- +#define CTL_PPRLOGEN (1L << 13)
- +#define CTL_PPRINTEN (1L << 14)
- +#define CTL_PPREN (1L << 15)
- +#define CTL_GTEN (1L << 16)
- +#define CTL_GAEN (1L << 17)
- +#define CTL_CRW_SHIFT 18
- +#define CTL_CRW_MASK 0xF
- +#define CTL_SMIFEN (1L << 22)
- +#define CTL_SLFWBDIS (1L << 23)
- +#define CTL_SMIFLOGEN (1L << 24)
- +#define CTL_GAMEN_SHIFT 25
- +#define CTL_GAMEN_MASK 0x7
- +#define CTL_GALOGEN (1L << 28)
- +#define CTL_GAINTEN (1L << 29)
- +#define CTL_DUALPPRLOGEN_SHIFT 30
- +#define CTL_DUALPPRLOGEN_MASK 0x3
- +#define CTL_DUALEVTLOGEN_SHIFT 32
- +#define CTL_DUALEVTLOGEN_MASK 0x3
- +#define CTL_DEVTBLSEGEN_SHIFT 34
- +#define CTL_DEVTBLSEGEN_MASK 0x7
- +#define CTL_PRIVABRTEN_SHIFT 37
- +#define CTL_PRIVABRTEN_MASK 0x3
- +#define CTL_PPRAUTORSPEN (1LL << 39)
- +#define CTL_MARCEN (1LL << 40)
- +#define CTL_BLKSTOPMRKEN (1LL << 41)
- +#define CTL_PPRAUTOSPAON (1LL << 42)
- +#define CTL_DOMAINIDPNE (1LL << 43)
- +
- +#define CMD_BASE_MASK 0x000FFFFFFFFFF000LL
- +#define CMD_TBL_SIZE 4096
- +#define CMD_TBL_LEN_4K (8LL << 56)
- +#define CMD_TBL_LEN_8K (9LL << 56)
- +
- +#define EVT_BASE_MASK 0x000FFFFFFFFFF000LL
- +#define EVT_TBL_SIZE 4096
- +#define EVT_TBL_LEN_4K (8LL << 56)
- +#define EVT_TBL_LEN_8K (9LL << 56)
- +
- +/*========================
- + * DEVICE TABLE ENTRY
- + * Contains mapping of bus-device-function
- + *
- + * 0 Valid (V)
- + * 1 Translation Valid (TV)
- + * 7:8 Host Address Dirty (HAD)
- + * 9:11 Page Table Depth (usually 4)
- + * 12:51 Page Table Physical Address
- + * 52 PPR Enable
- + * 53 GPRP
- + * 54 Guest I/O Protection Valid (GIoV)
- + * 55 Guest Translation Valid (GV)
- + * 56:57 Guest Levels translated (GLX)
- + * 58:60 Guest CR3 bits 12:14 (GCR3TRP)
- + * 61 I/O Read Permission (IR)
- + * 62 I/O Write Permission (IW)
- + * 64:79 Domain ID
- + * 80:95 Guest CR3 bits 15:30 (GCR3TRP)
- + * 96 IOTLB Enable (I)
- + * 97 Suppress multiple I/O page faults (I)
- + * 98 Supress all I/O page faults (SA)
- + * 99:100 Port I/O Control (IoCTL)
- + * 101 Cache IOTLB Hint
- + * 102 Snoop Disable (SD)
- + * 103 Allow Exclusion (EX)
- + * 104:105 System Management Message (SysMgt)
- + * 107:127 Guest CR3 bits 31:51 (GCR3TRP)
- + * 128 Interrupt Map Valid (IV)
- + * 129:132 Interrupt Table Length (IntTabLen)
- + *========================*/
- +struct ivhd_dte {
- + uint32_t dw0;
- + uint32_t dw1;
- + uint32_t dw2;
- + uint32_t dw3;
- + uint32_t dw4;
- + uint32_t dw5;
- + uint32_t dw6;
- + uint32_t dw7;
- +} __packed;
- +
- +#define DTE_V (1L << 0) // dw0
- +#define DTE_TV (1L << 1) // dw0
- +#define DTE_LEVEL_SHIFT 9 // dw0
- +#define DTE_LEVEL_MASK 0x7 // dw0
- +#define DTE_HPTRP_MASK 0x000FFFFFFFFFF000LL // dw0,1
- +
- +#define DTE_PPR (1L << 20) // dw1
- +#define DTE_GPRP (1L << 21) // dw1
- +#define DTE_GIOV (1L << 22) // dw1
- +#define DTE_GV (1L << 23) // dw1
- +#define DTE_IR (1L << 29) // dw1
- +#define DTE_IW (1L << 30) // dw1
- +
- +#define DTE_DID_MASK 0xFFFF // dw2
- +
- +#define DTE_IV (1L << 0) // dw4
- +#define DTE_SE (1L << 1)
- +#define DTE_SA (1L << 2)
- +#define DTE_INTTABLEN_SHIFT 1
- +#define DTE_INTTABLEN_MASK 0xF
- +#define DTE_IRTP_MASK 0x000FFFFFFFFFFFC0LL
- +
- +#define PTE_LVL5 48
- +#define PTE_LVL4 39
- +#define PTE_LVL3 30
- +#define PTE_LVL2 21
- +#define PTE_LVL1 12
- +
- +#define PTE_NXTLVL(x) (((x) & 0x7) << 9)
- +#define PTE_PADDR_MASK 0x000FFFFFFFFFF000LL
- +#define PTE_IR (1LL << 61)
- +#define PTE_IW (1LL << 62)
- +
- +#define DTE_GCR312_MASK 0x7
- +#define DTE_GCR312_SHIFT 26
- +
- +#define DTE_GCR315_MASK 0xFFFF
- +#define DTE_GCR315_SHIFT 16
- +
- +#define DTE_GCR331_MASK 0x1FFFFF
- +#define DTE_GCR331_SHIFT 11
- +
- +#define _get64(x) *(uint64_t *)(x)
- +#define _put64(x,v) *(uint64_t *)(x) = (v)
- +
- +/* Set Guest CR3 address */
- +static inline void
- +dte_set_guest_cr3(struct ivhd_dte *dte, paddr_t paddr)
- +{
- + iommu_rmw32(&dte->dw1, DTE_GCR312_MASK, DTE_GCR312_SHIFT, paddr >> 12);
- + iommu_rmw32(&dte->dw2, DTE_GCR315_MASK, DTE_GCR315_SHIFT, paddr >> 15);
- + iommu_rmw32(&dte->dw3, DTE_GCR331_MASK, DTE_GCR331_SHIFT, paddr >> 31);
- +}
- +
- +/* Set Interrupt Remapping Root Pointer */
- +static inline void
- +dte_set_interrupt_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
- +{
- + uint64_t ov = _get64(&dte->dw4);
- + _put64(&dte->dw4, (ov & ~DTE_IRTP_MASK) | (paddr & DTE_IRTP_MASK));
- +}
- +
- +/* Set Interrupt Remapping Table length */
- +static inline void
- +dte_set_interrupt_table_length(struct ivhd_dte *dte, int nEnt)
- +{
- + iommu_rmw32(&dte->dw4, DTE_INTTABLEN_MASK, DTE_INTTABLEN_SHIFT, nEnt);
- +}
- +
- +/* Set Interrupt Remapping Valid */
- +static inline void
- +dte_set_interrupt_valid(struct ivhd_dte *dte)
- +{
- + dte->dw4 |= DTE_IV;
- +}
- +
- +/* Set Domain ID in Device Table Entry */
- +static inline void
- +dte_set_domain(struct ivhd_dte *dte, uint16_t did)
- +{
- + dte->dw2 = (dte->dw2 & ~DTE_DID_MASK) | (did & DTE_DID_MASK);
- +}
- +
- +/* Set Page Table Pointer for device */
- +static inline void
- +dte_set_host_page_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
- +{
- + uint64_t ov;
- +
- + ov = _get64(&dte->dw0) & ~DTE_HPTRP_MASK;
- + ov |= (paddr & DTE_HPTRP_MASK) | PTE_IW | PTE_IR;
- +
- + _put64(&dte->dw0, ov);
- +}
- +
- +/* Set Page Table Levels Mask */
- +static inline void
- +dte_set_mode(struct ivhd_dte *dte, int mode)
- +{
- + iommu_rmw32(&dte->dw0, DTE_LEVEL_MASK, DTE_LEVEL_SHIFT, mode);
- +}
- +
- +static inline void
- +dte_set_tv(struct ivhd_dte *dte)
- +{
- + dte->dw0 |= DTE_TV;
- +}
- +
- +/* Set Device Table Entry valid.
- + * Domain/Level/Mode/PageTable should already be set
- + */
- +static inline void
- +dte_set_valid(struct ivhd_dte *dte)
- +{
- + dte->dw0 |= DTE_V;
- +}
- +
- +/* Check if Device Table Entry is valid */
- +static inline int
- +dte_is_valid(struct ivhd_dte *dte)
- +{
- + return (dte->dw0 & DTE_V);
- +}
- +
- +/*=========================================
- + * COMMAND
- + *=========================================*/
- +struct ivhd_command {
- + uint32_t dw0;
- + uint32_t dw1;
- + uint32_t dw2;
- + uint32_t dw3;
- +} __packed;
- +
- +#define CMD_SHIFT 28
- +
- +enum {
- + COMPLETION_WAIT = 0x01,
- + INVALIDATE_DEVTAB_ENTRY = 0x02,
- + INVALIDATE_IOMMU_PAGES = 0x03,
- + INVALIDATE_IOTLB_PAGES = 0x04,
- + INVALIDATE_INTERRUPT_TABLE = 0x05,
- + PREFETCH_IOMMU_PAGES = 0x06,
- + COMPLETE_PPR_REQUEST = 0x07,
- + INVALIDATE_IOMMU_ALL = 0x08,
- +};
- +
- +/*=========================================
- + * EVENT
- + *=========================================*/
- +struct ivhd_event {
- + uint32_t dw0;
- + uint32_t dw1;
- + uint32_t dw2; // address.lo
- + uint32_t dw3; // address.hi
- +} __packed;
- +
- +#define EVT_TYPE_SHIFT 28 // dw1.0xF0000000
- +#define EVT_TYPE_MASK 0xF
- +#define EVT_SID_SHIFT 0 // dw0.0x0000FFFF
- +#define EVT_SID_MASK 0xFFFF
- +#define EVT_DID_SHIFT 0
- +#define EVT_DID_MASK 0xFFFF // dw1.0x0000FFFF
- +#define EVT_FLAG_SHIFT 16
- +#define EVT_FLAG_MASK 0xFFF // dw1.0x0FFF0000
- +
- +/* IOMMU Fault reasons */
- +enum {
- + ILLEGAL_DEV_TABLE_ENTRY = 0x1,
- + IO_PAGE_FAULT = 0x2,
- + DEV_TAB_HARDWARE_ERROR = 0x3,
- + PAGE_TAB_HARDWARE_ERROR = 0x4,
- + ILLEGAL_COMMAND_ERROR = 0x5,
- + COMMAND_HARDWARE_ERROR = 0x6,
- + IOTLB_INV_TIMEOUT = 0x7,
- + INVALID_DEVICE_REQUEST = 0x8,
- +};
- +
- +#define EVT_GN (1L << 16)
- +#define EVT_NX (1L << 17)
- +#define EVT_US (1L << 18)
- +#define EVT_I (1L << 19)
- +#define EVT_PR (1L << 20)
- +#define EVT_RW (1L << 21)
- +#define EVT_PE (1L << 22)
- +#define EVT_RZ (1L << 23)
- +#define EVT_TR (1L << 24)
- +
- +struct iommu_softc;
- +
- +int ivhd_flush_devtab(struct iommu_softc *, int);
- +int ivhd_invalidate_iommu_all(struct iommu_softc *);
- +int ivhd_invalidate_interrupt_table(struct iommu_softc *, int);
- +int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
- +int ivhd_invalidate_domain(struct iommu_softc *, int);
- +
- +void _dumppte(struct pte_entry *, int, vaddr_t);
- +
- +#endif
- diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi
- index e57c39938..1cf6f2fbb 100644
- --- a/sys/dev/acpi/files.acpi
- +++ b/sys/dev/acpi/files.acpi
- @@ -70,6 +70,11 @@ device acpiprt
- attach acpiprt at acpi
- file dev/acpi/acpiprt.c acpiprt needs-flag
- +# DMAR device
- +device acpidmar
- +attach acpidmar at acpi
- +file dev/acpi/acpidmar.c acpidmar
- +
- # Docking station
- device acpidock
- attach acpidock at acpi
- diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
- index bf75f875e..546927971 100644
- --- a/sys/dev/pci/pci.c
- +++ b/sys/dev/pci/pci.c
- @@ -1211,6 +1211,8 @@ pciioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
- if (pci_vga_pci == NULL)
- return EINVAL;
- break;
- + case PCIOCUNBIND:
- + break;
- default:
- return ENOTTY;
- }
- @@ -1234,6 +1236,25 @@ pciioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
- tag = pci_make_tag(pc, sel->pc_bus, sel->pc_dev, sel->pc_func);
- switch (cmd) {
- + case PCIOCUNBIND:
- + {
- + struct pci_dev *pd, *pdt;
- + uint32_t val;
- + int i;
- +
- + LIST_FOREACH_SAFE(pd, &pci->sc_devs, pd_next, pdt) {
- + if (tag == pd->pd_tag) {
- + for (i = PCI_MAPREG_START; i <= PCI_MAPREG_END; i += 4) {
- + int n = (i - PCI_MAPREG_START) / 4;
- + val = pci_conf_read(NULL, tag, i);
- + printf(" bar%d: %x %x\n", n, val, pd->pd_mask[n]);
- + }
- + config_detach(pd->pd_dev, 0);
- + LIST_REMOVE(pd, pd_next);
- + }
- + }
- + }
- + break;
- case PCIOCREAD:
- io = (struct pci_io *)data;
- switch (io->pi_width) {
- diff --git a/sys/sys/pciio.h b/sys/sys/pciio.h
- index 394dd083d..2237f6784 100644
- --- a/sys/sys/pciio.h
- +++ b/sys/sys/pciio.h
- @@ -83,4 +83,6 @@ struct pci_vga {
- #define PCIOCREADMASK _IOWR('p', 8, struct pci_io)
- #define PCIOCGETVPD _IOWR('p', 9, struct pci_vpd_req)
- +#define PCIOCUNBIND _IOWR('p', 9, struct pcisel)
- +
- #endif /* !_SYS_PCIIO_H_ */
- diff --git a/usr.sbin/vmctl/main.c b/usr.sbin/vmctl/main.c
- index 249eaa3de..d2ca5a64e 100644
- --- a/usr.sbin/vmctl/main.c
- +++ b/usr.sbin/vmctl/main.c
- @@ -83,7 +83,7 @@ struct ctl_command ctl_commands[] = {
- { "show", CMD_STATUS, ctl_status, "[id]" },
- { "start", CMD_START, ctl_start,
- "[-cL] [-B device] [-b path] [-d disk] [-i count]\n"
- - "\t\t[-m size] [-n switch] [-r path] [-t name] id | name" },
- + "\t\t[-m size] [-n switch] [-r path] [-t name] [-p bus:dev:func] id | name" },
- { "status", CMD_STATUS, ctl_status, "[id]" },
- { "stop", CMD_STOP, ctl_stop, "[-fw] [id | -a]" },
- { "unpause", CMD_UNPAUSE, ctl_unpause, "id" },
- @@ -224,7 +224,8 @@ vmmaction(struct parse_result *res)
- case CMD_START:
- ret = vm_start(res->id, res->name, res->size, res->nifs,
- res->nets, res->ndisks, res->disks, res->disktypes,
- - res->path, res->isopath, res->instance, res->bootdevice);
- + res->path, res->isopath, res->instance, res->bootdevice,
- + res->npcis, res->pcis);
- if (ret) {
- errno = ret;
- err(1, "start VM operation failed");
- @@ -480,6 +481,32 @@ parse_disktype(const char *s, const char **ret)
- return (VMDF_RAW);
- }
- +int
- +parse_pcis(struct parse_result *res, char *pcipath)
- +{
- + uint32_t *pcis;
- + uint32_t bus, dev, func;
- +
- + if (res->npcis >= VMM_MAX_PCI_PTHRU) {
- + warnx("too many pci devices");
- + return -1;
- + }
- + if (sscanf(pcipath, "%u:%u:%u", &bus, &dev, &func) != 3) {
- + warnx("pci format b:d:f");
- + return -1;
- + }
- + if ((pcis = reallocarray(res->pcis, res->npcis + 1,
- + sizeof(uint32_t))) == NULL) {
- + warn("reallocarray");
- + return -1;
- + }
- + pcis[res->npcis] = (bus << 8) | ((dev & 0x1f) << 3) | (func & 0x7);
- + res->pcis = pcis;
- + res->npcis++;
- +
- + return (0);
- +}
- +
- int
- parse_disk(struct parse_result *res, char *word, int type)
- {
- @@ -835,7 +862,7 @@ ctl_start(struct parse_result *res, int argc, char *argv[])
- char path[PATH_MAX];
- const char *s;
- - while ((ch = getopt(argc, argv, "b:B:cd:i:Lm:n:r:t:")) != -1) {
- + while ((ch = getopt(argc, argv, "b:B:cd:i:Lm:n:r:t:p:")) != -1) {
- switch (ch) {
- case 'b':
- if (res->path)
- @@ -899,6 +926,10 @@ ctl_start(struct parse_result *res, int argc, char *argv[])
- if (parse_instance(res, optarg) == -1)
- errx(1, "invalid name: %s", optarg);
- break;
- + case 'p':
- + if (parse_pcis(res, optarg) == -1)
- + errx(1, "invalid pci entry: %s", optarg);
- + break;
- default:
- ctl_usage(res->ctl);
- /* NOTREACHED */
- diff --git a/usr.sbin/vmctl/vmctl.8 b/usr.sbin/vmctl/vmctl.8
- index 6a583d5b2..c77a866ff 100644
- --- a/usr.sbin/vmctl/vmctl.8
- +++ b/usr.sbin/vmctl/vmctl.8
- @@ -155,6 +155,7 @@ command.
- .Op Fl n Ar switch
- .Op Fl r Ar path
- .Op Fl t Ar name
- +.Op Fl p Ar bus:dev:func
- .Ar id | name
- .Ek
- .Xc
- @@ -237,6 +238,10 @@ as a template to create a new VM instance.
- The instance will inherit settings from the parent VM,
- except for exclusive options such as disk, interface lladdr, and
- interface names.
- +.It Fl p Ar bus:dev:func
- +Add Passthrough PCI device at host PCI address
- +.Ar bus:dev:func
- +(decimal) to the guest. Up to 4 devices are allowed.
- .El
- .It Cm status Op Ar id
- List VMs running on the host, optionally listing just the selected VM
- diff --git a/usr.sbin/vmctl/vmctl.c b/usr.sbin/vmctl/vmctl.c
- index dcded0760..4de6dd6f9 100644
- --- a/usr.sbin/vmctl/vmctl.c
- +++ b/usr.sbin/vmctl/vmctl.c
- @@ -73,7 +73,8 @@ unsigned int info_flags;
- int
- vm_start(uint32_t start_id, const char *name, int memsize, int nnics,
- char **nics, int ndisks, char **disks, int *disktypes, char *kernel,
- - char *iso, char *instance, unsigned int bootdevice)
- + char *iso, char *instance, unsigned int bootdevice,
- + int npcis, uint32_t *pcis)
- {
- struct vmop_create_params *vmc;
- struct vm_create_params *vcp;
- @@ -128,6 +129,7 @@ vm_start(uint32_t start_id, const char *name, int memsize, int nnics,
- vcp->vcp_ncpus = 1;
- vcp->vcp_ndisks = ndisks;
- vcp->vcp_nnics = nnics;
- + vcp->vcp_npcis = npcis;
- vcp->vcp_id = start_id;
- for (i = 0 ; i < ndisks; i++) {
- @@ -153,6 +155,9 @@ vm_start(uint32_t start_id, const char *name, int memsize, int nnics,
- errx(1, "interface name too long");
- }
- }
- + for (i = 0; i < npcis; i++)
- + vcp->vcp_pcis[i] = pcis[i];
- +
- if (name != NULL) {
- /*
- * Allow VMs names with alphanumeric characters, dot, hyphen
- diff --git a/usr.sbin/vmctl/vmctl.h b/usr.sbin/vmctl/vmctl.h
- index beb65eae6..aa9cbcba7 100644
- --- a/usr.sbin/vmctl/vmctl.h
- +++ b/usr.sbin/vmctl/vmctl.h
- @@ -55,6 +55,8 @@ struct parse_result {
- size_t ndisks;
- char **disks;
- int *disktypes;
- + int npcis;
- + uint32_t *pcis;
- int verbose;
- char *instance;
- unsigned int flags;
- @@ -80,6 +82,7 @@ int parse_network(struct parse_result *, char *);
- int parse_size(struct parse_result *, char *);
- int parse_disktype(const char *, const char **);
- int parse_disk(struct parse_result *, char *, int);
- +int parse_pcis(struct parse_result *, char *);
- int parse_vmid(struct parse_result *, char *, int);
- int parse_instance(struct parse_result *, char *);
- void parse_free(struct parse_result *);
- @@ -94,7 +97,8 @@ int create_imagefile(int, const char *, const char *, long, const char **);
- int create_raw_imagefile(const char *, long);
- int create_qc2_imagefile(const char *, const char *, long);
- int vm_start(uint32_t, const char *, int, int, char **, int,
- - char **, int *, char *, char *, char *, unsigned int);
- + char **, int *, char *, char *, char *, unsigned int,
- + int, uint32_t *);
- int vm_start_complete(struct imsg *, int *, int);
- void terminate_vm(uint32_t, const char *, unsigned int);
- int terminate_vm_complete(struct imsg *, int *, unsigned int);
- diff --git a/usr.sbin/vmd/Makefile b/usr.sbin/vmd/Makefile
- index 8645df7ae..c819599d2 100644
- --- a/usr.sbin/vmd/Makefile
- +++ b/usr.sbin/vmd/Makefile
- @@ -4,7 +4,7 @@
- PROG= vmd
- SRCS= vmd.c control.c log.c priv.c proc.c config.c vmm.c
- -SRCS+= vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
- +SRCS+= vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c x86emu.c
- SRCS+= ns8250.c i8253.c vmboot.c ufs.c disklabel.c dhcp.c packet.c
- SRCS+= parse.y atomicio.c vioscsi.c vioraw.c vioqcow2.c fw_cfg.c
- diff --git a/usr.sbin/vmd/pci.c b/usr.sbin/vmd/pci.c
- index 954235eb6..c930ea5a1 100644
- --- a/usr.sbin/vmd/pci.c
- +++ b/usr.sbin/vmd/pci.c
- @@ -25,42 +25,305 @@
- #include <string.h>
- #include <unistd.h>
- +#include <stdlib.h>
- #include "vmd.h"
- #include "pci.h"
- #include "vmm.h"
- #include "i8259.h"
- #include "atomicio.h"
- +#include <sys/mman.h>
- +#include <sys/ioctl.h>
- struct pci pci;
- +extern struct vmd *env;
- +
- extern char *__progname;
- /* PIC IRQs, assigned to devices in order */
- const uint8_t pci_pic_irqs[PCI_MAX_PIC_IRQS] = {3, 5, 6, 7, 9, 10, 11, 12,
- 14, 15};
- +#define PTD_DEVID(d,b) (void *)(uintptr_t)(((d) << 8) | (b))
- +#define PTD_BAR(x) ((uintptr_t)(x) & 0xFF)
- +#define PTD_DEV(x) ((uintptr_t)(x) >> 8)
- +
- +void io_copy(void *, const void *, int);
- +int mem_chkint(void);
- +
- +int pci_memh2(int, uint64_t, uint32_t, void *, void *);
- +
- +#define PAGE_MASK 0xFFF
- +
- +TAILQ_HEAD(,iohandler) memh = TAILQ_HEAD_INITIALIZER(memh);
- +
- +void
- +register_mem(uint64_t base, uint32_t len, iocb_t handler, void *cookie)
- +{
- + struct iohandler *mem;
- +
- + if (!base)
- + return;
- + fprintf(stderr, "@@@ Registering mem region: %llx - %llx\n", base, base+len-1);
- + TAILQ_FOREACH(mem, &memh, next) {
- + if (base >= mem->start && base+len <= mem->end) {
- + fprintf(stderr,"already registered\n");
- + return;
- + }
- + }
- + mem = calloc(1, sizeof(*mem));
- + mem->start = base;
- + mem->end = base+len-1;
- + mem->handler = handler;
- + mem->cookie = cookie;
- + TAILQ_INSERT_TAIL(&memh, mem, next);
- +}
- +
- +void
- +unregister_mem(uint64_t base)
- +{
- + struct iohandler *mem, *tmp;
- +
- + if (!base)
- + return;
- + fprintf(stderr,"@@@ Unregistering base: %llx\n", base);
- + TAILQ_FOREACH_SAFE(mem, &memh, next, tmp) {
- + if (mem->start == base) {
- + fprintf(stderr, " removed:%llx-%llx\n", mem->start, mem->end);
- + TAILQ_REMOVE(&memh, mem, next);
- + free(mem);
- + }
- + }
- +}
- +
- +int
- +mem_handler(int dir, uint64_t addr, uint32_t size, void *data)
- +{
- + struct iohandler *mem;
- + int rc;
- +
- + TAILQ_FOREACH(mem, &memh, next) {
- + if (addr >= mem->start && addr+size <= mem->end) {
- + rc = mem->handler(dir, addr, size, data, mem->cookie);
- + if (rc != 0) {
- + fprintf(stderr, "Error mem handler: %llx\n", addr);
- + }
- + return rc;
- + }
- + }
- + return -1;
- +}
- +
- +/* Lookup PTD device */
- +static struct vm_ptdpci *
- +ptd_lookup(int devid)
- +{
- + if (devid >= pci.pci_dev_ct)
- + return NULL;
- + return pci.pci_devices[devid].pd_cookie;
- +}
- +
- +/* Map a MMIO Bar Physical address */
- +static void *
- +ptd_mapbar(int bar, uint64_t base, uint64_t size) {
- + uint8_t *va;
- +
- + /* Don't map empty regions */
- + if (!base || !size)
- + return NULL;
- + size = (size + PAGE_MASK) & ~PAGE_MASK;
- + va = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, env->vmd_fd, base & ~PAGE_MASK);
- + if (va == MAP_FAILED) {
- + fprintf(stderr, "Unable to mmap bar: %.16llx/%.8llx\n",
- + base, size);
- + return NULL;
- + }
- + fprintf(stderr, "0x%.2x: Mapped bar: %.16llx/%.8llx to %p\n",
- + (bar * 4) + 0x10, base, size, va);
- + return va + (base & PAGE_MASK);
- +}
- +
- +/* Unmap MMIO Bar */
- +static void
- +ptd_unmapbar(void *va, uint64_t size) {
- + if (va == NULL)
- + return;
- + size = (size + PAGE_MASK) & ~PAGE_MASK;
- + munmap(va, size);
- + fprintf(stderr, "unmapping bar: %p/%.8llx\n", va, size);
- +}
- +
- +/* Do Passthrough I/O port read/write */
- +static void
- +ptd_pio(int type, int dir, int port, int size, uint32_t *data)
- +{
- + struct vm_pio pio;
- + uint64_t mask = 0xffffffff;
- + int rc;
- +
- + if (size == 1)
- + mask = 0xff;
- + else if (size == 2)
- + mask = 0xffff;
- + else if (size == 4)
- + mask = 0xffffffff;
- +
- + pio.dir = dir;
- + pio.size = size;
- + pio.base = port;
- + if (dir == VEI_DIR_OUT) {
- + pio.data = *data & mask;
- + rc = ioctl(env->vmd_fd, VMM_IOC_PIO, &pio);
- + }
- + else {
- + rc = ioctl(env->vmd_fd, VMM_IOC_PIO, &pio);
- + *data = (*data & ~mask) | (pio.data & mask);
- + }
- + fprintf(stderr, "pio: rc=%d, %d/%.4x %.8x\n", rc, dir, port, *data);
- +}
- +
- +/* Passthrough PCI config read */
- +static uint32_t
- +ptd_conf_read(uint8_t bus, uint8_t dev, uint8_t func, uint32_t reg)
- +{
- + struct vm_pciio pio;
- +
- + memset(&pio, 0, sizeof(pio));
- + pio.bus = bus;
- + pio.dev = dev;
- + pio.func = func;
- + pio.dir = VEI_DIR_IN;
- + pio.reg = reg & ~0x3;
- + ioctl(env->vmd_fd, VMM_IOC_PCIIO, &pio);
- + return pio.val;
- +}
- +
- +/* Passthrough PCI config write */
- +static void
- +ptd_conf_write(uint8_t bus, uint8_t dev, uint8_t func, uint32_t reg, uint32_t val)
- +{
- + struct vm_pciio pio;
- +
- + memset(&pio, 0, sizeof(pio));
- + pio.bus = bus;
- + pio.dev = dev;
- + pio.func = func;
- + pio.dir = VEI_DIR_OUT;
- + pio.reg = reg & ~0x3;
- + pio.val = val;
- + ioctl(env->vmd_fd, VMM_IOC_PCIIO, &pio);
- +}
- +
- +int
- +mem_chkint(void)
- +{
- + uint32_t pending;
- + struct vm_ptdpci *pd;
- + uint8_t intr = 0xff;
- + int rc, i;
- +
- + /* Loop through all PCI devices, check for interrupt */
- + for (i = 0; i < pci.pci_dev_ct; i++) {
- + pd = ptd_lookup(i);
- + if (pd == NULL)
- + continue;
- +
- + /* Check if pending interrupt count has changed */
- + pending = pd->pending;
- + rc = ioctl(env->vmd_fd, VMM_IOC_GETINTR, pd);
- + if (pd->pending != pending) {
- + fprintf(stderr, "pend:%d %d %d\n", pending, pd->pending, rc);
- + return pci_get_dev_irq(pd->id);
- + }
- + }
- + return intr;
- +}
- +
- +void
- +io_copy(void *dest, const void *src, int size) {
- + memcpy(dest, src, size);
- + return;
- + if (size == 1)
- + *(uint8_t *)dest = *(const uint8_t *)src;
- + else if (size == 2)
- + *(uint16_t *)dest = *(const uint16_t *)src;
- + else if (size == 4)
- + *(uint32_t *)dest = *(const uint32_t *)src;
- + else if (size == 8)
- + *(uint64_t *)dest = *(const uint64_t *)src;
- +}
- +
- +/*
- + * PCI Passthrough MMIO handler
- + * USe memory mapped address of physical bar
- + */
- +int
- +pci_memh2(int dir, uint64_t base, uint32_t size, void *data, void *cookie)
- +{
- + uint8_t devid = PTD_DEV(cookie);
- + uint8_t barid = PTD_BAR(cookie);
- + uint64_t off;
- + uint8_t *va;
- + struct vm_ptdpci *pd;
- +
- + pd = ptd_lookup(devid);
- + if (pd == NULL)
- + return -1;
- + off = base & (pd->barinfo[barid].size - 1);
- + va = pd->barinfo[barid].va;
- + if (va == NULL) {
- + return -1;
- + }
- + if (dir == VEI_DIR_IN) {
- + io_copy(data, va + off, size);
- + }
- + else {
- + io_copy(va + off, data, size);
- + }
- + return 0;
- +}
- +
- +/*
- + * pci_mkbar
- + *
- + * Calculates BAR address is valid
- + * Returns allocated address and updates next address
- + * Returns zero if address is out of range
- + */
- +static uint64_t
- +pci_mkbar(uint64_t *base, uint32_t size, uint64_t maxbase)
- +{
- + uint64_t mask = size - 1;
- + uint64_t cbase;
- +
- + cbase = (*base + mask) & ~mask; /* BARs are size-aligned */
- + if (cbase + size >= maxbase)
- + return (0);
- + *base = cbase + size;
- + return cbase;
- +}
- +
- /*
- * pci_add_bar
- *
- * Adds a BAR for the PCI device 'id'. On access, 'barfn' will be
- * called, and passed 'cookie' as an identifier.
- *
- - * BARs are fixed size, meaning all I/O BARs requested have the
- - * same size and all MMIO BARs have the same size.
- - *
- * Parameters:
- * id: PCI device to add the BAR to (local count, eg if id == 4,
- * this BAR is to be added to the VM's 5th PCI device)
- * type: type of the BAR to add (PCI_MAPREG_TYPE_xxx)
- + * size: Size of BAR area
- * barfn: callback function invoked on BAR access
- * cookie: cookie passed to barfn on access
- *
- * Returns 0 if the BAR was added successfully, 1 otherwise.
- */
- int
- -pci_add_bar(uint8_t id, uint32_t type, void *barfn, void *cookie)
- +pci_add_bar(uint8_t id, uint32_t type, uint32_t size, void *barfn, void *cookie)
- {
- uint8_t bar_reg_idx, bar_ct;
- + uint64_t base = 0;
- /* Check id */
- if (id >= pci.pci_dev_ct)
- @@ -73,35 +336,50 @@ pci_add_bar(uint8_t id, uint32_t type, void *barfn, void *cookie)
- /* Compute BAR address and add */
- bar_reg_idx = (PCI_MAPREG_START + (bar_ct * 4)) / 4;
- - if (type == PCI_MAPREG_TYPE_MEM) {
- - if (pci.pci_next_mmio_bar >= VMM_PCI_MMIO_BAR_END)
- + if (type == (PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT)) {
- + base = pci_mkbar(&pci.pci_next_mmio_bar, size, VMM_PCI_MMIO_BAR_END);
- + if (base == 0)
- + return (1);
- +
- + pci.pci_devices[id].pd_cfg_space[bar_reg_idx] =
- + PCI_MAPREG_MEM_ADDR(base) | PCI_MAPREG_MEM_TYPE_64BIT;
- + pci.pci_devices[id].pd_barfunc[bar_ct] = barfn;
- + pci.pci_devices[id].pd_bar_cookie[bar_ct] = cookie;
- + pci.pci_devices[id].pd_bartype[bar_ct] = PCI_BAR_TYPE_MMIO;
- + pci.pci_devices[id].pd_barsize[bar_ct] = size;
- + pci.pci_devices[id].pd_bartype[bar_ct+1] = PCI_BAR_TYPE_MMIO;
- + pci.pci_devices[id].pd_barsize[bar_ct+1] = 0;
- + pci.pci_devices[id].pd_bar_ct+=2;
- + } else if (type == PCI_MAPREG_TYPE_MEM) {
- + base = pci_mkbar(&pci.pci_next_mmio_bar, size, VMM_PCI_MMIO_BAR_END);
- + if (base == 0)
- return (1);
- pci.pci_devices[id].pd_cfg_space[bar_reg_idx] =
- - PCI_MAPREG_MEM_ADDR(pci.pci_next_mmio_bar);
- - pci.pci_next_mmio_bar += VMM_PCI_MMIO_BAR_SIZE;
- + PCI_MAPREG_MEM_ADDR(base);
- pci.pci_devices[id].pd_barfunc[bar_ct] = barfn;
- pci.pci_devices[id].pd_bar_cookie[bar_ct] = cookie;
- pci.pci_devices[id].pd_bartype[bar_ct] = PCI_BAR_TYPE_MMIO;
- - pci.pci_devices[id].pd_barsize[bar_ct] = VMM_PCI_MMIO_BAR_SIZE;
- + pci.pci_devices[id].pd_barsize[bar_ct] = size;
- pci.pci_devices[id].pd_bar_ct++;
- } else if (type == PCI_MAPREG_TYPE_IO) {
- - if (pci.pci_next_io_bar >= VMM_PCI_IO_BAR_END)
- + base = pci_mkbar(&pci.pci_next_io_bar, size, VMM_PCI_IO_BAR_END);
- + if (base == 0)
- return (1);
- pci.pci_devices[id].pd_cfg_space[bar_reg_idx] =
- - PCI_MAPREG_IO_ADDR(pci.pci_next_io_bar) |
- + PCI_MAPREG_IO_ADDR(base) |
- PCI_MAPREG_TYPE_IO;
- - pci.pci_next_io_bar += VMM_PCI_IO_BAR_SIZE;
- pci.pci_devices[id].pd_barfunc[bar_ct] = barfn;
- pci.pci_devices[id].pd_bar_cookie[bar_ct] = cookie;
- - DPRINTF("%s: adding pci bar cookie for dev %d bar %d = %p",
- - __progname, id, bar_ct, cookie);
- pci.pci_devices[id].pd_bartype[bar_ct] = PCI_BAR_TYPE_IO;
- - pci.pci_devices[id].pd_barsize[bar_ct] = VMM_PCI_IO_BAR_SIZE;
- + pci.pci_devices[id].pd_barsize[bar_ct] = size;
- pci.pci_devices[id].pd_bar_ct++;
- }
- + log_info("%s: PCI_ADDBAR(%d, %d, %x, %x)", __progname,
- + bar_ct, type, pci.pci_devices[id].pd_cfg_space[bar_reg_idx], size);
- +
- return (0);
- }
- @@ -165,8 +443,10 @@ pci_get_dev_irq(uint8_t id)
- int
- pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid, uint8_t class,
- uint8_t subclass, uint16_t subsys_vid, uint16_t subsys_id,
- - uint8_t irq_needed, pci_cs_fn_t csfunc)
- + uint8_t irq_needed, pci_cs_fn_t csfunc, void *cookie)
- {
- + log_info("%s: add_pci: %x.%x.%x", __progname, vid, pid, class);
- +
- /* Exceeded max devices? */
- if (pci.pci_dev_ct >= PCI_CONFIG_MAX_DEV)
- return (1);
- @@ -186,6 +466,7 @@ pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid, uint8_t class,
- pci.pci_devices[*id].pd_subsys_id = subsys_id;
- pci.pci_devices[*id].pd_csfunc = csfunc;
- + pci.pci_devices[*id].pd_cookie = cookie;
- if (irq_needed) {
- pci.pci_devices[*id].pd_irq =
- @@ -202,6 +483,111 @@ pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid, uint8_t class,
- return (0);
- }
- +#define PCIOCUNBIND _IOWR('p', 9, struct pcisel)
- +
- +/* Callback for I/O ports. Map to new I/O port and do it */
- +static int
- +ptd_iobar(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, void *cookie, uint8_t size)
- +{
- + struct vm_ptdpci *pd;
- + uint8_t devid = PTD_DEV(cookie);
- + uint8_t barid = PTD_BAR(cookie);
- + int hport;
- +
- + *intr = 0xFF;
- +
- + /* Remap guest port to host port */
- + pd = ptd_lookup(devid);
- + if (pd == NULL)
- + return -1;
- + hport = pd->barinfo[barid].addr + reg;
- + ptd_pio(1, dir, hport, size, data);
- + return 0;
- +}
- +
- +static int
- +ptd_mmiobar(int dir, uint32_t ofs, uint32_t *data)
- +{
- + fprintf(stderr,"mmiobar: %d.%x\n", dir, ofs);
- + return 0;
- +}
- +
- +/*
- + * Add Passthrough PCI device to VMM PCI table
- + */
- +void
- +pci_add_pthru(int bus, int dev, int fun)
- +{
- + struct vm_ptdpci *pd;
- + uint32_t id_reg, subid_reg, class_reg, cmd_reg, intr_reg;
- + int i, rc;
- +
- +#if 0
- + /* Unregister previous VMM */
- + for (i = 0; i < MAXBAR; i++) {
- + if (pd->barinfo[i].va) {
- + ptd_unmapbar(pd->barinfo[i].va, pd->barinfo[i].size);
- + }
- + }
- +#endif
- +
- + /* Allocate Passthrough device */
- + pd = calloc(1, sizeof(*pd));
- + if (pd == NULL)
- + return;
- + pd->bus = bus;
- + pd->dev = dev;
- + pd->func = fun;
- +
- + /* Read physical PCI config space */
- + id_reg = ptd_conf_read(bus, dev, fun, PCI_ID_REG);
- + if (PCI_VENDOR(id_reg) == PCI_VENDOR_INVALID || PCI_VENDOR(id_reg) == 0x0000) {
- + fprintf(stderr, "Error: No PCI device @ %u:%u:%u\n", bus, dev, fun);
- + free(pd); return;
- + }
- + subid_reg = ptd_conf_read(bus, dev, fun, PCI_SUBSYS_ID_REG);
- + class_reg = ptd_conf_read(bus, dev, fun, PCI_CLASS_REG);
- + cmd_reg = ptd_conf_read(bus, dev, fun, PCI_COMMAND_STATUS_REG);
- + intr_reg = ptd_conf_read(bus, dev, fun, PCI_INTERRUPT_REG);
- +
- + /* Add device to guest */
- + pci_add_device(&pd->id, PCI_VENDOR(id_reg), PCI_PRODUCT(id_reg),
- + PCI_CLASS(class_reg), PCI_SUBCLASS(class_reg),
- + PCI_VENDOR(subid_reg), PCI_PRODUCT(subid_reg),
- + 1, NULL, pd);
- +
- + /* Get BARs of native device */
- + rc = ioctl(env->vmd_fd, VMM_IOC_BARINFO, pd);
- + if (rc != 0) {
- + fprintf(stderr, "%d:%d:%d not valid pci device\n", bus, dev, fun);
- + return;
- + }
- + for (i = 0; i < MAXBAR; i++) {
- + int type;
- +
- + type = pd->barinfo[i].type;
- + fprintf(stderr," Bar%d: type:%x base:%llx size:%x\n",
- + i, pd->barinfo[i].type, pd->barinfo[i].addr, pd->barinfo[i].size);
- + if (!pd->barinfo[i].size) {
- + /* Kick bar index */
- + pci.pci_devices[pd->id].pd_bar_ct++;
- + }
- + else if (PCI_MAPREG_TYPE(type) == PCI_MAPREG_TYPE_MEM) {
- + pci_add_bar(pd->id, type, pd->barinfo[i].size,
- + ptd_mmiobar, PTD_DEVID(pd->id, i));
- + pd->barinfo[i].va = ptd_mapbar(i, pd->barinfo[i].addr, pd->barinfo[i].size);
- + /* Skip empty BAR for 64-bit */
- + if (type == (PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT))
- + i++;
- + }
- + else if (PCI_MAPREG_TYPE(type) == PCI_MAPREG_TYPE_IO) {
- + /* This will get callback via pci_handle_io */
- + pci_add_bar(pd->id, PCI_MAPREG_TYPE_IO, pd->barinfo[i].size,
- + ptd_iobar, PTD_DEVID(pd->id, i));
- + }
- + }
- +}
- +
- /*
- * pci_init
- *
- @@ -219,7 +605,7 @@ pci_init(void)
- if (pci_add_device(&id, PCI_VENDOR_OPENBSD, PCI_PRODUCT_OPENBSD_PCHB,
- PCI_CLASS_BRIDGE, PCI_SUBCLASS_BRIDGE_HOST,
- - PCI_VENDOR_OPENBSD, 0, 0, NULL)) {
- + PCI_VENDOR_OPENBSD, 0, 0, NULL, NULL)) {
- log_warnx("%s: can't add PCI host bridge", __progname);
- return;
- }
- @@ -264,6 +650,8 @@ pci_handle_io(struct vm_run_params *vrp)
- for (i = 0 ; i < pci.pci_dev_ct ; i++) {
- for (j = 0 ; j < pci.pci_devices[i].pd_bar_ct; j++) {
- + if (pci.pci_devices[i].pd_bartype[j] != PCI_BAR_TYPE_IO)
- + continue;
- b_lo = PCI_MAPREG_IO_ADDR(pci.pci_devices[i].pd_bar[j]);
- b_hi = b_lo + VMM_PCI_IO_BAR_SIZE;
- if (reg >= b_lo && reg < b_hi) {
- @@ -286,7 +674,7 @@ pci_handle_io(struct vm_run_params *vrp)
- __progname);
- }
- } else {
- - DPRINTF("%s: no pci i/o function for reg 0x%llx (dir=%d "
- + fprintf(stderr,"%s: no pci i/o function for reg 0x%llx (dir=%d "
- "guest %%rip=0x%llx", __progname, (uint64_t)reg, dir,
- vei->vrs.vrs_gprs[VCPU_REGS_RIP]);
- /* Reads from undefined ports return 0xFF */
- @@ -306,8 +694,10 @@ pci_handle_data_reg(struct vm_run_params *vrp)
- {
- struct vm_exit *vei = vrp->vrp_exit;
- uint8_t b, d, f, o, baridx, ofs, sz;
- + uint32_t barval, barsize, bartype;
- int ret;
- pci_cs_fn_t csfunc;
- + struct vm_ptdpci *pd;
- /* abort if the address register is wack */
- if (!(pci.pci_addr_reg & PCI_MODE1_ENABLE)) {
- @@ -328,9 +718,22 @@ pci_handle_data_reg(struct vm_run_params *vrp)
- f = (pci.pci_addr_reg >> 8) & 0x7;
- o = (pci.pci_addr_reg & 0xfc);
- + /* Do passthrough PCI config space read/write */
- + pd = ptd_lookup(d);
- + if ((o == PCI_COMMAND_STATUS_REG || o == PCI_CLASS_REG ||
- + o == PCI_CAPLISTPTR_REG || o >= 0x40) &&
- + (pd != NULL)) {
- + if (vei->vei.vei_dir == VEI_DIR_IN) {
- + vei->vei.vei_data = ptd_conf_read(pd->bus, pd->dev, pd->func, o);
- + }
- + else {
- + ptd_conf_write(pd->bus, pd->dev, pd->func, o, vei->vei.vei_data);
- + }
- + }
- +
- csfunc = pci.pci_devices[d].pd_csfunc;
- if (csfunc != NULL) {
- - ret = csfunc(vei->vei.vei_dir, (o / 4), &vei->vei.vei_data);
- + ret = csfunc(vei->vei.vei_dir, o, sz, &vei->vei.vei_data, pci.pci_devices[d].pd_cookie);
- if (ret)
- log_warnx("cfg space access function failed for "
- "pci device %d", d);
- @@ -348,31 +751,27 @@ pci_handle_data_reg(struct vm_run_params *vrp)
- * value in the address register.
- */
- if (vei->vei.vei_dir == VEI_DIR_OUT) {
- - if ((o >= 0x10 && o <= 0x24) &&
- - vei->vei.vei_data == 0xffffffff) {
- - /*
- - * Compute BAR index:
- - * o = 0x10 -> baridx = 0
- - * o = 0x14 -> baridx = 1
- - * o = 0x18 -> baridx = 2
- - * o = 0x1c -> baridx = 3
- - * o = 0x20 -> baridx = 4
- - * o = 0x24 -> baridx = 5
- - */
- - baridx = (o / 4) - 4;
- - if (baridx < pci.pci_devices[d].pd_bar_ct)
- - vei->vei.vei_data = 0xfffff000;
- - else
- - vei->vei.vei_data = 0;
- - }
- -
- - /* IOBAR registers must have bit 0 set */
- if (o >= 0x10 && o <= 0x24) {
- - baridx = (o / 4) - 4;
- - if (baridx < pci.pci_devices[d].pd_bar_ct &&
- - pci.pci_devices[d].pd_bartype[baridx] ==
- - PCI_BAR_TYPE_IO)
- - vei->vei.vei_data |= 1;
- + /* When Changing a BAR we must calculate readonly bits */
- + baridx = (o - 0x10) / 4;
- + barval = pci.pci_devices[d].pd_cfg_space[o/4];
- + barsize = pci.pci_devices[d].pd_barsize[baridx];
- + bartype = pci.pci_devices[d].pd_bartype[baridx];
- +
- + /* Mask off size */
- + vei->vei.vei_data &= ~(barsize - 1);
- +
- + /* Keep lower bits of current config space value */
- + if (bartype == PCI_BAR_TYPE_IO)
- + vei->vei.vei_data |= (barval & ~PCI_MAPREG_IO_ADDR_MASK);
- + else {
- + vei->vei.vei_data |= (barval & ~PCI_MAPREG_MEM_ADDR_MASK);
- +
- + /* Remove old BAR value from page fault callback, insert new value */
- + unregister_mem(barval & PCI_MAPREG_MEM_ADDR_MASK);
- + register_mem(vei->vei.vei_data & PCI_MAPREG_MEM_ADDR_MASK,
- + barsize, pci_memh2, PTD_DEVID(d, baridx));
- + }
- }
- /*
- diff --git a/usr.sbin/vmd/pci.h b/usr.sbin/vmd/pci.h
- index 01902d77d..b47a7959f 100644
- --- a/usr.sbin/vmd/pci.h
- +++ b/usr.sbin/vmd/pci.h
- @@ -27,48 +27,65 @@
- #define PCI_MAX_PIC_IRQS 10
- -typedef int (*pci_cs_fn_t)(int dir, uint8_t reg, uint32_t *data);
- +typedef int (*pci_cs_fn_t)(int dir, uint8_t reg, uint8_t sz, uint32_t *data, void *cookie);
- typedef int (*pci_iobar_fn_t)(int dir, uint16_t reg, uint32_t *data, uint8_t *,
- void *, uint8_t);
- typedef int (*pci_mmiobar_fn_t)(int dir, uint32_t ofs, uint32_t *data);
- -union pci_dev {
- - uint32_t pd_cfg_space[PCI_CONFIG_SPACE_SIZE / 4];
- +#define PTD_VALID 0x01
- - struct {
- - uint16_t pd_vid;
- - uint16_t pd_did;
- - uint16_t pd_cmd;
- - uint16_t pd_status;
- - uint8_t pd_rev;
- - uint8_t pd_prog_if;
- - uint8_t pd_subclass;
- - uint8_t pd_class;
- - uint8_t pd_cache_size;
- - uint8_t pd_lat_timer;
- - uint8_t pd_header_type;
- - uint8_t pd_bist;
- - uint32_t pd_bar[PCI_MAX_BARS];
- - uint32_t pd_cardbus_cis;
- - uint16_t pd_subsys_vid;
- - uint16_t pd_subsys_id;
- - uint32_t pd_exp_rom_addr;
- - uint8_t pd_cap;
- - uint32_t pd_reserved0 : 24;
- - uint32_t pd_reserved1;
- - uint8_t pd_irq;
- - uint8_t pd_int;
- - uint8_t pd_min_grant;
- - uint8_t pd_max_grant;
- +typedef int (*iocb_t)(int, uint64_t, uint32_t, void *, void *);
- - uint8_t pd_bar_ct;
- - pci_cs_fn_t pd_csfunc;
- +struct iohandler {
- + uint64_t start;
- + uint64_t end;
- + iocb_t handler;
- + void *cookie;
- + TAILQ_ENTRY(iohandler) next;
- +};
- +
- +void register_mem(uint64_t base, uint32_t len, iocb_t handler, void *cookie);
- +void unregister_mem(uint64_t base);
- +int mem_handler(int dir, uint64_t addr, uint32_t size, void *data);
- +
- +struct pci_dev {
- + union {
- + uint32_t pd_cfg_space[PCI_CONFIG_SPACE_SIZE / 4];
- + struct {
- + uint16_t pd_vid;
- + uint16_t pd_did;
- + uint16_t pd_cmd;
- + uint16_t pd_status;
- + uint8_t pd_rev;
- + uint8_t pd_prog_if;
- + uint8_t pd_subclass;
- + uint8_t pd_class;
- + uint8_t pd_cache_size;
- + uint8_t pd_lat_timer;
- + uint8_t pd_header_type;
- + uint8_t pd_bist;
- + uint32_t pd_bar[PCI_MAX_BARS];
- + uint32_t pd_cardbus_cis;
- + uint16_t pd_subsys_vid;
- + uint16_t pd_subsys_id;
- + uint32_t pd_exp_rom_addr;
- + uint8_t pd_cap;
- + uint32_t pd_reserved0 : 24;
- + uint32_t pd_reserved1;
- + uint8_t pd_irq;
- + uint8_t pd_int;
- + uint8_t pd_min_grant;
- + uint8_t pd_max_grant;
- + } __packed;
- + };
- + uint8_t pd_bar_ct;
- + pci_cs_fn_t pd_csfunc;
- - uint8_t pd_bartype[PCI_MAX_BARS];
- - uint32_t pd_barsize[PCI_MAX_BARS];
- - void *pd_barfunc[PCI_MAX_BARS];
- - void *pd_bar_cookie[PCI_MAX_BARS];
- - } __packed;
- + uint8_t pd_bartype[PCI_MAX_BARS];
- + uint32_t pd_barsize[PCI_MAX_BARS];
- + void *pd_barfunc[PCI_MAX_BARS];
- + void *pd_bar_cookie[PCI_MAX_BARS];
- + void *pd_cookie;
- };
- struct pci {
- @@ -79,7 +96,7 @@ struct pci {
- uint32_t pci_addr_reg;
- uint32_t pci_data_reg;
- - union pci_dev pci_devices[PCI_CONFIG_MAX_DEV];
- + struct pci_dev pci_devices[PCI_CONFIG_MAX_DEV];
- };
- void pci_handle_address_reg(struct vm_run_params *);
- @@ -87,9 +104,10 @@ void pci_handle_data_reg(struct vm_run_params *);
- uint8_t pci_handle_io(struct vm_run_params *);
- void pci_init(void);
- int pci_add_device(uint8_t *, uint16_t, uint16_t, uint8_t, uint8_t, uint16_t,
- - uint16_t, uint8_t, pci_cs_fn_t);
- -int pci_add_bar(uint8_t, uint32_t, void *, void *);
- + uint16_t, uint8_t, pci_cs_fn_t, void *);
- +int pci_add_bar(uint8_t, uint32_t, uint32_t, void *, void *);
- int pci_set_bar_fn(uint8_t, uint8_t, void *, void *);
- uint8_t pci_get_dev_irq(uint8_t);
- int pci_dump(int);
- int pci_restore(int);
- +void pci_add_pthru(int, int, int);
- diff --git a/usr.sbin/vmd/virtio.c b/usr.sbin/vmd/virtio.c
- index 8800594fc..430f41995 100644
- --- a/usr.sbin/vmd/virtio.c
- +++ b/usr.sbin/vmd/virtio.c
- @@ -1797,13 +1797,13 @@ virtio_init(struct vmd_vm *vm, int child_cdrom,
- PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM,
- PCI_SUBCLASS_SYSTEM_MISC,
- PCI_VENDOR_OPENBSD,
- - PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) {
- + PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL, NULL)) {
- log_warnx("%s: can't add PCI virtio rng device",
- __progname);
- return;
- }
- - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) {
- + if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, VMM_PCI_IO_BAR_SIZE, virtio_rnd_io, NULL)) {
- log_warnx("%s: can't add bar for virtio rng device",
- __progname);
- return;
- @@ -1835,14 +1835,14 @@ virtio_init(struct vmd_vm *vm, int child_cdrom,
- PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM,
- PCI_SUBCLASS_SYSTEM_MISC,
- PCI_VENDOR_OPENBSD,
- - PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) {
- + PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL, NULL)) {
- log_warnx("%s: can't add PCI virtio net device",
- __progname);
- return;
- }
- - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_net_io,
- - &vionet[i])) {
- + if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, VMM_PCI_IO_BAR_SIZE,
- + virtio_net_io, &vionet[i])) {
- log_warnx("%s: can't add bar for virtio net "
- "device", __progname);
- return;
- @@ -1923,13 +1923,13 @@ virtio_init(struct vmd_vm *vm, int child_cdrom,
- PCI_CLASS_MASS_STORAGE,
- PCI_SUBCLASS_MASS_STORAGE_SCSI,
- PCI_VENDOR_OPENBSD,
- - PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) {
- + PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL, NULL)) {
- log_warnx("%s: can't add PCI virtio block "
- "device", __progname);
- return;
- }
- - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_blk_io,
- - &vioblk[i])) {
- + if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, VMM_PCI_IO_BAR_SIZE,
- + virtio_blk_io, &vioblk[i])) {
- log_warnx("%s: can't add bar for virtio block "
- "device", __progname);
- return;
- @@ -1971,13 +1971,14 @@ virtio_init(struct vmd_vm *vm, int child_cdrom,
- PCI_CLASS_MASS_STORAGE,
- PCI_SUBCLASS_MASS_STORAGE_SCSI,
- PCI_VENDOR_OPENBSD,
- - PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) {
- + PCI_PRODUCT_VIRTIO_SCSI, 1, NULL, NULL)) {
- log_warnx("%s: can't add PCI vioscsi device",
- __progname);
- return;
- }
- - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) {
- + if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, VMM_PCI_IO_BAR_SIZE,
- + vioscsi_io, vioscsi)) {
- log_warnx("%s: can't add bar for vioscsi device",
- __progname);
- return;
- @@ -2013,13 +2014,13 @@ virtio_init(struct vmd_vm *vm, int child_cdrom,
- PCI_CLASS_COMMUNICATIONS,
- PCI_SUBCLASS_COMMUNICATIONS_MISC,
- PCI_VENDOR_OPENBSD,
- - PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) {
- + PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL, NULL)) {
- log_warnx("%s: can't add PCI vmm control device",
- __progname);
- return;
- }
- - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) {
- + if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, VMM_PCI_IO_BAR_SIZE, vmmci_io, NULL)) {
- log_warnx("%s: can't add bar for vmm control device",
- __progname);
- return;
- diff --git a/usr.sbin/vmd/vm.c b/usr.sbin/vmd/vm.c
- index a9fcce4fa..566c9964c 100644
- --- a/usr.sbin/vmd/vm.c
- +++ b/usr.sbin/vmd/vm.c
- @@ -63,6 +63,7 @@
- #include "mc146818.h"
- #include "fw_cfg.h"
- #include "atomicio.h"
- +#include "x86emu.h"
- io_fn_t ioports_map[MAX_PORTS];
- @@ -947,6 +948,7 @@ alloc_guest_mem(struct vm_create_params *vcp)
- return (ret);
- }
- + memset(p, 0, vmr->vmr_size);
- vmr->vmr_va = (vaddr_t)p;
- }
- @@ -1062,6 +1064,14 @@ init_emulated_hw(struct vmop_create_params *vmc, int child_cdrom,
- /* Initialize virtio devices */
- virtio_init(current_vm, child_cdrom, child_disks, child_taps);
- +
- + /* Add Passthrough Devices */
- + for (i = 0; i < (int)vcp->vcp_npcis; i++) {
- + int bus = (vcp->vcp_pcis[i] >> 8);
- + int dev = (vcp->vcp_pcis[i] >> 3) & 0x1F;
- + int fun = (vcp->vcp_pcis[i] >> 0) & 0x7;
- + pci_add_pthru(bus, dev, fun);
- + }
- }
- /*
- * restore_emulated_hw
- @@ -1585,12 +1595,12 @@ vcpu_exit_inout(struct vm_run_params *vrp)
- if (ioports_map[vei->vei.vei_port] != NULL)
- intr = ioports_map[vei->vei.vei_port](vrp);
- else if (vei->vei.vei_dir == VEI_DIR_IN)
- - set_return_data(vei, 0xFFFFFFFF);
- -
- + set_return_data(vei, 0xFFFFFFFF);
- if (intr != 0xFF)
- vcpu_assert_pic_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
- }
- +
- /*
- * vcpu_exit_eptviolation
- *
- @@ -1604,10 +1614,70 @@ vcpu_exit_inout(struct vm_run_params *vrp)
- * 0: no action required
- * EAGAIN: a protection fault occurred, kill the vm.
- */
- +
- +extern int mem_chkint(void);
- +
- int
- vcpu_exit_eptviolation(struct vm_run_params *vrp)
- {
- struct vm_exit *ve = vrp->vrp_exit;
- + uint64_t gip, gpa;
- + uint8_t instr[16] = { 0 };
- + struct vm_rwregs_params vrwp = { 0 };
- + uint64_t *regrw;
- + struct insn ix;
- +
- + /* Read instruction bytes that caused page fault */
- + translate_gva(ve, ve->vrs.vrs_gprs[VCPU_REGS_RIP], &gip, PROT_READ);
- + read_mem(gip, instr, sizeof(instr));
- + fprintf(stderr, "===============\nept violation: %llx rip:0x%llx %.2x %.2x %.2x %.2x %.2x\n",
- + ve->vee.vee_gpa, ve->vrs.vrs_gprs[VCPU_REGS_RIP], instr[0], instr[1], instr[2],
- + instr[3], instr[4]);
- +#if 0
- + fprintf(stderr, " rax:0x%.16llx rbx:0x%.16llx rcx:0x%.16llx rdx:0x%.16llx\n",
- + ve->vrs.vrs_gprs[VCPU_REGS_RAX],
- + ve->vrs.vrs_gprs[VCPU_REGS_RBX],
- + ve->vrs.vrs_gprs[VCPU_REGS_RCX],
- + ve->vrs.vrs_gprs[VCPU_REGS_RDX]);
- + fprintf(stderr, " rsi:0x%.16llx rdi:0x%.16llx rbp:0x%.16llx rsp:0x%.16llx\n",
- + ve->vrs.vrs_gprs[VCPU_REGS_RSI],
- + ve->vrs.vrs_gprs[VCPU_REGS_RDI],
- + ve->vrs.vrs_gprs[VCPU_REGS_RBP],
- + ve->vrs.vrs_gprs[VCPU_REGS_RSP]);
- + fprintf(stderr, " r8: 0x%.16llx r9: 0x%.16llx r10:0x%.16llx r11:0x%.16llx\n",
- + ve->vrs.vrs_gprs[VCPU_REGS_R8],
- + ve->vrs.vrs_gprs[VCPU_REGS_R9],
- + ve->vrs.vrs_gprs[VCPU_REGS_R10],
- + ve->vrs.vrs_gprs[VCPU_REGS_R11]);
- + fprintf(stderr, " r12:0x%.16llx r13:0x%.16llx r14:0x%.16llx r15:0x%.16llx\n",
- + ve->vrs.vrs_gprs[VCPU_REGS_R12],
- + ve->vrs.vrs_gprs[VCPU_REGS_R13],
- + ve->vrs.vrs_gprs[VCPU_REGS_R14],
- + ve->vrs.vrs_gprs[VCPU_REGS_R15]);
- +#endif
- +
- + vrwp.vrwp_mask = VM_RWREGS_GPRS;
- + vrwp.vrwp_vm_id = vrp->vrp_vm_id;
- + vrwp.vrwp_vcpu_id = vrp->vrp_vcpu_id;
- + vrwp.vrwp_regs = ve->vrs;
- + gpa = ve->vee.vee_gpa;
- +
- + /* Decode instruction and get # of bytes, size, and register for read/write */
- + memset(&ix, 0, sizeof(ix));
- + dodis(instr, &ix, ve->vrs.vrs_sregs[VCPU_REGS_CS].vsi_ar & 0x2000 ?
- + SIZE_QWORD : SIZE_DWORD);
- + if (ix.incr && (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END)) {
- + regrw = &vrwp.vrwp_regs.vrs_gprs[ix.reg];
- + mem_handler(ix.dir, gpa, ix.size, regrw);
- + fprintf(stderr, "memhandler : %.16llx %d\n", (uint64_t)*regrw, ix.incr);
- + /* skip this instruction when returning to vm */
- + vrwp.vrwp_regs.vrs_gprs[VCPU_REGS_RIP] += ix.incr;
- + if (ioctl(env->vmd_fd, VMM_IOC_WRITEREGS, &vrwp))
- + fprintf(stderr,"writeregs fails\n");
- + return 0;
- + }
- + fprintf(stderr, "nothandled\n");
- +
- /*
- * vmd may be exiting to vmd to handle a pending interrupt
- * but last exit type may have been VMX_EXIT_EPT_VIOLATION,
- @@ -1653,7 +1723,6 @@ vcpu_exit(struct vm_run_params *vrp)
- case VMX_EXIT_CPUID:
- case VMX_EXIT_EXTINT:
- case SVM_VMEXIT_INTR:
- - case SVM_VMEXIT_NPF:
- case SVM_VMEXIT_MSR:
- case SVM_VMEXIT_CPUID:
- /*
- @@ -1665,10 +1734,10 @@ vcpu_exit(struct vm_run_params *vrp)
- */
- break;
- case VMX_EXIT_EPT_VIOLATION:
- + case SVM_VMEXIT_NPF:
- ret = vcpu_exit_eptviolation(vrp);
- if (ret)
- return (ret);
- -
- break;
- case VMX_EXIT_IO:
- case SVM_VMEXIT_IOIO:
- @@ -1701,7 +1770,12 @@ vcpu_exit(struct vm_run_params *vrp)
- /* Process any pending traffic */
- vionet_process_rx(vrp->vrp_vm_id);
- -
- + {
- + uint8_t intr;
- + if ((intr = mem_chkint()) != 0xff) {
- + vcpu_assert_pic_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
- + }
- + }
- vrp->vrp_continue = 1;
- return (0);
- @@ -2216,12 +2290,13 @@ translate_gva(struct vm_exit* exit, uint64_t va, uint64_t* pa, int mode)
- return (EPERM);
- pte = pte | PG_U;
- - if (mode == PROT_WRITE)
- + if (mode == PROT_WRITE) {
- pte = pte | PG_M;
- - if (write_mem(pte_paddr, &pte, pte_size)) {
- - log_warn("%s: failed to write back flags to pte",
- - __func__);
- - return (EIO);
- + if (write_mem(pte_paddr, &pte, pte_size)) {
- + log_warn("%s: failed to write back flags to pte",
- + __func__);
- + return (EIO);
- + }
- }
- /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
- diff --git a/usr.sbin/vmd/vmm.h b/usr.sbin/vmd/vmm.h
- index 214d41d01..de23fb924 100644
- --- a/usr.sbin/vmd/vmm.h
- +++ b/usr.sbin/vmd/vmm.h
- @@ -22,3 +22,4 @@ void vcpu_assert_pic_irq(uint32_t, uint32_t, int);
- void vcpu_deassert_pic_irq(uint32_t, uint32_t, int);
- void set_return_data(struct vm_exit *, uint32_t);
- void get_input_data(struct vm_exit *, uint32_t *);
- +
- diff --git a/usr.sbin/vmd/x86emu.c b/usr.sbin/vmd/x86emu.c
- new file mode 100644
- index 000000000..857de4710
- --- /dev/null
- +++ b/usr.sbin/vmd/x86emu.c
- @@ -0,0 +1,819 @@
- +/*
- + * Copyright (c) 2020 Jordan Hargrave <[email protected]>
- + *
- + * Permission to use, copy, modify, and distribute this software for any
- + * purpose with or without fee is hereby granted, provided that the above
- + * copyright notice and this permission notice appear in all copies.
- + *
- + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- + */
- +#include <stdio.h>
- +#include <stdlib.h>
- +#include <string.h>
- +#include <fcntl.h>
- +#include <inttypes.h>
- +#include "x86emu.h"
- +#include <machine/vmmvar.h>
- +
- +#define printf(x...) fprintf(stderr, x)
- +
- +#define _(m, a...) { .mnem=#m, a }
- +#define _xxx { }
- +#define __ 0
- +
- +struct opcode {
- + const char *mnem;
- + int arg0;
- + int arg1;
- + int arg2;
- + int flag;
- +};
- +
- +struct opcode hicodes[256] = {
- + [0x30] =
- + _(wrmsr),
- + _(rdtsc),
- + _(rdmsr),
- + _(rdpmc),
- + _(sysenter),
- + _(sysexit),
- +
- + /* 0x40 */
- + [0x40] =
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- + _(cmovcc, Gv, Ev, __, FLG_MRR),
- +
- + /* 0x80 */
- + [0x80] =
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- + _(jcc, Jz, __, __, FLG_D64),
- +
- + /* 0x90 */
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- + _(setcc, Eb, __, __, FLG_MRR),
- +
- + /* 0xa0 */
- + _(push, rFS, __, __, FLG_D64),
- + _(pop, rFS, __, __, FLG_D64),
- + _(cpuid),
- + _(bt, Ev, Gv, __, FLG_MRR),
- + _(shld, Ev, Gv, Ib, FLG_MRR),
- + _(shld, Ev, Gv, rCL,FLG_MRR),
- + _xxx,
- + _xxx,
- + _(push, rGS, __, __, FLG_D64),
- + _(pop, rGS, __, __, FLG_D64),
- + _xxx,
- + _(bts, Ev, Gv, __, FLG_MRR),
- + _(shrd, Ev, Gv, Ib, FLG_MRR),
- + _(shrd, Ev, Gv, rCL,FLG_MRR),
- + _xxx,
- + _(imul, Gv, Ev, __, FLG_MRR),
- +
- + /* 0xb0 */
- + _(cmpxchg, Eb, Gb, __, FLG_MRR),
- + _(cmpxchg, Ev, Gv, __, FLG_MRR),
- + _(lss, Gv, Mp, __, FLG_MRR),
- + _(btr, Ev, Gv, __, FLG_MRR),
- + _(lfs, Gv, Mp, __, FLG_MRR),
- + _(lgs, Gv, Mp, __, FLG_MRR),
- + _(movzx, Gv, Eb, __, FLG_MRR),
- + _(movzx, Gv, Ew, __, FLG_MRR),
- + _xxx,
- + _xxx,
- + _xxx,
- + _(btc, Ev, Gv, __, FLG_MRR),
- + _(bsf, Gv, Ev, __, FLG_MRR),
- + _(bsr, Gv, Ev, __, FLG_MRR),
- + _(movsx, Gv, Eb, __, FLG_MRR),
- + _(movsx, Gv, Ew, __, FLG_MRR),
- +
- + /* 0xc0 */
- + _(xadd, Eb, Gb, __, FLG_MRR),
- + _(xadd, Ev, Gv, __, FLG_MRR),
- + _xxx,
- + _xxx,
- + _xxx,
- + _xxx,
- + _xxx,
- + _xxx,
- + _(bswap, gv),
- + _(bswap, gv),
- + _(bswap, gv),
- + _(bswap, gv),
- + _(bswap, gv),
- + _(bswap, gv),
- + _(bswap, gv),
- + _(bswap, gv),
- +};
- +
- +struct opcode locodes[256] = {
- + _(add, Eb, Gb, __, FLG_MRR),
- + _(add, Ev, Gv, __, FLG_MRR),
- + _(add, Gb, Eb, __, FLG_MRR),
- + _(add, Gv, Ev, __, FLG_MRR),
- + _(add, rAL, Ib),
- + _(add, rvAX, Iz),
- + _(push, rES, __, __, FLG_NO64),
- + _(pop, rES, __, __, FLG_NO64),
- + _(or, Eb, Gb, __, FLG_MRR),
- + _(or, Ev, Gv, __, FLG_MRR),
- + _(or, Gb, Eb, __, FLG_MRR),
- + _(or, Gv, Ev, __, FLG_MRR),
- + _(or, rAL, Ib),
- + _(or, rvAX, Iz),
- + _(push, rCS, __, __, FLG_NO64),
- + _xxx,
- +
- + /* 0x10 */
- + _(adc, Eb, Gb, __, FLG_MRR),
- + _(adc, Ev, Gv, __, FLG_MRR),
- + _(adc, Gb, Eb, __, FLG_MRR),
- + _(adc, Gv, Ev, __, FLG_MRR),
- + _(adc, rAL, Ib),
- + _(adc, rvAX, Iz),
- + _(push, rSS, __, __, FLG_NO64),
- + _(pop, rSS, __, __, FLG_NO64),
- + _(sbb, Eb, Gb, __, FLG_MRR),
- + _(sbb, Ev, Gv, __, FLG_MRR),
- + _(sbb, Gb, Eb, __, FLG_MRR),
- + _(sbb, Gv, Ev, __, FLG_MRR),
- + _(sbb, rAL, Ib),
- + _(sbb, rvAX, Iz),
- + _(push, rDS, __, __, FLG_NO64),
- + _(pop, rDS, __, __, FLG_NO64),
- +
- + /* 0x20 */
- + _(and, Eb, Gb, __, FLG_MRR),
- + _(and, Ev, Gv, __, FLG_MRR),
- + _(and, Gb, Eb, __, FLG_MRR),
- + _(and, Gv, Ev, __, FLG_MRR),
- + _(and, rAL, Ib),
- + _(and, rvAX, Iz),
- + _(pfx, rES, __, __, FLG_SEG),
- + _(daa, __, __, __, FLG_NO64),
- + _(sub, Eb, Gb, __, FLG_MRR),
- + _(sub, Ev, Gv, __, FLG_MRR),
- + _(sub, Gb, Eb, __, FLG_MRR),
- + _(sub, Gv, Ev, __, FLG_MRR),
- + _(sub, rAL, Ib),
- + _(sub, rvAX, Iz),
- + _(pfx, rCS, __, __, FLG_SEG),
- + _(das, __, __, __, FLG_NO64),
- +
- + /* 0x30 */
- + _(xor, Eb, Gb, __, FLG_MRR),
- + _(xor, Ev, Gv, __, FLG_MRR),
- + _(xor, Gb, Eb, __, FLG_MRR),
- + _(xor, Gv, Ev, __, FLG_MRR),
- + _(xor, rAL, Ib),
- + _(xor, rvAX, Iz),
- + _(pfx, rSS, __, __, FLG_SEG),
- + _(aaa, __, __, __, FLG_NO64),
- + _(cmp, Eb, Gb, __, FLG_MRR),
- + _(cmp, Ev, Gv, __, FLG_MRR),
- + _(cmp, Gb, Eb, __, FLG_MRR),
- + _(cmp, Gv, Ev, __, FLG_MRR),
- + _(cmp, rAL, Ib),
- + _(cmp, rvAX, Iz),
- + _(pfx, rDS, __, __, FLG_SEG),
- + _(aas, __, __, __, FLG_NO64),
- +
- + /* 0x40 */
- + _(inc, gv, __, __, FLG_REX),
- + _(inc, gv, __, __, FLG_REX),
- + _(inc, gv, __, __, FLG_REX),
- + _(inc, gv, __, __, FLG_REX),
- + _(inc, gv, __, __, FLG_REX),
- + _(inc, gv, __, __, FLG_REX),
- + _(inc, gv, __, __, FLG_REX),
- + _(inc, gv, __, __, FLG_REX),
- + _(dec, gv, __, __, FLG_REX),
- + _(dec, gv, __, __, FLG_REX),
- + _(dec, gv, __, __, FLG_REX),
- + _(dec, gv, __, __, FLG_REX),
- + _(dec, gv, __, __, FLG_REX),
- + _(dec, gv, __, __, FLG_REX),
- + _(dec, gv, __, __, FLG_REX),
- + _(dec, gv, __, __, FLG_REX),
- +
- + /* 0x50 */
- + _(push, gv, __, __, FLG_D64),
- + _(push, gv, __, __, FLG_D64),
- + _(push, gv, __, __, FLG_D64),
- + _(push, gv, __, __, FLG_D64),
- + _(push, gv, __, __, FLG_D64),
- + _(push, gv, __, __, FLG_D64),
- + _(push, gv, __, __, FLG_D64),
- + _(push, gv, __, __, FLG_D64),
- + _(pop, gv, __, __, FLG_D64),
- + _(pop, gv, __, __, FLG_D64),
- + _(pop, gv, __, __, FLG_D64),
- + _(pop, gv, __, __, FLG_D64),
- + _(pop, gv, __, __, FLG_D64),
- + _(pop, gv, __, __, FLG_D64),
- + _(pop, gv, __, __, FLG_D64),
- + _(pop, gv, __, __, FLG_D64),
- +
- + /* 0x60 */
- + _(pusha, __, __, __, FLG_NO64),
- + _(popa, __, __, __, FLG_NO64),
- + _xxx, /* EVEX */
- + _xxx, /* movsxd Gv, Rd */
- + _(pfx, rFS, __, __, FLG_SEG),
- + _(pfx, rGS, __, __, FLG_SEG),
- + _(pfx, __, __, __, FLG_OSZ),
- + _(pfx, __, __, __, FLG_ASZ),
- + _(push, Iz, __, __, FLG_D64),
- + _(imul, Gv, Ev, Iz, FLG_MRR),
- + _(push, Ib, __, __, FLG_D64),
- + _(imul, Gv, Ev, Ib, FLG_MRR),
- + _(insb, Yb, rDX, __, FLG_MEM), /* rep */
- + _(insv, Yv, rDX, __, FLG_MEM), /* rep */
- + _(outsb, rDX, Xb, __, FLG_MEM), /* rep */
- + _(outsv, rDX, Xv, __, FLG_MEM), /* rep */
- +
- + /* 0x70 */
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- + _(jcc, Jb, __, __, FLG_D64),
- +
- + /* 0x80 */
- + _(grp1, Eb, Ib, __, FLG_MRR|FLG_GRP),
- + _(grp1, Ev, Iz, __, FLG_MRR|FLG_GRP),
- + _(grp1, Eb, Ib, __, FLG_MRR|FLG_GRP|FLG_NO64),
- + _(grp1, Ev, Ib, __, FLG_MRR|FLG_GRP),
- + _(test, Eb, Gb, __, FLG_MRR),
- + _(test, Ev, Gv, __, FLG_MRR),
- + _(xchg, Eb, Gb, __, FLG_MRR),
- + _(xchg, Ev, Gv, __, FLG_MRR),
- + _(mov, Eb, Gb, __, FLG_MRR),
- + _(mov, Ev, Gv, __, FLG_MRR),
- + _(mov, Gb, Eb, __, FLG_MRR),
- + _(mov, Gv, Ev, __, FLG_MRR),
- + _(mov, Ew, Sw, __, FLG_MRR),
- + _(lea, Gv, Mp, __, FLG_MRR),
- + _(mov, Sw, Ew, __, FLG_MRR),
- + _(pop, Ev, __, __, FLG_MRR), /* GRP1a [pop] */
- +
- + /* 0x90 */
- + _(nop),
- + _(xchg, rvAX, gv),
- + _(xchg, rvAX, gv),
- + _(xchg, rvAX, gv),
- + _(xchg, rvAX, gv),
- + _(xchg, rvAX, gv),
- + _(xchg, rvAX, gv),
- + _(xchg, rvAX, gv),
- + _(cbw), /* AX=AL / EAX=AX / RAX=EAX */
- + _(cwd), /* DX:AX=AX / EDX:EAX=EAX / RDX:RAX=RAX */
- + _(call, Ap, __, __, FLG_NO64),
- + _(wait),
- + _(pushf, __, __, __, FLG_D64),
- + _(popf, __, __, __, FLG_D64),
- + _(sahf),
- + _(lahf),
- +
- + /* 0xa0 */
- + _(mov, rAL, Ob, __, FLG_MEM),
- + _(mov, rvAX, Ov, __, FLG_MEM),
- + _(mov, Ob, rAL, __, FLG_MEM),
- + _(mov, Ov,rvAX, __, FLG_MEM),
- + _(movsb, Yb, Xb, __, FLG_MEM), /* rep */
- + _(movsv, Yv, Xv, __, FLG_MEM), /* rep */
- + _(cmpsb, Yb, Xb, __, FLG_MEM), /* repz/repnz */
- + _(cmpsv, Yb, Xv, __, FLG_MEM), /* repz/repnz */
- + _(test, rAL, Ib),
- + _(test, rvAX, Iz),
- + _(stosb, Yb, rAL, __, FLG_MEM), /* rep */
- + _(stosv, Yv,rvAX, __, FLG_MEM), /* rep */
- + _(lodsb, rAL, Xb, __, FLG_MEM),
- + _(lodsv, rvAX, Xv, __, FLG_MEM),
- + _(scasb, Yb, rAL, __, FLG_MEM), /* repz/repnz */
- + _(scasv, Yv,rvAX, __, FLG_MEM), /* repz/repnz */
- +
- + /* 0xb0 */
- + _(mov, gb, Ib),
- + _(mov, gb, Ib),
- + _(mov, gb, Ib),
- + _(mov, gb, Ib),
- + _(mov, gb, Ib),
- + _(mov, gb, Ib),
- + _(mov, gb, Ib),
- + _(mov, gb, Ib),
- + _(mov, gv, Iv),
- + _(mov, gv, Iv),
- + _(mov, gv, Iv),
- + _(mov, gv, Iv),
- + _(mov, gv, Iv),
- + _(mov, gv, Iv),
- + _(mov, gv, Iv),
- + _(mov, gv, Iv),
- +
- + /* 0xc0 */
- + _(grp2, Eb, Ib, __, FLG_MRR|FLG_GRP),
- + _(grp2, Ev, Ib, __, FLG_MRR|FLG_GRP),
- + _(ret, Iw, __, __, FLG_D64),
- + _(ret, __, __, __, FLG_D64),
- + _(les, Gv, Mp, __, FLG_MRR|FLG_NO64), /* VEX3 */
- + _(lds, Gv, Mp, __, FLG_MRR|FLG_NO64), /* VEX2 */
- + _(mov, Eb, Ib, __, FLG_MRR), /* GRP11 [mov] */
- + _(mov, Ev, Iz, __, FLG_MRR), /* GRP11 [mov] */
- + _(enter, Iw, Ib, __, FLG_D64),
- + _(leave, __, __, __, FLG_D64),
- + _(retf, Iw),
- + _(retf),
- + _(int, i3),
- + _(int, Ib),
- + _(into, __, __, __, FLG_NO64),
- + _(iret),
- +
- + /* 0xd0 */
- + _(grp2, Eb, i1, __, FLG_MRR|FLG_GRP),
- + _(grp2, Ev, i1, __, FLG_MRR|FLG_GRP),
- + _(grp2, Eb, rCL, __, FLG_MRR|FLG_GRP),
- + _(grp2, Eb, rCL, __, FLG_MRR|FLG_GRP),
- + _(aam, Ib, __, __, FLG_NO64),
- + _(aad, Ib, __, __, FLG_NO64),
- + _(salc, __, __, __, FLG_NO64),
- + _(xlat, __, __, __, FLG_MEM),
- + _xxx,
- + _xxx,
- + _xxx,
- + _xxx,
- + _xxx,
- + _xxx,
- + _xxx,
- + _xxx,
- +
- + /* 0xe0 */
- + _(loopnz, Jb, __, __, FLG_D64),
- + _(loopz, Jb, __, __, FLG_D64),
- + _(loop, Jb, __, __, FLG_D64),
- + _(jcxz, Jb, __, __, FLG_D64),
- + _(in, rAL, Ib),
- + _(in, rvAX, Ib),
- + _(out, Ib, rAL),
- + _(out, Ib, rvAX),
- + _(call, Jz, __, __, FLG_D64),
- + _(jmp, Jz, __, __, FLG_D64),
- + _(jmp, Ap, __, __, FLG_NO64),
- + _(jmp, Jb, __, __, FLG_D64),
- + _(in, rAL, rDX),
- + _(in, rvAX, rDX),
- + _(out, rDX, rAL),
- + _(out, rDX, rvAX),
- +
- + /* 0xf0 */
- + _(pfx, __, __, __, FLG_LOCK),
- + _(int, i1),
- + _(pfx, __, __, __, FLG_REP),
- + _(pfx, __, __, __, FLG_REP),
- + _(hlt),
- + _(cmc),
- + _(grp3, __, __, __, FLG_MRR|FLG_GRP), /* Eb */
- + _(grp3, __, __, __, FLG_MRR|FLG_GRP), /* Ev */
- + _(clc),
- + _(stc),
- + _(cli),
- + _(sti),
- + _(cld),
- + _(std),
- + _(grp4, __, __, __, FLG_MRR|FLG_GRP),
- + _(grp5, __, __, __, FLG_MRR|FLG_GRP),
- +};
- +
- +/* instruction state */
- +struct istate {
- + uint32_t op;
- + uint8_t rep;
- + uint8_t rex;
- + uint8_t mrr;
- + uint8_t sib;
- + uint32_t seg;
- + uint32_t flag;
- + uint32_t osz;
- + uint32_t asz;
- + uint32_t mode;
- +
- + /* number of instruction bytes */
- + int nib;
- +
- + uint8_t *pc;
- +};
- +
- +/* Get byte from code stream */
- +static uint64_t
- +get8(struct istate *i) {
- + i->nib++;
- + return *i->pc++;
- +}
- +
- +/* Get operand size (16/32/64-bit) */
- +static int
- +osize(struct istate *i) {
- + switch (i->mode) {
- + case SIZE_QWORD:
- + /* Default opsize or REX.W */
- + if ((i->flag & FLG_D64) || (i->rex & REX_W))
- + return SIZE_QWORD;
- + return (i->flag & FLG_OSZ) ? SIZE_WORD : SIZE_DWORD;
- + case SIZE_DWORD:
- + return (i->flag & FLG_OSZ) ? SIZE_WORD : SIZE_DWORD;
- + case SIZE_WORD:
- + return (i->flag & FLG_OSZ) ? SIZE_DWORD : SIZE_WORD;
- + }
- + return 0;
- +}
- +
- +/* Get address size (16/32/64-bit) */
- +static int
- +asize(struct istate *i) {
- + switch (i->mode) {
- + case SIZE_QWORD:
- + return (i->flag & FLG_ASZ) ? SIZE_DWORD : SIZE_QWORD;
- + case SIZE_DWORD:
- + return (i->flag & FLG_ASZ) ? SIZE_WORD : SIZE_DWORD;
- + case SIZE_WORD:
- + return (i->flag & FLG_ASZ) ? SIZE_DWORD : SIZE_WORD;
- + }
- + return 0;
- +}
- +
- +/*============================*
- + * Decode opcode
- + *============================*/
- +static struct opcode
- +decodeop(struct istate *i)
- +{
- + struct opcode o;
- + int op;
- +
- + for(;;) {
- + op = get8(i);
- + if (op == 0x0f) {
- + /* Decode 2nd byte */
- + op = (op << 8) | get8(i);
- + o = hicodes[op & 0xFF];
- + } else {
- + o = locodes[op];
- + }
- + i->flag |= o.flag;
- + i->op = op;
- +
- + /* Check if this is a prefix opcode */
- + if (o.flag == FLG_SEG)
- + i->seg = o.arg0;
- + else if (o.flag == FLG_REP)
- + i->rep = op;
- + else if (o.flag == FLG_REX && (i->mode == SIZE_QWORD))
- + i->rex = op;
- + else if (!(o.flag & (FLG_OSZ|FLG_ASZ|FLG_LOCK))) {
- + /* get Mod-Reg-RM byte */
- + if (i->flag & FLG_MRR)
- + i->mrr = get8(i);
- + /* Get operand and address size */
- + i->osz = osize(i);
- + i->asz = asize(i);
- + if (!o.mnem)
- + o.mnem = "---";
- + return o;
- + }
- + }
- +}
- +
- +/*
- + * Register names
- + */
- +static const char *bregs[] = {
- + "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
- + "r8b","r9b","r10b","r11b","r12b","r13b","r14b","r15b",
- + "spl","bpl","sil", "dil"
- +};
- +static const char *wregs[] = {
- + "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
- + "r8w","r9w","r10w","r11w","r12w","r13w","r14w","r15w",
- +};
- +static const char *dregs[] = {
- + "eax","ecx","edx", "ebx", "esp", "ebp", "esi", "edi",
- + "r8d","r9d","r10d","r11d","r12d","r13d","r14d","r15d",
- +};
- +static const char *qregs[] = {
- + "rax","rcx","rdx","rbx","rsp","rbp","rsi","rdi",
- + "r8", "r9", "r10","r11","r12","r13","r14","r15",
- +};
- +
- +static const char *
- +regname(int reg) {
- + int vv = reg & VAL_MASK;
- + int sz = reg & SIZE_MASK;
- +
- + if ((sz != SIZE_BYTE && vv >= 16) || vv >= 20)
- + return "xx";
- + switch (sz) {
- + case SIZE_BYTE: return bregs[vv];
- + case SIZE_WORD: return wregs[vv];
- + case SIZE_DWORD:return dregs[vv];
- + case SIZE_QWORD:return qregs[vv];
- + }
- + return "--";
- +}
- +
- +/* Make register */
- +static uint32_t
- +mkreg(struct istate *i, int sz, int vv, int mask) {
- + /* REX byte */
- + if (mask & i->rex)
- + vv += 8;
- + /* Special case for spl/bpl/sil/dil */
- + if (sz == SIZE_BYTE && i->rex && (vv >= 4 && vv <= 7))
- + vv += 12;
- + vv += TYPE_REG+sz;
- + printf("%%%s ", regname(vv));
- + return vv;
- +}
- +
- +/* Get Embedded or Decoded immediate byte */
- +static uint64_t
- +mkimm(struct istate *i, int sz, uint64_t val, const char *fmt) {
- + switch (sz) {
- + case SIZE_BYTE:
- + val = get8(i);
- + break;
- + case SIZE_WORD:
- + val = get8(i);
- + val |= (get8(i) << 8);
- + break;
- + case SIZE_DWORD:
- + val = get8(i);
- + val |= get8(i) << 8;
- + val |= get8(i) << 16;
- + val |= get8(i) << 24;
- + break;
- + case SIZE_QWORD:
- + val = get8(i);
- + val |= get8(i) << 8;
- + val |= get8(i) << 16;
- + val |= get8(i) << 24;
- + val |= get8(i) << 32LL;
- + val |= get8(i) << 40LL;
- + val |= get8(i) << 48LL;
- + val |= get8(i) << 56LL;
- + break;
- + default:
- + /* val already contains value */
- + break;
- + }
- + printf(fmt, val);
- + return val;
- +}
- +
- +/* Decode effective address */
- +static uint32_t
- +mkea(struct istate *i, int sz) {
- + int mm, rrr;
- +
- + mm = mrr_mm(i->mrr);
- + rrr = mrr_rrr(i->mrr);
- + if (mm == 3) {
- + /* register encoding */
- + return mkreg(i, sz, rrr, REX_B);
- + }
- + switch (i->asz) {
- + case SIZE_QWORD:
- + printf("(");
- + if (rrr == 4) {
- + i->sib = get8(i);
- + rrr = sib_bbb(i->sib);
- + printf("%d,", 1 << sib_ss(i->sib));
- + mkreg(i, SIZE_QWORD, sib_iii(i->sib), REX_X);
- + }
- + if (mm == 1) {
- + mkreg(i, SIZE_QWORD, rrr, REX_B);
- + mkimm(i, SIZE_BYTE, 0, "b[$0x%llx]");
- + }
- + else if (mm == 2) {
- + mkreg(i, SIZE_QWORD, rrr, REX_B);
- + mkimm(i, SIZE_DWORD, 0, "d[$0x%llx]");
- + }
- + else if (rrr == 5) {
- + /* Special case RIP-relative */
- + mkimm(i, SIZE_DWORD, 0, "%%rip[$0x%llx]");
- + }
- + else
- + mkreg(i, SIZE_QWORD, rrr, REX_B);
- + printf(") ");
- + break;
- + case SIZE_DWORD:
- + printf("(");
- + if (rrr == 4) {
- + i->sib = get8(i);
- + rrr = sib_bbb(i->sib);
- + printf("%d,", 1 << sib_ss(i->sib));
- + mkreg(i, SIZE_DWORD, sib_iii(i->sib), REX_X);
- + }
- + if (mm == 1) {
- + mkreg(i, SIZE_DWORD, rrr, REX_B);
- + mkimm(i, SIZE_BYTE, 0, "b[$0x%llx]");
- + }
- + else if (mm == 2) {
- + mkreg(i, SIZE_DWORD, rrr, REX_B);
- + mkimm(i, SIZE_DWORD, 0, "d[$0x%llx]");
- + }
- + else if (rrr == 5) {
- + /* Special case d32 */
- + mkimm(i, SIZE_DWORD, 0, "d32[$0x%llx]");
- + }
- + else
- + mkreg(i, SIZE_DWORD, rrr, REX_B);
- + printf(") ");
- + break;
- + }
- + return 0;
- +}
- +
- +/* Decode opcode argument. Return register/immediate if applicable */
- +static uint32_t
- +decodearg(struct istate *i, int arg) {
- + int tt, sz, vv;
- +
- + if (!arg)
- + return 0;
- + tt = arg & TYPE_MASK;
- + sz = arg & SIZE_MASK;
- + vv = arg & VAL_MASK;
- +
- + if (sz == SIZE_VWORD)
- + sz = i->osz;
- + if (sz == SIZE_ZWORD)
- + sz = SIZE_DWORD;
- + switch (tt) {
- + case TYPE_REG: /* specific register */
- + return mkreg(i, sz, vv, 0);
- + case TYPE_EMBREG: /* embedded in opcode */
- + return mkreg(i, sz, i->op & 0x7, REX_B);
- + case TYPE_EAREG: /* embedded in mrr */
- + return mkreg(i, sz, mrr_ggg(i->mrr), REX_R);
- + case TYPE_EA:
- + case TYPE_EAMEM: /* effective address */
- + return mkea(i, sz);
- + case TYPE_IMM: /* immediate value */
- + return mkimm(i, sz, vv, "imm:$0x%llx ");
- + case TYPE_INDEX: /* string operations */
- + break;
- + default:
- + printf("Unknown arg: %.8x ", arg);
- + break;
- + }
- + return 0;
- +}
- +
- +/* Get size of operand in bytes */
- +static int
- +sz(int arg) {
- + switch (arg & SIZE_MASK) {
- + case SIZE_BYTE: return 1;
- + case SIZE_WORD: return 2;
- + case SIZE_DWORD: return 4;
- + case SIZE_QWORD: return 8;
- + }
- + return 0;
- +}
- +
- +/* Map X86 reg to vmm reg */
- +static int vmmreg[] = {
- + VCPU_REGS_RAX,
- + VCPU_REGS_RCX,
- + VCPU_REGS_RDX,
- + VCPU_REGS_RBX,
- + VCPU_REGS_RSP,
- + VCPU_REGS_RBP,
- + VCPU_REGS_RSI,
- + VCPU_REGS_RDI,
- + VCPU_REGS_R8,
- + VCPU_REGS_R9,
- + VCPU_REGS_R10,
- + VCPU_REGS_R11,
- + VCPU_REGS_R12,
- + VCPU_REGS_R13,
- + VCPU_REGS_R14,
- + VCPU_REGS_R15,
- + VCPU_REGS_RSP, /* spl */
- + VCPU_REGS_RBP, /* bpl */
- + VCPU_REGS_RSI, /* sil */
- + VCPU_REGS_RDI, /* dil */
- +};
- +
- +static int
- +Vreg(int arg) {
- + if ((arg & VAL_MASK) < 20)
- + return vmmreg[arg & VAL_MASK];
- + printf("error bad reg: %x\n", arg);
- + return VCPU_REGS_RAX;
- +}
- +
- +/*
- + * Disassemble the opcode that caused an MMIO fault.
- + * Returns the direction, size, and register for the memory handler
- + * to read or write.
- +int
- +dodis(uint8_t *ib, struct insn *ix, int mode) {
- + struct istate i = { 0 };
- + struct opcode o;
- + int a0, a1;
- +
- + /* Get opcode */
- + i.pc = ib;
- + i.mode = mode;
- + o = decodeop(&i);
- + printf("%c%c dis: %.2x %.2x %.2x %.2x | %-6s",
- + (i.osz >> 16), (i.asz >> 16), i.seg, i.rep, i.rex, i.op, o.mnem);
- +
- + /* Decode opcode arguments to register/immed/etc */
- + a0 = decodearg(&i, o.arg0);
- + a1 = decodearg(&i, o.arg1);
- + decodearg(&i, o.arg2);
- + printf(" : %d\n", i.nib);
- +
- +	/* Convert to the format needed by the memory handler: number of
- +	 * instruction bytes, the register to read/write, and the operand size */
- + if (strncmp(o.mnem, "mov", 3))
- + return 0;
- + memset(ix, 0, sizeof(*ix));
- + if ((a0 & TYPE_MASK) == TYPE_REG) {
- + ix->dir = VEI_DIR_IN;
- + ix->size = sz(a0);
- + ix->reg = Vreg(a0);
- + ix->incr = i.nib;
- + }
- + else if ((a1 & TYPE_MASK) == TYPE_REG) {
- + ix->dir = VEI_DIR_OUT;
- + ix->size = sz(a1);
- + ix->reg = Vreg(a1);
- + ix->incr = i.nib;
- + }
- + printf("dir:%d size:%d reg:%d incr:%d\n", ix->dir, ix->size, ix->reg, ix->incr);
- + return 1;
- +}
- +
- diff --git a/usr.sbin/vmd/x86emu.h b/usr.sbin/vmd/x86emu.h
- new file mode 100644
- index 000000000..1f2997375
- --- /dev/null
- +++ b/usr.sbin/vmd/x86emu.h
- @@ -0,0 +1,178 @@
- +/*
- + * Copyright (c) 2020 Jordan Hargrave <[email protected]>
- + *
- + * Permission to use, copy, modify, and distribute this software for any
- + * purpose with or without fee is hereby granted, provided that the above
- + * copyright notice and this permission notice appear in all copies.
- + *
- + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- + */
- +#ifndef __x86emu_h__
- +#define __x86emu_h__
- +
- +struct insn {
- + uint8_t sig[3];
- + int siglen;
- + int dir;
- + int size;
- + int incr;
- + int reg;
- +};
- +
- +/* decode mod-reg-rm byte */
- +#define mrr_mm(x) (((x) >> 6) & 3)
- +#define mrr_ggg(x) (((x) >> 3) & 7)
- +#define mrr_rrr(x) (((x) >> 0) & 7)
- +
- +/* decode scaled-index-base byte */
- +#define sib_ss(x) (((x) >> 6) & 3)
- +#define sib_iii(x) (((x) >> 3) & 7)
- +#define sib_bbb(x) (((x) >> 0) & 7)
- +
- +/* Opcode argument types: register, immediate, memory, etc */
- +enum {
- + TYPE_SHIFT = 24,
- + SIZE_SHIFT = 16,
- +
- + VAL_MASK = 0xFFFF,
- +
- + TYPE_MASK = 0xFF << TYPE_SHIFT,
- + TYPE_REG = 'r' << TYPE_SHIFT,
- + TYPE_EMBREG = 'g' << TYPE_SHIFT,
- + TYPE_EA = 'E' << TYPE_SHIFT,
- + TYPE_EAMEM = 'M' << TYPE_SHIFT,
- + TYPE_EAREG = 'G' << TYPE_SHIFT,
- + TYPE_IMM = 'I' << TYPE_SHIFT,
- + TYPE_JMP = 'J' << TYPE_SHIFT,
- + TYPE_OFFSET = 'O' << TYPE_SHIFT,
- + TYPE_INDEX = '$' << TYPE_SHIFT,
- +
- + SIZE_MASK = 0xFF << SIZE_SHIFT,
- + SIZE_BYTE = 'b' << SIZE_SHIFT,
- + SIZE_WORD = 'w' << SIZE_SHIFT,
- + SIZE_DWORD = 'd' << SIZE_SHIFT,
- + SIZE_QWORD = 'q' << SIZE_SHIFT,
- + SIZE_VWORD = 'v' << SIZE_SHIFT, /* 16/32/64-bit opsize */
- + SIZE_ZWORD = 'z' << SIZE_SHIFT, /* 16/32-bit opsize */
- + SIZE_PTR = 'p' << SIZE_SHIFT,
- + SIZE_SREG = 's' << SIZE_SHIFT,
- + SIZE_CREG = 'C' << SIZE_SHIFT,
- + SIZE_DREG = 'D' << SIZE_SHIFT,
- + SIZE_TREG = 'T' << SIZE_SHIFT,
- +
- + Ap = TYPE_IMM+SIZE_PTR,
- + Mp = TYPE_EAMEM+SIZE_PTR,
- + Sw = TYPE_EAREG+SIZE_SREG,
- +
- + Ob = TYPE_OFFSET+SIZE_BYTE,
- + Ov = TYPE_OFFSET+SIZE_VWORD,
- +
- + Eb = TYPE_EA+SIZE_BYTE,
- + Ew = TYPE_EA+SIZE_WORD,
- + Ev = TYPE_EA+SIZE_VWORD,
- +
- + Gb = TYPE_EAREG+SIZE_BYTE,
- + Gv = TYPE_EAREG+SIZE_VWORD,
- +
- + gb = TYPE_EMBREG+SIZE_BYTE,
- + gv = TYPE_EMBREG+SIZE_VWORD,
- +
- + Ib = TYPE_IMM+SIZE_BYTE,
- + Iw = TYPE_IMM+SIZE_WORD,
- + Iv = TYPE_IMM+SIZE_VWORD,
- + Iz = TYPE_IMM+SIZE_ZWORD,
- + i1 = TYPE_IMM+0x01,
- + i3 = TYPE_IMM+0x03,
- +
- + Jb = TYPE_JMP+SIZE_BYTE,
- + Jz = TYPE_JMP+SIZE_ZWORD,
- +
- + Xb = TYPE_INDEX+SIZE_BYTE,
- + Xv = TYPE_INDEX+SIZE_VWORD,
- + Xz = TYPE_INDEX+SIZE_ZWORD,
- + Yb = TYPE_INDEX+SIZE_BYTE+0x1,
- + Yv = TYPE_INDEX+SIZE_VWORD+0x1,
- + Yz = TYPE_INDEX+SIZE_ZWORD+0x1,
- +
- + /* Registers */
- + rAL = TYPE_REG+SIZE_BYTE,
- + rCL,
- + rDL,
- + rBL,
- + rAH,
- + rCH,
- + rDH,
- + rBH,
- + rSPL = TYPE_REG+SIZE_BYTE+0x14,
- + rBPL,
- + rSIL,
- + rDIL,
- +
- + rAX = TYPE_REG+SIZE_WORD,
- + rCX,
- + rDX,
- + rBX,
- + rSP,
- + rBP,
- + rSI,
- + rDI,
- +
- + rEAX = TYPE_REG+SIZE_DWORD,
- + rECX,
- + rEDX,
- + rEBX,
- + rESP,
- + rEBP,
- + rESI,
- + rEDI,
- +
- + rRAX = TYPE_REG+SIZE_QWORD,
- + rRCX,
- + rRDX,
- + rRBX,
- + rRSP,
- + rRBP,
- + rRSI,
- + rRDI,
- +
- + rvAX = TYPE_REG+SIZE_VWORD,
- +
- + rES = TYPE_REG+SIZE_SREG,
- + rCS,
- + rSS,
- + rDS,
- + rFS,
- + rGS,
- +};
- +
- +enum {
- + REX_B = 0x1, /* mrr.rrr or sib.bbb or op.ggg */
- + REX_X = 0x2, /* sib.iii */
- + REX_R = 0x4, /* mrr.ggg */
- + REX_W = 0x8, /* operand size=64-bit */
- +
- + /* Opcode prefix flags */
- + FLG_REX = 0x0001, /* REX byte */
- + FLG_SEG = 0x0002, /* segment prefix */
- + FLG_OSZ = 0x0004, /* operand size */
- + FLG_ASZ = 0x0008, /* address size */
- + FLG_LOCK = 0x0010, /* lock */
- + FLG_REP = 0x0020, /* repz/repnz/rep */
- +
- + /* Additional opcode flags */
- + FLG_MRR = 0x0100, /* has mod-reg-rm byte */
- + FLG_GRP = 0x0200, /* opcode based on mrr.reg */
- + FLG_MEM = 0x0400, /* non-mrr memory */
- + FLG_D64 = 0x0800, /* default size = 64-bit */
- + FLG_NO64 = 0x1000, /* invalid in 64-bit mode */
- +};
- +
- +int dodis(uint8_t *, struct insn *ix, int mode);
- +
- +#endif
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement